Class: Coradoc::Markdown::Transformer

Inherits:
Parslet::Transform
  • Object
show all
Defined in:
lib/coradoc/markdown/transformer.rb

Overview

Transformer converts Parslet AST into Markdown Document Model objects.

This transformer takes the raw output from the BlockParser/InlineParser and converts it into semantic model objects (Heading, Paragraph, etc.).

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.ald_registryObject

ALD storage - maps name to AttributeList



143
144
145
# File 'lib/coradoc/markdown/transformer.rb', line 143

# Reader for the ALD (Attribute List Definition) registry.
#
# @return [Hash, nil] the current value of @ald_registry; nil until something
#   assigns it (transform_document resets it to an empty Hash)
def ald_registry
  @ald_registry
end

Class Method Details

.apply_ial_to_element(element, ial_content) ⇒ Object

Apply IAL attributes to an element



440
441
442
443
444
445
446
# File 'lib/coradoc/markdown/transformer.rb', line 440

# Copies parsed IAL (inline attribute list) data onto a model element.
#
# The IAL text is stringified and parsed with parse_ial_content. Each of the
# :id, :classes and :attributes entries is assigned through the element's
# matching writer only when the parsed value is truthy, so absent entries
# leave the element untouched.
#
# @param element [Object] model exposing id=, classes= and attributes=
# @param ial_content [#to_s] raw IAL text
# @return [Object] the same element that was passed in
def apply_ial_to_element(element, ial_content)
  parsed = parse_ial_content(ial_content.to_s)
  %i[id classes attributes].each do |field|
    value = parsed[field]
    element.public_send(:"#{field}=", value) if value
  end
  element
end

.extract_code(code_block) ⇒ Object

Extract code from code_block structure



515
516
517
518
519
520
521
522
523
524
# File 'lib/coradoc/markdown/transformer.rb', line 515

# Flattens a parsed code_block subtree into its source text.
#
# A line is either a Hash (its :ln entry is used) or any other object
# (stringified with to_s). An Array of lines is joined with newlines; a single
# line is returned on its own.
#
# @param code_block [Array, Hash, #to_s] parsed code block content
# @return [String] the code text
def extract_code(code_block)
  line_text = ->(line) { (line.is_a?(Hash) ? line[:ln] : line).to_s }
  return line_text.call(code_block) unless code_block.is_a?(Array)

  code_block.map(&line_text).join("\n")
end

.extract_row_cells(row) ⇒ Object



390
391
392
393
394
395
396
# File 'lib/coradoc/markdown/transformer.rb', line 390

# Extracts the stripped text of every cell in a table row.
#
# Each cell is either a Hash (its :cell entry is used) or any other object
# (stringified). A row given as one bare Hash is treated as a one-cell row:
# Kernel#Array would otherwise turn the Hash into [key, value] pairs and the
# cell text would come out as a stringified pair.
#
# @param row [Array, Hash, nil] parsed row content
# @return [Array<String>] stripped cell texts; empty for a nil row
def extract_row_cells(row)
  cells = row.is_a?(Hash) ? [row] : Array(row)
  cells.map { |cell| (cell.is_a?(Hash) ? cell[:cell] : cell).to_s.strip }
end

.extract_text(content) ⇒ Object

Extract text content from nested structures



527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
# File 'lib/coradoc/markdown/transformer.rb', line 527

# Recursively collapses a nested parser structure into plain text.
#
# - Array: every member is extracted and the results are concatenated.
# - Hash: the :text entry wins, then :ln; otherwise all values are extracted
#   and concatenated in insertion order.
# - Anything else (including parser slices): to_s.
#
# @param content [Array, Hash, #to_s] parser output
# @return [String] concatenated text
def extract_text(content)
  return content.map { |part| extract_text(part) }.join if content.is_a?(Array)
  return content.to_s unless content.is_a?(Hash)

  leaf_key = %i[text ln].find { |key| content.key?(key) }
  return content[leaf_key].to_s if leaf_key

  content.values.map { |value| extract_text(value) }.join
end

.extract_text_from_p(p) ⇒ Object

Extract text from paragraph structure



503
504
505
506
507
508
509
510
511
512
# File 'lib/coradoc/markdown/transformer.rb', line 503

# Extracts the text of a parsed paragraph.
#
# A paragraph is a single line or an Array of lines. A Hash line contributes
# its :ln entry, any other line its to_s. Multiple lines are joined with
# newlines.
#
# @param p [Hash, Array, #to_s] parsed paragraph content
# @return [String] paragraph text
def extract_text_from_p(p)
  multiline = p.is_a?(Array)
  lines = multiline ? p : [p]
  texts = lines.map { |line| line.is_a?(Hash) ? line[:ln].to_s : line.to_s }
  multiline ? texts.join("\n") : texts.first
end

.heading_level(heading) ⇒ Object

ATX heading: level is the count of leading ‘#’ chars. Setext heading: level is 1 for ‘=’ underline, 2 for ‘-’ underline.



324
325
326
327
328
329
330
331
# File 'lib/coradoc/markdown/transformer.rb', line 324

# Derives the heading level from the heading marker.
#
# ATX heading: the level is the number of leading '#' characters. Only the
# leading run is counted, so a marker with trailing characters (e.g. "## ")
# still yields 2 rather than the full string length.
# Setext heading: level 1 for an '=' underline, 2 for a '-' underline.
# Any other marker falls back to its string length.
#
# @param heading [#to_s] the heading marker captured by the parser
# @return [Integer] heading level
def heading_level(heading)
  marker = heading.to_s
  hashes = marker[/\A#+/]
  return hashes.length if hashes
  return 1 if marker.start_with?('=')
  return 2 if marker.start_with?('-')

  marker.length
end

.normalize_list_item_nodes(li) ⇒ Object

Normalize the various shapes a list-item node can take:

Hash {p: ...}     -> [{p: ...}]
Hash {ul: ...}    -> [{ul: ...}]  (nested list, no text)
Array [...]       -> [...] (mixed paragraph + nested list nodes)
String / Slice    -> [s]


373
374
375
376
377
378
379
# File 'lib/coradoc/markdown/transformer.rb', line 373

# Normalizes a list-item node into an Array of nodes.
#
#   Array [...]             -> returned as-is (mixed paragraph + nested list)
#   Hash, String, Slice, .. -> wrapped in a one-element Array
#
# Every non-Array shape is wrapped the same way, so no per-key inspection of
# Hash nodes is needed here.
#
# @param li [Array, Hash, Object] list-item content
# @return [Array] list of nodes
def normalize_list_item_nodes(li)
  li.is_a?(Array) ? li : [li]
end

.parse_extension_options(content) ⇒ Object

Parse extension options string into hash



455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
# File 'lib/coradoc/markdown/transformer.rb', line 455

# Parses an extension options string such as
#   key=value other="quoted text" third='single quoted'
# into a Hash with String keys and String values.
#
# A key is a word character followed by word characters or hyphens.
# Whitespace around '=' is allowed. A value is a double-quoted string, a
# single-quoted string (neither may contain a backslash) or a run of
# non-whitespace characters; a key with nothing after '=' maps to ''.
# Input that does not start a key=value pair is skipped one character at a
# time, which guarantees the scan always advances.
#
# @param content [String, nil] raw options text
# @return [Hash{String => String}] parsed options; empty for nil or empty input
def parse_extension_options(content)
  return {} if content.nil? || content.empty?

  options = {}
  input = StringScanner.new(content.strip)

  loop do
    input.skip(/\s+/)
    break if input.eos?

    unless input.scan(/(\w[\w-]*)\s*=\s*/)
      # Not the start of a pair: drop one character and retry.
      input.scan(/./)
      next
    end

    name = input[1]
    # The first alternative that matches is the scanner's last match, so
    # input[1] below refers to its capture group.
    matched = input.scan(/"([^"\\]*)"/) || input.scan(/'([^'\\]*)'/) || input.scan(/(\S+)/)
    options[name] = matched ? input[1] : ''
  end

  options
end

.parse_ial_content(content) ⇒ Object

Parse an IAL content string into its components. Delegates to the shared IalParser for consistent parsing.



450
451
452
# File 'lib/coradoc/markdown/transformer.rb', line 450

# Parses an IAL content string by delegating to
# ParserUtil::IalParser.parse_to_hash and returns that call's result
# unchanged.
#
# Callers in this file read the :id, :classes and :attributes entries of the
# result and splat it as keyword arguments, so it is presumably a
# Symbol-keyed Hash -- confirm against IalParser.
#
# @param content [String] IAL text
# @return [Object] whatever IalParser.parse_to_hash returns
def parse_ial_content(content)
  ParserUtil::IalParser.parse_to_hash(content)
end

.parse_ial_element(ial_content) ⇒ Object

Parse IAL element (can be a reference or full IAL)



487
488
489
490
491
492
493
494
495
496
497
498
499
500
# File 'lib/coradoc/markdown/transformer.rb', line 487

# Resolves an IAL that is either a reference to a registered ALD or a full
# inline attribute list.
#
# When the stripped content is a single bare word (no '.' or '#') and that
# name is present in the ALD registry, the registered entry is returned.
# Otherwise the content is parsed and wrapped in a new AttributeList.
#
# The registry lookup is nil-safe: if no registry has been set up yet (this
# method reached before transform_document ran), the content is simply parsed
# instead of raising NoMethodError on nil.
#
# @param ial_content [#to_s] raw IAL text
# @return [Object] the registered entry, or a new AttributeList
def parse_ial_element(ial_content)
  content = ial_content.to_s.strip
  if content.match?(/\A\w+\z/) && @ald_registry&.key?(content)
    @ald_registry[content]
  else
    attrs = parse_ial_content(content)
    AttributeList.new(
      id: attrs[:id],
      classes: attrs[:classes],
      attributes: attrs[:attributes]
    )
  end
end

.register_ald(element) ⇒ Object

Register an ALD (Attribute List Definition)



209
210
211
212
213
214
# File 'lib/coradoc/markdown/transformer.rb', line 209

# Registers an ALD (Attribute List Definition) under its name.
#
# The element's :ial text is parsed with parse_ial_content and the result is
# splatted, together with the name, into a new AttributeList that is stored
# in the ALD registry.
#
# The registry is created on demand so that registering an ALD before
# transform_document has initialised it does not raise NoMethodError on nil.
#
# @param element [Hash] parser node with :ald_name and :ial entries
# @return [AttributeList] the stored attribute list
def register_ald(element)
  name = element[:ald_name].to_s
  attrs = parse_ial_content(element[:ial].to_s)
  @ald_registry ||= {}
  @ald_registry[name] = AttributeList.new(name: name, **attrs)
end

.transform_definition_list(dl_content) ⇒ Object

Transform definition list



399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
# File 'lib/coradoc/markdown/transformer.rb', line 399

# Builds a DefinitionList from the flat node sequence emitted by the parser.
#
# The parser outputs terms and definitions as separate items, e.g.
#   [{:def_term=>...}, {:def_content=>...}, {:def_content=>...}, ...]
# Each :def_term node opens a new group and every following :def_content
# node is attached to the most recent term. Non-Hash nodes and Hashes with
# neither key are ignored. Definitions that appear before the first term are
# still built but never emitted.
#
# @param dl_content [Array, Object] parsed definition-list nodes
# @return [DefinitionList] list whose items are DefinitionTerm objects
def transform_definition_list(dl_content)
  groups = []   # [raw_term_text, definitions] pairs in document order
  bucket = []   # definitions of the term currently being filled

  Array(dl_content).each do |node|
    next unless node.is_a?(Hash)

    if node.key?(:def_term)
      bucket = []
      groups << [extract_text(node[:def_term]), bucket]
    elsif node.key?(:def_content)
      body = extract_text(node[:def_content])
      bucket << DefinitionItem.new(content: body.strip)
    end
  end

  terms = groups.map do |raw_term, definitions|
    DefinitionTerm.new(text: raw_term.strip, definitions: definitions)
  end

  DefinitionList.new(items: terms)
end

.transform_document(ast) ⇒ Coradoc::Markdown::Document

Transform AST into a Document model

Parameters:

  • ast (Array)

    The parsed AST from BlockParser

Returns:



149
150
151
152
153
# File 'lib/coradoc/markdown/transformer.rb', line 149

# Transforms a parsed AST into a Document model.
#
# The ALD registry is reset to an empty Hash first, so attribute list
# definitions from a previous run are not carried over. Every top-level node
# is passed through transform_element; nodes that yield nil (for example ALD
# definitions, which are only registered) are dropped.
#
# @param ast [Array] the parsed AST from BlockParser
# @return [Document] document wrapping the transformed blocks
def transform_document(ast)
  @ald_registry = {}
  transformed = Array(ast).map { |node| transform_element(node) }
  Document.new(blocks: transformed.reject(&:nil?))
end

.transform_element(element) ⇒ Object

Transform a single element



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/coradoc/markdown/transformer.rb', line 156

# Transforms a single AST node into a model object.
#
# Hash nodes are dispatched in this order:
#   1. ALD definition (:ald_name)  -> registered, yields nil
#   2. standalone IAL (:ial)       -> resolved via parse_ial_element
#   3. extension (:extension or :ext_name)
#   4. math block (:math_content)
#   5. footnote reference (:fn_ref)
#   6. block rules in try_transform
#   7. fallback: :ln -> Paragraph, :text -> Text, otherwise nil
# Arrays are transformed member by member with nil results removed. Any other
# object (including parser slices) becomes a Text node.
#
# An :ial entry is treated as standalone only when the node carries none of
# the block keys for which try_transform attaches the IAL itself (:p,
# :heading, :code_block, :block_quote). Otherwise the block would be replaced
# by its own attribute list.
#
# @param element [Hash, Array, Object, nil] AST node
# @return [Object, Array, nil] model object(s), or nil when nothing is produced
def transform_element(element)
  return nil if element.nil?

  case element
  when Hash
    # ALDs only feed the registry; they produce no output node.
    if element.key?(:ald_name)
      register_ald(element)
      return nil
    end

    # IAL on its own line. parse_ial_element already resolves ALD
    # references, so its result is returned directly.
    ial_owner_keys = %i[p heading code_block block_quote]
    if element.key?(:ial) && ial_owner_keys.none? { |key| element.key?(key) }
      return parse_ial_element(element[:ial])
    end

    # Extension, either wrapped under :extension or with direct keys.
    return transform_extension(element[:extension]) if element.key?(:extension)
    return transform_extension(element) if element.key?(:ext_name)

    return Math.block(extract_text(element[:math_content])) if element.key?(:math_content)

    # Inline footnote reference.
    return FootnoteReference.new(id: element[:fn_ref].to_s) if element.key?(:fn_ref)

    transformed = try_transform(element)
    return transformed if transformed

    # No rule matched: fall back to raw text carriers.
    if element.key?(:ln)
      Paragraph.new(text: element[:ln].to_s)
    elsif element.key?(:text)
      Text.new(content: element[:text].to_s)
    end
  when Array
    element.map { |child| transform_element(child) }.compact
  else
    Text.new(content: element.to_s)
  end
end

.transform_extension(element) ⇒ Object

Transform extension element



217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# File 'lib/coradoc/markdown/transformer.rb', line 217

# Builds an Extension model from an extension node.
#
# Options are parsed with parse_extension_options from the stringified
# :ext_options entry. A missing (nil/false) entry, or the empty Array the
# parser emits when no options were given, yields an empty Hash without
# parsing. The body is stringified when present and left nil otherwise.
#
# @param element [Hash] node with :ext_name and optional :ext_options, :ext_body
# @return [Extension] the extension model
def transform_extension(element)
  ext_name = element[:ext_name].to_s
  raw_options = element[:ext_options]
  no_options = !raw_options || (raw_options.is_a?(Array) && raw_options.empty?)
  parsed_options = no_options ? {} : parse_extension_options(raw_options.to_s)

  Extension.new(
    name: ext_name,
    options: parsed_options,
    content: element[:ext_body]&.to_s
  )
end

.transform_list(items, ordered:) ⇒ Object

Transform a list (ul/ol). Items arrive as `[{p: {ln: "x"}}, …]`, as bare `…` subtrees, or as an Array of mixed paragraph + nested-list nodes when the item has a sublist.



336
337
338
339
340
341
342
343
# File 'lib/coradoc/markdown/transformer.rb', line 336

# Transforms a ul/ol subtree into a List model.
#
# A non-Array argument is treated as a one-item list. For each node, a Hash
# contributes its :li entry and any other object is used as-is; the result is
# handed to transform_list_item.
#
# @param items [Array, Hash, Object] parsed list content
# @param ordered [Boolean] true for an ordered list, false for unordered
# @return [List] list model holding the transformed items
def transform_list(items, ordered:)
  nodes = items.is_a?(Array) ? items : [items]
  entries = nodes.map do |node|
    transform_list_item(node.is_a?(Hash) ? node[:li] : node)
  end
  List.new(ordered: ordered, items: entries)
end

.transform_list_item(li) ⇒ Object



345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
# File 'lib/coradoc/markdown/transformer.rb', line 345

# Transforms one list-item subtree into a ListItem.
#
# The item is first normalized into an Array of nodes. Paragraph nodes (:p)
# and non-Hash nodes contribute text fragments, which are joined with a
# single space and stripped. A nested :ul or :ol node becomes the item's
# sublist; if several are present the last one wins. Hash nodes with none of
# these keys are ignored.
#
# @param li [Array, Hash, Object] parsed list-item content
# @return [ListItem] item with text and, when present, a sublist
def transform_list_item(li)
  fragments = []
  nested = nil

  normalize_list_item_nodes(li).each do |node|
    unless node.is_a?(Hash)
      fragments << node.to_s
      next
    end

    # Same precedence as checking :p, then :ul, then :ol.
    case %i[p ul ol].find { |key| node.key?(key) }
    when :p  then fragments << extract_text_from_p(node[:p])
    when :ul then nested = transform_list(node[:ul], ordered: false)
    when :ol then nested = transform_list(node[:ol], ordered: true)
    end
  end

  ListItem.new(text: fragments.join(' ').strip).tap do |item|
    item.sublist = nested if nested
  end
end

.transform_table(element) ⇒ Object

Transform a table. Header row, separator row, and body rows.



382
383
384
385
386
387
388
# File 'lib/coradoc/markdown/transformer.rb', line 382

# Transforms a table node into a Table model.
#
# Header cells come from element[:table_header][:row]. Each body node
# contributes one row: the cells at [:table_body_row][:row], joined with
# ' | ' into a single String.
#
# A :table_body given as one bare Hash is treated as a single body row,
# because Kernel#Array would otherwise turn it into [key, value] pairs. Body
# lookups use dig, like the header lookup, so a node without :table_body_row
# yields an empty row instead of raising NoMethodError on nil.
#
# @param element [Hash] node with :table_header and optional :table_body
# @return [Table] table with header cells and joined body rows
def transform_table(element)
  headers = extract_row_cells(element.dig(:table_header, :row))
  body = element[:table_body]
  body_nodes = body.is_a?(Hash) ? [body] : Array(body)
  body_rows = body_nodes.map do |body_node|
    extract_row_cells(body_node.dig(:table_body_row, :row)).join(' | ')
  end
  Table.new(headers: headers, rows: body_rows)
end

.try_transform(element) ⇒ Object

Try to transform using the defined rules



238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/coradoc/markdown/transformer.rb', line 238

# Tries the block-level rules against a Hash node and returns the first
# matching model, or nil when the node is not a Hash or no rule applies.
#
# Rules are tested in this order: heading, horizontal rule, code block
# (fenced when :info is present, plain otherwise), block quote, paragraph,
# definition list, unordered list, ordered list, table, footnote definition,
# abbreviation definition. Headings, code blocks, block quotes and paragraphs
# additionally get the node's :ial applied when that key is present.
#
# @param element [Object] AST node
# @return [Object, nil] the model built by the first matching rule, or nil
def try_transform(element)
  return nil unless element.is_a?(Hash)

  # Shared tail for the block types that accept an IAL.
  finish = lambda do |model|
    apply_ial_to_element(model, element[:ial]) if element.key?(:ial)
    model
  end

  if element.key?(:heading)
    depth = heading_level(element[:heading])
    title = extract_text(element[:text]).strip
    finish.call(Heading.new(level: depth, text: title))
  elsif element.key?(:hr)
    HorizontalRule.new(style: '---')
  elsif element.key?(:code_block)
    # Only fenced blocks carry :info; the language is passed just for those.
    fence = element.key?(:info) ? { language: element[:info].to_s.strip } : {}
    source = extract_code(element[:code_block])
    finish.call(CodeBlock.new(**fence, code: source))
  elsif element.key?(:block_quote)
    quoted = element[:block_quote]
    children = if quoted.is_a?(Array)
                 quoted.map { |child| transform_element(child) }
               else
                 [transform_element(quoted)]
               end
    # Models exposing a :text attribute contribute it; everything else is
    # stringified.
    body = children.compact.map do |child|
      text_model = child.is_a?(Base) && child.class.attributes.key?(:text)
      text_model ? child.text : child.to_s
    end.join("\n")
    finish.call(Blockquote.new(content: body))
  elsif element.key?(:p)
    finish.call(Paragraph.new(text: extract_text_from_p(element[:p])))
  elsif element.key?(:dl)
    transform_definition_list(element[:dl])
  elsif element.key?(:ul)
    transform_list(element[:ul], ordered: false)
  elsif element.key?(:ol)
    transform_list(element[:ol], ordered: true)
  elsif element.key?(:table_header)
    transform_table(element)
  elsif element.key?(:fn_id)
    # Footnote definition, optionally spanning continuation lines.
    note = if element[:fn_content_continued]
             pieces = [element[:fn_content], *Array(element[:fn_content_continued])]
             pieces.map { |piece| extract_text(piece) }.join("\n")
           else
             extract_text(element[:fn_content])
           end
    Footnote.new(id: element[:fn_id].to_s, content: note.strip)
  elsif element.key?(:abbr_term)
    Abbreviation.new(
      term: element[:abbr_term].to_s,
      definition: element[:abbr_def].to_s.strip
    )
  end
end