Class: Coradoc::Input::Html::Converters::Markup

Inherits:
Base
  • Object
show all
Defined in:
lib/coradoc/html/input/converters/markup.rb

Direct Known Subclasses

Code, Em, Mark, Strong

Instance Method Summary collapse

Methods inherited from Base

#convert, #escape_text, #extract_leading_trailing_whitespace, #extract_title, #node_has_ancestor?, #textnode_after_start_with?, #textnode_before_end_with?, #treat, #treat_children, #treat_children_coradoc, #treat_coradoc, #unconstrained_after?, #unconstrained_before?

Instance Method Details

#coradoc_format_type ⇒ Object

Subclasses should override this to return the format type



100
101
102
# File 'lib/coradoc/html/input/converters/markup.rb', line 100

# Format type given to the InlineElement that #to_coradoc builds for a node.
#
# This base implementation returns the plain 'text' type; subclasses should
# override it to return their own format type.
#
# @return [String] the format type name
def coradoc_format_type
  'text'
end

#extract_text_and_elements(content) ⇒ Object

Extract text content and InlineElements from mixed content array



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/coradoc/html/input/converters/markup.rb', line 61

# Splits a mixed content array into its textual part and its inline elements.
#
# Strings are kept verbatim, InlineElements are set aside (not rendered into
# the text), other CoreModel objects are flattened through
# #extract_text_from_model, and anything else is stringified with +to_s+.
#
# @param content [Array, Object] converted children; a non-Array value is
#   handed back untouched
# @return [Array] a two-element array: +[text, inline_elements]+ for Array
#   input (text fragments joined in encounter order), or +[content, []]+ when
#   +content+ is not an Array
def extract_text_and_elements(content)
  return [content, []] unless content.is_a?(Array)

  inline_elements = []
  fragments = content.each_with_object([]) do |entry, collected|
    case entry
    when String then collected << entry
    # Inline elements are returned separately instead of being flattened.
    when Coradoc::CoreModel::InlineElement then inline_elements << entry
    when Coradoc::CoreModel::Base then collected << extract_text_from_model(entry)
    else collected << entry.to_s
    end
  end

  [fragments.join, inline_elements]
end

#extract_text_from_model(model) ⇒ Object

Extract text from a CoreModel object



85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/coradoc/html/input/converters/markup.rb', line 85

# Produces a plain-text rendering of a CoreModel object.
#
# Sources are tried in a fixed order and the first that applies wins:
# 1. a String +content+ on a CoreModel::Base,
# 2. the concatenated text of an Array of +children+ on a StructuralElement
#    (each child is resolved recursively),
# 3. a String +title+ on a CoreModel::Base,
# 4. the object's +to_s+.
#
# @param model [Object, nil] object to flatten
# @return [String] extracted text; the empty string when +model+ is nil
def extract_text_from_model(model)
  return '' if model.nil?

  core_model = model.is_a?(Coradoc::CoreModel::Base)
  return model.content if core_model && model.content.is_a?(String)

  if model.is_a?(Coradoc::CoreModel::StructuralElement) && model.children.is_a?(Array)
    return model.children.map { |child| extract_text_from_model(child) }.join
  end

  return model.title if core_model && model.title.is_a?(String)

  model.to_s
end

#to_coradoc(node, state = {}) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/coradoc/html/input/converters/markup.rb', line 8

# Converts an inline markup node into CoreModel inline element(s).
#
# Leading/trailing whitespace reported by
# #extract_leading_trailing_whitespace is wrapped in separate 'text'
# InlineElements so that it stays outside the formatted element.
#
# @param node [Object] node being converted; only +children+ is read from it
#   directly, everything else goes through helper methods
# @param state [Hash] conversion state forwarded to #treat_children_coradoc
# @return [Object, Array, nil]
#   - the converted children as-is when #node_has_ancestor? reports one of
#     +markup_ancestor_tag_names+ for the node;
#   - for a node without children, a 'text' InlineElement holding the leading
#     whitespace, or nil when there is none;
#   - otherwise an InlineElement of type #coradoc_format_type, returned alone
#     or, when whitespace elements surround it, as an Array
#     +[leading, element, trailing]+ with absent parts removed.
def to_coradoc(node, state = {})
  open_before = unconstrained_before?(node)
  open_after = unconstrained_after?(node)
  leading_ws, trailing_ws = extract_leading_trailing_whitespace(node)

  # Whitespace travels as its own 'text' element; nil when there is none.
  as_text_element = lambda do |whitespace|
    next unless whitespace

    Coradoc::CoreModel::InlineElement.new(format_type: 'text', content: whitespace)
  end
  leading_whitespace = as_text_element.call(leading_ws)
  trailing_whitespace = as_text_element.call(trailing_ws)

  content = treat_children_coradoc(node, state)

  return content if node_has_ancestor?(node, markup_ancestor_tag_names)
  # A childless node yields only its leading whitespace (or nothing).
  return as_text_element.call(leading_ws) if node.children.empty?

  # The unconstrained flag is set only when a side reported as unconstrained
  # has no whitespace element on that side.
  unconstrained = (open_before && leading_whitespace.nil?) ||
                  (open_after && trailing_whitespace.nil?)

  # Plain text goes into content; nested inline elements are carried separately.
  text_content, nested = extract_text_and_elements(content)

  formatted = Coradoc::CoreModel::InlineElement.new(
    format_type: coradoc_format_type,
    content: text_content,
    nested_elements: (nested unless nested.empty?),
    metadata: { unconstrained: unconstrained }
  )

  pieces = [leading_whitespace, formatted, trailing_whitespace].compact
  pieces.length == 1 ? pieces.first : pieces
end