Class: Coradoc::Input::Html::Converters::Markup

Inherits:
Base
  • Object
show all
Defined in:
lib/coradoc/html/input/converters/markup.rb

Direct Known Subclasses

Code, Em, Mark, Strong

Instance Method Summary collapse

Methods inherited from Base

#extract_leading_trailing_whitespace, #extract_text_from_content, #extract_title, #node_has_ancestor?, #textnode_after_start_with?, #textnode_before_end_with?, #treat_children_coradoc, #treat_coradoc, #unconstrained_after?, #unconstrained_before?

Instance Method Details

#coradoc_format_type ⇒ Object

Subclasses should override this to return the format type



96
97
98
# File 'lib/coradoc/html/input/converters/markup.rb', line 96

# Format type name that #to_coradoc passes to
# Coradoc::CoreModel::InlineElement.format_type_class when building the
# inline element. This base implementation returns 'text'; subclasses are
# expected to override it with their own format type.
#
# @return [String] the format type name
def coradoc_format_type
  'text'
end

#extract_text_and_elements(content) ⇒ Object

Extract text content and InlineElements from mixed content array



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/coradoc/html/input/converters/markup.rb', line 57

# Split mixed converter output into its plain text and its inline elements.
#
# Strings are concatenated in order. InlineElement instances are collected
# separately (also in order). Any other CoreModel::Base instance is reduced
# to text via #extract_text_from_model, and anything else is stringified
# with #to_s. Nested arrays are walked recursively so that their strings and
# inline elements are merged into the result instead of being stringified
# as a whole.
#
# @param content [Array, Object] mixed content; a non-Array value is
#   returned untouched as the first tuple member
# @return [Array] a two-element array: the joined text (or the original
#   non-Array content) and the array of collected inline elements
def extract_text_and_elements(content)
  return [content, []] unless content.is_a?(Array)

  text_parts = []
  elements = []

  content.each do |item|
    case item
    when String
      text_parts << item
    when Array
      # A child converter may return several nodes as one array. Walk it
      # rather than letting Array#to_s leak inspect output into the text
      # and drop the inline elements it contains.
      nested_text, nested_elements = extract_text_and_elements(item)
      text_parts << nested_text
      elements.concat(nested_elements)
    when Coradoc::CoreModel::InlineElement
      elements << item
    when Coradoc::CoreModel::Base
      # For other block types, convert to text
      text_parts << extract_text_from_model(item)
    else
      text_parts << item.to_s
    end
  end

  [text_parts.join, elements]
end

#extract_text_from_model(model) ⇒ Object

Extract text from a CoreModel object



81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/coradoc/html/input/converters/markup.rb', line 81

# Reduce a CoreModel object (or any other value) to a plain string.
#
# Resolution order:
# 1. nil yields an empty string.
# 2. A CoreModel::Base whose +content+ is a String yields that content.
# 3. A CoreModel::StructuralElement whose +children+ is an Array yields the
#    concatenation of each child's extracted text (recursive).
# 4. A CoreModel::Base whose +title+ is a String yields that title.
# 5. Anything else yields +model.to_s+.
#
# @param model [Object, nil] the value to extract text from
# @return [String] the extracted text
def extract_text_from_model(model)
  return '' if model.nil?

  core_model = model.is_a?(Coradoc::CoreModel::Base)
  return model.content if core_model && model.content.is_a?(String)

  if model.is_a?(Coradoc::CoreModel::StructuralElement) && model.children.is_a?(Array)
    return model.children.map { |child| extract_text_from_model(child) }.join
  end

  return model.title if core_model && model.title.is_a?(String)

  model.to_s
end

#to_coradoc(node, state = {}) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/coradoc/html/input/converters/markup.rb', line 8

# Convert an inline markup node into CoreModel objects.
#
# The result depends on the node:
# * if the node has an ancestor named in +markup_ancestor_tag_names+, the
#   converted children are returned as-is (no additional wrapper);
# * if the node has no children, a TextElement holding the leading
#   whitespace is returned, or nil when there is none;
# * otherwise an inline element of the class selected by
#   +coradoc_format_type+ is built. It is returned alone, or inside an
#   array together with TextElements for the leading and/or trailing
#   whitespace when those are present.
#
# @param node [Object] the node being converted; must respond to +children+
# @param state [Hash] conversion state forwarded to +treat_children_coradoc+
# @return [Object, Array, nil] see the cases above
def to_coradoc(node, state = {})
  unconstrained_start = unconstrained_before?(node)
  unconstrained_end = unconstrained_after?(node)

  lead, trail = extract_leading_trailing_whitespace(node)

  # Surrounding whitespace is carried as TextElements placed next to the
  # formatted element rather than inside it.
  lead_element = lead ? Coradoc::CoreModel::TextElement.new(content: lead) : nil
  trail_element = trail ? Coradoc::CoreModel::TextElement.new(content: trail) : nil

  children_content = treat_children_coradoc(node, state)

  return children_content if node_has_ancestor?(node, markup_ancestor_tag_names)

  # A childless node contributes only its leading whitespace, if any.
  return lead_element if node.children.empty?

  # Unconstrained when a side is flagged as such and no whitespace was
  # extracted on that side.
  unconstrained = (unconstrained_start && lead_element.nil?) ||
                  (unconstrained_end && trail_element.nil?)

  # Text goes into +content+; inline elements travel separately.
  text, inline_children = extract_text_and_elements(children_content)

  element_class = Coradoc::CoreModel::InlineElement.format_type_class(coradoc_format_type)
  formatted = element_class.new(
    content: text,
    nested_elements: (inline_children unless inline_children.empty?),
    metadata: { unconstrained: unconstrained }
  )

  pieces = [lead_element, formatted, trail_element].compact
  return pieces.first if pieces.length == 1

  pieces
end