Class: Adocconf::Extractor

Inherits:
Object
  • Object
show all
Defined in:
lib/adocconf/extract.rb

Instance Method Summary collapse

Constructor Details

#initialize(document) ⇒ Extractor

Returns a new instance of Extractor.



3
4
5
# File 'lib/adocconf/extract.rb', line 3

# Builds an extractor around the given document.
#
# @param document [Object] root node; stored and later handed to
#   #extract_node when #extract is called
def initialize(document)
  @document = document
end

Instance Method Details

#child_sections(node) ⇒ Object



21
22
23
24
# File 'lib/adocconf/extract.rb', line 21

# Collects the direct children of +node+ that are sections.
#
# @param node [Object] any object; it is only inspected when it responds
#   to +blocks+
# @return [Array] the children whose +context+ equals +:section+, or an
#   empty array when +node+ has no +blocks+ method
def child_sections(node)
  if node.respond_to?(:blocks)
    node.blocks.select { |child| child.context == :section }
  else
    []
  end
end

#extract ⇒ Object



7
8
9
# File 'lib/adocconf/extract.rb', line 7

# Runs the extraction starting from the document given to the constructor.
#
# @return [Object] whatever #extract_node returns for the stored document
def extract
  root = @document
  extract_node(root)
end

#extract_container(_node, sections) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/adocconf/extract.rb', line 31

# Builds a hash from a list of sections, keyed by each section's slugified
# title, with the value being the recursive extraction of that section.
#
# @param _node [Object] unused; kept for interface compatibility
# @param sections [Array] objects responding to +title+
# @return [Hash] slug => result of #extract_node for that section
# @raise [DuplicateKeyError] when two sections produce the same slug
def extract_container(_node, sections)
  sections.each_with_object({}) do |section, tree|
    slug = slugify(section.title)
    # Check before recursing so a clash is reported without extracting the
    # duplicate section first.
    raise DuplicateKeyError, "Duplicate section key: #{slug}" if tree.key?(slug)

    tree[slug] = extract_node(section)
  end
end

#extract_description_text(description) ⇒ Object



55
56
57
58
59
60
61
62
63
# File 'lib/adocconf/extract.rb', line 55

# Returns the plain text value of a description-list entry.
#
# The text is used whenever the object exposes a non-nil +text+. Only when
# there is no text at all are nested blocks checked. Previously any object
# responding to +text+ short-circuited to +text.to_s.strip+, so an entry
# with nil text and nested blocks silently became "" and the error branch
# was unreachable. This now matches #extract_dlist_value.
#
# @param description [Object, nil] description entry; may be nil
# @return [String] stripped text, or "" when there is neither text nor blocks
# @raise [UnsupportedNodeError] when there is no text but nested blocks exist
def extract_description_text(description)
  text = description.text if description.respond_to?(:text)
  return text.to_s.strip unless text.nil?

  if description.respond_to?(:blocks) && description.blocks&.any?
    raise UnsupportedNodeError, "Complex description list values are not supported"
  end

  ""
end

#extract_dlist(block) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/adocconf/extract.rb', line 89

# Converts a description list into a hash of term text => description text.
#
# Each element of +block.items+ is destructured as a (terms, description)
# pair; only the first term of each item is used as the key.
#
# @param block [Object] object responding to +items+
# @return [Hash{String => Object}] stripped term text mapped to the result
#   of #extract_description_text
# @raise [InvalidStructureError] when an item has nil or empty terms
# @raise [DuplicateKeyError] when two items resolve to the same key
def extract_dlist(block)
  block.items.each_with_object({}) do |(terms, description), pairs|
    raise InvalidStructureError, "Description list item is missing a term" if terms.nil? || terms.empty?

    name = extract_term_text(terms.first).strip
    raise DuplicateKeyError, "Duplicate key in description list: #{name}" if pairs.key?(name)

    pairs[name] = extract_description_text(description)
  end
end

#extract_dlist_term(item) ⇒ Object



109
110
111
112
113
114
115
# File 'lib/adocconf/extract.rb', line 109

# Returns the first term of a description-list item.
#
# @param item [Object] object that may respond to +terms+; when it does not,
#   it is treated as having no terms
# @return [Object] the first term's +text+ when it responds to +text+,
#   otherwise the term converted with +to_s+
# @raise [InvalidStructureError] when the terms are nil or empty
def extract_dlist_term(item)
  candidates = item.respond_to?(:terms) ? item.terms : []
  if candidates.nil? || candidates.empty?
    raise InvalidStructureError, "Description list item is missing a term"
  end

  first_term = candidates.first
  return first_term.text if first_term.respond_to?(:text)

  first_term.to_s
end

#extract_dlist_value(item) ⇒ Object



117
118
119
120
121
122
123
# File 'lib/adocconf/extract.rb', line 117

# Returns the stripped text of a description-list value.
#
# @param item [Object] object that may respond to +text+ and/or +blocks+
# @return [String] stripped +text+ when it is present (truthy); "" when
#   there is neither text nor nested blocks
# @raise [UnsupportedNodeError] when there is no text but +blocks+ is non-empty
def extract_dlist_value(item)
  if item.respond_to?(:text)
    raw = item.text
    return raw.strip if raw
  end

  has_blocks = item.respond_to?(:blocks) && item.blocks&.any?
  raise UnsupportedNodeError, "Complex description list values are not supported" if has_blocks

  ""
end

#extract_node(node) ⇒ Object



11
12
13
14
15
16
17
18
19
# File 'lib/adocconf/extract.rb', line 11

# Extracts a node, dispatching on whether it has child sections.
#
# @param node [Object] node to extract
# @return [Object] result of #extract_container when #child_sections finds
#   any sections, otherwise the result of #extract_value_section
def extract_node(node)
  subsections = child_sections(node)
  return extract_container(node, subsections) if subsections.any?

  extract_value_section(node)
end

#extract_table(block) ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/adocconf/extract.rb', line 138

# Converts a table into an array of row hashes keyed by the header cells.
#
# Only the first header row is used for keys; body rows are read from
# +block.rows[:body]+. Header and cell text is converted with +to_s+ and
# stripped.
#
# Duplicate header names are rejected. Without that check, later columns
# silently overwrote earlier ones in every row hash. Raising keeps this in
# line with the other keyed extractors here, which raise DuplicateKeyError.
#
# @param block [Object] object whose +rows+ supports +[:head]+ and +[:body]+
# @return [Array<Hash{String => String}>] one hash per body row
# @raise [InvalidStructureError] when there is no header row, the header is
#   empty, or a body row's width differs from the header's
# @raise [DuplicateKeyError] when two header cells have the same stripped text
def extract_table(block)
  head_rows = block.rows[:head] || []
  body_rows = block.rows[:body] || []

  raise InvalidStructureError, "Table must have a header row" if head_rows.empty?

  headers = head_rows.first.map { |cell| cell.text.to_s.strip }
  raise InvalidStructureError, "Table header cannot be empty" if headers.empty?

  duplicate = headers.find { |header| headers.count(header) > 1 }
  raise DuplicateKeyError, "Duplicate table header: #{duplicate}" if duplicate

  body_rows.map do |row|
    if row.length != headers.length
      raise InvalidStructureError,
            "Table row width mismatch: expected #{headers.length}, got #{row.length}"
    end

    headers.zip(row).each_with_object({}) do |(header, cell), record|
      record[header] = cell.text.to_s.strip
    end
  end
end

#extract_term_text(term) ⇒ Object



47
48
49
50
51
52
53
# File 'lib/adocconf/extract.rb', line 47

# Returns the string form of a description-list term.
#
# @param term [Object] a term object or any other value
# @return [String] +term.text.to_s+ when the term responds to +text+,
#   otherwise +term.to_s+
def extract_term_text(term)
  return term.to_s unless term.respond_to?(:text)

  term.text.to_s
end

#extract_ulist(block) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/adocconf/extract.rb', line 125

# Converts an unordered list into an array of item strings.
#
# @param block [Object] object responding to +items+; each item must respond
#   to +text+ and may respond to +blocks+
# @return [Array<String>] each item's text, converted with +to_s+ and stripped
# @raise [UnsupportedNodeError] when an item contains a nested +:ulist+ or
#   +:olist+ block
def extract_ulist(block)
  block.items.map do |entry|
    children = entry.respond_to?(:blocks) ? entry.blocks : []
    has_nested_list = children.any? do |child|
      kind = child.context
      kind == :ulist || kind == :olist
    end
    raise UnsupportedNodeError, "Nested lists are not supported" if has_nested_list

    entry.text.to_s.strip
  end
end

#extract_value_section(node) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/adocconf/extract.rb', line 65

# Extracts the value of a node that has no child sections.
#
# Each non-section block is converted according to its +context+, and the
# collected results are combined by #merge_values.
#
# @param node [Object] node whose non-section blocks are read
# @return [Object] result of #merge_values over the extracted block values
def extract_value_section(node)
  collected = non_section_blocks(node).each_with_object([]) do |child, acc|
    case child.context
    when :paragraph
      next # ignored by spec
    when :dlist then acc << extract_dlist(child)
    when :ulist then acc << extract_ulist(child)
    when :table then acc << extract_table(child)
    else
      # Every other context (:open, :example, :listing, :literal, :quote,
      # :verse, :stem, :sidebar, :image, :audio, :video, or anything
      # unknown) is routed to the same unsupported-block handler.
      handle_unsupported(child)
    end
  end

  merge_values(collected)
end

#handle_unsupported(block) ⇒ Object



195
196
197
# File 'lib/adocconf/extract.rb', line 195

# Rejects a block whose context cannot be extracted.
#
# @param block [Object] object responding to +context+
# @raise [UnsupportedNodeError] always; the message includes the block's context
def handle_unsupported(block)
  context = block.context
  raise UnsupportedNodeError, "Unsupported block context: #{context}"
end

#merge_values(values) ⇒ Object



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/adocconf/extract.rb', line 161

# Combines the values extracted from one section's blocks into one result.
#
# - no values: an empty hash
# - exactly one value: that value, unchanged
# - all hashes: a single hash; any key seen twice is an error
# - all arrays: the arrays concatenated (flattened one level)
#
# @param values [Array] extracted block values
# @return [Hash, Array, Object] the merged result as described above
# @raise [DuplicateKeyError] when two hashes share a key
# @raise [InvalidStructureError] when hashes and arrays (or other types) are mixed
def merge_values(values)
  case values.length
  when 0 then return {}
  when 1 then return values.first
  end

  if values.all? { |candidate| candidate.is_a?(Hash) }
    return values.each_with_object({}) do |hash, merged|
      hash.each do |key, value|
        raise DuplicateKeyError, "Duplicate merged key: #{key}" if merged.key?(key)

        merged[key] = value
      end
    end
  end

  return values.flatten(1) if values.all? { |candidate| candidate.is_a?(Array) }

  raise InvalidStructureError, "Mixed value types in section are not supported"
end

#non_section_blocks(node) ⇒ Object



26
27
28
29
# File 'lib/adocconf/extract.rb', line 26

# Collects the direct children of +node+ that are not sections.
#
# @param node [Object] any object; it is only inspected when it responds
#   to +blocks+
# @return [Array] the children whose +context+ is not +:section+, or an
#   empty array when +node+ has no +blocks+ method
def non_section_blocks(node)
  node.respond_to?(:blocks) ? node.blocks.reject { |child| child.context == :section } : []
end

#slugify(title) ⇒ Object



188
189
190
191
192
193
# File 'lib/adocconf/extract.rb', line 188

# Converts a section title into a key via +Slugifier.call+.
#
# @param title [Object] section title passed straight to +Slugifier.call+
# @return [Object] the slug returned by +Slugifier.call+
# @raise [InvalidStructureError] when the resulting slug is empty
def slugify(title)
  Slugifier.call(title).tap do |slug|
    raise InvalidStructureError, "Section title produced an empty slug" if slug.empty?
  end
end