Class: Adocconf::Extractor
- Inherits: Object
- Inheritance hierarchy: Object → Adocconf::Extractor
- Defined in: lib/adocconf/extract.rb
Instance Method Summary
- #child_sections(node) ⇒ Object
- #extract ⇒ Object
- #extract_container(_node, sections) ⇒ Object
- #extract_description_text(description) ⇒ Object
- #extract_dlist(block) ⇒ Object
- #extract_dlist_term(item) ⇒ Object
- #extract_dlist_value(item) ⇒ Object
- #extract_node(node) ⇒ Object
- #extract_table(block) ⇒ Object
- #extract_term_text(term) ⇒ Object
- #extract_ulist(block) ⇒ Object
- #extract_value_section(node) ⇒ Object
- #handle_unsupported(block) ⇒ Object
- #initialize(document) ⇒ Extractor (constructor): A new instance of Extractor.
- #merge_values(values) ⇒ Object
- #non_section_blocks(node) ⇒ Object
- #slugify(title) ⇒ Object
Constructor Details
#initialize(document) ⇒ Extractor
Returns a new instance of Extractor.
3 4 5 |
# File 'lib/adocconf/extract.rb', line 3

# Keeps a reference to the document this extractor operates on.
#
# @param document [Object] stored as-is in @document; no validation is done here
def initialize(document)
  @document = document
end
Instance Method Details
#child_sections(node) ⇒ Object
21 22 23 24 |
# File 'lib/adocconf/extract.rb', line 21

# Lists the direct child blocks of +node+ whose context is :section.
#
# @param node [Object] anything; only consulted if it responds to `blocks`
# @return [Array] the matching blocks in their original order, or an empty
#   array when +node+ does not respond to `blocks`
def child_sections(node)
  if node.respond_to?(:blocks)
    node.blocks.select { |child| child.context == :section }
  else
    []
  end
end
#extract ⇒ Object
7 8 9 |
# File 'lib/adocconf/extract.rb', line 7

# Entry point: runs extraction starting from the stored document.
#
# @return [Object] whatever `extract_node` returns for @document
def extract
  root = @document
  extract_node(root)
end
#extract_container(_node, sections) ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/adocconf/extract.rb', line 31

# Builds a Hash with one entry per child section: the key is the slugified
# section title, the value is the result of extracting that section.
#
# @param _node [Object] unused
# @param sections [Enumerable] objects responding to `title`
# @return [Hash] slug => extracted value, in section order
# @raise [DuplicateKeyError] when two sections slugify to the same key
def extract_container(_node, sections)
  sections.each_with_object({}) do |child, mapping|
    slug = slugify(child.title)
    # The duplicate check runs before the child is extracted.
    raise DuplicateKeyError, "Duplicate section key: #{slug}" if mapping.key?(slug)

    mapping[slug] = extract_node(child)
  end
end
#extract_description_text(description) ⇒ Object
55 56 57 58 59 60 61 62 63 |
# File 'lib/adocconf/extract.rb', line 55

# Returns the plain-text value of a description-list entry.
#
# A non-nil `text` wins and is returned stripped. When there is no text, a
# description that carries nested blocks is rejected instead of being
# silently turned into an empty string; this matches what
# `extract_dlist_value` does for the same situation.
#
# @param description [Object, nil] may respond to `text` and/or `blocks`
# @return [String] stripped text, or "" when there is neither text nor blocks
# @raise [UnsupportedNodeError] when there is no text but nested blocks exist
def extract_description_text(description)
  text = description.text if description.respond_to?(:text)
  return text.to_s.strip unless text.nil?

  # Previously unreachable for any object responding to `text`, so
  # block-only values were reported as "".
  if description.respond_to?(:blocks) && description.blocks&.any?
    raise UnsupportedNodeError, "Complex description list values are not supported"
  end

  ""
end
#extract_dlist(block) ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/adocconf/extract.rb', line 89

# Converts a description list into a Hash of term text => description text.
#
# Each element of `block.items` is destructured as (terms, description).
# Only the first term of an entry is used as the key, after stripping.
#
# @param block [#items]
# @return [Hash{String => Object}] keys in list order
# @raise [InvalidStructureError] when an entry has nil or empty terms
# @raise [DuplicateKeyError] when two entries yield the same key
def extract_dlist(block)
  block.items.each_with_object({}) do |(terms, description), mapping|
    raise InvalidStructureError, "Description list item is missing a term" if terms.nil? || terms.empty?

    name = extract_term_text(terms.first).strip
    raise DuplicateKeyError, "Duplicate key in description list: #{name}" if mapping.key?(name)

    mapping[name] = extract_description_text(description)
  end
end
#extract_dlist_term(item) ⇒ Object
109 110 111 112 113 114 115 |
# File 'lib/adocconf/extract.rb', line 109

# Returns the text of the first term of a description-list item.
#
# The result is always a String: a term whose `text` is nil yields "" rather
# than nil, matching how `extract_term_text` coerces with `to_s`.
#
# @param item [Object] consulted for `terms` only if it responds to it
# @return [String] unstripped text of the first term
# @raise [InvalidStructureError] when the item has no terms (nil, empty, or
#   no `terms` method at all)
def extract_dlist_term(item)
  terms = item.respond_to?(:terms) ? item.terms : []
  raise InvalidStructureError, "Description list item is missing a term" if terms.nil? || terms.empty?

  term = terms.first
  raw = term.respond_to?(:text) ? term.text : term
  raw.to_s
end
#extract_dlist_value(item) ⇒ Object
117 118 119 120 121 122 123 |
# File 'lib/adocconf/extract.rb', line 117

# Returns the stripped text of a description-list value.
#
# Text takes precedence: if the item has truthy `text`, nested blocks are not
# inspected at all.
#
# @param item [Object] may respond to `text` and/or `blocks`
# @return [String] stripped text, or "" when there is neither text nor blocks
# @raise [UnsupportedNodeError] when there is no text but nested blocks exist
def extract_dlist_value(item)
  has_text = item.respond_to?(:text) && item.text
  return item.text.strip if has_text

  has_blocks = item.respond_to?(:blocks) && item.blocks&.any?
  raise UnsupportedNodeError, "Complex description list values are not supported" if has_blocks

  ""
end
#extract_node(node) ⇒ Object
11 12 13 14 15 16 17 18 19 |
# File 'lib/adocconf/extract.rb', line 11

# Dispatches on whether +node+ has child sections: nodes with sections are
# treated as containers, all others as value sections.
#
# @param node [Object]
# @return [Object] result of `extract_container` or `extract_value_section`
def extract_node(node)
  nested = child_sections(node)
  nested.any? ? extract_container(node, nested) : extract_value_section(node)
end
#extract_table(block) ⇒ Object
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/adocconf/extract.rb', line 138

# Converts a table into an Array of row Hashes keyed by header text.
#
# Headers come from the first row of `block.rows[:head]`; every header and
# cell value is `cell.text.to_s.strip`. Repeated header names are rejected,
# because a later column would otherwise overwrite an earlier one in each
# row Hash and that column's data would be lost without any error.
#
# @param block [#rows] `rows` is indexed with :head and :body; each may be nil
# @return [Array<Hash{String => String}>] one Hash per body row, in order
# @raise [InvalidStructureError] when there is no header row, the header row
#   has no cells, or a body row's width differs from the header's
# @raise [DuplicateKeyError] when two header cells have the same stripped text
def extract_table(block)
  head_rows = block.rows[:head] || []
  body_rows = block.rows[:body] || []
  raise InvalidStructureError, "Table must have a header row" if head_rows.empty?

  headers = head_rows.first.map { |cell| cell.text.to_s.strip }
  raise InvalidStructureError, "Table header cannot be empty" if headers.empty?

  repeated = headers.find { |name| headers.count(name) > 1 }
  raise DuplicateKeyError, "Duplicate table header: #{repeated}" if repeated

  body_rows.map do |row|
    if row.length != headers.length
      raise InvalidStructureError, "Table row width mismatch: expected #{headers.length}, got #{row.length}"
    end

    headers.zip(row.map { |cell| cell.text.to_s.strip }).to_h
  end
end
#extract_term_text(term) ⇒ Object
47 48 49 50 51 52 53 |
# File 'lib/adocconf/extract.rb', line 47

# Returns a term as a String: its `text` when it has one, otherwise the term
# itself, converted with `to_s`. No stripping is done here.
#
# @param term [Object]
# @return [String]
def extract_term_text(term)
  raw = term.respond_to?(:text) ? term.text : term
  raw.to_s
end
#extract_ulist(block) ⇒ Object
125 126 127 128 129 130 131 132 133 134 135 136 |
# File 'lib/adocconf/extract.rb', line 125

# Converts an unordered list into an Array of stripped item texts.
#
# @param block [#items] each item must respond to `text`; `blocks` is optional
# @return [Array<String>] one entry per item, in list order
# @raise [UnsupportedNodeError] when an item has a child block whose context
#   is :ulist or :olist
def extract_ulist(block)
  block.items.map do |entry|
    children = entry.respond_to?(:blocks) ? entry.blocks : []
    sublists = children.select { |child| [:ulist, :olist].include?(child.context) }
    raise UnsupportedNodeError, "Nested lists are not supported" unless sublists.empty?

    entry.text.to_s.strip
  end
end
#extract_value_section(node) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/adocconf/extract.rb', line 65

# Extracts the value of a section that has no child sections.
#
# Each non-section block contributes one value depending on its context;
# the collected values are then combined by `merge_values`.
#
# @param node [Object] passed to `non_section_blocks`
# @return [Object] whatever `merge_values` returns for the collected values
def extract_value_section(node)
  collected = non_section_blocks(node).each_with_object([]) do |blk, acc|
    case blk.context
    when :dlist then acc << extract_dlist(blk)
    when :ulist then acc << extract_ulist(blk)
    when :table then acc << extract_table(blk)
    when :paragraph then nil # ignored by spec
    else
      # Every other context goes to handle_unsupported, including :open,
      # :example, :listing, :literal, :quote, :verse, :stem, :sidebar,
      # :image, :audio and :video.
      handle_unsupported(blk)
    end
  end

  merge_values(collected)
end
#handle_unsupported(block) ⇒ Object
195 196 197 |
# File 'lib/adocconf/extract.rb', line 195

# Rejects a block that extraction cannot represent. Always raises.
#
# @param block [#context] its context is included in the error message
# @raise [UnsupportedNodeError] unconditionally
def handle_unsupported(block)
  message = "Unsupported block context: #{block.context}"
  raise UnsupportedNodeError, message
end
#merge_values(values) ⇒ Object
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# File 'lib/adocconf/extract.rb', line 161

# Combines the values collected from one section into a single result.
#
# - no values: an empty Hash
# - exactly one value: that value itself, whatever its type
# - all Hashes: one Hash with every key; a repeated key is an error
# - all Arrays: the arrays concatenated (flattened one level only)
#
# @param values [Array]
# @return [Object] see the cases above
# @raise [DuplicateKeyError] when two Hashes share a key
# @raise [InvalidStructureError] when the values are neither all Hashes nor
#   all Arrays
def merge_values(values)
  return {} if values.empty?
  return values.first if values.length == 1

  if values.all? { |candidate| candidate.is_a?(Hash) }
    values.each_with_object({}) do |part, combined|
      part.each do |key, value|
        raise DuplicateKeyError, "Duplicate merged key: #{key}" if combined.key?(key)

        combined[key] = value
      end
    end
  elsif values.all? { |candidate| candidate.is_a?(Array) }
    values.flatten(1)
  else
    raise InvalidStructureError, "Mixed value types in section are not supported"
  end
end
#non_section_blocks(node) ⇒ Object
26 27 28 29 |
# File 'lib/adocconf/extract.rb', line 26

# Lists the direct child blocks of +node+ whose context is not :section.
#
# @param node [Object] anything; only consulted if it responds to `blocks`
# @return [Array] the remaining blocks in their original order, or an empty
#   array when +node+ does not respond to `blocks`
def non_section_blocks(node)
  if node.respond_to?(:blocks)
    node.blocks.reject { |child| child.context == :section }
  else
    []
  end
end
#slugify(title) ⇒ Object
188 189 190 191 192 193 |
# File 'lib/adocconf/extract.rb', line 188

# Turns a section title into a key by delegating to `Slugifier.call`, and
# refuses titles for which that call returns an empty value.
#
# @param title [Object] passed through to `Slugifier.call` unchanged
# @return [Object] the non-empty slug returned by `Slugifier.call`
# @raise [InvalidStructureError] when the slug is empty
def slugify(title)
  slug = Slugifier.call(title)
  return slug unless slug.empty?

  raise InvalidStructureError, "Section title produced an empty slug"
end