Class: Ace::Support::Markdown::Atoms::SectionExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/ace/support/markdown/atoms/section_extractor.rb

Overview

Pure functions to extract sections from markdown using the Kramdown AST. Supports exact string matching for section headings (v0.1.0).

Class Method Summary collapse

Class Method Details

.elements_to_markdown(elements) ⇒ Object

Convert Kramdown elements back to markdown string



148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/ace/support/markdown/atoms/section_extractor.rb', line 148

# Renders a list of Kramdown elements back into a markdown string.
#
# The elements are appended (not copied) to the root of a freshly created,
# empty Kramdown document, which is then serialised with +to_kramdown+.
#
# @param elements [#empty?, #each] the elements to render
# @return [String] the rendered markdown with surrounding whitespace
#   stripped, or "" when +elements+ is empty
def self.elements_to_markdown(elements)
  return "" if elements.empty?

  # Scratch document that only serves as a container for the elements
  scratch = Kramdown::Document.new("")
  scratch.root.options[:encoding] = "UTF-8"

  elements.each { |node| scratch.root.children.push(node) }

  rendered = scratch.to_kramdown
  rendered.strip
end

.empty_result(error_message) ⇒ Object



163
164
165
166
167
168
169
# File 'lib/ace/support/markdown/atoms/section_extractor.rb', line 163

# Builds the standard "nothing extracted" result hash.
#
# @param error_message [String] message describing why nothing was extracted
# @return [Hash] hash with :section_content set to nil, :found set to false
#   and :errors holding +error_message+ as its single entry
def self.empty_result(error_message)
  failure_reasons = [error_message]
  { section_content: nil, found: false, errors: failure_reasons }
end

.extract(content, heading_text) ⇒ Hash

Extract a section by heading text (exact string match)

Parameters:

  • content (String)

    The markdown content (without frontmatter)

  • heading_text (String)

    The exact heading text to match (e.g., “References”)

Returns:

  • (Hash)

    Result with :section_content (String), :found (Boolean), :errors (Array)



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/ace/support/markdown/atoms/section_extractor.rb', line 17

# Extracts a single section, identified by its heading text (exact match).
#
# Every failure path (blank input, heading not found, exception while
# parsing or rendering) is reported through +empty_result+, so all failure
# hashes share one shape.
#
# @param content [String] the markdown content (without frontmatter)
# @param heading_text [String] the exact heading text to match
# @return [Hash] result with :section_content (String or nil),
#   :found (Boolean) and :errors (Array of String)
def self.extract(content, heading_text)
  return empty_result("Empty content") if content.nil? || content.empty?
  return empty_result("Heading text required") if heading_text.nil? || heading_text.empty?

  begin
    # Parse markdown with Kramdown; only top-level blocks are searched
    doc = Kramdown::Document.new(content, input: "GFM")
    blocks = doc.root.children

    target_header, target_index = find_header(blocks, heading_text)
    return empty_result("Section not found: #{heading_text}") unless target_header

    # Content runs from this header up to the next same-or-higher level header
    section_content = extract_section_content(
      blocks,
      target_index,
      target_header.options[:level]
    )

    {
      section_content: section_content,
      found: true,
      errors: []
    }
  rescue => e
    # Errors are reported in the result rather than raised to the caller
    empty_result("Section extraction error: #{e.message}")
  end
end

.extract_all(content) ⇒ Array<Hash>

Extract all sections with their headings

Parameters:

  • content (String)

    The markdown content

Returns:

  • (Array<Hash>)

    Array of hashes with :heading, :level, and :content keys



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/ace/support/markdown/atoms/section_extractor.rb', line 60

# Extracts every section of the document together with its heading.
#
# This is best-effort: if parsing or rendering raises a StandardError, the
# error is swallowed and an empty array is returned.
#
# @param content [String] the markdown content
# @return [Array<Hash>] one hash per header found by +find_all_headers+,
#   with :heading, :level and :content keys; [] for nil/empty input or on error
def self.extract_all(content)
  return [] if content.nil? || content.empty?

  begin
    doc = Kramdown::Document.new(content, input: "GFM")
    blocks = doc.root.children

    find_all_headers(blocks).map do |header_info|
      # Render the body belonging to this header
      content_text = extract_section_content(
        blocks,
        header_info[:index],
        header_info[:level]
      )

      {
        heading: header_info[:text],
        level: header_info[:level],
        content: content_text
      }
    end
  rescue
    # Deliberate best-effort: the Array return type has no slot for errors
    []
  end
end

.extract_section_content(elements, start_index, level) ⇒ Object

Extract content elements between a header and the next same-or-higher level header



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/ace/support/markdown/atoms/section_extractor.rb', line 128

# Renders the body of a section: everything after the header at
# +start_index+ up to, but excluding, the next header whose level is the
# same or higher (numerically less than or equal to +level+).
#
# @param elements [Array] sibling elements containing the header
# @param start_index [Integer] position of the section's header in +elements+
# @param level [Integer] level of the section's header
# @return [Object] whatever +elements_to_markdown+ returns for the collected
#   elements
def self.extract_section_content(elements, start_index, level)
  following = ((start_index + 1)...elements.length).map { |pos| elements[pos] }

  # Deeper (numerically larger) headers stay inside the section
  body = following.take_while do |node|
    !(node.type == :header && node.options[:level] <= level)
  end

  elements_to_markdown(body)
end

.find_all_headers(elements) ⇒ Object

Find all headers in the document



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/ace/support/markdown/atoms/section_extractor.rb', line 106

# Lists every header among the given elements (nested elements are not
# searched).
#
# The reported text is the concatenation of the values of the header's
# direct :text children only; other inline children contribute nothing.
#
# @param elements [Array] elements to scan
# @return [Array<Hash>] one hash per header, in document order, with
#   :text (String), :level and :index (position within +elements+)
def self.find_all_headers(elements)
  header_pairs = elements.each_with_index.select { |node, _pos| node.type == :header }

  header_pairs.map do |node, pos|
    text_parts = node.children.map { |child| child.value if child.type == :text }

    { text: text_parts.compact.join, level: node.options[:level], index: pos }
  end
end

.find_header(elements, heading_text) ⇒ Object

Find a header element by exact text match



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/ace/support/markdown/atoms/section_extractor.rb', line 89

# Finds the first header whose text equals +heading_text+ exactly.
#
# A header matches when either of these equals +heading_text+:
# 1. the header's raw source text, which Kramdown stores in
#    +options[:raw_text]+. This lets headings containing inline elements
#    other than plain text (e.g. typographic apostrophes, code spans,
#    emphasis) be matched by the text as written in the markdown.
# 2. the concatenated values of the header's direct :text children, which
#    is the form this method originally matched and is kept for backward
#    compatibility.
#
# Nested elements are not searched.
#
# @param elements [Array] elements to scan
# @param heading_text [String] the exact heading text to look for
# @return [Array, nil] +[header_element, index]+ for the first match, or nil
#   when no header matches
def self.find_header(elements, heading_text)
  elements.each_with_index.find do |el, _idx|
    next false unless el.type == :header

    # Guard on presence so a nil heading_text never matches a header that
    # simply has no raw text recorded.
    raw_text = el.options[:raw_text]
    next true if raw_text && raw_text == heading_text

    direct_text = el.children
      .select { |child| child.type == :text }
      .map(&:value)
      .join

    direct_text == heading_text
  end
end