Class: Ragnar::ContextRepacker

Inherits:
Object
  • Object
show all
Defined in:
lib/ragnar/context_repacker.rb

Class Method Summary collapse

Class Method Details

.clean_text(text) ⇒ Object



85
86
87
88
89
90
91
# File 'lib/ragnar/context_repacker.rb', line 85

# Normalize whitespace and over-long ellipses in a piece of text.
#
# Every run of whitespace (spaces, tabs, newlines) is collapsed to a single
# space, runs of four or more dots are shortened to "...", and leading and
# trailing whitespace is stripped.
#
# Because newlines are already folded into spaces by the whitespace pass, a
# separate "collapse 3+ newlines" pass could never match anything; it has
# been dropped. The output for any String input is unchanged.
#
# @param text [String, nil] text to clean; nil is treated as an empty string
# @return [String] the cleaned, single-line text
def self.clean_text(text)
  text
    .to_s
    .gsub(/\s+/, ' ')        # Collapse all whitespace, including newlines
    .gsub(/\.{4,}/, '...')   # Normalize over-long ellipses
    .strip
end

.repack(documents, query, max_tokens: 2000) ⇒ Object

Repack retrieved documents into an optimized context for the LLM. This reduces redundancy and organizes information better.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/ragnar/context_repacker.rb', line 5

# Repack retrieved documents into a single context string for an LLM.
#
# Documents are grouped by source (+:file_path+, then +:source_file+, else
# "unknown"). The chunks of each source (+:chunk_text+, then +:text+) are
# joined with " ... ", cleaned via +clean_text+, and emitted under a
# "Source: <basename>" header. Sections are separated by "\n\n---\n\n".
# If the result is longer than roughly +max_tokens+ tokens (estimated at
# 4 characters per token) it is handed to +trim_to_relevant+.
#
# Sources that contribute no text (all chunks missing, empty or blank) are
# skipped instead of emitting a bare header that only wastes budget.
#
# @param documents [Array<Hash>, nil] retrieved documents with symbol keys
# @param query [String] the user query, used only when trimming is needed
# @param max_tokens [Integer] approximate token budget for the context
# @return [String] the repacked context, or "" when there is nothing to pack
def self.repack(documents, query, max_tokens: 2000)
  return "" if documents.nil? || documents.empty?

  # Group documents by source file
  grouped = documents.group_by { |doc| doc[:file_path] || doc[:source_file] || "unknown" }

  context_parts = []

  grouped.each do |source, docs|
    # to_s guards against nil / non-String chunk values; strip lets us drop
    # whitespace-only chunks (clean_text would normalize that whitespace anyway).
    chunks = docs.map { |d| (d[:chunk_text] || d[:text]).to_s.strip }
                 .reject(&:empty?)
    next if chunks.empty?

    combined_text = clean_text(chunks.join(" ... "))
    next if combined_text.empty?

    context_parts << "Source: #{File.basename(source.to_s)}\n#{combined_text}"
  end

  # Join all parts with clear separation
  full_context = context_parts.join("\n\n---\n\n")

  # Trim to max tokens (rough approximation: ~4 chars per token)
  max_chars = max_tokens * 4
  return full_context if full_context.length <= max_chars

  trim_to_relevant(full_context, query, max_chars)
end

.repack_with_summary(documents, query, llm: nil) ⇒ Object

Create a summary-focused repack for better coherence



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/ragnar/context_repacker.rb', line 40

# Repack documents and, when an LLM is supplied, prepend a generated summary.
#
# The documents are first packed with +repack+. If +llm+ is given, it is
# asked (via +llm.generate(prompt)+) to summarize the first ~1500 characters
# of that context with respect to +query+, and the result is returned as
# "Summary: ...\n\nDetailed Information:\n<context>\n".
#
# The plain repacked context is returned unchanged when:
# * no +llm+ is given,
# * the repacked context is empty (nothing to summarize),
# * the LLM returns nil or a blank summary, or
# * summary generation raises a StandardError (a warning is written to
#   stderr so it does not pollute the program's normal output).
#
# @param documents [Array<Hash>, nil] retrieved documents
# @param query [String] the user query
# @param llm [#generate, nil] object responding to +generate(prompt)+
# @return [String] the context, optionally prefixed by a summary
def self.repack_with_summary(documents, query, llm: nil)
  return "" if documents.nil? || documents.empty?

  # First do basic repacking
  basic_context = repack(documents, query)

  return basic_context unless llm
  return basic_context if basic_context.empty?

  begin
    summary_prompt = <<~PROMPT
      <|system|>
      You are a helpful assistant. Summarize the following information relevant to the query.
      Focus on the most important points. Be concise.
      </s>
      <|user|>
      Query: #{query}

      Information:
      #{basic_context[0..1500]}

      Provide a brief summary of the key information related to the query.
      </s>
      <|assistant|>
    PROMPT

    summary = llm.generate(summary_prompt)

    # An empty "Summary:" header adds noise without information.
    return basic_context if summary.to_s.strip.empty?

    # Combine summary with original context
    <<~CONTEXT
      Summary: #{summary}

      Detailed Information:
      #{basic_context}
    CONTEXT
  rescue StandardError => e
    warn "Warning: Summary generation failed: #{e.message}"
    basic_context
  end
end

.trim_to_relevant(text, query, max_chars) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/ragnar/context_repacker.rb', line 93

# Shrink +text+ to at most +max_chars+ characters, preferring the sentences
# that mention the most query terms.
#
# The text is split into sentences at whitespace following ".", "!" or "?".
# Each sentence is scored by how many query terms (lower-cased words of at
# least 3 characters) it contains. Sentences are then admitted greedily from
# highest to lowest score, with earlier sentences winning ties, as long as
# they fit in the remaining budget. The kept sentences are emitted in their
# ORIGINAL order so the result still reads coherently.
#
# Differences from a plain "sort by score and cut" approach:
# * the single-space separators are counted, so the result never exceeds
#   +max_chars+;
# * a sentence that does not fit is skipped rather than ending selection,
#   so shorter lower-ranked sentences can still fill the budget;
# * ties are broken by position (Enumerable#sort_by alone is not stable);
# * if no whole sentence fits, the best-ranked sentence is hard-truncated
#   instead of returning an empty string.
#
# @param text [String, nil] the text to trim
# @param query [String, nil] query whose terms drive relevance scoring
# @param max_chars [Integer] maximum length of the returned string
# @return [String] +text+ itself if it already fits, otherwise the selected
#   sentences joined by single spaces
def self.trim_to_relevant(text, query, max_chars)
  text = text.to_s
  return "" if text.empty? || max_chars <= 0
  return text if text.length <= max_chars

  query_terms = query.to_s.downcase.split(/\W+/).reject { |w| w.length < 3 }

  # Score each sentence by relevance, remembering where it came from
  sentences = text.split(/(?<=[.!?])\s+/)
  scored = sentences.each_with_index.map do |sentence, index|
    lowered = sentence.downcase # hoisted: downcase once per sentence, not per term
    score = query_terms.count { |term| lowered.include?(term) }
    { sentence: sentence, index: index, score: score }
  end

  # Highest score first; original position breaks ties deterministically
  ranked = scored.sort_by { |item| [-item[:score], item[:index]] }

  kept = []
  used = 0
  ranked.each do |item|
    # Every sentence after the first costs one extra char for the joining space
    cost = item[:sentence].length + (kept.empty? ? 0 : 1)
    next if used + cost > max_chars

    kept << item
    used += cost
  end

  # Nothing fits whole: fall back to a truncated copy of the best sentence
  return ranked.first[:sentence][0, max_chars] if kept.empty?

  kept.sort_by { |item| item[:index] }.map { |item| item[:sentence] }.join(" ")
end