Module: HTM::Config::Builder

Included in:
HTM::Config
Defined in:
lib/htm/config/builder.rb

Instance Method Summary collapse

Instance Method Details

#build_default_embedding_generator ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/htm/config/builder.rb', line 23

# Builds the default embedding generator.
#
# The returned lambda loads RubyLLM on first use, calls configure_ruby_llm
# with the embedding provider, refreshes the Ollama model list when the
# provider is :ollama, and then asks RubyLLM to embed the text.
#
# @return [Proc] a one-argument lambda (text) that returns the embedding Array
# @raise [HTM::EmbeddingError] raised by the lambda when the extracted
#   embedding is not an Array made up solely of Numeric values
def build_default_embedding_generator
  ->(text) do
    require 'ruby_llm' unless defined?(RubyLLM)

    configure_ruby_llm(embedding_provider)
    refresh_ollama_models! if embedding_provider == :ollama

    # Ollama model names go through normalize_ollama_model; other providers
    # receive the configured model name untouched.
    model =
      if embedding_provider == :ollama
        normalize_ollama_model(embedding_model)
      else
        embedding_model
      end

    vector = extract_embedding_from_response(RubyLLM.embed(text, model: model))
    return vector if vector.is_a?(Array) && vector.all?(Numeric)

    raise HTM::EmbeddingError, "Invalid embedding response format from #{embedding_provider}"
  end
end

#build_default_logger ⇒ Object



6
7
8
9
10
11
12
13
# File 'lib/htm/config/builder.rb', line 6

# Builds the default logger.
#
# The logger writes to $stdout, takes its level from +log_level+, and
# formats every entry as "[YYYY-MM-DD HH:MM:SS] SEVERITY -- HTM: message".
#
# @return [Logger] the configured logger
def build_default_logger
  Logger.new($stdout).tap do |log|
    log.level = log_level
    log.formatter = proc do |severity, datetime, _progname, msg|
      timestamp = datetime.strftime('%Y-%m-%d %H:%M:%S')
      "[#{timestamp}] #{severity} -- HTM: #{msg}\n"
    end
  end
end

#build_default_proposition_extractor ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/htm/config/builder.rb', line 62

# Builds the default proposition extractor.
#
# The returned lambda loads RubyLLM on first use, calls configure_ruby_llm
# with the proposition provider, sends the extraction prompt to a RubyLLM
# chat, and parses the reply with parse_proposition_response.
#
# @return [Proc] a one-argument lambda (text) that returns the parsed propositions
def build_default_proposition_extractor
  ->(text) do
    require 'ruby_llm' unless defined?(RubyLLM)

    configure_ruby_llm(proposition_provider)
    refresh_ollama_models! if proposition_provider == :ollama

    # Ollama model names go through normalize_ollama_model; other providers
    # receive the configured model name untouched.
    model =
      if proposition_provider == :ollama
        normalize_ollama_model(proposition_model)
      else
        proposition_model
      end

    user_prompt = build_proposition_extraction_prompt(text)
    instructions = build_proposition_system_prompt

    conversation = RubyLLM.chat(model: model)
    conversation.with_instructions(instructions)
    reply = conversation.ask(user_prompt)

    parse_proposition_response(extract_text_from_response(reply))
  end
end

#build_default_tag_extractor ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/htm/config/builder.rb', line 42

# Builds the default tag extractor.
#
# The returned lambda loads RubyLLM on first use, calls configure_ruby_llm
# with the tag provider, sends the tag-extraction prompt to a RubyLLM chat,
# and parses the reply with parse_tag_response.
#
# @return [Proc] a lambda (text, existing_ontology = []) that returns the
#   parsed tags
def build_default_tag_extractor
  ->(text, existing_ontology = []) do
    require 'ruby_llm' unless defined?(RubyLLM)

    configure_ruby_llm(tag_provider)
    refresh_ollama_models! if tag_provider == :ollama

    # Ollama model names go through normalize_ollama_model; other providers
    # receive the configured model name untouched.
    model =
      if tag_provider == :ollama
        normalize_ollama_model(tag_model)
      else
        tag_model
      end

    user_prompt = build_tag_extraction_prompt(text, existing_ontology)
    instructions = build_tag_system_prompt

    conversation = RubyLLM.chat(model: model)
    conversation.with_instructions(instructions)
    reply = conversation.ask(user_prompt)

    parse_tag_response(extract_text_from_response(reply))
  end
end

#build_default_token_counter ⇒ Object



15
16
17
18
19
20
21
# File 'lib/htm/config/builder.rb', line 15

# Builds the default token counter.
#
# The returned lambda loads tiktoken_ruby on first use and counts tokens
# with the encoding Tiktoken selects for "gpt-3.5-turbo".
#
# @return [Proc] a one-argument lambda (text) that returns the length of the
#   encoded text
def build_default_token_counter
  ->(text) do
    require 'tiktoken_ruby' unless defined?(Tiktoken)

    tokens = Tiktoken.encoding_for_model("gpt-3.5-turbo").encode(text)
    tokens.length
  end
end

#build_proposition_extraction_prompt(text) ⇒ Object



158
159
160
# File 'lib/htm/config/builder.rb', line 158

# Fills the proposition user-prompt template with the text to analyze.
#
# @param text [Object] value substituted for the +text+ reference in
#   +proposition.user_prompt_template+
# @return [Object] the result of formatting the template with +text+
def build_proposition_extraction_prompt(text)
  template = proposition.user_prompt_template
  template % { text: text }
end

#build_proposition_system_prompt ⇒ Object



162
163
164
# File 'lib/htm/config/builder.rb', line 162

# Returns the proposition system prompt as a String with surrounding
# whitespace removed. A nil prompt yields an empty String.
#
# @return [String] the stripped system prompt
def build_proposition_system_prompt
  raw_prompt = proposition.system_prompt
  raw_prompt.to_s.strip
end

#build_tag_extraction_prompt(text, existing_ontology) ⇒ Object

Prompt Builders



139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/htm/config/builder.rb', line 139

# Builds the user prompt for tag extraction.
#
# When the ontology has at least one truthy entry, up to 20 randomly sampled
# tags are interpolated into +tag.taxonomy_context_existing+; otherwise
# +tag.taxonomy_context_empty+ is used. The chosen context, the text and
# +max_tag_depth+ are then substituted into +tag.user_prompt_template+.
#
# @param text [Object] value substituted for the +text+ template reference
# @param existing_ontology [Array, nil] known tags; nil is treated the same
#   as an empty list
# @return [Object] the result of formatting the user prompt template
def build_tag_extraction_prompt(text, existing_ontology)
  # Array() maps nil to [], so a caller passing an explicit nil gets the
  # "empty taxonomy" prompt instead of a NoMethodError from nil.any?.
  ontology = Array(existing_ontology)

  taxonomy_context = if ontology.any?
                       # Array#sample(n) never returns more than the array
                       # holds, so no explicit size clamp is needed.
                       sample_tags = ontology.sample(20)
                       tag.taxonomy_context_existing % { sample_tags: sample_tags.join(', ') }
                     else
                       tag.taxonomy_context_empty
                     end

  tag.user_prompt_template % {
    text: text,
    max_depth: max_tag_depth,
    taxonomy_context: taxonomy_context
  }
end

#build_tag_system_prompt ⇒ Object



154
155
156
# File 'lib/htm/config/builder.rb', line 154

# Returns the tag system prompt as a String with surrounding whitespace
# removed. A nil prompt yields an empty String.
#
# @return [String] the stripped system prompt
def build_tag_system_prompt
  raw_prompt = tag.system_prompt
  raw_prompt.to_s.strip
end

#extract_embedding_from_response(response) ⇒ Object

Response Extraction Helpers



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/htm/config/builder.rb', line 86

# Pulls the embedding vector out of an embedding response.
#
# Sources are tried in order, most specific first:
# 1. an Array is returned as-is;
# 2. +response.vectors+ (the first element when it is an Array of Arrays);
# 3. +response.embedding+;
# 4. +response.to_a+;
# 5. an +@vectors+ instance variable, unwrapped the same way as (2).
#
# @param response [Object, nil] the value returned by the embedding call
# @return [Object, nil] the extracted embedding, or nil when +response+ is
#   nil or false
# @raise [HTM::EmbeddingError] when none of the sources above yields a result
def extract_embedding_from_response(response)
  return nil unless response

  case response
  when Array
    response
  when ->(r) { r.respond_to?(:vectors) }
    vectors = response.vectors
    # A batch-shaped result (Array of Arrays) is reduced to its first vector.
    vectors.is_a?(Array) && vectors.first.is_a?(Array) ? vectors.first : vectors
  when ->(r) { r.respond_to?(:embedding) }
    # Checked before :to_a: objects such as Structs respond to both, and
    # to_a would wrap the vector in an extra Array ([[...]]).
    response.embedding
  when ->(r) { r.respond_to?(:to_a) }
    response.to_a
  else
    # Last resort: read a private @vectors ivar from objects that expose no
    # public accessor.
    if response.respond_to?(:instance_variable_get)
      vectors = response.instance_variable_get(:@vectors)
      return vectors.first if vectors.is_a?(Array) && vectors.first.is_a?(Array)
      return vectors if vectors.is_a?(Array)
    end
    raise HTM::EmbeddingError, "Cannot extract embedding from response: #{response.class}"
  end
end

#extract_text_from_response(response) ⇒ Object



109
110
111
112
113
114
115
116
117
118
# File 'lib/htm/config/builder.rb', line 109

# Extracts the text of a chat response as a String.
#
# A String is returned unchanged; otherwise +content+ is preferred over
# +text+, and +to_s+ is the final fallback. A nil or false response yields
# an empty String.
#
# @param response [Object, nil] the value returned by the chat call
# @return [String] the response text
def extract_text_from_response(response)
  return '' unless response
  return response if response.is_a?(String)
  return response.content.to_s if response.respond_to?(:content)
  return response.text.to_s if response.respond_to?(:text)

  response.to_s
end

#parse_proposition_response(text) ⇒ Object



126
127
128
129
130
131
132
133
# File 'lib/htm/config/builder.rb', line 126

# Splits a response into one proposition per line.
#
# Each line is stripped, one leading "-" or "*" bullet (plus any whitespace
# after it) is removed, and lines that end up empty are dropped.
#
# @param text [Object] the response text; converted with +to_s+
# @return [Array<String>] the cleaned, non-empty lines in original order
def parse_proposition_response(text)
  text.to_s.split("\n").each_with_object([]) do |raw_line, propositions|
    cleaned = raw_line.strip.sub(/^[-*]\s*/, '').strip
    propositions << cleaned unless cleaned.empty?
  end
end

#parse_tag_response(text) ⇒ Object



120
121
122
123
124
# File 'lib/htm/config/builder.rb', line 120

# Extracts valid tags from a response, one tag per line.
#
# A line is kept when, after stripping, it consists of lowercase
# alphanumeric/hyphen segments joined by ":" and has fewer ":" separators
# than +max_tag_depth+.
#
# @param text [Object] the response text; converted with +to_s+
# @return [Array<String>] the accepted tags in original order
def parse_tag_response(text)
  candidates = text.to_s.split("\n").map(&:strip)
  candidates.select do |candidate|
    !candidate.empty? &&
      candidate.match?(/^[a-z0-9-]+(:[a-z0-9-]+)*$/) &&
      candidate.count(':') < max_tag_depth
  end
end