Module: HTM::Config::Builder
- Included in:
- HTM::Config
- Defined in:
- lib/htm/config/builder.rb
Instance Method Summary collapse
- #build_default_embedding_generator ⇒ Object
- #build_default_logger ⇒ Object
- #build_default_proposition_extractor ⇒ Object
- #build_default_tag_extractor ⇒ Object
- #build_default_token_counter ⇒ Object
- #build_proposition_extraction_prompt(text) ⇒ Object
- #build_proposition_system_prompt ⇒ Object
- #build_tag_extraction_prompt(text, existing_ontology) ⇒ Object — Prompt Builders.
- #build_tag_system_prompt ⇒ Object
- #extract_embedding_from_response(response) ⇒ Object — Response Extraction Helpers.
- #extract_text_from_response(response) ⇒ Object
- #parse_proposition_response(text) ⇒ Object
- #parse_tag_response(text) ⇒ Object
Instance Method Details
#build_default_embedding_generator ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/htm/config/builder.rb', line 23
# Builds the default embedding generator.
#
# The returned lambda loads RubyLLM on first use, configures it for the
# embedding provider, requests an embedding for the given text and validates
# the extracted result.
#
# NOTE(review): the identifiers containing "embed" were missing from this
# listing and have been restored by analogy with the tag/proposition
# builders — confirm against lib/htm/config/builder.rb.
#
# @return [Proc] lambda `(text) -> Array<Numeric>`; the lambda raises
#   HTM::EmbeddingError when the extracted embedding is not an Array of
#   Numeric values
def build_default_embedding_generator
  lambda do |text|
    require 'ruby_llm' unless defined?(RubyLLM)

    configure_ruby_llm(embedding_provider)
    refresh_ollama_models! if embedding_provider == :ollama

    # Ollama model names go through normalize_ollama_model; others are used as-is.
    model = embedding_provider == :ollama ? normalize_ollama_model(embedding_model) : embedding_model

    response = RubyLLM.embed(text, model: model)
    embedding = extract_embedding_from_response(response)

    unless embedding.is_a?(Array) && embedding.all?(Numeric)
      raise HTM::EmbeddingError, "Invalid embedding response format from #{embedding_provider}"
    end

    embedding
  end
end
#build_default_logger ⇒ Object
6 7 8 9 10 11 12 13 |
# File 'lib/htm/config/builder.rb', line 6
# Creates the default logger.
#
# @return [Logger] a logger writing to $stdout at `log_level`, formatting
#   each entry as "[YYYY-MM-DD HH:MM:SS] SEVERITY -- HTM: message"
def build_default_logger
  Logger.new($stdout).tap do |log|
    log.level = log_level
    log.formatter = proc do |severity, datetime, _progname, msg|
      timestamp = datetime.strftime('%Y-%m-%d %H:%M:%S')
      "[#{timestamp}] #{severity} -- HTM: #{msg}\n"
    end
  end
end
#build_default_proposition_extractor ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/htm/config/builder.rb', line 62
# Builds the default proposition extractor.
#
# The returned lambda loads RubyLLM on first use, configures it for
# `proposition_provider`, sends the proposition prompts through a chat and
# parses the reply with `parse_proposition_response`.
#
# @return [Proc] lambda `(text)` returning the parsed propositions
def build_default_proposition_extractor
  lambda do |text|
    require 'ruby_llm' unless defined?(RubyLLM)

    configure_ruby_llm(proposition_provider)
    refresh_ollama_models! if proposition_provider == :ollama

    # Ollama model names go through normalize_ollama_model; others are used as-is.
    model_name =
      if proposition_provider == :ollama
        normalize_ollama_model(proposition_model)
      else
        proposition_model
      end

    user_prompt = build_proposition_extraction_prompt(text)
    instructions = build_proposition_system_prompt

    conversation = RubyLLM.chat(model: model_name)
    conversation.with_instructions(instructions)
    reply = conversation.ask(user_prompt)

    parse_proposition_response(extract_text_from_response(reply))
  end
end
#build_default_tag_extractor ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/htm/config/builder.rb', line 42
# Builds the default tag extractor.
#
# The returned lambda loads RubyLLM on first use, configures it for
# `tag_provider`, sends the tag prompts through a chat and parses the reply
# with `parse_tag_response`.
#
# @return [Proc] lambda `(text, existing_ontology = [])` returning the
#   parsed tags
def build_default_tag_extractor
  lambda do |text, existing_ontology = []|
    require 'ruby_llm' unless defined?(RubyLLM)

    configure_ruby_llm(tag_provider)
    refresh_ollama_models! if tag_provider == :ollama

    # Ollama model names go through normalize_ollama_model; others are used as-is.
    model_name =
      if tag_provider == :ollama
        normalize_ollama_model(tag_model)
      else
        tag_model
      end

    user_prompt = build_tag_extraction_prompt(text, existing_ontology)
    instructions = build_tag_system_prompt

    conversation = RubyLLM.chat(model: model_name)
    conversation.with_instructions(instructions)
    reply = conversation.ask(user_prompt)

    parse_tag_response(extract_text_from_response(reply))
  end
end
#build_default_token_counter ⇒ Object
15 16 17 18 19 20 21 |
# File 'lib/htm/config/builder.rb', line 15
# Builds the default token counter.
#
# The returned lambda loads tiktoken_ruby on first use and counts tokens
# with the encoding Tiktoken reports for "gpt-3.5-turbo".
#
# @return [Proc] lambda `(text)` returning the number of encoded tokens
def build_default_token_counter
  lambda do |text|
    require 'tiktoken_ruby' unless defined?(Tiktoken)

    Tiktoken.encoding_for_model("gpt-3.5-turbo").encode(text).length
  end
end
#build_proposition_extraction_prompt(text) ⇒ Object
158 159 160 |
# File 'lib/htm/config/builder.rb', line 158
# Fills the proposition user-prompt template with the given text.
#
# @param text [Object] value substituted for the template's `text` reference
# @return [String] the formatted prompt
def build_proposition_extraction_prompt(text)
  template = proposition.user_prompt_template
  template % { text: text }
end
#build_proposition_system_prompt ⇒ Object
162 163 164 |
# File 'lib/htm/config/builder.rb', line 162
# Returns the proposition system prompt as a whitespace-trimmed String.
#
# @return [String] empty when the configured prompt is nil
def build_proposition_system_prompt
  raw_prompt = proposition.system_prompt
  raw_prompt.to_s.strip
end
#build_tag_extraction_prompt(text, existing_ontology) ⇒ Object
Prompt Builders
139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/htm/config/builder.rb', line 139
# Builds the user prompt for tag extraction.
#
# When known tags are supplied, a random sample of at most 20 of them is
# interpolated into `tag.taxonomy_context_existing`; otherwise
# `tag.taxonomy_context_empty` is used. The chosen context is then merged
# into `tag.user_prompt_template` together with the text and
# `max_tag_depth`.
#
# @param text [Object] value substituted for the template's `text` reference
# @param existing_ontology [Array<String>, nil] already-known tags; nil is
#   treated as "no tags"
# @return [String] the formatted prompt
def build_tag_extraction_prompt(text, existing_ontology)
  known_tags = Array(existing_ontology)

  taxonomy_context =
    if known_tags.any?
      # Array#sample never returns more elements than the array holds,
      # so no explicit min against the size is needed.
      sample_tags = known_tags.sample(20)
      tag.taxonomy_context_existing % { sample_tags: sample_tags.join(', ') }
    else
      tag.taxonomy_context_empty
    end

  tag.user_prompt_template % {
    text: text,
    max_depth: max_tag_depth,
    taxonomy_context: taxonomy_context
  }
end
#build_tag_system_prompt ⇒ Object
154 155 156 |
# File 'lib/htm/config/builder.rb', line 154
# Returns the tag system prompt as a whitespace-trimmed String.
#
# @return [String] empty when the configured prompt is nil
def build_tag_system_prompt
  raw_prompt = tag.system_prompt
  raw_prompt.to_s.strip
end
#extract_embedding_from_response(response) ⇒ Object
Response Extraction Helpers
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/htm/config/builder.rb', line 86
# Pulls an embedding out of a response object.
#
# Shapes are tried in this order:
# 1. an Array is returned unchanged;
# 2. an object responding to `vectors` yields its first vector when the
#    vectors are nested, otherwise the vectors value itself;
# 3. an object responding to `to_a` is converted with `to_a`;
# 4. an object responding to `embedding` yields that value;
# 5. as a last resort the `@vectors` instance variable is inspected.
#
# @param response [Object, nil] embedding response
# @return [Array, nil] the embedding, or nil when `response` is nil/false
# @raise [HTM::EmbeddingError] when none of the shapes above match
def extract_embedding_from_response(response)
  return nil unless response
  return response if response.is_a?(Array)

  if response.respond_to?(:vectors)
    vectors = response.vectors
    return vectors.is_a?(Array) && vectors.first.is_a?(Array) ? vectors.first : vectors
  end

  return response.to_a if response.respond_to?(:to_a)
  return response.embedding if response.respond_to?(:embedding)

  # Fallback for objects that keep their vectors in an ivar without a reader.
  if response.respond_to?(:instance_variable_get)
    vectors = response.instance_variable_get(:@vectors)
    return vectors.first if vectors.is_a?(Array) && vectors.first.is_a?(Array)
    return vectors if vectors.is_a?(Array)
  end

  raise HTM::EmbeddingError, "Cannot extract embedding from response: #{response.class}"
end
#extract_text_from_response(response) ⇒ Object
109 110 111 112 113 114 115 116 117 118 |
# File 'lib/htm/config/builder.rb', line 109
# Converts a chat response into plain text.
#
# A String is returned as-is; otherwise `content` is preferred over `text`,
# and `to_s` is the final fallback.
#
# @param response [Object, nil] chat response
# @return [String] empty when `response` is nil/false
def extract_text_from_response(response)
  return '' unless response

  if response.is_a?(String)
    response
  elsif response.respond_to?(:content)
    response.content.to_s
  elsif response.respond_to?(:text)
    response.text.to_s
  else
    response.to_s
  end
end
#parse_proposition_response(text) ⇒ Object
126 127 128 129 130 131 132 133 |
# File 'lib/htm/config/builder.rb', line 126
# Splits a response into one proposition per non-blank line.
#
# Each line is trimmed and a single leading "-" or "*" bullet (plus any
# whitespace after it) is removed; lines that end up empty are dropped.
#
# @param text [Object] response text; converted with `to_s`
# @return [Array<String>] the cleaned lines
def parse_proposition_response(text)
  text.to_s.each_line.filter_map do |raw_line|
    proposition_text = raw_line.strip.sub(/^[-*]\s*/, '').strip
    proposition_text unless proposition_text.empty?
  end
end
#parse_tag_response(text) ⇒ Object
120 121 122 123 124 |
# File 'lib/htm/config/builder.rb', line 120
# Extracts well-formed tags from a response, one candidate per line.
#
# A line is kept when, after trimming, it consists solely of
# colon-separated segments of lowercase letters, digits and hyphens, and
# contains fewer than `max_tag_depth` colons.
#
# @param text [Object] response text; converted with `to_s`
# @return [Array<String>] the accepted tags in their original order
def parse_tag_response(text)
  text.to_s.each_line.filter_map do |line|
    candidate = line.strip
    next unless candidate.match?(/\A[a-z0-9-]+(:[a-z0-9-]+)*\z/)

    # N colons means N + 1 levels, so "< max_tag_depth" allows up to max_tag_depth levels.
    candidate if candidate.count(':') < max_tag_depth
  end
end