Class: Llmemory::Extractors::EntityRelationExtractor

Inherits:

Object

Object
Llmemory::Extractors::EntityRelationExtractor

show all

Defined in: lib/llmemory/extractors/entity_relation_extractor.rb

Constant Summary collapse

MAX_CONVERSATION_CHARS = Maximum number of conversation characters passed to extraction. Long conversations often make the LLM return empty JSON, so longer input is truncated to this length.

EXTRACTION_JSON_SCHEMA = JSON Schema for Structured Outputs (OpenAI response_format). Ensures valid entities/relations shape and avoids refusals or malformed JSON.

{
  # Schema identifier; presumably forwarded as the response_format name — TODO confirm against the LLM client.
  name: "entity_relation_extraction",
  schema: {
    # The top-level response is an object holding exactly two arrays: entities and relations.
    type: "object",
    properties: {
      # Each entity is an object with two required string fields: type and name.
      entities: {
        type: "array",
        items: {
          type: "object",
          properties: {
            type: { type: "string", description: "Entity type: person, company, place, concept, etc." },
            name: { type: "string", description: "Entity name" }
          },
          required: ["type", "name"],
          additionalProperties: false
        },
        description: "List of entities mentioned in the conversation"
      },
      # Each relation is a subject-predicate-object triple; all three string fields are required.
      relations: {
        type: "array",
        items: {
          type: "object",
          properties: {
            subject: { type: "string", description: "Subject entity (use 'User' when the user talks about themselves or their family)" },
            predicate: { type: "string", description: "Relation inferred from context, snake_case (e.g. has_son, works_at, likes, spouse)" },
            object: { type: "string", description: "Object entity (person name, place, concept)" }
          },
          required: ["subject", "predicate", "object"],
          additionalProperties: false
        },
        description: "Subject-predicate-object relations extracted from the conversation"
      }
    },
    # Both arrays must always be present (they may be empty); no extra top-level keys are allowed.
    required: ["entities", "relations"],
    additionalProperties: false
  }
}.freeze # NOTE: freeze is shallow — only the outer hash is frozen; nested hashes and arrays stay mutable.

Instance Method Summary collapse

#extract(conversation_text) ⇒ Object
#extract_once(prompt) ⇒ Object
#initialize(llm: nil) ⇒ EntityRelationExtractor constructor

A new instance of EntityRelationExtractor.

Constructor Details

#initialize(llm: nil) ⇒ `EntityRelationExtractor`

Returns a new instance of EntityRelationExtractor.



51
52
53

# File 'lib/llmemory/extractors/entity_relation_extractor.rb', line 51

# Creates an extractor bound to an LLM client.
#
# @param llm [Object, nil] client to use; when nil (or false) the shared
#   Llmemory::LLM.client is looked up and used instead.
def initialize(llm: nil)
  @llm = llm
  # Only resolve the default client when no usable client was injected.
  @llm ||= Llmemory::LLM.client
end

Instance Method Details

#extract(conversation_text) ⇒ `Object`

# File 'lib/llmemory/extractors/entity_relation_extractor.rb', line 55

# Builds the extraction prompt for a conversation and asks the LLM for
# entities and relations.
#
# The input is stringified and stripped; anything longer than
# MAX_CONVERSATION_CHARS is cut to that length and marked with "[...]".
#
# @param conversation_text [#to_s] raw conversation transcript (nil is treated as "")
# @return [Hash] the value produced by extract_once / parse_response; it is
#   read here through its :entities and :relations keys
def extract(conversation_text)
  snippet = conversation_text.to_s.strip
  if snippet.length > MAX_CONVERSATION_CHARS
    snippet = snippet[0, MAX_CONVERSATION_CHARS] + "\n[...]"
  end

  request = <<~PROMPT
    Infer entities and relations from this user-assistant conversation. Build a knowledge graph from what the user says, even when they don't state facts in formal language.
    - Entities: people, places, companies, concepts (type and name).
    - Relations: infer subject-predicate-object from context. Use "User" as subject when the user talks about themselves or people close to them.
    Examples of inference: "mi hijo se llama Luis" → User has_son Luis; "trabajo en Acme" → User works_at Acme; "no me gustan las macros" → User prefers (or dislikes) Excel macros. Infer family (has_son, has_daughter, spouse), work (works_at, current_job), preferences (likes, prefers), and any other relation that clearly follows from the conversation. Use snake_case predicates.
    Return empty arrays only if the conversation contains no extractable facts.

    Conversation:
    #{snippet}
  PROMPT
  request = request.strip

  outcome = extract_once(request)
  nothing_found = outcome[:entities].empty? && outcome[:relations].empty?
  # Very short inputs are allowed to come back empty; anything else gets one more try.
  return outcome unless nothing_found && snippet.length > 50

  # Retry with plain invoke() if the API returned empty (avoids an empty
  # json_schema response). extract_once itself already ends with a plain
  # invoke, so this is a second plain attempt with the same prompt.
  parse_response(@llm.invoke(request))
end

#extract_once(prompt) ⇒ `Object`

# File 'lib/llmemory/extractors/entity_relation_extractor.rb', line 77

# Runs a single extraction attempt for an already-built prompt.
#
# When the client offers invoke_with_json_schema, that path is tried first
# and its parsed result is returned only if it contains at least one entity
# or relation. In every other case (method missing, non-Hash or empty reply,
# nothing extracted, Llmemory::LLMError raised) the prompt is sent through
# plain invoke and that response is parsed instead.
#
# @param prompt [String] full extraction prompt
# @return [Object] the value returned by parse_response
def extract_once(prompt)
  plain_extraction = -> { parse_response(@llm.invoke(prompt)) }
  return plain_extraction.call unless @llm.respond_to?(:invoke_with_json_schema)

  begin
    payload = @llm.invoke_with_json_schema(prompt, EXTRACTION_JSON_SCHEMA)
    if payload.is_a?(Hash) && !payload.empty?
      structured = parse_response(payload)
      has_facts = structured[:entities].any? || structured[:relations].any?
      return structured if has_facts
    end
  rescue Llmemory::LLMError
    # Model may not support response_format json_schema; fall back to invoke + parse
  end

  plain_extraction.call
end