Module: PhraseKit

Defined in:
lib/phrasekit.rb,
lib/phrasekit/miner.rb,
lib/phrasekit/scorer.rb,
lib/phrasekit/tagger.rb,
lib/phrasekit/version.rb

Defined Under Namespace

Classes: Error, Miner, Scorer, Tagger

Constant Summary collapse

VERSION =
"0.2.0"

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.vocabulary ⇒ Object (readonly)

Returns the value of attribute vocabulary.



24
25
26
# File 'lib/phrasekit.rb', line 24

# Reader for the module-level vocabulary.
#
# load! stores a Hash with :tokens, :special_tokens and :separator_id keys
# here when it is given a vocab_path, and stores nil when it is not.
#
# @return [Hash, nil] the currently loaded vocabulary, or nil if none is loaded
def vocabulary
  @vocabulary
end

Class Method Details

.encode_tokens(tokens) ⇒ Object

Raises:

(Error) — if no vocabulary has been loaded


56
57
58
59
60
61
62
63
64
# File 'lib/phrasekit.rb', line 56

# Translates tokens into the ids recorded in the loaded vocabulary.
#
# Every token is converted with #to_s and downcased before the lookup.
# A token whose lookup yields nil/false is given the id stored under the
# "<UNK>" special token instead.
#
# @param tokens [#map] the tokens to encode
# @return [Array] one id per input token, in input order
# @raise [Error] if no vocabulary has been loaded
def encode_tokens(tokens)
  vocab = @vocabulary
  raise Error, "Vocabulary not loaded. Call PhraseKit.load! with vocab_path" unless vocab

  fallback_id = vocab[:special_tokens]["<UNK>"]
  id_by_token = vocab[:tokens]

  tokens.map { |raw| id_by_token[raw.to_s.downcase] || fallback_id }
end

.healthcheck ⇒ Object

Raises:

(Error) — if PhraseKit has not been loaded, or if the matcher's health check raises a RuntimeError


85
86
87
88
89
90
91
92
# File 'lib/phrasekit.rb', line 85

# Delegates to the loaded matcher's own health check.
#
# A RuntimeError raised by the matcher is re-raised as Error carrying the
# same message.
#
# @return [Object] whatever the matcher's healthcheck returns
# @raise [Error] if PhraseKit has not been loaded or the matcher reports a failure
def healthcheck
  matcher = @matcher
  raise Error, "PhraseKit not loaded. Call PhraseKit.load! first" unless matcher

  begin
    matcher.healthcheck
  rescue RuntimeError => failure
    raise Error, failure.message
  end
end

.load!(automaton_path:, payloads_path:, manifest_path:, vocab_path: nil) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/phrasekit.rb', line 26

# Loads the native matcher artifacts and, optionally, a JSON vocabulary.
#
# The matcher and the vocabulary are both built in local variables and are
# only published to @matcher / @vocabulary once every step has succeeded.
# A failed call therefore leaves the previous state untouched: it never
# leaves behind a matcher whose load failed, nor a freshly loaded matcher
# paired with a stale vocabulary.
#
# @param automaton_path [#to_s] path handed to the native matcher
# @param payloads_path [#to_s] path handed to the native matcher
# @param manifest_path [#to_s] path handed to the native matcher
# @param vocab_path [String, nil] path of a JSON file providing the
#   "tokens", "special_tokens" and "separator_id" entries; when nil, any
#   previously loaded vocabulary is cleared
# @return [Hash, nil] the vocabulary now in effect (nil without vocab_path)
# @raise [Error] if the matcher raises a RuntimeError while loading, or if
#   the vocabulary file cannot be read or parsed
def load!(automaton_path:, payloads_path:, manifest_path:, vocab_path: nil)
  matcher = NativeMatcher.new
  begin
    matcher.load(automaton_path.to_s, payloads_path.to_s, manifest_path.to_s)
  rescue RuntimeError => e
    raise Error, e.message
  end

  loaded_vocabulary = nil
  if vocab_path
    begin
      # Required lazily so JSON is only loaded when a vocabulary is requested.
      require "json"
      vocab_data = JSON.parse(File.read(vocab_path))
      loaded_vocabulary = {
        tokens: vocab_data["tokens"],
        special_tokens: vocab_data["special_tokens"],
        separator_id: vocab_data["separator_id"]
      }
    rescue => e
      raise Error, "Failed to load vocabulary: #{e.message}"
    end
  end

  # Publish both pieces together, and only after everything succeeded.
  @matcher = matcher
  @vocabulary = loaded_vocabulary
end

.match_text_tokens(tokens:, policy: :leftmost_longest, max: 32) ⇒ Object

Raises:

(Error) — if PhraseKit has not been loaded, or if it was loaded without a vocabulary


66
67
68
69
70
71
72
# File 'lib/phrasekit.rb', line 66

# Matches phrases against textual tokens: the tokens are encoded with
# encode_tokens and the resulting ids are passed on to match_tokens.
#
# @param tokens [#map] tokens to encode and match
# @param policy [Symbol] forwarded unchanged to match_tokens
# @param max [Integer] forwarded unchanged to match_tokens
# @return [Object] whatever match_tokens returns
# @raise [Error] if the matcher or the vocabulary has not been loaded
def match_text_tokens(tokens:, policy: :leftmost_longest, max: 32)
  # The matcher is checked first so its message wins when both are missing.
  raise Error, "PhraseKit not loaded. Call PhraseKit.load! first" unless @matcher
  raise Error, "Vocabulary not loaded. Call PhraseKit.load! with vocab_path" unless @vocabulary

  match_tokens(token_ids: encode_tokens(tokens), policy: policy, max: max)
end

.match_tokens(token_ids:, policy: :leftmost_longest, max: 32) ⇒ Object

Raises:

(Error) — if PhraseKit has not been loaded


51
52
53
54
# File 'lib/phrasekit.rb', line 51

# Runs the loaded matcher over already-encoded token ids.
#
# Each match returned by the matcher has its keys converted to symbols.
# This uses core Hash#transform_keys, so it does not rely on ActiveSupport's
# Hash#symbolize_keys extension being loaded; as with that extension, keys
# that cannot be converted with #to_sym are left unchanged.
#
# @param token_ids [Array] token ids to match, passed to the matcher as-is
# @param policy [Symbol, String] match policy; passed to the matcher as a String
# @param max [Integer] passed to the matcher as-is
# @return [Array<Hash>] the matcher's results with symbolized keys
# @raise [Error] if PhraseKit has not been loaded
def match_tokens(token_ids:, policy: :leftmost_longest, max: 32)
  raise Error, "PhraseKit not loaded. Call PhraseKit.load! first" unless @matcher

  @matcher.match_tokens(token_ids, policy.to_s, max).map do |match|
    match.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
  end
end

.stats ⇒ Object

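Raises:

(Error) — if PhraseKit has not been loaded, or if the matcher raises a RuntimeError while reporting stats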


74
75
76
77
78
79
80
81
82
83
# File 'lib/phrasekit.rb', line 74

# Returns the loaded matcher's statistics with symbolized keys.
#
# Keys are symbolized with core Hash#transform_keys, so this does not rely
# on ActiveSupport's Hash#symbolize_keys extension being loaded; keys that
# cannot be converted with #to_sym are left unchanged.
#
# :loaded_at is divided by 1000.0 and passed to Time.at, i.e. it is treated
# as a millisecond timestamp. The conversion is skipped when the matcher
# reports no :loaded_at, so a missing value no longer surfaces as a
# NoMethodError on nil.
#
# @return [Hash] the matcher's stats, with :loaded_at as a Time when present
# @raise [Error] if PhraseKit has not been loaded or the matcher raises a RuntimeError
def stats
  raise Error, "PhraseKit not loaded. Call PhraseKit.load! first" unless @matcher

  begin
    stats_hash = @matcher.stats.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
    loaded_at_ms = stats_hash[:loaded_at]
    stats_hash[:loaded_at] = Time.at(loaded_at_ms / 1000.0) if loaded_at_ms
    stats_hash
  rescue RuntimeError => e
    raise Error, e.message
  end
end