Module: PhraseKit
- Defined in:
- lib/phrasekit.rb,
lib/phrasekit/miner.rb,
lib/phrasekit/scorer.rb,
lib/phrasekit/tagger.rb,
lib/phrasekit/version.rb
Defined Under Namespace
Classes: Error, Miner, Scorer, Tagger
Constant Summary
collapse
- VERSION =
"0.2.0"
Class Attribute Summary collapse
Class Method Summary
collapse
Class Attribute Details
.vocabulary ⇒ Object
Returns the value of attribute vocabulary.
24
25
26
|
# File 'lib/phrasekit.rb', line 24
# Reader for the vocabulary stored by {load!}.
#
# @return [Hash, nil] the Hash built by load! (keys :tokens, :special_tokens
#   and :separator_id) when a vocab_path was given; nil when load! ran
#   without a vocab_path or has not been called yet
def vocabulary
@vocabulary
end
|
Class Method Details
.encode_tokens(tokens) ⇒ Object
56
57
58
59
60
61
62
63
64
|
# File 'lib/phrasekit.rb', line 56
# Translates text tokens into vocabulary ids.
#
# Every token is stringified and downcased before it is looked up in the
# vocabulary's :tokens table. Tokens without an entry are mapped to the id
# stored under "<UNK>" in :special_tokens.
#
# @param tokens [Enumerable<#to_s>] tokens to encode
# @return [Array] one id per input token, in input order
# @raise [Error] when no vocabulary has been loaded
def encode_tokens(tokens)
  vocab = @vocabulary
  raise Error, "Vocabulary not loaded. Call PhraseKit.load! with vocab_path" unless vocab

  fallback_id = vocab[:special_tokens]["<UNK>"]
  id_table = vocab[:tokens]

  tokens.map { |raw| id_table[raw.to_s.downcase] || fallback_id }
end
|
.healthcheck ⇒ Object
85
86
87
88
89
90
91
92
|
# File 'lib/phrasekit.rb', line 85
# Delegates to the native matcher's own healthcheck.
#
# @return [Object] whatever the native matcher's healthcheck returns
# @raise [Error] when load! has not installed a matcher, or when the native
#   healthcheck raises a RuntimeError (its message is carried over)
def healthcheck
  matcher = @matcher
  raise Error, "PhraseKit not loaded. Call PhraseKit.load! first" unless matcher

  # Only the native call is wrapped, so the guard above is raised untouched.
  begin
    matcher.healthcheck
  rescue RuntimeError => native_failure
    raise Error, native_failure.message
  end
end
|
.load!(automaton_path:, payloads_path:, manifest_path:, vocab_path: nil) ⇒ Object
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
|
# File 'lib/phrasekit.rb', line 26
# Loads the native matcher artifacts and, optionally, a JSON vocabulary.
#
# The matcher and vocabulary are built in locals and only stored once every
# step has succeeded. A failed call therefore leaves the previously loaded
# state (or the "not loaded" state) untouched, instead of installing an
# unloaded matcher or pairing a new matcher with a stale vocabulary.
#
# @param automaton_path [#to_s] path handed to the native matcher
# @param payloads_path [#to_s] path handed to the native matcher
# @param manifest_path [#to_s] path handed to the native matcher
# @param vocab_path [String, nil] optional JSON file providing the
#   "tokens", "special_tokens" and "separator_id" entries
# @return [Hash, nil] the vocabulary that was stored (nil without vocab_path)
# @raise [Error] when the native load raises a RuntimeError, or when the
#   vocabulary file cannot be read or parsed
def load!(automaton_path:, payloads_path:, manifest_path:, vocab_path: nil)
  matcher = NativeMatcher.new
  begin
    matcher.load(automaton_path.to_s, payloads_path.to_s, manifest_path.to_s)
  rescue RuntimeError => e
    raise Error, e.message
  end

  vocabulary = nil
  if vocab_path
    begin
      # Required lazily so JSON is only loaded when a vocabulary is used.
      require "json"
      vocab_data = JSON.parse(File.read(vocab_path))
      vocabulary = {
        tokens: vocab_data["tokens"],
        special_tokens: vocab_data["special_tokens"],
        separator_id: vocab_data["separator_id"]
      }
    rescue => e
      raise Error, "Failed to load vocabulary: #{e.message}"
    end
  end

  # Commit both pieces together, only after everything above succeeded.
  @matcher = matcher
  @vocabulary = vocabulary
end
|
.match_text_tokens(tokens:, policy: :leftmost_longest, max: 32) ⇒ Object
66
67
68
69
70
71
72
|
# File 'lib/phrasekit.rb', line 66
# Matches phrases against text tokens by encoding them to ids first.
#
# Both preconditions are checked up front, matcher first, so the caller is
# told about the missing matcher even when the vocabulary is also missing.
#
# @param tokens [Enumerable<#to_s>] text tokens, encoded via encode_tokens
# @param policy [Symbol, String] forwarded unchanged to match_tokens
# @param max [Integer] forwarded unchanged to match_tokens
# @return [Object] the result of match_tokens
# @raise [Error] when the matcher or the vocabulary has not been loaded
def match_text_tokens(tokens:, policy: :leftmost_longest, max: 32)
  problem =
    if !@matcher
      "PhraseKit not loaded. Call PhraseKit.load! first"
    elsif !@vocabulary
      "Vocabulary not loaded. Call PhraseKit.load! with vocab_path"
    end
  raise Error, problem if problem

  match_tokens(token_ids: encode_tokens(tokens), policy: policy, max: max)
end
|
.match_tokens(token_ids:, policy: :leftmost_longest, max: 32) ⇒ Object
51
52
53
54
|
# File 'lib/phrasekit.rb', line 51
# Runs the native matcher over already-encoded token ids.
#
# Result keys are symbolized with core Hash#transform_keys rather than
# ActiveSupport's Hash#symbolize_keys, so this works without ActiveSupport.
# Keys that cannot be converted are left as they are.
#
# @param token_ids [Array] token ids handed to the native matcher
# @param policy [Symbol, String] match policy; passed to the native matcher
#   as a String
# @param max [Integer] passed through to the native matcher
#   (presumably a cap on returned matches; confirm against the extension)
# @return [Array<Hash>] one Hash per native result, with Symbol keys
# @raise [Error] when load! has not installed a matcher
def match_tokens(token_ids:, policy: :leftmost_longest, max: 32)
  raise Error, "PhraseKit not loaded. Call PhraseKit.load! first" unless @matcher

  @matcher.match_tokens(token_ids, policy.to_s, max).map do |match|
    match.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
  end
end
|
.stats ⇒ Object
74
75
76
77
78
79
80
81
82
83
|
# File 'lib/phrasekit.rb', line 74
# Returns the native matcher's statistics with Symbol keys.
#
# The :loaded_at entry is divided by 1000 and converted to a Time, i.e. it
# is treated as milliseconds since the epoch. The division uses an exact
# Rational, because a Float cannot hold current epoch times to the
# millisecond and would shift the timestamp by a fraction of a microsecond.
#
# Keys are symbolized with core Hash#transform_keys rather than
# ActiveSupport's Hash#symbolize_keys, so this works without ActiveSupport.
#
# @return [Hash] native stats with Symbol keys and :loaded_at as a Time
# @raise [Error] when load! has not installed a matcher, or when the native
#   call raises a RuntimeError (its message is carried over)
def stats
  raise Error, "PhraseKit not loaded. Call PhraseKit.load! first" unless @matcher

  begin
    stats_hash = @matcher.stats.transform_keys do |key|
      key.respond_to?(:to_sym) ? key.to_sym : key
    end
    stats_hash[:loaded_at] = Time.at(Rational(stats_hash[:loaded_at], 1000))
    stats_hash
  rescue RuntimeError => e
    raise Error, e.message
  end
end
|