Class: PhraseKit::Tagger
- Inherits: Object
- Inheritance hierarchy: Object → PhraseKit::Tagger
- Defined in:
- lib/phrasekit/tagger.rb
Defined Under Namespace
Classes: Error
Class Method Summary
Class Method Details
.tag(input_path:, output_path:, artifacts_dir: nil, automaton_path: nil, payloads_path: nil, manifest_path: nil, vocab_path: nil, policy: :leftmost_longest, max_spans: 100, label: "PHRASE", config_path: nil) ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/phrasekit/tagger.rb', line 8

# Runs the external tagger binary over +input_path+ and writes its result to
# +output_path+.
#
# When +config_path+ is nil, a temporary JSON config file is generated from
# the artifact paths and options, handed to the binary, and removed again
# before this method returns or raises. When +config_path+ is given, it is
# passed to the binary as-is and the artifact/option arguments are ignored.
#
# @param input_path [#to_s] path passed to the binary as its first argument
# @param output_path [#to_s] path passed to the binary as its third argument
# @param artifacts_dir [String, nil] directory used to fill in any artifact
#   path that was not given explicitly (phrases.daac, payloads.bin,
#   manifest.json, vocab.json)
# @param automaton_path [#to_s, nil] explicit automaton path
# @param payloads_path [#to_s, nil] explicit payloads path
# @param manifest_path [#to_s, nil] explicit manifest path
# @param vocab_path [#to_s, nil] explicit vocab path
# @param policy [#to_s] written to the generated config as a string
# @param max_spans [Object] written to the generated config unchanged
# @param label [#to_s] written to the generated config as a string
# @param config_path [#to_s, nil] existing config file to use instead of
#   generating one
# @return [Object] whatever +parse_stats+ returns for the binary's combined
#   stdout/stderr output
# @raise [Error] if no config can be built because artifact paths are
#   missing, or if the binary exits unsuccessfully
def tag(
  input_path:,
  output_path:,
  artifacts_dir: nil,
  automaton_path: nil,
  payloads_path: nil,
  manifest_path: nil,
  vocab_path: nil,
  policy: :leftmost_longest,
  max_spans: 100,
  label: "PHRASE",
  config_path: nil
)
  # Array#shelljoin is provided by shellwords; require it here so this method
  # does not depend on some other file having loaded it first.
  require "shellwords"

  binary_path = find_binary
  config_file = nil
  output = nil
  status = nil

  begin
    if config_path.nil?
      require "tempfile"
      require "json"

      if artifacts_dir
        automaton_path ||= File.join(artifacts_dir, "phrases.daac")
        payloads_path ||= File.join(artifacts_dir, "payloads.bin")
        manifest_path ||= File.join(artifacts_dir, "manifest.json")
        vocab_path ||= File.join(artifacts_dir, "vocab.json")
      end

      unless automaton_path && payloads_path && manifest_path && vocab_path
        raise Error, "Must provide either artifacts_dir or all artifact paths"
      end

      config_file = Tempfile.new(["tag_config", ".json"])
      config_file.write(JSON.generate({
        automaton_path: automaton_path.to_s,
        payloads_path: payloads_path.to_s,
        manifest_path: manifest_path.to_s,
        vocab_path: vocab_path.to_s,
        policy: policy.to_s,
        max_spans: max_spans,
        label: label.to_s
      }))
      # Flush so the child process sees the full contents on disk.
      config_file.flush
      config_path = config_file.path
    end

    cmd = [
      binary_path,
      input_path.to_s,
      config_path.to_s,
      output_path.to_s
    ]

    # stderr is folded into stdout so failure messages carry the binary's
    # diagnostics.
    output = `#{cmd.shelljoin} 2>&1`
    status = $?
  ensure
    # Remove the generated config on every exit path, including exceptions
    # raised while serialising the config or launching the command.
    config_file&.close!
  end

  raise Error, "Tagging failed: #{output}" unless status.success?

  parse_stats(output)
end