Class: PhraseKit::Tagger

Inherits:
Object
  • Object
show all
Defined in:
lib/phrasekit/tagger.rb

Defined Under Namespace

Classes: Error

Class Method Summary collapse

Class Method Details

.tag(input_path:, output_path:, artifacts_dir: nil, automaton_path: nil, payloads_path: nil, manifest_path: nil, vocab_path: nil, policy: :leftmost_longest, max_spans: 100, label: "PHRASE", config_path: nil) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/phrasekit/tagger.rb', line 8

# Runs the external tagger binary (located via +find_binary+) and returns
# +parse_stats+ applied to the binary's combined stdout/stderr.
#
# The binary is invoked as: <binary> <input_path> <config_path> <output_path>.
#
# When +config_path+ is nil, a temporary JSON config file is generated from
# the artifact paths and tagging options below and is always removed before
# this method returns or raises. When +config_path+ is given, it is passed to
# the binary untouched and the artifact/option arguments are not used.
#
# @param input_path [#to_s] first argument handed to the binary
# @param output_path [#to_s] third argument handed to the binary
# @param artifacts_dir [String, nil] directory used to default any artifact
#   path that was not given explicitly (phrases.daac, payloads.bin,
#   manifest.json, vocab.json)
# @param automaton_path [#to_s, nil] explicit automaton artifact path
# @param payloads_path [#to_s, nil] explicit payloads artifact path
# @param manifest_path [#to_s, nil] explicit manifest artifact path
# @param vocab_path [#to_s, nil] explicit vocab artifact path
# @param policy [#to_s] written to the generated config as a string
# @param max_spans [Object] written to the generated config unchanged
# @param label [#to_s] written to the generated config as a string
# @param config_path [#to_s, nil] pre-built config file; skips generation
# @return [Object] the result of +parse_stats(output)+
# @raise [Error] if no config_path is given and an artifact path is missing,
#   if the binary cannot be started, or if it exits unsuccessfully
def tag(
  input_path:,
  output_path:,
  artifacts_dir: nil,
  automaton_path: nil,
  payloads_path: nil,
  manifest_path: nil,
  vocab_path: nil,
  policy: :leftmost_longest,
  max_spans: 100,
  label: "PHRASE",
  config_path: nil
)
  require "open3"

  # Declared up front so the ensure clause can always inspect it.
  config_file = nil
  binary_path = find_binary

  if config_path.nil?
    require "tempfile"
    require "json"

    if artifacts_dir
      automaton_path ||= File.join(artifacts_dir, "phrases.daac")
      payloads_path ||= File.join(artifacts_dir, "payloads.bin")
      manifest_path ||= File.join(artifacts_dir, "manifest.json")
      vocab_path ||= File.join(artifacts_dir, "vocab.json")
    end

    unless automaton_path && payloads_path && manifest_path && vocab_path
      raise Error, "Must provide either artifacts_dir or all artifact paths"
    end

    config_file = Tempfile.new(["tag_config", ".json"])
    config_file.write(JSON.generate({
      automaton_path: automaton_path.to_s,
      payloads_path: payloads_path.to_s,
      manifest_path: manifest_path.to_s,
      vocab_path: vocab_path.to_s,
      policy: policy.to_s,
      max_spans: max_spans,
      label: label.to_s
    }))
    # Flush so the child process sees the full content when it opens the path.
    config_file.flush
    config_path = config_file.path
  end

  begin
    # Argument-vector form: no shell is involved, so paths need no escaping.
    # capture2e merges stderr into stdout, matching the former `2>&1`.
    output, status = Open3.capture2e(
      binary_path.to_s,
      input_path.to_s,
      config_path.to_s,
      output_path.to_s
    )
  rescue SystemCallError => e
    # e.g. binary missing or not executable; keep the caller-facing error type.
    raise Error, "Tagging failed: #{e.message}"
  end

  raise Error, "Tagging failed: #{output}" unless status.success?

  parse_stats(output)
ensure
  # Remove the generated config on every exit path, including exceptions.
  config_file.close! if config_file
end