Module: AcroForge::CLI

Defined in:
lib/acroforge/cli.rb

Constant Summary collapse

EXIT_OK =
0
EXIT_USER_ERROR =
1
EXIT_VALIDATION_ERROR =
2
EXIT_INTERNAL_ERROR =
3
SUBCOMMANDS =
%w[schema relabel compile bootstrap annotate prepare version help].freeze

Class Method Summary collapse

Class Method Details

.cmd_annotate(argv) ⇒ Object

Raises:

  • (ArgumentError)


285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/acroforge/cli.rb', line 285

# Handles `acroforge annotate <pdf>`.
#
# Recognizes `--out PATH` and `--mapping PATH`. When `--out` is not given the
# destination is taken from `default_annotated_path`. The work itself is
# delegated to `AcroForge::Annotator.annotate`, and the outcome is reported
# through `summarize_annotate`.
#
# @param argv [Array<String>] arguments following the subcommand; consumed in place
# @return [Integer] EXIT_OK
# @raise [ArgumentError] when no <pdf> argument is given
# @raise [Errno::ENOENT] when the PDF, or a supplied mapping file, does not exist
def cmd_annotate(argv)
  options = {out: nil, mapping: nil}
  parser = OptionParser.new
  parser.on("--out PATH") { |path| options[:out] = path }
  parser.on("--mapping PATH") { |path| options[:mapping] = path }
  parser.parse!(argv)

  pdf = argv.shift
  raise ArgumentError, "missing <pdf> argument" if pdf.nil?
  raise Errno::ENOENT, pdf unless File.exist?(pdf)

  # The mapping is optional; it is only validated when one was supplied.
  mapping_path = options[:mapping]
  mapping_missing = mapping_path && !File.exist?(mapping_path)
  raise Errno::ENOENT, mapping_path if mapping_missing

  destination = options[:out] || default_annotated_path(pdf)
  outcome = AcroForge::Annotator.annotate(pdf, out: destination, mapping: mapping_path)
  summarize_annotate(outcome, mapping_path)
  EXIT_OK
end

.cmd_bootstrap(argv) ⇒ Object

Raises:

  • (ArgumentError)


319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
# File 'lib/acroforge/cli.rb', line 319

# Handles `acroforge bootstrap <pdf>`: infers a schema and proposes a mapping
# in one invocation.
#
# Recognizes `--schema-out PATH` (default "schema.yml"), `--mapping-out PATH`
# (default "mapping.yml") and `-v/--verbose`.
#
# @param argv [Array<String>] arguments following the subcommand; consumed in place
# @return [Integer] EXIT_OK
# @raise [ArgumentError] when no <pdf> argument is given
# @raise [Errno::ENOENT] when the PDF does not exist
def cmd_bootstrap(argv)
  settings = {schema_out: "schema.yml", mapping_out: "mapping.yml", verbose: false}
  parser = OptionParser.new
  parser.on("--schema-out PATH") { |path| settings[:schema_out] = path }
  parser.on("--mapping-out PATH") { |path| settings[:mapping_out] = path }
  parser.on("-v", "--verbose") { settings[:verbose] = true }
  parser.parse!(argv)

  pdf = argv.shift
  raise ArgumentError, "missing <pdf> argument" if pdf.nil?
  raise Errno::ENOENT, pdf unless File.exist?(pdf)

  require "tmpdir"
  Dir.mktmpdir do |scratch_dir|
    # A single engine is compiled once and handed to both Schema.infer and
    # Relabeler.propose via `engine:`, so the compile pass (and its verbose
    # output) happens only one time.
    engine = AcroForge::Engine.new(pdf, normalized_dir: scratch_dir)
    silenced(verbose: settings[:verbose]) { engine.compile! }

    inferred = AcroForge::Schema.infer(pdf, engine: engine)
    AcroForge::Schema.dump(inferred, settings[:schema_out])
    key_total = inferred.size
    puts "Wrote #{settings[:schema_out]}: #{key_total} canonical key#{"s" unless key_total == 1} inferred."

    proposal = AcroForge::Relabeler.propose(
      pdf,
      out: settings[:mapping_out],
      schema: inferred,
      mode: :overwrite,
      engine: engine
    )
    summarize_propose(proposal)
  end
  EXIT_OK
end

.cmd_compile(argv) ⇒ Object

Raises:

  • (ArgumentError)


234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/acroforge/cli.rb', line 234

# Handles `acroforge compile <pdf>`: runs the engine once and prints how many
# fields were mapped and unmapped.
#
# Recognizes `--schema PATH`. Without it an empty schema (`{}`) is used.
#
# @param argv [Array<String>] arguments following the subcommand; consumed in place
# @return [Integer] EXIT_OK
# @raise [ArgumentError] when no <pdf> argument is given
# @raise [Errno::ENOENT] when the PDF, or a supplied schema file, does not exist
def cmd_compile(argv)
  schema_path = nil
  OptionParser.new do |opts|
    opts.on("--schema PATH") { |v| schema_path = v }
  end.parse!(argv)
  pdf = argv.shift
  raise ArgumentError, "missing <pdf> argument" if pdf.nil?
  raise Errno::ENOENT, pdf unless File.exist?(pdf)
  # Same up-front check cmd_prepare performs: an explicitly requested schema
  # that is missing is a user error, reported before any loading is attempted.
  raise Errno::ENOENT, schema_path if schema_path && !File.exist?(schema_path)

  schema = schema_path ? AcroForge::Schema.load(schema_path) : {}
  require "tmpdir"
  # The engine is given a throwaway directory as `normalized_dir:`; it is
  # removed when the block exits.
  Dir.mktmpdir do |tmp|
    engine = AcroForge::Engine.new(pdf, schema: schema, normalized_dir: tmp)
    result = engine.compile!
    puts "Mapped: #{result[:mapped].size}, Unmapped: #{result[:unmapped].size}"
  end
  EXIT_OK
end

.cmd_prepare(argv) ⇒ Object

Raises:

  • (ArgumentError)


253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
# File 'lib/acroforge/cli.rb', line 253

# Handles `acroforge prepare <pdf>`.
#
# Recognizes `--out PATH` and `--schema PATH`. The schema defaults to `{}`
# when no path is supplied. The work is delegated to
# `AcroForge::Preparer.prepare!`, always with stdout silenced, and the outcome
# is reported through `summarize_prepare`.
#
# @param argv [Array<String>] arguments following the subcommand; consumed in place
# @return [Integer] EXIT_OK
# @raise [ArgumentError] when no <pdf> argument is given
# @raise [Errno::ENOENT] when the PDF, or a supplied schema file, does not exist
def cmd_prepare(argv)
  options = {out: nil, schema: nil}
  parser = OptionParser.new
  parser.on("--out PATH") { |path| options[:out] = path }
  parser.on("--schema PATH") { |path| options[:schema] = path }
  parser.parse!(argv)

  pdf = argv.shift
  raise ArgumentError, "missing <pdf> argument" if pdf.nil?
  raise Errno::ENOENT, pdf unless File.exist?(pdf)

  schema_path = options[:schema]
  schema = {}
  if schema_path
    raise Errno::ENOENT, schema_path unless File.exist?(schema_path)
    schema = AcroForge::Schema.load(schema_path)
  end

  outcome = silenced(verbose: false) do
    AcroForge::Preparer.prepare!(pdf, out: options[:out], schema: schema)
  end
  summarize_prepare(outcome, pdf, options[:out])
  EXIT_OK
end

.cmd_relabel(argv) ⇒ Object



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/acroforge/cli.rb', line 168

# Handles `acroforge relabel <action>`, where <action> is `propose` or `apply`.
#
# * `propose <pdf>` recognizes `--out`, `--schema`, `--merge`, `--overwrite`
#   and `-v/--verbose`; it calls `AcroForge::Relabeler.propose` and prints a
#   summary via `summarize_propose`.
# * `apply <pdf> <mapping.yml>` recognizes `-v/--verbose` and
#   `--annotate [PATH]`; it optionally writes an annotated review PDF, then
#   calls `AcroForge::Relabeler.apply!` and prints a summary via
#   `summarize_apply`.
#
# @param argv [Array<String>] arguments following `relabel`; consumed in place
# @return [Integer] EXIT_OK on success, EXIT_USER_ERROR for an unknown action
# @raise [ArgumentError] when a required positional argument is missing
# @raise [Errno::ENOENT] when the PDF, the mapping file, or a `--schema` file
#   passed to `propose` does not exist
def cmd_relabel(argv)
  action = argv.shift
  case action
  when "propose"
    out = "mapping.yml"
    schema_path = nil
    mode = :merge
    verbose = false
    OptionParser.new do |opts|
      opts.on("--out PATH") { |v| out = v }
      opts.on("--schema PATH") { |v| schema_path = v }
      opts.on("--merge") { mode = :merge }
      opts.on("--overwrite") { mode = :overwrite }
      opts.on("-v", "--verbose") { verbose = true }
    end.parse!(argv)
    pdf = argv.shift
    raise ArgumentError, "missing <pdf> argument" if pdf.nil?
    raise Errno::ENOENT, pdf unless File.exist?(pdf)
    # Same up-front check cmd_prepare performs: an explicitly requested schema
    # that is missing is a user error, reported before any loading is attempted.
    raise Errno::ENOENT, schema_path if schema_path && !File.exist?(schema_path)

    schema = schema_path ? AcroForge::Schema.load(schema_path) : {}
    result = silenced(verbose: verbose) do
      AcroForge::Relabeler.propose(pdf, out: out, schema: schema, mode: mode)
    end
    summarize_propose(result)
    EXIT_OK
  when "apply"
    verbose = false
    # `annotate_out` tracks three states:
    #   false       -> --annotate not passed; no annotation
    #   true        -> --annotate passed without value; use default path
    #   "some/path" -> --annotate=path passed explicitly
    annotate_out = false
    OptionParser.new do |opts|
      opts.on("-v", "--verbose") { verbose = true }
      opts.on("--annotate [PATH]", "Also write an annotated review PDF (default: <source>_annotated.pdf)") do |v|
        annotate_out = v || true
      end
    end.parse!(argv)
    pdf = argv.shift
    mapping = argv.shift
    raise ArgumentError, "missing arguments: expected <pdf> <mapping.yml>" if pdf.nil? || mapping.nil?
    raise Errno::ENOENT, pdf unless File.exist?(pdf)
    raise Errno::ENOENT, mapping unless File.exist?(mapping)

    # Annotation runs BEFORE the rename so the badges show
    # original_field_name -> proposed_key. After the rename, the mapping's
    # PDF field names no longer match the file, so post-rename annotation
    # would render every entry as "missing in mapping" -- useless.
    annotate_path = nil
    if annotate_out
      # The explicit comparison with `true` is intentional: any String path is
      # also truthy, so truthiness alone cannot tell the two states apart.
      annotate_path = (annotate_out == true) ? default_annotated_path(pdf) : annotate_out
      silenced(verbose: verbose) do
        AcroForge::Annotator.annotate(pdf, out: annotate_path, mapping: mapping)
      end
    end

    result = silenced(verbose: verbose) { AcroForge::Relabeler.apply!(pdf, mapping) }
    summarize_apply(result, pdf)
    puts "Wrote #{annotate_path}: review snapshot of the mapping plan." if annotate_path
    EXIT_OK
  else
    warn "acroforge: unknown relabel action #{action.inspect}"
    EXIT_USER_ERROR
  end
end

.cmd_schema(argv) ⇒ Object



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/acroforge/cli.rb', line 104

# Handles `acroforge schema <action>`, where <action> is `infer` or `merge`.
#
# * `infer <pdf>` recognizes `--out` (default "schema.yml"), `--sections LIST`
#   (comma-separated, each item stripped) and `-v/--verbose`; it writes the
#   schema returned by `AcroForge::Schema.infer` and prints a key count.
# * `merge <mapping.yml>` recognizes `--schema` (default "schema.yml") and
#   `--out` (defaults to the schema path); it merges the mapping's entries
#   whose key does not start with "_" into the schema and reports how many
#   keys were added and how many existing keys changed their variations.
#
# @param argv [Array<String>] arguments following `schema`; consumed in place
# @return [Integer] EXIT_OK on success, EXIT_USER_ERROR for an unknown action
# @raise [ArgumentError] when the required positional argument is missing
# @raise [Errno::ENOENT] when the PDF or the mapping file does not exist
def cmd_schema(argv)
  action = argv.shift
  case action
  when "infer"
    out = "schema.yml"
    sections = []
    verbose = false
    OptionParser.new do |opts|
      opts.on("--out PATH") { |v| out = v }
      opts.on("--sections LIST") { |v| sections = v.split(",").map(&:strip) }
      opts.on("-v", "--verbose") { verbose = true }
    end.parse!(argv)
    pdf = argv.shift
    raise ArgumentError, "missing <pdf> argument" if pdf.nil?
    raise Errno::ENOENT, pdf unless File.exist?(pdf)

    schema = silenced(verbose: verbose) { AcroForge::Schema.infer(pdf, sections: sections) }
    AcroForge::Schema.dump(schema, out)
    count = schema.size
    puts "Wrote #{out}: #{count} canonical key#{"s" unless count == 1} inferred."
    EXIT_OK
  when "merge"
    schema_path = "schema.yml"
    out = nil
    OptionParser.new do |opts|
      opts.on("--schema PATH") { |v| schema_path = v }
      opts.on("--out PATH") { |v| out = v }
    end.parse!(argv)
    mapping_path = argv.shift
    raise ArgumentError, "missing <mapping.yml> argument" if mapping_path.nil?
    raise Errno::ENOENT, mapping_path unless File.exist?(mapping_path)
    out ||= schema_path

    # One extractor is used for both the pre-merge and post-merge snapshots so
    # that a non-Hash entry counts as "no variations" on both sides instead of
    # raising when indexed with :variations after the merge.
    variations_of = lambda do |entry|
      (entry.is_a?(Hash) ? (entry[:variations] || []) : []).to_set
    end

    # A schema file that does not exist yet is treated as an empty schema.
    existing = File.exist?(schema_path) ? AcroForge::Schema.load(schema_path) : {}
    keys_before = existing.keys.to_set
    variations_before = existing.each_with_object({}) do |(k, v), acc|
      acc[k] = variations_of.call(v)
    end

    mapping = YAML.load_file(mapping_path) || {}
    # Keys beginning with "_" are excluded from the merge.
    merged = AcroForge::Schema.merge(existing, mapping.reject { |k, _| k.to_s.start_with?("_") })
    AcroForge::Schema.dump(merged, out)

    added = (merged.keys.to_set - keys_before).size
    # "Updated" means a key that existed before and whose variation set differs now.
    updated = merged.keys.count do |k|
      next false unless keys_before.include?(k)
      variations_of.call(merged[k]) != (variations_before[k] || Set.new)
    end
    summarize_schema_merge(out, added, updated)
    EXIT_OK
  else
    warn "acroforge: unknown schema action #{action.inspect}"
    EXIT_USER_ERROR
  end
end

.default_annotated_path(pdf) ⇒ Object



303
304
305
306
# File 'lib/acroforge/cli.rb', line 303

# Builds the default destination for an annotated copy of `pdf`: same
# directory, same file name with its last extension removed, suffixed with
# "_annotated.pdf".
#
# @param pdf [String] path of the source PDF
# @return [String] sibling path ending in "_annotated.pdf"
def default_annotated_path(pdf)
  directory = File.dirname(pdf)
  stem = File.basename(pdf, ".*")
  File.join(directory, "#{stem}_annotated.pdf")
end


46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/acroforge/cli.rb', line 46

# Prints the usage text for every subcommand to stdout.
#
# @param _ ignored
# @return [Integer] EXIT_OK
def print_help(_)
  usage = <<~HELP
    acroforge: PDF AcroForm engine + relabeler

    Usage:
      acroforge schema infer <pdf>     [--out schema.yml] [--sections a,b,c] [-v]
      acroforge schema merge <mapping.yml> [--schema schema.yml] [--out schema.yml]
      acroforge relabel propose <pdf>  [--out mapping.yml] [--schema schema.yml] [--merge|--overwrite] [-v]
      acroforge relabel apply <pdf> <mapping.yml> [--annotate[=PATH]] [-v]
      acroforge compile <pdf>          [--schema schema.yml]
      acroforge bootstrap <pdf>        [--schema-out s.yml] [--mapping-out m.yml] [-v]
      acroforge annotate <pdf>         [--mapping mapping.yml] [--out annotated.pdf]
      acroforge prepare <pdf>          [--out prepared.pdf] [--schema schema.yml]
      acroforge version
      acroforge help

    Pass -v or --verbose to bootstrap, schema infer, relabel propose, and
    relabel apply to see the engine's per-field reasoning on stdout.
  HELP
  puts usage
  EXIT_OK
end


41
42
43
44
# File 'lib/acroforge/cli.rb', line 41

# Prints `AcroForge::VERSION` on its own line to stdout.
#
# @return [Integer] EXIT_OK
def print_version
  $stdout.puts(AcroForge::VERSION)
  EXIT_OK
end

.run(argv) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/acroforge/cli.rb', line 18

# CLI entry point: dispatches to the `cmd_<subcommand>` handler and converts
# raised errors into exit codes.
#
# Exit-code mapping:
# * AcroForge::ValidationError / AcroForge::RelabelError -> EXIT_VALIDATION_ERROR
# * Errno::ENOENT, ArgumentError, OptionParser::ParseError -> EXIT_USER_ERROR
# * any other StandardError                                -> EXIT_INTERNAL_ERROR
#
# @param argv [Array<String>] full argument list; it is duplicated, so the
#   caller's array is not modified
# @return [Integer] one of the EXIT_* codes
def run(argv)
  argv = argv.dup
  sub = argv.shift
  return print_help(argv) if sub.nil? || sub == "help"
  return print_version if sub == "version"

  unless SUBCOMMANDS.include?(sub)
    warn "acroforge: unknown subcommand #{sub.inspect}. Try `acroforge help`."
    return EXIT_USER_ERROR
  end

  # `sub` has been checked against SUBCOMMANDS above, so the dynamic dispatch
  # can only reach one of the known cmd_* handlers.
  send("cmd_#{sub}", argv)
rescue AcroForge::ValidationError, AcroForge::RelabelError => e
  warn "acroforge: #{e.message}"
  EXIT_VALIDATION_ERROR
rescue Errno::ENOENT, ArgumentError, OptionParser::ParseError => e
  # OptionParser::ParseError (unknown flag, missing flag value, ...) is a
  # RuntimeError, so without naming it here a mistyped option would be
  # reported as an internal error instead of a usage mistake.
  warn "acroforge: #{e.message}"
  EXIT_USER_ERROR
rescue => e
  warn "acroforge: internal error (#{e.class}): #{e.message}"
  EXIT_INTERNAL_ERROR
end

.silenced(verbose: false) ⇒ Object

Run `block` with $stdout redirected to the null device (`File::NULL`), unless `verbose:` is true. Used to suppress the engine's per-field chatter during normal CLI runs.



70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/acroforge/cli.rb', line 70

# Yields to the given block with `$stdout` pointed at the null device, then
# puts the previous `$stdout` back (also when the block raises). With
# `verbose: true` the block runs with `$stdout` untouched.
#
# @param verbose [Boolean] when true, do not redirect anything
# @yield the work whose stdout output should be discarded
# @return [Object] whatever the block returns
def silenced(verbose: false)
  return yield if verbose

  previous = $stdout
  File.open(File::NULL, "w") do |sink|
    $stdout = sink
    begin
      yield
    ensure
      # Restore first; File.open closes the sink once this block exits.
      $stdout = previous
    end
  end
end

.summarize_annotate(result, mapping_path) ⇒ Object



308
309
310
311
312
313
314
315
316
317
# File 'lib/acroforge/cli.rb', line 308

# Prints a one-line report for an annotate run.
#
# Without a mapping only the output path and annotated-field count are shown.
# With a mapping the line also carries a breakdown: the mapped count always,
# plus the "no key" and "not in mapping" counts when they are above zero.
#
# @param result [Hash] reads :out_path and :annotated, and with a mapping
#   also :mapped, :unmapped and :missing
# @param mapping_path [String, nil] mapping file used for the run, if any
# @return [nil]
def summarize_annotate(result, mapping_path)
  unless mapping_path
    puts "Wrote #{result[:out_path]}: #{result[:annotated]} fields annotated."
    return
  end

  breakdown = [
    "#{result[:mapped]} mapped",
    ("#{result[:unmapped]} no key" if result[:unmapped] > 0),
    ("#{result[:missing]} not in mapping" if result[:missing] > 0)
  ].compact
  puts "Wrote #{result[:out_path]}: #{result[:annotated]} fields annotated (#{breakdown.join(", ")})."
end

.summarize_apply(result, pdf) ⇒ Object



96
97
98
99
100
101
102
# File 'lib/acroforge/cli.rb', line 96

# Prints a one-line report for `relabel apply`.
#
# The renamed count is always shown; the disambiguated, skipped and stale
# counts appear only when they are above zero.
#
# @param result [Hash] reads :renamed, :disambiguated, :skipped_null and :stale
# @param pdf [String] path of the PDF the mapping was applied to
# @return [nil]
def summarize_apply(result, pdf)
  segments = [
    "#{result[:renamed]} renamed",
    ("#{result[:disambiguated]} disambiguated" if result[:disambiguated] > 0),
    ("#{result[:skipped_null]} skipped (no key)" if result[:skipped_null] > 0),
    ("#{result[:stale]} stale" if result[:stale] > 0)
  ].compact
  puts "Applied to #{pdf}: #{segments.join(", ")}."
end

.summarize_prepare(result, in_path, explicit_out) ⇒ Object



273
274
275
276
277
278
279
280
281
282
283
# File 'lib/acroforge/cli.rb', line 273

# Prints a one-line report for a prepare run.
#
# When no duplicate groups were found, says there was nothing to do.
# Otherwise reports where the result went ("in place" when the result's
# :out_path equals `in_path`), the number of duplicate groups, the renamed
# count, and the skipped count when it is above zero.
#
# @param result [Hash] reads :out_path, :duplicate_groups, :renamed and :skipped
# @param in_path [String] path of the input PDF
# @param explicit_out [String, nil] accepted for the caller's benefit; not
#   read by this method
# @return [nil]
def summarize_prepare(result, in_path, explicit_out)
  if result[:duplicate_groups].zero?
    puts "Nothing to do: #{in_path} has no duplicate field names."
    return
  end

  destination = result[:out_path]
  where = destination == in_path ? "in place" : "to #{destination}"
  segments = [
    "#{result[:renamed]} duplicates renamed",
    ("#{result[:skipped]} skipped (no heuristic proposal)" if result[:skipped] > 0)
  ].compact
  puts "Prepared #{where}: #{result[:duplicate_groups]} duplicate groups, #{segments.join(", ")}."
end

.summarize_propose(result) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/acroforge/cli.rb', line 83

# Prints a one-line report for a mapping proposal.
#
# Three outcomes are distinguished: no fields at all, every field proposed,
# and some fields left for manual review.
#
# @param result [Hash] reads :total, :mapped and :out_path
# @return [nil]
def summarize_propose(result)
  field_count = result[:total]
  proposed = result[:mapped]
  destination = result[:out_path]

  message =
    if field_count.zero?
      "Wrote #{destination}: no AcroForm fields found in the PDF."
    elsif proposed == field_count
      "Wrote #{destination}: #{proposed} of #{field_count} fields proposed."
    else
      "Wrote #{destination}: #{proposed} of #{field_count} fields proposed; #{field_count - proposed} need manual review."
    end
  puts message
end

.summarize_schema_merge(out, added, updated) ⇒ Object



160
161
162
163
164
165
166
# File 'lib/acroforge/cli.rb', line 160

# Prints a one-line report for `schema merge`.
#
# Lists the number of added keys and of updated keys, each only when above
# zero and each pluralized; prints "no changes" when both are zero.
#
# @param out [String] path the merged schema was written to
# @param added [Integer] number of keys that are new after the merge
# @param updated [Integer] number of pre-existing keys that changed
# @return [nil]
def summarize_schema_merge(out, added, updated)
  segments = [
    ("#{added} new key#{"s" unless added == 1} added" if added > 0),
    ("#{updated} existing key#{"s" unless updated == 1} updated" if updated > 0)
  ].compact
  detail = segments.any? ? segments.join(", ") : "no changes"
  puts "Merged into #{out}: #{detail}."
end