Module: AcroForge::CLI

Defined in:
lib/acroforge/cli.rb

Constant Summary collapse

EXIT_OK =
0
EXIT_USER_ERROR =
1
EXIT_VALIDATION_ERROR =
2
EXIT_INTERNAL_ERROR =
3
SUBCOMMANDS =
%w[fields schema relabel compile bootstrap annotate prepare version help].freeze

Class Method Summary collapse

Class Method Details

.cmd_annotate(argv) ⇒ Object

Raises:

  • (ArgumentError)


329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
# File 'lib/acroforge/cli.rb', line 329

# Handles `acroforge annotate <pdf> [--mapping PATH] [--out PATH]`.
#
# Strips the recognised flags out of +argv+, takes the next remaining
# argument as the PDF path and delegates to AcroForge::Annotator.annotate.
# When --out is not given, the destination comes from
# default_annotated_path.
#
# @param argv [Array<String>] arguments following the subcommand (mutated)
# @return [Integer] EXIT_OK
# @raise [ArgumentError] when no PDF argument is present
# @raise [Errno::ENOENT] when the PDF or the --mapping file does not exist
def cmd_annotate(argv)
  settings = { out: nil, mapping: nil }
  parser = OptionParser.new
  parser.on("--out PATH") { |path| settings[:out] = path }
  parser.on("--mapping PATH") { |path| settings[:mapping] = path }
  parser.parse!(argv)

  pdf = argv.shift
  raise ArgumentError, "missing <pdf> argument" if pdf.nil?
  raise Errno::ENOENT, pdf unless File.exist?(pdf)

  # The mapping is optional, but a path that was given must point at a file.
  mapping_file = settings[:mapping]
  raise Errno::ENOENT, mapping_file unless mapping_file.nil? || File.exist?(mapping_file)

  destination = settings[:out] || default_annotated_path(pdf)
  outcome = AcroForge::Annotator.annotate(pdf, out: destination, mapping: mapping_file)
  summarize_annotate(outcome, mapping_file)
  EXIT_OK
end

.cmd_bootstrap(argv) ⇒ Object

Raises:

  • (ArgumentError)


363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
# File 'lib/acroforge/cli.rb', line 363

# Handles `acroforge bootstrap <pdf> [--schema-out PATH] [--mapping-out PATH] [-v]`.
#
# Compiles the PDF once in a temporary directory, infers a schema from it,
# writes that schema, and then writes a proposed mapping built against the
# freshly inferred schema (always in :overwrite mode).
#
# @param argv [Array<String>] arguments following the subcommand (mutated)
# @return [Integer] EXIT_OK
# @raise [ArgumentError] when no PDF argument is present
# @raise [Errno::ENOENT] when the PDF does not exist
def cmd_bootstrap(argv)
  settings = { schema_out: "schema.yml", mapping_out: "mapping.yml", verbose: false }
  parser = OptionParser.new
  parser.on("--schema-out PATH") { |path| settings[:schema_out] = path }
  parser.on("--mapping-out PATH") { |path| settings[:mapping_out] = path }
  parser.on("-v", "--verbose") { settings[:verbose] = true }
  parser.parse!(argv)

  pdf = argv.shift
  raise ArgumentError, "missing <pdf> argument" if pdf.nil?
  raise Errno::ENOENT, pdf unless File.exist?(pdf)

  # A single engine instance is compiled up front and then handed to both
  # Schema.infer and Relabeler.propose through their `engine:` keyword, so
  # the compile pass (and any verbose output it prints) happens only once.
  require "tmpdir"
  Dir.mktmpdir do |scratch_dir|
    shared_engine = AcroForge::Engine.new(pdf, normalized_dir: scratch_dir)
    silenced(verbose: settings[:verbose]) { shared_engine.compile!(announce_output: false) }

    inferred = AcroForge::Schema.infer(pdf, engine: shared_engine)
    AcroForge::Schema.dump(inferred, settings[:schema_out])
    key_count = inferred.size
    plural = key_count == 1 ? "" : "s"
    puts "Wrote #{settings[:schema_out]}: #{key_count} canonical key#{plural} inferred."

    proposal = AcroForge::Relabeler.propose(
      pdf,
      out: settings[:mapping_out],
      schema: inferred,
      mode: :overwrite,
      engine: shared_engine
    )
    summarize_propose(proposal)
  end
  EXIT_OK
end

.cmd_compile(argv) ⇒ Object

Raises:

  • (ArgumentError)


272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'lib/acroforge/cli.rb', line 272

# Handles `acroforge compile <pdf> [--schema PATH] [--out PATH | --overwrite]`.
#
# Builds an AcroForge::Engine for the PDF (optionally with a schema loaded
# from --schema), compiles it and prints the mapped/unmapped counts plus
# the path the engine reports as its normalized output.
#
# @param argv [Array<String>] arguments following the subcommand (mutated)
# @return [Integer] EXIT_OK
# @raise [ArgumentError] when the PDF argument is missing, or when both
#   --out and --overwrite are given
# @raise [Errno::ENOENT] when the PDF does not exist
def cmd_compile(argv)
  flags = { schema: nil, out: nil, overwrite: false }
  parser = OptionParser.new
  parser.on("--schema PATH") { |path| flags[:schema] = path }
  parser.on("--out PATH", "Write the normalized PDF to PATH") { |path| flags[:out] = path }
  parser.on("--overwrite", "Write the normalized PDF back over the input PDF in place") { flags[:overwrite] = true }
  parser.parse!(argv)

  pdf = argv.shift
  in_place = flags[:overwrite]
  raise ArgumentError, "missing <pdf> argument" if pdf.nil?
  raise ArgumentError, "--out and --overwrite are mutually exclusive" if flags[:out] && in_place
  raise Errno::ENOENT, pdf unless File.exist?(pdf)

  schema = flags[:schema] ? AcroForge::Schema.load(flags[:schema]) : {}

  # --overwrite targets the input itself; the check above guarantees that
  # --out is unset in that case, so the two can never disagree.
  target = in_place ? pdf : flags[:out]
  target_dir = (File.dirname(target) if target)

  engine = AcroForge::Engine.new(pdf, schema: schema, normalized_dir: target_dir)
  outcome = engine.compile!(normalized_out: target)
  puts "Mapped: #{outcome[:mapped].size}, Unmapped: #{outcome[:unmapped].size}"

  written = engine.normalized_path
  written = "#{written} (in place)" if in_place
  puts "Wrote #{written}: normalized template."
  EXIT_OK
end

.cmd_fields(argv) ⇒ Object

Raises:

  • (ArgumentError)


106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/acroforge/cli.rb', line 106

# Handles `acroforge fields <pdf> [--json]`.
#
# Reads the field list from AcroForge::Engine and prints it either as
# pretty-printed JSON (--json), as an aligned text table, or as a one-line
# notice when the list is empty.
#
# @param argv [Array<String>] arguments following the subcommand (mutated)
# @return [Integer] EXIT_OK
# @raise [ArgumentError] when no PDF argument is present
# @raise [Errno::ENOENT] when the PDF does not exist
def cmd_fields(argv)
  as_json = false
  parser = OptionParser.new
  parser.on("--json") { as_json = true }
  parser.parse!(argv)

  pdf = argv.shift
  raise ArgumentError, "usage: acroforge fields <pdf> [--json]" unless pdf
  raise Errno::ENOENT, pdf unless File.exist?(pdf)

  listing = AcroForge::Engine.new(pdf).fields

  # JSON mode always emits the list, even when it is empty.
  if as_json
    puts JSON.pretty_generate(listing)
    return EXIT_OK
  end

  if listing.empty?
    puts "No AcroForm fields found in #{pdf}."
  else
    print_fields_table(listing)
  end
  EXIT_OK
end

.cmd_prepare(argv) ⇒ Object

Raises:

  • (ArgumentError)


297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/acroforge/cli.rb', line 297

# Handles `acroforge prepare <pdf> [--out PATH] [--schema PATH]`.
#
# Loads the optional schema, runs AcroForge::Preparer.prepare! with stdout
# silenced, and prints a summary of the result.
#
# @param argv [Array<String>] arguments following the subcommand (mutated)
# @return [Integer] EXIT_OK
# @raise [ArgumentError] when no PDF argument is present
# @raise [Errno::ENOENT] when the PDF or the --schema file does not exist
def cmd_prepare(argv)
  chosen = { out: nil, schema: nil }
  parser = OptionParser.new
  parser.on("--out PATH") { |path| chosen[:out] = path }
  parser.on("--schema PATH") { |path| chosen[:schema] = path }
  parser.parse!(argv)

  pdf = argv.shift
  raise ArgumentError, "missing <pdf> argument" if pdf.nil?

  # Check the PDF first, then the schema if one was named.
  schema_file = chosen[:schema]
  [pdf, schema_file].compact.each do |path|
    raise Errno::ENOENT, path unless File.exist?(path)
  end

  schema = schema_file ? AcroForge::Schema.load(schema_file) : {}
  outcome = silenced(verbose: false) do
    AcroForge::Preparer.prepare!(pdf, out: chosen[:out], schema: schema)
  end
  summarize_prepare(outcome, pdf, chosen[:out])
  EXIT_OK
end

.cmd_relabel(argv) ⇒ Object



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/acroforge/cli.rb', line 206

# Handles the `acroforge relabel <action>` family. The first element of
# +argv+ selects the action:
#
#   relabel propose <pdf> [--out PATH] [--schema PATH] [--merge|--overwrite] [-v]
#     Calls AcroForge::Relabeler.propose and prints a summary. --out
#     defaults to "mapping.yml" and the mode defaults to :merge.
#   relabel apply <pdf> <mapping.yml> [--annotate[=PATH]] [-v]
#     Calls AcroForge::Relabeler.apply!, optionally writing an annotated
#     review PDF first.
#
# @param argv [Array<String>] arguments following `relabel` (mutated)
# @return [Integer] EXIT_OK on success, EXIT_USER_ERROR for an unknown action
# @raise [ArgumentError] when a required positional argument is missing
# @raise [Errno::ENOENT] when the PDF (or, for `apply`, the mapping file)
#   does not exist
def cmd_relabel(argv)
  action = argv.shift
  case action
  when "propose"
    out = "mapping.yml"
    schema_path = nil
    mode = :merge
    verbose = false
    OptionParser.new do |opts|
      opts.on("--out PATH") { |v| out = v }
      opts.on("--schema PATH") { |v| schema_path = v }
      # --merge and --overwrite both write `mode`; whichever appears last
      # on the command line wins.
      opts.on("--merge") { mode = :merge }
      opts.on("--overwrite") { mode = :overwrite }
      opts.on("-v", "--verbose") { verbose = true }
    end.parse!(argv)
    pdf = argv.shift
    raise ArgumentError, "missing <pdf> argument" if pdf.nil?
    raise Errno::ENOENT, pdf unless File.exist?(pdf)

    # Without --schema an empty schema is passed to the relabeler.
    schema = schema_path ? AcroForge::Schema.load(schema_path) : {}
    result = silenced(verbose: verbose) do
      AcroForge::Relabeler.propose(pdf, out: out, schema: schema, mode: mode)
    end
    summarize_propose(result)
    EXIT_OK
  when "apply"
    verbose = false
    # `annotate_out` tracks three states:
    #   false       -> --annotate not passed; no annotation
    #   true        -> --annotate passed without value; use default path
    #   "some/path" -> --annotate=path passed explicitly
    annotate_out = false
    OptionParser.new do |opts|
      opts.on("-v", "--verbose") { verbose = true }
      opts.on("--annotate [PATH]", "Also write an annotated review PDF (default: <source>_annotated.pdf)") do |v|
        # OptionParser yields nil when the optional PATH is omitted.
        annotate_out = v || true
      end
    end.parse!(argv)
    pdf = argv.shift
    mapping = argv.shift
    raise ArgumentError, "missing arguments: expected <pdf> <mapping.yml>" if pdf.nil? || mapping.nil?
    raise Errno::ENOENT, pdf unless File.exist?(pdf)
    raise Errno::ENOENT, mapping unless File.exist?(mapping)

    # Annotation runs BEFORE the rename so the badges show
    # original_field_name -> proposed_key. After the rename, the mapping's
    # PDF field names no longer match the file, so post-rename annotation
    # would render every entry as "missing in mapping" -- useless.
    annotate_path = nil
    if annotate_out
      # The explicit `== true` is deliberate: a String path is also truthy,
      # so only the literal `true` selects the default location.
      annotate_path = (annotate_out == true) ? default_annotated_path(pdf) : annotate_out
      silenced(verbose: verbose) do
        AcroForge::Annotator.annotate(pdf, out: annotate_path, mapping: mapping)
      end
    end

    result = silenced(verbose: verbose) { AcroForge::Relabeler.apply!(pdf, mapping) }
    summarize_apply(result, pdf)
    # Reported after the apply summary, although the file was written first.
    puts "Wrote #{annotate_path}: review snapshot of the mapping plan." if annotate_path
    EXIT_OK
  else
    # Also reached when no action was given at all (action is nil).
    warn "acroforge: unknown relabel action #{action.inspect}"
    EXIT_USER_ERROR
  end
end

.cmd_schema(argv) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/acroforge/cli.rb', line 142

# Handles the `acroforge schema <action>` family. The first element of
# +argv+ selects the action:
#
#   schema infer <pdf> [--out PATH] [--sections a,b,c] [-v]
#     Infers a schema from the PDF and writes it to --out
#     (default "schema.yml").
#   schema merge <mapping.yml> [--schema PATH] [--out PATH]
#     Merges the mapping file's entries (top-level keys starting with "_"
#     are skipped) into the schema at --schema (default "schema.yml"; a
#     missing file is treated as an empty schema) and writes the result to
#     --out (default: the --schema path).
#
# @param argv [Array<String>] arguments following `schema` (mutated)
# @return [Integer] EXIT_OK on success, EXIT_USER_ERROR for an unknown action
# @raise [ArgumentError] when the required positional argument is missing
# @raise [Errno::ENOENT] when the PDF / mapping file does not exist
def cmd_schema(argv)
  action = argv.shift
  case action
  when "infer"
    out = "schema.yml"
    sections = []
    verbose = false
    OptionParser.new do |opts|
      opts.on("--out PATH") { |v| out = v }
      opts.on("--sections LIST") { |v| sections = v.split(",").map(&:strip) }
      opts.on("-v", "--verbose") { verbose = true }
    end.parse!(argv)
    pdf = argv.shift
    raise ArgumentError, "missing <pdf> argument" if pdf.nil?
    raise Errno::ENOENT, pdf unless File.exist?(pdf)

    schema = silenced(verbose: verbose) { AcroForge::Schema.infer(pdf, sections: sections) }
    AcroForge::Schema.dump(schema, out)
    count = schema.size
    puts "Wrote #{out}: #{count} canonical key#{"s" unless count == 1} inferred."
    EXIT_OK
  when "merge"
    schema_path = "schema.yml"
    out = nil
    OptionParser.new do |opts|
      opts.on("--schema PATH") { |v| schema_path = v }
      opts.on("--out PATH") { |v| out = v }
    end.parse!(argv)
    mapping_path = argv.shift
    raise ArgumentError, "missing <mapping.yml> argument" if mapping_path.nil?
    raise Errno::ENOENT, mapping_path unless File.exist?(mapping_path)
    out ||= schema_path

    # An entry's variations as a Set. Entries that are not Hashes are
    # treated as having none. The same guard is applied to both the
    # before- and after-merge snapshots so that a non-Hash entry cannot
    # crash the "updated" count below.
    variations_of = lambda do |entry|
      (entry.is_a?(Hash) ? (entry[:variations] || []) : []).to_set
    end

    existing = File.exist?(schema_path) ? AcroForge::Schema.load(schema_path) : {}
    keys_before = existing.keys.to_set
    variations_before = existing.each_with_object({}) do |(k, v), acc|
      acc[k] = variations_of.call(v)
    end

    mapping = YAML.load_file(mapping_path) || {}
    merged = AcroForge::Schema.merge(existing, mapping.reject { |k, _| k.to_s.start_with?("_") })
    AcroForge::Schema.dump(merged, out)

    # "added" = keys that did not exist before; "updated" = pre-existing
    # keys whose set of variations changed.
    added = (merged.keys.to_set - keys_before).size
    updated = merged.keys.count do |k|
      keys_before.include?(k) && variations_of.call(merged[k]) != variations_before[k]
    end
    summarize_schema_merge(out, added, updated)
    EXIT_OK
  else
    # Also reached when no action was given at all (action is nil).
    warn "acroforge: unknown schema action #{action.inspect}"
    EXIT_USER_ERROR
  end
end

.default_annotated_path(pdf) ⇒ Object



347
348
349
350
# File 'lib/acroforge/cli.rb', line 347

# Builds the default destination for an annotated copy of +pdf+: the same
# directory, with the final extension removed from the file name and
# "_annotated.pdf" appended.
#
# @param pdf [String] path of the source PDF
# @return [String] e.g. "forms/w9.pdf" -> "forms/w9_annotated.pdf"
def default_annotated_path(pdf)
  directory = File.dirname(pdf)
  stem = File.basename(pdf, ".*")
  File.join(directory, stem + "_annotated.pdf")
end

.format_alternate_name(alt) ⇒ Object



134
135
136
137
138
139
140
# File 'lib/acroforge/cli.rb', line 134

# Renders a field's alternate name for the text table.
#
# @param alt [nil, Hash, Object] the alternate-name value
# @return [String] "" for nil; "{key: value.inspect, ...}" for a Hash;
#   otherwise the value's #to_s
def format_alternate_name(alt)
  return "" if alt.nil?
  return alt.to_s unless alt.is_a?(Hash)

  pairs = alt.map { |key, value| "#{key}: #{value.inspect}" }
  "{" + pairs.join(", ") + "}"
end


125
126
127
128
129
130
131
132
# File 'lib/acroforge/cli.rb', line 125

# Prints +fields+ as a left-aligned, three-column text table with a header
# row. Columns are separated by two spaces and sized to their widest cell;
# trailing padding on each line is stripped.
#
# @param fields [Array<Hash>] entries read via :name, :type and
#   :alternate_name
# @return [Array<Array<String>>] the printed rows, header included
def print_fields_table(fields)
  header = ["NAME", "TYPE", "ALTERNATE NAME"]
  body = fields.map do |field|
    [field[:name].to_s, field[:type].to_s, format_alternate_name(field[:alternate_name])]
  end
  table = [header] + body

  # Every row has exactly three cells, so transposing yields the columns.
  column_widths = table.transpose.map { |column| column.map(&:length).max }

  table.each do |line|
    padded = line.zip(column_widths).map { |cell, width| cell.ljust(width) }
    puts padded.join("  ").rstrip
  end
end


47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/acroforge/cli.rb', line 47

# Prints the usage summary for every subcommand to stdout.
#
# @param _ [Object] ignored; accepted so the method can be called with the
#   remaining argv
# @return [Integer] EXIT_OK
def print_help(_)
  usage = <<~HELP
    acroforge: PDF AcroForm engine + relabeler

    Usage:
      acroforge fields <pdf>           [--json]
      acroforge schema infer <pdf>     [--out schema.yml] [--sections a,b,c] [-v]
      acroforge schema merge <mapping.yml> [--schema schema.yml] [--out schema.yml]
      acroforge relabel propose <pdf>  [--out mapping.yml] [--schema schema.yml] [--merge|--overwrite] [-v]
      acroforge relabel apply <pdf> <mapping.yml> [--annotate[=PATH]] [-v]
      acroforge compile <pdf>          [--schema schema.yml] [--out normalized.pdf | --overwrite]
      acroforge bootstrap <pdf>        [--schema-out s.yml] [--mapping-out m.yml] [-v]
      acroforge annotate <pdf>         [--mapping mapping.yml] [--out annotated.pdf]
      acroforge prepare <pdf>          [--out prepared.pdf] [--schema schema.yml]
      acroforge version
      acroforge help

    Pass -v or --verbose to bootstrap, schema infer, relabel propose, and
    relabel apply to see the engine's per-field reasoning on stdout.
  HELP
  puts usage
  EXIT_OK
end


42
43
44
45
# File 'lib/acroforge/cli.rb', line 42

# Writes AcroForge::VERSION to stdout on a line of its own.
#
# @return [Integer] EXIT_OK
def print_version
  $stdout.puts(AcroForge::VERSION)
  EXIT_OK
end

.run(argv) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/acroforge/cli.rb', line 19

# Entry point of the command-line interface.
#
# Routes the first element of +argv+ to the matching `cmd_*` handler and
# converts the exceptions those handlers raise into exit codes:
#
#   AcroForge::ValidationError, AcroForge::RelabelError -> EXIT_VALIDATION_ERROR
#   Errno::ENOENT, ArgumentError, OptionParser::ParseError -> EXIT_USER_ERROR
#   any other StandardError                               -> EXIT_INTERNAL_ERROR
#
# OptionParser::ParseError (unknown flag, missing flag argument, ...) is
# listed explicitly because it descends from RuntimeError; without it a
# mistyped option would be reported as an internal error.
#
# @param argv [Array<String>] raw command-line arguments; the caller's array
#   is left untouched because a copy is consumed
# @return [Integer] the handler's return value on success, otherwise one of
#   the EXIT_* codes above
def run(argv)
  argv = argv.dup
  sub = argv.shift
  return print_help(argv) if sub.nil? || sub == "help"
  return print_version if sub == "version"

  unless SUBCOMMANDS.include?(sub)
    warn "acroforge: unknown subcommand #{sub.inspect}. Try `acroforge help`."
    return EXIT_USER_ERROR
  end

  # `sub` has been checked against the SUBCOMMANDS whitelist above, so the
  # dynamic dispatch cannot reach arbitrary methods.
  send("cmd_#{sub}", argv)
rescue AcroForge::ValidationError, AcroForge::RelabelError => e
  warn "acroforge: #{e.message}"
  EXIT_VALIDATION_ERROR
rescue Errno::ENOENT, ArgumentError, OptionParser::ParseError => e
  warn "acroforge: #{e.message}"
  EXIT_USER_ERROR
rescue => e
  warn "acroforge: internal error (#{e.class}): #{e.message}"
  EXIT_INTERNAL_ERROR
end

.silenced(verbose: false) ⇒ Object

Run `block` with $stdout redirected to /dev/null, unless `verbose:` is true. Used to suppress the engine's per-field chatter during normal CLI runs.



72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/acroforge/cli.rb', line 72

# Runs the given block with $stdout pointed at the null device
# (File::NULL), restoring the previous $stdout afterwards even when the
# block raises. With `verbose: true` the block runs with $stdout untouched.
#
# @param verbose [Boolean] when true, do not redirect anything
# @yield the work whose stdout output should be discarded
# @return [Object] whatever the block returns
def silenced(verbose: false)
  if verbose
    yield
  else
    # The block form of File.open closes the sink once we are done with it.
    File.open(File::NULL, "w") do |sink|
      previous = $stdout
      $stdout = sink
      begin
        yield
      ensure
        $stdout = previous
      end
    end
  end
end

.summarize_annotate(result, mapping_path) ⇒ Object



352
353
354
355
356
357
358
359
360
361
# File 'lib/acroforge/cli.rb', line 352

# Prints a one-line report for an annotate run.
#
# With a mapping, the line carries a parenthesised breakdown: the mapped
# count always, and the "no key" / "not in mapping" counts only when they
# are greater than zero. Without a mapping only the total is reported.
#
# @param result [Hash] read via :out_path and :annotated, plus :mapped,
#   :unmapped and :missing when a mapping was used
# @param mapping_path [String, nil] mapping file used for the run, if any
# @return [nil]
def summarize_annotate(result, mapping_path)
  headline = "Wrote #{result[:out_path]}: #{result[:annotated]} fields annotated"
  unless mapping_path
    puts "#{headline}."
    return
  end

  breakdown = ["#{result[:mapped]} mapped"]
  { unmapped: "no key", missing: "not in mapping" }.each do |key, label|
    tally = result[key]
    breakdown << "#{tally} #{label}" if tally > 0
  end
  puts "#{headline} (#{breakdown.join(", ")})."
end

.summarize_apply(result, pdf) ⇒ Object



98
99
100
101
102
103
104
# File 'lib/acroforge/cli.rb', line 98

# Prints a one-line report for `relabel apply`.
#
# The renamed count is always shown; the disambiguated, skipped and stale
# counts appear only when greater than zero.
#
# @param result [Hash] read via :renamed, :disambiguated, :skipped_null
#   and :stale
# @param pdf [String] path of the PDF the mapping was applied to
# @return [nil]
def summarize_apply(result, pdf)
  optional_counts = {
    disambiguated: "disambiguated",
    skipped_null: "skipped (no key)",
    stale: "stale"
  }

  segments = ["#{result[:renamed]} renamed"]
  optional_counts.each do |key, label|
    tally = result[key]
    segments << "#{tally} #{label}" if tally > 0
  end
  puts "Applied to #{pdf}: #{segments.join(", ")}."
end

.summarize_prepare(result, in_path, explicit_out) ⇒ Object



317
318
319
320
321
322
323
324
325
326
327
# File 'lib/acroforge/cli.rb', line 317

# Prints a one-line report for `prepare`.
#
# When no duplicate groups were found, a "Nothing to do" notice is printed.
# Otherwise the line states where the result was written ("in place" when
# the output path equals the input path) and how many duplicates were
# renamed or skipped.
#
# @param result [Hash] read via :out_path, :duplicate_groups, :renamed
#   and :skipped
# @param in_path [String] path of the input PDF
# @param explicit_out [String, nil] accepted for the caller's convenience;
#   not consulted here
# @return [nil]
def summarize_prepare(result, in_path, explicit_out)
  if result[:duplicate_groups].zero?
    puts "Nothing to do: #{in_path} has no duplicate field names."
    return
  end

  destination = result[:out_path]
  location = destination == in_path ? "in place" : "to #{destination}"

  details = ["#{result[:renamed]} duplicates renamed"]
  skipped = result[:skipped]
  details << "#{skipped} skipped (no heuristic proposal)" if skipped > 0

  puts "Prepared #{location}: #{result[:duplicate_groups]} duplicate groups, #{details.join(", ")}."
end

.summarize_propose(result) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/acroforge/cli.rb', line 85

# Prints a one-line report for a mapping proposal.
#
# Three shapes are possible: no fields at all, every field proposed, or
# some fields left for manual review.
#
# @param result [Hash] read via :total, :mapped and :out_path
# @return [nil]
def summarize_propose(result)
  field_total = result[:total]
  proposed = result[:mapped]

  detail =
    if field_total.zero?
      "no AcroForm fields found in the PDF."
    elsif proposed == field_total
      "#{proposed} of #{field_total} fields proposed."
    else
      "#{proposed} of #{field_total} fields proposed; #{field_total - proposed} need manual review."
    end

  puts "Wrote #{result[:out_path]}: #{detail}"
end

.summarize_schema_merge(out, added, updated) ⇒ Object



198
199
200
201
202
203
204
# File 'lib/acroforge/cli.rb', line 198

# Prints a one-line report for `schema merge`.
#
# Lists how many keys were added and how many existing keys were updated
# (each only when non-zero, with "key"/"keys" chosen by count), or
# "no changes" when both are zero.
#
# @param out [String] path the merged schema was written to
# @param added [Integer] number of keys that are new in the merged schema
# @param updated [Integer] number of pre-existing keys that changed
# @return [nil]
def summarize_schema_merge(out, added, updated)
  describe = lambda do |count, adjective, verb|
    noun = count == 1 ? "key" : "keys"
    "#{count} #{adjective} #{noun} #{verb}"
  end

  changes = []
  changes << describe.call(added, "new", "added") if added > 0
  changes << describe.call(updated, "existing", "updated") if updated > 0

  summary = changes.empty? ? "no changes" : changes.join(", ")
  puts "Merged into #{out}: #{summary}."
end