Class: Ucode::Commands::ParseCommand

Inherits:
Object
  • Object
show all
Defined in:
lib/ucode/commands/parse.rb

Overview

`ucode parse` — streams the Coordinator output into the on-disk JSON tree at `output/`. Single pass: enrich each codepoint, write its per-codepoint JSON, accumulate the aggregates, then do one final flush.

Instance Method Summary collapse

Instance Method Details

#call(version_intent, output_root:) ⇒ Hash

Returns { version:, codepoint_count: }.

Parameters:

  • version_intent (nil, :default, :latest, String)
  • output_root (String, Pathname)

Returns:

  • (Hash)

    { version:, codepoint_count: }



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/ucode/commands/parse.rb', line 20

# Runs the parse pipeline for one resolved version: every codepoint yielded by
# the Coordinator is handed to the per-codepoint writer and to the aggregate
# writer, and the aggregate writer is flushed once the stream is exhausted.
#
# @param version_intent [nil, :default, :latest, String] passed to VersionResolver.resolve
# @param output_root [String, Pathname] wrapped in a Pathname and given to both writers
# @return [Hash] { version:, codepoint_count: }
def call(version_intent, output_root:)
  version = VersionResolver.resolve(version_intent)
  root = Pathname.new(output_root)
  ucd_dir = Cache.ucd_dir(version)
  unihan_dir = Cache.unihan_dir(version)
  source_dirs = { ucd_dir: ucd_dir, unihan_dir: unihan_dir }

  coordinator = Coordinator.new
  per_codepoint = Repo::CodepointWriter.new(root, parallel_workers: workers)
  aggregate = Repo::AggregateWriter.new(root)

  # Keep the first truthy indices object the coordinator yields; it is needed
  # again for the final flush.
  captured_indices = nil
  coordinator.each_codepoint_with_indices(**source_dirs) do |indices, codepoint|
    captured_indices ||= indices
    per_codepoint.write(codepoint)
    aggregate.add(codepoint)
  end

  # If the block never captured anything, ask the coordinator for the indices directly.
  captured_indices ||= coordinator.indices_for(**source_dirs)

  flush_payload = {
    ucd_version: version,
    indices: captured_indices,
    property_aliases: load_records(ucd_dir, "PropertyAliases.txt", Parsers::PropertyAliases),
    property_value_aliases: load_records(ucd_dir, "PropertyValueAliases.txt", Parsers::PropertyValueAliases),
    named_sequences: load_records(ucd_dir, "NamedSequences.txt", Parsers::NamedSequences)
  }
  aggregate.flush(**flush_payload)

  { version: version, codepoint_count: aggregate.codepoint_count }
end