Class: Glossarist::Sts::Importer

Inherits:
Object
  • Object
show all
Defined in:
lib/glossarist/sts/importer.rb

Defined Under Namespace

Classes: DedupState

Constant Summary collapse

STRATEGIES =
%i[skip replace merge].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(duplicate_strategy: :skip) ⇒ Importer

Returns a new instance of Importer.



13
14
15
16
17
18
19
20
21
# File 'lib/glossarist/sts/importer.rb', line 13

# Builds an importer configured with a duplicate-handling strategy.
#
# @param duplicate_strategy [Symbol] one of the entries in STRATEGIES
# @raise [ArgumentError] when duplicate_strategy is not listed in STRATEGIES
def initialize(duplicate_strategy: :skip)
  known = STRATEGIES.include?(duplicate_strategy)

  unless known
    message = "duplicate_strategy must be one of #{STRATEGIES.join(', ')}, " \
              "got #{duplicate_strategy}"
    raise ArgumentError, message
  end

  @duplicate_strategy = duplicate_strategy
  @mapper = TermMapper.new
end

Instance Attribute Details

#duplicate_strategy ⇒ Object (readonly)

Returns the value of attribute duplicate_strategy.



11
12
13
# File 'lib/glossarist/sts/importer.rb', line 11

# Reader for the duplicate-handling strategy held in @duplicate_strategy.
#
# @return [Symbol, nil] the current value of @duplicate_strategy
def duplicate_strategy = @duplicate_strategy

Instance Method Details

#import_into_existing(xml_files, dataset_path) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/glossarist/sts/importer.rb', line 51

# Adds the concepts extracted from xml_files to the dataset loaded from
# dataset_path, then saves the dataset back to that same path.
#
# Steps, in order: load the existing dataset, extract the new concepts,
# build an index over the existing dataset, apply the new concepts with
# de-duplication, and save.
#
# @param xml_files [Object] handed to extract_all_concepts and echoed back
#   in the result as source_files
# @param dataset_path [Object] location used for both loading and saving
# @return [ImportResult] the dataset's managed concepts plus the conflicts
#   and skipped count reported by apply_with_dedup
def import_into_existing(xml_files, dataset_path)
  dataset = load_existing(dataset_path)
  incoming = extract_all_concepts(xml_files)

  # The index is built after extraction and before the dataset is modified.
  dedup = apply_with_dedup(incoming, dataset, build_concept_index(dataset))
  save_to_path(dataset, dataset_path)

  ImportResult.new(
    concepts: dataset.managed_concepts,
    conflicts: dedup.conflicts,
    source_files: xml_files,
    skipped_count: dedup.skipped,
  )
end

#import_new(xml_files, output:, shortname: nil, version: nil, **opts) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/glossarist/sts/importer.rb', line 23

# Imports the concepts extracted from xml_files into a new output target.
#
# When output ends with ".gcr" the concepts are written through create_gcr,
# which requires both shortname and version; any other output is written
# through save_dataset. The required options are checked before any file is
# parsed, so a call that is missing them fails immediately instead of after
# the extraction and de-duplication work has already been done.
#
# @param xml_files [Object] handed to extract_all_concepts and echoed back
#   in the result as source_files
# @param output [String] destination; a ".gcr" suffix selects GCR output
# @param shortname [Object, nil] required for GCR output
# @param version [Object, nil] required for GCR output
# @param opts [Hash] extra keyword options forwarded unchanged to create_gcr
#   (unused for non-GCR output)
# @return [ImportResult] the de-duplicated concepts, conflicts and skipped
#   count reported by dedup_concepts
# @raise [ArgumentError] if output is a GCR file and shortname or version
#   is missing
def import_new(xml_files, output:, shortname: nil, version: nil, **opts)
  gcr_output = output.end_with?(".gcr")

  # Fail fast: validate the GCR-only options before touching any input file.
  if gcr_output
    raise ArgumentError, "--shortname is required for GCR output" unless shortname
    raise ArgumentError, "--version is required for GCR output" unless version
  end

  raw_concepts = extract_all_concepts(xml_files)
  concepts, conflicts, skipped = dedup_concepts(raw_concepts)

  if gcr_output
    create_gcr(concepts, output, shortname: shortname, version: version,
                                 **opts)
  else
    save_dataset(concepts, output)
  end

  ImportResult.new(
    concepts: concepts,
    conflicts: conflicts,
    source_files: xml_files,
    skipped_count: skipped,
  )
end