Class: SmartCsvImport::Matcher

Inherits:
Object
  • Object
show all
Includes:
Logging
Defined in:
lib/smart_csv_import/matcher.rb

Constant Summary collapse

# Whole-string (\A...\z) shapes treated as date-like:
#   1. four digits, two digits, two digits separated by hyphens (e.g. "2024-01-31")
#   2. one or two digits, one or two digits, four digits separated by slashes (e.g. "1/31/2024")
#   3. two digits, three ASCII letters, four digits separated by hyphens (e.g. "31-Jan-2024")
# Only the textual shape is checked; the patterns do not verify that the
# value is a real calendar date.
DATE_PATTERNS =
[
  /\A\d{4}-\d{2}-\d{2}\z/,
  /\A\d{1,2}\/\d{1,2}\/\d{4}\z/,
  /\A\d{2}-[A-Za-z]{3}-\d{4}\z/
].freeze
# Whole string made of at least seven characters drawn from digits,
# whitespace, "-", "(", ")", "+" and ".".
PHONE_PATTERN =
/\A[\d\s\-\(\)\+\.]{7,}\z/
# Whole string of the form <local>@<domain>.<suffix>, where no part contains
# whitespace or an additional "@".
EMAIL_PATTERN =
/\A[^@\s]+@[^@\s]+\.[^@\s]+\z/
# NOTE(review): presumably the amount added to a match's confidence when
# sampled column values agree with the target field — usage is not visible
# in this excerpt; confirm.
VALUE_BOOST =
0.05
# NOTE(review): presumably the (negative) amount applied to a match's
# confidence when sampled values disagree with the target field — confirm.
VALUE_PENALTY =
-0.10

Instance Method Summary collapse

Constructor Details

#initialize(file_path:, form_class:, confidence_threshold: SmartCsvImport.configuration.confidence_threshold) ⇒ Matcher

Returns a new instance of Matcher.



20
21
22
23
24
25
26
# File 'lib/smart_csv_import/matcher.rb', line 20

# Builds a matcher for a single CSV file and target form class.
#
# The form class is checked with +validate_form_class!+ before any state is
# stored, so a rejected form class leaves no instance variables set.
#
# @param file_path [Object] location of the CSV file; stored as given
# @param form_class [Object] form class whose fields the headers are matched against
# @param confidence_threshold [Object] stored as given; when omitted, read from
#   +SmartCsvImport.configuration.confidence_threshold+ at call time
# @return [Matcher]
# NOTE(review): whatever +validate_form_class!+ raises propagates to the
# caller — its error type is not visible here; confirm.
def initialize(
  file_path:,
  form_class:,
  confidence_threshold: SmartCsvImport.configuration.confidence_threshold
)
  validate_form_class!(form_class)

  @file_path            = file_path
  @form_class           = form_class
  @confidence_threshold = confidence_threshold
end

Instance Method Details

#call ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/smart_csv_import/matcher.rb', line 28

# Matches the CSV's headers against the form class, one strategy tier at a time.
#
# Order of tiers: the form class's own +matching_strategy+ (only when it
# returns a truthy value, labelled "custom"), then every (name, strategy)
# pair yielded by +default_tier_strategies+. Each tier's raw output goes
# through +with_value_hints+ using the sampled rows, and +accept_matches+
# then returns the updated results together with the headers still
# unresolved. Any header left after the last tier is stored as an
# UnmatchedResult carrying a copy of the attempted-strategies list.
#
# Side effects: sets +@sample_rows+ and writes progress via +log_info+.
#
# @return [Object] the accumulated results as returned by +accept_matches+,
#   with each unresolved header added under the header itself
def call
  validate_file!

  rows = parse_csv
  headers = rows.first&.keys || []
  @sample_rows = rows.first(SmartCsvImport.configuration.value_hint_rows)

  log_info("Starting header matching for #{headers.length} columns: #{headers.join(", ")}")
  log_info("Target fields: #{@form_class.csv_fields.keys.join(", ")}")

  matches = {}
  tried = []
  pending = headers.dup

  # One tier: run the strategy on what is still pending, adjust with value
  # hints, then fold accepted matches in. Rebinding `matches` and `pending`
  # here updates the enclosing locals, so each tier sees the previous
  # tier's leftovers.
  run_tier = lambda do |strategy, label|
    candidates = run_strategy(strategy, pending, tried, label)
    hinted = with_value_hints(candidates, @sample_rows, @form_class)
    matches, pending = accept_matches(matches, hinted, pending)
  end

  # Tier 1: custom strategy declared by the form class, when present.
  custom = @form_class.matching_strategy
  run_tier.call(custom, "custom") if custom

  # Tier 2 and 3: vector and LLM, ordered by SmartCsvImport.configuration.default_strategy.
  default_tier_strategies.each { |label, strategy| run_tier.call(strategy, label) }

  # Whatever is still pending could not be resolved by any tier.
  pending.each do |header|
    log_info("UNMATCHED: '#{header}' — tried: #{tried.join(", ")}")
    matches[header] = UnmatchedResult.new(csv_header: header, attempted_strategies: tried.dup)
  end

  matched_total = matches.count { |_, result| result.matched? }
  unmatched_total = matches.count { |_, result| result.unmatched? }
  log_info("Matching complete: #{matched_total} matched, #{unmatched_total} unmatched")
  matches
end