Class: SmartCsvImport::Matcher
- Inherits:
-
Object
- Object
- SmartCsvImport::Matcher
- Includes:
- Logging
- Defined in:
- lib/smart_csv_import/matcher.rb
Constant Summary collapse
- DATE_PATTERNS =
[ /\A\d{4}-\d{2}-\d{2}\z/, /\A\d{1,2}\/\d{1,2}\/\d{4}\z/, /\A\d{2}-[A-Za-z]{3}-\d{4}\z/ ].freeze
- PHONE_PATTERN =
/\A[\d\s\-\(\)\+\.]{7,}\z/
- EMAIL_PATTERN =
/\A[^@\s]+@[^@\s]+\.[^@\s]+\z/
- VALUE_BOOST =
0.05
- VALUE_PENALTY =
-0.10
Instance Method Summary collapse
- #call ⇒ Object
-
#initialize(file_path:, form_class:, confidence_threshold: SmartCsvImport.configuration.confidence_threshold) ⇒ Matcher
constructor
A new instance of Matcher.
Constructor Details
#initialize(file_path:, form_class:, confidence_threshold: SmartCsvImport.configuration.confidence_threshold) ⇒ Matcher
Returns a new instance of Matcher.
20 21 22 23 24 25 26 |
# File 'lib/smart_csv_import/matcher.rb', line 20

# Creates a matcher bound to one CSV file and one form class.
#
# The form class is passed to +validate_form_class!+ before any instance
# state is assigned, so a failing validation leaves no instance variables set.
#
# @param file_path the CSV location; stored as-is in @file_path
# @param form_class the form class to match headers against; stored in @form_class
# @param confidence_threshold stored in @confidence_threshold; when omitted,
#   read from SmartCsvImport.configuration.confidence_threshold at call time
# @return [Matcher] a new instance of Matcher
def initialize(
  file_path:,
  form_class:,
  confidence_threshold: SmartCsvImport.configuration.confidence_threshold
)
  validate_form_class!(form_class)

  @file_path = file_path
  @form_class = form_class
  @confidence_threshold = confidence_threshold
end
Instance Method Details
#call ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/smart_csv_import/matcher.rb', line 28

# Runs the tiered header-matching pipeline for the configured CSV file.
#
# Sequence:
# 1. +validate_file!+ runs, then the file is parsed with +parse_csv+.
# 2. CSV headers are the keys of the first parsed row (an empty list when
#    there is no first row).
# 3. The first +SmartCsvImport.configuration.value_hint_rows+ rows are kept in
#    @sample_rows and handed to +with_value_hints+ for every tier.
# 4. Each tier runs its strategy on the headers that are still unresolved,
#    post-processes the tier output with +with_value_hints+, and lets
#    +accept_matches+ produce the updated results and remaining headers.
# 5. Headers left over after all tiers are recorded as +UnmatchedResult+.
#
# @return [Hash] the accumulated results; headers left unresolved map to an
#   UnmatchedResult. Every value is sent +matched?+ / +unmatched?+ for the
#   final summary log line.
def call
  validate_file!

  parsed_rows = parse_csv
  csv_headers = parsed_rows.first&.keys || []
  @sample_rows = parsed_rows.first(SmartCsvImport.configuration.value_hint_rows)

  log_info("Starting header matching for #{csv_headers.length} columns: #{csv_headers.join(", ")}")
  log_info("Target fields: #{@form_class.csv_fields.keys.join(", ")}")

  results = {}
  attempted_strategies = []
  remaining = csv_headers.dup

  # One tier: run the strategy on the still-unresolved headers, pass the
  # output through with_value_hints, then fold it into results/remaining.
  # The lambda rebinds the enclosing +results+ and +remaining+ locals.
  apply_tier = lambda do |strategy, label|
    candidates = run_strategy(strategy, remaining, attempted_strategies, label)
    hinted = with_value_hints(candidates, @sample_rows, @form_class)
    results, remaining = accept_matches(results, hinted, remaining)
  end

  # Tier 1: Custom strategy from form_class (skipped when none is defined)
  custom_strategy = @form_class.matching_strategy
  apply_tier.call(custom_strategy, "custom") if custom_strategy

  # Tier 2 and 3: Vector and LLM, ordered by SmartCsvImport.configuration.default_strategy
  default_tier_strategies.each { |name, strategy| apply_tier.call(strategy, name) }

  # Remaining unresolved headers become UnmatchedResult
  remaining.each do |header|
    log_info("UNMATCHED: '#{header}' — tried: #{attempted_strategies.join(", ")}")
    results[header] = UnmatchedResult.new(
      csv_header: header,
      attempted_strategies: attempted_strategies.dup
    )
  end

  matched_total = results.count { |_, r| r.matched? }
  unmatched_total = results.count { |_, r| r.unmatched? }
  log_info("Matching complete: #{matched_total} matched, #{unmatched_total} unmatched")

  results
end