Module: SmarterCSV::Reader::Options

Included in:
SmarterCSV::Reader
Defined in:
lib/smarter_csv/reader_options.rb

Constant Summary collapse

# Baseline value for every reader option. `process_options` merges the
# user-supplied options over a copy of this hash, so any key the caller omits
# ends up with the value listed here. The hash itself is frozen.
DEFAULT_OPTIONS =
{
  acceleration: true, # whether the user wants to use acceleration or not
  auto_row_sep_chars: 500,
  bad_row_limit: nil,
  chunk_size: nil,
  col_sep: :auto, # was: ',',
  collect_raw_lines: true,
  comment_regexp: nil, # was: /\A#/,
  convert_values_to_numeric: true,
  downcase_header: true,
  duplicate_header_suffix: '', # was: nil,
  field_size_limit: nil, # Integer (bytes) or nil for no limit. Raises FieldSizeLimitExceeded if any
  #                          extracted field exceeds this size. Prevents DoS from runaway quoted
  #                          fields (unbounded multiline stitching) or huge inline payloads.
  file_encoding: 'utf-8',
  force_utf8: false,
  headers_in_file: true,
  invalid_byte_sequence: '',
  keep_original_headers: false,
  key_mapping: nil,
  strict: false,              # DEPRECATED -> use missing_headers
  missing_headers: :auto,     # :auto (auto-generate names for extra cols) or :raise (raise HeaderSizeMismatch)
  missing_header_prefix: 'column_',
  nil_values_matching: nil,   # regex: set matching values to nil (key kept); pairs with remove_empty_values
  on_bad_row: :raise,
  on_chunk: nil,    # callable: fired after each chunk is parsed, before yielding to the block
  on_complete: nil, # callable: fired once after the entire file is processed
  on_start: nil,    # callable: fired once before the first row is parsed
  quote_boundary: :standard, # :standard (only at field boundary 👍) or :legacy (any quote toggles state 👎)
  quote_char: '"',
  quote_escaping: :auto,
  remove_empty_hashes: true,
  remove_empty_values: true,
  remove_unmapped_keys: false,
  remove_values_matching: nil, # DEPRECATED: use nil_values_matching instead
  remove_zero_values: false,
  required_headers: nil,
  required_keys: nil,
  row_sep: :auto, # was: $/,
  silence_missing_keys: false,
  skip_lines: nil,
  strings_as_keys: false,
  strip_chars_from_headers: nil,
  strip_whitespace: true,
  user_provided_headers: nil,
  value_converters: nil,
  verbose: :normal, # nil/:normal (default), :quiet (suppress warnings), :debug (print diagnostics); true/false are deprecated
  with_line_numbers: false,
}.freeze

Instance Method Summary collapse

Instance Method Details

#process_options(given_options = {}) ⇒ Object

NOTE: this is not called when “parse” methods are tested by themselves



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/smarter_csv/reader_options.rb', line 57

# Merges the user-supplied options over DEFAULT_OPTIONS, normalizes deprecated
# and alternative spellings (verbose, headers:, remove_values_matching, strict),
# runs validate_options! on the result, and stores it in @options.
#
# The caller's hash is never modified: every adjustment is made on a private
# shallow copy, so frozen hashes are accepted as well.
#
# @param given_options [Hash, nil] user-supplied options keyed by Symbol; nil is treated as {}
# @return [Hash] the merged and normalized options (the same object stored in @options)
# @raise [SmarterCSV::ValidationError] if `headers: { only: }` / `headers: { except: }`
#   contain elements that are neither String nor Symbol; validate_options! (not
#   visible here) presumably raises for other invalid settings as well
def process_options(given_options = {})
  # Work on a shallow copy so the caller's (possibly frozen) hash is left untouched.
  given_options = given_options ? given_options.dup : {}

  # Debug output before merge — check raw verbose value (true or :debug).
  # The hash is interpolated directly instead of going through Kernel#pp: pp returns
  # its argument but also prints it to $stdout, which duplicated the dump on stdout.
  # The text written to $stderr is the same as before.
  $stderr.puts "User provided options:\n#{given_options}\n" if [true, :debug].include?(given_options[:verbose])

  # Special case for :user_provided_headers:
  #
  # If we would use the default `headers_in_file: true`, and `:user_provided_headers` are given,
  # we could lose the first data row
  #
  # We now err on the side of treating an actual header as data, rather than losing a data row.
  #
  if given_options[:user_provided_headers] && !given_options.key?(:headers_in_file)
    given_options[:headers_in_file] = false
    warn "WARNING: setting `headers_in_file: false` as a precaution to not lose the first row. Set explicitly to `true` if you have headers." unless given_options[:verbose] == :quiet
  end

  # Hash#merge returns a new, unfrozen hash; DEFAULT_OPTIONS itself stays untouched.
  @options = DEFAULT_OPTIONS.merge(given_options)

  # Normalize verbose to a symbol — done once here, stored back into @options.
  # All subsequent checks are free symbol comparisons; no re-evaluation needed.
  #   :quiet  — suppress all warnings and notices (good for production)
  #   :normal — show behavioral warnings (default; helpful for new users)
  #   :debug  — :normal + print computed options and per-row diagnostics
  # nil is silently normalized to :normal; true/false are deprecated.
  case @options[:verbose]
  when :quiet, :normal, :debug
    # keep as is
  when nil
    @options[:verbose] = :normal
  when false
    warn "DEPRECATION WARNING: verbose: false is deprecated. Use verbose: :normal instead (or omit — it is the default)."
    @options[:verbose] = :normal
  when true
    warn "DEPRECATION WARNING: verbose: true is deprecated. Use verbose: :debug instead."
    @options[:verbose] = :debug
  else
    warn "WARNING: unknown verbose value #{@options[:verbose].inspect}, defaulting to :normal. Valid values: :quiet, :normal, :debug."
    @options[:verbose] = :normal
  end

  # fix invalid input
  @options[:invalid_byte_sequence] ||= ''

  # Normalize headers: { only: [...] } / { except: [...] } to internal option names.
  # The public API is headers: { only: } or headers: { except: }.
  # Internally we use only_headers: / except_headers: (what the C extension reads).
  if (hdr = @options.delete(:headers)).is_a?(Hash)
    @options[:only_headers]   = hdr[:only]   if hdr.key?(:only)
    @options[:except_headers] = hdr[:except] if hdr.key?(:except)
  end

  # Deprecation: direct use of only_headers: / except_headers: (use headers: { only: } instead)
  if given_options.key?(:only_headers) && !given_options.key?(:headers)
    warn "DEPRECATION WARNING: 'only_headers:' is deprecated. Use 'headers: { only: [...] }' instead." unless @options[:verbose] == :quiet
  end
  if given_options.key?(:except_headers) && !given_options.key?(:headers)
    warn "DEPRECATION WARNING: 'except_headers:' is deprecated. Use 'headers: { except: [...] }' instead." unless @options[:verbose] == :quiet
  end

  # Normalize only_headers/except_headers to arrays of symbols (internal names, read by C extension)
  if @options[:only_headers]
    values = Array(@options[:only_headers])
    bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
    raise SmarterCSV::ValidationError, "headers: { only: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
    @options[:only_headers] = values.map(&:to_sym)
  end
  if @options[:except_headers]
    values = Array(@options[:except_headers])
    bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
    raise SmarterCSV::ValidationError, "headers: { except: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
    @options[:except_headers] = values.map(&:to_sym)
  end

  # Deprecation: remove_values_matching → nil_values_matching
  # Old behavior: removes the key-value pair entirely.
  # New behavior: nil_values_matching sets the value to nil (key kept);
  # combined with the default remove_empty_values: true the net effect is identical.
  # With remove_empty_values: false, the key is retained with a nil value.
  if given_options.key?(:remove_values_matching)
    unless @options[:verbose] == :quiet
      warn "DEPRECATION WARNING: 'remove_values_matching' is deprecated. " \
           "Use 'nil_values_matching' instead. With the default 'remove_empty_values: true' " \
           "the net behavior is identical. With 'remove_empty_values: false', matching values " \
           "are set to nil but the key is retained in the result hash."
    end
    # An explicitly given nil_values_matching wins over the deprecated option.
    @options[:nil_values_matching] ||= @options[:remove_values_matching]
    @options[:remove_values_matching] = nil # clear to prevent double-processing
  end

  # Translate deprecated :strict option to :missing_headers
  if given_options.key?(:strict)
    unless @options[:verbose] == :quiet
      warn "DEPRECATION WARNING: 'strict' option is deprecated and will be removed in a future version. " \
           "Use 'missing_headers: :raise' instead of 'strict: true', or 'missing_headers: :auto' instead of 'strict: false'."
    end
    # An explicitly given missing_headers takes precedence over the deprecated flag.
    @options[:missing_headers] = @options[:strict] ? :raise : :auto unless given_options.key?(:missing_headers)
  end

  # Keep :strict synchronized with :missing_headers (C extension reads :strict directly)
  @options[:strict] = (@options[:missing_headers] == :raise)

  # Same reasoning as the first debug dump: interpolate directly, do not call pp.
  $stderr.puts "Computed options:\n#{@options}\n" if @options[:verbose] == :debug

  validate_options!(@options)
  @options
end