Module: Philiprehberger::CsvKit

Defined in:
lib/philiprehberger/csv_kit.rb,
lib/philiprehberger/csv_kit/row.rb,
lib/philiprehberger/csv_kit/writer.rb,
lib/philiprehberger/csv_kit/dialect.rb,
lib/philiprehberger/csv_kit/version.rb,
lib/philiprehberger/csv_kit/detector.rb,
lib/philiprehberger/csv_kit/callbacks.rb,
lib/philiprehberger/csv_kit/processor.rb,
lib/philiprehberger/csv_kit/error_handler.rb

Defined Under Namespace

Modules: Callbacks, ErrorHandler
Classes: Detector, Dialect, Error, Processor, Row, Writer

Constant Summary collapse

VERSION =
'0.6.0'

Class Method Summary collapse

Class Method Details

.count(path, dialect: nil) ⇒ Integer

Count data rows without loading them all into memory.

Parameters:

  • path (String)

    file path

  • dialect (Symbol, Hash, nil) (defaults to: nil)

    CSV dialect preset or custom options

Returns:

  • (Integer)


75
76
77
78
79
80
81
# File 'lib/philiprehberger/csv_kit.rb', line 75

# Counts the rows that CSV.foreach yields for the file. Rows are parsed with
# `headers: true` unless the dialect overrides it, so by default the first
# line is not counted.
#
# @param path [String] file path
# @param dialect [Symbol, Hash, nil] dialect preset or custom options; when
#   given, it is merged into the CSV options through Dialect#merge_into
# @return [Integer] number of rows yielded
def self.count(path, dialect: nil)
  options = { headers: true }
  options = Dialect.new(dialect).merge_into(options) if dialect
  # The block-less foreach returns an enumerator, so rows are still read one
  # at a time while counting.
  CSV.foreach(path, **options).count
end

.each_hash(path, dialect: nil) {|Hash{Symbol => String}| ... } ⇒ Enumerator?

Stream rows one at a time as symbolized hashes with constant memory. Returns an Enumerator if no block is given.

Parameters:

  • path (String)

    file path

  • dialect (Symbol, Hash, nil) (defaults to: nil)

    CSV dialect preset or custom options

Yields:

  • (Hash{Symbol => String})

    each row

Returns:

  • (Enumerator, nil)


90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/philiprehberger/csv_kit.rb', line 90

# Streams the file row by row, converting each row to a hash whose keys are
# symbolized with `to_sym`.
#
# @param path [String] file path
# @param dialect [Symbol, Hash, nil] dialect preset or custom options; when
#   given, it is merged into the CSV options through Dialect#merge_into
# @yield [Hash] each row as a symbol-keyed hash
# @return [Enumerator] when no block is given; otherwise the result of
#   iterating the enumerator with the block
def self.each_hash(path, dialect: nil, &block)
  options = { headers: true }
  options = Dialect.new(dialect).merge_into(options) if dialect

  # The file is only opened once the enumerator is actually iterated.
  rows = Enumerator.new do |yielder|
    CSV.foreach(path, **options) do |record|
      yielder.yield(record.to_h.transform_keys(&:to_sym))
    end
  end
  return rows unless block

  rows.each(&block)
end

.filter(path, dialect: nil) {|Hash{Symbol => String}| ... } ⇒ String

Filter rows and return matching rows as a CSV string.

Parameters:

  • path (String)

    file path

  • dialect (Symbol, Hash, nil) (defaults to: nil)

    CSV dialect preset or custom options

Yields:

  • (Hash{Symbol => String})

    each row as a symbolized hash

Returns:

  • (String)

    CSV string with a header row, or an empty string when no rows match



125
126
127
128
129
130
131
132
133
134
# File 'lib/philiprehberger/csv_kit.rb', line 125

# Loads every row through to_hashes, keeps those for which the block is
# truthy, and serializes them back to CSV.
#
# @param path [String] file path
# @param dialect [Symbol, Hash, nil] dialect preset or custom options,
#   forwarded to to_hashes
# @yield [Hash] each row as a symbol-keyed hash
# @return [String] CSV text whose first line is the keys of the first
#   matching row; an empty string when nothing matches
def self.filter(path, dialect: nil, &)
  matches = to_hashes(path, dialect: dialect).select(&)

  if matches.empty?
    ''
  else
    # Column order is taken from the first matching row and applied to all.
    columns = matches.first.keys
    CSV.generate do |out|
      out << columns
      matches.each { |record| out << record.values_at(*columns) }
    end
  end
end

.find(path, dialect: nil) {|Hash{Symbol => String}| ... } ⇒ Hash{Symbol => String}?

Find the first row matching a predicate, streaming (stops as soon as a match is found).

Parameters:

  • path (String)

    file path

  • dialect (Symbol, Hash, nil) (defaults to: nil)

    CSV dialect preset or custom options

Yields:

  • (Hash{Symbol => String})

    each row as a symbolized hash

Returns:

  • (Hash{Symbol => String}, nil)

    the first matching row or nil



109
110
111
112
113
114
115
116
117
# File 'lib/philiprehberger/csv_kit.rb', line 109

# Returns the first row for which the block is truthy, reading the file row
# by row and stopping at the first match.
#
# @param path [String] file path
# @param dialect [Symbol, Hash, nil] dialect preset or custom options; when
#   given, it is merged into the CSV options through Dialect#merge_into
# @yield [Hash] each row as a symbol-keyed hash
# @return [Hash, nil] the first matching row, or nil when none matches
def self.find(path, dialect: nil, &block)
  options = { headers: true }
  options = Dialect.new(dialect).merge_into(options) if dialect

  # Lazy mapping converts rows on demand, so nothing past the first match is
  # parsed or symbolized.
  candidates = CSV.foreach(path, **options).lazy.map do |record|
    record.to_h.transform_keys(&:to_sym)
  end
  candidates.find { |candidate| block.call(candidate) }
end

.headers(path, dialect: nil) ⇒ Array<Symbol>

Return the header row as an array of symbols. Returns an empty array when the file has no rows.

Parameters:

  • path (String)

    file path

  • dialect (Symbol, Hash, nil) (defaults to: nil)

    CSV dialect preset or custom options

Returns:

  • (Array<Symbol>)


59
60
61
62
63
64
65
66
67
68
# File 'lib/philiprehberger/csv_kit.rb', line 59

# Reads only the first row of the file and symbolizes each of its fields.
#
# @param path [String] file path
# @param dialect [Symbol, Hash, nil] dialect preset or custom options; when
#   given, it is merged into an empty options hash through Dialect#merge_into
# @return [Array<Symbol>] the first row's fields, or an empty array when the
#   file yields no row
def self.headers(path, dialect: nil)
  options = dialect ? Dialect.new(dialect).merge_into({}) : {}
  # The block form of CSV.open returns the block's value, i.e. the first row.
  first_row = CSV.open(path, **options, &:shift)
  first_row ? first_row.map(&:to_sym) : []
end

.pluck(path, *keys, dialect: nil) ⇒ Array<Hash{Symbol => String}>

Extract specific columns from a CSV.

Parameters:

  • path (String)

    file path

  • keys (Array<Symbol>)

    column names to extract

  • dialect (Symbol, Hash, nil) (defaults to: nil)

    CSV dialect preset or custom options

Returns:

  • (Array<Hash{Symbol => String}>)


50
51
52
# File 'lib/philiprehberger/csv_kit.rb', line 50

# Loads every row through to_hashes and keeps only the requested keys of each.
#
# @param path [String] file path
# @param keys [Array<Symbol>] keys to keep; keys missing from a row are
#   simply absent from that row's result (Hash#slice)
# @param dialect [Symbol, Hash, nil] dialect preset or custom options,
#   forwarded to to_hashes
# @return [Array<Hash>] one sliced hash per row
def self.pluck(path, *keys, dialect: nil)
  records = to_hashes(path, dialect: dialect)
  records.map { |record| record.slice(*keys) }
end

.process(path_or_io, dialect: nil) {|Processor| ... } ⇒ Array<Row>

Streaming DSL — yields a Processor for configuration, then executes.

Parameters:

  • path_or_io (String, IO)

    file path or IO object

  • dialect (Symbol, Hash, nil) (defaults to: nil)

    CSV dialect preset or custom options

Yields:

  • (Processor)

    processor to configure transforms and validations

Returns:

  • (Array<Row>)

    collected rows



25
26
27
28
29
# File 'lib/philiprehberger/csv_kit.rb', line 25

# Builds a Processor for the source, hands it to the block for configuration,
# then runs it.
#
# @param path_or_io [String, IO] file path or IO object, passed to Processor.new
# @param dialect [Symbol, Hash, nil] dialect preset or custom options, passed
#   to Processor.new
# @yield [Processor] the processor to configure before it runs
# @return [Object] whatever Processor#run returns
def self.process(path_or_io, dialect: nil, &block)
  Processor.new(path_or_io, dialect: dialect)
           .tap { |processor| block.call(processor) }
           .run
end

.to_hashes(path, dialect: nil) ⇒ Array<Hash{Symbol => String}>

Load an entire CSV into an array of symbolized hashes.

Parameters:

  • path (String)

    file path

  • dialect (Symbol, Hash, nil) (defaults to: nil)

    CSV dialect preset or custom options

Returns:

  • (Array<Hash{Symbol => String}>)


36
37
38
39
40
41
42
# File 'lib/philiprehberger/csv_kit.rb', line 36

# Reads the whole file and returns its rows as hashes whose keys are
# symbolized with `to_sym`.
#
# @param path [String] file path
# @param dialect [Symbol, Hash, nil] dialect preset or custom options; when
#   given, it is merged into the CSV options through Dialect#merge_into
# @return [Array<Hash>] one symbol-keyed hash per row
def self.to_hashes(path, dialect: nil)
  options = { headers: true }
  options = Dialect.new(dialect).merge_into(options) if dialect

  CSV.open(path, **options) do |csv|
    csv.map { |record| record.to_h.transform_keys(&:to_sym) }
  end
end