Module: ZSV

Defined in:
lib/zsv.rb,
lib/zsv/version.rb,
ext/zsv/zsv_ext.c

Overview

ZSV - SIMD-accelerated CSV parser

A drop-in replacement for Ruby’s CSV stdlib that uses the zsv C library for 10-50x performance improvements on large CSV files.

Examples:

Basic usage

ZSV.foreach("data.csv") do |row|
  puts row.inspect
end

With headers

ZSV.foreach("data.csv", headers: true) do |row|
  puts row["name"]
end

Parse string

rows = ZSV.parse("a,b,c\n1,2,3\n")

Defined Under Namespace

Classes: Error, InvalidEncodingError, MalformedCSVError, Parser

Constant Summary collapse

VERSION =
'1.4.3'

Class Method Summary collapse

Class Method Details

.foreach(path, **options) {|row| ... } ⇒ nil .foreach(path, **options) ⇒ Object

Efficiently streams rows from a CSV file.

Overloads:

  • .foreach(path, **options) {|row| ... } ⇒ nil

    Yields:

    • (row)

    Returns:

    • (nil)


192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'ext/zsv/zsv_ext.c', line 192

/*
 * ZSV.foreach(path, **options) { |row| ... } -> nil
 * ZSV.foreach(path, **options)               -> enumerator
 *
 * Streams rows from the CSV file at `path`, handing iteration to
 * rb_zsv_parser_each on a freshly wrapped parser.
 *
 * path    - String file path; any other type raises TypeError.
 * options - optional trailing argument passed unchanged to
 *           zsv_parser_new_from_path.
 *
 * With a block, returns nil once iteration finishes. Without a block,
 * RETURN_ENUMERATOR returns an enumerator for this same call.
 */
static VALUE rb_zsv_foreach(int argc, VALUE *argv, VALUE klass)
{
    VALUE path, opts;
    rb_scan_args(argc, argv, "11", &path, &opts);

    /* Validate before building the enumerator so a bad path fails at the
     * call site instead of on the first iteration of the enumerator. */
    Check_Type(path, T_STRING);

    RETURN_ENUMERATOR(klass, argc, argv);

    zsv_ruby_parser_t *parser = zsv_parser_new_from_path(StringValueCStr(path), opts);
    VALUE parser_obj = wrap_parser(parser);

    /* Run the iteration under rb_protect so the parser is closed even when
     * the block raises or exits non-locally. */
    int state;
    rb_protect((VALUE(*)(VALUE))rb_zsv_parser_each, parser_obj, &state);

    zsv_parser_close(parser);

    if (state) {
        /* Resume the exception / non-local exit captured by rb_protect. */
        rb_jump_tag(state);
    }

    return Qnil;
}

.new(io, **options) ⇒ Parser

Create a new parser instance

This is a convenience method that creates a Parser object.

Examples:

parser = ZSV.new("data.csv", headers: true)
parser.each { |row| puts row }
parser.close

Parameters:

  • io (String, IO)

    File path or IO object to parse

  • options (Hash)

    Parser options

Returns:

  • (Parser)

    New parser instance



39
40
41
# File 'lib/zsv.rb', line 39

# Convenience constructor that builds a Parser for +io+.
#
# @param io [String, IO] file path or IO object to parse
# @param options [Hash] parser options, forwarded untouched to Parser.new
# @return [Parser] new parser instance
def new(io, **options)
  Parser.new(io, **options)
end

.open(path, mode = "r", **options) ⇒ Object .open(path, mode = "r", **options) {|parser| ... } ⇒ Object

Opens a CSV file for reading. If a block is given, the parser is automatically closed after the block completes.

Overloads:

  • .open(path, mode = "r", **options) {|parser| ... } ⇒ Object

    Yields:

    • (parser)


277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'ext/zsv/zsv_ext.c', line 277

/*
 * ZSV.open(path, mode = "r", **options)                -> parser
 * ZSV.open(path, mode = "r", **options) { |parser| }   -> block result
 *
 * Opens the CSV file at `path` for reading and wraps the parser in a Ruby
 * object. With a block, the parser is yielded, closed once the block is
 * done (normally or not), and the block's value is returned. Without a
 * block, the wrapped parser is returned to the caller.
 *
 * Raises TypeError when `path` is not a String and NotImplementedError for
 * any mode other than "r".
 */
static VALUE rb_zsv_open(int argc, VALUE *argv, VALUE klass)
{
    VALUE path, mode, opts;
    VALUE parser_obj, block_result;
    zsv_ruby_parser_t *parser;
    int state;

    (void)klass;
    rb_scan_args(argc, argv, "11:", &path, &mode, &opts);

    Check_Type(path, T_STRING);

    /* A Hash in the mode slot is really the options hash (nil has type
     * T_NIL, so it never matches here). */
    if (TYPE(mode) == T_HASH) {
        opts = mode;
        mode = Qnil;
    }

    /* Read mode is the only one implemented so far. */
    if (!NIL_P(mode)) {
        const char *mode_str = StringValueCStr(mode);

        if (strcmp(mode_str, "r") != 0) {
            rb_raise(rb_eNotImpError, "Only read mode is currently supported");
        }
    }

    parser = zsv_parser_new_from_path(StringValueCStr(path), opts);
    parser_obj = wrap_parser(parser);

    /* No block: hand the open parser back to the caller. */
    if (!rb_block_given_p()) {
        return parser_obj;
    }

    /* Block form: yield under rb_protect so the close below always runs,
     * then resume whatever exit was pending. */
    block_result = rb_protect(rb_yield, parser_obj, &state);
    zsv_parser_close(parser);

    if (state) {
        rb_jump_tag(state);
    }

    return block_result;
}

.parse(string, **options) ⇒ Array

Parses a CSV string and returns all rows as an array.

Returns:

  • (Array)


223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'ext/zsv/zsv_ext.c', line 223

/* rb_ensure body for rb_zsv_parse: drains the parser into a new Array. */
static VALUE rb_zsv_parse_collect_rows(VALUE arg)
{
    zsv_ruby_parser_t *parser = (zsv_ruby_parser_t *)arg;
    VALUE rows = rb_ary_new();
    VALUE row;

    while (!NIL_P(row = zsv_parser_shift(parser))) {
        rb_ary_push(rows, row);
    }

    return rows;
}

/* rb_ensure cleanup for rb_zsv_parse: releases the parser. */
static VALUE rb_zsv_parse_free_parser(VALUE arg)
{
    zsv_parser_free((zsv_ruby_parser_t *)arg);
    return Qnil;
}

/*
 * ZSV.parse(string, **options) -> Array
 *
 * Parses the CSV text in `string` and returns every row in one Array.
 *
 * string  - String holding the CSV data; any other type raises TypeError.
 * options - optional trailing argument passed unchanged to
 *           zsv_parser_new_from_string.
 *
 * The parser here is not wrapped in a Ruby object, so this function must
 * free it itself. rb_ensure runs the free on every exit path, so the parser
 * is released even if row collection raises.
 */
static VALUE rb_zsv_parse(int argc, VALUE *argv, VALUE klass)
{
    VALUE string, opts;
    (void)klass;
    rb_scan_args(argc, argv, "11", &string, &opts);

    Check_Type(string, T_STRING);

    zsv_ruby_parser_t *parser = zsv_parser_new_from_string(string, opts);

    return rb_ensure(rb_zsv_parse_collect_rows, (VALUE)parser,
                     rb_zsv_parse_free_parser, (VALUE)parser);
}

.parse_enum(source, **options) ⇒ Enumerator

Parse CSV data and return an Enumerator

This method provides lazy enumeration over CSV rows without loading the entire file into memory.

Examples:

enum = ZSV.parse_enum("a,b\n1,2\n3,4", headers: true)
enum.first # => {"a" => "1", "b" => "2"}

Parameters:

  • source (String, IO)

    CSV data or IO object

  • options (Hash)

    Parser options

Returns:

  • (Enumerator)

    Lazy enumerator over rows



56
57
58
59
60
61
62
63
64
# File 'lib/zsv.rb', line 56

# Parse CSV data and return an Enumerator over its rows.
#
# A parser is created each time enumeration starts and is closed when that
# enumeration ends, including early exits such as +first+ or a raised
# error. The returned enumerator can therefore be consumed more than once,
# and no parser is left open if it is never consumed. As a consequence,
# errors raised by Parser.new surface on first enumeration, not here.
#
# @param source [String, IO] CSV data or IO object
# @param options [Hash] parser options, forwarded to Parser.new
# @return [Enumerator] enumerator yielding one row at a time
def parse_enum(source, **options)
  Enumerator.new do |yielder|
    parser = Parser.new(source, **options)
    begin
      parser.each { |row| yielder << row }
    ensure
      parser.close
    end
  end
end

.read(path, **options) ⇒ Array

Reads entire CSV file into an array of rows.

Returns:

  • (Array)


249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# File 'ext/zsv/zsv_ext.c', line 249

/* rb_ensure body for rb_zsv_read: drains the parser into a new Array. */
static VALUE rb_zsv_read_collect_rows(VALUE arg)
{
    zsv_ruby_parser_t *parser = (zsv_ruby_parser_t *)arg;
    VALUE rows = rb_ary_new();
    VALUE row;

    while (!NIL_P(row = zsv_parser_shift(parser))) {
        rb_ary_push(rows, row);
    }

    return rows;
}

/* rb_ensure cleanup for rb_zsv_read: releases the parser. */
static VALUE rb_zsv_read_free_parser(VALUE arg)
{
    zsv_parser_free((zsv_ruby_parser_t *)arg);
    return Qnil;
}

/*
 * ZSV.read(path, **options) -> Array
 *
 * Reads the whole CSV file at `path` and returns every row in one Array.
 *
 * path    - String file path; any other type raises TypeError.
 * options - optional trailing argument passed unchanged to
 *           zsv_parser_new_from_path.
 *
 * The parser here is not wrapped in a Ruby object, so this function must
 * free it itself. rb_ensure runs the free on every exit path, so the parser
 * is released even if row collection raises.
 */
static VALUE rb_zsv_read(int argc, VALUE *argv, VALUE klass)
{
    VALUE path, opts;
    (void)klass;
    rb_scan_args(argc, argv, "11", &path, &opts);

    Check_Type(path, T_STRING);

    zsv_ruby_parser_t *parser = zsv_parser_new_from_path(StringValueCStr(path), opts);

    return rb_ensure(rb_zsv_read_collect_rows, (VALUE)parser,
                     rb_zsv_read_free_parser, (VALUE)parser);
}