Class: DuckDB::DataChunk

Inherits:
Object
  • Object
show all
Defined in:
lib/duckdb/data_chunk.rb,
ext/duckdb/data_chunk.c

Overview

The DuckDB::DataChunk represents a chunk of data for table function output.

During table function execution, data chunks are used to return rows.

Example:

done = false
table_function.init { |_init_info| done = false }

table_function.execute do |func_info, output|
  if done
    output.size = 0  # Signal completion
  else
    # High-level API
    output.set_value(0, 0, 42)        # column 0, row 0, value 42
    output.set_value(1, 0, 'Alice')   # column 1, row 0, value 'Alice'
    output.size = 1
    done = true
  end
end

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'ext/duckdb/data_chunk.c', line 57

// Initializes a DataChunk, optionally allocating a new DuckDB data chunk.
//
// Accepts zero or one argument. With no argument (or nil) the wrapper is
// returned untouched. With an Array of logical type objects, a new chunk with
// one column per element is created and becomes owned by this object.
//
// Raises TypeError (via Check_Type) when the argument is not an Array, and
// eDuckDBError when DuckDB fails to create the chunk. On any failure the
// previously held chunk, if any, is left intact.
static VALUE data_chunk_initialize(int argc, VALUE *argv, VALUE self) {
    rubyDuckDBDataChunk *ctx;
    VALUE logical_types;
    idx_t column_count;
    duckdb_logical_type *types;
    duckdb_data_chunk new_chunk;
    long len;
    long i;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, ctx);

    rb_scan_args(argc, argv, "01", &logical_types);
    if (NIL_P(logical_types)) {
        return self;
    }

    Check_Type(logical_types, T_ARRAY);

    len = RARRAY_LEN(logical_types);

    // Pass 1: unwrap every element before allocating anything. The unwrap
    // helper presumably raises for elements that are not logical types
    // (TODO confirm); doing it up front means such an exception cannot jump
    // past the xfree() below and leak the temporary buffer.
    for (i = 0; i < len; i++) {
        (void)rbduckdb_get_struct_logical_type(rb_ary_entry(logical_types, i));
    }

    column_count = (idx_t)len;
    types = ALLOC_N(duckdb_logical_type, column_count);

    // Pass 2: collect the raw DuckDB handles.
    for (i = 0; i < len; i++) {
        rubyDuckDBLogicalType *logical_type_ctx =
            rbduckdb_get_struct_logical_type(rb_ary_entry(logical_types, i));
        types[i] = logical_type_ctx->logical_type;
    }

    new_chunk = duckdb_create_data_chunk(types, column_count);
    xfree(types);

    if (!new_chunk) {
        rb_raise(eDuckDBError, "Failed to create data chunk");
    }

    // Only release the previously owned chunk once its replacement exists,
    // so a failed re-initialization does not leave the object empty.
    if (ctx->owned && ctx->data_chunk) {
        duckdb_destroy_data_chunk(&(ctx->data_chunk));
    }

    ctx->data_chunk = new_chunk;
    ctx->owned = true;

    return self;
}

Instance Method Details

#column_count ⇒ Integer

Returns the number of columns in the data chunk.

data_chunk.column_count  # => 2

Returns:

  • (Integer)


107
108
109
110
111
112
113
114
115
116
# File 'ext/duckdb/data_chunk.c', line 107

// Returns the number of columns in the wrapped data chunk as a Ruby Integer.
static VALUE data_chunk_column_count(VALUE self) {
    rubyDuckDBDataChunk *chunk_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, chunk_ctx);

    return ULL2NUM(duckdb_data_chunk_get_column_count(chunk_ctx->data_chunk));
}

#get_vector(col_idx) ⇒ DuckDB::Vector

Gets the vector at the specified column index.

vector = data_chunk.get_vector(0)

Returns:



165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'ext/duckdb/data_chunk.c', line 165

// Returns a DuckDB::Vector wrapping the column vector at col_idx.
//
// col_idx is converted with NUM2ULL. Raises eDuckDBError when DuckDB does not
// return a vector for that index (e.g. the index is out of range or the chunk
// has not been created), instead of handing back a wrapper around NULL.
static VALUE data_chunk_get_vector(VALUE self, VALUE col_idx) {
    rubyDuckDBDataChunk *ctx;
    idx_t idx;
    duckdb_vector vector;
    VALUE vector_obj;
    rubyDuckDBVector *vector_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, ctx);

    idx = NUM2ULL(col_idx);
    vector = duckdb_data_chunk_get_vector(ctx->data_chunk, idx);

    // A NULL handle must not be wrapped: later calls on the Vector would
    // dereference it.
    if (!vector) {
        rb_raise(eDuckDBError, "Failed to get vector at column index %"PRIsVALUE, col_idx);
    }

    // Create Vector wrapper
    vector_obj = rb_class_new_instance(0, NULL, cDuckDBVector);
    vector_ctx = rbduckdb_get_struct_vector(vector_obj);
    vector_ctx->vector = vector;

    return vector_obj;
}

#reset ⇒ DuckDB::DataChunk

Resets the data chunk so it can be reused for another batch of rows.

Returns:



104
105
106
107
108
109
110
# File 'lib/duckdb/data_chunk.rb', line 104

# Resets the underlying chunk via +_reset+ and returns the receiver so calls
# can be chained.
def reset
  tap do
    _reset
    # Data pointers handed out before duckdb_data_chunk_reset may no longer be
    # valid afterwards, so forget them. The vector/type caches are kept
    # because they remain valid across resets.
    @data_cache = nil
  end
end

#set_value(col_idx, row_idx, value) ⇒ Object

Sets a value at the specified column and row index. The value is converted automatically according to the column’s logical type; passing nil marks the cell as NULL.

Examples:

Set integer value

output.set_value(0, 0, 42)

Set string value

output.set_value(1, 0, 'hello')

Set NULL value

output.set_value(0, 1, nil)

Parameters:

  • col_idx (Integer)

    Column index (0-based)

  • row_idx (Integer)

    Row index (0-based)

  • value (Object)

    Value to set (Integer, Float, String, Time, Date, true/false, or nil)

Returns:

  • (Object)

    The value that was set



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/duckdb/data_chunk.rb', line 48

# Stores +value+ in the cell at (+col_idx+, +row_idx+), choosing the writer
# from the column's type id.
#
# @param col_idx [Integer] column index
# @param row_idx [Integer] row index
# @param value [Object] value to store; nil marks the row as NULL
# @return [Object] the value that was passed in
# @raise [ArgumentError] when the column's type id has no writer
def set_value(col_idx, row_idx, value)
  column = cached_vector(col_idx)
  kind = cached_type_id(col_idx, column)

  # nil only flips the validity bit; nothing is written to the data buffer.
  if value.nil?
    column.set_validity(row_idx, false)
    return value
  end

  # String-like types are assigned through the vector itself, so the raw
  # data pointer is never fetched for them.
  case kind
  when :varchar
    column.assign_string_element(row_idx, value.to_s)
    return value
  when :blob
    column.assign_string_element_len(row_idx, value.to_s)
    return value
  end

  # Every remaining supported type is written through a MemoryHelper writer.
  # Unsupported types raise here, before the data pointer is looked up.
  writer =
    case kind
    when :boolean      then :write_boolean
    when :tinyint      then :write_tinyint
    when :smallint     then :write_smallint
    when :integer      then :write_integer
    when :bigint       then :write_bigint
    when :utinyint     then :write_utinyint
    when :usmallint    then :write_usmallint
    when :uinteger     then :write_uinteger
    when :ubigint      then :write_ubigint
    when :float        then :write_float
    when :double       then :write_double
    when :timestamp    then :write_timestamp
    when :timestamp_tz then :write_timestamp_tz
    when :date         then :write_date
    else
      raise ArgumentError, "Unsupported type for DataChunk#set_value: #{kind} for value `#{value.inspect}`"
    end

  MemoryHelper.public_send(writer, cached_data(col_idx, column), row_idx, value)
  value
end

#size ⇒ Integer

Returns the current number of tuples in the data chunk.

data_chunk.size  # => 100

Returns:

  • (Integer)


126
127
128
129
130
131
132
133
134
135
# File 'ext/duckdb/data_chunk.c', line 126

// Returns the current tuple count of the wrapped data chunk as a Ruby Integer.
static VALUE data_chunk_size(VALUE self) {
    rubyDuckDBDataChunk *chunk_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, chunk_ctx);

    return ULL2NUM(duckdb_data_chunk_get_size(chunk_ctx->data_chunk));
}

#size=(size) ⇒ Object

Sets the number of tuples in the data chunk.

data_chunk.size = 50


145
146
147
148
149
150
151
152
153
154
155
# File 'ext/duckdb/data_chunk.c', line 145

// Sets the tuple count of the wrapped data chunk from a Ruby Integer and
// returns the Ruby value that was passed in.
static VALUE data_chunk_set_size(VALUE self, VALUE size) {
    rubyDuckDBDataChunk *chunk_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, chunk_ctx);

    // The conversion happens after the struct lookup, as in the original.
    duckdb_data_chunk_set_size(chunk_ctx->data_chunk, (idx_t)NUM2ULL(size));

    return size;
}