Class: DuckDB::DataChunk

Inherits:
Object
  • Object
show all
Defined in:
lib/duckdb/data_chunk.rb,
ext/duckdb/data_chunk.c

Overview

The DuckDB::DataChunk represents a chunk of data for table function output.

During table function execution, data chunks are used to return rows.

Example:

# `done` records whether the single example row has already been emitted.
done = false
# The init callback puts the flag back to false.
table_function.init { |_init_info| done = false }

# Each execute call either writes one row or reports an empty chunk.
table_function.execute do |func_info, output|
  if done
    output.size = 0  # Signal completion
  else
    # High-level API
    output.set_value(0, 0, 42)        # column 0, row 0, value 42
    output.set_value(1, 0, 'Alice')   # column 1, row 0, value 'Alice'
    output.size = 1  # one row (row 0) was filled in above
    done = true
  end
end

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'ext/duckdb/data_chunk.c', line 47

/*
 * Initializes the DataChunk wrapper.
 *
 * With no argument (or nil) nothing is created and self is returned as is.
 * With an Array of logical type objects, a new data chunk with one column per
 * element is created via duckdb_create_data_chunk and marked as owned by this
 * object; a chunk previously owned by this object is destroyed first.
 *
 * Raises TypeError (via Check_Type) if the argument is not an Array, and
 * eDuckDBError if DuckDB fails to create the chunk.
 */
static VALUE initialize(int argc, VALUE *argv, VALUE self) {
    rubyDuckDBDataChunk *ctx;
    VALUE logical_types;
    idx_t column_count;
    duckdb_logical_type *types;
    long len;
    long i;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, ctx);

    rb_scan_args(argc, argv, "01", &logical_types);
    if (NIL_P(logical_types)) {
        return self;
    }

    Check_Type(logical_types, T_ARRAY);
    len = RARRAY_LEN(logical_types);

    /*
     * Validate every element before touching any state. get_struct_logical_type
     * is defined elsewhere; presumably it raises for an element that is not a
     * logical type (confirm). Doing this up front means such an exception can
     * neither leak the `types` buffer allocated below nor destroy the chunk
     * this object currently owns.
     */
    for (i = 0; i < len; i++) {
        (void)get_struct_logical_type(rb_ary_entry(logical_types, i));
    }

    if (ctx->owned && ctx->data_chunk) {
        duckdb_destroy_data_chunk(&(ctx->data_chunk));
        ctx->owned = false;
    }

    column_count = (idx_t)len;
    types = ALLOC_N(duckdb_logical_type, column_count);

    /* No Ruby code runs between the validation pass and this loop. */
    for (i = 0; i < len; i++) {
        rubyDuckDBLogicalType *logical_type_ctx = get_struct_logical_type(rb_ary_entry(logical_types, i));
        types[i] = logical_type_ctx->logical_type;
    }

    ctx->data_chunk = duckdb_create_data_chunk(types, column_count);
    /* The temporary array is only needed for the create call. */
    xfree(types);

    if (!ctx->data_chunk) {
        rb_raise(eDuckDBError, "Failed to create data chunk");
    }

    ctx->owned = true;

    return self;
}

Instance Method Details

#column_count ⇒ Integer

Returns the number of columns in the data chunk.

data_chunk.column_count  # => 2

Returns:

  • (Integer)


97
98
99
100
101
102
103
104
105
106
# File 'ext/duckdb/data_chunk.c', line 97

/*
 * Returns the column count of the wrapped data chunk as a Ruby Integer,
 * as reported by duckdb_data_chunk_get_column_count.
 */
static VALUE rbduckdb_data_chunk_column_count(VALUE self) {
    rubyDuckDBDataChunk *chunk_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, chunk_ctx);

    return ULL2NUM(duckdb_data_chunk_get_column_count(chunk_ctx->data_chunk));
}

#get_vector(col_idx) ⇒ DuckDB::Vector

Gets the vector at the specified column index.

vector = data_chunk.get_vector(0)

Returns:

  • (DuckDB::Vector)

155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'ext/duckdb/data_chunk.c', line 155

/*
 * Returns a DuckDB::Vector wrapper for the column at col_idx.
 *
 * col_idx is converted with NUM2ULL. Raises eDuckDBError when DuckDB does not
 * return a vector for that index, instead of handing back a wrapper around a
 * NULL handle.
 *
 * NOTE(review): the wrapper stores only the raw vector handle taken from this
 * chunk; whether anything keeps the chunk alive while the Vector is in use is
 * not visible here -- confirm.
 */
static VALUE rbduckdb_data_chunk_get_vector(VALUE self, VALUE col_idx) {
    rubyDuckDBDataChunk *ctx;
    idx_t idx;
    duckdb_vector vector;
    VALUE vector_obj;
    rubyDuckDBVector *vector_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, ctx);

    idx = NUM2ULL(col_idx);
    vector = duckdb_data_chunk_get_vector(ctx->data_chunk, idx);

    /* Check before allocating the wrapper so a bad index creates nothing. */
    if (!vector) {
        rb_raise(eDuckDBError, "Failed to get vector: column index %llu is out of range (column count: %llu)",
                 (unsigned long long)idx,
                 (unsigned long long)duckdb_data_chunk_get_column_count(ctx->data_chunk));
    }

    /* Create Vector wrapper */
    vector_obj = rb_class_new_instance(0, NULL, cDuckDBVector);
    vector_ctx = get_struct_vector(vector_obj);
    vector_ctx->vector = vector;

    return vector_obj;
}

#reset ⇒ DuckDB::DataChunk

Resets the data chunk so it can be reused for another batch of rows.

Returns:

  • (DuckDB::DataChunk)

104
105
106
107
108
109
110
# File 'lib/duckdb/data_chunk.rb', line 104

# Resets the underlying chunk and forgets cached data pointers.
#
# @return [DuckDB::DataChunk] self, so calls can be chained
def reset
  tap do
    _reset
    # Data pointers handed out before duckdb_data_chunk_reset may no longer be
    # valid, so only that cache is discarded; the vector and type caches
    # survive a reset.
    @data_cache = nil
  end
end

#set_value(col_idx, row_idx, value) ⇒ Object

Sets a value at the specified column and row index. Type conversion is automatic based on the column’s logical type.

rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength

Examples:

Set integer value

output.set_value(0, 0, 42)

Set string value

output.set_value(1, 0, 'hello')

Set NULL value

output.set_value(0, 1, nil)

Parameters:

  • col_idx (Integer)

    Column index (0-based)

  • row_idx (Integer)

    Row index (0-based)

  • value (Object)

    Value to set (Integer, Float, String, true/false, Time, Date, nil)

Returns:

  • (Object)

    The value that was set



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/duckdb/data_chunk.rb', line 48

# Stores +value+ in the cell at (+col_idx+, +row_idx+), choosing the writer
# from the column's type id.
#
# @param col_idx [Integer] column index (0-based)
# @param row_idx [Integer] row index (0-based)
# @param value [Object, nil] value to store; +nil+ flags the row as invalid
# @return [Object] the +value+ argument, unchanged
# @raise [ArgumentError] if the column's type id has no writer listed here
def set_value(col_idx, row_idx, value)
  column_vector = cached_vector(col_idx)
  column_type = cached_type_id(col_idx, column_vector)

  # nil never reaches a writer: the row is only marked invalid.
  # (`nil?` rather than a truthiness test, because `false` is a real value.)
  if value.nil?
    column_vector.set_validity(row_idx, false)
    return value
  end

  # String-like columns are assigned through the vector itself and do not
  # touch the cached column data.
  case column_type
  when :varchar
    column_vector.assign_string_element(row_idx, value.to_s)
    return value
  when :blob
    column_vector.assign_string_element_len(row_idx, value.to_s)
    return value
  end

  # Every remaining supported type has a MemoryHelper writer with the same
  # (data, row, value) signature; pick it by name, then dispatch once.
  writer =
    case column_type
    when :boolean then :write_boolean
    when :tinyint then :write_tinyint
    when :smallint then :write_smallint
    when :integer then :write_integer
    when :bigint then :write_bigint
    when :utinyint then :write_utinyint
    when :usmallint then :write_usmallint
    when :uinteger then :write_uinteger
    when :ubigint then :write_ubigint
    when :float then :write_float
    when :double then :write_double
    when :timestamp then :write_timestamp
    when :timestamp_tz then :write_timestamp_tz
    when :date then :write_date
    else
      raise ArgumentError, "Unsupported type for DataChunk#set_value: #{column_type} for value `#{value.inspect}`"
    end

  MemoryHelper.public_send(writer, cached_data(col_idx, column_vector), row_idx, value)
  value
end

#size ⇒ Integer

Returns the current number of tuples in the data chunk.

data_chunk.size  # => 100

Returns:

  • (Integer)


116
117
118
119
120
121
122
123
124
125
# File 'ext/duckdb/data_chunk.c', line 116

/*
 * Returns the current tuple count of the wrapped data chunk as a Ruby
 * Integer, as reported by duckdb_data_chunk_get_size.
 */
static VALUE rbduckdb_data_chunk_get_size(VALUE self) {
    rubyDuckDBDataChunk *chunk_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, chunk_ctx);

    return ULL2NUM(duckdb_data_chunk_get_size(chunk_ctx->data_chunk));
}

#size=(size) ⇒ Object

Sets the number of tuples in the data chunk.

data_chunk.size = 50


135
136
137
138
139
140
141
142
143
144
145
# File 'ext/duckdb/data_chunk.c', line 135

/*
 * Sets the tuple count of the wrapped data chunk.
 *
 * size is converted with NUM2ULL and passed to duckdb_data_chunk_set_size.
 * The original Ruby object is returned, as is usual for an attribute writer.
 */
static VALUE rbduckdb_data_chunk_set_size(VALUE self, VALUE size) {
    rubyDuckDBDataChunk *chunk_ctx;

    TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, chunk_ctx);

    duckdb_data_chunk_set_size(chunk_ctx->data_chunk, (idx_t)NUM2ULL(size));

    return size;
}