Module: SparkConnect::Types

Defined in:
lib/spark_connect/types.rb

Overview

The Spark SQL type system.

Every Spark data type is represented by an instance of a DataType subclass. Types convert to and from the protobuf `DataType` message via DataType#to_proto and Types.from_proto, and render a Spark-compatible `simpleString` (e.g. `"array<int>"`) and `typeName` (e.g. `"integer"`).

Examples:

SparkConnect::Types::IntegerType.new.simple_string      #=> "int"
SparkConnect::Types.array(SparkConnect::Types::StringType.new).simple_string
#=> "array<string>"

Defined Under Namespace

Classes: ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, IntegerType, LongType, MapType, NullType, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, VarcharType, VariantType, YearMonthIntervalType

Constant Summary collapse

Proto =
SparkConnect::Proto

Class Method Summary collapse

Class Method Details

.array(element_type, contains_null: true) ⇒ Object



445
# File 'lib/spark_connect/types.rb', line 445

# Shorthand constructor for an array type.
#
# @param element_type the element type, passed positionally to ArrayType.new
# @param contains_null [Boolean] forwarded as the `contains_null:` keyword (defaults to true)
# @return [ArrayType] a freshly constructed instance
def array(element_type, contains_null: true)
  ArrayType.new(element_type, contains_null: contains_null)
end

.binary ⇒ Object



439
# File 'lib/spark_connect/types.rb', line 439

# Shorthand constructor for the binary type.
#
# @return [BinaryType] a freshly constructed instance
def binary
  BinaryType.new
end

.boolean ⇒ Object



431
# File 'lib/spark_connect/types.rb', line 431

# Shorthand constructor for the boolean type.
#
# @return [BooleanType] a freshly constructed instance
def boolean
  BooleanType.new
end

.byte ⇒ Object



432
# File 'lib/spark_connect/types.rb', line 432

# Shorthand constructor for the byte type.
#
# @return [ByteType] a freshly constructed instance
def byte
  ByteType.new
end

.date ⇒ Object



440
# File 'lib/spark_connect/types.rb', line 440

# Shorthand constructor for the date type.
#
# @return [DateType] a freshly constructed instance
def date
  DateType.new
end

.decimal(precision = 10, scale = 0) ⇒ Object



444
# File 'lib/spark_connect/types.rb', line 444

# Shorthand constructor for a decimal type.
#
# @param precision [Integer] first positional argument to DecimalType.new (defaults to 10)
# @param scale [Integer] second positional argument to DecimalType.new (defaults to 0)
# @return [DecimalType] a freshly constructed instance
def decimal(precision = 10, scale = 0)
  DecimalType.new(precision, scale)
end

.double ⇒ Object



437
# File 'lib/spark_connect/types.rb', line 437

# Shorthand constructor for the double type.
#
# @return [DoubleType] a freshly constructed instance
def double
  DoubleType.new
end

.field(name, data_type, nullable: true, metadata: nil) ⇒ Object



448
# File 'lib/spark_connect/types.rb', line 448

# Shorthand constructor for a struct field.
#
# @param name the field name, passed positionally to StructField.new
# @param data_type the field's data type, passed positionally to StructField.new
# @param nullable [Boolean] forwarded as the `nullable:` keyword (defaults to true)
# @param metadata forwarded as the `metadata:` keyword (defaults to nil)
# @return [StructField] a freshly constructed instance
def field(name, data_type, nullable: true, metadata: nil)
  # Pass both keywords explicitly so the call does not rely on Ruby 3.1+
  # hash-value shorthand and reads the same way as `nullable:`.
  StructField.new(name, data_type, nullable: nullable, metadata: metadata)
end

.float ⇒ Object



436
# File 'lib/spark_connect/types.rb', line 436

# Shorthand constructor for the float type.
#
# @return [FloatType] a freshly constructed instance
def float
  FloatType.new
end

.from_proto(proto) ⇒ DataType

Convert a protobuf `DataType` message into a DataType instance.

Parameters:

Returns:



454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
# File 'lib/spark_connect/types.rb', line 454

# Convert a protobuf `DataType` message into a DataType instance.
#
# Reads `proto.kind` to find which variant is populated, fetches that
# sub-message via the accessor of the same name, and builds the matching
# type object. Array, map and struct kinds recurse into their nested types.
#
# @param proto the protobuf message; must respond to `kind` and to an
#   accessor named after the value `kind` returns
# @return [DataType] the type object corresponding to `proto`
# @raise [IllegalArgumentError] if no kind is set or the kind is not handled here
def from_proto(proto)
  kind = proto.kind
  # Without this guard `public_send(nil)` below would raise a TypeError.
  raise IllegalArgumentError, "Proto DataType has no kind set" if kind.nil?

  sub = proto.public_send(kind)
  # NOTE(review): the `|| default` fallbacks below only take effect when the
  # accessor returns nil. If the protobuf runtime returns a zero default for
  # unset scalar fields, they never fire — confirm whether presence checks
  # (`has_*?`) are needed here.
  case kind
  when :null then NullType.new
  when :boolean then BooleanType.new
  when :byte then ByteType.new
  when :short then ShortType.new
  when :integer then IntegerType.new
  when :long then LongType.new
  when :float then FloatType.new
  when :double then DoubleType.new
  when :string then StringType.new(sub.collation.empty? ? "UTF8_BINARY" : sub.collation)
  when :binary then BinaryType.new
  when :date then DateType.new
  when :timestamp then TimestampType.new
  when :timestamp_ntz then TimestampNTZType.new
  when :variant then VariantType.new
  when :calendar_interval then CalendarIntervalType.new
  when :day_time_interval then DayTimeIntervalType.new(sub.start_field || 0, sub.end_field || 3)
  when :year_month_interval then YearMonthIntervalType.new(sub.start_field || 0, sub.end_field || 1)
  when :decimal then DecimalType.new(sub.precision || 10, sub.scale || 0)
  when :char then CharType.new(sub.length)
  when :var_char then VarcharType.new(sub.length)
  when :array then ArrayType.new(from_proto(sub.element_type), contains_null: sub.contains_null)
  when :map then MapType.new(from_proto(sub.key_type), from_proto(sub.value_type), value_contains_null: sub.value_contains_null)
  when :struct
    fields = sub.fields.map do |f|
      # Field metadata is a JSON string; nil or empty is treated as "no metadata".
      raw_metadata = f.metadata
      metadata = raw_metadata && !raw_metadata.empty? ? JSON.parse(raw_metadata) : nil
      StructField.new(f.name, from_proto(f.data_type), nullable: f.nullable, metadata: metadata)
    end
    StructType.new(fields)
  else
    raise IllegalArgumentError, "Unsupported proto DataType kind: #{kind}"
  end
end

.integer ⇒ Object



434
# File 'lib/spark_connect/types.rb', line 434

# Shorthand constructor for the integer type.
#
# @return [IntegerType] a freshly constructed instance
def integer
  IntegerType.new
end

.long ⇒ Object



435
# File 'lib/spark_connect/types.rb', line 435

# Shorthand constructor for the long type.
#
# @return [LongType] a freshly constructed instance
def long
  LongType.new
end

.map(key_type, value_type, value_contains_null: true) ⇒ Object



446
# File 'lib/spark_connect/types.rb', line 446

# Shorthand constructor for a map type.
#
# @param key_type the key type, passed positionally to MapType.new
# @param value_type the value type, passed positionally to MapType.new
# @param value_contains_null [Boolean] forwarded as the `value_contains_null:` keyword (defaults to true)
# @return [MapType] a freshly constructed instance
def map(key_type, value_type, value_contains_null: true)
  MapType.new(key_type, value_type, value_contains_null: value_contains_null)
end

.null ⇒ Object



430
# File 'lib/spark_connect/types.rb', line 430

# Shorthand constructor for the null type.
#
# @return [NullType] a freshly constructed instance
def null
  NullType.new
end

.short ⇒ Object



433
# File 'lib/spark_connect/types.rb', line 433

# Shorthand constructor for the short type.
#
# @return [ShortType] a freshly constructed instance
def short
  ShortType.new
end

.string ⇒ Object



438
# File 'lib/spark_connect/types.rb', line 438

# Shorthand constructor for the string type, built with no arguments.
#
# @return [StringType] a freshly constructed instance
def string
  StringType.new
end

.struct(*fields) ⇒ Object



447
# File 'lib/spark_connect/types.rb', line 447

# Shorthand constructor for a struct type.
#
# Accepts fields either as separate arguments or as (nested) arrays; the
# arguments are flattened into a single list before being handed on.
#
# @param fields the fields, individually or in arrays
# @return [StructType] a freshly constructed instance
def struct(*fields)
  flat_fields = fields.flatten
  StructType.new(flat_fields)
end

.timestamp ⇒ Object



441
# File 'lib/spark_connect/types.rb', line 441

# Shorthand constructor for the timestamp type.
#
# @return [TimestampType] a freshly constructed instance
def timestamp
  TimestampType.new
end

.timestamp_ntz ⇒ Object



442
# File 'lib/spark_connect/types.rb', line 442

# Shorthand constructor for the timestamp-NTZ type.
#
# @return [TimestampNTZType] a freshly constructed instance
def timestamp_ntz
  TimestampNTZType.new
end

.variant ⇒ Object



443
# File 'lib/spark_connect/types.rb', line 443

# Shorthand constructor for the variant type.
#
# @return [VariantType] a freshly constructed instance
def variant
  VariantType.new
end

.wrap(**kwargs) ⇒ Object

Helper that wraps a kind message into a `DataType` proto.



69
70
71
# File 'lib/spark_connect/types.rb', line 69

# Build a `Proto::DataType` message from the given keyword arguments.
#
# @param kwargs keyword arguments forwarded unchanged to Proto::DataType.new
# @return [Proto::DataType] the newly constructed message
def self.wrap(**kwargs) = Proto::DataType.new(**kwargs)