Class: ArrowFormat::FileReader

Inherits:
Object
  • Object
show all
Includes:
Readable, Enumerable
Defined in:
lib/arrow-format/file-reader.rb

Constant Summary collapse

MAGIC =
"ARROW1".b.freeze
MAGIC_BUFFER =
IO::Buffer.for(MAGIC)
START_MARKER_SIZE =
MAGIC_BUFFER.size
END_MARKER_SIZE =
MAGIC_BUFFER.size
STREAMING_FORMAT_START_OFFSET =

<magic number “ARROW1”> <empty padding bytes [to 8 byte boundary]>

8
CONTINUATION_BUFFER =
IO::Buffer.for(MessagePullReader::CONTINUATION_STRING)
FOOTER_SIZE_FORMAT =
:s32
IO::Buffer.size_of(FOOTER_SIZE_FORMAT)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input) ⇒ FileReader

Returns a new instance of FileReader.



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/arrow-format/file-reader.rb', line 39

# Builds a reader over +input+ and eagerly loads the footer, schema and
# dictionaries so that record batches can be read by index afterwards.
#
# @param input [IO, String, Object] an IO is memory-mapped read-only, a
#   String is wrapped with IO::Buffer.for, and any other object is stored
#   as the buffer unchanged (presumably already an IO::Buffer -- confirm
#   against callers).
def initialize(input)
  case input
  when IO
    # Map the IO as a read-only buffer starting at offset 0; the nil size
    # lets IO::Buffer.map pick the size itself.
    @buffer = IO::Buffer.map(input, nil, 0, IO::Buffer::READONLY)
  when String
    @buffer = IO::Buffer.for(input)
  else
    @buffer = input
  end

  # validate, read_footer, read_schema and read_dictionaries are defined
  # outside this view.
  validate
  @footer = read_footer
  # NOTE(review): the next line looks truncated by text extraction -- a
  # call name before "(" and an attribute name after "@footer." appear to
  # be missing. Restore it from lib/arrow-format/file-reader.rb.
  @metadata = (@footer.)
  # Fall back to an empty list when the footer reports no record batches.
  @record_batch_blocks = @footer.record_batches || []
  @schema = read_schema(@footer.schema)
  @dictionaries = read_dictionaries
end

Instance Attribute Details

#metadata ⇒ Object (readonly)

Returns the value of attribute metadata.



38
39
40
# File 'lib/arrow-format/file-reader.rb', line 38

# Returns the value of attribute metadata, i.e. whatever the constructor
# stored in @metadata.
#
# @return [Object] the stored @metadata value.
def metadata
  @metadata
end

#schema ⇒ Object (readonly)

Returns the value of attribute schema.



37
38
39
# File 'lib/arrow-format/file-reader.rb', line 37

# Reader for the schema attribute.
#
# @return [Object] the value currently held in @schema.
def schema = @schema

Instance Method Details

#each ⇒ Object



76
77
78
79
80
81
82
# File 'lib/arrow-format/file-reader.rb', line 76

# Yields every record batch in block order by calling #read for each index.
#
# Without a block, returns an Enumerator whose size is reported lazily via
# #n_record_batches.
#
# @yield [Object] the result of read(index) for each record batch block.
# @return [Enumerator, Integer] an Enumerator when no block is given,
#   otherwise the number of blocks iterated (the result of Integer#times).
def each
  unless block_given?
    return to_enum(__method__) { n_record_batches }
  end

  n_blocks = @record_batch_blocks.size
  n_blocks.times { |index| yield read(index) }
end

#n_record_batches ⇒ Object



57
58
59
# File 'lib/arrow-format/file-reader.rb', line 57

# Number of record batch blocks collected from the footer.
#
# @return [Integer] the size of @record_batch_blocks.
def n_record_batches = @record_batch_blocks.size

#read(i) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/arrow-format/file-reader.rb', line 61

# Reads the record batch stored in the block at index +i+.
#
# @param i [Integer] index into @record_batch_blocks.
# @return [Object] whatever read_record_batch returns (defined outside
#   this view).
# @raise [FileReadError] when the message header found at that block is
#   not an FB::RecordBatch.
#
# NOTE(review): the third argument passed to read_record_batch below reads
# "fb_message." with no attribute name -- it looks truncated by text
# extraction. Restore it from lib/arrow-format/file-reader.rb.
def read(i)
  # read_block is defined outside this view; it yields the message and its
  # body for the given block.
  fb_message, body = read_block(@record_batch_blocks[i], :record_batch, i)
  fb_header = fb_message.header
  # Reject blocks whose message is some other kind of header.
  unless fb_header.is_a?(FB::RecordBatch)
    raise FileReadError.new(@buffer,
                            "Not a record batch message: #{i}: " +
                            fb_header.class.name)
  end
  read_record_batch(fb_message.version,
                    fb_header,
                    fb_message.,
                    @schema,
                    body)
end