Class: Rover::Vector

Inherits:

Object

Object
Rover::Vector

show all

Defined in:: lib/rover/vector.rb

Constant Summary collapse

TYPE_CAST_MAPPING = If a user never specifies types, the defaults are bool, float64, int64, and object.

# Maps each type name (a Symbol) to the Numo array class used for it.
{
  bool: Numo::Bit,
  float32: Numo::SFloat,
  float64: Numo::DFloat,
  int8: Numo::Int8,
  int16: Numo::Int16,
  int32: Numo::Int32,
  int64: Numo::Int64,
  object: Numo::RObject,
  uint8: Numo::UInt8,
  uint16: Numo::UInt16,
  uint32: Numo::UInt32,
  uint64: Numo::UInt64,
  # legacy - must come last
  # (#type returns the name of the first entry whose class matches the data,
  # so the sized names above have to precede these aliases of the same classes)
  float: Numo::DFloat,
  int: Numo::Int64,
  uint: Numo::UInt64
}

NOT_SET =

Object.new

Instance Method Summary collapse

#! ⇒ Object
#-@ ⇒ Object
#[](v) ⇒ Object
#[]=(k, v) ⇒ Object
#abs ⇒ Object
#all? ⇒ Boolean
#any? ⇒ Boolean
#ceil(ndigits = 0) ⇒ Object
#clamp(min, max) ⇒ Object
#clamp!(min, max) ⇒ Object
#crosstab(other) ⇒ Object
#diff ⇒ Object

Keeps the same number of rows as the original to make it easy to add the result to the original data frame.
#each(&block) ⇒ Object
#each_with_index(&block) ⇒ Object
#empty? ⇒ Boolean
#first(n = NOT_SET) ⇒ Object
#floor(ndigits = 0) ⇒ Object
#frexp ⇒ Object
#head(n = 5) ⇒ Object
#hypot(y) ⇒ Object
#in?(values) ⇒ Boolean
#include?(value) ⇒ Boolean
#initialize(data, type: nil) ⇒ Vector constructor

A new instance of Vector.
#inspect ⇒ Object (also: #to_s)

TODO: add type and size?
#last(n = NOT_SET) ⇒ Object
#ldexp(exponent) ⇒ Object
#ln ⇒ Object
#log(base = NOT_SET) ⇒ Object
#map(&block) ⇒ Object
#map!(&block) ⇒ Object
#max ⇒ Object
#mean ⇒ Object
#median ⇒ Object
#min ⇒ Object
#missing ⇒ Object
#numeric? ⇒ Boolean
#one_hot(drop: false, prefix: nil) ⇒ Object
#percentile(q) ⇒ Object
#reject(&block) ⇒ Object
#round(ndigits = 0) ⇒ Object
#select(&block) ⇒ Object
#size ⇒ Object (also: #length, #count)
#sort ⇒ Object
#std ⇒ Object

uses Bessel’s correction for now since that’s all Numo supports.
#sum ⇒ Object
#tail(n = 5) ⇒ Object
#take(n) ⇒ Object
#tally ⇒ Object

use Ruby tally for performance.
#to(type) ⇒ Object
#to!(type) ⇒ Object
#to_a ⇒ Object
#to_html ⇒ Object

for IRuby.
#to_numo ⇒ Object
#type ⇒ Object
#uniq ⇒ Object
#var ⇒ Object

uses Bessel’s correction for now since that’s all Numo supports.
#zip(other, &block) ⇒ Object

Constructor Details

#initialize(data, type: nil) ⇒ `Vector`

Returns a new instance of Vector.

Raises:

(ArgumentError)

# File 'lib/rover/vector.rb', line 26

# Stores the result of cast_data(data, type: type) as the vector's data.
#
# data - input handed to cast_data
# type - requested type, passed through to cast_data (nil by default)
#
# Raises ArgumentError when the cast data is not one-dimensional.
def initialize(data, type: nil)
  @data = cast_data(data, type: type)
  return if @data.ndim == 1

  raise ArgumentError, "Bad size: #{@data.shape}"
end

Instance Method Details

#! ⇒ `Object`

# File 'lib/rover/vector.rb', line 151

# Logical negation. Only defined when the data is a Numo::Bit array, in which
# case the result wraps @data.eq(0) in a new Vector.
#
# Raises RuntimeError ("Not implemented yet") for any other data class.
def !
  raise "Not implemented yet" unless @data.is_a?(Numo::Bit)

  Vector.new(@data.eq(0))
end

#-@ ⇒ `Object`



159
160
161

# File 'lib/rover/vector.rb', line 159

# Unary minus: returns the result of multiplying this vector by -1
# (relies on the vector's own * operator).
def -@
  self * -1
end

#[](v) ⇒ `Object`

# File 'lib/rover/vector.rb', line 88

# Element / slice access.
#
# v - what to look up:
#     * a Vector  - calls v.to_numo.mask(@data) and wraps the result in a Vector
#     * a Numeric - returns @data[v] directly (not wrapped)
#     * otherwise - wraps @data[v] in a new Vector
def [](v)
  case v
  when Vector
    Vector.new(v.to_numo.mask(@data))
  when Numeric
    @data[v]
  else
    Vector.new(@data[v])
  end
end

#[]=(k, v) ⇒ `Object`

# File 'lib/rover/vector.rb', line 98

# Element assignment: writes v into the underlying data at k.
#
# k - index passed to @data[]=; a Vector key is first converted with to_numo
# v - value to store
def []=(k, v)
  index = k.is_a?(Vector) ? k.to_numo : k
  @data[index] = v
end

#abs ⇒ `Object`



201
202
203

# File 'lib/rover/vector.rb', line 201

# Returns a new Vector wrapping the result of @data.abs.
def abs
  magnitudes = @data.abs
  Vector.new(magnitudes)
end

#all? ⇒ `Boolean`

Returns:

(Boolean)



308
309
310

# File 'lib/rover/vector.rb', line 308

# Forwards every argument (and block) to all? on the result of to_a and
# returns its answer.
def all?(...)
  to_a.all?(...)
end

#any? ⇒ `Boolean`

Returns:

(Boolean)



312
313
314

# File 'lib/rover/vector.rb', line 312

# Forwards every argument (and block) to any? on the result of to_a and
# returns its answer.
def any?(...)
  to_a.any?(...)
end

#ceil(ndigits = 0) ⇒ `Object`

# File 'lib/rover/vector.rb', line 213

# Rounds every element up and returns the result as a new Vector.
#
# ndigits - 0 (default) uses @data.ceil on the whole array; any other value
#           calls ceil(ndigits) on each element via @data.map
def ceil(ndigits = 0)
  rounded_up =
    if ndigits == 0
      @data.ceil
    else
      @data.map { |value| value.ceil(ndigits) }
    end
  Vector.new(rounded_up)
end

#clamp(min, max) ⇒ `Object`



168
169
170

# File 'lib/rover/vector.rb', line 168

# Non-mutating variant of clamp!: duplicates this object with dup and calls
# clamp!(min, max) on the duplicate, returning whatever clamp! returns.
def clamp(min, max)
  dup.clamp!(min, max)
end

#clamp!(min, max) ⇒ `Object`

# File 'lib/rover/vector.rb', line 163

# Replaces the underlying data with @data.clip(min, max) and returns self,
# so calls can be chained.
def clamp!(min, max)
  clipped = @data.clip(min, max)
  @data = clipped
  self
end

#crosstab(other) ⇒ `Object`

# File 'lib/rover/vector.rb', line 353

# Builds a DataFrame counting how often each value of this vector occurs
# together with each value of other (paired by position).
#
# The "_" column holds this vector's sorted unique values; one further column
# is created per sorted unique value of other, starting at 0.
def crosstab(other)
  row_labels = uniq.sort
  # value => row position inside the "_" column
  row_for = row_labels.to_a.each_with_index.to_h
  table = DataFrame.new({"_" => row_labels})
  other.uniq.sort.each { |column| table[column] = 0 }
  to_a.zip(other.to_a) do |row_value, column_value|
    table[column_value][row_for[row_value]] += 1
  end
  table
end

#diff ⇒ `Object`

keep same number of rows as original to make it easy to add to original data frame

# File 'lib/rover/vector.rb', line 83

# Returns a new Vector built from @data cast to Numo::DFloat and passed
# through diff, with Float::NAN inserted at position 0.
# The leading NaN keeps the same number of rows as the original, which makes
# it easy to add the result to the original data frame.
def diff
  deltas = @data.cast_to(Numo::DFloat).diff
  padded = deltas.insert(0, Float::NAN)
  Vector.new(padded)
end

#each(&block) ⇒ `Object`



263
264
265

# File 'lib/rover/vector.rb', line 263

# Passes the given block to @data.each and returns whatever that call returns.
def each(&block)
  @data.each(&block)
end

#each_with_index(&block) ⇒ `Object`



267
268
269

# File 'lib/rover/vector.rb', line 267

# Passes the given block to @data.each_with_index and returns whatever that
# call returns.
def each_with_index(&block)
  @data.each_with_index(&block)
end

#empty? ⇒ `Boolean`

Returns:

(Boolean)



316
317
318

# File 'lib/rover/vector.rb', line 316

# True when the vector holds no elements (size is zero).
def empty?
  size.zero?
end

#first(n = NOT_SET) ⇒ `Object`

# File 'lib/rover/vector.rb', line 328

# Leading element(s) of the vector.
#
# n - omitted: returns the single element @data[0] (not wrapped)
#     n >= size: returns a new Vector over all of @data
#     otherwise: returns a new Vector over @data[0...n]
def first(n = NOT_SET)
  return @data[0] if n == NOT_SET
  return Vector.new(@data) if n >= size

  Vector.new(@data[0...n])
end

#floor(ndigits = 0) ⇒ `Object`

# File 'lib/rover/vector.rb', line 221

# Rounds every element down and returns the result as a new Vector.
#
# ndigits - 0 (default) uses @data.floor on the whole array; any other value
#           calls floor(ndigits) on each element via @data.map
def floor(ndigits = 0)
  rounded_down =
    if ndigits == 0
      @data.floor
    else
      @data.map { |value| value.floor(ndigits) }
    end
  Vector.new(rounded_down)
end

#frexp ⇒ `Object`

# File 'lib/rover/vector.rb', line 253

# Runs Numo::NMath.frexp on the data and returns a two-element array
# [fraction Vector, exponent Vector].
def frexp
  mantissa, power = Numo::NMath.frexp(@data)
  [mantissa, power].map { |part| Vector.new(part) }
end

#head(n = 5) ⇒ `Object`

# File 'lib/rover/vector.rb', line 366

# Returns first(n) (5 by default). A negative n has size added to it before
# being passed on, i.e. it means "all but the last |n| elements".
def head(n = 5)
  count = n < 0 ? n + size : n
  first(count)
end

#hypot(y) ⇒ `Object`

# File 'lib/rover/vector.rb', line 248

# Returns a new Vector wrapping Numo::NMath.hypot(@data, y).
#
# y - second operand; a Rover::Vector is unwrapped with to_numo first,
#     anything else is passed through unchanged
def hypot(y)
  other = y.is_a?(Rover::Vector) ? y.to_numo : y
  Vector.new(Numo::NMath.hypot(@data, other))
end

#in?(values) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/rover/vector.rb', line 137

# Element-wise membership test: returns a Vector over a Numo::Bit mask that
# ORs together, for every entry of values, the positions where the data
# matches that entry.
#
# values - collection responding to each
#
# Numeric and Numo::NArray entries are compared with @data.eq; everything
# else is compared element by element with ==.
def in?(values)
  matches = Numo::Bit.new(size).fill(false)
  values.each do |candidate|
    hit =
      case candidate
      when Numeric, Numo::NArray
        @data.eq(candidate)
      else
        Numo::Bit.cast(@data.map { |element| element == candidate })
      end
    matches |= hit
  end
  Vector.new(matches)
end

#include?(value) ⇒ `Boolean`

Returns:

(Boolean)



320
321
322

# File 'lib/rover/vector.rb', line 320

# True when value is found in the result of to_a (delegates to include? on
# that array).
def include?(value)
  to_a.include?(value)
end

#inspect ⇒ `Object` Also known as: to_s

TODO add type and size?

# File 'lib/rover/vector.rb', line 391

def inspect
  elements = first(5).to_a.map(&:inspect)
  elements << "..." if size > 5
  "#<Rover::Vector [#{elements.join(", ")}]>"
end

#last(n = NOT_SET) ⇒ `Object`

# File 'lib/rover/vector.rb', line 338

# Trailing element(s) of the vector.
#
# n - omitted: returns the single element @data[-1] (not wrapped)
#     n >= size: returns a new Vector over all of @data
#     n == 0: returns a new, empty Vector
#     otherwise: returns a new Vector over the last n elements
def last(n = NOT_SET)
  if n == NOT_SET
    @data[-1]
  elsif n >= size
    Vector.new(@data)
  elsif n == 0
    # -0..-1 is the same range as 0..-1 and would select every element, so
    # the zero case is answered with a zero-length array of the data's class.
    Vector.new(@data.class.new(0))
  else
    Vector.new(@data[-n..-1])
  end
end

#ldexp(exponent) ⇒ `Object`

# File 'lib/rover/vector.rb', line 258

# Returns a new Vector wrapping Numo::NMath.ldexp(@data, exponent).
#
# exponent - a Rover::Vector is unwrapped with to_numo first, anything else
#            is passed through unchanged
def ldexp(exponent)
  power = exponent.is_a?(Rover::Vector) ? exponent.to_numo : exponent
  Vector.new(Numo::NMath.ldexp(@data, power))
end

#ln ⇒ `Object`



244
245
246

# File 'lib/rover/vector.rb', line 244

# Shorthand for calling log without a base argument.
def ln
  log
end

#log(base = NOT_SET) ⇒ `Object`

# File 'lib/rover/vector.rb', line 235

# Logarithm of every element, returned as a new Vector.
#
# base - omitted: the whole array goes through Numo::NMath.log
#        given:   each element goes through Math.log(element, base); the
#                 result is typed :float32 when this vector's type is
#                 :float32 and :float64 otherwise
def log(base = NOT_SET)
  return Vector.new(Numo::NMath.log(@data)) if base == NOT_SET

  result_type = type == :float32 ? :float32 : :float64
  logs = @data.to_a.map { |value| Math.log(value, base) }
  Vector.new(logs, type: result_type)
end

#map(&block) ⇒ `Object`

# File 'lib/rover/vector.rb', line 172

# Applies the block to every element and returns the results as a new Vector.
def map(&block)
  # The mapping runs on a plain Ruby array so that the results are cast
  # properly: https://github.com/ruby-numo/numo-narray/issues/181
  # (numo-narray-alt has same behavior)
  mapped = @data.to_a.map(&block)
  Vector.new(mapped)
end

#map!(&block) ⇒ `Object`

# File 'lib/rover/vector.rb', line 179

# In-place map: applies the block to every element (via a plain Ruby array),
# stores cast_data of the results as the new data, and returns self.
def map!(&block)
  mapped = @data.to_a.map(&block)
  @data = cast_data(mapped)
  self
end

#max ⇒ `Object`



271
272
273

# File 'lib/rover/vector.rb', line 271

# Returns the result of @data.max.
def max
  @data.max
end

#mean ⇒ `Object`



279
280
281

# File 'lib/rover/vector.rb', line 279

# Returns the result of @data.mean.
def mean
  @data.mean
end

#median ⇒ `Object`

# File 'lib/rover/vector.rb', line 283

# Returns the median of the data, computed on a Numo::DFloat copy.
def median
  # The cast is needed to get the correct result:
  # https://github.com/ruby-numo/numo-narray/issues/165
  # (numo-narray-alt has same behavior)
  as_float = @data.cast_to(Numo::DFloat)
  as_float.median
end

#min ⇒ `Object`



275
276
277

# File 'lib/rover/vector.rb', line 275

# Returns the result of @data.min.
def min
  @data.min
end

#missing ⇒ `Object`

# File 'lib/rover/vector.rb', line 68

# Returns a Vector over a bit mask marking the missing elements:
# * Numo::RObject data   - elements that are nil
# * data with an isnan   - whatever @data.isnan reports
# * anything else        - a Numo::Bit of the same size filled with 0
def missing
  if @data.is_a?(Numo::RObject)
    return Vector.new(Numo::Bit.cast(@data.map(&:nil?)))
  end
  return Vector.new(@data.isnan) if @data.respond_to?(:isnan)

  Vector.new(Numo::Bit.new(size).fill(0))
end

#numeric? ⇒ `Boolean`

Returns:

(Boolean)



54
55
56

# File 'lib/rover/vector.rb', line 54

# True unless the vector's type is :object or :bool.
def numeric?
  case type
  when :object, :bool
    false
  else
    true
  end
end

#one_hot(drop: false, prefix: nil) ⇒ `Object`

Raises:

(ArgumentError)

# File 'lib/rover/vector.rb', line 376

# One-hot encodes the vector into a DataFrame with one Int64 column per
# unique value; each column is the comparison (self == value) cast to
# Numo::Int64.
#
# drop   - when truthy, the first unique value gets no column
# prefix - text prepended to every column name (nil adds nothing)
#
# Raises ArgumentError unless every element is a String.
def one_hot(drop: false, prefix: nil)
  raise ArgumentError, "All elements must be strings" unless all?(String)

  # maybe sort values first
  categories = uniq.to_a
  categories.shift if drop
  columns = categories.each_with_object({}) do |category, acc|
    # TODO use types
    acc["#{prefix}#{category}"] = (self == category).to_numo.cast_to(Numo::Int64)
  end
  DataFrame.new(columns)
end

#percentile(q) ⇒ `Object`



290
291
292

# File 'lib/rover/vector.rb', line 290

# Returns the result of @data.percentile(q); q is passed through unchanged.
def percentile(q)
  @data.percentile(q)
end

#reject(&block) ⇒ `Object`



188
189
190

# File 'lib/rover/vector.rb', line 188

# Returns a new Vector holding the elements for which the block is falsy
# (Array#reject on the data converted with to_a).
def reject(&block)
  kept = @data.to_a.reject(&block)
  Vector.new(kept)
end

#round(ndigits = 0) ⇒ `Object`

# File 'lib/rover/vector.rb', line 205

# Rounds every element and returns the result as a new Vector.
#
# ndigits - 0 (default) uses @data.round on the whole array; any other value
#           calls round(ndigits) on each element via @data.map
def round(ndigits = 0)
  rounded =
    if ndigits == 0
      @data.round
    else
      @data.map { |value| value.round(ndigits) }
    end
  Vector.new(rounded)
end

#select(&block) ⇒ `Object`



184
185
186

# File 'lib/rover/vector.rb', line 184

# Returns a new Vector holding the elements for which the block is truthy
# (Array#select on the data converted with to_a).
def select(&block)
  chosen = @data.to_a.select(&block)
  Vector.new(chosen)
end

#size ⇒ `Object` Also known as: length, count



58
59
60

# File 'lib/rover/vector.rb', line 58

# Returns the result of @data.size.
def size
  @data.size
end

#sort ⇒ `Object`



197
198
199

# File 'lib/rover/vector.rb', line 197

# Returns a new Vector with the elements sorted. Uses @data.sort when the
# data responds to sort, and falls back to sorting the to_a array otherwise.
def sort
  ordered =
    if @data.respond_to?(:sort)
      @data.sort
    else
      @data.to_a.sort
    end
  Vector.new(ordered)
end

#std ⇒ `Object`

uses Bessel’s correction for now since that’s all Numo supports



299
300
301

# File 'lib/rover/vector.rb', line 299

# Returns stddev of the data after casting it to Numo::DFloat.
# uses Bessel's correction for now since that's all Numo supports
def std
  as_float = @data.cast_to(Numo::DFloat)
  as_float.stddev
end

#sum ⇒ `Object`



294
295
296

# File 'lib/rover/vector.rb', line 294

# Returns the result of @data.sum.
def sum
  @data.sum
end

#tail(n = 5) ⇒ `Object`

# File 'lib/rover/vector.rb', line 371

# Returns last(n) (5 by default). A negative n has size added to it before
# being passed on, i.e. it means "all but the first |n| elements".
def tail(n = 5)
  count = n < 0 ? n + size : n
  last(count)
end

#take(n) ⇒ `Object`

Raises:

(ArgumentError)

# File 'lib/rover/vector.rb', line 348

# Returns first(n).
#
# Raises ArgumentError when n is negative.
def take(n)
  if n < 0
    raise ArgumentError, "attempt to take negative size"
  end

  first(n)
end

#tally ⇒ `Object`

use Ruby tally for performance



193
194
195

# File 'lib/rover/vector.rb', line 193

# Counts occurrences of each element: converts the data with to_a and
# returns that array's tally.
# use Ruby tally for performance
def tally
  @data.to_a.tally
end

#to(type) ⇒ `Object`



35
36
37

# File 'lib/rover/vector.rb', line 35

# Non-mutating variant of to!: duplicates this object with dup and calls
# to!(type) on the duplicate, returning whatever to! returns.
def to(type)
  dup.to!(type)
end

#to!(type) ⇒ `Object`

# File 'lib/rover/vector.rb', line 39

# Replaces the data with cast_data(@data, type: type) and returns self,
# so calls can be chained.
def to!(type)
  converted = cast_data(@data, type: type)
  @data = converted
  self
end

#to_a ⇒ `Object`

# File 'lib/rover/vector.rb', line 48

# Returns the data as a Ruby array. For Numo::Bit data each element is turned
# into a boolean (true when it is not zero).
def to_a
  values = @data.to_a
  return values unless @data.is_a?(Numo::Bit)

  values.map! { |bit| !bit.zero? }
end

#to_html ⇒ `Object`

for IRuby

# File 'lib/rover/vector.rb', line 399

# HTML table rendering, for IRuby. Loads iruby on first use.
# Vectors with more than 7 elements are rendered from their first 4 and last
# 4 elements with maxrows: 7; shorter ones are rendered in full.
def to_html
  require "iruby"

  return IRuby::HTML.table(to_a) unless size > 7

  # pass 8 rows so maxrows is applied
  preview_rows = first(4).to_a + last(4).to_a
  IRuby::HTML.table(preview_rows, maxrows: 7)
end

#to_numo ⇒ `Object`



44
45
46

# File 'lib/rover/vector.rb', line 44

# Returns the underlying data object itself (no copy is made here).
def to_numo
  @data
end

#type ⇒ `Object`



31
32
33

# File 'lib/rover/vector.rb', line 31

# Returns the name (key) of the first TYPE_CAST_MAPPING entry whose class
# the data is an instance of.
def type
  entry = TYPE_CAST_MAPPING.find { |_name, klass| @data.is_a?(klass) }
  entry.first
end

#uniq ⇒ `Object`



64
65
66

# File 'lib/rover/vector.rb', line 64

# Returns a new Vector of the distinct elements (uniq on the to_a array).
def uniq
  distinct = to_a.uniq
  Vector.new(distinct)
end

#var ⇒ `Object`

uses Bessel’s correction for now since that’s all Numo supports



304
305
306

# File 'lib/rover/vector.rb', line 304

# Returns var of the data after casting it to Numo::DFloat.
# uses Bessel's correction for now since that's all Numo supports
def var
  as_float = @data.cast_to(Numo::DFloat)
  as_float.var
end

#zip(other, &block) ⇒ `Object`



324
325
326

# File 'lib/rover/vector.rb', line 324

# Zips this vector's to_a array with other.to_a, forwarding the optional
# block to that zip call, and returns its result.
def zip(other, &block)
  to_a.zip(other.to_a, &block)
end

Class: Rover::Vector

Constant Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data, type: nil) ⇒ Vector

Instance Method Details

#! ⇒ Object

#-@ ⇒ Object

#[](v) ⇒ Object

#[]=(k, v) ⇒ Object

#abs ⇒ Object

#all? ⇒ Boolean

#any? ⇒ Boolean

#ceil(ndigits = 0) ⇒ Object

#clamp(min, max) ⇒ Object

#clamp!(min, max) ⇒ Object

#crosstab(other) ⇒ Object

#diff ⇒ Object

#each(&block) ⇒ Object

#each_with_index(&block) ⇒ Object

#empty? ⇒ Boolean

#first(n = NOT_SET) ⇒ Object

#floor(ndigits = 0) ⇒ Object

#frexp ⇒ Object

#head(n = 5) ⇒ Object

#hypot(y) ⇒ Object

#in?(values) ⇒ Boolean

#include?(value) ⇒ Boolean

#inspect ⇒ Object Also known as: to_s

#last(n = NOT_SET) ⇒ Object

#ldexp(exponent) ⇒ Object

#ln ⇒ Object

#log(base = NOT_SET) ⇒ Object

#map(&block) ⇒ Object

#map!(&block) ⇒ Object

#max ⇒ Object

#mean ⇒ Object

#median ⇒ Object

#min ⇒ Object

#missing ⇒ Object

#numeric? ⇒ Boolean

#one_hot(drop: false, prefix: nil) ⇒ Object

#percentile(q) ⇒ Object

#reject(&block) ⇒ Object

#round(ndigits = 0) ⇒ Object

#select(&block) ⇒ Object

#size ⇒ Object Also known as: length, count

#sort ⇒ Object

#std ⇒ Object

#sum ⇒ Object

#tail(n = 5) ⇒ Object

#take(n) ⇒ Object

#tally ⇒ Object

#to(type) ⇒ Object

#to!(type) ⇒ Object

#to_a ⇒ Object

#to_html ⇒ Object

#to_numo ⇒ Object

#type ⇒ Object

#uniq ⇒ Object

#var ⇒ Object

#zip(other, &block) ⇒ Object

#initialize(data, type: nil) ⇒ `Vector`

#! ⇒ `Object`

#-@ ⇒ `Object`

#[](v) ⇒ `Object`

#[]=(k, v) ⇒ `Object`

#abs ⇒ `Object`

#all? ⇒ `Boolean`

#any? ⇒ `Boolean`

#ceil(ndigits = 0) ⇒ `Object`

#clamp(min, max) ⇒ `Object`

#clamp!(min, max) ⇒ `Object`

#crosstab(other) ⇒ `Object`

#diff ⇒ `Object`

#each(&block) ⇒ `Object`

#each_with_index(&block) ⇒ `Object`

#empty? ⇒ `Boolean`

#first(n = NOT_SET) ⇒ `Object`

#floor(ndigits = 0) ⇒ `Object`

#frexp ⇒ `Object`

#head(n = 5) ⇒ `Object`

#hypot(y) ⇒ `Object`

#in?(values) ⇒ `Boolean`

#include?(value) ⇒ `Boolean`

#inspect ⇒ `Object` Also known as: to_s

#last(n = NOT_SET) ⇒ `Object`

#ldexp(exponent) ⇒ `Object`

#ln ⇒ `Object`

#log(base = NOT_SET) ⇒ `Object`

#map(&block) ⇒ `Object`

#map!(&block) ⇒ `Object`

#max ⇒ `Object`

#mean ⇒ `Object`

#median ⇒ `Object`

#min ⇒ `Object`

#missing ⇒ `Object`

#numeric? ⇒ `Boolean`

#one_hot(drop: false, prefix: nil) ⇒ `Object`

#percentile(q) ⇒ `Object`

#reject(&block) ⇒ `Object`

#round(ndigits = 0) ⇒ `Object`

#select(&block) ⇒ `Object`

#size ⇒ `Object` Also known as: length, count

#sort ⇒ `Object`

#std ⇒ `Object`

#sum ⇒ `Object`

#tail(n = 5) ⇒ `Object`

#take(n) ⇒ `Object`

#tally ⇒ `Object`

#to(type) ⇒ `Object`

#to!(type) ⇒ `Object`

#to_a ⇒ `Object`

#to_html ⇒ `Object`

#to_numo ⇒ `Object`

#type ⇒ `Object`

#uniq ⇒ `Object`

#var ⇒ `Object`

#zip(other, &block) ⇒ `Object`