Class: GtfsDf::BaseGtfsTable

Inherits:
Object
  • Object
show all
Defined in:
lib/gtfs_df/base_gtfs_table.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input) ⇒ BaseGtfsTable

Returns a new instance of BaseGtfsTable.

Parameters:

  • input (Polars::DataFrame, String, Array)

    A dataframe, a csv path or an array-based table



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/gtfs_df/base_gtfs_table.rb', line 8

# Builds the table from an existing dataframe or from a CSV file path.
#
# For a CSV path, the file is read with schema inference disabled
# (`infer_schema_length: 0`), header names are stripped of surrounding
# whitespace, and rows in which every cell is null are dropped. Each column
# that is declared in the subclass's SCHEMA is then trimmed, has blank values
# replaced with null, and is cast to its declared type. Columns that are not
# in SCHEMA are left as read.
#
# NOTE(review): only Polars::DataFrame and String inputs are handled here; any
# other input (including an Array) ends up in the error branch.
#
# @param input [Polars::DataFrame, String] a dataframe (used as-is) or a CSV path
# @raise [GtfsDf::Error] if +input+ is neither a dataframe nor a String
def initialize(input)
  @df =
    case input
    when Polars::DataFrame
      input
    when String
      # TODO: use `infer_schema: false` instead of `infer_schema_length` after polars release:
      # https://github.com/ankane/ruby-polars/blob/master/CHANGELOG.md#100-unreleased
      table = Polars.read_csv(input, infer_schema_length: 0, encoding: "utf8-lossy")
        .rename(->(col) { col.strip })

      # Strip out empty lines. Unfortunately read_csv does not support the drop_empty_rows
      # option right now.
      table = table.filter(Polars.all_horizontal(Polars.all.is_null).is_not)

      # Only touch the columns this table's schema knows about.
      declared = self.class::SCHEMA.slice(*table.columns)

      # Trim each declared column and turn whitespace-only cells into nulls
      # before casting.
      blank_to_null = declared.keys.map do |col|
        trimmed = Polars.col(col).str.strip_chars
        Polars.when(trimmed.str.len_chars.gt(0))
          .then(trimmed)
          .otherwise(Polars.lit(nil))
      end
      casts = declared.map { |name, type| Polars.col(name).cast(type) }

      table.with_columns(blank_to_null).with_columns(casts)
    else
      # `raise`, not `throw`: `throw` is catch/throw control flow and would
      # surface as UncaughtThrowError instead of GtfsDf::Error.
      raise GtfsDf::Error, "Unrecognized input"
    end
  @validator = SchemaValidator.new(@df, self.class)
end

Instance Attribute Details

#df ⇒ Object (readonly)

Returns the value of attribute df.



5
6
7
# File 'lib/gtfs_df/base_gtfs_table.rb', line 5

# Reader for the +@df+ instance variable set by the constructor.
#
# @return [Object] the current value of +@df+
def df = @df

#validator ⇒ Object (readonly)

Returns the value of attribute validator.



5
6
7
# File 'lib/gtfs_df/base_gtfs_table.rb', line 5

# Reader for the +@validator+ instance variable set by the constructor.
#
# @return [Object] the current value of +@validator+
def validator = @validator

Class Method Details

.empty_dataframe ⇒ Object



59
60
61
62
63
64
# File 'lib/gtfs_df/base_gtfs_table.rb', line 59

# Builds a dataframe with one empty column per entry in REQUIRED_FIELDS,
# passing SCHEMA as the +schema_overrides+ option.
#
# @return [Polars::DataFrame]
def self.empty_dataframe
  required = const_get(:REQUIRED_FIELDS)
  # Each field gets its own fresh empty array.
  empty_columns = required.to_h { |field| [field, []] }

  Polars::DataFrame.new(empty_columns, schema_overrides: const_get(:SCHEMA))
end

.time_fields ⇒ Object



43
44
45
# File 'lib/gtfs_df/base_gtfs_table.rb', line 43

# Returns the TIME_FIELDS constant when one is defined, or an empty array
# otherwise.
#
# @return [Object] the value of TIME_FIELDS, or +[]+ when it is not defined
def self.time_fields
  return [] unless const_defined?(:TIME_FIELDS)

  const_get(:TIME_FIELDS)
end

Instance Method Details

#dataframe ⇒ Object



55
56
57
# File 'lib/gtfs_df/base_gtfs_table.rb', line 55

# Returns the value held in +@df+.
#
# @return [Object] the current value of +@df+
def dataframe = @df

#errors ⇒ Object



51
52
53
# File 'lib/gtfs_df/base_gtfs_table.rb', line 51

# Delegates to the validator held in +@validator+.
#
# @return [Object] whatever +@validator.errors+ returns
def errors = @validator.errors

#fields ⇒ Object



39
40
41
# File 'lib/gtfs_df/base_gtfs_table.rb', line 39

# Lists the keys of the SCHEMA constant of the receiver's class.
#
# @return [Object] the result of calling +keys+ on SCHEMA
def fields
  schema = self.class::SCHEMA
  schema.keys
end

#valid? ⇒ Boolean

Returns:

  • (Boolean)


47
48
49
# File 'lib/gtfs_df/base_gtfs_table.rb', line 47

# Delegates to the validator held in +@validator+.
#
# @return [Boolean] whatever +@validator.valid?+ returns
def valid? = @validator.valid?