Class: Rust::CSV

Inherits:
Object show all
Defined in:
lib/rust/core/csv.rb

Overview

Class that handles CSVs (both loading and saving).

Class Method Summary collapse

Class Method Details

.auto_infer_types(dataframe, auto_infer_integers) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/rust/core/csv.rb', line 87

# Converts, in place, every column of +dataframe+ whose values are all
# numeric (or nil) into actual numbers, and returns the same +dataframe+.
#
# A column becomes an integer column when +auto_infer_integers+ is truthy and
# every non-nil value parses as a base-10 integer; otherwise it becomes a
# float column when every non-nil value parses as a float. Any other column
# is left untouched. nil values are always preserved.
#
# +dataframe+ must respond to +column_names+, +column(name)+ and
# +transform_column!(name) { |value| ... }+.
def self.auto_infer_types(dataframe, auto_infer_integers)
    # Strings are parsed strictly in base 10 so that zero-padded values such
    # as "08" are integers rather than invalid octal literals.
    to_integer = lambda { |value| value.is_a?(String) ? Integer(value, 10) : Integer(value) }
    to_float   = lambda { |value| Float(value) }

    # True when +parser+ accepts +value+ without raising.
    parsable = lambda do |parser, value|
        begin
            parser.call(value)
            true
        rescue StandardError
            false
        end
    end

    integer_columns = []
    float_columns   = []
    dataframe.column_names.each do |column_name|
        values = dataframe.column(column_name)

        # Integer detection is skipped when integers are not requested, so a
        # column is only ever converted by the parser that validated it.
        if auto_infer_integers && values.all? { |value| value.nil? || parsable.call(to_integer, value) }
            integer_columns << column_name
        elsif values.all? { |value| value.nil? || parsable.call(to_float, value) }
            float_columns << column_name
        end
    end

    # Convert with the validating parser (not to_i/to_f), so a value that
    # passed validation is never silently turned into 0 or 0.0.
    integer_columns.each do |column_name|
        dataframe.transform_column!(column_name) { |value| value.nil? ? value : to_integer.call(value) }
    end

    float_columns.each do |column_name|
        dataframe.transform_column!(column_name) { |value| value.nil? ? value : to_float.call(value) }
    end

    return dataframe
end

.read(filename, **options) ⇒ Object

Reads the CSV at filename. Options can be specified, such as:

  • headers => set to true if the first row contains the headers, false otherwise;

  • infer_numbers => if a column contains only numbers, the values are transformed into floats; true by default;

  • infer_integers => if infer_numbers is active, it distinguishes between integers and floats;

The other options are forwarded to Ruby's standard-library CSV reader (“::CSV.foreach”).



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/rust/core/csv.rb', line 30

# Reads the CSV at +filename+ and returns its content as a Rust::DataFrame.
#
# Recognised options:
# - +headers+: truthy if the first row contains the headers. When it is
#   missing or false, columns are named "X1", "X2", ... after the width of
#   the first row;
# - +infer_numbers+: when truthy (the default) the result is passed through
#   auto_infer_types;
# - +infer_integers+: forwarded to auto_infer_types as its second argument
#   (nil when not given).
#
# Every other option is passed on to ::CSV.foreach.
def self.read(filename, **options)
    # Both flags are consumed here; whatever remains goes to ::CSV.foreach.
    infer_numbers  = options.delete(:infer_numbers) { true }
    infer_integers = options.delete(:infer_integers)

    hash   = {}
    labels = nil

    ::CSV.foreach(filename, **options) do |row|
        # TODO fix this ugly patch
        unless options[:headers]
            # Without headers the first row only tells us how many columns
            # there are: synthesise the labels and read the file again.
            options[:headers] = (1..row.size).map { |e| "X#{e}" }

            # The inference flags were removed from +options+ above, so they
            # must be passed explicitly or the re-read falls back to defaults.
            return read(filename, **options, infer_numbers: infer_numbers, infer_integers: infer_integers)
        end

        unless labels
            labels = row.headers
            labels.each { |label| hash[label] = [] }
        end

        labels.each { |label| hash[label] << row[label] }
    end

    result = Rust::DataFrame.new(hash)
    result = self.auto_infer_types(result, infer_integers) if infer_numbers

    return result
end

.read_all(pattern, **options) ⇒ Object

Reads a pattern of CSVs (glob-style pattern) and returns a map containing as keys the filenames of the loaded CSVs and as values the corresponding data-frames. Options can be specified (see #read).



15
16
17
18
19
20
21
# File 'lib/rust/core/csv.rb', line 15

# Loads every file matching the glob-style +pattern+ and returns a
# DataFrameHash that maps each matched filename to the value returned by
# CSV.read for that file. +options+ are passed unchanged to each CSV.read call.
def self.read_all(pattern, **options)
    frames = DataFrameHash.new
    matches = Dir.glob(pattern)
    matches.each { |path| frames[path] = CSV.read(path, **options) }
    frames
end

.write(filename, dataframe, **options) ⇒ Object

Writes the dataframe as a CSV at filename. Options can be specified, such as:

  • headers => set to true if the first row should contain the headers, false otherwise;

The other options are forwarded to Ruby's standard-library CSV writer (“::CSV.open”).

Raises:

  • (TypeError)


70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/rust/core/csv.rb', line 70

# Writes +dataframe+ as a CSV file at +filename+ and returns true.
#
# Recognised options:
# - +headers+: false to omit the header row; true (or not given) to write the
#   dataframe's column names as the first row; an Array or String to use
#   those headers instead of the column names.
#
# Every other option is passed on to ::CSV.open.
#
# Raises TypeError if +dataframe+ is not a Rust::DataFrame.
def self.write(filename, dataframe, **options)
    raise TypeError, "Expected Rust::DataFrame" unless dataframe.is_a?(Rust::DataFrame)

    write_headers = options[:headers] != false

    # true/false/nil are only an on/off switch: ::CSV needs the actual header
    # names. Passing a literal true would make it write no header line and
    # treat the first data row as the headers.
    explicit_headers = options[:headers].is_a?(Array) || options[:headers].is_a?(String)
    options[:headers] = dataframe.column_names unless explicit_headers

    ::CSV.open(filename, 'w', write_headers: write_headers, **options) do |csv|
        dataframe.each { |row| csv << row }
    end

    return true
end