Module: Yomise

Defined in:
lib/yomise.rb,
lib/yomise/version.rb

Defined Under Namespace

Classes: Error

Constant Summary collapse

VERSION =
"0.1.4"

Class Method Summary collapse

Class Method Details

.available(data, truevalue: true, falsevalue: false, blank_str_is_false: true) ⇒ Object



201
202
203
204
205
206
207
208
# File 'lib/yomise.rb', line 201

# Element-wise availability mask for Rover containers.
#
# Every cell is passed through is_available with the same options.
# A Rover::Vector yields the mapped vector; a Rover::DataFrame yields a new
# DataFrame with the same keys. Any other input falls through and returns nil.
def available(data, truevalue: true, falsevalue: false, blank_str_is_false: true)
	mask_opts = { truevalue: truevalue, falsevalue: falsevalue, blank_str_is_false: blank_str_is_false }

	case data
	when Rover::Vector
		data.map { |cell| is_available(cell, **mask_opts) }
	when Rover::DataFrame
		names = data.keys
		masked_columns = names.map do |name|
			data[name].map { |cell| is_available(cell, **mask_opts) }
		end
		Rover::DataFrame.new(names.zip(masked_columns).to_h)
	end
end

.is_available(value, truevalue: true, falsevalue: false, blank_str_is_false: true) ⇒ Object

For Rover: generates a mask that filters out nil and NaN values, or a two-valued (true/false value) column.



183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/yomise.rb', line 183

# Classify a single value as available or missing (used to build Rover masks
# or two-valued columns).
#
# Missing values are: nil, a numeric NaN, and (when blank_str_is_false is set)
# the empty String. Everything else counts as available.
#
# @param value [Object] the cell to inspect
# @param truevalue [Object] returned when the value is available
# @param falsevalue [Object] returned when the value is missing
# @param blank_str_is_false [Boolean] treat "" as missing
# @return [Object] truevalue or falsevalue
def is_available(value, truevalue: true, falsevalue: false, blank_str_is_false: true)
	return falsevalue if value.nil?

	case value
	when Numeric
		# Integer and Rational do not define nan?, so only ask numerics that
		# actually respond to it (Float and friends); the rest can never be NaN.
		(value.respond_to?(:nan?) && value.nan?) ? falsevalue : truevalue
	when String
		(value.empty? && blank_str_is_false) ? falsevalue : truevalue
	else
		truevalue
	end
end

.read(path, **opt) ⇒ Object



16
17
18
# File 'lib/yomise.rb', line 16

# Read a tabular file, choosing the reader from the file name: names ending in
# "csv" (case-insensitive) go to read_csv, everything else to read_excel.
#
# @param path [String, #to_s] file location; Pathname-like objects are accepted
# @param opt [Hash] options forwarded unchanged to the selected reader
# @return [Object] whatever the selected reader returns
def read(path, **opt)
	# Regexp#=== is false for non-String arguments, so match against the string
	# form; otherwise a Pathname to a CSV file would be sent to the Excel reader.
	if /csv$/i === path.to_s
		read_csv(path, **opt)
	else
		read_excel(path, **opt)
	end
end

.read_csv(path, format: :rover, encoding: "utf-8", liberal_parsing: true, reconvert_utf8: false, col_sep: ",", index: nil, **opt) ⇒ Object

Generate an Array from a CSV file and convert it to a Hash or DataFrame. Candidate keys for **opt: line_from: 1, header: 0.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/yomise.rb', line 22

# Read a CSV file into a 2D Array and convert it to the requested format.
#
# The file is first read with +encoding+; if that raises, it is read once more
# as cp932.
#
# @param path [String] CSV file location
# @param format [Symbol, String, nil] "array", "hash", "csv", "numo" (still under
#   construction; returns the raw rows), "daru", or anything else (including
#   nil) for a Rover::DataFrame
# @param encoding [String] encoding tried first
# @param liberal_parsing [Boolean] read via CSV.read with liberal_parsing; when
#   false the older File.open + CSV.parse path is used (not recommended)
# @param reconvert_utf8 [Boolean] re-encode every non-nil cell to UTF-8 afterwards
# @param col_sep [String] column separator, honoured in both parsing modes
# @param index [Integer, nil] column handed to to_hash as the index source; the
#   index is only applied to the result for the daru format
# @param opt [Hash] extra options forwarded to to_hash (e.g. line_from:, header:)
# @return [Object] rows, Hash, String or DataFrame depending on +format+
def read_csv(path, format: :rover, encoding: "utf-8", liberal_parsing: true, reconvert_utf8: false, col_sep: ",", index: nil, **opt)
	## TODO: make index: designate the DataFrame index column for every format
	## (i.e. a revised set_index); only daru is handled below.

	requested_encoding = encoding

	# Loads the rows using read_encoding. In liberal mode the cells are transcoded
	# to UTF-8 only when the *requested* encoding is not UTF-8.
	# NOTE(review): consequently a cp932 retry after a failed UTF-8 read leaves the
	# cells cp932-encoded unless reconvert_utf8 is set -- confirm this is intended.
	load_rows = lambda do |read_encoding|
		if liberal_parsing
			table = CSV.read(path, encoding: read_encoding, liberal_parsing: true, col_sep: col_sep)
			if requested_encoding.to_s.downcase != "utf-8"
				table.to_a.map { |row| row.map { |cell| cell&.encode("utf-8", invalid: :replace, replace: '') } }
			else
				table
			end
		else
			# Old style (not recommended). "&:read" calls read on the opened File,
			# not Yomise.read.
			CSV.parse(File.open(path, encoding: read_encoding, &:read), col_sep: col_sep)
		end
	end

	# Get 2D Array
	begin
		csv = load_rows.call(encoding)
		encoding = "utf-8" if liberal_parsing
	rescue
		# Try another encoding
		csv = load_rows.call("cp932")
		encoding = "cp932"
	end

	if reconvert_utf8
		csv = csv.map { |row| row.map { |cell| cell&.encode("UTF-8") } }
	end

	case format.to_s
	when "array", "numo" # numo: under construction, raw rows for now
		csv
	when "hash"
		hash, _index_values = to_hash(csv, **opt)
		hash
	when "csv"
		csv.to_csv
	else # also reached when format is nil
		h, ind_orig = to_hash(csv, index: index, **opt)
		ans = to_df(h, format: format)

		# Encoding conversion and index assignment are daru-only; rover is not supported yet.
		if format.to_s == "daru"
			ans.convert_enc!(from: encoding, to: "utf-8") if encoding.to_s.downcase != "utf-8"
			begin
				ans.index = ind_orig if index
			rescue
				warn "Indexing failed (Parhaps due to duplicated index)."
			end
		end

		ans
	end
end

.read_excel(path, sheet_i: 0, format: :rover, encoding: "utf-8", index: nil, **opt) ⇒ Object

Generate an Array from an Excel file and convert it to a Hash or DataFrame. Candidate keys for **opt: line_from: 1, header: 0.



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/yomise.rb', line 96

# Read one sheet of an Excel file (via open_excel) and return it in the
# requested format: "array" / "numo" give the raw rows, "hash" the column Hash
# from to_hash, "csv" the rows' to_csv, and anything else (nil included) a
# DataFrame built by to_df. The index column is only applied for "daru".
def read_excel(path, sheet_i: 0, format: :rover, encoding: "utf-8", index: nil, **opt)
	rows = open_excel(path, sheet_i, encoding: encoding) # 2D Array
	kind = format.to_s

	case kind
	when "array", "numo" # numo: under construction, raw rows for now
		rows
	when "hash"
		columns, _unused_index = to_hash(rows, **opt)
		columns
	when "csv"
		rows.to_csv
	else
		columns, index_values = to_hash(rows, index: index, **opt)
		frame = to_df(columns, format: format)
		if kind == "daru"
			begin
				frame.index = index_values if index
			rescue
				warn "Indexing failed (Parhaps due to duplicated index)."
			end
		end
		frame
	end
end

.recognize_type(str, expected) ⇒ Object



277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'lib/yomise.rb', line 277

# Decide which type a column must have after seeing one more cell.
#
# The cell is classified as :int, :float or :string, and the "wider" of that
# and +expected+ is returned (order: :any < :int < :float < :string).
# A nil cell never changes the expectation. Non-String cells match none of the
# numeric patterns and therefore count as :string.
#
# @param str [String, nil] the cell text
# @param expected [Symbol] type expected so far (:any, :int, :float, :string)
# @return [Symbol] the resulting type
def recognize_type(str, expected)
	return expected if str.nil?

	order = {:any => 0, :int => 1, :float => 2, :string => 3}

	# \A and \z anchor the whole cell; with ^ and $ a multi-line cell such as
	# "note\n42" would be classified by one of its lines only.
	type_of_str =
		case str
		when /\A\s*[-+]?\d+\s*\z/
			:int
		when /\A\s*[-+]?(\d+\.\d*|\.\d+)\s*\z/,            # needs a digit: a bare "." is text
		     /\A\s*[-+]?(\d*\.\d+|\d+)[eE][-+]?\d+\s*\z/   # exponent notation
			:float
		else
			:string
		end

	order[type_of_str] > order[expected] ? type_of_str : expected
end

.to_df(d, format: :rover) ⇒ Object

Convert Hash to DataFrame



174
175
176
177
178
179
180
# File 'lib/yomise.rb', line 174

# Build a DataFrame from a Hash: Daru::DataFrame when format is "daru"
# (String or Symbol), Rover::DataFrame for every other value.
def to_df(d, format: :rover)
	frame_class = format.to_s == "daru" ? Daru::DataFrame : Rover::DataFrame
	frame_class.new(d)
end

.to_hash(array2d, line_from: 1, line_until: nil, line_ignored: nil, column_from: nil, column_until: nil, header: 0, symbol_header: false, replaced_by_nil: [], analyze_type: true, index: nil) ⇒ Object

Convert a 2D Array to a Hash. With header: nil, default headers ("column0", "column1", …) are generated. The line_ignored option is not implemented yet.



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/yomise.rb', line 125

# Convert a 2D Array (rows of cells) into a Hash of header => column Array.
#
# @param array2d [Array<Array>] all rows, including the header row if any
# @param line_from [Integer, Regexp] first data row. A Regexp selects the first
#   row whose cells, joined with ",", match it.
# @param line_until [Integer, Regexp, nil] exclusive end row; nil reads to the
#   end. A Regexp selects the last matching row; when nothing matches, rows are
#   read to the end.
# @param line_ignored accepted but not implemented yet
# @param column_from [Integer, nil] first column kept (data rows and header)
# @param column_until [Integer, nil] exclusive end column
# @param header [Integer, nil] index of the header row; nil generates
#   "column0", "column1", ...
# @param symbol_header [Boolean] convert header names to Symbols
# @param replaced_by_nil [Array] forwarded unchanged to fix_array
# @param analyze_type [Boolean] forwarded unchanged to fix_array
# @param index [Integer, nil] column whose cells are returned as index values
#   (taken before the column range is applied)
# @return [Array] two elements: the Hash, and the index values (nil when
#   +index+ is not given)
def to_hash(array2d, line_from: 1, line_until: nil, line_ignored: nil,
            column_from: nil, column_until: nil,
            header: 0, symbol_header: false,
            replaced_by_nil: [], analyze_type: true,
            index: nil)
	# Define read range -----------
	lfrom, luntil = line_from, line_until
	lf_reg, lu_reg = line_from.kind_of?(Regexp), line_until.kind_of?(Regexp)

	if lf_reg || lu_reg
		lines_ary = array2d.map { _1.join(",") }
		# NOTE(review): an unmatched line_from Regexp leaves lfrom nil, so rows are
		# taken from the very first line -- confirm that is the wanted fallback.
		lfrom = lines_ary.find_index { line_from === _1 } if lf_reg
		# rindex is nil when nothing matches, which means "read to the end"
		# (the previous reverse/find_index arithmetic raised TypeError instead).
		luntil = lines_ary.rindex { line_until === _1 } if lu_reg
	end

	# Selected data rows
	output = array2d[lfrom...luntil]

	# Index values come from the untrimmed rows
	ind_orig = index ? output.map { _1[index] } : nil

	# Select columns
	output = output.map { _1[column_from...column_until] } if column_from || column_until

	# Transpose rows into columns. Short rows are padded with nil up to the widest
	# row, so cells of rows longer than the first one are no longer dropped, and
	# an empty selection gives no columns instead of failing on nil.
	width = output.map(&:length).max || 0
	output_transpose = Array.new(width) { |col| output.map { |row| row[col] } }
	output_transpose = fix_array(output_transpose, replaced_by_nil, analyze_type)

	# Define header
	if header
		hd = check_header(array2d[header])[column_from...column_until]
	else
		# longest_line is an Array extension defined elsewhere in the project
		# (presumably the widest row length -- TODO confirm).
		hd = [*0...(output.longest_line)].map { "column#{_1}" }
	end
	hd = hd.map { _1.intern } if symbol_header

	# Header => column; a header without a matching data column maps to nil
	columns = hd.each_with_index.to_h { |name, i| [name, output_transpose[i]] }
	return columns, ind_orig
end