Module: TSV
- Defined in:
- lib/rbbt/util/R.rb,
lib/rbbt/tsv/melt.rb,
lib/rbbt/tsv/excel.rb,
lib/rbbt/tsv/filter.rb,
lib/rbbt/tsv/matrix.rb,
lib/rbbt/tsv/stream.rb,
lib/rbbt/tsv/marshal.rb,
lib/rbbt/tsv/refactor.rb,
lib/rbbt/tsv/attach/util.rb,
lib/rbbt/tsv/field_index.rb,
lib/rbbt/tsv/serializers.rb,
lib/rbbt/tsv/parallel/through.rb,
lib/rbbt/workflow/refactor/entity.rb
Defined Under Namespace
Modules: XLS, XLSX
Classes: BinarySerializer, CleanSerializer, FloatArraySerializer, FloatSerializer, IntegerArraySerializer, IntegerSerializer, StrictFloatArraySerializer, StrictIntegerArraySerializer, StringArraySerializer, StringDoubleArraySerializer, StringSerializer, TSVMarshalSerializer, TSVSerializer
Constant Summary
- SERIALIZER_ALIAS =
{ :integer => IntegerSerializer, :float => FloatSerializer, :integer_array => IntegerArraySerializer, :float_array => FloatArraySerializer, :strict_integer_array => StrictIntegerArraySerializer, :strict_float_array => StrictFloatArraySerializer, :marshal => Marshal, :single => StringSerializer, :string => StringSerializer, :list => StringArraySerializer, :flat => StringArraySerializer, :double => StringDoubleArraySerializer, :clean => CleanSerializer, :binary => BinarySerializer, :tsv => TSVSerializer, :marshal_tsv => TSVMarshalSerializer }
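The aliases map option symbols to the serializer classes listed above, for example:

  TSV::SERIALIZER_ALIAS[:float_array] # => TSV::FloatArraySerializer
  TSV::SERIALIZER_ALIAS[:marshal]     # => Marshal

Methods such as #field_index pass these symbols as the :serializer option when persisting data (e.g. :serializer => :list).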
Class Attribute Summary
- .field_index_dir ⇒ Object
Returns the value of attribute field_index_dir.
Instance Attribute Summary
- #field_indices ⇒ Object
Returns the value of attribute field_indices.
Class Method Summary
- ._clean_float(v) ⇒ Object
- ._excel_data(tsv, options = {}) ⇒ Object
- ._remove_link(value) ⇒ Object
- .attach(*args, **kwargs) ⇒ Object
- .attach_orig ⇒ Object
- .build_traverse_index(files, options = {}) ⇒ Object
- .excel(filename, options = {}) ⇒ Object
- .excel2tsv(filename, options = {}) ⇒ Object
- .find_path(files, options = {}) ⇒ Object
May make an extra index!
- .find_traversal(tsv1, tsv2, options = {}) ⇒ Object
- .header_lines(key_field, fields, entry_hash = nil) ⇒ Object
- .index_for_traversal(path, persist_input = false) ⇒ Object
- .melt(tsv, key_field, header_field, fields, *info_fields, &block) ⇒ Object
- .merge_different_fields(stream1, stream2, output, options = {}) ⇒ Object
- .merge_row_fields(input, output, options = {}) ⇒ Object
- .open(source, type = nil, options = nil) ⇒ Object
- .original_open ⇒ Object
- .original_pos_index ⇒ Object
- .original_range_index ⇒ Object
- .pos_index(*args, filters: nil, **kwargs) ⇒ Object
- .range_index(*args, filters: nil, **kwargs) ⇒ Object
- .read_matrix(tsv, field_format = "ID", value_format = "Value", *others) ⇒ Object
- .reorder_stream(stream, positions, sep = "\t") ⇒ Object
- .reorder_stream_tsv(stream, key_field, fields = nil, zipped = true, bar = nil) ⇒ Object
- .xls(filename, options = {}) ⇒ Object
- .xlsx(filename, options = {}) ⇒ Object
Instance Method Summary
- #attach_index(other, index, fields = nil) ⇒ Object
- #attach_same_key(other, fields = nil) ⇒ Object
- #attach_source_key(other, source, options = {}) ⇒ Object
- #dumper_stream(keys = nil, no_options = false, unmerge = false) ⇒ Object
- #entity_options ⇒ Object
- #entity_templates ⇒ Object
- #excel(filename, options = {}) ⇒ Object
- #field_index(field) ⇒ Object
- #field_index_select(matches) ⇒ Object
- #filter(filter_dir = nil) ⇒ Object
- #marshal_dump ⇒ Object
- #matrix_melt(*args) ⇒ Object
- #melt(header_field = nil, *info_fields, &block) ⇒ Object
- #merge_different_fields(other, options = {}) ⇒ Object
- #original_dumper_stream ⇒ Object
- #original_reorder ⇒ Object
- #original_to_s ⇒ Object
- #original_unzip ⇒ Object
- #ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object
- #ppthrough_callback(&block) ⇒ Object
- #pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object
- #R(script, source = nil, options = {}) ⇒ Object
- #R_console(pre_script = nil) ⇒ Object
- #R_interactive(script = nil, source = []) ⇒ Object
- #reorder(key_field = nil, fields = nil, merge: true, one2one: true, zipped: nil, **kwargs) ⇒ Object
- #reset_filters ⇒ Object
- #swap_id(field, format, options = {}, &block) ⇒ Object
- #to_s(keys = nil, no_options = false, unmerge = false) ⇒ Object
- #unzip(field = 0, merge = false, sep = ":", delete = true, **kwargs) ⇒ Object
- #with_monitor(use_monitor = true) ⇒ Object
- #xls(filename, options = {}) ⇒ Object
- #xlsx(filename, options = {}) ⇒ Object
Class Attribute Details
.field_index_dir ⇒ Object
Returns the value of attribute field_index_dir.
4 5 6 |
# File 'lib/rbbt/tsv/field_index.rb', line 4 def field_index_dir @field_index_dir end |
Instance Attribute Details
#field_indices ⇒ Object
Returns the value of attribute field_indices.
10 11 12 |
# File 'lib/rbbt/tsv/field_index.rb', line 10 def field_indices @field_indices end |
Class Method Details
._clean_float(v) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/rbbt/tsv/excel.rb', line 16 def self._clean_float(v) case v when Float v.to_s.sub(/e(-?\d+)$/,'E\1') when String if v =~ /^-?[\d\.]+e(-?\d+)$/ v.sub(/e(-?\d+)$/,'E\1') else v end else v end end |
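A brief illustration of the exponent normalization performed above (example values, not from the source):

  TSV._clean_float(1.0e-07)  # => "1.0E-07"
  TSV._clean_float("2.5e-3") # => "2.5E-3"
  TSV._clean_float("text")   # => "text" (non-numeric strings pass through unchanged)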
._excel_data(tsv, options = {}) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/rbbt/tsv/excel.rb', line 32

def self._excel_data(tsv, options = {})
  options = IndiferentHash.add_defaults options, :sep2 => ', '

  name         = IndiferentHash.process_options options, :name
  sep2         = IndiferentHash.process_options options, :sep2
  unmerge      = IndiferentHash.process_options options, :unmerge
  sort_by      = IndiferentHash.process_options options, :sort_by
  sort_by_cast = IndiferentHash.process_options options, :sort_by_cast
  remove_links = IndiferentHash.process_options options, :remove_links

  i = 1
  if sort_by
    if sort_by_cast
      data = tsv.sort_by sort_by do |k, v|
        if Array === v
          v.first.send(sort_by_cast)
        else
          v.send(sort_by_cast)
        end
      end
    else
      data = tsv.sort_by sort_by
    end
  else
    data = tsv
  end

  rows = []
  data.through do |key, values|
    cells = []
    cells.push((name and key.respond_to?(:name)) ? key.name || key : key)

    values = [values] unless Array === values
    values.each do |value|
      v = (name and value.respond_to?(:name)) ? value.name || value : value
      if Array === v
        v = v.collect{|_v| _remove_link(_v)} if remove_links
        v = v.collect{|_v| _clean_float(_v)}
        if unmerge
          cells.push v
        else
          cells.push v * sep2
        end
      else
        v = _remove_link(v) if remove_links
        cells.push v
      end
    end
    rows << cells
    i += 1
  end

  if unmerge
    new_rows = []
    rows.each do |row|
      header = row.shift
      NamedArray.zip_fields(row).each do |values|
        new_rows << [header] + values
      end
    end
    rows = new_rows
  end

  [tsv.all_fields, rows]
end
._remove_link(value) ⇒ Object
8 9 10 11 12 13 14 |
# File 'lib/rbbt/tsv/excel.rb', line 8 def self._remove_link(value) if String === value && value =~ /<([\w]+)[^>]*>(.*?)<\/\1>/ $2 else value end end |
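Following the regular expression above, a value wrapped in an HTML element is reduced to its inner text (example values, not from the source):

  TSV._remove_link("<a href='http://example.org'>TP53</a>") # => "TP53"
  TSV._remove_link("TP53")                                  # => "TP53" (non-matching values are returned as-is)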
.attach(*args, **kwargs) ⇒ Object
186 187 188 189 190 191 192 |
# File 'lib/rbbt/tsv/refactor.rb', line 186 def attach(*args, **kwargs) if kwargs.include?(:zipped) zipped = kwargs.delete(:zipped) kwargs[:one2one] = zipped unless kwargs.include?(:one2one) end attach_orig(*args, **kwargs) end |
.attach_orig ⇒ Object
185 |
# File 'lib/rbbt/tsv/refactor.rb', line 185 alias attach_orig attach |
.build_traverse_index(files, options = {}) ⇒ Object
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 |
# File 'lib/rbbt/tsv/attach/util.rb', line 327

def self.build_traverse_index(files, options = {})
  options = Misc.add_defaults options, :in_namespace => false, :persist_input => true

  in_namespace  = options[:in_namespace]
  persist_input = options[:persist_input]

  path = find_path(files, options)

  return nil if path.nil?

  traversal_ids = path.collect{|p| p.first}

  Log.debug "Found Traversal: #{traversal_ids * " => "}"

  index_for_traversal path, persist_input
end
.excel(filename, options = {}) ⇒ Object
296 297 298 299 300 301 302 |
# File 'lib/rbbt/tsv/excel.rb', line 296

def self.excel(filename, options = {})
  if filename =~ /\.xlsx$/
    xlsx(filename, options)
  else
    xls(filename, options)
  end
end
.excel2tsv(filename, options = {}) ⇒ Object
304 305 306 |
# File 'lib/rbbt/tsv/excel.rb', line 304

def self.excel2tsv(filename, options = {})
  excel(filename, options)
end
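A minimal usage sketch (file names are hypothetical): .excel dispatches on the extension as shown above, and .xls/.xlsx download remote files to a temporary location before reading them.

  require 'rbbt/tsv/excel'

  tsv = TSV.excel('samples.xlsx')    # delegates to TSV.xlsx
  tsv = TSV.excel2tsv('samples.xls') # thin wrapper over TSV.excel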
.find_path(files, options = {}) ⇒ Object
May make an extra index!
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 |
# File 'lib/rbbt/tsv/attach/util.rb', line 243

def self.find_path(files, options = {})
  options = Misc.add_defaults options, :in_namespace => false
  in_namespace = options[:in_namespace]

  if in_namespace
    if files.first.all_fields.include? in_namespace
      ids = [[in_namespace]]
    else
      ids = [files.first.all_namespace_fields(in_namespace)]
    end
    ids += files[1..-1].collect{|f| f.all_fields}
  else
    ids = files.collect{|f| f.all_fields }
  end

  id_list = []
  ids.each_with_index do |list, i|
    break if i == ids.length - 1
    match = list.select{|field| ids[i + 1].select{|f| Misc.match_fields(field, f) }.any? }
    return nil if match.empty?
    id_list << match.first
  end

  if ! Misc.match_fields(id_list.last, files.last.all_fields.first)
    id_list << files.last.all_fields.first
    id_list.zip(files)
  else
    id_list.zip(files[0..-1])
  end
end
.find_traversal(tsv1, tsv2, options = {}) ⇒ Object
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 |
# File 'lib/rbbt/tsv/attach/util.rb', line 344

def self.find_traversal(tsv1, tsv2, options = {})
  options = Misc.add_defaults options, :in_namespace => false
  in_namespace = options[:in_namespace]

  identifiers1 = tsv1.identifier_files || []
  identifiers1 += [options[:identifiers]].flatten if options[:identifiers]
  identifiers2 = tsv2.identifier_files || []

  identifiers1.unshift tsv1
  identifiers2.unshift tsv2

  files1 = []
  files2 = []

  while identifiers1.any?
    files1.push identifiers1.shift
    identifiers2.each_with_index do |e, i|
      files2 = identifiers2[(0..i)]
      index = build_traverse_index(files1 + files2.reverse, options)
      return index if not index.nil?
    end
  end

  return nil
end
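A hedged sketch of the intended use (tsv1 and tsv2 are hypothetical TSV objects): find_traversal walks shared fields and identifier files to build an index translating keys of tsv1 into keys of tsv2, and returns nil when no traversal path exists.

  index = TSV.find_traversal(tsv1, tsv2, :persist_input => true)
  index[tsv1.keys.first] unless index.nil? # keys of tsv2 matching the first key of tsv1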
.header_lines(key_field, fields, entry_hash = nil) ⇒ Object
87 88 89 90 91 |
# File 'lib/rbbt/tsv/refactor.rb', line 87 def self.header_lines(key_field, fields, entry_hash = nil) entry_hash = entry_hash || {} entry_hash = entry_hash.merge(:key_field => key_field, :fields => fields) TSV::Dumper.header entry_hash end |
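A small illustration (output shown approximately; the exact preamble comes from TSV::Dumper.header):

  TSV.header_lines("Gene", ["Chr", "Start"], :type => :list)
  # => roughly "#: :type=:list\n#Gene\tChr\tStart\n"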
.index_for_traversal(path, persist_input = false) ⇒ Object
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 |
# File 'lib/rbbt/tsv/attach/util.rb', line 277 def self.index_for_traversal(path, persist_input = false) data_key, data_file = path.shift data_index = if data_key == data_file.key_field Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'" nil else Log.debug "Data index required" data_file.index :target => data_key, :fields => [data_file.key_field], :persist => false, :type => (data_file.type == :single ? :single : :flat) end current_index = data_index current_key = data_key while not path.empty? next_key, next_file = path.shift next_fields = next_file.all_fields corrected_next_key = next_fields.select{|f| Misc.match_fields(f, next_key)}.first corrected_current_key = next_fields.select{|f| Misc.match_fields(f, current_key)}.first if current_index.nil? current_index = next_file.index(:target => corrected_next_key, :fields => [corrected_current_key], :persist => persist_input) current_index = current_index.select :key => data_file.keys else next_index = next_file.index :target => next_key, :fields => [current_key], :persist => persist_input next_index.with_unnamed do current_index.with_unnamed do current_index.process current_index.fields.first do |values| if values.nil? nil else new_values = next_index.values_at(*values).flatten if current_index.type == :single new_values.first else new_values end end end current_index.fields = [next_key] end end end current_key = next_key end current_index end |
.melt(tsv, key_field, header_field, fields, *info_fields, &block) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/rbbt/tsv/melt.rb', line 2 def self.melt(tsv, key_field, header_field, fields, *info_fields, &block) dumper = TSV::Dumper.new :key_field => "ID", :fields => [key_field] + info_fields, :type => :list dumper.init TSV.traverse tsv, :into => dumper, :fields => info_fields do |k,values| values = [values] if tsv.type == :single values = values.collect{|v| [v]} if tsv.type == :list values = Misc.zip_fields(values) if tsv.type == :double res = [] values.each_with_index do |value,i| info_values = if block_given? new = block.call value next if new.nil? new else value end info_values = [info_values] unless tsv.type == :double id = [k, i] * ":" res << [id, [k] + [info_values].flatten] end res.extend MultipleResult res end end |
.merge_different_fields(stream1, stream2, output, options = {}) ⇒ Object
154 155 156 157 |
# File 'lib/rbbt/tsv/refactor.rb', line 154

def self.merge_different_fields(stream1, stream2, output, options = {})
  Open.write(output, Open.collapse_stream(TSV.paste_streams([stream1, stream2], **options), compact: true))
  #Open.write(output, TSV.paste_streams([stream1, stream2], **options))
end
.merge_row_fields(input, output, options = {}) ⇒ Object
150 151 152 |
# File 'lib/rbbt/tsv/refactor.rb', line 150

def self.merge_row_fields(input, output, options = {})
  Open.write(output, Open.collapse_stream(input, **options))
end
.open(source, type = nil, options = nil) ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/rbbt/tsv/refactor.rb', line 11 def open(source, type = nil, = nil) type, = nil, type if .nil? and (Hash === type or (String === type and type.include? "~")) = TSV.() if String === and .include? "~" ||= {} [:type] ||= type unless type.nil? if zipped = .delete(:zipped) [:one2one] = zipped end .delete(:keep_empty) original_open(source, ) end |
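A usage sketch based on the wrapper above (file name hypothetical): a bare type argument is folded into options[:type] and the legacy :zipped option is forwarded as :one2one.

  tsv = TSV.open('file.tsv', :double)                  # type given positionally
  tsv = TSV.open('file.tsv', :type => :list)           # or inside the options hash
  tsv = TSV.open('file.tsv', :double, :zipped => true) # passed on as :one2one => true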
.original_open ⇒ Object
9 |
# File 'lib/rbbt/tsv/refactor.rb', line 9 alias original_open open |
.original_pos_index ⇒ Object
55 |
# File 'lib/rbbt/tsv/refactor.rb', line 55 alias original_pos_index pos_index |
.original_range_index ⇒ Object
54 |
# File 'lib/rbbt/tsv/refactor.rb', line 54 alias original_range_index range_index |
.pos_index(*args, filters: nil, **kwargs) ⇒ Object
66 67 68 69 70 71 72 73 74 |
# File 'lib/rbbt/tsv/refactor.rb', line 66 def pos_index(*args, filters: nil, **kwargs) if filters raise "Not implemented" if filters.length > 1 method, value = filters.first method.sub!("field:", '') kwargs[:select] = {method => value} end original_pos_index(*args, **kwargs) end |
.range_index(*args, filters: nil, **kwargs) ⇒ Object
56 57 58 59 60 61 62 63 64 |
# File 'lib/rbbt/tsv/refactor.rb', line 56 def range_index(*args, filters: nil, **kwargs) if filters raise "Not implemented" if filters.length > 1 method, value = filters.first method.sub!("field:", '') kwargs[:select] = {method => value} end original_range_index(*args, **kwargs) end |
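Both wrappers accept a single filters pair of the form ["field:<Field name>", value] and translate it into a :select option for the original index builders; more than one filter raises "Not implemented". A hedged sketch (receiver, field names, and the remaining positional arguments are illustrative):

  index = tsv.range_index("Start", "End", filters: [["field:Chromosome", "1"]])
  index = tsv.pos_index("Position", filters: [["field:Strand", "+"]])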
.read_matrix(tsv, field_format = "ID", value_format = "Value", *others) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/rbbt/tsv/matrix.rb', line 2

def self.read_matrix(tsv, field_format = "ID", value_format = "Value", *others)
  tsv = TSV.open(tsv) unless TSV === tsv

  if others.any?
    other_tsv = tsv.slice(others)
    tsv = tsv.slice(tsv.fields - others)
  end

  key_field, *fields = tsv.all_fields

  options = tsv.options.merge(:key_field => key_field, :fields => [field_format, value_format], :type => :double, :cast => nil)

  options[:filename]    ||= tsv.filename
  options[:identifiers] ||= tsv.identifier_files.first

  dumper = TSV::Dumper.new(options)
  dumper.init
  TSV.traverse tsv, :into => dumper do |key, values|
    [key, [fields, values]]
  end

  res = TSV.open(dumper.stream, options)

  if others.any?
    other_tsv = other_tsv.to_double
    res.attach other_tsv, :one2one => true
  else
    res
  end
end
.reorder_stream(stream, positions, sep = "\t") ⇒ Object
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 |
# File 'lib/rbbt/tsv/stream.rb', line 253 def self.reorder_stream(stream, positions, sep = "\t") Open.open_pipe do |sin| line = stream.gets line.chomp! unless line.nil? while line =~ /^#\:/ sin.puts line line = stream.gets line.chomp! unless line.nil? end while line =~ /^#/ if Hash === positions new = (0..line.split(sep,-1).length-1).to_a positions.each do |k,v| new[k] = v new[v] = k end positions = new end sin.puts "#" + line.sub(/^#/,'').chomp.split(sep).values_at(*positions).compact * sep line = stream.gets line.chomp! unless line.nil? end while line if Hash === positions new = (0..line.split(sep, -1).length-1).to_a positions.each do |k,v| new[k] = v new[v] = k end positions = new end values = line.split(sep, -1) new_values = values.values_at(*positions) sin.puts new_values * sep line = stream.gets line.chomp! unless line.nil? end end end |
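A usage sketch (file name hypothetical): positions may be an Array of column indices to keep, in that order, or a Hash of index swaps; #: preamble lines are passed through and # header lines are reordered together with the data rows.

  io = TSV.reorder_stream(Open.open('file.tsv'), [0, 2])
  puts io.read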
.reorder_stream_tsv(stream, key_field, fields = nil, zipped = true, bar = nil) ⇒ Object
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 |
# File 'lib/rbbt/tsv/stream.rb', line 297

def self.reorder_stream_tsv(stream, key_field, fields = nil, zipped = true, bar = nil)
  parser = TSV::Parser.new TSV.get_stream(stream)
  options = parser.options
  options[:key_field] = key_field
  options[:fields] = fields if fields
  dumper = TSV::Dumper.new options
  dumper.init
  case parser.type
  when :single
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys, values|
      key = keys.first
      [key, [values]]
    end
  when :double
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys, values|
      res = []
      keys = [keys] unless Array === keys
      keys.each_with_index do |key, i|
        vs = zipped ? values.collect{|l| l.length == 1 ? l : [l[i]] } : values
        res << [key, vs]
      end
      res.extend MultipleResult
      res
    end
  when :list
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys, values|
      key = keys === Array ? keys.first : keys
      [key, values]
    end
  when :flat
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys, values|
      key = keys === Array ? keys.first : keys
      [key, values]
    end
  else
    raise "Unknown type: " << parser.type.to_s
  end
  dumper
end
.xls(filename, options = {}) ⇒ Object
273 274 275 276 277 278 279 280 281 282 |
# File 'lib/rbbt/tsv/excel.rb', line 273

def self.xls(filename, options = {})
  if Open.remote? filename
    TmpFile.with_file nil, :extension => 'xls' do |tmp|
      Open.download(filename, tmp)
      TSV::XLS.read(tmp, options)
    end
  else
    TSV::XLS.read(filename, options)
  end
end
.xlsx(filename, options = {}) ⇒ Object
284 285 286 287 288 289 290 291 292 293 294 |
# File 'lib/rbbt/tsv/excel.rb', line 284

def self.xlsx(filename, options = {})
  if Open.remote? filename
    TmpFile.with_file nil, :extension => 'xlsx' do |tmp|
      Open.download(filename, tmp)
      TSV::XLSX.read(tmp, options)
    end
  else
    TSV::XLSX.read(filename, options)
  end
end
Instance Method Details
#attach_index(other, index, fields = nil) ⇒ Object
146 147 148 |
# File 'lib/rbbt/tsv/refactor.rb', line 146 def attach_index(tsv, index = nil) self.attach tsv, index: index end |
#attach_same_key(other, fields = nil) ⇒ Object
137 138 139 140 141 142 143 144 |
# File 'lib/rbbt/tsv/refactor.rb', line 137 def attach_same_key(tsv, fields = nil) fields = [fields] unless fields.nil? || Array === fields if fields self.attach tsv, :fields => fields else self.attach tsv end end |
#attach_source_key(other, source, options = {}) ⇒ Object
170 171 172 |
# File 'lib/rbbt/tsv/refactor.rb', line 170 def attach_source_key(other, key) attach other, other_key: key end |
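All three attach_* helpers are now thin forwarders to #attach (a small sketch; other and index are hypothetical objects):

  tsv.attach_same_key(other, ["Associated Gene Name"]) # tsv.attach(other, :fields => ["Associated Gene Name"])
  tsv.attach_source_key(other, "Ensembl Gene ID")      # tsv.attach(other, other_key: "Ensembl Gene ID")
  tsv.attach_index(other, index)                       # tsv.attach(other, index: index)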
#dumper_stream(keys = nil, no_options = false, unmerge = false) ⇒ Object
100 101 102 103 104 105 106 |
# File 'lib/rbbt/tsv/refactor.rb', line 100

def dumper_stream(keys = nil, no_options = false, unmerge = false)
  if Hash === keys
    original_dumper_stream(keys)
  else
    original_dumper_stream(:keys => keys, unmerge: unmerge, preamble: !no_options)
  end
end
#entity_options ⇒ Object
4 5 6 |
# File 'lib/rbbt/workflow/refactor/entity.rb', line 4

def entity_options
  @entity_options ||= {}
end
#entity_templates ⇒ Object
8 9 10 |
# File 'lib/rbbt/workflow/refactor/entity.rb', line 8 def entity_templates @entity_templates ||= {} end |
#excel(filename, options = {}) ⇒ Object
264 265 266 267 268 269 270 |
# File 'lib/rbbt/tsv/excel.rb', line 264

def excel(filename, options = {})
  if filename =~ /\.xlsx$/
    xlsx(filename, options)
  else
    xls(filename, options)
  end
end
#field_index(field) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/rbbt/tsv/field_index.rb', line 13 def field_index(field) @field_indices ||= {} @field_indices[field] ||= Persist.persist_tsv(self, filename, {:field => field}, :prefix => "FieldIndex", :dir => TSV.field_index_dir, :persist => true, :serializer => :list, :engine => "BDB" ) do |data| data.serializer = :flat tsv = {} case type when :single, :list through :key, [field] do |key, values| value = values.first tsv[value] ||= [] tsv[value] << key end else through :key, [field] do |key, values| values.first.each do |value| tsv[value] ||= [] tsv[value] << key end end end TSV.setup(data, :key_field => field, :fields => ["Keys"], :type => :flat) tsv.each do |v,keys| data[v] = keys.sort end data end end |
#field_index_select(matches) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/rbbt/tsv/field_index.rb', line 44 def field_index_select(matches) final = nil matches.each do |field,values| i = field_index(field) if Array === values keys = values.inject([]){|acc,value| m = i[value]; acc = m.nil? ? acc : Misc.merge_sorted_arrays(acc, m) } else keys = i[values] || [] end final = final.nil? ? keys : Misc.intersect_sorted_arrays(final, keys) end final end |
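A usage sketch (field names hypothetical): field_index builds and persists an inverted index from the values of one field to the keys that carry them, and field_index_select intersects several such lookups.

  keys_chr1 = tsv.field_index("Chromosome")["1"]
  selected  = tsv.field_index_select("Chromosome" => "1", "Strand" => ["+", "-"])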
#filter(filter_dir = nil) ⇒ Object
288 289 290 291 292 293 |
# File 'lib/rbbt/tsv/filter.rb', line 288 def filter(filter_dir = nil) self.extend Filtered self.filter_dir = filter_dir self.filters = [] self end |
#marshal_dump ⇒ Object
2 3 4 5 6 7 8 |
# File 'lib/rbbt/tsv/marshal.rb', line 2 def marshal_dump if defined?(Persist::TCAdapter) && Persist::TCAdapter === self super else [, Annotation.purge(self)] end end |
#matrix_melt(*args) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/rbbt/tsv/matrix.rb', line 32 def matrix_melt(*args) require 'rbbt/association' tsv = TSV.read_matrix(self, *args) melt = Association.index tsv, :persist => false, :recycle => true source_field,_sep,target_field = melt.key_field.partition "~" melt.add_field source_field do |k,v| k.partition("~").first end melt.add_field target_field do |k,v| k.partition("~").last end melt end |
#melt(header_field = nil, *info_fields, &block) ⇒ Object
29 30 31 32 |
# File 'lib/rbbt/tsv/melt.rb', line 29 def melt(header_field = nil, *info_fields, &block) info_fields = fields if info_fields.nil? || info_fields.empty? TSV.melt self, key_field, header_field, fields, *info_fields, &block end |
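A hedged sketch: melting reshapes the table into long format, yielding a :list TSV keyed by "ID" (values of the form "<original key>:<index>") whose fields are the original key field plus the info fields.

  long = tsv.melt               # info fields default to all fields
  long = tsv.melt(nil, "Score") # keep only the hypothetical "Score" column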
#merge_different_fields(other, options = {}) ⇒ Object
159 160 161 162 163 164 165 166 167 168 |
# File 'lib/rbbt/tsv/refactor.rb', line 159

def merge_different_fields(other, options = {})
  TmpFile.with_file do |output|
    TSV.merge_different_fields(self, other, output, options)
    options.delete :sort
    tsv = TSV.open output, options
    tsv.key_field = self.key_field unless self.key_field.nil?
    tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
    tsv
  end
end
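A usage sketch (other is a hypothetical TSV describing the same keys): the two tables are pasted through a temporary file and reopened, keeping this table's key field and concatenating both field lists.

  merged = tsv.merge_different_fields(other)
  merged.fields # => tsv.fields + other.fields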
#original_dumper_stream ⇒ Object
99 |
# File 'lib/rbbt/tsv/refactor.rb', line 99 alias original_dumper_stream dumper_stream |
#original_reorder ⇒ Object
33 |
# File 'lib/rbbt/tsv/refactor.rb', line 33 alias original_reorder reorder |
#original_to_s ⇒ Object
108 |
# File 'lib/rbbt/tsv/refactor.rb', line 108 alias original_to_s to_s |
#original_unzip ⇒ Object
25 |
# File 'lib/rbbt/tsv/refactor.rb', line 25 alias original_unzip unzip |
#ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/rbbt/tsv/parallel/through.rb', line 24 def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) q = RbbtProcessQueue.new num_procs q.callback &@ppthrough_callback @ppthrough_callback = nil q.init do |k,v| block.call k,v end begin res = through(new_key_field, new_fields, uniq, zipped) do |*p| q.process q end q.join ensure q.clean end res end |
#ppthrough_callback(&block) ⇒ Object
20 21 22 |
# File 'lib/rbbt/tsv/parallel/through.rb', line 20 def ppthrough_callback(&block) @ppthrough_callback = block end |
#pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
# File 'lib/rbbt/tsv/parallel/through.rb', line 4 def pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) q = RbbtThreadQueue.new num_threads q.init(true, &block) begin res = through(new_key_field, new_fields, one2one: zipped) do |*p| q.process p end q.join ensure q.clean end end |
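A sketch of the parallel traversals (worker count and block body are illustrative): pthrough feeds each entry to a pool of threads, while ppthrough uses a process queue together with a callback registered beforehand via #ppthrough_callback.

  tsv.pthrough(4) do |key, values|
    Log.debug "#{key}: #{values.inspect}" # runs in a worker thread for every entry
  end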
#R(script, source = nil, options = {}) ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 |
# File 'lib/rbbt/util/R.rb', line 174 def R(script, source = nil, = {}) , source = source, nil if Hash === source source ||= IndiferentHash. , :source source = [source] unless Array === source require_sources = source.collect{|source| source = R::LIB_DIR["#{source.to_s}.R"] if R::LIB_DIR["#{source.to_s}.R"].exists? "source('#{source}')" } * ";\n" if Array === source and source.any? script = require_sources + "\n\n" + script if require_sources = IndiferentHash.pull_keys , :R = IndiferentHash.pull_keys , :open [:monitor] = [:monitor] if .include?(:monitor) [:method] = [:method] if .include?(:method) [:debug] = [:debug] if .include?(:debug) [:erase] = .delete(:erase) if .include?(:erase) [:debug] = true if [:method] == :debug if .delete :debug [:monitor] = true [:method] = :shell erase = .include?(:erase) ? [:erase] : false else erase = .include?(:erase) ? [:erase] : true end tsv_R_option_str = .delete :open tsv_R_option_str = ", " + tsv_R_option_str if String === tsv_R_option_str and not tsv_R_option_str.empty? raw = .delete :raw TmpFile.with_file nil, erase do |f| Open.write(f, self.to_s) script = <<-EOF ## Loading tsv into data data = rbbt.tsv('#{f}'#{tsv_R_option_str}); #{script.strip} ## Resaving data if (! is.null(data)){ rbbt.tsv.write('#{f}', data); } NULL EOF case .delete :method when :eval R.eval_run script else R.run script, end = IndiferentHash.add_defaults , :type => :list if raw Open.read(f) else tsv = TSV.open(f, ) unless [:ignore_output] tsv.key_field = [:key] if .include? :key tsv.namespace ||= self.namespace if self.namespace tsv end end end |
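A usage sketch (the R snippet and column name are illustrative): the TSV is written to a temporary file, loaded in R as data via rbbt.tsv, and whatever is left in data after the script runs is read back as a TSV (or returned as text with the :raw option).

  result = tsv.R <<-EOF
    data$Score = as.numeric(data$Score) * 2
  EOF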
#R_console(pre_script = nil) ⇒ Object
260 261 262 263 264 265 266 267 268 269 270 |
# File 'lib/rbbt/util/R.rb', line 260 def R_console(pre_script = nil) TmpFile.with_file do |f| Log.debug{"R Console:\n" << pre_script } if pre_script TmpFile.with_file(pre_script) do |script_file| Open.write(f, self.to_s) script = "data_file = '#{f}';\n" script << "\n#\{{{Pre-script:\n\n" << pre_script << "\n#}}}Pre-script\n\n" R.console(script) end end end |
#R_interactive(script = nil, source = []) ⇒ Object
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 |
# File 'lib/rbbt/util/R.rb', line 241 def R_interactive(script = nil, source = []) TmpFile.with_file do |data_file| Open.write(data_file, self.to_s) Log.debug{"R Interactive:\n" << script } if script script =<<-EOF # Loading data data_file = '#{data_file}' data = rbbt.tsv(data_file) # Script #{script} EOF R.interactive(script) end end |
#reorder(key_field = nil, fields = nil, merge: true, one2one: true, zipped: nil, **kwargs) ⇒ Object
34 35 36 37 38 39 |
# File 'lib/rbbt/tsv/refactor.rb', line 34 def reorder(key_field = nil, fields = nil, merge: true, one2one: true, zipped: nil, **kwargs) kwargs[:one2one] = zipped if one2one.nil? kwargs.delete :persist kwargs.delete :persist_data original_reorder(key_field, fields, **kwargs) end |
#reset_filters ⇒ Object
295 296 297 298 299 300 301 302 303 304 |
# File 'lib/rbbt/tsv/filter.rb', line 295 def reset_filters if @filter_dir.nil? or @filter_dir.empty? @filters.each do |filter| filter.reset end if Array === @filters return end Dir.glob(File.join(@filter_dir, '*.filter')).each do |f| FileUtils.rm f end end |
#swap_id(field, format, options = {}, &block) ⇒ Object
41 42 43 44 45 46 |
# File 'lib/rbbt/tsv/refactor.rb', line 41 def swap_id(field = 0, merge = false, sep = ":", delete = true, **kwargs) kwargs[:merge] ||= merge kwargs[:sep] ||= sep kwargs[:delete] ||= delete change_id(field, **kwargs) end |
#to_s(keys = nil, no_options = false, unmerge = false) ⇒ Object
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# File 'lib/rbbt/tsv/refactor.rb', line 109

def to_s(keys = nil, no_options = false, unmerge = false)
  if FalseClass === keys or TrueClass === keys or Hash === keys
    no_options = keys
    keys = nil
  end

  if keys == :sort
    with_unnamed do
      keys = self.keys.sort
    end
  end

  options = {:keys => keys, unmerge: unmerge}

  case no_options
  when TrueClass, FalseClass
    options[:preamble] = ! no_options
  when Hash
    options.merge!(no_options)
  end

  io = original_dumper_stream(options.merge(stream: StringIO.new))

  io.rewind
  io.read
end
#unzip(field = 0, merge = false, sep = ":", delete = true, **kwargs) ⇒ Object
26 27 28 29 30 31 |
# File 'lib/rbbt/tsv/refactor.rb', line 26 def unzip(field = 0, merge = false, sep = ":", delete = true, **kwargs) kwargs[:merge] ||= merge kwargs[:sep] ||= sep kwargs[:delete] ||= delete original_unzip(field, **kwargs) end |
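Both unzip and swap_id keep their old positional arguments and forward them as keyword options to the refactored implementations (field names hypothetical):

  tsv.unzip("Genomic Mutation", true) # equivalent to tsv.unzip("Genomic Mutation", merge: true)
  tsv.swap_id("Associated Gene Name") # forwards to change_id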
#with_monitor(use_monitor = true) ⇒ Object
174 175 176 177 178 179 180 181 182 |
# File 'lib/rbbt/tsv/refactor.rb', line 174 def with_monitor(use_monitor = true) monitor_state = monitor monitor = use_monitor begin yield ensure monitor = monitor_state end end |