Module: TSV

Defined in:
lib/rbbt/util/R.rb,
lib/rbbt/tsv/melt.rb,
lib/rbbt/tsv/excel.rb,
lib/rbbt/tsv/filter.rb,
lib/rbbt/tsv/matrix.rb,
lib/rbbt/tsv/stream.rb,
lib/rbbt/tsv/marshal.rb,
lib/rbbt/tsv/refactor.rb,
lib/rbbt/tsv/refactor.rb,
lib/rbbt/tsv/attach/util.rb,
lib/rbbt/tsv/field_index.rb,
lib/rbbt/tsv/serializers.rb,
lib/rbbt/tsv/parallel/through.rb,
lib/rbbt/workflow/refactor/entity.rb

Defined Under Namespace

Modules: XLS, XLSX

Classes: BinarySerializer, CleanSerializer, FloatArraySerializer, FloatSerializer, IntegerArraySerializer, IntegerSerializer, StrictFloatArraySerializer, StrictIntegerArraySerializer, StringArraySerializer, StringDoubleArraySerializer, StringSerializer, TSVMarshalSerializer, TSVSerializer

Constant Summary collapse

# Maps short symbolic serializer names to the serializer objects they stand for.
# Several aliases intentionally share a serializer (:single/:string and
# :list/:flat).
# NOTE(review): :marshal points at the constant `Marshal` as resolved in this
# scope, presumably Ruby's core Marshal module -- confirm.
SERIALIZER_ALIAS =
{
  :integer => IntegerSerializer, 
  :float => FloatSerializer, 
  :integer_array => IntegerArraySerializer,
  :float_array => FloatArraySerializer,
  :strict_integer_array => StrictIntegerArraySerializer,
  :strict_float_array => StrictFloatArraySerializer,
  :marshal => Marshal,
  :single => StringSerializer,
  :string => StringSerializer,
  :list => StringArraySerializer,
  :flat => StringArraySerializer,
  :double => StringDoubleArraySerializer,
  :clean => CleanSerializer,
  :binary => BinarySerializer,
  :tsv => TSVSerializer,
  :marshal_tsv => TSVMarshalSerializer
}

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Attribute Details

.field_index_dirObject

Returns the value of attribute field_index_dir.



4
5
6
# File 'lib/rbbt/tsv/field_index.rb', line 4

# Reader for the module-level @field_index_dir setting.
# #field_index passes this value as the :dir option when persisting an index.
def field_index_dir
  @field_index_dir
end

Instance Attribute Details

#field_indicesObject

Returns the value of attribute field_indices.



10
11
12
# File 'lib/rbbt/tsv/field_index.rb', line 10

# Reader for @field_indices, the per-instance cache that #field_index fills
# (a Hash keyed by field; nil until the first index is requested).
def field_indices
  @field_indices
end

Class Method Details

._clean_float(v) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/rbbt/tsv/excel.rb', line 16

# Normalises scientific notation so the exponent marker is an upper-case "E".
#
# * Float  -> its string form, with a trailing "e<digits>" / "e-<digits>"
#             rewritten to "E...".
# * String -> rewritten the same way, but only when the whole string looks like
#             a number in lower-case exponent notation; otherwise untouched.
# * anything else is returned as is.
def self._clean_float(v)
  exponent = /e(-?\d+)$/

  return v.to_s.sub(exponent, 'E\1') if Float === v
  return v unless String === v
  return v unless v =~ /^-?[\d\.]+e(-?\d+)$/

  v.sub(exponent, 'E\1')
end

._excel_data(tsv, options = {}) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/rbbt/tsv/excel.rb', line 32

# Flattens a TSV into the header and row data used for spreadsheet export.
#
# Options read here (all removed from `options` through
# IndiferentHash.process_options):
#   :name          - when truthy, keys/values responding to #name are replaced
#                    by their name (falling back to the object when name is nil)
#   :sep2          - separator used to join multi-valued cells (default ', ')
#   :unmerge       - when truthy, multi-valued cells are expanded into one row
#                    per zipped value instead of being joined
#   :sort_by       - field passed to tsv.sort_by
#   :sort_by_cast  - method name sent to the sort value (or to its first
#                    element when it is an Array) before sorting
#   :remove_links  - when truthy, values go through _remove_link
#
# Returns a two-element Array: [tsv.all_fields, rows].
def self._excel_data(tsv, options ={})
  options = IndiferentHash.add_defaults options, :sep2 => ', '

  name = IndiferentHash.process_options options, :name
  sep2 = IndiferentHash.process_options options, :sep2
  unmerge = IndiferentHash.process_options options, :unmerge
  sort_by = IndiferentHash.process_options options, :sort_by
  sort_by_cast = IndiferentHash.process_options options, :sort_by_cast
  remove_links = IndiferentHash.process_options options, :remove_links

  # NOTE(review): `i` is incremented per row below but never read in this method.
  i = 1
  if sort_by
    if sort_by_cast
      data = tsv.sort_by sort_by do |k, v| 
        if Array === v
          v.first.send(sort_by_cast)
        else
          v.send(sort_by_cast)
        end
      end
    else
      data = tsv.sort_by sort_by
    end
  else
    data = tsv
  end

  rows = []
  data.through do |key, values|
    cells = []
    cells.push((name and key.respond_to?(:name)) ?  key.name || key : key )

    # Single-valued entries are wrapped so both shapes share the loop below.
    values = [values] unless Array === values
    values.each do |value|
      v = (name and value.respond_to?(:name)) ?  value.name || value : value 
      if Array === v
        v = v.collect{|_v| _remove_link(_v)} if remove_links
        v = v.collect{|_v| _clean_float(_v)} 
        if unmerge
          # Kept as an Array; expanded into several rows after the traversal.
          cells.push v
        else
          cells.push v * sep2
        end
      else
        # NOTE(review): scalar cells are not passed through _clean_float,
        # unlike the Array branch above -- confirm this is intended.
        v = _remove_link(v) if remove_links
        cells.push v
      end
    end

    rows << cells
    i += 1
  end
  if unmerge
    new_rows = []
    rows.each do |row|
      # The first cell is the key; it is repeated on every expanded row.
      header = row.shift
      NamedArray.zip_fields(row).each do |values|
        new_rows << [header] + values
      end
    end
    rows = new_rows
  end
  [tsv.all_fields, rows]
end


8
9
10
11
12
13
14
# File 'lib/rbbt/tsv/excel.rb', line 8

# Strips the first HTML-like element found in a String, returning only the
# text between its opening and closing tags (e.g. '<a href="x">Gene</a>'
# gives 'Gene'). Non-String values and Strings without such an element are
# returned unchanged.
def self._remove_link(value)
  return value unless String === value

  tag = /<([\w]+)[^>]*>(.*?)<\/\1>/.match(value)
  tag ? tag[2] : value
end

.attach(*args, **kwargs) ⇒ Object



186
187
188
189
190
191
192
# File 'lib/rbbt/tsv/refactor.rb', line 186

# Compatibility wrapper around attach_orig: the legacy :zipped keyword is
# translated into :one2one. An explicitly supplied :one2one wins over :zipped;
# :zipped itself is never forwarded.
def attach(*args, **kwargs)
  zipped_given = kwargs.key?(:zipped)
  zipped = kwargs.delete(:zipped)
  kwargs[:one2one] = zipped if zipped_given && !kwargs.key?(:one2one)

  attach_orig(*args, **kwargs)
end

.attach_origObject



185
# File 'lib/rbbt/tsv/refactor.rb', line 185

# Keep the existing `attach` reachable as `attach_orig` so that a later
# redefinition of `attach` can still delegate to it.
alias attach_orig attach

.build_traverse_index(files, options = {}) ⇒ Object



327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/rbbt/tsv/attach/util.rb', line 327

# Builds an index that walks through `files` by chaining matching fields.
#
# Defaults :in_namespace => false and :persist_input => true are applied to
# `options` before it is handed to find_path. Returns nil when find_path
# finds no path; otherwise returns index_for_traversal(path, persist_input).
def self.build_traverse_index(files, options = {})
  options       = Misc.add_defaults options, :in_namespace => false, :persist_input => true
  persist_input = options[:persist_input]

  path = find_path(files, options)
  return nil if path.nil?

  traversal = path.map(&:first).join(" => ")
  Log.debug "Found Traversal: #{traversal}"

  index_for_traversal path, persist_input
end

.excel(filename, options = {}) ⇒ Object



296
297
298
299
300
301
302
# File 'lib/rbbt/tsv/excel.rb', line 296

# Reads a spreadsheet, choosing the reader from the file name: names ending
# in ".xlsx" go to xlsx, everything else goes to xls.
def self.excel(filename, options = {})
  return xlsx(filename, options) if filename =~ /\.xlsx$/

  xls(filename, options)
end

.excel2tsv(filename, options = {}) ⇒ Object



304
305
306
# File 'lib/rbbt/tsv/excel.rb', line 304

# Alternative name for excel: forwards both arguments unchanged and returns
# whatever excel returns.
def self.excel2tsv(filename, options ={})
  excel(filename, options)
end

.find_path(files, options = {}) ⇒ Object

May make an extra index!



243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/rbbt/tsv/attach/util.rb', line 243

# Looks for a chain of fields that links consecutive `files`.
#
# For each file except the last, the first of its fields that matches (per
# Misc.match_fields) some field of the next file is selected. Returns nil as
# soon as a pair of consecutive files shares no matching field; otherwise
# returns the selected fields zipped with `files`, i.e. [[field, file], ...].
#
# With the :in_namespace option, the candidate fields of the first file are
# restricted to that field itself (when the file has it) or to
# all_namespace_fields(in_namespace).
def self.find_path(files, options = {})
  options      = Misc.add_defaults options, :in_namespace => false
  in_namespace = options[:in_namespace]

  if in_namespace
    if files.first.all_fields.include? in_namespace
      ids = [[in_namespace]]
    else
      ids = [files.first.all_namespace_fields(in_namespace)]
    end
    ids += files[1..-1].collect{|f| f.all_fields}
  else
    ids = files.collect{|f| f.all_fields }
  end

  id_list = []

  ids.each_with_index do |list, i|
    # The last file has no successor to be matched against.
    break if i == ids.length - 1
    match = list.select{|field| 
      ids[i + 1].select{|f| Misc.match_fields(field, f) }.any?
    }
    return nil if match.empty?
    id_list << match.first
  end

  # When the last link does not already land on the first field (the key) of
  # the last file, that field is appended as a final step.
  if ! Misc.match_fields(id_list.last, files.last.all_fields.first)
    id_list << files.last.all_fields.first
    id_list.zip(files)
  else
    # id_list is one element shorter than files here, so zip pairs every
    # selected field with its file and leaves the last file unpaired.
    id_list.zip(files[0..-1])
  end
end

.find_traversal(tsv1, tsv2, options = {}) ⇒ Object



344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
# File 'lib/rbbt/tsv/attach/util.rb', line 344

# Searches for an index that translates between tsv1 and tsv2, going through
# their identifier files when the two do not connect directly.
#
# Candidate chains are tried in order: tsv1 followed by a growing prefix of
# its identifier files (plus options[:identifiers], when given), joined with
# the reverse of a growing prefix of [tsv2, *tsv2.identifier_files]. The first
# chain for which build_traverse_index returns a non-nil index wins.
#
# Returns that index, or nil when no chain works.
def self.find_traversal(tsv1, tsv2, options = {})
  options = Misc.add_defaults options, :in_namespace => false

  # Work on copies: the lists are modified with unshift/shift below, and the
  # arrays returned by identifier_files must not be altered as a side effect.
  identifiers1 = (tsv1.identifier_files || []).dup
  identifiers1 += [options[:identifiers]].flatten if options[:identifiers]
  identifiers2 = (tsv2.identifier_files || []).dup

  identifiers1.unshift tsv1
  identifiers2.unshift tsv2

  files1 = []
  until identifiers1.empty?
    files1.push identifiers1.shift
    identifiers2.each_index do |i|
      files2 = identifiers2[0..i]
      index  = build_traverse_index(files1 + files2.reverse, options)
      return index unless index.nil?
    end
  end

  nil
end

.header_lines(key_field, fields, entry_hash = nil) ⇒ Object



87
88
89
90
91
# File 'lib/rbbt/tsv/refactor.rb', line 87

# Produces the header for the given key field and fields through
# TSV::Dumper.header. `entry_hash` may carry additional header options; its
# :key_field and :fields entries are overridden by the arguments. The hash
# passed in is not modified.
def self.header_lines(key_field, fields, entry_hash = nil)
  header_options = (entry_hash || {}).merge(:key_field => key_field, :fields => fields)
  TSV::Dumper.header header_options
end

.index_for_traversal(path, persist_input = false) ⇒ Object



277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/rbbt/tsv/attach/util.rb', line 277

# Builds one index by walking `path`, an Array of [field, file] pairs such as
# the one returned by find_path.
#
# The first pair describes the data file. Each following pair extends the
# translation one file further. `path` is consumed (shifted) in the process.
# `persist_input` is passed as :persist when indexing the intermediate files.
#
# Returns the resulting index. It is nil when the path only contains the data
# file and its field is already the key field.
def self.index_for_traversal(path, persist_input = false)
  data_key, data_file = path.shift
  data_index = if data_key == data_file.key_field
                 Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
                 nil
               else
                 Log.debug "Data index required"
                 data_file.index :target => data_key, :fields => [data_file.key_field], :persist => false, :type => (data_file.type == :single ? :single : :flat)
               end

  current_index = data_index
  current_key   = data_key
  while not path.empty?
    next_key, next_file = path.shift

    # Resolve both keys to the actual field names used by the next file.
    next_fields = next_file.all_fields
    corrected_next_key = next_fields.select{|f| Misc.match_fields(f, next_key)}.first
    corrected_current_key = next_fields.select{|f| Misc.match_fields(f, current_key)}.first 

    if current_index.nil?
      # No index yet: index the next file directly and keep only the entries
      # whose key is a key of the data file.
      current_index = next_file.index(:target => corrected_next_key, :fields => [corrected_current_key], :persist => persist_input)
      current_index = current_index.select :key => data_file.keys
    else
      # NOTE(review): this branch indexes with next_key/current_key rather
      # than the corrected_* names computed above -- confirm this is intended.
      next_index = next_file.index :target => next_key, :fields => [current_key], :persist => persist_input

      next_index.with_unnamed do
        current_index.with_unnamed do
          # Replace each value of the current index by its translation
          # through next_index.
          current_index.process current_index.fields.first do |values|
            if values.nil?
              nil
            else
              new_values = next_index.values_at(*values).flatten
              if current_index.type == :single
                new_values.first
              else
                new_values
              end
            end
          end
          current_index.fields = [next_key]
        end
      end
    end
    current_key = next_key
  end

  current_index

end

.melt(tsv, key_field, header_field, fields, *info_fields, &block) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/rbbt/tsv/melt.rb', line 2

# Streams `tsv` into a :list dumper with one output entry per value position.
#
# Output entries are keyed "ID" with the form "<key>:<position>", and carry
# the original key (under `key_field`) followed by the values for
# `info_fields`. When a block is given it is called with each value; a nil
# result skips that position, any other result replaces the value.
#
# NOTE(review): the `header_field` and `fields` parameters are not used in
# this method body.
def self.melt(tsv, key_field, header_field, fields, *info_fields, &block)
  dumper = TSV::Dumper.new :key_field => "ID", :fields => [key_field] + info_fields, :type => :list
  dumper.init
  TSV.traverse tsv, :into => dumper, :fields => info_fields do |k,values|
    # Normalise the three TSV shapes into a list of per-position values.
    values = [values] if tsv.type == :single
    values = values.collect{|v| [v]} if tsv.type == :list
    values = Misc.zip_fields(values) if tsv.type == :double

    res = []
    values.each_with_index do |value,i|
      info_values = if block_given?
                      new = block.call value
                      next if new.nil?
                      new
                    else
                      value
                    end
      
      info_values = [info_values] unless tsv.type == :double
      id = [k, i] * ":"
      res << [id, [k] + [info_values].flatten]
    end
    # Tagged with MultipleResult so the traversal can tell this Array of
    # entries apart from a single entry (assumption based on the name --
    # TODO confirm).
    res.extend MultipleResult
    res
  end
end

.merge_different_fields(stream1, stream2, output, options = {}) ⇒ Object



154
155
156
157
# File 'lib/rbbt/tsv/refactor.rb', line 154

# Pastes stream1 and stream2 together (TSV.paste_streams, with `options` as
# keyword arguments), collapses the result with `compact: true`, and writes
# it to `output`.
def self.merge_different_fields(stream1, stream2, output, options = {})
  pasted    = TSV.paste_streams([stream1, stream2], **options)
  collapsed = Open.collapse_stream(pasted, compact: true)
  # Earlier variant wrote the pasted stream without collapsing it:
  #Open.write(output, TSV.paste_streams([stream1, stream2], **options))
  Open.write(output, collapsed)
end

.merge_row_fields(input, output, options = {}) ⇒ Object



150
151
152
# File 'lib/rbbt/tsv/refactor.rb', line 150

# Collapses `input` with Open.collapse_stream (passing `options` as keyword
# arguments) and writes the collapsed stream to `output`.
def self.merge_row_fields(input, output, options = {})
  collapsed = Open.collapse_stream(input, **options)
  Open.write(output, collapsed)
end

.open(source, type = nil, options = nil) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
# File 'lib/rbbt/tsv/refactor.rb', line 11

# Compatibility wrapper around original_open accepting the legacy call shapes
# open(source, options), open(source, type, options) and
# open(source, "opt~string").
#
# * A Hash or "~"-containing String given in the `type` position is treated
#   as the options.
# * Option strings containing "~" are parsed with TSV.str2options.
# * `type`, when given, becomes options[:type] unless one is already set.
# * The legacy :zipped option is renamed to :one2one (a nil value is dropped).
# * :keep_empty is discarded.
#
# The caller's options Hash is never modified; a copy is adjusted and passed on.
def open(source, type = nil, options = nil)
  type, options = nil, type if options.nil? and (Hash === type or (String === type and type.include? "~"))
  options = TSV.str2options(options) if String === options and options.include? "~"

  # Copy before editing so the deletions below do not leak to the caller.
  options = options ? options.dup : {}
  options[:type] ||= type unless type.nil?

  # An explicit `zipped: false` must be forwarded too, so test for nil rather
  # than for truthiness.
  zipped = options.delete(:zipped)
  options[:one2one] = zipped unless zipped.nil?

  options.delete(:keep_empty)
  original_open(source, options)
end

.original_openObject



9
# File 'lib/rbbt/tsv/refactor.rb', line 9

# Keep the existing `open` reachable as `original_open` so that a later
# redefinition of `open` can still delegate to it.
alias original_open open

.original_pos_indexObject



55
# File 'lib/rbbt/tsv/refactor.rb', line 55

# Keep the existing `pos_index` reachable as `original_pos_index` so that a
# later redefinition of `pos_index` can still delegate to it.
alias original_pos_index pos_index

.original_range_indexObject



54
# File 'lib/rbbt/tsv/refactor.rb', line 54

# Keep the existing `range_index` reachable as `original_range_index` so that
# a later redefinition of `range_index` can still delegate to it.
alias original_range_index range_index

.pos_index(*args, filters: nil, **kwargs) ⇒ Object



66
67
68
69
70
71
72
73
74
# File 'lib/rbbt/tsv/refactor.rb', line 66

# Compatibility wrapper around original_pos_index that translates the legacy
# `filters:` argument into the `select:` option.
#
# Only a single filter is supported; it is given as a [name, value] pair whose
# name may carry a "field:" prefix, which is removed. Supplying more than one
# filter raises "Not implemented".
def pos_index(*args, filters: nil, **kwargs)
  if filters
    raise "Not implemented" if filters.length > 1
    method, value = filters.first
    # Non-destructive sub: the caller's string is left untouched, and frozen
    # strings are accepted.
    kwargs[:select] = {method.sub("field:", '') => value}
  end
  original_pos_index(*args, **kwargs)
end

.range_index(*args, filters: nil, **kwargs) ⇒ Object



56
57
58
59
60
61
62
63
64
# File 'lib/rbbt/tsv/refactor.rb', line 56

# Compatibility wrapper around original_range_index that translates the legacy
# `filters:` argument into the `select:` option.
#
# Only a single filter is supported; it is given as a [name, value] pair whose
# name may carry a "field:" prefix, which is removed. Supplying more than one
# filter raises "Not implemented".
def range_index(*args, filters: nil, **kwargs)
  if filters
    raise "Not implemented" if filters.length > 1
    method, value = filters.first
    # Non-destructive sub: the caller's string is left untouched, and frozen
    # strings are accepted.
    kwargs[:select] = {method.sub("field:", '') => value}
  end
  original_range_index(*args, **kwargs)
end

.read_matrix(tsv, field_format = "ID", value_format = "Value", *others) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/rbbt/tsv/matrix.rb', line 2

# Converts a matrix-shaped TSV (one column per sample/field) into a :double
# TSV with two fields: `field_format`, holding the original column names, and
# `value_format`, holding the corresponding values.
#
# `tsv` may be a TSV or anything TSV.open accepts. Columns listed in `others`
# are left out of the conversion and attached back to the result afterwards.
def self.read_matrix(tsv, field_format = "ID", value_format = "Value", *others)
  tsv = TSV.open(tsv) unless TSV === tsv

  if others.any?
    other_tsv = tsv.slice(others)
    tsv = tsv.slice(tsv.fields - others)
  end

  key_field, *fields = tsv.all_fields
  options = tsv.options.merge(:key_field => key_field, :fields => [field_format, value_format], :type => :double, :cast => nil)

  options[:filename] ||= tsv.filename
  # NOTE(review): this calls .first on identifier_files without a nil guard,
  # while find_traversal uses `identifier_files || []` -- confirm it cannot be
  # nil here.
  options[:identifiers] ||= tsv.identifier_files.first

  dumper = TSV::Dumper.new(options)

  dumper.init
  TSV.traverse tsv, :into => dumper do |key, values|
    # Each row pairs the full list of column names with that row's values.
    [key, [fields, values]]
  end

  res = TSV.open(dumper.stream, options)
  if others.any?
    other_tsv = other_tsv.to_double
    res.attach other_tsv, :one2one => true
  else
    res
  end
end

.reorder_stream(stream, positions, sep = "\t") ⇒ Object



253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/rbbt/tsv/stream.rb', line 253

# Returns a pipe (Open.open_pipe) that emits the lines of `stream` with their
# `sep`-separated columns rearranged.
#
# `positions` is either an Array of column indices, used directly with
# values_at, or a Hash of index pairs to swap. A Hash is expanded into a full
# index Array the first time a header or data line is seen, using that line's
# column count.
#
# Lines starting with "#:" are copied through unchanged; other "#" lines are
# treated as headers and reordered, and so is every remaining line.
def self.reorder_stream(stream, positions, sep = "\t")
  Open.open_pipe do |sin|
    line = stream.gets
    line.chomp! unless line.nil?

    # Preamble lines ("#:") pass through untouched.
    while line =~ /^#\:/
      sin.puts line
      line = stream.gets
      line.chomp! unless line.nil?
    end

    # Header lines.
    while line  =~ /^#/
      if Hash === positions
        # Start from the identity ordering, then swap each k <-> v pair.
        new = (0..line.split(sep,-1).length-1).to_a
        positions.each do |k,v|
          new[k] = v
          new[v] = k
        end
        positions = new
      end
      # NOTE(review): the header is split without the -1 limit and compacted,
      # unlike data lines below, so trailing empty header cells are dropped.
      sin.puts "#" + line.sub(/^#/,'').chomp.split(sep).values_at(*positions).compact * sep
      line = stream.gets
      line.chomp! unless line.nil?
    end

    # Data lines.
    while line
      if Hash === positions
        new = (0..line.split(sep, -1).length-1).to_a
        positions.each do |k,v|
          new[k] = v
          new[v] = k
        end
        positions = new
      end
      # Limit -1 keeps trailing empty cells.
      values = line.split(sep, -1)
      new_values = values.values_at(*positions)
      sin.puts new_values * sep
      line = stream.gets
      line.chomp! unless line.nil?
    end
  end
end

.reorder_stream_tsv(stream, key_field, fields = nil, zipped = true, bar = nil) ⇒ Object



297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# File 'lib/rbbt/tsv/stream.rb', line 297

# Re-keys a TSV stream by `key_field` (optionally restricted to `fields`) and
# returns a TSV::Dumper that receives the reordered entries.
#
# stream    - anything TSV.get_stream accepts
# key_field - field to use as the new key
# fields    - fields to keep, or nil for the parser's own
# zipped    - for :double TSVs, when true each new key receives only the
#             values at its own position (single-valued fields are kept whole)
# bar       - passed to TSV.traverse as :bar
#
# Raises RuntimeError ("Unknown type: ...") for an unsupported parser type.
def self.reorder_stream_tsv(stream, key_field, fields=nil, zipped = true, bar = nil)
  parser = TSV::Parser.new TSV.get_stream(stream)
  dumper_options = parser.options
  dumper_options[:key_field] = key_field
  dumper_options[:fields] = fields if fields
  dumper = TSV::Dumper.new dumper_options
  dumper.init
  case parser.type
  when :single
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
      key = keys.first
      [key, [values]]
    end
  when :double
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
      res = []
      keys = [keys] unless Array === keys
      keys.each_with_index do |key,i|
        vs = zipped ?  values.collect{|l| l.length == 1 ? l : [l[i]] } : values
        res << [key, vs]
      end
      # One input entry may produce several output entries.
      res.extend MultipleResult
      res
    end
  when :list, :flat
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
      # The class goes on the left of ===; `keys === Array` is never true, so
      # Array keys used to be passed through whole instead of using the first.
      key = Array === keys ? keys.first : keys
      [key, values]
    end
  else
    raise "Unknown type: #{parser.type}"
  end
  dumper
end

.xls(filename, options = {}) ⇒ Object



273
274
275
276
277
278
279
280
281
282
# File 'lib/rbbt/tsv/excel.rb', line 273

# Reads an .xls spreadsheet through TSV::XLS.read. Remote locations (per
# Open.remote?) are first downloaded into a temporary file with an 'xls'
# extension, and that file is read instead.
def self.xls(filename, options = {})
  return TSV::XLS.read(filename, options) unless Open.remote? filename

  TmpFile.with_file nil, :extension => 'xls' do |local_copy|
    Open.download(filename, local_copy)
    TSV::XLS.read(local_copy, options)
  end
end

.xlsx(filename, options = {}) ⇒ Object



284
285
286
287
288
289
290
291
292
293
294
# File 'lib/rbbt/tsv/excel.rb', line 284

# Reads an .xlsx spreadsheet through TSV::XLSX.read. Remote locations (per
# Open.remote?) are first downloaded into a temporary file with an 'xlsx'
# extension, and that file is read instead.
def self.xlsx(filename, options = {})
  return TSV::XLSX.read(filename, options) unless Open.remote? filename

  TmpFile.with_file nil, :extension => 'xlsx' do |local_copy|
    Open.download(filename, local_copy)
    TSV::XLSX.read(local_copy, options)
  end
end

Instance Method Details

#attach_index(other, index, fields = nil) ⇒ Object



146
147
148
# File 'lib/rbbt/tsv/refactor.rb', line 146

# Legacy entry point: attaches `tsv` to this TSV, passing `index` to #attach
# as the `index:` keyword.
def attach_index(tsv, index = nil)
  self.attach tsv, index: index
end

#attach_same_key(other, fields = nil) ⇒ Object



137
138
139
140
141
142
143
144
# File 'lib/rbbt/tsv/refactor.rb', line 137

# Legacy entry point: attaches `tsv` to this TSV through #attach.
# When `fields` is given (a single field or an Array of them) it is passed on
# as the :fields option, always as an Array; when nil, no option is passed.
def attach_same_key(tsv, fields = nil)
  return self.attach(tsv) if fields.nil?

  field_list = Array === fields ? fields : [fields]
  self.attach tsv, :fields => field_list
end

#attach_source_key(other, source, options = {}) ⇒ Object



170
171
172
# File 'lib/rbbt/tsv/refactor.rb', line 170

# Legacy entry point: attaches `other` through attach, passing `key` as the
# `other_key:` keyword.
def attach_source_key(other, key)
  attach other, other_key: key
end

#dumper_stream(keys = nil, no_options = false, unmerge = false) ⇒ Object



100
101
102
103
104
105
106
# File 'lib/rbbt/tsv/refactor.rb', line 100

# Compatibility wrapper around original_dumper_stream accepting the legacy
# positional signature.
#
# keys       - keys to dump, or an options Hash, which is forwarded untouched
# no_options - true to leave the preamble out of the stream
# unmerge    - forwarded as the :unmerge option
#
# `no_options` is the negation of the :preamble option, as in #to_s; passing
# it straight through would switch the preamble off by default.
def dumper_stream(keys = nil, no_options = false, unmerge = false)
  return original_dumper_stream(keys) if Hash === keys

  original_dumper_stream(:keys => keys, unmerge: unmerge, preamble: !no_options)
end

#entity_optionsObject



4
5
6
# File 'lib/rbbt/workflow/refactor/entity.rb', line 4

# Lazily initialised Hash stored in @entity_options; the same Hash is returned
# on every call.
def entity_options
  @entity_options ||= {}
end

#entity_templatesObject



8
9
10
# File 'lib/rbbt/workflow/refactor/entity.rb', line 8

# Lazily initialised Hash stored in @entity_templates; the same Hash is
# returned on every call.
def entity_templates
  @entity_templates ||= {}
end

#excel(filename, options = {}) ⇒ Object



264
265
266
267
268
269
270
# File 'lib/rbbt/tsv/excel.rb', line 264

# Writes this TSV as a spreadsheet, choosing the writer from the file name:
# names ending in ".xlsx" go to #xlsx, everything else goes to #xls.
def excel(filename, options = {})
  filename =~ /\.xlsx$/ ? xlsx(filename, options) : xls(filename, options)
end

#field_index(field) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/rbbt/tsv/field_index.rb', line 13

# Returns an index from the values of `field` to the sorted list of keys
# holding that value.
#
# The index is memoised per field in @field_indices and built through
# Persist.persist_tsv (prefix "FieldIndex", directory TSV.field_index_dir,
# "BDB" engine), so the block below only runs when the persisted copy needs
# to be created.
def field_index(field)
  @field_indices ||= {}
  @field_indices[field] ||= Persist.persist_tsv(self, filename, {:field => field}, :prefix => "FieldIndex", :dir => TSV.field_index_dir, :persist => true, :serializer => :list, :engine => "BDB" ) do |data|
    # NOTE(review): the persist call requests :serializer => :list but it is
    # switched to :flat here -- confirm which one is meant.
    data.serializer = :flat

    # Collect value => [keys] in memory first.
    tsv = {}
    case type 
    when :single, :list
      through :key, [field] do |key, values|
        value = values.first
        tsv[value] ||= []
        tsv[value] << key
      end
    else
      # Multi-valued field: every value of the entry points back to the key.
      through :key, [field] do |key, values|
        values.first.each do |value|
          tsv[value] ||= []
          tsv[value] << key
        end
      end
    end

    TSV.setup(data, :key_field => field, :fields => ["Keys"], :type => :flat)
    tsv.each do |v,keys|
      # Keys are stored sorted; field_index_select combines them with the
      # Misc sorted-array helpers.
      data[v] = keys.sort
    end

    data
  end
end

#field_index_select(matches) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/rbbt/tsv/field_index.rb', line 44

# Selects the keys whose fields satisfy every condition in `matches`, using
# the per-field indices from #field_index.
#
# `matches` yields [field, values] pairs. For an Array of values the keys of
# all of them are merged (values absent from the index are ignored); for a
# single value its keys are used, or [] when absent. The key lists of the
# different fields are then intersected.
#
# Returns the resulting key list, or nil when `matches` is empty.
def field_index_select(matches)
  matches.inject(nil) do |selected, (field, values)|
    index = field_index(field)

    keys = if Array === values
             values.inject([]) do |merged, value|
               hits = index[value]
               hits.nil? ? merged : Misc.merge_sorted_arrays(merged, hits)
             end
           else
             index[values] || []
           end

    selected.nil? ? keys : Misc.intersect_sorted_arrays(selected, keys)
  end
end

#filter(filter_dir = nil) ⇒ Object



288
289
290
291
292
293
# File 'lib/rbbt/tsv/filter.rb', line 288

# Turns filtering on for this TSV: extends it with Filtered, records
# `filter_dir`, and starts with an empty filter list. Returns self.
def filter(filter_dir = nil)
  extend Filtered
  self.filter_dir, self.filters = filter_dir, []
  self
end

#marshal_dumpObject



2
3
4
5
6
7
8
# File 'lib/rbbt/tsv/marshal.rb', line 2

# Custom Marshal hook.
# Objects that are a Persist::TCAdapter (when that constant is defined) defer
# to the inherited implementation; any other TSV is dumped as the pair
# [options, Annotation.purge(self)].
def marshal_dump
  if defined?(Persist::TCAdapter) && Persist::TCAdapter === self
    super
  else
    [options, Annotation.purge(self)]
  end
end

#matrix_melt(*args) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/rbbt/tsv/matrix.rb', line 32

# Melts this matrix-shaped TSV into an association index.
#
# The TSV is first converted with TSV.read_matrix (forwarding `args`), then
# indexed with Association.index. The index's key field has the form
# "<source>~<target>"; two fields named after those halves are added, holding
# the part of each key before the first "~" and the part after it.
def matrix_melt(*args)
  require 'rbbt/association'

  matrix = TSV.read_matrix(self, *args)
  melted = Association.index matrix, :persist => false, :recycle => true

  source_field, _sep, target_field = melted.key_field.partition "~"
  [[source_field, :first], [target_field, :last]].each do |field_name, side|
    melted.add_field field_name do |k, _v|
      k.partition("~").public_send(side)
    end
  end

  melted
end

#melt(header_field = nil, *info_fields, &block) ⇒ Object



29
30
31
32
# File 'lib/rbbt/tsv/melt.rb', line 29

# Instance-level shortcut for TSV.melt on this TSV: supplies its own key
# field and fields, and uses all of its fields as info fields when none are
# listed.
def melt(header_field = nil, *info_fields, &block)
  # A splat is never nil, so an emptiness check covers the "none given" case.
  info_fields = fields if info_fields.empty?
  TSV.melt(self, key_field, header_field, fields, *info_fields, &block)
end

#merge_different_fields(other, options = {}) ⇒ Object



159
160
161
162
163
164
165
166
167
168
# File 'lib/rbbt/tsv/refactor.rb', line 159

# Merges this TSV with `other`, which provides different fields, and returns
# the merged TSV.
#
# Both are merged into a temporary file by TSV.merge_different_fields and the
# result is loaded with TSV.open. The :sort option is only used for the merge
# step and is not passed to TSV.open. The key field and the combined field
# list are restored from the two inputs when they are known.
#
# The caller's options Hash is left untouched.
def merge_different_fields(other, options = {})
  TmpFile.with_file do |output|
    TSV.merge_different_fields(self, other, output, options)

    # Delete :sort from a copy so the caller's hash is not changed.
    open_options = options.dup
    open_options.delete :sort

    tsv = TSV.open output, open_options
    tsv.key_field = self.key_field unless self.key_field.nil?
    tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
    tsv
  end
end

#original_dumper_streamObject



99
# File 'lib/rbbt/tsv/refactor.rb', line 99

# Keep the existing `dumper_stream` reachable as `original_dumper_stream` so
# that a later redefinition can still delegate to it.
alias original_dumper_stream dumper_stream

#original_reorderObject



33
# File 'lib/rbbt/tsv/refactor.rb', line 33

# Keep the existing `reorder` reachable as `original_reorder` so that a later
# redefinition can still delegate to it.
alias original_reorder reorder

#original_to_sObject



108
# File 'lib/rbbt/tsv/refactor.rb', line 108

# Keep the existing `to_s` reachable as `original_to_s` before it is
# redefined.
alias original_to_s to_s

#original_unzipObject



25
# File 'lib/rbbt/tsv/refactor.rb', line 25

# Keep the existing `unzip` reachable as `original_unzip` so that a later
# redefinition can still delegate to it.
alias original_unzip unzip

#ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/rbbt/tsv/parallel/through.rb', line 24

# Traverses the TSV like #through, but hands every entry to a
# RbbtProcessQueue with `num_procs` workers that run `block` on it.
#
# A callback registered beforehand with #ppthrough_callback is installed on
# the queue and then cleared, so it applies to this run only. The queue is
# always cleaned up, even when the traversal raises.
#
# Returns the value returned by #through.
def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)

  q = RbbtProcessQueue.new num_procs

  q.callback(&@ppthrough_callback)
  @ppthrough_callback = nil

  q.init do |k,v|
    block.call k,v
  end

  begin
    # NOTE(review): #pthrough calls through with `one2one: zipped`; the
    # positional form is kept here -- confirm which signature through expects.
    res = through(new_key_field, new_fields, uniq, zipped) do |*p|
      # Enqueue the yielded entry; the queue object itself is not the job.
      q.process p
    end
    q.join
  ensure
    q.clean
  end

  res
end

#ppthrough_callback(&block) ⇒ Object



20
21
22
# File 'lib/rbbt/tsv/parallel/through.rb', line 20

# Stores `block` in @ppthrough_callback. #ppthrough installs it on its queue
# as the callback and resets it to nil.
def ppthrough_callback(&block)
  @ppthrough_callback = block
end

#pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/rbbt/tsv/parallel/through.rb', line 4

# Traverses the TSV like #through, but hands every entry to a RbbtThreadQueue
# with `num_threads` workers that run `block` on it. The queue is always
# cleaned up, even when the traversal raises.
#
# Returns the value returned by #through, like #ppthrough does.
#
# NOTE(review): `uniq` is accepted for signature compatibility but is not
# forwarded to #through.
def pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
  q = RbbtThreadQueue.new num_threads

  q.init(true, &block)

  begin
    res = through(new_key_field, new_fields, one2one: zipped) do |*p|
      q.process p
    end
    q.join
  ensure
    q.clean
  end

  res
end

#R(script, source = nil, options = {}) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'lib/rbbt/util/R.rb', line 174

# Runs an R script against this TSV.
#
# The TSV is written to a temporary file and loaded in R as `data`; `script`
# then runs, and `data` is written back to the same file unless it is NULL.
#
# script  - R code to execute
# source  - R file(s) to source() first; names found as "<name>.R" under
#           R::LIB_DIR are resolved there. A Hash given here is taken as
#           `options`.
# options - recognised keys include :source, :monitor, :method, :debug,
#           :erase, :raw, :ignore_output and :key, plus the "R"- and
#           "open"-prefixed groups extracted with IndiferentHash.pull_keys.
#
# Returns the raw file content with :raw, nil with :ignore_output, and
# otherwise the re-opened TSV (type :list unless overridden in the open
# options) with :key applied as key field and this TSV's namespace as
# fallback.
def R(script, source = nil, options = {})
  options, source = source, nil if Hash === source

  source ||= IndiferentHash.process_options options, :source
  source = [source] unless Array === source

  require_sources  = source.collect{|src|
    src = R::LIB_DIR["#{src.to_s}.R"] if R::LIB_DIR["#{src.to_s}.R"].exists?
    "source('#{src}')"
  } * ";\n" if Array === source and source.any?

  script = require_sources + "\n\n" + script if require_sources

  r_options = IndiferentHash.pull_keys options, :R
  open_options = IndiferentHash.pull_keys options, :open

  r_options[:monitor] = options[:monitor] if options.include?(:monitor)
  r_options[:method] = options[:method] if options.include?(:method)
  r_options[:debug] = options[:debug] if options.include?(:debug)
  r_options[:erase] = options.delete(:erase) if options.include?(:erase)

  # Debug mode runs through the shell with monitoring and, unless :erase says
  # otherwise, keeps the temporary file for inspection.
  r_options[:debug] = true if r_options[:method] == :debug
  if r_options.delete :debug
    r_options[:monitor] = true
    r_options[:method] = :shell
    erase = r_options.include?(:erase) ? r_options[:erase] : false
  else
    erase = r_options.include?(:erase) ? r_options[:erase] : true
  end

  # Extra arguments appended to the rbbt.tsv(...) call in the R prologue.
  tsv_R_option_str = r_options.delete :open
  tsv_R_option_str = ", "  + tsv_R_option_str if String === tsv_R_option_str and not tsv_R_option_str.empty?

  raw = options.delete :raw
  TmpFile.with_file nil, erase do |f|
    Open.write(f, self.to_s)

    script = <<-EOF
## Loading tsv into data
data = rbbt.tsv('#{f}'#{tsv_R_option_str});

#{script.strip}

## Resaving data
if (! is.null(data)){ rbbt.tsv.write('#{f}', data); }
NULL
    EOF

    case r_options.delete :method
    when :eval
      R.eval_run script
    else
      R.run script, r_options
    end

    open_options = IndiferentHash.add_defaults open_options, :type => :list
    if raw
      Open.read(f)
    elsif options[:ignore_output]
      # Nothing is loaded back, so there is no TSV to annotate; calling
      # key_field=/namespace on nil here would raise.
      nil
    else
      tsv = TSV.open(f, open_options)
      tsv.key_field = options[:key] if options.include? :key
      tsv.namespace ||= self.namespace if self.namespace
      tsv
    end
  end
end

#R_console(pre_script = nil) ⇒ Object



260
261
262
263
264
265
266
267
268
269
270
# File 'lib/rbbt/util/R.rb', line 260

# Opens an R console with this TSV written to a temporary file whose path is
# available in R as `data_file`.
#
# pre_script - optional R code placed after the `data_file` assignment,
#              wrapped in "Pre-script" fold markers. When nil (the default)
#              the section is left out.
def R_console(pre_script = nil)
  TmpFile.with_file do |f|
    Log.debug{"R Console:\n" << pre_script } if pre_script
    # NOTE(review): the script file created here is not referenced by the
    # console script; kept to preserve the existing side effects.
    TmpFile.with_file(pre_script) do |_script_file|
      Open.write(f, self.to_s)
      script = "data_file = '#{f}';\n"
      # Appending nil to a String raises TypeError, so only add the section
      # when a pre-script was actually given.
      script <<  "\n#\{{{Pre-script:\n\n" << pre_script << "\n#}}}Pre-script\n\n" if pre_script
      R.console(script)
    end
  end
end

#R_interactive(script = nil, source = []) ⇒ Object



241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'lib/rbbt/util/R.rb', line 241

# Starts an interactive R session (R.interactive) with this TSV written to a
# temporary file and loaded in R as `data`; the file path is available as
# `data_file`. `script`, when given, is appended after the loading code.
#
# NOTE(review): the `source` parameter is not used in this method body.
def R_interactive(script = nil, source = [])
  TmpFile.with_file do |data_file|
    Open.write(data_file, self.to_s)

    Log.debug{"R Interactive:\n" << script } if script

    # A nil script interpolates as an empty string below.
    script =<<-EOF
# Loading data
data_file = '#{data_file}'
data = rbbt.tsv(data_file)

# Script
#{script}
    EOF

    R.interactive(script)
  end
end

#reorder(key_field = nil, fields = nil, merge: true, one2one: true, zipped: nil, **kwargs) ⇒ Object



34
35
36
37
38
39
# File 'lib/rbbt/tsv/refactor.rb', line 34

# Compatibility wrapper around original_reorder.
#
# merge:   forwarded to original_reorder
# one2one: forwarded to original_reorder
# zipped:  legacy name for one2one; when given (non-nil) it is used as the
#          one2one value
#
# :persist and :persist_data are accepted for backwards compatibility and
# dropped. All other keywords are forwarded unchanged.
#
# `merge` and `one2one` are named keywords, so they never reach **kwargs and
# have to be forwarded explicitly.
def reorder(key_field = nil, fields = nil, merge: true, one2one: true, zipped: nil, **kwargs)
  one2one = zipped unless zipped.nil?
  kwargs.delete :persist
  kwargs.delete :persist_data
  original_reorder(key_field, fields, **kwargs, merge: merge, one2one: one2one)
end

#reset_filtersObject



295
296
297
298
299
300
301
302
303
304
# File 'lib/rbbt/tsv/filter.rb', line 295

# Resets the filters of this TSV.
#
# Without a filter directory (nil or empty @filter_dir) each filter in
# @filters is reset in place, provided @filters is an Array, and nil is
# returned. With a filter directory, every '*.filter' file in it is removed
# instead.
def reset_filters
  unless @filter_dir.nil? || @filter_dir.empty?
    filter_files = Dir.glob(File.join(@filter_dir, '*.filter'))
    return filter_files.each { |path| FileUtils.rm path }
  end

  @filters.each(&:reset) if Array === @filters
  nil
end

#swap_id(field, format, options = {}, &block) ⇒ Object



41
42
43
44
45
46
# File 'lib/rbbt/tsv/refactor.rb', line 41

# Legacy positional front end for change_id.
#
# The positional `merge`, `sep` and `delete` values are passed on as keyword
# options, but only for options the caller did not already give as a keyword.
# Keywords therefore take precedence, including an explicit `false` such as
# `delete: false`, which `||=` would have replaced with the positional default.
def swap_id(field = 0, merge = false, sep = ":", delete = true, **kwargs)
  {:merge => merge, :sep => sep, :delete => delete}.each do |option, value|
    # nil counts as "not given"; false is a real value.
    kwargs[option] = value if kwargs[option].nil?
  end
  change_id(field, **kwargs)
end

#to_s(keys = nil, no_options = false, unmerge = false) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/rbbt/tsv/refactor.rb', line 109

# Renders the TSV as a String via original_dumper_stream.
#
# keys       - keys to include; :sort dumps all keys in sorted order. A
#              boolean or Hash in this position is taken as `no_options`.
# no_options - true/false toggles the preamble (true leaves it out); a Hash
#              is merged into the dumper options as is
# unmerge    - forwarded as the :unmerge option
def to_s(keys = nil, no_options = false, unmerge = false)
  # Legacy call shape: to_s(true) / to_s(options_hash).
  no_options, keys = keys, nil if [FalseClass, TrueClass, Hash].any? { |klass| klass === keys }

  if keys == :sort
    with_unnamed do
      keys = self.keys.sort
    end
  end

  dump_options = {:keys => keys, unmerge: unmerge}
  if Hash === no_options
    dump_options.merge!(no_options)
  elsif TrueClass === no_options || FalseClass === no_options
    dump_options[:preamble] = !no_options
  end
  dump_options[:stream] = StringIO.new

  io = original_dumper_stream(dump_options)
  io.rewind
  io.read
end

#unzip(field = 0, merge = false, sep = ":", delete = true, **kwargs) ⇒ Object



26
27
28
29
30
31
# File 'lib/rbbt/tsv/refactor.rb', line 26

# Legacy positional front end for original_unzip.
#
# The positional `merge`, `sep` and `delete` values are passed on as keyword
# options, but only for options the caller did not already give as a keyword.
# Keywords therefore take precedence, including an explicit `false` such as
# `delete: false`, which `||=` would have replaced with the positional default.
def unzip(field = 0, merge = false, sep = ":", delete = true, **kwargs)
  {:merge => merge, :sep => sep, :delete => delete}.each do |option, value|
    # nil counts as "not given"; false is a real value.
    kwargs[option] = value if kwargs[option].nil?
  end
  original_unzip(field, **kwargs)
end

#with_monitor(use_monitor = true) ⇒ Object



174
175
176
177
178
179
180
181
182
# File 'lib/rbbt/tsv/refactor.rb', line 174

# Runs the given block with the `monitor` attribute temporarily set to
# `use_monitor`, restoring the previous value afterwards (also when the block
# raises). Returns the block's value.
def with_monitor(use_monitor = true)
  monitor_state = monitor
  # The explicit receiver is required: a bare `monitor = ...` creates a local
  # variable and leaves the attribute untouched.
  self.monitor = use_monitor
  begin
    yield
  ensure
    self.monitor = monitor_state
  end
end

#xls(filename, options = {}) ⇒ Object



256
257
258
# File 'lib/rbbt/tsv/excel.rb', line 256

# Writes this TSV to `filename` through TSV::XLS.write, forwarding `options`.
def xls(filename, options ={})
  TSV::XLS.write(self, filename, options)
end

#xlsx(filename, options = {}) ⇒ Object



260
261
262
# File 'lib/rbbt/tsv/excel.rb', line 260

# Writes this TSV to `filename` through TSV::XLSX.write, forwarding `options`.
def xlsx(filename, options ={})
  TSV::XLSX.write(self, filename, options)
end