Module: TSV
- Defined in:
- lib/rbbt/util/R.rb,
lib/rbbt/tsv/melt.rb,
lib/rbbt/tsv/excel.rb,
lib/rbbt/tsv/filter.rb,
lib/rbbt/tsv/matrix.rb,
lib/rbbt/tsv/stream.rb,
lib/rbbt/tsv/marshal.rb,
lib/rbbt/tsv/refactor.rb,
lib/rbbt/tsv/attach/util.rb,
lib/rbbt/tsv/field_index.rb,
lib/rbbt/tsv/serializers.rb,
lib/rbbt/tsv/parallel/through.rb,
lib/rbbt/workflow/refactor/entity.rb
Defined Under Namespace
Modules: XLS, XLSX
Classes: BinarySerializer, CleanSerializer, FloatArraySerializer, FloatSerializer, IntegerArraySerializer, IntegerSerializer, StrictFloatArraySerializer, StrictIntegerArraySerializer, StringArraySerializer, StringDoubleArraySerializer, StringSerializer, TSVMarshalSerializer, TSVSerializer
Constant Summary
- SERIALIZER_ALIAS =
{ :integer => IntegerSerializer, :float => FloatSerializer, :integer_array => IntegerArraySerializer, :float_array => FloatArraySerializer, :strict_integer_array => StrictIntegerArraySerializer, :strict_float_array => StrictFloatArraySerializer, :marshal => Marshal, :single => StringSerializer, :string => StringSerializer, :list => StringArraySerializer, :flat => StringArraySerializer, :double => StringDoubleArraySerializer, :clean => CleanSerializer, :binary => BinarySerializer, :tsv => TSVSerializer, :marshal_tsv => TSVMarshalSerializer }
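The aliases map option symbols to the serializer classes listed above, for example:

  TSV::SERIALIZER_ALIAS[:float_array] # => TSV::FloatArraySerializer
  TSV::SERIALIZER_ALIAS[:marshal]     # => Marshal

Methods such as #field_index pass these symbols as the :serializer option when persisting data (e.g. :serializer => :list).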
Class Attribute Summary
- .field_index_dir ⇒ Object
Returns the value of attribute field_index_dir.
Instance Attribute Summary
- #field_indices ⇒ Object
Returns the value of attribute field_indices.
Class Method Summary
- ._clean_float(v) ⇒ Object
- ._excel_data(tsv, options = {}) ⇒ Object
- ._remove_link(value) ⇒ Object
- .attach(*args, **kwargs) ⇒ Object
- .attach_orig ⇒ Object
- .build_traverse_index(files, options = {}) ⇒ Object
- .excel(filename, options = {}) ⇒ Object
- .excel2tsv(filename, options = {}) ⇒ Object
- .find_path(files, options = {}) ⇒ Object
May make an extra index!
- .find_traversal(tsv1, tsv2, options = {}) ⇒ Object
- .header_lines(key_field, fields, entry_hash = nil) ⇒ Object
- .index_for_traversal(path, persist_input = false) ⇒ Object
- .melt(tsv, key_field, header_field, fields, *info_fields, &block) ⇒ Object
- .merge_different_fields(stream1, stream2, output, options = {}) ⇒ Object
- .merge_row_fields(input, output, options = {}) ⇒ Object
- .open(source, type = nil, options = nil) ⇒ Object
- .original_open ⇒ Object
- .original_pos_index ⇒ Object
- .original_range_index ⇒ Object
- .pos_index(*args, filters: nil, **kwargs) ⇒ Object
- .range_index(*args, filters: nil, **kwargs) ⇒ Object
- .read_matrix(tsv, field_format = "ID", value_format = "Value", *others) ⇒ Object
- .reorder_stream(stream, positions, sep = "\t") ⇒ Object
- .reorder_stream_tsv(stream, key_field, fields = nil, zipped = true, bar = nil) ⇒ Object
- .xls(filename, options = {}) ⇒ Object
- .xlsx(filename, options = {}) ⇒ Object
Instance Method Summary
- #attach_index(other, index, fields = nil) ⇒ Object
- #attach_same_key(other, fields = nil) ⇒ Object
- #attach_source_key(other, source, options = {}) ⇒ Object
- #dumper_stream(keys = nil, no_options = false, unmerge = false) ⇒ Object
- #entity_options ⇒ Object
- #entity_templates ⇒ Object
- #excel(filename, options = {}) ⇒ Object
- #field_index(field) ⇒ Object
- #field_index_select(matches) ⇒ Object
- #filter(filter_dir = nil) ⇒ Object
- #marshal_dump ⇒ Object
- #matrix_melt(*args) ⇒ Object
- #melt(header_field = nil, *info_fields, &block) ⇒ Object
- #merge_different_fields(other, options = {}) ⇒ Object
- #original_dumper_stream ⇒ Object
- #original_reorder ⇒ Object
- #original_to_s ⇒ Object
- #original_unzip ⇒ Object
- #ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object
- #ppthrough_callback(&block) ⇒ Object
- #pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object
- #R(script, source = nil, options = {}) ⇒ Object
- #R_console(pre_script = nil) ⇒ Object
- #R_interactive(script = nil, source = []) ⇒ Object
- #reorder(key_field = nil, fields = nil, merge: true, one2one: true, zipped: nil, **kwargs) ⇒ Object
- #reset_filters ⇒ Object
- #swap_id(field, format, options = {}, &block) ⇒ Object
- #to_s(keys = nil, no_options = false, unmerge = false) ⇒ Object
- #unzip(field = 0, merge = false, sep = ":", delete = true, **kwargs) ⇒ Object
- #with_monitor(use_monitor = true) ⇒ Object
- #xls(filename, options = {}) ⇒ Object
- #xlsx(filename, options = {}) ⇒ Object
Class Attribute Details
.field_index_dir ⇒ Object
Returns the value of attribute field_index_dir.
4 5 6 |
# File 'lib/rbbt/tsv/field_index.rb', line 4 def field_index_dir @field_index_dir end |
Instance Attribute Details
#field_indices ⇒ Object
Returns the value of attribute field_indices.
10 11 12 |
# File 'lib/rbbt/tsv/field_index.rb', line 10 def field_indices @field_indices end |
Class Method Details
._clean_float(v) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/rbbt/tsv/excel.rb', line 16 def self._clean_float(v) case v when Float v.to_s.sub(/e(-?\d+)$/,'E\1') when String if v =~ /^-?[\d\.]+e(-?\d+)$/ v.sub(/e(-?\d+)$/,'E\1') else v end else v end end |
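A brief illustration of the exponent normalization performed above (example values, not from the source):

  TSV._clean_float(1.0e-07)  # => "1.0E-07"
  TSV._clean_float("2.5e-3") # => "2.5E-3"
  TSV._clean_float("text")   # => "text" (non-numeric strings pass through unchanged)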
._excel_data(tsv, options = {}) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/rbbt/tsv/excel.rb', line 32

def self._excel_data(tsv, options = {})
  options = IndiferentHash.add_defaults options, :sep2 => ', '

  name         = IndiferentHash.process_options options, :name
  sep2         = IndiferentHash.process_options options, :sep2
  unmerge      = IndiferentHash.process_options options, :unmerge
  sort_by      = IndiferentHash.process_options options, :sort_by
  sort_by_cast = IndiferentHash.process_options options, :sort_by_cast
  remove_links = IndiferentHash.process_options options, :remove_links

  i = 1
  if sort_by
    if sort_by_cast
      data = tsv.sort_by sort_by do |k, v|
        if Array === v
          v.first.send(sort_by_cast)
        else
          v.send(sort_by_cast)
        end
      end
    else
      data = tsv.sort_by sort_by
    end
  else
    data = tsv
  end

  rows = []
  data.through do |key, values|
    cells = []
    cells.push((name and key.respond_to?(:name)) ? key.name || key : key)

    values = [values] unless Array === values
    values.each do |value|
      v = (name and value.respond_to?(:name)) ? value.name || value : value
      if Array === v
        v = v.collect{|_v| _remove_link(_v)} if remove_links
        v = v.collect{|_v| _clean_float(_v)}
        if unmerge
          cells.push v
        else
          cells.push v * sep2
        end
      else
        v = _remove_link(v) if remove_links
        cells.push v
      end
    end
    rows << cells
    i += 1
  end

  if unmerge
    new_rows = []
    rows.each do |row|
      header = row.shift
      NamedArray.zip_fields(row).each do |values|
        new_rows << [header] + values
      end
    end
    rows = new_rows
  end

  [tsv.all_fields, rows]
end
._remove_link(value) ⇒ Object
8 9 10 11 12 13 14 |
# File 'lib/rbbt/tsv/excel.rb', line 8 def self._remove_link(value) if String === value && value =~ /<([\w]+)[^>]*>(.*?)<\/\1>/ $2 else value end end |
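Following the regular expression above, a value wrapped in an HTML element is reduced to its inner text (example values, not from the source):

  TSV._remove_link("<a href='http://example.org'>TP53</a>") # => "TP53"
  TSV._remove_link("TP53")                                  # => "TP53" (non-matching values are returned as-is)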
.attach(*args, **kwargs) ⇒ Object
186 187 188 189 190 191 192 |
# File 'lib/rbbt/tsv/refactor.rb', line 186 def attach(*args, **kwargs) if kwargs.include?(:zipped) zipped = kwargs.delete(:zipped) kwargs[:one2one] = zipped unless kwargs.include?(:one2one) end attach_orig(*args, **kwargs) end |
.attach_orig ⇒ Object
185 |
# File 'lib/rbbt/tsv/refactor.rb', line 185 alias attach_orig attach |
.build_traverse_index(files, options = {}) ⇒ Object
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 |
# File 'lib/rbbt/tsv/attach/util.rb', line 327

def self.build_traverse_index(files, options = {})
  options = Misc.add_defaults options, :in_namespace => false, :persist_input => true

  in_namespace  = options[:in_namespace]
  persist_input = options[:persist_input]

  path = find_path(files, options)

  return nil if path.nil?

  traversal_ids = path.collect{|p| p.first}

  Log.debug "Found Traversal: #{traversal_ids * " => "}"

  index_for_traversal path, persist_input
end
.excel(filename, options = {}) ⇒ Object
296 297 298 299 300 301 302 |
# File 'lib/rbbt/tsv/excel.rb', line 296

def self.excel(filename, options = {})
  if filename =~ /\.xlsx$/
    xlsx(filename, options)
  else
    xls(filename, options)
  end
end
.excel2tsv(filename, options = {}) ⇒ Object
304 305 306 |
# File 'lib/rbbt/tsv/excel.rb', line 304

def self.excel2tsv(filename, options = {})
  excel(filename, options)
end
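A minimal usage sketch (file names are hypothetical): .excel dispatches on the extension as shown above, and .xls/.xlsx download remote files to a temporary location before reading them.

  require 'rbbt/tsv/excel'

  tsv = TSV.excel('samples.xlsx')    # delegates to TSV.xlsx
  tsv = TSV.excel2tsv('samples.xls') # thin wrapper over TSV.excel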
.find_path(files, options = {}) ⇒ Object
May make an extra index!
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 |
# File 'lib/rbbt/tsv/attach/util.rb', line 243

def self.find_path(files, options = {})
  options = Misc.add_defaults options, :in_namespace => false
  in_namespace = options[:in_namespace]

  if in_namespace
    if files.first.all_fields.include? in_namespace
      ids = [[in_namespace]]
    else
      ids = [files.first.all_namespace_fields(in_namespace)]
    end
    ids += files[1..-1].collect{|f| f.all_fields}
  else
    ids = files.collect{|f| f.all_fields }
  end

  id_list = []
  ids.each_with_index do |list, i|
    break if i == ids.length - 1
    match = list.select{|field| ids[i + 1].select{|f| Misc.match_fields(field, f) }.any? }
    return nil if match.empty?
    id_list << match.first
  end

  if ! Misc.match_fields(id_list.last, files.last.all_fields.first)
    id_list << files.last.all_fields.first
    id_list.zip(files)
  else
    id_list.zip(files[0..-1])
  end
end
.find_traversal(tsv1, tsv2, options = {}) ⇒ Object
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 |
# File 'lib/rbbt/tsv/attach/util.rb', line 344

def self.find_traversal(tsv1, tsv2, options = {})
  options = Misc.add_defaults options, :in_namespace => false
  in_namespace = options[:in_namespace]

  identifiers1 = tsv1.identifier_files || []
  identifiers1 += [options[:identifiers]].flatten if options[:identifiers]
  identifiers2 = tsv2.identifier_files || []

  identifiers1.unshift tsv1
  identifiers2.unshift tsv2

  files1 = []
  files2 = []

  while identifiers1.any?
    files1.push identifiers1.shift
    identifiers2.each_with_index do |e, i|
      files2 = identifiers2[(0..i)]
      index = build_traverse_index(files1 + files2.reverse, options)
      return index if not index.nil?
    end
  end

  return nil
end
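A hedged sketch of the intended use (tsv1 and tsv2 are hypothetical TSV objects): find_traversal walks shared fields and identifier files to build an index translating keys of tsv1 into keys of tsv2, and returns nil when no traversal path exists.

  index = TSV.find_traversal(tsv1, tsv2, :persist_input => true)
  index[tsv1.keys.first] unless index.nil? # keys of tsv2 matching the first key of tsv1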
.header_lines(key_field, fields, entry_hash = nil) ⇒ Object
87 88 89 90 91 |
# File 'lib/rbbt/tsv/refactor.rb', line 87 def self.header_lines(key_field, fields, entry_hash = nil) entry_hash = entry_hash || {} entry_hash = entry_hash.merge(:key_field => key_field, :fields => fields) TSV::Dumper.header entry_hash end |
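A small illustration (output shown approximately; the exact preamble comes from TSV::Dumper.header):

  TSV.header_lines("Gene", ["Chr", "Start"], :type => :list)
  # => roughly "#: :type=:list\n#Gene\tChr\tStart\n"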
.index_for_traversal(path, persist_input = false) ⇒ Object
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 |
# File 'lib/rbbt/tsv/attach/util.rb', line 277 def self.index_for_traversal(path, persist_input = false) data_key, data_file = path.shift data_index = if data_key == data_file.key_field Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'" nil else Log.debug "Data index required" data_file.index :target => data_key, :fields => [data_file.key_field], :persist => false, :type => (data_file.type == :single ? :single : :flat) end current_index = data_index current_key = data_key while not path.empty? next_key, next_file = path.shift next_fields = next_file.all_fields corrected_next_key = next_fields.select{|f| Misc.match_fields(f, next_key)}.first corrected_current_key = next_fields.select{|f| Misc.match_fields(f, current_key)}.first if current_index.nil? current_index = next_file.index(:target => corrected_next_key, :fields => [corrected_current_key], :persist => persist_input) current_index = current_index.select :key => data_file.keys else next_index = next_file.index :target => next_key, :fields => [current_key], :persist => persist_input next_index.with_unnamed do current_index.with_unnamed do current_index.process current_index.fields.first do |values| if values.nil? nil else new_values = next_index.values_at(*values).flatten if current_index.type == :single new_values.first else new_values end end end current_index.fields = [next_key] end end end current_key = next_key end current_index end |
.melt(tsv, key_field, header_field, fields, *info_fields, &block) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/rbbt/tsv/melt.rb', line 2 def self.melt(tsv, key_field, header_field, fields, *info_fields, &block) dumper = TSV::Dumper.new :key_field => "ID", :fields => [key_field] + info_fields, :type => :list dumper.init TSV.traverse tsv, :into => dumper, :fields => info_fields do |k,values| values = [values] if tsv.type == :single values = values.collect{|v| [v]} if tsv.type == :list values = Misc.zip_fields(values) if tsv.type == :double res = [] values.each_with_index do |value,i| info_values = if block_given? new = block.call value next if new.nil? new else value end info_values = [info_values] unless tsv.type == :double id = [k, i] * ":" res << [id, [k] + [info_values].flatten] end res.extend MultipleResult res end end |
.merge_different_fields(stream1, stream2, output, options = {}) ⇒ Object
154 155 156 157 |
# File 'lib/rbbt/tsv/refactor.rb', line 154

def self.merge_different_fields(stream1, stream2, output, options = {})
  Open.write(output, Open.collapse_stream(TSV.paste_streams([stream1, stream2], **options), compact: true))
  #Open.write(output, TSV.paste_streams([stream1, stream2], **options))
end
.merge_row_fields(input, output, options = {}) ⇒ Object
150 151 152 |
# File 'lib/rbbt/tsv/refactor.rb', line 150

def self.merge_row_fields(input, output, options = {})
  Open.write(output, Open.collapse_stream(input, **options))
end
.open(source, type = nil, options = nil) ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/rbbt/tsv/refactor.rb', line 11 def open(source, type = nil, = nil) type, = nil, type if .nil? and (Hash === type or (String === type and type.include? "~")) = TSV.() if String === and .include? "~" ||= {} [:type] ||= type unless type.nil? if zipped = .delete(:zipped) [:one2one] = zipped end .delete(:keep_empty) original_open(source, ) end |
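A usage sketch based on the wrapper above (file name hypothetical): a bare type argument is folded into options[:type] and the legacy :zipped option is forwarded as :one2one.

  tsv = TSV.open('file.tsv', :double)                  # type given positionally
  tsv = TSV.open('file.tsv', :type => :list)           # or inside the options hash
  tsv = TSV.open('file.tsv', :double, :zipped => true) # passed on as :one2one => true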
.original_open ⇒ Object
9 |
# File 'lib/rbbt/tsv/refactor.rb', line 9 alias original_open open |
.original_pos_index ⇒ Object
55 |
# File 'lib/rbbt/tsv/refactor.rb', line 55 alias original_pos_index pos_index |
.original_range_index ⇒ Object
54 |
# File 'lib/rbbt/tsv/refactor.rb', line 54 alias original_range_index range_index |
.pos_index(*args, filters: nil, **kwargs) ⇒ Object
66 67 68 69 70 71 72 73 74 |
# File 'lib/rbbt/tsv/refactor.rb', line 66 def pos_index(*args, filters: nil, **kwargs) if filters raise "Not implemented" if filters.length > 1 method, value = filters.first method.sub!("field:", '') kwargs[:select] = {method => value} end original_pos_index(*args, **kwargs) end |
.range_index(*args, filters: nil, **kwargs) ⇒ Object
56 57 58 59 60 61 62 63 64 |
# File 'lib/rbbt/tsv/refactor.rb', line 56 def range_index(*args, filters: nil, **kwargs) if filters raise "Not implemented" if filters.length > 1 method, value = filters.first method.sub!("field:", '') kwargs[:select] = {method => value} end original_range_index(*args, **kwargs) end |
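Both wrappers accept a single filters pair of the form ["field:<Field name>", value] and translate it into a :select option for the original index builders; more than one filter raises "Not implemented". A hedged sketch (receiver, field names, and the remaining positional arguments are illustrative):

  index = tsv.range_index("Start", "End", filters: [["field:Chromosome", "1"]])
  index = tsv.pos_index("Position", filters: [["field:Strand", "+"]])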
.read_matrix(tsv, field_format = "ID", value_format = "Value", *others) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/rbbt/tsv/matrix.rb', line 2

def self.read_matrix(tsv, field_format = "ID", value_format = "Value", *others)
  tsv = TSV.open(tsv) unless TSV === tsv

  if others.any?
    other_tsv = tsv.slice(others)
    tsv = tsv.slice(tsv.fields - others)
  end

  key_field, *fields = tsv.all_fields

  options = tsv.options.merge(:key_field => key_field, :fields => [field_format, value_format], :type => :double, :cast => nil)

  options[:filename]    ||= tsv.filename
  options[:identifiers] ||= tsv.identifier_files.first

  dumper = TSV::Dumper.new(options)
  dumper.init
  TSV.traverse tsv, :into => dumper do |key, values|
    [key, [fields, values]]
  end

  res = TSV.open(dumper.stream, options)

  if others.any?
    other_tsv = other_tsv.to_double
    res.attach other_tsv, :one2one => true
  else
    res
  end
end
.reorder_stream(stream, positions, sep = "\t") ⇒ Object
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 |
# File 'lib/rbbt/tsv/stream.rb', line 253 def self.reorder_stream(stream, positions, sep = "\t") Open.open_pipe do |sin| line = stream.gets line.chomp! unless line.nil? while line =~ /^#\:/ sin.puts line line = stream.gets line.chomp! unless line.nil? end while line =~ /^#/ if Hash === positions new = (0..line.split(sep,-1).length-1).to_a positions.each do |k,v| new[k] = v new[v] = k end positions = new end sin.puts "#" + line.sub(/^#/,'').chomp.split(sep).values_at(*positions).compact * sep line = stream.gets line.chomp! unless line.nil? end while line if Hash === positions new = (0..line.split(sep, -1).length-1).to_a positions.each do |k,v| new[k] = v new[v] = k end positions = new end values = line.split(sep, -1) new_values = values.values_at(*positions) sin.puts new_values * sep line = stream.gets line.chomp! unless line.nil? end end end |
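A usage sketch (file name hypothetical): positions may be an Array of column indices to keep, in that order, or a Hash of index swaps; #: preamble lines are passed through and # header lines are reordered together with the data rows.

  io = TSV.reorder_stream(Open.open('file.tsv'), [0, 2])
  puts io.read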
.reorder_stream_tsv(stream, key_field, fields = nil, zipped = true, bar = nil) ⇒ Object
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 |
# File 'lib/rbbt/tsv/stream.rb', line 297

def self.reorder_stream_tsv(stream, key_field, fields = nil, zipped = true, bar = nil)
  parser = TSV::Parser.new TSV.get_stream(stream)
  options = parser.options
  options[:key_field] = key_field
  options[:fields] = fields if fields
  dumper = TSV::Dumper.new options
  dumper.init
  case parser.type
  when :single
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys, values|
      key = keys.first
      [key, [values]]
    end
  when :double
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys, values|
      res = []
      keys = [keys] unless Array === keys
      keys.each_with_index do |key, i|
        vs = zipped ? values.collect{|l| l.length == 1 ? l : [l[i]] } : values
        res << [key, vs]
      end
      res.extend MultipleResult
      res
    end
  when :list
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys, values|
      key = keys === Array ? keys.first : keys
      [key, values]
    end
  when :flat
    TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys, values|
      key = keys === Array ? keys.first : keys
      [key, values]
    end
  else
    raise "Unknown type: " << parser.type.to_s
  end
  dumper
end
.xls(filename, options = {}) ⇒ Object
273 274 275 276 277 278 279 280 281 282 |
# File 'lib/rbbt/tsv/excel.rb', line 273

def self.xls(filename, options = {})
  if Open.remote? filename
    TmpFile.with_file nil, :extension => 'xls' do |tmp|
      Open.download(filename, tmp)
      TSV::XLS.read(tmp, options)
    end
  else
    TSV::XLS.read(filename, options)
  end
end
.xlsx(filename, options = {}) ⇒ Object
284 285 286 287 288 289 290 291 292 293 294 |
# File 'lib/rbbt/tsv/excel.rb', line 284

def self.xlsx(filename, options = {})
  if Open.remote? filename
    TmpFile.with_file nil, :extension => 'xlsx' do |tmp|
      Open.download(filename, tmp)
      TSV::XLSX.read(tmp, options)
    end
  else
    TSV::XLSX.read(filename, options)
  end
end
Instance Method Details
#attach_index(other, index, fields = nil) ⇒ Object
146 147 148 |
# File 'lib/rbbt/tsv/refactor.rb', line 146 def attach_index(tsv, index = nil) self.attach tsv, index: index end |
#attach_same_key(other, fields = nil) ⇒ Object
137 138 139 140 141 142 143 144 |
# File 'lib/rbbt/tsv/refactor.rb', line 137 def attach_same_key(tsv, fields = nil) fields = [fields] unless fields.nil? || Array === fields if fields self.attach tsv, :fields => fields else self.attach tsv end end |
#attach_source_key(other, source, options = {}) ⇒ Object
170 171 172 |
# File 'lib/rbbt/tsv/refactor.rb', line 170 def attach_source_key(other, key) attach other, other_key: key end |
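All three attach_* helpers are now thin forwarders to #attach (a small sketch; other and index are hypothetical objects):

  tsv.attach_same_key(other, ["Associated Gene Name"]) # tsv.attach(other, :fields => ["Associated Gene Name"])
  tsv.attach_source_key(other, "Ensembl Gene ID")      # tsv.attach(other, other_key: "Ensembl Gene ID")
  tsv.attach_index(other, index)                       # tsv.attach(other, index: index)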
#dumper_stream(keys = nil, no_options = false, unmerge = false) ⇒ Object
100 101 102 103 104 105 106 |
# File 'lib/rbbt/tsv/refactor.rb', line 100

def dumper_stream(keys = nil, no_options = false, unmerge = false)
  if Hash === keys
    original_dumper_stream(keys)
  else
    original_dumper_stream(:keys => keys, unmerge: unmerge, preamble: !no_options)
  end
end
#entity_options ⇒ Object
4 5 6 |
# File 'lib/rbbt/workflow/refactor/entity.rb', line 4

def entity_options
  @entity_options ||= {}
end
#entity_templates ⇒ Object
8 9 10 |
# File 'lib/rbbt/workflow/refactor/entity.rb', line 8 def entity_templates @entity_templates ||= {} end |
#excel(filename, options = {}) ⇒ Object
264 265 266 267 268 269 270 |
# File 'lib/rbbt/tsv/excel.rb', line 264

def excel(filename, options = {})
  if filename =~ /\.xlsx$/
    xlsx(filename, options)
  else
    xls(filename, options)
  end
end
#field_index(field) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/rbbt/tsv/field_index.rb', line 13 def field_index(field) @field_indices ||= {} @field_indices[field] ||= Persist.persist_tsv(self, filename, {:field => field}, :prefix => "FieldIndex", :dir => TSV.field_index_dir, :persist => true, :serializer => :list, :engine => "BDB" ) do |data| data.serializer = :flat tsv = {} case type when :single, :list through :key, [field] do |key, values| value = values.first tsv[value] ||= [] tsv[value] << key end else through :key, [field] do |key, values| values.first.each do |value| tsv[value] ||= [] tsv[value] << key end end end TSV.setup(data, :key_field => field, :fields => ["Keys"], :type => :flat) tsv.each do |v,keys| data[v] = keys.sort end data end end |
#field_index_select(matches) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/rbbt/tsv/field_index.rb', line 44 def field_index_select(matches) final = nil matches.each do |field,values| i = field_index(field) if Array === values keys = values.inject([]){|acc,value| m = i[value]; acc = m.nil? ? acc : Misc.merge_sorted_arrays(acc, m) } else keys = i[values] || [] end final = final.nil? ? keys : Misc.intersect_sorted_arrays(final, keys) end final end |
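A usage sketch (field names hypothetical): field_index builds and persists an inverted index from the values of one field to the keys that carry them, and field_index_select intersects several such lookups.

  keys_chr1 = tsv.field_index("Chromosome")["1"]
  selected  = tsv.field_index_select("Chromosome" => "1", "Strand" => ["+", "-"])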
#filter(filter_dir = nil) ⇒ Object
288 289 290 291 292 293 |
# File 'lib/rbbt/tsv/filter.rb', line 288 def filter(filter_dir = nil) self.extend Filtered self.filter_dir = filter_dir self.filters = [] self end |
#marshal_dump ⇒ Object
2 3 4 5 6 7 8 |
# File 'lib/rbbt/tsv/marshal.rb', line 2 def marshal_dump if defined?(Persist::TCAdapter) && Persist::TCAdapter === self super else [, Annotation.purge(self)] end end |
#matrix_melt(*args) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/rbbt/tsv/matrix.rb', line 32 def matrix_melt(*args) require 'rbbt/association' tsv = TSV.read_matrix(self, *args) melt = Association.index tsv, :persist => false, :recycle => true source_field,_sep,target_field = melt.key_field.partition "~" melt.add_field source_field do |k,v| k.partition("~").first end melt.add_field target_field do |k,v| k.partition("~").last end melt end |
#melt(header_field = nil, *info_fields, &block) ⇒ Object
29 30 31 32 |
# File 'lib/rbbt/tsv/melt.rb', line 29 def melt(header_field = nil, *info_fields, &block) info_fields = fields if info_fields.nil? || info_fields.empty? TSV.melt self, key_field, header_field, fields, *info_fields, &block end |
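A hedged sketch: melting reshapes the table into long format, yielding a :list TSV keyed by "ID" (values of the form "<original key>:<index>") whose fields are the original key field plus the info fields.

  long = tsv.melt               # info fields default to all fields
  long = tsv.melt(nil, "Score") # keep only the hypothetical "Score" column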
#merge_different_fields(other, options = {}) ⇒ Object
159 160 161 162 163 164 165 166 167 168 |
# File 'lib/rbbt/tsv/refactor.rb', line 159

def merge_different_fields(other, options = {})
  TmpFile.with_file do |output|
    TSV.merge_different_fields(self, other, output, options)
    options.delete :sort
    tsv = TSV.open output, options
    tsv.key_field = self.key_field unless self.key_field.nil?
    tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
    tsv
  end
end
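A usage sketch (other is a hypothetical TSV describing the same keys): the two tables are pasted through a temporary file and reopened, keeping this table's key field and concatenating both field lists.

  merged = tsv.merge_different_fields(other)
  merged.fields # => tsv.fields + other.fields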
#original_dumper_stream ⇒ Object
99 |
# File 'lib/rbbt/tsv/refactor.rb', line 99 alias original_dumper_stream dumper_stream |
#original_reorder ⇒ Object
33 |
# File 'lib/rbbt/tsv/refactor.rb', line 33 alias original_reorder reorder |
#original_to_s ⇒ Object
108 |
# File 'lib/rbbt/tsv/refactor.rb', line 108 alias original_to_s to_s |
#original_unzip ⇒ Object
25 |
# File 'lib/rbbt/tsv/refactor.rb', line 25 alias original_unzip unzip |
#ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/rbbt/tsv/parallel/through.rb', line 24 def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) q = RbbtProcessQueue.new num_procs q.callback &@ppthrough_callback @ppthrough_callback = nil q.init do |k,v| block.call k,v end begin res = through(new_key_field, new_fields, uniq, zipped) do |*p| q.process q end q.join ensure q.clean end res end |
#ppthrough_callback(&block) ⇒ Object
20 21 22 |
# File 'lib/rbbt/tsv/parallel/through.rb', line 20 def ppthrough_callback(&block) @ppthrough_callback = block end |
#pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
# File 'lib/rbbt/tsv/parallel/through.rb', line 4 def pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block) q = RbbtThreadQueue.new num_threads q.init(true, &block) begin res = through(new_key_field, new_fields, one2one: zipped) do |*p| q.process p end q.join ensure q.clean end end |
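A sketch of the parallel traversals (worker count and block body are illustrative): pthrough feeds each entry to a pool of threads, while ppthrough uses a process queue together with a callback registered beforehand via #ppthrough_callback.

  tsv.pthrough(4) do |key, values|
    Log.debug "#{key}: #{values.inspect}" # runs in a worker thread for every entry
  end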
#R(script, source = nil, options = {}) ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 |
# File 'lib/rbbt/util/R.rb', line 174 def R(script, source = nil, = {}) , source = source, nil if Hash === source source ||= IndiferentHash. , :source source = [source] unless Array === source require_sources = source.collect{|source| source = R::LIB_DIR["#{source.to_s}.R"] if R::LIB_DIR["#{source.to_s}.R"].exists? "source('#{source}')" } * ";\n" if Array === source and source.any? script = require_sources + "\n\n" + script if require_sources = IndiferentHash.pull_keys , :R = IndiferentHash.pull_keys , :open [:monitor] = [:monitor] if .include?(:monitor) [:method] = [:method] if .include?(:method) [:debug] = [:debug] if .include?(:debug) [:erase] = .delete(:erase) if .include?(:erase) [:debug] = true if [:method] == :debug if .delete :debug [:monitor] = true [:method] = :shell erase = .include?(:erase) ? [:erase] : false else erase = .include?(:erase) ? [:erase] : true end tsv_R_option_str = .delete :open tsv_R_option_str = ", " + tsv_R_option_str if String === tsv_R_option_str and not tsv_R_option_str.empty? raw = .delete :raw TmpFile.with_file nil, erase do |f| Open.write(f, self.to_s) script = <<-EOF ## Loading tsv into data data = rbbt.tsv('#{f}'#{tsv_R_option_str}); #{script.strip} ## Resaving data if (! is.null(data)){ rbbt.tsv.write('#{f}', data); } NULL EOF case .delete :method when :eval R.eval_run script else R.run script, end = IndiferentHash.add_defaults , :type => :list if raw Open.read(f) else tsv = TSV.open(f, ) unless [:ignore_output] tsv.key_field = [:key] if .include? :key tsv.namespace ||= self.namespace if self.namespace tsv end end end |
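A usage sketch (the R snippet and column name are illustrative): the TSV is written to a temporary file, loaded in R as data via rbbt.tsv, and whatever is left in data after the script runs is read back as a TSV (or returned as text with the :raw option).

  result = tsv.R <<-EOF
    data$Score = as.numeric(data$Score) * 2
  EOF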
#R_console(pre_script = nil) ⇒ Object
260 261 262 263 264 265 266 267 268 269 270 |
# File 'lib/rbbt/util/R.rb', line 260 def R_console(pre_script = nil) TmpFile.with_file do |f| Log.debug{"R Console:\n" << pre_script } if pre_script TmpFile.with_file(pre_script) do |script_file| Open.write(f, self.to_s) script = "data_file = '#{f}';\n" script << "\n#\{{{Pre-script:\n\n" << pre_script << "\n#}}}Pre-script\n\n" R.console(script) end end end |
#R_interactive(script = nil, source = []) ⇒ Object
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 |
# File 'lib/rbbt/util/R.rb', line 241 def R_interactive(script = nil, source = []) TmpFile.with_file do |data_file| Open.write(data_file, self.to_s) Log.debug{"R Interactive:\n" << script } if script script =<<-EOF # Loading data data_file = '#{data_file}' data = rbbt.tsv(data_file) # Script #{script} EOF R.interactive(script) end end |
#reorder(key_field = nil, fields = nil, merge: true, one2one: true, zipped: nil, **kwargs) ⇒ Object
34 35 36 37 38 39 |
# File 'lib/rbbt/tsv/refactor.rb', line 34 def reorder(key_field = nil, fields = nil, merge: true, one2one: true, zipped: nil, **kwargs) kwargs[:one2one] = zipped if one2one.nil? kwargs.delete :persist kwargs.delete :persist_data original_reorder(key_field, fields, **kwargs) end |
#reset_filters ⇒ Object
295 296 297 298 299 300 301 302 303 304 |
# File 'lib/rbbt/tsv/filter.rb', line 295 def reset_filters if @filter_dir.nil? or @filter_dir.empty? @filters.each do |filter| filter.reset end if Array === @filters return end Dir.glob(File.join(@filter_dir, '*.filter')).each do |f| FileUtils.rm f end end |
#swap_id(field, format, options = {}, &block) ⇒ Object
41 42 43 44 45 46 |
# File 'lib/rbbt/tsv/refactor.rb', line 41 def swap_id(field = 0, merge = false, sep = ":", delete = true, **kwargs) kwargs[:merge] ||= merge kwargs[:sep] ||= sep kwargs[:delete] ||= delete change_id(field, **kwargs) end |
#to_s(keys = nil, no_options = false, unmerge = false) ⇒ Object
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# File 'lib/rbbt/tsv/refactor.rb', line 109

def to_s(keys = nil, no_options = false, unmerge = false)
  if FalseClass === keys or TrueClass === keys or Hash === keys
    no_options = keys
    keys = nil
  end

  if keys == :sort
    with_unnamed do
      keys = self.keys.sort
    end
  end

  options = {:keys => keys, unmerge: unmerge}

  case no_options
  when TrueClass, FalseClass
    options[:preamble] = ! no_options
  when Hash
    options.merge!(no_options)
  end

  io = original_dumper_stream(options.merge(stream: StringIO.new))

  io.rewind
  io.read
end
#unzip(field = 0, merge = false, sep = ":", delete = true, **kwargs) ⇒ Object
26 27 28 29 30 31 |
# File 'lib/rbbt/tsv/refactor.rb', line 26 def unzip(field = 0, merge = false, sep = ":", delete = true, **kwargs) kwargs[:merge] ||= merge kwargs[:sep] ||= sep kwargs[:delete] ||= delete original_unzip(field, **kwargs) end |
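Both unzip and swap_id keep their old positional arguments and forward them as keyword options to the refactored implementations (field names hypothetical):

  tsv.unzip("Genomic Mutation", true) # equivalent to tsv.unzip("Genomic Mutation", merge: true)
  tsv.swap_id("Associated Gene Name") # forwards to change_id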
#with_monitor(use_monitor = true) ⇒ Object
174 175 176 177 178 179 180 181 182 |
# File 'lib/rbbt/tsv/refactor.rb', line 174 def with_monitor(use_monitor = true) monitor_state = monitor monitor = use_monitor begin yield ensure monitor = monitor_state end end |