Module: Misc

Defined in:
lib/rbbt/util/misc.rb,
lib/rbbt/util/misc/math.rb,
lib/rbbt/util/misc/omics.rb,
lib/rbbt/util/misc/objects.rb,
lib/rbbt/util/misc/options.rb,
lib/rbbt/util/misc/serialize.rb,
lib/rbbt/util/misc/development.rb,
lib/rbbt/util/misc/manipulation.rb,
lib/rbbt/util/misc/communication.rb

Constant Summary collapse

# Precomputed 1 / ln(2); multiplying a natural logarithm by it yields a base-2 logarithm.
Log2Multiplier =
1.0 / Math.log(2.0)
# Precomputed 1 / ln(10); multiplying a natural logarithm by it yields a base-10 logarithm.
Log10Multiplier =
1.0 / Math.log(10.0)
# Maps each single-letter nucleotide code to the Array of concrete bases it
# can stand for. Plain bases map to themselves; "N" maps to all of A, C, T, G.
IUPAC2BASE =
{
  "A" => ["A"],
  "C" => ["C"],
  "G" => ["G"],
  "T" => ["T"],
  "U" => ["U"],
  "R" => "A or G".split(" or "),
  "Y" => "C or T".split(" or "),
  "S" => "G or C".split(" or "),
  "W" => "A or T".split(" or "),
  "K" => "G or T".split(" or "),
  "M" => "A or C".split(" or "),
  "B" => "C or G or T".split(" or "),
  "D" => "A or G or T".split(" or "),
  "H" => "A or C or T".split(" or "),
  "V" => "A or C or G".split(" or "),
  "N" => %w(A C T G),
}
# Maps a base to its complementary base. Both "T" and "U" map to "A";
# no key maps to "U", so complements are always spelled with DNA letters.
BASE2COMPLEMENT =
{
  "A" => "T",
  "C" => "G",
  "G" => "C",
  "T" => "A",
  "U" => "A",
}
# Maps lower-case three-letter amino acid abbreviations to their one-letter codes.
# Keys are lower-case only; lookups with other casing return nil.
THREE_TO_ONE_AA_CODE =
{
  "ala" =>   "A",
  "arg" =>   "R",
  "asn" =>   "N",
  "asp" =>   "D",
  "cys" =>   "C",
  "glu" =>   "E",
  "gln" =>   "Q",
  "gly" =>   "G",
  "his" =>   "H",
  "ile" =>   "I",
  "leu" =>   "L",
  "lys" =>   "K",
  "met" =>   "M",
  "phe" =>   "F",
  "pro" =>   "P",
  "ser" =>   "S",
  "thr" =>   "T",
  "trp" =>   "W",
  "tyr" =>   "Y",
  "val" =>   "V"
}
# Maps each of the 64 DNA codons (written with T, not U) to a one-letter
# amino acid code. TAA, TAG and TGA map to "*" (the stop codons of the
# standard genetic code).
CODON_TABLE =
{
  "ATT" => "I",
  "ATC" => "I",
  "ATA" => "I",
  "CTT" => "L",
  "CTC" => "L",
  "CTA" => "L",
  "CTG" => "L",
  "TTA" => "L",
  "TTG" => "L",
  "GTT" => "V",
  "GTC" => "V",
  "GTA" => "V",
  "GTG" => "V",
  "TTT" => "F",
  "TTC" => "F",
  "ATG" => "M",
  "TGT" => "C",
  "TGC" => "C",
  "GCT" => "A",
  "GCC" => "A",
  "GCA" => "A",
  "GCG" => "A",
  "GGT" => "G",
  "GGC" => "G",
  "GGA" => "G",
  "GGG" => "G",
  "CCT" => "P",
  "CCC" => "P",
  "CCA" => "P",
  "CCG" => "P",
  "ACT" => "T",
  "ACC" => "T",
  "ACA" => "T",
  "ACG" => "T",
  "TCT" => "S",
  "TCC" => "S",
  "TCA" => "S",
  "TCG" => "S",
  "AGT" => "S",
  "AGC" => "S",
  "TAT" => "Y",
  "TAC" => "Y",
  "TGG" => "W",
  "CAA" => "Q",
  "CAG" => "Q",
  "AAT" => "N",
  "AAC" => "N",
  "CAT" => "H",
  "CAC" => "H",
  "GAA" => "E",
  "GAG" => "E",
  "GAT" => "D",
  "GAC" => "D",
  "AAA" => "K",
  "AAG" => "K",
  "CGT" => "R",
  "CGC" => "R",
  "CGA" => "R",
  "CGG" => "R",
  "AGA" => "R",
  "AGG" => "R",
  "TAA" => "*",
  "TAG" => "*",
  "TGA" => "*",
}
# Single module-level Mutex created at load time.
# NOTE(review): judging by the name it presumably guards "thread exclusive"
# sections; its users are not visible in this listing - confirm.
MUTEX_FOR_THREAD_EXCLUSIVE =
Mutex.new
# Pushbullet API key, resolved once when this file is loaded.
# Resolution order: the PUSHBULLET_KEY environment variable, then the stripped
# contents of $HOME/config/apps/pushbullet/apikey; nil when neither is available.
PUSHBULLET_KEY =
begin
  if ENV["PUSHBULLET_KEY"]
    ENV["PUSHBULLET_KEY"]
  else
    # HOME can be unset (cron jobs, minimal containers); File.join(nil, ...)
    # would raise TypeError at load time, so only look for the file when set.
    home = ENV['HOME']
    config_api = home ? File.join(home, 'config/apps/pushbullet/apikey') : nil
    if config_api && File.exist?(config_api)
      File.read(config_api).strip
    else
      nil
    end
  end
end

Class Method Summary collapse

Class Method Details

._convert_match_condition(condition) ⇒ Object



32
33
34
35
36
37
38
39
40
41
# File 'lib/rbbt/util/misc.rb', line 32

# Parses a textual match condition into the structure used when matching values.
#
# 'true' / 'false' become booleans; a string starting with "/" is converted
# through its to_regexp method; a leading <, <=, > or >= yields
# [:cmp, operator, number]; a leading "!" yields [:invert, parsed_rest];
# any other condition is returned unchanged.
def self._convert_match_condition(condition)
  if condition == 'true'
    true
  elsif condition == 'false'
    false
  elsif condition[0] == "/"
    condition.to_regexp
  elsif condition =~ /^([<>]=?)(.*)/
    operator, operand = $1, $2
    [:cmp, operator, operand.to_f]
  elsif condition[0] == "!"
    negated = condition[1..-1].strip
    [:invert, _convert_match_condition(negated)]
  else
    condition
  end
end

._zip_fields(array, max = nil) ⇒ Object



67
68
69
70
71
72
73
74
75
76
# File 'lib/rbbt/util/misc/objects.rb', line 67

# Zips a list of field value arrays into per-entry tuples.
#
# array - Array of Arrays, one per field.
# max   - target length; defaults to the longest field.
#
# Fields holding a single value are repeated +max+ times so that the value is
# paired with every entry of the longer fields. Returns [] for nil or empty
# input, or when the first field is nil.
def self._zip_fields(array, max = nil)
  return [] if array.nil? or array.empty? or (first = array.first).nil?
  max = array.collect{|l| l.length}.max if max.nil?
  rest = array[1..-1].collect{|v|
    # `&&` is required here: the bitwise `&` binds tighter than `==` and `>`,
    # which made the condition always false and left these fields unexpanded.
    (v.length == 1 && max > 1) ? v * max : v
  }
  first = first * max if first.length == 1 and max > 1

  first.zip(*rest)
end

.add_defaults(options, defaults = {}) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/rbbt/util/misc/options.rb', line 120

# Fills in missing options from +defaults+.
#
# options  - Hash (updated in place and returned), a String parsed with
#            string2hash, or nil (treated as an empty Hash).
# defaults - Hash of key => value applied only for keys not already present.
#
# Returns the resulting options Hash. Raises RuntimeError for any other type.
def self.add_defaults(options, defaults = {})
  options ||= {}
  case
  when Hash === options
    new_options = options.dup
  when String === options
    # A String cannot answer include?(key) for non-String keys nor be replaced
    # by a Hash, so continue with the parsed Hash from here on.
    options = new_options = string2hash(options)
  else
    raise "Format of '#{options.inspect}' not understood. It should be a hash"
  end

  defaults.each do |key, value|
    next if options.include? key

    new_options[key] = value
  end

  # Update the caller's Hash in place so existing references see the defaults.
  options.replace new_options
end

.add_GET_param(url, param, value) ⇒ Object



78
79
80
81
82
83
84
# File 'lib/rbbt/util/misc/options.rb', line 78

# Returns +url+ with the query parameter +param+ set to +value+.
# Any existing query string is parsed, updated and re-serialized with
# hash2GET_params.
def self.add_GET_param(url, param, value)
  base, query = url.split("?")
  params = if query.nil?
             {}
           else
             self.GET_params2hash(query)
           end
  IndiferentHash.setup params
  params[param] = value
  base << "?"
  base << hash2GET_params(params)
end

.add_libdir(dir = nil) ⇒ Object



3
4
5
6
# File 'lib/rbbt/util/misc/development.rb', line 3

# Prepends +dir+ to $LOAD_PATH unless it is already listed.
# Without an argument, uses the 'lib' directory under the path returned by
# Path.caller_lib_dir for the calling file.
# Returns $LOAD_PATH when the directory was added, nil otherwise.
def self.add_libdir(dir=nil)
  dir = File.join(Path.caller_lib_dir(caller.first), 'lib') unless dir
  return nil if $LOAD_PATH.include?(dir)
  $LOAD_PATH.unshift(dir)
end

.append_zipped(current, new) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/rbbt/util/misc/objects.rb', line 51

# Appends the values in +new+ to the zipped entries in +current+.
#
# current - Array of Arrays, updated in place and returned.
# new     - Array with one value (or Array of values) per entry of +current+.
#           It is consumed: elements are shifted off as they are appended.
#
# When +current+ is empty it is filled with one single-element entry per
# value of +new+.
def self.append_zipped(current, new)
  if current.empty?
    current.replace new.collect{|e| [e]}
  else
    current.each do |v|
      n = new.shift
      if Array === n
        # Append the values just taken for this entry. The previous code
        # concatenated the remaining +new+ array and dropped +n+.
        v.concat n
      else
        v << n
      end
    end
  end
  current
end

.array2hash(array, default = nil) ⇒ Object



24
25
26
27
28
29
30
31
# File 'lib/rbbt/util/misc/options.rb', line 24

# Builds a Hash from an Array of [key, value] pairs.
# When a value is nil and +default+ is given, a fresh copy (dup) of +default+
# is stored instead.
def self.array2hash(array, default = nil)
  array.each_with_object({}) do |(key, value), result|
    result[key] = (value.nil? && !default.nil?) ? default.dup : value
  end
end

.benchmark(repeats = 1, message = nil) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/rbbt/util/misc/development.rb', line 32

# Runs the block +repeats+ times and prints the Benchmark measurement to STDERR.
#
# repeats - number of times the block is executed.
# message - optional label printed before the measurement.
#
# Returns the value of the last block execution. Any exception is reported
# as "Benchmark aborted" and re-raised.
def self.benchmark(repeats = 1, message = nil)
  require 'benchmark'
  res = nil
  begin
    measure = Benchmark.measure do
      repeats.times do
        res = yield
      end
    end
    if message
      # Was `STDERR.pust`, which raised NoMethodError whenever a message was given.
      STDERR.puts "#{message }: #{ repeats } repeats"
    else
      STDERR.puts "Benchmark for #{ repeats } repeats"
    end
    STDERR.puts measure
  rescue Exception
    STDERR.puts "Benchmark aborted"
    raise $!
  end
  res
end

.binary_include?(array, elem) ⇒ Boolean

Returns:

  • (Boolean)


111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/rbbt/util/misc/manipulation.rb', line 111

# Binary search membership test.
#
# array - Array expected to be sorted in ascending <=> order.
# elem  - value to look for.
#
# Returns true when found and false otherwise, including for an empty array
# (the previous version returned -1 there, which is truthy in Ruby).
# Raises RuntimeError when +elem+ cannot be compared with an array value.
def self.binary_include?(array, elem)
  upper = array.size - 1
  lower = 0

  while(upper >= lower) do
    idx = lower + (upper - lower) / 2
    value = array[idx]

    case elem <=> value
    when 0
      return true
    when -1
      upper = idx - 1
    when 1
      lower = idx + 1
    else
      raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
    end
  end

  false
end

.bootstrap(elems, num = nil, options = {}, &block) ⇒ Object



310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# File 'lib/rbbt/util/misc/development.rb', line 310

# Runs the given block once for every element of +elems+ by traversing the
# element indices with TSV.traverse, passing the computed :cpus option along.
#
# elems   - indexable collection responding to length and [].
# num     - nil / :current (derive from configuration or the machine), a
#           String (converted with to_i) or an Integer.
# options - Hash forwarded to TSV.traverse; :respawn, :cpus and :bar get
#           defaults here. The block result is only kept when :into is set.
#
# Raises RuntimeError when +num+ is of any other type.
def self.bootstrap(elems, num = nil, options = {}, &block)
  IndiferentHash.setup options

  # Only consult Rbbt::Config when it is loaded and no explicit count was given.
  num = Rbbt::Config.get :cpus, :default_bootstrap_cpus, :bootstrap_cpus if num == :current || num == nil if defined?(Rbbt::Config)
  num = :current if num.nil?
  cpus = case num
         when :current
           # Use every processor, but never more than half the number of elements.
           n = Etc.nprocessors
           n = elems.length / 2 if n > elems.length/2

           # Fall back to one cpu when an earlier :current bootstrap recorded in
           # $BOOTSTRAPPED_CURRENT plus this one would exceed the processor count.
           if $BOOTSTRAPPED_CURRENT && $BOOTSTRAPPED_CURRENT + n > Etc.nprocessors
             1
           else
             n
           end
         when String
           num.to_i
         when Integer
           # NOTE(review): values of 100 or more are divided into 32000; the
           # meaning of that constant is not visible here - confirm.
           if num < 100
             num
           else
             32000 / num
           end
         else
           raise "Parameter 'num' not understood: #{Misc.fingerprint num}"
         end


  cpus = 1 if cpus <= 0
  options = Misc.add_defaults options, :respawn => true, :cpus => cpus
  options = Misc.add_defaults options, :bar => "Bootstrap in #{ options[:cpus] } cpus: #{ Misc.fingerprint Annotation.purge(elems) }"
  # `and` binds looser than `=`, so respawn receives options[:respawn] only;
  # the rest of the expression is evaluated and discarded.
  respawn = options[:respawn] and options[:cpus] and options[:cpus].to_i > 1

  # Indices are traversed as Strings and converted back with to_i in the block.
  index = (0..elems.length-1).to_a.collect{|v| v.to_s }

  TSV.traverse index, **options do |pos|
    if num == :current
      # n is only assigned in the :current branch above; the += 0 is a no-op.
      $BOOTSTRAPPED_CURRENT ||= n 
      $BOOTSTRAPPED_CURRENT += 0 
    end
    elem = elems[pos.to_i]
    elems.annotate elem if elems.respond_to? :annotate
    res = begin
            yield elem
          rescue Interrupt
            Log.warn "Process #{Process.pid} was aborted"
            raise $!
          end
    # Discard the block result unless the traversal collects values.
    res = nil unless options[:into]
    raise RbbtProcessQueue::RbbtProcessQueueWorker::Respawn, res if respawn == :always and cpus > 1
    res
  end
end

.bootstrap_in_threads(elems, num = :current, options = {}, &block) ⇒ Object



364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
# File 'lib/rbbt/util/misc/development.rb', line 364

# Thread-based counterpart of bootstrap: runs the block once per element of
# +elems+ by traversing the element indices with TSV.traverse, passing the
# :threads option along.
#
# num - nil / :current (10 threads), a String (to_i) or an Integer (used as
#       is below 100, otherwise 32000 / num).
#
# The block result is only kept when options[:into] is set.
# Raises RuntimeError when +num+ is of any other type.
def self.bootstrap_in_threads(elems, num = :current, options = {}, &block)
  IndiferentHash.setup options
  num = :current if num.nil?

  threads = if :current == num
              10
            elsif String === num
              num.to_i
            elsif Integer === num
              num < 100 ? num : 32000 / num
            else
              raise "Parameter 'num' not understood: #{Misc.fingerprint num}"
            end

  options = Misc.add_defaults options, :respawn => true, :threads => threads
  options = Misc.add_defaults options, :bar => "Bootstrap in #{ options[:threads] } threads: #{ Misc.fingerprint Annotated.purge(elems) }"

  # Indices are traversed as Strings and converted back with to_i in the block.
  positions = Array.new(elems.length) { |i| i.to_s }
  TSV.traverse positions, options do |pos|
    elem = elems[pos.to_i]
    elems.annotate elem if elems.respond_to? :annotate
    outcome = begin
                yield elem
              rescue Interrupt
                Log.warn "Process #{Process.pid} was aborted"
                raise $!
              end
    options[:into] ? outcome : nil
  end
end

.choose(array, select) ⇒ Object



97
98
99
# File 'lib/rbbt/util/misc/objects.rb', line 97

# Returns the elements of +array+ whose positional flag in +select+ is truthy.
# Positions without a flag (when +select+ is shorter) are not chosen.
def self.choose(array, select)
  chosen = []
  array.zip(select).each do |elem, keep|
    chosen << elem if keep
  end
  chosen
end

.chr_cmp_contigs(chr1, chr2, contigs) ⇒ Object



386
387
388
# File 'lib/rbbt/util/misc/omics.rb', line 386

# Compares two chromosome names by their position in +contigs+.
# Returns -1, 0 or 1, or nil when the two positions cannot be compared
# (for example when only one of the names is missing from +contigs+).
def self.chr_cmp_contigs(chr1, chr2, contigs)
  rank1 = contigs.index(chr1)
  rank2 = contigs.index(chr2)
  rank1 <=> rank2
end

.chr_cmp_strict(chr1, chr2) ⇒ Object



372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/rbbt/util/misc/omics.rb', line 372

# Compares two chromosome names, ignoring a leading "chr".
# Names ending in digits are ordered by that number and sort before names
# without trailing digits; the remaining names are compared as Strings.
def self.chr_cmp_strict(chr1, chr2)
  name1 = chr1.sub(/^chr/, '')
  name2 = chr2.sub(/^chr/, '')
  number1 = name1[/(\d+)$/, 1]
  number2 = name2[/(\d+)$/, 1]

  return number1.to_i <=> number2.to_i if number1 && number2
  return -1 if number1
  return 1 if number2
  name1 <=> name2
end

.collapse_ranges(ranges) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/rbbt/util/misc/manipulation.rb', line 2

# Merges overlapping ranges.
# Ranges are processed in order of their begin value; a range is merged into
# the previous one unless it begins strictly after the previous end.
# Returns an Array of inclusive (begin..end) ranges.
def self.collapse_ranges(ranges)
  merged = []
  ranges.sort_by(&:begin).each do |range|
    current = merged.last
    if current.nil? || range.begin > current[1]
      merged << [range.begin, range.end]
    else
      # Extend the open interval when this range reaches further.
      current[1] = [range.end, current[1]].max
    end
  end

  merged.collect{|first, last| (first..last) }
end

.consolidate(list) ⇒ Object



40
41
42
43
44
45
46
47
48
49
# File 'lib/rbbt/util/misc/objects.rb', line 40

# Concatenates all the lists in +list+ into the first non-nil one, which is
# modified in place and returned. Returns nil for an empty +list+.
def self.consolidate(list)
  merged = nil
  list.each do |entry|
    if merged.nil?
      merged = entry
    else
      merged.concat entry
    end
  end
  merged
end

.correct_icgc_mutation(pos, ref, mut_str) ⇒ Object



119
120
121
122
123
124
# File 'lib/rbbt/util/misc/omics.rb', line 119

# Rewrites a mutation string and returns [pos, [mutation]].
# A mutation starting with "-" followed by a base becomes a run of "-" one
# character shorter than the input; when +ref+ is "-" the mutation gets a
# "+" prefix.
# NOTE(review): the method name suggests ICGC-formatted input - confirm.
def self.correct_icgc_mutation(pos, ref, mut_str)
  mut = if mut_str =~ /^-[ACGT]/
          '-' * (mut_str.length - 1)
        else
          mut_str
        end
  mut = "+" << mut if ref == '-'
  [pos, [mut]]
end

.correct_mutation(pos, ref, mut_str) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/rbbt/util/misc/omics.rb', line 126

# Normalizes a reference / alternative allele pair.
#
# pos     - position of the first reference base.
# ref     - reference allele ('-' is treated as empty).
# mut_str - comma separated alternative alleles ('<DEL>' is treated as '-'); may be nil.
#
# Returns [pos, muts], where each mutation is "+SEQ" when the reference is
# empty, a run of "-" when the alternative is shorter than and contained in
# the reference, the alternative itself when both are one base long, and
# otherwise one "-" per reference base followed by the alternative.
def self.correct_mutation(pos, ref, mut_str)
  muts = mut_str.nil? ? [] : mut_str.split(',')
  muts.collect!{|m| m == '<DEL>' ? '-' : m }

  ref = '' if ref == '-'
  # Strip the leading base while the reference and every alternative share
  # it, moving the position one base forward each time.
  while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
    ref = ref[1..-1]
    # Defensive check; with ref.length >= 1 the slice above is never nil.
    raise "REF nil" if ref.nil?
    pos = pos + 1
    muts = muts.collect{|m| m[1..-1]}
  end

  muts = muts.collect do |m|
    m = '' if m == '-'
    case
    when ref.empty?
      "+" << m
    when (m.length < ref.length and (m.empty? or ref.index(m)))
      "-" * (ref.length - m.length)
    when (ref.length == 1 and m.length == 1)
      m
    else
      # NOTE(review): ref was already rewritten from '-' to '' above, so this
      # first branch looks unreachable - confirm.
      if ref == '-'
        res = '+' + m
      else
        res = '-' * ref.length
        res << m unless m == '-'
      end
      Log.debug{"Non-standard annotation: #{[ref, m]} (#{ muts }) => #{ res }"}

      res
    end
  end

  [pos, muts]
end

.correct_vcf_mutation(pos, ref, mut_str) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/rbbt/util/misc/omics.rb', line 163

# Normalizes a reference / alternative allele pair.
# NOTE(review): the method name suggests VCF REF / ALT input - confirm.
#
# pos     - position of the first reference base.
# ref     - reference allele.
# mut_str - comma separated alternative alleles ('<DEL>' is treated as '-'); may be nil.
#
# Returns [pos, muts]. Alternatives equal to the remaining reference produce
# nil and are removed from the result by the final compact.
def self.correct_vcf_mutation(pos, ref, mut_str)
  muts = mut_str.nil? ? [] : mut_str.split(',')
  muts.collect!{|m| m == '<DEL>' ? '-' : m }

  # Strip the leading base while the reference and every alternative share it.
  while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
    ref = ref[1..-1]
    # Defensive check; with ref.length >= 1 the slice above is never nil.
    raise "REF nil" if ref.nil?
    # The position is not advanced for the strip that empties the reference.
    pos = pos + 1 unless ref.empty?
    muts = muts.collect{|m| m[1..-1]}
  end

  muts = muts.collect do |m|
    case
    when (ref.empty? or ref == '-')
      "+" << m
    when (m.length < ref.length and (m.empty? or ref.index(m)))
      "-" * (ref.length - m.length)
    when (ref.length == 1 and m.length == 1)
      m
    when (ref == m)
      nil
    else
      # NOTE(review): ref == '-' is already handled by the first `when`, so
      # this first branch looks unreachable - confirm.
      if ref == '-'
        res = '+' + m
      else
        if ref[0] == m[0]
          res = '+' << m[1..-1]
        else
          res = '-' * ref.length
          res << m unless m == '-'
        end
      end
      Log.debug{"Non-standard annotation: #{[ref, m]} (#{ muts }) => #{ res }"}

      res
    end
  end.compact

  [pos, muts]
end

.counts(array) ⇒ Object



77
78
79
80
81
82
83
84
85
# File 'lib/rbbt/util/misc/math.rb', line 77

# Returns a Hash mapping each distinct element of +array+ to the number of
# times it occurs.
def self.counts(array)
  array.each_with_object({}) do |elem, tally|
    tally[elem] = (tally[elem] || 0) + 1
  end
end

.divide(array, num) ⇒ Object

Divides the array into num chunks of near-equal size by assigning the elements round-robin: the element at index i goes to chunk i % num.



208
209
210
211
212
213
214
215
216
217
# File 'lib/rbbt/util/misc/development.rb', line 208

# Divides +array+ into +num+ chunks by assigning the elements round-robin:
# the element at index i goes to chunk i % num.
#
# num - number of chunks; converted with to_i once, and values below 1
#       (including nil) are treated as 1. Previously the conversion was only
#       applied when creating the chunks, so String counts failed on the modulo.
#
# Returns an Array with +num+ Arrays, some possibly empty.
def self.divide(array, num)
  num = num.to_i
  num = 1 if num < 1
  chunks = Array.new(num) { [] }
  array.each_with_index{|e, i|
    chunks[i % num] << e
  }
  chunks
end

.do_once(&block) ⇒ Object



126
127
128
129
130
131
# File 'lib/rbbt/util/misc/development.rb', line 126

# Yields to the block only the first time it is called in the process,
# tracked through the global $__did_once. The flag is set before yielding.
# Always returns nil.
def self.do_once(&block)
  unless $__did_once
    $__did_once = true
    yield
  end
  nil
end

.ensembl_server(organism) ⇒ Object



330
331
332
333
334
335
336
337
# File 'lib/rbbt/util/misc/omics.rb', line 330

# Returns the Ensembl host for an organism code.
# "Name/date" codes map to "<date>.archive.ensembl.org"; codes without a
# "/date" part map to "www.ensembl.org".
def self.ensembl_server(organism)
  _name, date = organism.split("/")
  date ? "#{ date }.archive.ensembl.org" : "www.ensembl.org"
end

.field_position(fields, field, quiet = false) ⇒ Object

Raises:

  • (FieldNotFoundError)


101
102
103
104
105
106
107
108
# File 'lib/rbbt/util/misc/objects.rb', line 101

# Finds the position of +field+ in +fields+.
#
# fields - Array of field names; may be nil.
# field  - Integer or Range (returned unchanged) or a field name. Names are
#          first matched exactly and then case-insensitively as a regular
#          expression built from +field+ (the name is not escaped).
# quiet  - when true, returns nil instead of raising.
#
# Raises FieldNotFoundError when +fields+ is nil or the field is not found,
# unless +quiet+ is set.
def self.field_position(fields, field, quiet = false)
  return field if Integer === field or Range === field
  if fields.nil?
    raise FieldNotFoundError, "Field information missing" unless quiet
    # Quiet lookups on missing field information used to fail with NoMethodError.
    return nil
  end
  fields.each_with_index{|f,i| return i if f == field}
  field_re = Regexp.new(/^#{field}$/i)
  fields.each_with_index{|f,i| return i if f =~ field_re}
  raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
  nil
end

.genomic_location_cmp(gpos1, gpos2, sep = ":") ⇒ Object



390
391
392
393
394
395
396
397
398
399
400
# File 'lib/rbbt/util/misc/omics.rb', line 390

# Compares two "chr<sep>pos..." locations: first the chromosome names as
# Strings, then the positions as Integers.
def self.genomic_location_cmp(gpos1, gpos2, sep = ":")
  chr1, _, pos1 = gpos1.partition(sep)
  chr2, _, pos2 = gpos2.partition(sep)
  by_chr = chr1 <=> chr2
  return by_chr unless by_chr == 0
  pos1.to_i <=> pos2.to_i
end

.genomic_location_cmp_contigs(gpos1, gpos2, contigs, sep = ":") ⇒ Object



414
415
416
417
418
419
420
421
422
423
424
# File 'lib/rbbt/util/misc/omics.rb', line 414

# Compares two "chr<sep>pos..." locations: first the chromosomes with
# chr_cmp_contigs (their order in +contigs+), then the positions as Integers.
def self.genomic_location_cmp_contigs(gpos1, gpos2, contigs, sep = ":")
  chr1, _, pos1 = gpos1.partition(sep)
  chr2, _, pos2 = gpos2.partition(sep)
  by_chr = chr_cmp_contigs(chr1, chr2, contigs)
  return by_chr unless by_chr == 0
  pos1.to_i <=> pos2.to_i
end

.genomic_location_cmp_strict(gpos1, gpos2, sep = ":") ⇒ Object



402
403
404
405
406
407
408
409
410
411
412
# File 'lib/rbbt/util/misc/omics.rb', line 402

# Compares two "chr<sep>pos..." locations: first the chromosomes with
# chr_cmp_strict, then the positions as Integers.
def self.genomic_location_cmp_strict(gpos1, gpos2, sep = ":")
  chr1, _, pos1 = gpos1.partition(sep)
  chr2, _, pos2 = gpos2.partition(sep)
  by_chr = chr_cmp_strict(chr1, chr2)
  return by_chr unless by_chr == 0
  pos1.to_i <=> pos2.to_i
end

.genomic_mutations_to_BED(mutations, chr_prefix = false, sort_order = :normal) ⇒ Object



533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
# File 'lib/rbbt/util/misc/omics.rb', line 533

# Converts genomic mutations ("chr:pos:mut...") into tab separated lines
# "chr<TAB>start<TAB>end<TAB>mutation", produced through TSV.traverse with
# :into => :stream.
#
# mutations  - the mutations to convert; they are sorted first.
# chr_prefix - "true" / "add" prepends "chr" to chromosome names lacking it,
#              "remove" strips it; any other value leaves names unchanged.
# sort_order - an Array of contig names (sorted with
#              sort_genomic_locations_by_contig), 'strict'
#              (sort_genomic_locations_strict) or anything else
#              (sort_genomic_locations).
def self.genomic_mutations_to_BED(mutations, chr_prefix = false, sort_order = :normal)
  io = if Array === sort_order

         # Make the contig names follow the same prefix convention as the output.
         # Only the first contig is inspected to decide whether to convert.
         case chr_prefix.to_s.downcase
         when "remove"
           sort_order = sort_order.collect{|chr| chr.sub('chr', '') }  if sort_order.first.include?('chr') 
         when "true", "add"
           sort_order = sort_order.collect{|chr| "chr" + chr }  unless sort_order.first.include?('chr')
         end

         sort_genomic_locations_by_contig(mutations, sort_order)

       else

         case sort_order.to_s
         when 'strict'
           sort_genomic_locations_strict(mutations)
         else
           sort_genomic_locations(mutations)
         end

       end

  TSV.traverse io, :type => :array, :into => :stream do |mutation|
    chr, pos, mut, *rest = mutation.split(":")
    # Span of the interval: 1 without a mutation, 1 + inserted length for
    # "+SEQ", the number of characters after the first "-" for deletions, and
    # the mutation length otherwise.
    size = case mut
           when nil
             1
           when /^\+(.*)/
             1 + $1.length
           when /^\-(.*)/
             $1.length
           else
             mut.length
           end

    # Note: include?/sub look for 'chr' anywhere in the name, not only as a prefix.
    case chr_prefix.to_s.downcase
    when "true", "add"
      chr = "chr" + chr if ! chr.include?('chr')
    when "remove"
      chr = chr.sub("chr", '') if chr.include?('chr')
    end
    # The start is pos - 1 and the end is start + size.
    [chr, pos.to_i - 1, pos.to_i - 1 + size, mutation] * "\t"
  end
end

.GET_params2hash(string) ⇒ Object



54
55
56
57
58
59
60
61
# File 'lib/rbbt/util/misc/options.rb', line 54

# Parses a query string ("a=1&b=2") into a Hash of String keys to values.
# Values are unescaped with CGI.unescape; parameters without a value map to "".
# Only the first "=" separates key from value, so values that themselves
# contain "=" are preserved (they used to be cut at the second "=").
def self.GET_params2hash(string)
  hash = {}
  string.split('&').each do |item|
    key, value = item.split("=", 2)
    hash[key] = value.nil? ? "" : CGI.unescape(value)
  end
  hash
end

.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/rbbt/util/misc/math.rb', line 105

# Builds a Google Chart URL for a three-set Venn diagram.
#
# list1..3 - Arrays to compare.
# name1..3 - legend names (default "list 1" .. "list 3").
# total    - optional universe size (Integer, or an Array whose length is
#            used); sizes are scaled against it, or against the largest size.
#
# Returns the URL String. When the scaling reference is zero (for example
# when all lists are empty) every size is reported as 0.0; previously this
# case raised FloatDomainError.
def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
  name1 ||= "list 1"
  name2 ||= "list 2"
  name3 ||= "list 3"

  # Order: the three sets, the three pairwise intersections, the triple intersection.
  sizes = [list1, list2, list3, list1 & list2, list1 & list3, list2 & list3, list1 & list2 & list3].collect{|l| l.length}

  total = total.length if Array === total

  label = "#{name1}: #{sizes[0]} (#{name2}: #{sizes[3]}, #{name3}: #{sizes[4]})"
  label << "|#{name2}: #{sizes[1]} (#{name1}: #{sizes[3]}, #{name3}: #{sizes[5]})"
  label << "|#{name3}: #{sizes[2]} (#{name1}: #{sizes[4]}, #{name2}: #{sizes[5]})"
  if total
    label << "| INTERSECTION: #{sizes[6]} TOTAL: #{total}"
  else
    label << "| INTERSECTION: #{sizes[6]}"
  end

  max = total || sizes.max
  # Fractions of the reference, truncated to two decimals.
  sizes = sizes.collect{|v| max.to_f > 0 ? (v.to_f/max * 100).to_i.to_f / 100 : 0.0 }
  "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{sizes * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
end

.hash2GET_params(hash) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/rbbt/util/misc/options.rb', line 63

# Serializes a Hash into a query string ("a=1&b=2") with keys sorted by name.
# Only values whose class name is in the accepted list are included. Symbols
# are written as is, Arrays are joined with ",", and everything else is
# CGI-escaped with "/" left unescaped.
def self.hash2GET_params(hash)
  accepted = %w(Symbol String Float Fixnum Integer Numeric TrueClass FalseClass Module Class Object Array)
  pairs = []
  hash.sort_by{|k,v| k.to_s}.each do |key, value|
    next unless accepted.include? value.class.to_s
    encoded = if Symbol === value
                value.to_s
              elsif Array === value
                value * ","
              else
                CGI.escape(value.to_s).gsub('%2F','/')
              end
    name = Symbol === key ? key.to_s : key
    pairs << [name, encoded] * "="
  end
  pairs * "&"
end

.hash2string(hash) ⇒ Object



46
47
48
49
50
51
52
# File 'lib/rbbt/util/misc/options.rb', line 46

# Serializes a Hash as "key=value" pairs joined by "#", sorted by key name.
# Symbols are written with a leading ":"; other keys and values use their
# chomped to_s. Entries whose value class name is not in the accepted list
# are skipped.
def self.hash2string(hash)
  accepted = %w(Symbol String Float Fixnum Integer Numeric TrueClass FalseClass Module Class Object)
  render = lambda { |obj| Symbol === obj ? ":" << obj.to_s : obj.to_s.chomp }
  parts = []
  hash.sort_by{|k,v| k.to_s}.each do |key, value|
    next unless accepted.include? value.class.to_s
    parts << [render.call(key), render.call(value)] * "="
  end
  parts * "#"
end

.hash_to_html_tag_attributes(hash) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/rbbt/util/misc/options.rb', line 86

# Renders a Hash as HTML attributes (name='value'), separated by spaces.
# Arrays are joined with spaces; Strings, Symbols, true and Numerics use
# their text form. Entries with a nil key, a nil or empty-String value, or
# any other value type (including false) are skipped. Values are not escaped.
def self.hash_to_html_tag_attributes(hash)
  return "" if hash.nil? or hash.empty?
  attributes = []
  hash.each do |name, value|
    next if name.nil? or value.nil? or (String === value and value.empty?)
    text = case value
           when Array
             value * " "
           when String
             value
           when Symbol, TrueClass, Numeric
             value.to_s
           end
    next if text.nil?
    attributes << [name, "'" << text << "'"] * "="
  end
  attributes * " "
end

.html_tag(tag, content = nil, params = {}) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
# File 'lib/rbbt/util/misc/options.rb', line 108

# Builds an HTML tag String.
# Attributes come from hash_to_html_tag_attributes(params). A nil +content+
# produces a self-closing tag; otherwise the content's to_s is wrapped.
def self.html_tag(tag, content = nil, params = {})
  attributes = hash_to_html_tag_attributes(params)
  attributes = " " << attributes if String === attributes and attributes != ""
  return "<#{ tag }#{attributes}/>" if content.nil?
  "<#{ tag }#{attributes}>#{ content.to_s }</#{ tag }>"
end

.in_delta?(a, b, delta = 0.0001) ⇒ Boolean

Returns:

  • (Boolean)


128
129
130
# File 'lib/rbbt/util/misc/math.rb', line 128

# True when +a+ and +b+ (converted with to_f) differ by strictly less than +delta+.
def self.in_delta?(a, b, delta = 0.0001)
  gap = (a.to_f - b.to_f).abs
  gap < delta
end

.index_BED(source, destination, sorted = false) ⇒ Object



488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
# File 'lib/rbbt/util/misc/omics.rb', line 488

# Opens, or builds if missing, a sharded position index for a BED file.
#
# source      - BED input opened with Open.open.
# destination - location of the index; when it already exists it is opened
#               without rebuilding.
# sorted      - not used by this method.
#
# Index keys are "chr:start:end" and values are the ids from the fourth
# column. The sharder block uses the chromosome part of the key.
# Returns the Persist::Sharder.
def self.index_BED(source, destination, sorted = false)

  # Extracts [start, end] as Integers from a "chr:start:end" key.
  pos_function = Proc.new do |k|
    k.split(":").values_at(1, 2).collect{|i| i.to_i}
  end
  if Open.exists? destination
    Persist::Sharder.new destination, false, "fwt", :pos_function => pos_function  do |key|
      key.split(":")[0]
    end
  else
    # NOTE(review): `io` is not assigned before this line, so `IO === io` is
    # always false and the source is always opened; this was probably meant
    # to test `source` - confirm.
    io = IO === io ? io : Open.open(source) 

    # First pass: strip the first 'chr' from chromosome names, keep the first
    # four columns and track the longest id.
    max_size = 0
    nio = Misc.open_pipe do |sin|
      while line = io.gets
        chr, start, eend, id, *rest = line.chomp.split("\t")
        l = id.length
        max_size = l if max_size < l
        chr = chr.sub('chr','')
        sin << [chr, start, eend, id] * "\t" << "\n"
      end
    end

    TmpFile.with_file do |tmpfile|
      # The stream is written to tmpfile before max_size is read below.
      Misc.consume_stream(nio, false, tmpfile)

      value_size = max_size
      destination = destination.find if Path === destination
      sharder = Persist::Sharder.new destination, true, "fwt", :value_size => value_size, :range => true, :pos_function => pos_function  do |key|
        key.split(":")[0]
      end

      TSV.traverse tmpfile, :type => :array, :bar => "Creating BED index for #{Misc.fingerprint source}" do |line|
        next if line.empty?
        chr, start, eend, id, *rest = line.split("\t")
        key = [chr, start, eend] * ":"
        sharder[key] = id
      end
      sharder.read

      sharder
    end
  end
end

.insist(times = 4, sleep = nil, msg = nil) ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/rbbt/util/misc/development.rb', line 137

# Runs the block, retrying when it raises.
#
# times - maximum number of attempts, or an Array of sleep durations whose
#         length becomes the number of attempts.
# sleep - seconds to wait between attempts; when nil a schedule of
#         increasing delays is generated on the first failure.
# msg   - extra text for the warning log; false silences the warning.
#
# TryAgain retries without counting as an attempt, StopInsist and
# Aborted / Interrupt stop retrying at once, and any other exception is
# retried until +times+ attempts have failed, after which it is re-raised.
def self.insist(times = 4, sleep = nil, msg = nil)
  sleep_array = nil

  try = 0
  begin
    begin
      yield
    rescue Exception
      # Work out the sleep schedule, then re-raise so that the outer rescue
      # clauses decide what to do with the exception.
      if Array === times
        sleep_array = times
        times = sleep_array.length
        sleep = sleep_array.shift
      end

      if sleep.nil?
        sleep_array = ([0] + [0.001, 0.01, 0.1, 0.5] * (times / 3)).sort[0..times-1]
        sleep = sleep_array.shift
      end
      raise $!
    end
  rescue TryAgain
    # `sleep sleep` calls Kernel#sleep with the local variable named sleep.
    sleep sleep
    retry
  rescue StopInsist
    raise $!.exception
  rescue Aborted, Interrupt
    if msg
      Log.warn("Not Insisting after Aborted: #{$!.message} -- #{msg}")
    else
      Log.warn("Not Insisting after Aborted: #{$!.message}")
    end
    raise $!
  rescue Exception
    Log.exception $! if ENV["RBBT_LOG_INSIST"] == 'true'
    if msg
      Log.warn("Insisting after exception: #{$!.class} #{$!.message} -- #{msg}")
    elsif FalseClass === msg
      nil
    else
      Log.warn("Insisting after exception:  #{$!.class} #{$!.message}")
    end

    # The first failure only yields to other threads; later ones sleep and
    # move on to the next delay in the schedule, keeping the last one.
    if sleep and try > 0
      sleep sleep
      sleep = sleep_array.shift || sleep if sleep_array
    else
      Thread.pass
    end

    try += 1
    retry if try < times
    raise $!
  end
end

.intersect_sorted_arrays(a1, a2) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/rbbt/util/misc/manipulation.rb', line 60

# Returns the elements common to two sorted arrays.
# Both arrays are consumed (elements are shifted off) while walking them in
# parallel; a nil element ends the walk.
def self.intersect_sorted_arrays(a1, a2)
  left, right = a1.shift, a2.shift
  common = []
  loop do
    break if left.nil? or right.nil?
    order = left <=> right
    if order == 0
      common << left
      left, right = a1.shift, a2.shift
    elsif order == -1
      left = a1.shift while not left.nil? and left < right
    elsif order == 1
      right = a2.shift
      right = a2.shift while not right.nil? and right < left
    end
  end
  common
end

.intersect_streams(f1, f2, out, sep = ":") ⇒ Object



430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
# File 'lib/rbbt/util/misc/omics.rb', line 430

# Writes to +out+ every pair of overlapping records from two streams, as
# "line1<TAB>line2".
#
# f1, f2 - IO-like streams of records parsed with intersect_streams_read
#          (chromosome, start, end); f2 must support pos and seek.
# out    - object responding to puts.
# sep    - field separator handed to intersect_streams_read.
#
# NOTE(review): the walk assumes both streams are sorted by chromosome (in
# the order given by intersect_streams_cmp_chr) and start position - confirm.
# Returns nil right away when either stream is empty.
def self.intersect_streams(f1, f2, out, sep=":")
  finish = false
  return if f1.eof? or f2.eof?
  line1, chr1, start1, eend1, rest1 = intersect_streams_read(f1,sep)
  line2, chr2, start2, eend2, rest2 = intersect_streams_read(f2,sep)
  while not finish
    # Decide which stream to advance: the one whose record comes first.
    cmp = intersect_streams_cmp_chr(chr1,chr2)
    case cmp
    when -1
      move = 1
    when 1
      move = 2
    else
      if eend1 < start2
        move = 1
      elsif eend2 < start1
        move = 2
      else
        # Overlap: remember where f2 is, scan it forward reporting every
        # record that overlaps record 1, then rewind so the next record of f1
        # can be matched against the same f2 records.
        pos2 = f2.pos

        sline2, schr2, sstart2, seend2, srest2 = line2, chr2, start2, eend2, rest2
        while chr1 == chr2 and eend1 >= start2
          out.puts line1 + "\t" + line2 if start1 <= eend2
          if f2.eof?
            # Sentinel chromosome name that ends the scan at the end of f2.
            chr2 = 'next2'
          else
            line2, chr2, start2, eend2, rest2 = intersect_streams_read(f2,sep)
          end
        end
        line2, chr2, start2, eend2, rest2 = sline2, schr2, sstart2, seend2, srest2
        f2.seek(pos2)
        move = 1
      end
    end

    # Advance the chosen stream; finish when it is exhausted.
    case move
    when 1
      if f1.eof?
        finish = true
      else
        line1, chr1, start1, eend1, rest1 = intersect_streams_read(f1,sep)
      end
    when 2
      if f2.eof?
        finish = true
      else
        line2, chr2, start2, eend2, rest2 = intersect_streams_read(f2,sep)
      end
    end
  end
end

.intersect_streams_cmp_chr(chr1, chr2) ⇒ Object



426
427
428
# File 'lib/rbbt/util/misc/omics.rb', line 426

# Chromosome ordering used by intersect_streams: a plain <=> comparison of
# the two names.
def self.intersect_streams_cmp_chr(chr1, chr2)
  order = (chr1 <=> chr2)
  order
end

.intersect_streams_read(io, sep = ":") ⇒ Object



359
360
361
362
363
364
365
366
367
368
369
370
# File 'lib/rbbt/util/misc/omics.rb', line 359

# Reads one line from +io+ and splits it into location fields.
# Returns [line, chr, start, end, rest], with start and end as Integers.
# When the third field is not numeric (optionally followed by a tab and more
# text), the end equals the start.
def self.intersect_streams_read(io, sep=":")
  line = io.gets.chomp
  chr, start, eend, *rest = line.split(sep, -1)
  start = start.to_i
  eend = (eend =~ /^\d+(\t.*)?$/) ? eend.to_i : start
  [line, chr, start, eend, rest]
end

.IUPAC_to_base(iupac) ⇒ Object



294
295
296
# File 'lib/rbbt/util/misc/omics.rb', line 294

# Looks up +iupac+ in IUPAC2BASE and returns the Array of bases it stands
# for, or nil when the code is unknown.
def self.IUPAC_to_base(iupac)
  bases = IUPAC2BASE[iupac]
  bases
end

.load_yaml(yaml) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/rbbt/util/misc/serialize.rb', line 2

# Loads YAML from several kinds of source.
#
# yaml - an IO or StringIO, a Path (opened through its open method), or a
#        String holding either a file name (per Misc.is_filename?) or YAML text.
#
# SECURITY: uses YAML.unsafe_load when available, which can instantiate
# arbitrary objects; only use it on trusted input.
#
# Raises RuntimeError for unsupported objects. This includes nil, which used
# to reach the Path branch when Path was not defined.
def self.load_yaml(yaml)
  if IO === yaml || StringIO === yaml
    if YAML.respond_to?(:unsafe_load)
      YAML.unsafe_load(yaml)
    else
      YAML.load(yaml)
    end
  elsif defined?(Path) && Path === yaml
    yaml.open do |io|
      load_yaml(io)
    end
  elsif String === yaml
    if Misc.is_filename?(yaml)
      File.open(yaml) do |io|
        load_yaml(io)
      end
    else
      load_yaml(StringIO.new(yaml))
    end
  else
    raise "Unknown YAML object: #{Misc.fingerprint yaml}"
  end
end

.log10(x) ⇒ Object



9
10
11
# File 'lib/rbbt/util/misc/math.rb', line 9

# Base-10 logarithm of +x+, computed as the natural logarithm times Log10Multiplier.
def self.log10(x)
  natural = Math.log(x)
  natural * Log10Multiplier
end

.log2(x) ⇒ Object



5
6
7
# File 'lib/rbbt/util/misc/math.rb', line 5

# Base-2 logarithm of +x+, computed as the natural logarithm times Log2Multiplier.
def self.log2(x)
  natural = Math.log(x)
  natural * Log2Multiplier
end

.match_fields(field1, field2) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
# File 'lib/rbbt/util/misc/objects.rb', line 3

# True when two field names are equal, either literally or after reducing
# each one to the text inside its parentheses (when it has any).
def self.match_fields(field1, field2)
  return true if field1 == field2

  inner = lambda do |field|
    found = field.match(/\((.*)\)/)
    found ? found[1] : field
  end

  inner.call(field1) == inner.call(field2)
end

.match_value(value, condition) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/rbbt/util/misc.rb', line 43

# Tests +value+ against +condition+.
#
# condition - a String (parsed first with _convert_match_condition), or:
#   Regexp        - value must match it
#   nil / true    - value must be true or the text "true" (any case)
#   false         - value must be false or the text "false" (any case)
#   String        - equality; numeric equality when value is Numeric
#   Numeric       - numeric equality
#   [:cmp, op, n] - value.to_f compared with n using operator op
#   [:invert, c]  - negation of condition c
#   other Array   - true when any of its conditions matches
#
# Raises RuntimeError for any other condition.
def self.match_value(value, condition)
  condition = _convert_match_condition(condition.strip) if String === condition

  case condition
  when Regexp
    !! value.match(condition)
  when NilClass, TrueClass
    # The class must be the receiver of ===; `value === TrueClass` compared
    # the value with the class object and never matched a real boolean.
    TrueClass === value or (String === value and value.downcase == 'true')
  when FalseClass
    FalseClass === value or (String === value and value.downcase == 'false')
  when String
    Numeric === value ? value.to_f == condition.to_f : value == condition
  when Numeric
    value.to_f == condition.to_f
  when Array
    case condition.first
    when :cmp
      value.to_f.send(condition[1], condition[2])
    when :invert
      ! match_value(value, condition[1] )
    else
      condition.any?{|e| match_value(value, e) }
    end
  else
    raise "Condition not understood: #{Misc.fingerprint condition}"
  end
end

.max(list) ⇒ Object



13
14
15
16
17
18
19
20
# File 'lib/rbbt/util/misc/math.rb', line 13

# Largest non-nil value in +list+ (compared with >), or nil when there is none.
def self.max(list)
  list.inject(nil) do |best, value|
    if value.nil?
      best
    elsif best.nil? || value > best
      value
    else
      best
    end
  end
end

.mean(list) ⇒ Object



44
45
46
# File 'lib/rbbt/util/misc/math.rb', line 44

# Arithmetic mean of the non-nil values in +list+, each converted with to_f.
def self.mean(list)
  present = list.compact
  total = sum(present.collect{|v| v.to_f })
  total / present.length
end

.median(array) ⇒ Object



48
49
50
51
52
# File 'lib/rbbt/util/misc/math.rb', line 48

# Median of the non-nil elements of +array+, as a Float.
#
# For an even number of elements the two central values are averaged.
# Returns nil when there is nothing to measure (empty input or only nils)
# instead of failing.
def self.median(array)
  sorted = array.reject { |value| value.nil? }.sort
  return nil if sorted.empty?

  lower = sorted[(sorted.length - 1) / 2]
  upper = sorted[sorted.length / 2]
  (lower + upper).to_f / 2
end

.memory_use(pid = nil) ⇒ Object



400
401
402
403
404
405
406
407
408
# File 'lib/rbbt/util/misc/development.rb', line 400

# Reads the VmRSS figure from /proc/<pid>/status and returns it as an
# Integer. Uses the current process ($$) when no pid is given.
#
# A ProcessFailed error raised while reading is turned into a RuntimeError
# naming the pid.
def self.memory_use(pid=nil)
  target = pid || $$
  begin
    status = Open.read("/proc/#{target}/status")
    status.match(/VmRSS:\s+(\d+)/)[1].to_i
  rescue ProcessFailed
    raise "Process with #{target} not found"
  end
  # Former implementation: `ps -o rss -p #{pid || $$}`.strip.split.last.to_i
end

.memprof ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/rbbt/util/misc/development.rb', line 110

# Runs the given block between Memprof.start and Memprof.stop and prints
# Memprof.stats afterwards, whether or not the block raised.
#
# Returns the block's result. Any exception is announced with
# "Profiling aborted" and re-raised.
def self.memprof
  require 'memprof'
  Memprof.start
  begin
    outcome = yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    Memprof.stop
    print Memprof.stats
  end

  outcome
end

.merge_sorted_arrays(a1, a2) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/rbbt/util/misc/manipulation.rb', line 79

# Merges two arrays that are already sorted into a single sorted array.
#
# When the current elements of both arrays compare equal only one copy is
# kept. The input arrays are read by index and left untouched.
#
# Raises ArgumentError when two elements cannot be compared (+<=>+ returns
# something other than -1, 0 or 1) instead of looping forever.
def self.merge_sorted_arrays(a1, a2)
  merged = []
  i = 0
  j = 0

  while i < a1.length && j < a2.length
    e1 = a1[i]
    e2 = a2[j]
    case e1 <=> e2
    when 0
      merged << e1
      i += 1
      j += 1
    when -1
      merged << e1
      i += 1
    when 1
      merged << e2
      j += 1
    else
      raise ArgumentError, "comparison of #{e1.class} with #{e2.class} failed"
    end
  end

  # At most one of the arrays still has elements; append that tail as is.
  merged.concat(a1[i..-1]) if i < a1.length
  merged.concat(a2[j..-1]) if j < a2.length
  merged
end

.min(list) ⇒ Object



22
23
24
25
26
27
28
29
# File 'lib/rbbt/util/misc/math.rb', line 22

# Smallest non-nil element of +list+ according to +<+, or nil when the list
# has no non-nil elements.
def self.min(list)
  smallest = nil
  list.each do |candidate|
    next if candidate.nil?
    smallest = candidate if smallest.nil? || candidate < smallest
  end
  smallest
end

.name2basename(file) ⇒ Object



107
108
109
# File 'lib/rbbt/util/misc.rb', line 107

# Flattens a path-like name into a single file name: "/" becomes "·" and
# "~" becomes "-"; the result is then handed to sanitize_filename.
def self.name2basename(file)
  flattened = file.gsub("/", '·')
  sanitize_filename(flattened.gsub("~", '-'))
end

.notify(description, event = 'notification', key = nil) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/rbbt/util/misc/communication.rb', line 15

# Sends a Pushbullet "note" push in a background thread.
#
# description - body of the note.
# event       - title of the note (defaults to 'notification').
# key         - Pushbullet access token; PUSHBULLET_KEY is used when nil.
#
# Returns the Thread running curl (its value is curl's standard output), or
# nil after logging a warning when no key is available.
#
# The request body is produced with JSON.generate and curl is started from
# an argument array, so quotes or shell metacharacters in the description,
# event or key cannot break the JSON document or reach a shell.
def self.notify(description, event='notification', key = nil)
  if PUSHBULLET_KEY.nil? and key.nil?
    Log.warn "Could not notify, no PUSHBULLET_KEY"
    return
  end

  require 'json'

  Thread.new do
    event ||= 'notification'
    key ||= PUSHBULLET_KEY
    payload = JSON.generate("type" => "note", "title" => event.to_s, "body" => description.to_s)
    command = [
      'curl', '-s',
      '--header', "Authorization: Bearer #{key}",
      '-X', 'POST', 'https://api.pushbullet.com/v2/pushes',
      '--header', 'Content-Type: application/json',
      '--data-binary', payload
    ]
    IO.popen(command) { |io| io.read }
  end
end

.object_delta(*args) ⇒ Object



276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/rbbt/util/misc/development.rb', line 276

# Reports which objects seen by ObjectSpace.each_object(*args) exist after
# the given block has run but were not seen before it.
#
# The whole measurement runs while holding MUTEX_FOR_THREAD_EXCLUSIVE. The
# set of new objects is logged with Log.info; the block's result is returned.
def self.object_delta(*args)
  res, delta = nil, nil
  MUTEX_FOR_THREAD_EXCLUSIVE.synchronize do
    pre = Set.new
    delta = Set.new

    # Snapshot of the objects present before the block runs.
    GC.start
    ObjectSpace.each_object(*args) do |o|
      pre.add o
    end

    res = yield

    # Anything listed now that was not in the first snapshot is the delta.
    GC.start
    ObjectSpace.each_object(*args) do |o|
      delta.add o unless pre.include? o
    end

  end
  Log.info "Delta: #{delta.inspect}"
  res
end

.ordered_divide(array, num) ⇒ Object

Divides the array into consecutive chunks of at most num elements each, preserving the original order; only the last chunk may be shorter.



221
222
223
224
225
226
227
228
229
230
231
# File 'lib/rbbt/util/misc/development.rb', line 221

# Splits +array+ into consecutive chunks of +num+ elements, keeping the
# original order. The last chunk holds the remainder and may be shorter.
# An empty input gives an empty list of chunks.
#
# Raises ArgumentError when +num+ is not a positive number of elements;
# such a value previously made the loop spin forever.
def self.ordered_divide(array, num)
  size = num.to_i
  raise ArgumentError, "chunk size must be positive: #{num.inspect}" unless size > 0

  (0...array.length).step(size).collect { |start| array[start, size] }
end

.parse_cmd_params(str) ⇒ Object



3
4
5
6
7
8
9
# File 'lib/rbbt/util/misc/options.rb', line 3

# Splits a command-line style string into its parameters.
#
# Text between quotes (single or double) becomes one parameter without the
# quotes; any other run of non-blank, non-quote characters is a parameter of
# its own. An Array argument is returned unchanged.
def self.parse_cmd_params(str)
  return str if Array === str

  pattern = /
           (?:["']([^"']*?)["']) |
           ([^"'\s]+)
  /x
  str.scan(pattern).flat_map { |captures| captures.compact }
end

.positional2hash(keys, *values) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/rbbt/util/misc/options.rb', line 11

# Pairs positional +values+ with +keys+ through Misc.zip2hash.
#
# When the last value is a Hash it is taken as extra named values: nil and
# empty-string positional entries are dropped, the extras are merged in with
# Misc.add_defaults, and finally every entry whose key is not listed in
# +keys+ (as given, or in its String/Symbol counterpart) is removed.
def self.positional2hash(keys, *values)
  return Misc.zip2hash(keys, values) unless Hash === values.last

  extra = values.pop
  inputs = Misc.zip2hash(keys, values)
  inputs.delete_if { |_, v| v.nil? || (String === v && v.empty?) }
  inputs = Misc.add_defaults inputs, extra
  inputs.delete_if do |k, _|
    next false if keys.include?(k)
    counterpart = Symbol === k ? k.to_s : k.to_sym
    !keys.include?(counterpart)
  end
  inputs
end

.pre_fork ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rbbt/util/misc/development.rb', line 8

# Housekeeping meant to run before forking: closes every connection in
# Persist::CONNECTIONS that reports write?, clears Log::ProgressBar::BARS
# and unlocks every locked Mutex found in ObjectSpace, ignoring the
# ThreadError raised when a mutex cannot be unlocked.
def self.pre_fork
  Persist::CONNECTIONS.values.each { |db| db.close if db.write? }
  Log::ProgressBar::BARS.clear
  ObjectSpace.each_object(Mutex) do |mutex|
    next unless mutex.locked?
    begin
      mutex.unlock
    rescue ThreadError
    end
  end
end

.prepare_entity(entity, field, options = {}) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/rbbt/util/misc/objects.rb', line 16

# Sets up +entity+ (a String or an Array) as an Entity for +field+.
#
# The argument is returned untouched when the Entity constant is not
# defined, when entity is neither a String nor an Array, or when no entity
# module is found for the field.
#
# options - passed on (as a copy) to the module's +setup+. The :dup_array
#           key is removed from the caller's hash; when true and the entity
#           is an Array, its non-nil elements are duplicated as well.
def self.prepare_entity(entity, field, options = {})
  return entity unless defined? Entity
  return entity unless String === entity or Array === entity
  options ||= {}

  # NOTE: this deletes the key from the hash handed in by the caller.
  dup_array = options.delete :dup_array

  # Either field is itself matched by Entity, or a format is looked up for it.
  if Entity === field or (Entity.respond_to?(:formats) and (_format = Entity.formats.find(field)))
    params = options.dup

    # Accept the format under a String key too, then fall back to the format
    # found above unless a non-empty :format was already provided.
    params[:format] ||= params.delete "format"
    params.merge!(:format => _format) unless _format.nil? or (params.include?(:format) and not ((f = params[:format]).nil? or (String === f and f.empty?)))

    mod = Entity === field ? field : Entity.formats[field]

    # Work on a copy so the caller's object is not the one being set up.
    entity = entity.dup
    entity = (entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity) 

    entity = mod.setup(entity, params)
  end

  entity
end

.process_options(hash, *keys) ⇒ Object



142
143
144
145
146
147
148
149
150
151
# File 'lib/rbbt/util/misc/options.rb', line 142

# Extracts option values from +hash+, removing them from it.
#
# Each key is looked up first as a Symbol and then as a String. A trailing
# Hash in +keys+ provides defaults, used for requested keys that are absent
# from +hash+ under both forms (a key present with a nil value is not
# replaced).
#
# Returns the single value when one key is requested, otherwise an Array of
# values in the order of the keys.
#
# The defaults used to be computed into a misspelled, unused variable and
# were therefore never consulted.
def self.process_options(hash, *keys)
  defaults = Hash === keys.last ? keys.pop : {}

  values = keys.collect do |key|
    names = [key.to_sym, key.to_s]
    present = names.find { |name| hash.include?(name) }
    if present
      hash.delete(present)
    else
      fallback = names.find { |name| defaults.include?(name) }
      fallback ? defaults[fallback] : nil
    end
  end

  keys.length == 1 ? values.first : values
end

.process_to_hash(list) ⇒ Object



41
42
43
44
# File 'lib/rbbt/util/misc/options.rb', line 41

# Yields the whole +list+ to the block and pairs each element of the list
# with the element at the same position in the block's result (zip2hash).
def self.process_to_hash(list)
  zip2hash(list, yield(list))
end

.profile(options = {}) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/rbbt/util/misc/development.rb', line 93

# Profiles the given block with ruby-prof and prints a flat report to
# STDOUT, passing +options+ on to the printer.
#
# Returns the block's result. Any exception is announced with
# "Profiling aborted" and re-raised; the report is printed either way.
def self.profile(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    outcome = yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    report = RubyProf::FlatPrinter.new(RubyProf.stop)
    report.print(STDOUT, options)
  end

  outcome
end

.profile_graph(options = {}) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/rbbt/util/misc/development.rb', line 75

# Profiles the given block with ruby-prof and prints a call-graph report to
# STDOUT, passing +options+ on to the printer.
#
# Returns the block's result. Any exception is announced with
# "Profiling aborted" and re-raised; the report is printed either way.
def self.profile_graph(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    outcome = yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    profile = RubyProf.stop
    # profile.eliminate_methods!([/annotated_array_clean_/])
    report = RubyProf::GraphPrinter.new(profile)
    report.print(STDOUT, options)
  end

  outcome
end

.profile_html(options = {}) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/rbbt/util/misc/development.rb', line 54

# Profiles the given block with ruby-prof, writes a RubyProf::MultiPrinter
# report into a temporary directory and opens that directory with firefox.
#
# Returns the block's result. Any exception is announced with
# "Profiling aborted" and re-raised; the report is produced either way.
# The +options+ argument is accepted but not used.
def self.profile_html(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    outcome = yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    report = RubyProf::MultiPrinter.new(RubyProf.stop)
    TmpFile.with_file do |dir|
      FileUtils.mkdir_p dir unless File.exist? dir
      report.print(:path => dir, :profile => 'profile')
      CMD.cmd("firefox  -no-remote  '#{ dir }'")
    end
  end

  outcome
end

.proportions(array) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/rbbt/util/misc/math.rb', line 87

# Fraction of +array+ taken up by each distinct element.
#
# Returns a Hash (default value 0) from element to its proportion, computed
# as count / total so that the proportions of a non-empty array add up to 1
# without accumulated rounding error. The hash gets its own +to_s+ listing
# one "proportion<TAB>element" line per entry, sorted by proportion and then
# by element.
#
# The listing used to format the fractions with "%3d", which printed every
# proportion as 0 or 1; three decimals are printed now.
def self.proportions(array)
  total = array.length

  counts = Hash.new 0
  array.each { |e| counts[e] += 1 }

  proportions = Hash.new 0
  counts.each { |e, count| proportions[e] = count.to_f / total }

  proportions.define_singleton_method(:to_s) do
    sort{|a,b| a[1] == b[1] ? a[0] <=> b[0] : a[1] <=> b[1]}.collect{|k,c| "%.3f\t%s" % [c, k]} * "\n"
  end

  proportions
end

.pull_keys(hash, prefix) ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/rbbt/util/misc/options.rb', line 153

# Moves the entries of +hash+ that belong to +prefix+ into a new Hash.
#
# A String or Symbol key of the form "<prefix>_<rest>" is removed from
# +hash+ and stored under "<rest>" (kept as String or Symbol, like the
# original key). A key equal to the prefix itself is moved unchanged.
#
# The prefix must now appear at the start of the key and is matched
# literally. Before, prefix "cache" also captured :nocache_x, and regular
# expression metacharacters in the prefix were interpreted.
def self.pull_keys(hash, prefix)
  pulled = {}
  prefix_name = prefix.to_s
  pattern = /\A#{Regexp.escape(prefix_name)}_(.*)/

  hash.keys.each do |key|
    name = key.to_s
    if (match = pattern.match(name))
      case key
      when String
        pulled[match[1]] = hash.delete key
      when Symbol
        pulled[match[1].to_sym] = hash.delete key
      end
    elsif name == prefix_name
      pulled[key] = hash.delete key
    end
  end

  pulled
end

.random_sample_in_range(total, size) ⇒ Object



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/rbbt/util/misc/development.rb', line 233

# Draws +size+ distinct random positions from 0...total and returns them in
# a Set.
#
# For large samples (size > total / 10) positions are drawn from a shrinking
# pool: the chosen slot is filled with the pool's last element and the pool
# loses one element per draw. For small samples random positions are simply
# redrawn until one not yet chosen comes up.
def self.random_sample_in_range(total, size)
  chosen = Set.new

  if size > total / 10
    pool = (0..total - 1).to_a
    size.times do |round|
      slot = (rand * (total - round)).floor
      if slot == pool.length - 1
        picked = pool.pop
      else
        picked = pool[slot]
        tail = pool[-1]
        pool.pop
        pool[slot] = tail
      end
      chosen << picked
    end
  else
    size.times do
      candidate = (rand * total).floor
      candidate = (rand * total).floor while chosen.include?(candidate)
      chosen << candidate
    end
  end

  chosen
end

.reset_do_once ⇒ Object



133
134
135
# File 'lib/rbbt/util/misc/development.rb', line 133

# Sets the $__did_once global flag back to false.
def self.reset_do_once
  $__did_once = false
end

.sample(ary, size, replacement = false) ⇒ Object



264
265
266
267
268
269
270
271
272
# File 'lib/rbbt/util/misc/development.rb', line 264

# Random sample of +size+ elements from +ary+.
#
# Uses the collection's own +sample+ when it has one; otherwise random
# positions come from random_sample_in_range and are fetched with
# +values_at+. The +replacement+ argument is accepted but not used.
def self.sample(ary, size, replacement = false)
  return ary.sample(size) if ary.respond_to? :sample

  positions = random_sample_in_range(ary.length, size)
  ary.values_at(*positions)
end

.sanitize_filename(filename, length = 254) ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/rbbt/util/misc.rb', line 111

# Shortens +filename+ to at most +length+ characters.
#
# Names that already fit are returned as they are. Longer names are cut and
# finished with a marker made of the original length, the limit, the first
# five characters of Misc.digest of the full name and the original extension
# (a final dot followed by 2 to 9 characters), if there is one.
def self.sanitize_filename(filename, length = 254)
  return filename unless filename.length > length

  extension = filename =~ /(\..{2,9})$/ ? $1 : ''
  post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}" + extension

  filename[0..(length - post_fix.length - 1)] << post_fix
end

.sd(list) ⇒ Object



71
72
73
74
75
# File 'lib/rbbt/util/misc/math.rb', line 71

# Standard deviation of +list+: the square root of its variance.
# Returns nil for lists with fewer than three elements.
def self.sd(list)
  return nil if list.length < 3

  Math.sqrt(self.variance(list))
end

.select_ranges(stream1, stream2, sep = "\t") ⇒ Object



482
483
484
485
486
# File 'lib/rbbt/util/misc/omics.rb', line 482

# Opens a pipe with Misc.open_pipe and lets intersect_streams write the
# intersection of +stream1+ and +stream2+ into it, using +sep+ as the field
# separator. Returns whatever Misc.open_pipe returns.
def self.select_ranges(stream1, stream2, sep = "\t")
  Misc.open_pipe { |sin| intersect_streams(stream1, stream2, sin, sep) }
end

.send_email(from, to, subject, message, options = {}) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/rbbt/util/misc/communication.rb', line 48

# Sends an e-mail over SMTP using the mail gem.
#
# from, to - addresses of sender and recipient.
# subject  - subject line.
# message  - text used as the body of the text part.
# options  - :server (default 'localhost'), :port (25), :user, :pass,
#            :from_alias, :to_alias, :auth (default :login; read but not
#            passed on to the delivery method) and :files, a file or list of
#            files to attach.
#
# Returns the value of Mail.deliver.
#
# Attachments: Path and Step entries are resolved to file names and every
# file is attached with add_file. The loop used to resolve the names and
# then discard them, so nothing was ever attached.
def self.send_email(from, to, subject, message, options = {})
  require 'mail'

  IndiferentHash.setup(options)
  options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login, :files => []

  server, port, user, pass, from_alias, to_alias, auth, files = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth, :files

  files = [] if files.nil?
  files = [files] unless Array === files

  Mail.defaults do
    delivery_method :smtp, address: server, port: port, user_name: user, password: pass
  end

  # Inside the block the bare calls (from, to, subject, add_file) are
  # methods of the message under construction; the interpolated names are
  # this method's arguments.
  Mail.deliver do 
    from  "#{from_alias} <#{from}>"
    to "#{to_alias} <#{to}>"
    subject subject

    text_part do 
      body message
    end

    files.each do |file|
      file = file.find if Path === file
      file = file.path if Step === file
      add_file file
    end
  end
end

.send_email_old(from, to, subject, message, options = {}) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/rbbt/util/misc/communication.rb', line 29

# Sends a plain-text e-mail through Net::SMTP.
#
# The message is made of From, To and Subject header lines, a blank line and
# the body. Connection settings come from +options+: :server (default
# 'localhost'), :port (25), :user, :pass, :auth (default :login), plus the
# display names :from_alias and :to_alias.
def self.send_email_old(from, to, subject, message, options = {})
  IndiferentHash.setup(options)
  options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login

  server, port, user, pass, from_alias, to_alias, auth = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth

  headers = "From: #{from_alias} <#{from}>\nTo: #{to_alias} <#{to}>\nSubject: #{subject}\n"
  msg = "#{headers}\n#{message}\n"

  Net::SMTP.start(server, port, server, user, pass, auth) do |smtp|
    smtp.send_message msg, from, to
  end
end

.sort_genomic_locations(stream, sep = ":") ⇒ Object



355
356
357
# File 'lib/rbbt/util/misc/omics.rb', line 355

# Sorts a stream of genomic locations with sort_stream: first field as
# text, second field numerically, fields separated by +sep+. '#' is passed
# to sort_stream as its second argument.
def self.sort_genomic_locations(stream, sep = ":")
  sort_args = "-k1,1 -k2,2n -t#{sep}"
  sort_stream(stream, '#', sort_args)
end

.sort_genomic_locations_by_contig(stream, contigs, sep = ":") ⇒ Object



339
340
341
342
343
344
345
346
347
348
349
# File 'lib/rbbt/util/misc/omics.rb', line 339

# Sorts a stream of genomic locations following the order of +contigs+.
#
# Every line is prefixed with the index of its first field in +contigs+,
# the stream is sorted numerically on that index and on the position (now
# the third field), and the prefix is removed again from the sorted lines.
def self.sort_genomic_locations_by_contig(stream, contigs, sep = ":")
  indexed = TSV.traverse(stream, :type => :array, :into => :stream) do |line|
    contig = line.partition(sep).first
    contigs.index(contig).to_s + sep + line
  end

  sorted = sort_stream(indexed, '#', "-k1,1n -k3,3n -t#{sep}")
  TSV.traverse(sorted, :type => :array, :into => :stream) do |line|
    line.partition(sep).last
  end
end

.sort_genomic_locations_strict(stream, sep = ":") ⇒ Object



351
352
353
# File 'lib/rbbt/util/misc/omics.rb', line 351

# Sorts a stream of genomic locations with sort_stream: first field in
# version order (sort's V modifier), second field numerically, fields
# separated by +sep+. '#' is passed to sort_stream as its second argument.
def self.sort_genomic_locations_strict(stream, sep = ":")
  sort_args = "-k1,1V -k2,2n -t#{sep}"
  sort_stream(stream, '#', sort_args)
end

.sort_mutation_stream(stream, sep = ":") ⇒ Object



583
584
585
# File 'lib/rbbt/util/misc/omics.rb', line 583

# Pipes +stream+ through a shell pipeline that keeps the lines containing
# +sep+, removes duplicates, rewrites a leading "M:" as "MT:" and sorts on
# the first field (text) and on the second and third fields (numeric).
# Returns the result of CMD.cmd, which is invoked with :pipe => true.
def self.sort_mutation_stream(stream, sep=":")
  pipeline = "grep '#{sep}' | sort -u | sed 's/^M:/MT:/' | env LC_ALL=C sort -k1,1 -k2,2n -k3,3n -t'#{sep}'"
  CMD.cmd(pipeline, :in => stream, :pipe => true, :no_fail => true)
end

.sort_mutation_stream_strict(stream, sep = ":") ⇒ Object



579
580
581
# File 'lib/rbbt/util/misc/omics.rb', line 579

# Same pipeline as sort_mutation_stream, with version ordering (sort -V)
# requested in the final sort. Returns the result of CMD.cmd, which is
# invoked with :pipe => true.
def self.sort_mutation_stream_strict(stream, sep=":")
  pipeline = "grep '#{sep}' | sort -u | sed 's/^M:/MT:/' | env LC_ALL=C sort -V -k1,1 -k2,2n -k3,3n -t'#{sep}'"
  CMD.cmd(pipeline, :in => stream, :pipe => true, :no_fail => true)
end

.sort_mutations_strict(mutations) ⇒ Object Also known as: sort_mutations



299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# File 'lib/rbbt/util/misc/omics.rb', line 299

# Sorts mutations written as "chromosome:position:change" by chromosome,
# then position, then change.
#
# A leading "chr" (any case) is ignored. Chromosomes are ordered
# numerically, followed by X (23), Y (24) and MT / M (25). Other names are
# converted with +to_i+. The original strings are returned, untouched.
#
# Y used to be mapped to 22, which interleaved it with chromosome 22.
def self.sort_mutations_strict(mutations)
  special = { "X" => 23, "Y" => 24, "MT" => 25, "M" => 25 }

  keyed = mutations.collect do |mutation|
    chr, pos, mut = mutation.split ":"
    chr = chr.to_s.sub(/^chr/i, '')
    [special.fetch(chr) { chr.to_i }, pos.to_i, mut, mutation]
  end

  # Compare on chromosome, position and change; the last slot only carries
  # the original string.
  keyed.sort { |a, b| a[0..2] <=> b[0..2] }.collect { |entry| entry.last }
end

.sorted_array_hits(a1, a2) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/rbbt/util/misc/manipulation.rb', line 36

# Walks two sorted arrays in parallel and returns the positions, counted
# from the start of +a1+, of the elements that compare equal to an element
# of +a2+.
#
# Both arrays are consumed with +shift+ while walking, so the caller's
# arrays are modified. The walk stops as soon as either current element is
# nil.
# NOTE(review): when <=> returns something other than 0, -1 or 1 none of the
# branches advances either array - confirm inputs are always comparable.
def self.sorted_array_hits(a1, a2)
  e1, e2 = a1.shift, a2.shift
  counter = 0 # position of e1 in the original a1
  match = []
  while true
    break if e1.nil? or e2.nil?
    case e1 <=> e2
    when 0
      # Same element on both sides: record the position and advance both.
      match << counter
      e1, e2 = a1.shift, a2.shift
      counter += 1
    when -1
      # a1 is behind: skip ahead in a1, keeping the position in step.
      while not e1.nil? and e1 < e2
        e1 = a1.shift 
        counter += 1
      end
    when 1
      # a2 is behind: skip ahead in a2.
      e2 = a2.shift
      e2 = a2.shift while not e2.nil? and e2 < e1
    end
  end
  match
end

.ssh_connection(server, reset = false) ⇒ Object



439
440
441
442
443
444
445
446
447
448
449
450
451
# File 'lib/rbbt/util/misc/development.rb', line 439

# Returns the cached ssh session for +server+, starting one when none is
# cached or when +reset+ is true.
#
# A session is the triple [write, master, pid]: the write end of a pipe
# connected to the standard input of the spawned ssh process, the master
# side of a PTY receiving its standard output, and the process id.
def self.ssh_connection(server, reset = false)
  @@ssh_connections ||= {}
  @@ssh_connections.delete server if reset
  @@ssh_connections[server] ||= begin
                                  require 'pty'
                                  master, slave = PTY.open
                                  read, write = Misc.pipe
                                  # ssh reads commands from the pipe and writes its output to the PTY.
                                  pid = spawn("ssh '#{server}' 'shopt -s expand_aliases; bash -l' ", :in => read, :out => slave, :err => STDERR.fileno)
                                  # These ends are closed in this process once they have been handed to spawn.
                                  read.close
                                  slave.close
                                  [write, master, pid]
                                end
end

.ssh_run(server, script = nil) ⇒ Object



434
435
436
437
# File 'lib/rbbt/util/misc/development.rb', line 434

# Runs a Ruby +script+ on +server+ by delegating to SSHLine.ruby, loading
# 'rbbt/util/ssh' on first use. Returns whatever SSHLine.ruby returns.
def self.ssh_run(server, script = nil)
  require 'rbbt/util/ssh'

  SSHLine.ruby server, script
end

.ssh_run_alt(server, script = nil) ⇒ Object



453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
# File 'lib/rbbt/util/misc/development.rb', line 453

# Runs a Ruby +script+ on +server+ over the session kept by ssh_connection
# and returns its output as a String: the non-blank lines, stripped and
# joined with newlines.
#
# The script is sent as an `echo '...' | ruby` command, with its single
# quotes replaced by double quotes, and is followed by a statement printing
# the CMD_OUT_END marker; output is read up to that marker.
#
# Reading also stops when the session reaches end of file (this used to
# raise NoMethodError on nil), and a nil script is sent as an empty one.
def self.ssh_run_alt(server, script = nil)
  Log.debug "Run ssh script in #{server}:\n#{script}"

  write, master, pid = ssh_connection(server)
  # Open a fresh session when PTY.check reports a status for the cached one.
  write, master = ssh_connection(server, true) if PTY.check pid
  write.puts "echo '#{script.to_s.gsub("'", '"') + "\n" + 'puts "\nCMD_OUT_END"'}' | ruby "

  lines = []
  while (line = master.gets)
    text = line.strip
    break if text == "CMD_OUT_END"
    lines << text unless text.empty?
  end
  lines * "\n"
end

.ssh_run_old(server, script = nil) ⇒ Object



427
428
429
430
431
432
# File 'lib/rbbt/util/misc/development.rb', line 427

# Runs a Ruby +script+ on +server+ by feeding it to `ssh '<server>' ruby`
# through CMD.cmd, and returns what the command wrote.
def self.ssh_run_old(server, script = nil)
  Log.debug "Run ssh script in #{server}:\n#{script}"

  # Earlier variant: ssh '<server>' 'shopt -s expand_aliases; bash -l -c "ruby"'
  remote = CMD.cmd("ssh '#{server}' ruby", :in => script, :log => true)
  remote.read
end

.std_num_vector(v, min, max) ⇒ Object



31
32
33
34
35
36
37
38
# File 'lib/rbbt/util/misc/math.rb', line 31

# Rescales the values of +v+ linearly so that the smallest value of the
# vector maps to +min+ and the largest to +max+.
#
# nil and NaN entries are passed through unchanged. Integer entries are
# accepted too; the NaN test used to call +nan?+ on them, which Integer
# does not implement.
def self.std_num_vector(v, min, max)
  v_min = Misc.min(v)
  v_max = Misc.max(v)
  v_range = v_max - v_min
  range = max.to_f - min.to_f

  v.collect do |e|
    if e.nil? || (e.respond_to?(:nan?) && e.nan?)
      e
    else
      min + range * (e.to_f - v_min) / v_range
    end
  end
end

.string2const(string) ⇒ Object



21
22
23
24
25
26
27
28
29
30
# File 'lib/rbbt/util/misc/development.rb', line 21

# Resolves a constant from its name, following "::" separated segments with
# const_get starting at Kernel. Returns nil for a nil argument.
def self.string2const(string)
  return nil if string.nil?

  string.to_s.split('::').inject(Kernel) do |scope, name|
    scope.const_get name
  end
end

.string2hash(string) ⇒ Object

options end



219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/rbbt/util/misc/options.rb', line 219

# Parses an options string such as "a=1#:b=:sym#c" into a Hash.
#
# Entries are separated by "#" and each is "key=value". A key starting with
# ":" becomes a Symbol. Values are converted in this order:
#
# * no value          - true
# * :name             - Symbol
# * /expr/            - Regexp
# * 'text' or "text"  - the text without the quotes
# * digits            - Integer
# * digits.digits     - Float
# * true / false      - the boolean
# * anything else     - the String itself
#
# "false" is now stored as the boolean false; it used to fall through and be
# stored as the string "false". The unreachable eval fallback was removed.
def self.string2hash(string)
  options = {}

  string.split('#').each do |str|
    key, _sep, value = str.partition "="

    key = key[1..-1].to_sym if key[0] == ":"

    options[key] =
      if value.empty?
        true
      elsif value[0] == ":"
        value[1..-1].to_sym
      elsif value[0] == "/" and value[-1] == "/"
        /#{value[1..-2]}/
      elsif value =~ /^['"].*['"]$/
        value[1..-2]
      elsif value =~ /^\d+$/
        value.to_i
      elsif value =~ /^\d*\.\d+$/
        value.to_f
      elsif value == "true"
        true
      elsif value == "false"
        false
      else
        value
      end
  end

  options
end

.sum(list) ⇒ Object



40
41
42
# File 'lib/rbbt/util/misc/math.rb', line 40

# Adds up the non-nil elements of +list+, starting from 0.0, so the result
# of summing numbers is always a Float.
def self.sum(list)
  list.compact.inject(0.0) { |total, e| total + e }
end

.time_tick ⇒ Object



299
300
301
302
303
304
305
306
307
308
# File 'lib/rbbt/util/misc/development.rb', line 299

# Simple stopwatch kept in the $_last_time_tick global.
#
# The first call stores the current time and prints a start message; every
# later call prints the seconds and milliseconds elapsed since the previous
# call and stores the current time again.
def self.time_tick
  previous = $_last_time_tick

  unless previous.nil?
    elapsed = Time.now - previous
    whole_seconds = elapsed.to_i
    puts "Tick ellapsed: #{whole_seconds} s. #{(elapsed * 1000).to_i - whole_seconds * 1000} ms"
    return $_last_time_tick = Time.now
  end

  $_last_time_tick = Time.now
  puts "Tick started: #{Time.now}"
end

.timeout_insist(time, msg = nil, &block) ⇒ Object



469
470
471
472
473
474
475
476
477
478
# File 'lib/rbbt/util/misc/development.rb', line 469

# Runs the block under Timeout.timeout(time) inside Misc.insist.
#
# A timeout is raised as TryAgain (with +msg+), logged with Log.low and
# re-raised so that Misc.insist sees it.
def self.timeout_insist(time, msg = nil, &block)
  Misc.insist do
    begin
      Timeout.timeout(time, TryAgain, msg, &block)
    rescue TryAgain
      Log.low "Timeout detected after #{time} seconds"
      raise
    end
  end
end

.timespan(str, default = "s") ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/rbbt/util/misc.rb', line 75

# Converts a textual time span into a number of seconds.
#
# Accepted forms:
#
# * "H:M:S", "M:S"  - colon-separated clock notation.
# * "<n><unit>..."  - one or more amounts, each optionally followed by a
#                     unit: s, sec, m, min, h, d, w, mo (31 days),
#                     y (365 days), ' (minutes) or '' (seconds).
#                     Amounts without a unit use +default+.
# * a leading "-"   - negates the result.
#
# Raises ArgumentError for an unknown unit.
#
# Units are now matched as letters only (or quote marks), so compound spans
# such as "1h30m" parse instead of reading "h30m" as one unit, and the quote
# units, which the previous pattern could never capture, work.
def self.timespan(str, default = "s")

  return - timespan(str[1..-1], default) if str[0] == "-"
  
  if str.include?(":")
    seconds, minutes, hours = str.split(":").reverse
    return seconds.to_i + minutes.to_i * 60 + hours.to_i * 60 * 60
  end

  tokens = {
    "s" => (1),
    "sec" => (1),
    "m" => (60),
    "min" => (60),
    "''" => (1),
    "'" => (60),
    "h" => (60 * 60),
    "d" => (60 * 60 * 24),
    "w" => (60 * 60 * 24 * 7),
    "mo" => (60 * 60 * 24 * 31),
    "y" => (60 * 60 * 24 * 365),
  }

  # Amounts without a unit (nil or empty capture) use the default unit.
  tokens[nil] = tokens[default]
  tokens[""] = tokens[default]

  time = 0
  str.scan(/(\d+)(''|'|[a-zA-Z]+)?/).each do |amount, measure|
    factor = tokens[measure]
    raise ArgumentError, "Unknown time unit '#{measure}' in timespan: #{str}" if factor.nil?
    time += amount.to_i * factor
  end
  time
end

.tokenize(str) ⇒ Object



71
72
73
# File 'lib/rbbt/util/misc.rb', line 71

# Splits +str+ into tokens: the text inside double quotes, the text inside
# single quotes, or a run of characters that are neither blanks nor quotes.
def self.tokenize(str)
  pattern = /"([^"]*)"|'([^']*)'|([^"'\s]+)/
  str.scan(pattern).flat_map { |captures| captures.compact }
end

.total_length(ranges) ⇒ Object



32
33
34
# File 'lib/rbbt/util/misc/manipulation.rb', line 32

# Total number of positions covered by +ranges+: the ranges are first
# merged with collapse_ranges and each resulting range contributes
# end - begin + 1.
def self.total_length(ranges)
  collapsed = self.collapse_ranges(ranges)
  collapsed.inject(0) { |total, range| total + (range.end - range.begin + 1) }
end

.translate_dna_mutation_hgvs2rbbt(cds) ⇒ Object



205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/rbbt/util/misc/omics.rb', line 205

# Translates the change part of an HGVS-style DNA mutation into rbbt
# notation:
#
# * substitution "...X>Y"     - the text after ">".
# * "a_bdelinsSEQ"            - one "-" per deleted position, then SEQ.
# * "...delN" / "...delSEQ"   - one "-" per deleted base.
# * "...insN" / "...insSEQ"   - "+" followed by N times "N", or by SEQ.
#
# Anything not understood is logged with Log.debug and returned as
# "?(<cds>)". The same happens when an error is raised while processing.
def self.translate_dna_mutation_hgvs2rbbt(cds)
  if cds =~ />/
    cds.split(">").last
  elsif cds =~ /delins/
    deleted, inserted = cds.split("delins")
    first, final = deleted.split("_")
    span = final.to_i - first.to_i + 1
    if inserted =~ /^[ACTG]+$/i
      ("-" * span) + inserted
    else
      Log.debug "Unknown delins: #{ cds }"
      "?(" << cds << ")"
    end
  elsif cds =~ /del/
    deletion = cds.split("del").last.chomp
    if deletion =~ /^\d+$/
      "-" * deletion.to_i
    elsif deletion =~ /^[ACTG]+$/i
      "-" * deletion.length
    else
      Log.debug "Unknown deletion: #{ cds }"
      "?(" << cds << ")"
    end
  elsif cds =~ /ins/
    insertion = cds.split("ins").last
    if insertion =~ /^\d+$/
      "+" + "N" * insertion.to_i
    elsif insertion =~ /^[NACTG]+$/i
      "+" + insertion
    else
      Log.debug "Unknown insertion: #{cds }"
      "?(" << cds << ")"
    end
  else
    Log.debug "Unknown change: #{cds}"
    "?(" << cds << ")"
  end
rescue
  Log.debug "Error processing change: #{cds}"
  "?(" << cds << ")"
end

.translate_prot_mutation_hgvs2rbbt(mutation) ⇒ Object



253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'lib/rbbt/util/misc/omics.rb', line 253

# Translates an HGVS-style protein mutation into rbbt notation.
#
# The "p." prefix is dropped and three-letter amino acid codes are turned
# into one-letter codes ("Ter" becomes "*"). The result is then classified:
#
# * substitution (e.g. "A123V")  - returned as is.
# * frameshift (contains "fs")   - "<ref><position>Frameshift".
# * contains ins, del or ">"     - "<ref><position>Indel".
#
# Returns nil, after a Log.debug message, when the mutation is not
# understood.
#
# The argument is no longer modified: the prefix used to be removed with
# sub!, which changed the caller's string and failed on frozen strings.
def self.translate_prot_mutation_hgvs2rbbt(mutation)
  mutation = mutation.sub('p.', '')

  if m = mutation.match(/([a-z]{3})(\d+)([a-z]{3})/i)
    ref, num, alt = m[1], m[2], m[3]
    ref = THREE_TO_ONE_AA_CODE[ref.downcase]
    alt = alt == "Ter" ? "*" : THREE_TO_ONE_AA_CODE[alt.downcase]
    mutation = [ref, num, alt] * ""
  end

  # One-letter codes plus the ambiguity, stop and unknown symbols.
  one_aa_code = THREE_TO_ONE_AA_CODE.values
  one_aa_code << "X" << "B" << "Z" << "J" << "*" << "?"
  one_aa_code_re = one_aa_code*""
  subs = Regexp.new("^[#{one_aa_code_re}]\\d+[#{one_aa_code_re}]")
  f_aa = Regexp.new("^[#{one_aa_code_re}]\\d+")

  case
  when mutation =~ subs
    mutation
  when mutation =~ /fs/
    if (position = mutation[f_aa])
      position + "Frameshift"
    else
      Log.debug "Unknown Frameshift: #{mutation}"
      nil
    end
  when mutation =~ /ins|del|>/
    if (position = mutation[f_aa])
      position + "Indel"
    else
      Log.debug "Unknown Indel"
      nil
    end
  else
    Log.debug "Unknown change: #{mutation}"
    nil
  end
end

.try3times(&block) ⇒ Object



192
193
194
# File 'lib/rbbt/util/misc/development.rb', line 192

# Shorthand for insist(3, &block).
def self.try3times(&block)
  attempts = 3
  insist(attempts, &block)
end

.unzip_in_dir(file, dir) ⇒ Object



411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
# File 'lib/rbbt/util/misc/development.rb', line 411

# Extracts the zip archive +file+ into +dir+ with the unzip command.
#
# Remote files (according to Open.remote?) are first downloaded into a
# temporary .zip file. Path objects are resolved with +find+.
#
# Raises a RuntimeError when +dir+ exists and is not a directory. The
# message used to name the archive instead of the offending target.
def self.unzip_in_dir(file, dir)
  raise "Target is not a directory: #{dir}" if File.exist?(dir) and not File.directory?(dir)

  if Open.remote? file
    file = file.find if Path === file
    Open.open(file) do |stream|
      TmpFile.with_file(stream.read, true, :extension => 'zip') do |zip_file|
        CMD.cmd("unzip '#{zip_file}' -d '#{dir}'")
      end
    end
  else
    file = file.find if Path === file
    CMD.cmd("unzip '#{file}' -d '#{dir}'")
  end
end

.variance(list) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/rbbt/util/misc/math.rb', line 54

# Sample variance of +list+: the sum of squared distances to the mean
# divided by (n - 1), where n counts the non-nil elements.
# Returns nil for lists with fewer than three elements.
def self.variance(list)
  return nil if list.length < 3

  center = mean(list)
  values = list.compact

  squared_distances = values.inject(0.0) do |acc, value|
    offset = value.to_f - center
    acc + offset * offset
  end

  squared_distances / (values.length - 1)
end

.zip2hash(list1, list2) ⇒ Object



33
34
35
36
37
38
39
# File 'lib/rbbt/util/misc/options.rb', line 33

# Builds a Hash that maps each element of +list1+ to the element of +list2+
# found at the same position.
def self.zip2hash(list1, list2)
  list1.each_with_index.each_with_object({}) do |(key, position), hash|
    hash[key] = list2[position]
  end
end

.zip_fields(array) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/rbbt/util/misc/objects.rb', line 78

# Zips the lists in +array+ with _zip_fields.
#
# Inputs of 10000 lists or more are processed in slices of 10000, each
# zipped to the length of the longest list of the whole input, and the
# zipped slices are then concatenated position by position.
def self.zip_fields(array)
  return _zip_fields(array) if array.length < 10000

  width = array.collect { |list| list.length }.max
  zipped_slices = array.each_slice(10000).collect { |slice| _zip_fields(slice, width) }

  merged = zipped_slices.first
  zipped_slices[1..-1].each do |zipped|
    zipped.each_with_index { |list, i| merged[i].concat list }
  end
  merged
end

.zscore(e, list) ⇒ Object



132
133
134
135
136
# File 'lib/rbbt/util/misc/math.rb', line 132

# Z-score of +e+ relative to +list+: its distance to Misc.mean(list)
# expressed in units of Misc.sd(list).
def self.zscore(e, list)
  center = Misc.mean(list)
  spread = Misc.sd(list)
  (e.to_f - center) / spread
end