Class: BacterialIdentificator

Inherits:
Object
  • Object
show all
Defined in:
lib/bacterial-identificator.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options, root) ⇒ BacterialIdentificator

Initializes a BacterialIdentificator from an options hash (:mash_file, :genome_list, :proc, :output) and the root directory that contains the mash binary.



23
24
25
26
27
28
29
30
31
# File 'lib/bacterial-identificator.rb', line 23

# Builds an identificator from an options hash and the tool's root directory.
#
# @param options [Hash] settings read by symbol key:
#   :mash_file   - value interpolated into the `mash dist` command as the reference
#   :genome_list - collection of genomes iterated by run_identification
#   :proc        - worker count, coerced with to_i (nil or non-numeric becomes 0)
#   :output      - output format name consulted by print_output
# @param root [String] directory under which `mash.linux` is looked up
def initialize(options, root)
  @root = root
  @mash_file = options[:mash_file]
  @genome_list = options[:genome_list]
  # The public reader is named `genomes_list`; keep its backing variable in
  # sync with @genome_list so the reader exposes the list instead of nil.
  @genomes_list = @genome_list
  @proc = options[:proc].to_i
  @output = options[:output]
end

Instance Attribute Details

#genomes_listObject (readonly)

Returns the value of attribute genomes_list.



19
20
21
# File 'lib/bacterial-identificator.rb', line 19

# Reader for the @genomes_list instance variable.
#
# @return [Object, nil] whatever is stored in @genomes_list; nil if it was never assigned
def genomes_list
  @genomes_list
end

#statsObject (readonly)

Returns the value of attribute stats.



19
20
21
# File 'lib/bacterial-identificator.rb', line 19

# Reader for the @stats instance variable.
#
# NOTE(review): no method visible on this page assigns @stats, so this
# presumably returns nil unless it is set elsewhere - confirm.
#
# @return [Object, nil] whatever is stored in @stats
def stats
  @stats
end

Instance Method Details

#consensus_referenceObject

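Builds the consensus species model: for every reference, sums the matching-hash counts of its hits across all genomes and returns the totals sorted by ascending score.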



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/bacterial-identificator.rb', line 71

# Accumulates a per-reference score over the hits of every genome.
#
# For each hit row, the part of row[3] before the first "/" is converted with
# to_i and added to the running total of the reference named in row[0].
#
# @return [Hash] reference => summed score, ordered by ascending score
def consensus_reference
  totals = Hash.new(0)

  @genome_hits.each_value do |hit_rows|
    hit_rows.each do |row|
      reference = row[0]
      totals[reference] += row[3].split("/")[0].to_i
    end
  end

  # sort_by yields [key, value] pairs; to_h turns them back into a plain Hash.
  totals.sort_by { |_reference, total| total }.to_h
end

#mash_genome(genome) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/bacterial-identificator.rb', line 50

# Runs `mash dist` for one genome against @mash_file and returns its hits.
#
# Each output line has the columns Reference-ID, Query-ID, Mash-distance,
# P-value and Matching-hashes. The Query-ID column is dropped, so every
# returned row is [reference, distance, p-value, matching-hashes] (all Strings).
# Lines whose matching-hashes numerator is "0" are discarded as non-matches.
#
# @param genome [String] genome argument handed to `mash dist`
# @return [Array<Array<String>>] hit rows ordered by ascending numeric distance
def mash_genome(genome)
  # Argument-vector form of IO.popen: no shell is involved, so paths containing
  # spaces or shell metacharacters reach mash unchanged.
  command = ["#{@root}/mash.linux", "dist", @mash_file.to_s, genome.to_s]
  raw_output = IO.popen(command, &:read)

  hits = []
  raw_output.each_line do |line|
    columns = line.chomp.split("\t")
    next if columns.empty? # blank line: nothing to parse
    next if columns[-1].split("/")[0] == '0' # no match
    hits << (columns[0..0] + columns[2..-1])
  end

  # Distances are strings such as "0.2" or "9.5e-05"; compare them as numbers,
  # not lexicographically.
  hits.sort_by { |hit| hit[1].to_f }
end

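#print_outputObject

Prints the genome hits to standard output as CSV, as JSON (together with the summary), or as tab-separated text, depending on the configured output format.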



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/bacterial-identificator.rb', line 89

# Writes @genome_hits to standard output in the format named by @output.
#
# * "csv"  - one line per hit: genome followed by the hit fields, comma-joined
# * "json" - pretty-printed object with `genomes` (hits keyed by genome, each
#            hit labelled hit/distance/e-value/score) and `summary`
# * anything else, including no format at all - one tab-separated line per hit
#
# The format name is matched case-insensitively.
#
# @return [nil]
def print_output
  # to_s keeps a missing format (nil) from raising; it then falls through to
  # the tab-separated default branch.
  case @output.to_s.downcase
  when "csv"
    @genome_hits.each do |genome, hits|
      hits.each { |hit| puts "#{genome},#{hit.join(',')}" }
    end
  when "json"
    field_names = ["hit", "distance", "e-value", "score"]
    labelled = {}
    @genome_hits.each do |genome, hits|
      labelled[genome] = hits.map { |hit| field_names.zip(hit).to_h }
    end
    puts JSON.pretty_generate({ genomes: labelled, summary: summary })
  else
    @genome_hits.each do |genome, hits|
      hits.each { |hit| puts "#{genome}\t#{hit.join("\t")}" }
    end
  end

  nil
end

#run_identificationObject



34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/bacterial-identificator.rb', line 34

# Runs mash_genome for every entry of @genome_list and prints the results.
#
# @genome_hits is first seeded with an empty hit list per genome, then each
# worker started through Parallel.map stores its genome's hits under that key.
#
# @return [Object] whatever print_output returns
def run_identification
  @genome_hits = @genome_list.each_with_object({}) do |genome, table|
    table[genome] = []
  end

  Parallel.map(@genome_list, in_threads: @proc) { |genome| @genome_hits[genome] = mash_genome(genome) }

  print_output
end

#summaryObject



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/bacterial-identificator.rb', line 118

# Summarises the identification results across all genomes.
#
# * :consensus   - the reference with the highest cumulative score reported by
#                  consensus_reference, or nil when there are no hits at all
# * :genome_hits - for each reference, how many genomes have it as their first
#                  (i.e. top-ranked) hit
#
# Genomes without any hit are skipped rather than raising.
#
# @return [Hash] { consensus: reference-or-nil, genome_hits: { reference => count } }
def summary
  top_hit_counts = {}

  @genome_hits.each_value do |hits|
    next if hits.empty? # genome had no matching reference

    best_reference = hits[0][0]
    top_hit_counts[best_reference] = top_hit_counts.fetch(best_reference, 0) + 1
  end

  # Pick the maximum explicitly instead of relying on the ordering of the hash
  # returned by consensus_reference (ascending, so `first` is the weakest).
  strongest = consensus_reference.max_by { |_reference, total| total }

  {
    consensus: strongest && strongest[0],
    genome_hits: top_hit_counts
  }
end