Class: SequenceFasta
- Inherits:
-
Object
- Object
- SequenceFasta
- Defined in:
- lib/bacterial-annotator/sequence-fasta.rb
Instance Attribute Summary collapse
-
#annotation_files ⇒ Object
readonly
Returns the value of attribute annotation_files.
-
#fasta_file ⇒ Object
readonly
Returns the value of attribute fasta_file.
-
#fasta_flat ⇒ Object
readonly
Returns the value of attribute fasta_flat.
Instance Method Summary collapse
-
#initialize(root, outdir, fasta_file, meta) ⇒ SequenceFasta
constructor
Initialize fasta holder.
-
#print_sequence_for_gbk(seq) ⇒ Object
Utility function to print the sequence to the end of a gbk file.
-
#run_prodigal ⇒ Object
Run prodigal on the genome to annotate.
-
#split_fasta ⇒ Object
Split a multi-FASTA file into one FASTA file per sequence. RETURN: array of fasta files.
-
#split_genbank ⇒ Object
Split a multi-record GenBank file into one GenBank file per record. RETURN: array of genbank files.
Constructor Details
#initialize(root, outdir, fasta_file, meta) ⇒ SequenceFasta
Initialize fasta holder
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/bacterial-annotator/sequence-fasta.rb', line 16
# Initialize fasta holder.
#
# Opens +fasta_file+ with Bio::FlatFile.auto and aborts the process when the
# detected database class is not Bio::FastaFormat.
#
# @param root [Object] interpolated as the directory holding the
#   +prodigal.linux+ executable in #run_prodigal
# @param outdir [Object] interpolated as the output directory path
# @param fasta_file [Object] path of the input sequence file
# @param meta [Object] stored as-is; #run_prodigal compares it against 1 to
#   decide whether to pass "-p meta" to prodigal
def initialize(root, outdir, fasta_file, meta)
  @root = root
  @outdir = outdir
  @fasta_file = fasta_file
  @fasta_flat = Bio::FlatFile.auto(@fasta_file)
  if @fasta_flat.dbclass != Bio::FastaFormat
    abort "Aborting : The input sequence is not a fasta file !"
  end
  # The fourth argument must be kept: without it @meta is always nil and
  # the metagenomic branch of #run_prodigal can never be taken.
  @meta = meta
  @annotation_files = nil
  @single_fasta = nil
  @seq_info = nil
end
Instance Attribute Details
#annotation_files ⇒ Object (readonly)
Returns the value of attribute annotation_files.
13 14 15 |
# File 'lib/bacterial-annotator/sequence-fasta.rb', line 13
# Read-only accessor.
#
# @return [Object] the current value of @annotation_files
def annotation_files
  @annotation_files
end
#fasta_file ⇒ Object (readonly)
Returns the value of attribute fasta_file.
13 14 15 |
# File 'lib/bacterial-annotator/sequence-fasta.rb', line 13
# Read-only accessor.
#
# @return [Object] the current value of @fasta_file
def fasta_file
  @fasta_file
end
#fasta_flat ⇒ Object (readonly)
Returns the value of attribute fasta_flat.
13 14 15 |
# File 'lib/bacterial-annotator/sequence-fasta.rb', line 13
# Read-only accessor.
#
# @return [Object] the current value of @fasta_flat
def fasta_flat
  @fasta_flat
end
Instance Method Details
#print_sequence_for_gbk(seq) ⇒ Object
Utility function to print the sequence to the end of a gbk file
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# File 'lib/bacterial-annotator/sequence-fasta.rb', line 117
# Utility function to print the sequence to the end of a gbk file.
#
# Builds the "ORIGIN" section text: the lower-cased sequence is emitted in
# rows of 60 characters, each row prefixed with the 1-based position of its
# first character (right-aligned in 9 columns) and split into groups of 10
# separated by single spaces. The text ends with "//" (no trailing newline).
# An empty sequence still yields one (empty) row.
#
# @param seq [#seq] object whose +seq+ method returns the sequence string
# @return [Array(String, Integer)] the ORIGIN text and the sequence length
def print_sequence_for_gbk(seq)
  sequence = seq.seq.downcase
  total = sequence.length
  # Appending in place with << avoids re-copying the whole buffer for every
  # 60-nt row, which the previous `+=` chain did.
  outseq = "ORIGIN\n".dup
  offset = 0
  loop do
    row = sequence[offset, 60]
    outseq << format("%9s ", offset + 1)
    outseq << row.scan(/.{1,10}/).join(" ")
    outseq << "\n"
    offset += 60
    break if offset >= total
  end
  outseq << "//"
  return outseq, total
end
#run_prodigal ⇒ Object
Run prodigal on the genome to annotate
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/bacterial-annotator/sequence-fasta.rb', line 37
# Run prodigal on the genome to annotate.
#
# Creates the output directory if needed, invokes "<root>/prodigal.linux" on
# the input fasta (adding "-p meta" when @meta == 1), records the produced
# file paths in @annotation_files, then calls split_fasta, split_genbank and
# extract_cds_names.
#
# @return [Hash] the @annotation_files hash
def run_prodigal
  outdir = "#{@outdir}"
  # Dir.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
  Dir.mkdir(outdir) unless Dir.exist?(outdir)

  proteins = "#{outdir}/Proteins.fa"
  genes = "#{outdir}/Genes.fa"
  multi_gbk = "#{outdir}/Genbanks.gbk"

  # Argument-list form of system: no shell is involved, so paths containing
  # spaces or shell metacharacters are passed to prodigal unchanged.
  command = ["#{@root}/prodigal.linux"]
  command.push("-p", "meta") if @meta == 1
  command.push("-i", "#{@fasta_file}",
               "-a", proteins,
               "-d", genes,
               "-o", multi_gbk,
               "-q")
  system(*command)

  @annotation_files = {
    multiGBK: multi_gbk,
    contigs: [],
    contigs_length: [],
    genes: genes,
    proteins: proteins,
    prot_ids_by_contig: {},
    fasta_path: "#{outdir}/single-fasta/",
    gbk_path: "#{outdir}/single-genbank/"
  }

  split_fasta
  split_genbank
  extract_cds_names
  @annotation_files
end
#split_fasta ⇒ Object
Split a multi-FASTA file into one FASTA file per sequence. RETURN: array of fasta files
68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/bacterial-annotator/sequence-fasta.rb', line 68
# Split the multi-fasta input into one file per entry.
#
# For every entry yielded by @fasta_flat.each_entry, the first
# whitespace-delimited token of its definition is used as the contig name:
# the name and the sequence length are appended to
# @annotation_files[:contigs] / [:contigs_length], the entry is written to
# "<outdir>/single-fasta/<name>.fasta", and the entry object is stored in
# @single_fasta under that name.
#
# NOTE(review): the original summary says "RETURN : array of fasta files",
# but the method returns whatever each_entry returns — confirm with callers.
def split_fasta
  @single_fasta = {}
  fasta_dir = "#{@outdir}/single-fasta"
  # Dir.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
  Dir.mkdir(fasta_dir) unless Dir.exist?(fasta_dir)
  @fasta_flat.each_entry do |seq|
    file_name = seq.definition.chomp.split(" ")[0]
    @annotation_files[:contigs] << "#{file_name}"
    @annotation_files[:contigs_length] << seq.seq.length
    File.open("#{fasta_dir}/#{file_name}.fasta", "w") do |fwrite|
      fwrite.write(seq)
    end
    @single_fasta[file_name] = seq
  end
end
#split_genbank ⇒ Object
Split a multi-record GenBank file into one GenBank file per record. RETURN: array of genbank files
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/bacterial-annotator/sequence-fasta.rb', line 85
# Split "<outdir>/Genbanks.gbk" into one file per record under
# "<outdir>/single-genbank/".
#
# Each line starting with "DEFINITION" opens a new "<name>.gbk" file, where
# <name> is the first token of the seqhdr field (third ";"-separated field),
# and writes a LOCUS line plus a simplified DEFINITION line. Following lines
# are copied verbatim except those containing " /note=". A line starting with
# "//" writes the ORIGIN block produced by print_sequence_for_gbk for
# @single_fasta[<name>] and closes the file.
#
# NOTE(review): the original summary says "RETURN : array of genbank files",
# but nothing is collected here — the method returns nil.
# NOTE(review): runs of spaces inside the LOCUS/DEFINITION literals may have
# been collapsed in the copy this was restored from — confirm column layout.
def split_genbank
  multigbk = "#{@outdir}/Genbanks.gbk"
  gbk_dir = "#{@outdir}/single-genbank"
  # Dir.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
  Dir.mkdir(gbk_dir) unless Dir.exist?(gbk_dir)
  File.open(multigbk, "r") do |f|
    fopen = nil
    outseq = nil
    begin
      while (l = f.gets)
        if l[0..9] == "DEFINITION"
          file_name = l.chomp.split(";")[2].gsub("seqhdr", "").delete("\"").delete("=").split(" ")[0]
          outseq, seq_length = print_sequence_for_gbk @single_fasta[file_name]
          spacer = " " * (20 - seq_length.to_s.length)
          # Core Time#strftime gives the same "dd-Mon-yyyy" text as the
          # former DateTime + Date::ABBR_MONTHNAMES combination.
          stamp = Time.now.strftime("%d-%b-%Y")
          locus = "LOCUS #{file_name}#{spacer}#{seq_length} bp DNA linear BCT #{stamp}\n"
          locus += "DEFINITION #{file_name}\n"
          # Do not leak the previous handle if its record had no "//" line.
          fopen.close if fopen && !fopen.closed?
          fopen = File.open("#{gbk_dir}/#{file_name}.gbk", "w")
          fopen.write(locus)
        elsif l[0..1] == "//"
          fopen.write(outseq)
          fopen.close
        elsif !l.include?(" /note=")
          fopen.write(l)
        end
      end
    ensure
      # Covers a truncated last record and any exception raised mid-record.
      fopen.close if fopen && !fopen.closed?
    end
  end
end