Class: NexusParser::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/nexus_parser/parser.rb

Instance Method Summary collapse

Constructor Details

#initialize(lexer, builder) ⇒ Parser

Returns a new instance of Parser.



4
5
6
7
# File 'lib/nexus_parser/parser.rb', line 4

# Builds a parser around a token source and a result builder.
#
# lexer   - object the parse_* methods call peek(token_class) and
#           pop(token_class) on.
# builder - object the parse_* methods report parsed data to
#           (add_var, update_taxon, update_chr, code_row, add_note, ...).
def initialize(lexer, builder)
  @lexer = lexer
  @builder = builder
end

Instance Method Details

#check_initialization_of_ntax_ncharObject



171
172
173
174
175
176
177
178
179
180
181
# File 'lib/nexus_parser/parser.rb', line 171

# Makes sure the builder holds placeholder characters and taxa once the
# corresponding dimensions are known.
#
# For each of vars[:nchar] / vars[:ntax] on the builder's nexus file: when
# the variable is set and the matching collection (characters / taxa) is
# still an empty array, the builder is asked to create that many stubs
# (stub_chr / stub_taxon). Nothing happens for a dimension that is unset or
# whose collection already has entries.
def check_initialization_of_ntax_nchar
  nexus_file = @builder.nexus_file

  # character dimensions: generate stubs if none have been set otherwise
  nchar = nexus_file.vars[:nchar]
  nchar.to_i.times { @builder.stub_chr } if nchar && nexus_file.characters == []

  # taxa dimensions: generate stubs if none have been set otherwise
  ntax = nexus_file.vars[:ntax]
  ntax.to_i.times { @builder.stub_taxon } if ntax && nexus_file.taxa == []
end

#parse_assumptions_blkObject



316
317
# File 'lib/nexus_parser/parser.rb', line 316

# Placeholder for ASSUMPTIONS block parsing; intentionally does nothing.
# parse_file pops the AssumptionsBlk token itself and does not call this.
#
# Returns nil.
def parse_assumptions_blk
  nil
end

#parse_authors_blkObject

Consumes and discards the AUTHORS block for the time being; its contents are not parsed.



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/nexus_parser/parser.rb', line 60

# Consumes the authors block without interpreting it.
#
# The block contains multi-word key/value pairs (e.g. "AUTHOR NAME"), which
# the ValuePair handling used elsewhere does not cover, so for now the whole
# thing is popped as a single AuthorsBlk token and dropped.
#
# TODO: parse the contents. An earlier, abandoned attempt looped until
# EndBlk, feeding each ValuePair to @builder.add_var and skipping stray ID
# tokens; note that add_var writes to one general hash, so keys could
# collide with those set by other blocks.
#
# Returns whatever @lexer.pop returns for the AuthorsBlk token.
def parse_authors_blk
  @lexer.pop(NexusParser::Tokens::AuthorsBlk)
end

#parse_characters_blkObject



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/nexus_parser/parser.rb', line 119

# Parses the body of a characters block up to and including its EndBlk.
#
# Each pass over the block, in this order: drops a Title and a LinkLine
# token if present, delegates Dimensions / Format / CharStateLabels / Matrix
# statements to their parse_* methods, then drops MesquiteIDs and
# MesquiteBlockID tokens if present. The pass is repeated until EndBlk is
# the next token.
#
# Returns the result of popping EndBlk.
# Raises NexusParser::ParseError after 100000 passes without reaching
# EndBlk (a token none of the steps consumes would otherwise loop forever).
def parse_characters_blk
  tokens = NexusParser::Tokens
  discard = lambda { |token_class| @lexer.pop(token_class) if @lexer.peek(token_class) }

  (1..100000).each do
    # end of the block: consume the terminator and stop
    return @lexer.pop(tokens::EndBlk) if @lexer.peek(tokens::EndBlk)

    discard.call(tokens::Title)    # not used at present
    discard.call(tokens::LinkLine) # trashing these for now

    parse_dimensions if @lexer.peek(tokens::Dimensions)
    parse_format if @lexer.peek(tokens::Format)
    parse_chr_state_labels if @lexer.peek(tokens::CharStateLabels)
    parse_matrix if @lexer.peek(tokens::Matrix)

    # TODO: handle "\s*OPTIONS MSTAXA = UNCERTAIN;\s\n" within a characters
    # block (it currently spins until the pass limit below is hit)

    discard.call(tokens::MesquiteIDs)     # trashing these for now
    discard.call(tokens::MesquiteBlockID) # trashing these for now
  end

  raise(NexusParser::ParseError,"Either you have a gazillion characters or more likely the parser is caught in an infinite loop trying to parser character data. Check for double single quotes in this block.")
end

#parse_chr_state_labelsObject



183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/nexus_parser/parser.rb', line 183

# Parses a character state labels statement, from its CharStateLabels
# header through the terminating SemiColon.
#
# Each entry is read as: a Number (the 1-based character index), an
# optional Label (the character name; it is not always given), an optional
# BckSlash, zero or more state tokens (Label or Number), and an optional
# Comma separating it from the next entry.
#
# For every entry @builder.update_chr receives a hash with
#   :index  - zero-based character index (index - 1)
#   :name   - the character name, "" when none was given
#   "0", "1", ... - the state tokens in order; numbers are stored as strings
#
# Returns the result of popping the closing SemiColon.
# Raises NexusParser::ParseError after 100000 entries without reaching the
# SemiColon, or when an entry ends up without a :name.
def parse_chr_state_labels
  @lexer.pop(NexusParser::Tokens::CharStateLabels)

  inf = 0
  while true
    inf += 1
    raise(NexusParser::ParseError,"Either you have a gazillion character state labels or more likely the parser is caught in an infinite loop while trying to parser character state labels. Check for double single quotes in this block.") if inf > 100000

    break if @lexer.peek(NexusParser::Tokens::SemiColon)

    opts = {}

    name = ""
    index = @lexer.pop(NexusParser::Tokens::Number).value.to_i
    (name = @lexer.pop(NexusParser::Tokens::Label).value) if @lexer.peek(NexusParser::Tokens::Label) # not always given a letter

    @lexer.pop(NexusParser::Tokens::BckSlash) if @lexer.peek(NexusParser::Tokens::BckSlash)

    # Only look for states when the entry does not end right here. (The
    # previous `!comma || !semicolon` test was always true.)
    unless @lexer.peek(NexusParser::Tokens::Comma) || @lexer.peek(NexusParser::Tokens::SemiColon)
      i = 0

      # kludge: need to figure out the label/number priority, could be an
      # issue in list order w/in tokens
      while @lexer.peek(NexusParser::Tokens::Label) || @lexer.peek(NexusParser::Tokens::Number)
        # Exactly one token per state index. Popping a Label and then a
        # Number in the same pass used to store both under the same key, so
        # the number silently replaced the label.
        if @lexer.peek(NexusParser::Tokens::Label)
          opts.update({i.to_s => @lexer.pop(NexusParser::Tokens::Label).value})
        else
          opts.update({i.to_s => @lexer.pop(NexusParser::Tokens::Number).value.to_s})
        end

        i += 1
      end
    end

    @lexer.pop(NexusParser::Tokens::Comma) if @lexer.peek(NexusParser::Tokens::Comma) # we may also have hit semicolon

    opts.update({:index => (index - 1), :name => name})

    raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index - 1}.") if !opts[:name]
    @builder.update_chr(opts)
  end

  @lexer.pop(NexusParser::Tokens::SemiColon)
end

#parse_codens_blkObject



319
320
321
# File 'lib/nexus_parser/parser.rb', line 319

# Placeholder for CODONS block parsing ("codens" in the method name is a
# historical misspelling); not likely to be implemented.
# parse_file pops the CodonsBlk token itself and does not call this.
#
# Returns nil.
def parse_codens_blk
  nil
end

#parse_dimensionsObject



161
162
163
164
165
166
167
168
169
# File 'lib/nexus_parser/parser.rb', line 161

# Parses a dimensions statement: pops the Dimensions header, hands the
# value of every following ValuePair token to @builder.add_var, then makes
# sure placeholder taxa/characters exist for the dimensions just read.
#
# No terminating semicolon is popped here: per the original author, the
# last value pair token already takes the ";" with it.
#
# Returns whatever check_initialization_of_ntax_nchar returns.
def parse_dimensions
  @lexer.pop(NexusParser::Tokens::Dimensions)

  pair_token = NexusParser::Tokens::ValuePair
  @builder.add_var(@lexer.pop(pair_token).value) while @lexer.peek(pair_token)

  check_initialization_of_ntax_nchar
end

#parse_fileObject



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/nexus_parser/parser.rb', line 9

# Entry point: parses a whole NEXUS token stream.
#
# Pops the NexusStart token, then handles one block per BeginBlk token.
# After each BeginBlk the block-type tokens are tried in the order listed
# in the table below and the first one present decides what happens:
#
#   * authors             - delegated to parse_authors_blk (it pops the header itself)
#   * taxa/characters/notes - header popped here, body handled by the matching parse_*_blk
#   * sets                - popped and dropped (we should parse this)
#   * trees, labels, Mesquite char models, assumptions, codons
#                         - popped and dropped (we don't parse these);
#                           the trees token's value is kept in @foo
#
# A BeginBlk followed by none of these tokens is left as is; the loop then
# ends as soon as the next token is not a BeginBlk.
#
# Returns nil.
def parse_file
  tokens = NexusParser::Tokens
  @lexer.pop(tokens::NexusStart)

  # [block token, action], tried top to bottom
  handlers = [
    [tokens::AuthorsBlk,      lambda { parse_authors_blk }],
    [tokens::TaxaBlk,         lambda { @lexer.pop(tokens::TaxaBlk); parse_taxa_blk }],
    [tokens::ChrsBlk,         lambda { @lexer.pop(tokens::ChrsBlk); parse_characters_blk }],
    [tokens::NotesBlk,        lambda { @lexer.pop(tokens::NotesBlk); parse_notes_blk }],
    [tokens::SetsBlk,         lambda { @lexer.pop(tokens::SetsBlk) }],
    [tokens::TreesBlk,        lambda { @foo = @lexer.pop(tokens::TreesBlk).value }],
    [tokens::LabelsBlk,       lambda { @lexer.pop(tokens::LabelsBlk) }],
    [tokens::MqCharModelsBlk, lambda { @lexer.pop(tokens::MqCharModelsBlk) }],
    [tokens::AssumptionsBlk,  lambda { @lexer.pop(tokens::AssumptionsBlk) }],
    [tokens::CodonsBlk,       lambda { @lexer.pop(tokens::CodonsBlk) }]
  ]

  while @lexer.peek(tokens::BeginBlk)
    @lexer.pop(tokens::BeginBlk)

    _, handler = handlers.find { |token_class, _| @lexer.peek(token_class) }
    handler.call if handler
  end
end

#parse_formatObject

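TODO: apart from the header token this duplicates parse_dimensions; the header should probably be popped first and the rest shared with parse_dimensions.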



152
153
154
155
156
157
158
159
# File 'lib/nexus_parser/parser.rb', line 152

# Parses a format statement: pops the Format header, hands the value of
# every following ValuePair token to @builder.add_var, then makes sure
# placeholder taxa/characters exist.
#
# Apart from the header token this mirrors parse_dimensions.
#
# Returns whatever check_initialization_of_ntax_nchar returns.
def parse_format
  @lexer.pop(NexusParser::Tokens::Format)

  pair_token = NexusParser::Tokens::ValuePair
  @builder.add_var(@lexer.pop(pair_token).value) while @lexer.peek(pair_token)

  check_initialization_of_ntax_nchar
end

#parse_labels_blkObject



309
310
311
# File 'lib/nexus_parser/parser.rb', line 309

# Placeholder for LABELS block parsing; intentionally does nothing.
# parse_file pops the LabelsBlk token itself and does not call this.
#
# Returns nil.
def parse_labels_blk
  nil
end

#parse_matrixObject



226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/nexus_parser/parser.rb', line 226

# Parses a matrix statement, from the Matrix header through the closing
# SemiColon.
#
# Every row is a Label token (the taxon name) followed by a RowVec token
# (the coded row). Rows are numbered from 0 in the order they appear; for
# each one the builder gets update_taxon(:index, :name) and then
# code_row(index, row_vector_value).
#
# Returns the result of popping the closing SemiColon.
def parse_matrix
  tokens = NexusParser::Tokens
  @lexer.pop(tokens::Matrix)

  row = 0
  until @lexer.peek(tokens::SemiColon)
    taxon_name = @lexer.pop(tokens::Label).value
    @builder.update_taxon(:index => row, :name => taxon_name) # if it exists it's not re-added

    coded_row = @lexer.pop(tokens::RowVec).value
    @builder.code_row(row, coded_row)

    row += 1
  end

  @lexer.pop(tokens::SemiColon)
end

#parse_mesquite_blkObject



328
329
330
# File 'lib/nexus_parser/parser.rb', line 328

# Placeholder for Mesquite block parsing; intentionally does nothing.
# NOTE(review): parse_file does not call this method.
#
# Returns nil.
def parse_mesquite_blk
  nil
end

#parse_mesquitecharmodels_blkObject



323
324
325
# File 'lib/nexus_parser/parser.rb', line 323

# Placeholder for Mesquite character-model block parsing; not likely to be
# implemented. parse_file pops the MqCharModelsBlk token itself and does
# not call this.
#
# Returns nil.
def parse_mesquitecharmodels_blk
  nil
end

#parse_notes_blkObject

This implementation is crude; it needs rework once a better API for Mesquite notes is available.



246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
# File 'lib/nexus_parser/parser.rb', line 246

# Parses the body of a notes block up to and including its EndBlk,
# assembling one hash per note in @vars and passing each finished hash to
# @builder.add_note.
#
# IMPORTANT - the "(CM <note>)" wrapper is not parsed here; the "(CM" ...
# ")" bit is just stripped for now in NexusParser::Note.
#
# How a note is assembled:
#   * ValuePair - its value (a hash) is merged into the current note
#   * Label     - becomes the note's :type; a Label seen while :type is
#                 already set closes the current note first and is then
#                 picked up as the next note's :type on the following pass
#   * FileLbl   - popped; marks the note with :file => 'file'
#   * EndBlk    - popped; the note in progress is added and parsing stops
#
# Returns nil.
# Raises NexusParser::ParseError after 100000 passes without reaching EndBlk.
def parse_notes_blk
  tokens = NexusParser::Tokens
  @vars = {}

  # bounded loop: a crude guard against never reaching EndBlk
  (1..100000).each do
    if @lexer.peek(tokens::EndBlk)
      @lexer.pop(tokens::EndBlk)
      @builder.add_note(@vars) # one still left to add
      return nil
    end

    if @lexer.peek(tokens::ValuePair)
      @vars.update(@lexer.pop(tokens::ValuePair).value)
    elsif @lexer.peek(tokens::Label)
      if @vars[:type]
        # we have the data for this row: write it, and start a new one
        @builder.add_note(@vars)
        @vars = {}
      else
        @vars[:type] = @lexer.pop(tokens::Label).value
      end
    elsif @lexer.peek(tokens::FileLbl)
      @lexer.pop(tokens::FileLbl)
      @vars[:file] = 'file' # the :file key's presence is checked for and handled conditionally
    end
  end

  raise(NexusParser::ParseError,"Either you have a gazillion notes or more likely parser is caught in an infinite loop inside the Begin Notes block.  Check for double single quotes in this block.")
end

#parse_sets_blkObject



313
314
# File 'lib/nexus_parser/parser.rb', line 313

# Placeholder for SETS block parsing; intentionally does nothing.
# parse_file pops the SetsBlk token itself (noting it should be parsed) and
# does not call this.
#
# Returns nil.
def parse_sets_blk
  nil
end

#parse_taxa_blkObject



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/nexus_parser/parser.rb', line 81

# Parses the body of a taxa block up to and including its EndBlk.
#
# An optional Title token is dropped and an optional dimensions statement is
# handed to parse_dimensions. After that, until EndBlk is reached:
#   * Taxlabels - every following Label is registered with the builder via
#                 update_taxon, indexed from 0 in order of appearance; a
#                 trailing SemiColon is popped if present
#   * MesquiteIDs / MesquiteBlockID - popped and dropped
#
# Returns nil.
# Raises NexusParser::ParseError after 100000 passes without reaching EndBlk.
def parse_taxa_blk
  tokens = NexusParser::Tokens

  @lexer.pop(tokens::Title) if @lexer.peek(tokens::Title)

  # need to not ignore to test against
  parse_dimensions if @lexer.peek(tokens::Dimensions)

  (1..100000).each do
    if @lexer.peek(tokens::EndBlk)
      @lexer.pop(tokens::EndBlk)
      return nil
    end

    if @lexer.peek(tokens::Taxlabels)
      @lexer.pop(tokens::Taxlabels)

      position = 0
      while @lexer.peek(tokens::Label)
        label = @lexer.pop(tokens::Label)
        @builder.update_taxon(:index => position, :name => label.value)
        position += 1
      end

      # close of tax labels; placement of this seems dubious... but tests are working
      @lexer.pop(tokens::SemiColon) if @lexer.peek(tokens::SemiColon)
    elsif @lexer.peek(tokens::MesquiteIDs)
      @lexer.pop(tokens::MesquiteIDs) # trashing these for now
    elsif @lexer.peek(tokens::MesquiteBlockID)
      @lexer.pop(tokens::MesquiteBlockID)
    end
  end

  raise(NexusParser::ParseError,"Either you have a gazillion taxa or more likely the parser is caught in an infinite loop trying to parser taxon labels. Check for double single quotes in this block.")
end

#parse_trees_blkObject



305
306
307
# File 'lib/nexus_parser/parser.rb', line 305

# Placeholder for TREES block parsing; performs no parsing.
# parse_file pops the TreesBlk token itself and does not call this.
#
# Returns true.
def parse_trees_blk
  true
end