Class: NexusParser::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/nexus_parser/parser.rb

Instance Method Summary collapse

Constructor Details

#initialize(lexer, builder) ⇒ Parser

Returns a new instance of Parser.



4
5
6
7
# File 'lib/nexus_parser/parser.rb', line 4

# Creates a parser wired to its two collaborators.
#
# @param lexer [Object] token source; the parse_* methods call #peek and #pop on it
# @param builder [Object] receiver of the parsed data (e.g. #add_var, #update_taxon)
def initialize(lexer, builder)
  @lexer, @builder = lexer, builder
end

Instance Method Details

#check_initialization_of_ntax_nchar ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
# File 'lib/nexus_parser/parser.rb', line 175

# Creates placeholder characters and taxa once the dimensions are known.
#
# When the :nchar variable is set and the nexus file has no characters yet,
# @builder.stub_chr is called once per declared character. The same is then
# done for :ntax / taxa via @builder.stub_taxon. Collections that already
# contain entries are left untouched.
#
# @return [Range, nil] the iterated range when taxa were stubbed, otherwise nil
def check_initialization_of_ntax_nchar
  nchar = @builder.nexus_file.vars[:nchar]
  (0..(nchar.to_i - 1)).each { @builder.stub_chr } if nchar && @builder.nexus_file.characters == []

  ntax = @builder.nexus_file.vars[:ntax]
  (0..(ntax.to_i - 1)).each { @builder.stub_taxon } if ntax && @builder.nexus_file.taxa == []
end

#parse_assumptions_blk ⇒ Object



385
386
# File 'lib/nexus_parser/parser.rb', line 385

# No-op placeholder: nothing is read from the lexer and nothing is sent to
# the builder.
#
# @return [nil]
def parse_assumptions_blk
  nil
end

#parse_authors_blk ⇒ Object

Just removes the AUTHORS block from the token stream for the time being; its contents are not parsed.



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/nexus_parser/parser.rb', line 60

# Discards the AUTHORS block without interpreting it.
#
# The block holds multi-word key/value pairs (e.g. "AUTHOR NAME"), so for now
# a single AuthorsBlk token is popped and nothing is handed to the builder.
#
# @return [Object] whatever the lexer's #pop returns for the AuthorsBlk token
def parse_authors_blk
  authors_blk = NexusParser::Tokens::AuthorsBlk
  @lexer.pop(authors_blk)

  # Disabled sketch of a finer-grained parse, kept for reference:
  #
  #   while true
  #     if @lexer.peek(NexusParser::Tokens::EndBlk)
  #       @lexer.pop(NexusParser::Tokens::EndBlk)
  #       break
  #     else
  #       while @lexer.peek(NexusParser::Tokens::ValuePair)
  #         # IMPORTANT: these go to a general hash; keys used in different
  #         # blocks may ultimately overlap, which is ignored at present.
  #         @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
  #       end
  #       @lexer.pop(NexusParser::Tokens::ID) if @lexer.peek(NexusParser::Tokens::ID)
  #     end
  #   end
end

#parse_characters_blk ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/nexus_parser/parser.rb', line 119

# Parses the body of a CHARACTERS block up to and including its EndBlk token.
#
# Each pass over the loop probes the lexer, in a fixed order, for the
# sub-sections this parser understands. Recognised sections are delegated to
# their parse_* method; Title, LinkLine, MesquiteIDs and MesquiteBlockID
# tokens are popped and thrown away.
#
# Known limitation (from the original notes): a line such as
# "OPTIONS MSTAXA = UNCERTAIN;" inside the block is not handled. Nothing
# consumes it, so the loop spins until the iteration cap raises.
#
# @raise [NexusParser::ParseError] after more than 100000 passes
# @return [Object] whatever the lexer's #pop returns for the EndBlk token
def parse_characters_blk
  tokens = NexusParser::Tokens
  passes = 0

  loop do
    passes += 1
    if passes > 100_000
      raise(NexusParser::ParseError, "Either you have a gazillion characters or more likely the parser is caught in an infinite loop trying to parser character data. Check for double single quotes in this block.")
    end

    # End of the block reached; the token itself is consumed after the loop.
    break if @lexer.peek(tokens::EndBlk)

    # Title is not used at present and link lines are trashed for now.
    [tokens::Title, tokens::LinkLine].each do |ignored|
      @lexer.pop(ignored) if @lexer.peek(ignored)
    end

    # Sub-sections with dedicated parsers, probed in this exact order.
    [
      [tokens::Dimensions,      :parse_dimensions],
      [tokens::Format,          :parse_format],
      [tokens::CharStateLabels, :parse_chr_state_labels],
      [tokens::CharLabels,      :parse_chr_labels],
      [tokens::StateLabels,     :parse_state_labels],
      [tokens::Matrix,          :parse_matrix]
    ].each do |section, handler|
      send(handler) if @lexer.peek(section)
    end

    # Mesquite bookkeeping tokens are trashed for now.
    [tokens::MesquiteIDs, tokens::MesquiteBlockID].each do |ignored|
      @lexer.pop(ignored) if @lexer.peek(ignored)
    end
  end

  @lexer.pop(tokens::EndBlk)
end

#parse_chr_labels ⇒ Object



231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/nexus_parser/parser.rb', line 231

# Parses a CharLabels statement: a run of character labels closed by a
# semicolon.
#
# Labels are numbered from 0 in the order they are read, and each is passed
# to @builder.update_chr_name(position, label).
#
# @raise [NexusParser::ParseError] after more than 100000 passes of the outer loop
# @return [Object] whatever the lexer's #pop returns for the SemiColon token
def parse_chr_labels
  tokens = NexusParser::Tokens
  @lexer.pop(tokens::CharLabels)

  passes = 0
  loop do
    passes += 1
    if passes > 100_000
      raise(NexusParser::ParseError, "Either you have a gazillion character labels or more likely the parser is caught in an infinite loop while trying to parse character labels. Check for double single quotes in this block.")
    end

    break if @lexer.peek(tokens::SemiColon)

    position = 0
    while @lexer.peek(tokens::CharacterLabel)
      label = @lexer.pop(tokens::CharacterLabel).value
      @builder.update_chr_name(position, label)
      position += 1
    end
  end

  @lexer.pop(tokens::SemiColon)
end

#parse_chr_state_labels ⇒ Object



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# File 'lib/nexus_parser/parser.rb', line 187

# Parses a CharStateLabels statement.
#
# Each entry is read as: a positive integer (1-based character number), an
# optional character name, an optional backslash, then zero or more state
# labels, and finally an optional comma. The statement ends at a semicolon.
# For every entry @builder.update_chr receives a hash containing
#   :index => zero-based character index,
#   :name  => the character name ("" when none was given),
#   "0", "1", ... => the state labels in reading order.
#
# @raise [NexusParser::ParseError] after more than 100000 entries/passes, or
#   when the popped character name value is nil/false
# @return [Object] whatever the lexer's #pop returns for the SemiColon token
def parse_chr_state_labels
  @lexer.pop(NexusParser::Tokens::CharStateLabels)

  inf = 0
  while true
    inf += 1
    raise(NexusParser::ParseError,"Either you have a gazillion character state labels or more likely the parser is caught in an infinite loop while trying to parser character state labels. Check for double single quotes in this block.") if inf > 100000

    break if @lexer.peek(NexusParser::Tokens::SemiColon)

    opts = {}
    name = ""

    index = @lexer.pop(NexusParser::Tokens::PositiveInteger).value.to_i

    # the character name is not always given
    name = @lexer.pop(NexusParser::Tokens::CharacterLabel).value if @lexer.peek(NexusParser::Tokens::CharacterLabel)

    @lexer.pop(NexusParser::Tokens::BckSlash) if @lexer.peek(NexusParser::Tokens::BckSlash)

    # State labels can only follow if the entry has not already been closed by
    # a comma or the statement by a semicolon. This used to be `||`, which is
    # always true; `&&` matches the equivalent guard in parse_state_labels.
    if !@lexer.peek(NexusParser::Tokens::Comma) && !@lexer.peek(NexusParser::Tokens::SemiColon)
      i = 0

      while @lexer.peek(NexusParser::Tokens::CharacterLabel)
        opts.update({
          i.to_s => @lexer.pop(NexusParser::Tokens::CharacterLabel).value
        })

        i += 1
      end
    end

    # we may also have hit the semicolon, in which case there is no comma to pop
    @lexer.pop(NexusParser::Tokens::Comma) if @lexer.peek(NexusParser::Tokens::Comma)

    opts.update({:index => (index - 1), :name => name})

    raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index - 1}.") if !opts[:name]
    @builder.update_chr(opts)
  end

  @lexer.pop(NexusParser::Tokens::SemiColon)
end

#parse_codens_blk ⇒ Object



388
389
390
# File 'lib/nexus_parser/parser.rb', line 388

# No-op placeholder; per the original note, an implementation is "not likely".
# Nothing is read from the lexer and nothing is sent to the builder.
#
# @return [nil]
def parse_codens_blk
  nil
end

#parse_dimensions ⇒ Object



165
166
167
168
169
170
171
172
173
# File 'lib/nexus_parser/parser.rb', line 165

# Parses a Dimensions statement.
#
# Pops the Dimensions token, forwards the value of every following ValuePair
# token to @builder.add_var, and then makes sure stub taxa/characters exist
# for the declared dimensions.
#
# @return [Object] the result of #check_initialization_of_ntax_nchar
def parse_dimensions
  @lexer.pop(NexusParser::Tokens::Dimensions)

  value_pair = NexusParser::Tokens::ValuePair
  @builder.add_var(@lexer.pop(value_pair).value) while @lexer.peek(value_pair)

  # Per the original note, the last value pair takes the trailing ';' with it,
  # so no SemiColon is popped here.
  check_initialization_of_ntax_nchar
end

#parse_file ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/nexus_parser/parser.rb', line 9

# Top-level entry point: consumes the NexusStart token and then every block
# introduced by a BeginBlk token.
#
# Block handling:
# * Authors    - delegated to #parse_authors_blk, which pops the block token itself
# * Taxa, Characters, Notes - block token popped here, body delegated to the
#   matching parse_*_blk method
# * Sets (should be parsed eventually), Labels, MqCharModels, Assumptions,
#   Codons - the block token is popped and discarded
# * Trees      - the block token is popped and its value is stored in @foo
#
# If the token after BeginBlk matches none of the above, nothing is consumed.
# The loop then ends as soon as the next peek does not see a BeginBlk, and no
# error is raised here.
#
# @return [nil]
def parse_file
  tokens = NexusParser::Tokens
  @lexer.pop(tokens::NexusStart)

  while @lexer.peek(tokens::BeginBlk)
    @lexer.pop(tokens::BeginBlk)

    case
    when @lexer.peek(tokens::AuthorsBlk)
      parse_authors_blk

    # blocks whose contents are parsed
    when @lexer.peek(tokens::TaxaBlk)
      @lexer.pop(tokens::TaxaBlk)
      parse_taxa_blk
    when @lexer.peek(tokens::ChrsBlk)
      @lexer.pop(tokens::ChrsBlk)
      parse_characters_blk
    when @lexer.peek(tokens::NotesBlk)
      @lexer.pop(tokens::NotesBlk)
      parse_notes_blk

    # this one should be parsed, but is only discarded for now
    when @lexer.peek(tokens::SetsBlk)
      @lexer.pop(tokens::SetsBlk)

    # blocks that are not parsed
    when @lexer.peek(tokens::TreesBlk)
      @foo = @lexer.pop(tokens::TreesBlk).value
    when @lexer.peek(tokens::LabelsBlk)
      @lexer.pop(tokens::LabelsBlk)
    when @lexer.peek(tokens::MqCharModelsBlk)
      @lexer.pop(tokens::MqCharModelsBlk)
    when @lexer.peek(tokens::AssumptionsBlk)
      @lexer.pop(tokens::AssumptionsBlk)
    when @lexer.peek(tokens::CodonsBlk)
      @lexer.pop(tokens::CodonsBlk)
    end
  end
end

#parse_format ⇒ Object

TODO: probably pop the header, then fuse this method with parse_dimensions.



154
155
156
157
158
159
160
161
162
163
# File 'lib/nexus_parser/parser.rb', line 154

# Parses a Format statement.
#
# After popping the Format token, consumes any mix of RespectCase and
# ValuePair tokens. ValuePair values are forwarded to @builder.add_var;
# RespectCase is dropped. Finally ensures that stub taxa/characters exist.
#
# @return [Object] the result of #check_initialization_of_ntax_nchar
def parse_format
  tokens = NexusParser::Tokens
  @lexer.pop(tokens::Format)

  while @lexer.peek(tokens::ValuePair) || @lexer.peek(tokens::RespectCase)
    # !! TODO: nothing is set, respect case is ignored
    @lexer.pop(tokens::RespectCase) if @lexer.peek(tokens::RespectCase)

    if @lexer.peek(tokens::ValuePair)
      pair = @lexer.pop(tokens::ValuePair)
      @builder.add_var(pair.value)
    end
  end

  check_initialization_of_ntax_nchar
end

#parse_labels_blk ⇒ Object



378
379
380
# File 'lib/nexus_parser/parser.rb', line 378

# No-op placeholder: nothing is read from the lexer and nothing is sent to
# the builder.
#
# @return [nil]
def parse_labels_blk
  nil
end

#parse_matrix ⇒ Object



293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# File 'lib/nexus_parser/parser.rb', line 293

# Parses a Matrix statement: rows of "<taxon label> <row vector>" closed by a
# semicolon.
#
# Rows are numbered from 0 in reading order. For each row the taxon name is
# sent to @builder.update_taxon (per the original note, an existing taxon is
# not re-added) and the row vector to @builder.code_row.
#
# @return [Object] whatever the lexer's #pop returns for the SemiColon token
def parse_matrix
  tokens = NexusParser::Tokens
  @lexer.pop(tokens::Matrix)

  row = 0
  until @lexer.peek(tokens::SemiColon)
    taxon_name = @lexer.pop(tokens::Label).value
    @builder.update_taxon(:index => row, :name => taxon_name)

    coding = @lexer.pop(tokens::RowVec).value
    @builder.code_row(row, coding)

    row += 1
  end

  # consume the closing semicolon
  @lexer.pop(tokens::SemiColon)
end

#parse_mesquite_blk ⇒ Object



397
398
399
# File 'lib/nexus_parser/parser.rb', line 397

# No-op placeholder: nothing is read from the lexer and nothing is sent to
# the builder.
#
# @return [nil]
def parse_mesquite_blk
  nil
end

#parse_mesquitecharmodels_blk ⇒ Object



392
393
394
# File 'lib/nexus_parser/parser.rb', line 392

# No-op placeholder; the original note ("nor this") marks it as not planned
# for implementation. Nothing is read from the lexer.
#
# @return [nil]
def parse_mesquitecharmodels_blk
  nil
end

#parse_notes_blk ⇒ Object

This implementation is (or was) poor; it needs rework once a better API for Mesquite is available.



313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
# File 'lib/nexus_parser/parser.rb', line 313

# Parses the body of a NOTES block up to and including its EndBlk token.
#
# A note is accumulated in @vars: ValuePair values are merged in, a FileLbl
# token sets :file => 'file', and a Label token sets :type. When a Label
# arrives while :type is already set, the accumulated note is flushed to
# @builder.add_note and a fresh hash is started. The note still pending at
# EndBlk is flushed as well.
#
# IMPORTANT: the "(CM <note>)" wrapper is not parsed here; per the original
# note, the "(CM" ... ")" part is stripped in NexusParser::Note for now.
#
# @raise [NexusParser::ParseError] after more than 100000 passes
# @return [nil]
def parse_notes_blk
  tokens = NexusParser::Tokens

  @vars = {}
  passes = 0 # crude iteration cap
  loop do
    passes += 1
    if passes > 100_000
      raise(NexusParser::ParseError, "Either you have a gazillion notes or more likely parser is caught in an infinite loop inside the Begin Notes block.  Check for double single quotes in this block.")
    end

    if @lexer.peek(tokens::EndBlk)
      @lexer.pop(tokens::EndBlk)
      @builder.add_note(@vars) # one note is still pending
      break
    end

    if @lexer.peek(tokens::ValuePair)
      @vars.update(@lexer.pop(tokens::ValuePair).value)
    elsif @lexer.peek(tokens::FileLbl)
      @lexer.pop(tokens::FileLbl)
      # the presence of the :file key is checked and handled conditionally elsewhere
      @vars.update(:file => 'file')
    else
      # The result of this peek is not used: anything that is neither a
      # ValuePair nor a FileLbl is treated as a Label and popped below.
      @lexer.peek(tokens::Label)

      # A Label seen while :type is already set starts a new note, so flush
      # the current one first.
      if @vars[:type]
        @builder.add_note(@vars)
        @vars = {}
      end

      @vars.update(:type => @lexer.pop(tokens::Label).value)
    end
  end
end

#parse_sets_blk ⇒ Object



382
383
# File 'lib/nexus_parser/parser.rb', line 382

# No-op placeholder: nothing is read from the lexer and nothing is sent to
# the builder.
#
# @return [nil]
def parse_sets_blk
  nil
end

#parse_state_labels ⇒ Object



255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# File 'lib/nexus_parser/parser.rb', line 255

# Parses a StateLabels statement.
#
# Each entry is a positive integer (1-based character number) followed by
# zero or more state labels and an optional comma; the statement ends at a
# semicolon. For every entry @builder.update_chr_states receives a hash with
# the state labels under the keys "0", "1", ... and the zero-based character
# index under :index.
#
# @raise [NexusParser::ParseError] after more than 100000 entries/passes
# @return [Object] whatever the lexer's #pop returns for the SemiColon token
def parse_state_labels
  tokens = NexusParser::Tokens
  @lexer.pop(tokens::StateLabels)

  passes = 0
  loop do
    passes += 1
    if passes > 100_000
      raise(NexusParser::ParseError, "Either you have a gazillion state labels or more likely the parser is caught in an infinite loop while trying to parse state labels. Check for double single quotes in this block.")
    end

    break if @lexer.peek(tokens::SemiColon)

    states = {}
    chr_number = @lexer.pop(tokens::PositiveInteger).value.to_i

    # Labels can only follow when the entry is not already closed.
    unless @lexer.peek(tokens::Comma) || @lexer.peek(tokens::SemiColon)
      state = 0
      while @lexer.peek(tokens::CharacterLabel)
        states[state.to_s] = @lexer.pop(tokens::CharacterLabel).value
        state += 1
      end
    end

    # the entry may also have been ended by the semicolon instead of a comma
    @lexer.pop(tokens::Comma) if @lexer.peek(tokens::Comma)

    states[:index] = chr_number - 1
    @builder.update_chr_states(states)
  end

  @lexer.pop(tokens::SemiColon)
end

#parse_taxa_blk ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/nexus_parser/parser.rb', line 81

# Parses the body of a TAXA block up to and including its EndBlk token.
#
# An optional Title token is discarded and an optional Dimensions statement is
# delegated to #parse_dimensions. Inside the block, a Taxlabels statement
# yields one @builder.update_taxon(:index => n, :name => label) call per
# Label (n counted from 0), followed by an optional closing semicolon.
# MesquiteIDs and MesquiteBlockID tokens are popped and thrown away.
#
# If the next token is none of the above, nothing is consumed and the loop
# spins until the iteration cap raises.
#
# @raise [NexusParser::ParseError] after more than 100000 passes
# @return [nil]
def parse_taxa_blk
  @lexer.pop(NexusParser::Tokens::Title) if @lexer.peek(NexusParser::Tokens::Title)

  # need to not ignore to test against
  parse_dimensions if @lexer.peek(NexusParser::Tokens::Dimensions)

  inf = 0
  while true
    inf += 1
    raise(NexusParser::ParseError,"Either you have a gazillion taxa or more likely the parser is caught in an infinite loop trying to parser taxon labels. Check for double single quotes in this block.") if inf > 100000

    if @lexer.peek(NexusParser::Tokens::EndBlk)
      @lexer.pop(NexusParser::Tokens::EndBlk)
      break
    end

    if @lexer.peek(NexusParser::Tokens::Taxlabels)
      # The branch condition already established that Taxlabels is next, so
      # it is popped without peeking a second time.
      @lexer.pop(NexusParser::Tokens::Taxlabels)

      i = 0
      while @lexer.peek(NexusParser::Tokens::Label)
        @builder.update_taxon(:index => i, :name => @lexer.pop(NexusParser::Tokens::Label).value)
        i += 1
      end

      # close of the tax labels; the placement of this seems dubious, but the tests are working
      @lexer.pop(NexusParser::Tokens::SemiColon) if @lexer.peek(NexusParser::Tokens::SemiColon)
    elsif @lexer.peek(NexusParser::Tokens::MesquiteIDs)
      @lexer.pop(NexusParser::Tokens::MesquiteIDs) # trashing these for now
    elsif @lexer.peek(NexusParser::Tokens::MesquiteBlockID)
      @lexer.pop(NexusParser::Tokens::MesquiteBlockID)
    end
  end
end

#parse_trees_blk ⇒ Object



374
375
376
# File 'lib/nexus_parser/parser.rb', line 374

# No-op placeholder: nothing is read from the lexer and nothing is sent to
# the builder.
#
# @return [true] always
def parse_trees_blk
  true
end