Class: SportDb::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/sportdb/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(txt) ⇒ Tokenizer

Returns a new instance of Tokenizer.



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/sportdb/parser.rb', line 58

##
# Builds the token stream for the passed-in text.
#
# The text is split into lines; blank lines and lines starting with '#'
# are skipped, and inline (end-of-line) '#' comments are cut off.  Every
# remaining line is handed to parser.tokenize and the per-line token lists
# are flattened into @tokens, with a [:NEWLINE, "\n"] token appended after
# every line.  Finally every token is converted ("racc format"):
#
#   - one-element tokens   [type]  become  [type.to_s, type.to_s]
#   - [:TEXT, text] tokens become [:GROUP, text] if parser.is_group?( text ),
#     or [:ROUND, text] if parser.is_round?( text ) || parser.is_leg?( text )
#   - all other two-element tokens pass through as-is (1:1)
#
# @param txt [String] the text to tokenize
# @raise [ArgumentError] if a token has neither one nor two elements
def initialize( txt )
  parser = Parser.new

  ## tokenize line-by-line; collect one token list per (non-skipped) line
  tree = []
  txt.split( "\n" ).each do |line|
    stripped = line.strip
    ## skip blank lines and (full-line) comment lines
    next if stripped.empty? || stripped.start_with?( '#' )

    ##   support for inline (end-of-line) comments
    line = line.sub( /#.*/, '' ).strip

    ## NOTE(review): every line and its tokens get printed unconditionally -
    ##   looks like leftover debug output; confirm before removing
    puts "line >#{line}<"
    tokens = parser.tokenize( line )
    pp tokens

    tree << tokens
  end

  ## (disabled) quick hack
  ##   turn all  text tokens followed by minute token
  ##     into player tokens!!!
  ##
  ##   also auto-convert text tokens into team tokens - why? why not?
  ##
  ##   tree.each do |tokens|
  ##      tokens.each_with_index do |t0,idx|
  ##         t1 = tokens[idx+1]
  ##         if t1 && t1[0] == :minute && t0[0] == :text
  ##              t0[0] = :player
  ##         end
  ##      end
  ##   end

  ## (idea) auto-add/insert start tokens for known line patterns
  ##    START_GOALS for  goals_line
  ##    why? why not?

  ## flatten
  ##   note: concat appends in-place (+= would copy the whole
  ##         accumulated array again for every single line)
  @tokens = []
  tree.each do |tokens|
    @tokens.concat( tokens )
    @tokens << [:NEWLINE, "\n"]   ## auto-add newlines
  end

  ## convert to racc format
  @tokens = @tokens.map do |tok|
    case tok.size
    when 1
      ## token type only - use the type (as string) for the value too
      [tok[0].to_s, tok[0].to_s]
    when 2
      if tok[0] == :TEXT
        ## replace all texts with keyword matches (e.g. group, round, leg, etc.)
        text = tok[1]
        if parser.is_group?( text )
          [:GROUP, text]
        elsif parser.is_round?( text ) || parser.is_leg?( text )
          [:ROUND, text]
        else
          tok  ## pass through as-is (1:1)
        end
      else
        tok    ## pass through as-is (1:1)
      end
    else
      raise ArgumentError, "tokens of size 1|2 expected; got #{tok.pretty_inspect}"
    end
  end
end

Instance Attribute Details

#tokens ⇒ Object (readonly)

Returns the value of attribute tokens.



56
57
58
# File 'lib/sportdb/parser.rb', line 56

##
# Reader for the @tokens instance variable.
#
# @return [Object] the current value of @tokens
def tokens; @tokens; end

Instance Method Details

#next_token ⇒ Object



135
136
137
# File 'lib/sportdb/parser.rb', line 135

##
# Removes the first entry from @tokens and returns it
# (Array#shift returns nil once @tokens is empty).
#
# @return [Object, nil] the removed token or nil if none is left
def next_token; @tokens.shift; end