Class: SportDb::OutlineReader

Inherits:
Object
  • Object
show all
Defined in:
lib/sportdb/parser/outline_reader.rb

Constant Summary collapse

HEADING_BLANK_RE =

Note: matches a “decorative” heading line that consists only of = characters (e.g. ========); such lines are skipped.

Todo/check: find a better name, e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
%r{\A
={1,}
\z}x
HEADING_RE =

Note: as in Wikimedia markup (and Markdown), allow optional trailing ==== too.

%r{\A
(?<marker>={1,})       ## 1. leading ======
  [ ]*
(?<text>[^=]+)         ## 2. text   (note: for now no "inline" = allowed)
  [ ]*
  =*                   ## 3. (optional) trailing ====
\z}x

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(txt) ⇒ OutlineReader

Returns a new instance of OutlineReader.



80
81
82
# File 'lib/sportdb/parser/outline_reader.rb', line 80

# Keeps the raw outline text around; it is consumed later by #parse.
#
# @param txt [String] the outline text (read line by line in #parse)
def initialize(txt)
  @txt = txt
end

Class Method Details

.debug=(value) ⇒ Object



65
# File 'lib/sportdb/parser/outline_reader.rb', line 65

# Sets the debug flag, which is stored in the @@debug class variable.
def self.debug=(value)
  @@debug = value
end

.debug?Boolean

Returns:

  • (Boolean)


66
# File 'lib/sportdb/parser/outline_reader.rb', line 66

# Returns the debug flag; @@debug is set to false first when it is
# unset (or otherwise falsy).
def self.debug?
  @@debug ||= false
end

.parse(txt) ⇒ Object



76
77
78
# File 'lib/sportdb/parser/outline_reader.rb', line 76

# Convenience entry point: builds a reader for +txt+ and returns
# whatever its #parse returns.
def self.parse(txt)
  reader = new(txt)
  reader.parse
end

.read(path) ⇒ Object

Todo: rename to read_file or from_file, etc.? Why? Why not?



71
72
73
74
# File 'lib/sportdb/parser/outline_reader.rb', line 71

# Reads the file at +path+ (opened read-only with utf-8 as external
# encoding) and passes its whole content to .parse.
#
# todo/check: rename to read_file or from_file etc. - why? why not?
def self.read(path)
  content = File.open(path, 'r:utf-8', &:read)
  parse(content)
end

Instance Method Details

#debug?Boolean

Returns:

  • (Boolean)


67
# File 'lib/sportdb/parser/outline_reader.rb', line 67

# Instance-level shortcut; delegates to the class-level debug? flag.
def debug?
  self.class.debug?
end

#parseObject



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/sportdb/parser/outline_reader.rb', line 99

# Parses @txt line by line into a flat outline.
#
# Returns an array of nodes, each being either
#   [:h1, "text"], [:h2, "text"], ...  - a heading (level = number of leading =)
#   [:p, ["line", "line", ...]]        - a paragraph (consecutive text lines)
#
# Blank lines end the current paragraph, lines starting with # are
# dropped, inline # comments are cut off, "decorative" headings made
# of = only are skipped, and a line reading __END__ stops parsing.
def parse
  nodes    = []      ## collected outline nodes
  new_para = true    ## open a new paragraph on the next text line?

  @txt.each_line do |raw|
    text = raw.strip   ## todo/fix: keep leading and trailing spaces - why? why not?

    ## blank line - only acts as a paragraph break
    ##   todo/fix: keep blank line nodes?? - why? why not?
    if text.empty?
      new_para = true
      next
    end

    break if text == '__END__'

    ## skip comment-only lines
    next if text.start_with?( '#' )

    ## cut-off inline (until end-of-line) comments too
    ##  e.g Eupen | KAS Eupen ## [de]       => Eupen | KAS Eupen
    ##  e.g bq   Bonaire,  BOE  # CONCACAF  => bq   Bonaire,  BOE
    text = text.sub( /#.*/, '' ).strip
    pp text    if debug?

    ## skip "decorative" only heading e.g. ========
    ##   note: does NOT change the paragraph state
    ##   todo/check: use as paragraph "breaker" too? - why? why not?
    next if HEADING_BLANK_RE.match( text )

    ## heading - like in wikimedia markup (and markdown) with optional trailing ====
    found = HEADING_RE.match( text )
    if found
      new_para = true

      level = found[:marker].length   ## number of = is the heading level
      title = found[:text].strip

      puts "heading #{level} >#{title}<"   if debug?
      nodes << [:"h#{level}", title]
      next
    end

    ## otherwise it's a (plain/regular) text line
    if new_para
      nodes << [:p, [text]]
      new_para = false
      next
    end

    ## continue the running paragraph - last node MUST be a p(aragraph)
    last = nodes[-1]
    unless last[0] == :p
      puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
      pp last
      exit 1
    end
    last[1] << text
  end

  nodes
end