Module: Fbtxt2csv

Defined in:
lib/fbtxt2json/fbtxt2csv.rb

Constant Summary collapse

## all known columns in record order - the candidate headers before
##   unused (all empty) columns get stripped
##  note - frozen to guard the shared constant against accidental changes
MAX_HEADERS =
[
  'League',
  'Date',
  'Time',
  'Team 1',
  'Team 2',
  'Score',      ## generic score - do NOT know if FT/ET or such
  'HT',
  'FT',
  'ET',
  'P',
  'Round',
  'Status',
  'Ground',
].freeze
## minimum columns - always keep even if all empty
##  note - frozen to guard the shared constant against accidental changes
MIN_HEADERS =
[   ## always keep even if all empty
  'League',
  'Date',
  'Team 1',
  'Team 2'
].freeze

Class Method Summary collapse

Class Method Details

.main(args = ARGV) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/fbtxt2json/fbtxt2csv.rb', line 5

##
## command line entry point
##
##  parses the options (--verbose/--debug, -o/--output PATH, --seasons SEASONS),
##  reads all datafiles (or dirs with datafiles) passed in,
##  strips unused columns and (if an output path is given) writes the records as csv.
##
##  args - command line arguments (default: ARGV);
##           note - recognized options get removed in-place by parse!
##
##  raises ArgumentError if a path is neither a file nor a dir
def self.main( args=ARGV )
  opts = {  debug:  false,
            output: nil,
            seasons: [],
         }

  ## note - block param named cmd (and NOT parser) to avoid shadowing the outer local
  parser = OptionParser.new do |cmd|
    cmd.banner = "Usage: #{$PROGRAM_NAME} [options] DATAFILES and/or DIRS"

    ##
    ## check if git has a offline option?? (use same)
    ##             check for other tools - why? why not?
    #    cmd.on( "-q", "--quiet",
    #                 "less debug output/messages - default is (#{!opts[:debug]})" ) do |debug|
    #      opts[:debug] = false
    #    end

    cmd.on( "--verbose", "--debug",
            "turn on verbose / debug output (default: #{opts[:debug]})" ) do |_debug|
      opts[:debug] = true
    end

    cmd.on( "-o PATH", "--output PATH",
            "output to file" ) do |output|
      opts[:output] = output
    end

    cmd.on( "--seasons SEASONS",
            "turn on processing only seasons (default: #{!opts[:seasons].empty?})" ) do |seasons|
      pp seasons
      ## note - split on runs of commas/spaces and drop empty tokens
      ##          e.g. "2020, 2021" => ["2020", "2021"] and NOT ["2020", "", "2021"]
      tokens = seasons.split( /[, ]+/ ).reject( &:empty? )
      opts[:seasons] = tokens.map { |season| Season.parse( season ) }
    end
  end
  parser.parse!( args )


  puts "OPTS:"
  p opts
  puts "ARGV:"
  p args


  ## fallback to built-in sample datafile if no paths passed in
  paths = args.empty? ? ['/sports/openfootball/euro/2021--europe/euro.txt'] : args


  SportDb::QuickMatchReader.debug = opts[:debug]
  SportDb::MatchParser.debug      = opts[:debug]
  ## no debug? limit log level to info
  LogUtils::Logger.root.level = :info   unless opts[:debug]


  recs = []

  paths.each do |path|
    if Dir.exist?( path )
      puts "==> reading dir >#{path}<..."

      datafiles = SportDb::Pathspec._find( path, seasons: opts[:seasons] )
      pp datafiles
      puts "   #{datafiles.size} datafile(s)"
      datafiles.each_with_index do |datafile,j|
        puts "    reading file [#{j+1}/#{datafiles.size}] >#{datafile}<..."
        recs.concat( parse( read_text( datafile )))
      end
    elsif File.file?( path )    ## note - File.exist? also incl. Dir - use anyway - why? why not?
      puts "==> reading file >#{path}<..."
      recs.concat( parse( read_text( path )))
    else ## not a file or dir report error
      raise ArgumentError, "file/dir does NOT exist - #{path}"
    end
  end


  ## strip unused (all empty) columns
  recs, headers = vacuum( recs )
  pp recs[0,2]   ## dump first 2 records
  pp headers
  puts "  #{recs.size} record(s)"


  if opts[:output]
    puts "==> writing matches to #{opts[:output]}"
    write_csv( opts[:output], recs,
               headers: headers )
  end


  puts "bye"
end

.parse(txt) ⇒ Object

TODO: consider renaming to parse_txt or txt_to_csv (or similar).



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/fbtxt2json/fbtxt2csv.rb', line 184

##
## parse match schedule text into flat records (one array per match)
##
##  columns (in order):
##    league name, date, time, team 1, team 2,
##    score (generic), ht, ft, et, p, round, status, ground
##
##  note - missing values get written as empty strings;
##         prints the record count and on parse errors dumps the errors and exits (with status 1)
##
##  txt - text passed on to SportDb::QuickMatchReader
##
##  returns array of records
def self.parse( txt )   ### check - name parse_txt or txt_to_csv or such - why? why not?
  quick   = SportDb::QuickMatchReader.new( txt )
  matches = quick.parse
  name    = quick.league_name   ## quick hack - get league+season via league_name

  ## format a score pair e.g. [2,1] => "2-1"
  fmt = ->( pair ) { "#{pair[0]}-#{pair[1]}" }

  recs = matches.map do |match|
    ## join group and round with ", " - skip missing parts
    ##   note - group only now results in "Group A" (and NOT "Group A, ")
    round = [match.group, match.round].select { |part| part }.join( ', ' )

    ## note - make.score hash uses symbols!!!
    ##         e.g. score[:ht] and NOT score['ht'] !!!
    #    make sure hash keys are always strings
    score  = match.score
    score  = score.transform_keys( &:to_s )   if score.is_a?( Hash )
    ## score by period - empty if score is generic (array) or missing
    period = score.is_a?( Hash ) ? score : {}

    ground = if match.ground.is_a?( Array )
               match.ground.join( ', ' )
             else  ## assume string or nil
               match.ground || ''
             end

    [
      #############################
      ## todo/fix - split league into league_name and season!!!!
      ###############################
      name,  ## league name
      match.date || '',
      match.time || '',
      match.team1,
      match.team2,
      score.is_a?( Array ) && score.size == 2 ? fmt.call( score ) : '',
      period['ht'] ? fmt.call( period['ht'] ) : '',
      period['ft'] ? fmt.call( period['ft'] ) : '',
      period['et'] ? fmt.call( period['et'] ) : '',
      period['p']  ? fmt.call( period['p'] )  : '',
      round,
      match.status || '',
      ground,
    ]
    ## add more attributes e.g. ground, etc.
  end

  puts "  #{recs.size} record(s)"

  if quick.errors?
    puts "!! #{quick.errors.size} parse error(s):"
    pp quick.errors
    exit 1
  end

  recs
end

.vacuum(rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/fbtxt2json/fbtxt2csv.rb', line 140

##
## strip (vacuum) columns that are empty in all rows
##
##  rows          - array of records (each record an array of column values)
##  headers       - header names by column position (default: MAX_HEADERS)
##  fixed_headers - header names to always keep even if all empty (default: MIN_HEADERS)
##
##  returns [rows, headers] - the rows without the stripped columns and
##                            the header names of the columns kept
def self.vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS )
  ## note - use the headers passed in
  ##          (was ignored before - always used MAX_HEADERS)
  all_headers = headers

  ## check for unused columns - count filled values by column
  counter = Array.new( all_headers.size, 0 )
  rows.each do |row|
    row.each_with_index do |col, idx|
      ## note - nil and empty strings count as blank;
      ##          values without empty? (e.g. numbers) count as filled
      blank = col.nil? || (col.respond_to?( :empty? ) && col.empty?)
      counter[idx] += 1  unless blank
    end
  end

  ## pp counter

  ## split column indices into keep and drop (empty and not fixed)
  keep, drop = (0...all_headers.size).partition do |idx|
    counter[idx] > 0 || fixed_headers.include?( all_headers[idx] )
  end

  unless drop.empty?
    rows = rows.map do |row|
      row.reject.with_index { |_col, idx| drop.include?( idx ) }
    end
  end

  [rows, all_headers.values_at( *keep )]
end