Class: Rsssf::Fmtfix

Inherits:

Object

Object
Rsssf::Fmtfix

show all

Defined in: lib/rsssf/fmtfix/dates.rb,
lib/rsssf/fmtfix/goals.rb,
lib/rsssf/fmtfix/score.rb,
lib/rsssf/fmtfix/errata.rb,
lib/rsssf/fmtfix/fmtfix.rb,
lib/rsssf/fmtfix/rounds.rb,
lib/rsssf/fmtfix/tables.rb,
lib/rsssf/fmtfix/headers.rb,
lib/rsssf/fmtfix/outline.rb,
lib/rsssf/fmtfix/topscorers.rb,
lib/rsssf/fmtfix/fmtfix-base.rb,
lib/rsssf/fmtfix/dates_helpers.rb,
lib/rsssf/fmtfix/patch_headings.rb

Overview

todo: find a better name e.g. Format or Fixer or ??

Constant Summary collapse

MONTH_LINES = note - (re)use the same date regex style & capture names from football.txt tokenizer

parse_names( <<TXT )
January    Jan
February   Feb
March      Mar
April      Apr
May
June       Jun
July       Jul
August     Aug
September  Sept  Sep
October    Oct
November   Nov
December   Dec
TXT

MONTH_NAMES =

build_names( MONTH_LINES )

MONTH_MAP = pp MONTH_NAMES

build_map( MONTH_LINES, downcase: true )

DAY_LINES =

parse_names( <<TXT )
Monday                   Mon  Mo
Tuesday            Tues  Tue  Tu
Wednesday                Wed  We
Thursday    Thurs  Thur  Thu  Th
Friday                   Fri  Fr
Saturday                 Sat  Sa
Sunday                   Sun  Su
TXT

DAY_NAMES =

build_names( DAY_LINES )

DAY_MAP = pp DAY_NAMES

build_map( DAY_LINES, downcase: true )

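DATE_I_RE = e.g. `Aug 9`; `Fri Aug 9`; `Fri Aug 9`; `Fri, Aug 9`; `Fri, Aug 9 2024`; `Fri, Aug 9, 2024`; `Aug 9, 2024`; `Aug 9, 2024` (repeated examples presumably differed only in spacing that was lost in rendering). Note - eats up the optional comma after DAY_NAMES!! Adds an "around" qualifier for dates that are not known precisely, e.g. `around Mar 29`; `ca. Nov 1`. Supports a two-digit year, e.g. `Jan 25/87`; `Jan 28/87`. Extra/bonus - allows a (double) space typo between month and day, e.g. `Aug  9`.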

%r{
(?<date>
  \b
     ## optional around qualifier
     ((?<around>   around
                 | ca?\.)
                  [ ]
     )?
     ## optional day name
     ((?<day_name>#{DAY_NAMES})
           (?: ,?[ ]+)
     )?
     (?<month_name>#{MONTH_NAMES})
            [ ]{1,2}   ## note - allow (double) space typo
     (?<day>\d{1,2})
          \b
     ## optional year
     (   (?:      ,? [ ]       ## note - comma optional with single space required for now
                (?<year>\d{4})        ## optional year 2025 (yyyy)
            |     /
                (?<yy>\d{2})
          )
            \b
     )?
)}ix

DATE_IB_RE = date i - alternate style with the weekday at the end (used in arg2026.txt), e.g. `Mar 23, Mon`; `Mar 25, Wed`; `Apr 1, Wed`; `May 3, Sun`; `Jul 26, Sun`

%r{
(?<date>
  \b
     (?<month_name>#{MONTH_NAMES})
            [ ]{1,2}   ## note - allow (double) space typo
     (?<day>\d{1,2})
          , [ ]?
      (?<day_name>#{DAY_NAMES})
     \b
)}ix

DATE_II_RE =

%r{
(?<date>
  \b
     ## optional day name
     ((?<day_name>#{DAY_NAMES})
           (?: ,?[ ]+)
     )?
     (?<day>\d{1,2})
         [ ]
     (?<month_name>#{MONTH_NAMES})
          \b
     ## optional year
     (?:  [ ]
        (?<year>\d{4})        ## optional year 2025 (yyyy)
        \b
     )?
)}ix

DATE_LEGS_RE =

%r{
(?<date_legs>
 \b
     (?<month_name1>#{MONTH_NAMES})
          [ ]
     (?<day1>\d{1,2})
       (?:
             , [ ]{0,5}
           | [ ]{1,5} and [ ]{1,5}
           | [ ]{0,5} & [ ]{0,5}
        )
    (?:     ## note - make 2nd month_name optiona
        (?<month_name2>#{MONTH_NAMES})
          [ ]
      )?
     (?<day2>\d{1,2})
      \b
    ## optional two-digit year
     (?:    /
          (?<yy2>\d{2})
            \b
     )?
)}ix

DATE_LIST_RE = merge date_list and date_legs?? Or keep date_legs always with two dates by definition, and date_list starting with three or more dates? e.g. `May 2,3,4`; `Feb 28, Mar 1,2`

%r{
(?<date_list>
 \b
     (?<month_name1>#{MONTH_NAMES})
          [ ]
     (?<day1>\d{1,2})
     (?:  [,;] [ ]{0,5}  )


      (?:  ## note - make 2nd month_name optiona
        (?<month_name2>#{MONTH_NAMES})
          [ ]
      )?
     (?<day2>\d{1,2})
       (?: [,;] [ ]{0,5}  )


      (?:     ## note - make 3rd month_name optiona
        (?<month_name3>#{MONTH_NAMES})
          [ ]
      )?
     (?<day3>\d{1,2})
     \b


     ### optional fourth date
     (?:
         [,;] [ ]{0,5}
         (?:   ## note - make 4th month_name optiona
            (?<month_name4>#{MONTH_NAMES})
            [ ]
         )?
       (?<day4>\d{1,2})
         \b
     )?
)}ix

DATE_RANGE_RE =

%r{
(?<date_range>
 \b
     (?<month_name1>#{MONTH_NAMES})
          [ ]
     (?<day1>\d{1,2})
            [ ]? - [ ]?
    (?:   ## optional month
       (?<month_name2>#{MONTH_NAMES})
           [ ]
    )?
      (?<day2>\d{1,2})
     \b
)}ix

FMT_DAY_NAMES =

[
    nil,   ##  or use '!ERROR!' - why? why not?
    'Mon',  # 1
    'Tue',  # 2
    'Wed',  # 3
    'Thu',  # 4
    'Fri',  # 5
    'Sat',  # 6
    'Sun',  # 7
]

FMT_MONTH_NAMES =

[
    nil,    ## or use '!ERROR!' - why? why not?
    'Jan',  # 1
    'Feb',  # 2
    'Mar',  # 3
    'Apr',  # 4
    'May',  # 5
    'Jun',  # 6
    'Jul',  # 7
    'Aug',  # 8
    'Sep',  # 9
    'Oct',  # 10
    'Nov',  # 11
    'Dec',  # 12
]

GOALS_ =

%q{
      [^:\[\]\n]*?
         \b
         \d{1,3}  '?  ## incl. minute
      [^\[\]\n]*?
}

ERRATAS =

{
## in austria
##       avoid confusion with  /DD is year!!!
##          maybe make it a switch to turn on
    '[Nov 13/14]' => '[Nov 13,14]',
    '[Mar 25/26]' => '[Mar 25,26]',
    '[Aug 12/13]' => '[Aug 12,13]',



## "classic" typos
    ## month
    '[Niv 8]' => '[Nov 8]',
    '[Mov 7]'  => '[Nov 7]',
    '[Mov 26]' => '[Nov 26]',
    ## double brackets
    '[Apr 15]]'                    => '[Apr 15]',
    "[[36' Hansen, 58' Glasner]"  => "[36' Hansen, 58' Glasner]",
    ### more
    '  att; '  => ' att: '   ##  e.g. Wembley; att; 11,689

}

ROUND_PAT_BASE = e.g. `round 1`, `round 2`, etc.; `matchday 1`; `week 1`. Note - adds an optional `Matchday 1 of 2` or such - keep? why? why not? Matchweek is used by premierleague.com; week is used in msl/usa (no matchdays/rounds). Note - a matchweek might start on Tuesday (e.g. Tue to Mon) - or check if it is always a 7-day week? Note - uses a 1-9 regex (cannot start with 0) - why? why not? Make `week 01` or `round 01` or `matchday 01` possible?

%q{
         (   Round
           | Matchday
           | Matchweek
           | Week )   [ ]{1,2}  [1-9][0-9]*

        (?:    ## note - add optional   Matchday 1 of 2 or such
               [ ] of [ ] [1-9][0-9]*
        )?
}

ROUND_NAMES_EN = add more patterns via config. todo/fix - check if the .txt is empty; if so, do NOT add it (an empty alternative `||` will match everything!!). Rename names_misc to names_more - why? why not?

read_patterns( "#{Rsssf.config_dir}/rounds_en.txt" )

ROUND_NAMES_ES =

read_patterns( "#{Rsssf.config_dir}/rounds_es.txt" )

ROUND_NAMES_MISC =

read_patterns( "#{Rsssf.config_dir}/rounds_misc.txt" )

ROUND_PAT =

ROUND_PAT_BASE + ' | ' + ROUND_NAMES_EN.join( ' | ' ) +
' | ' + ROUND_NAMES_ES.join( ' | ' ) +
' | ' + ROUND_NAMES_MISC.join( ' | ' )

TABLE_HEADER_RE =

%r{
      ############
      ## negative & positive lookaheads

##         (?!
##               .* [ ]{2,}       ## no (inline)  double (or more) spaces allowed
##         )

      (?:
      ## (i)  can only start with non-zero number
      ##      or alpha
      ##
      ##  A.  or
      ##  1.  or
      ##   mixed with dot  1A. yes/no?, A1. yes/no?,  1B1. ?
      ##   1.K    - 1.Klasse

      ##
      ##  note - \b(oundary) - to always get complete tokens (alphanum) tokens
      ##            note - \b includes [a-z0-9_] PLUS underscore (_)
      ##                          check if underscore is \b
      ##                              e.g.   09_  or _09 or  match \b[0-9]\b  ???
      ##   use our own asserts?
      ##      BNUM (boundary number) e.g. [^0-9]
      ##      BALPHA (boundary alpha) e.g. [^a-z]
      ##      BALNUM (boundary alphanum) e.g. [^a-z0-9]
      ##    classic is   [^a-z0-9_]

         (?<header>
          (?=
              .*  \p{L}+    ## must incl. alpha character - not only numbers!!
          )

  ## note
  ##   order matters
  ##   move specific first!!

            \b
             (?:     [0-9]+\p{L}  [0-9\p{L}]* \b    ## (ii) mixed alphanum (starting w/ num)
                |    [0-9]+  \b  \.?  (?! \d)    ## (i)  num
                |  \p{L}+[0-9]  [0-9\p{L}]* \b     ## (iiii) mixed alphanum (starting w/ alpha)
                |  \p{L}+  \b    \.?              ## (iii) alpha
             )
             (?:
                ## " (i-iiii) connector options  (a) single space
                ##                                   -- exclude numbers on numbers (FIX)
                ##                               (b) dash (-) or slash (/)
                ##                                  --  must be alpha(.?)-alpha
                ##                                        incl.  K.-H.  with trailing dot
                ##                              add ampersand (&) too
                ##                                    w/ leading & trailing opt space?
                ##                                                incl.  K.&H., K. & H.
               (?:   [ ]?
                   | (?<! \d)  -   ## add negative lookbehind&ahead (no numbers please)
                     (?! \d)
                   |  /
                )
                    ## repeat (i-iiii) see above
                       ## todo - do NOT allow numbers followed by numbers
                \b
                (?:  [0-9]+ \b    (?! [ ] \d)     ## (i) num - no more ordinals - why? why not?
                  |  [0-9]+\p{L}  [0-9\p{L}]* \b     ## (ii) mixed alphanum (starting w/ num)
                                                  ##     group 1a 1FC?? - why? why not?
                  |  \p{L}+  \b   \.?              ## (iii) alpha
                  |  \p{L}+[0-9]  [0-9\p{L}]* \b   ## (iiii) mixed alphanum (starting w/ alpha)
               )
            )*
            (?:
                 [ ]
                \(  [^:()\[\]]+?  \)
            )?
        )  ## end-of-capture header
   )
   :?    ## optional colon (:) e.g. final table:
}ix

TABLE_RE =

%r{

         ### optional table header
          (?:
             ### negative lookahead
             ##    MUST NOT match  standing line e.g.  10  3  4
             ##      or         table heading (see below)
             ##      or   -----  (old style structured heading left overs)
                    (?! ^[ ]* (?:   [^\n]+?  [ ]+ \d{1,3} [ ]+ \d{1,3} [ ]+ \d{1,3}
                                |   (?: GP | M | Team ) [ ]
                                |  -{3,}
                              )
                     )

             ## (i) table header
             ##
             ## fix - make header match more strict!!!
             ##   e.g. do NOT match ---  or more than three spaces or such
             ## exlcude in header
             ##   NB:
             ##    [*]
             ##    [1]
             ## exclude heading === e.g.
             ##    ==== USL Premier Development
                 ^
                [ ]*


              ## exclude comma (,) - why? why not?
              ##   and numbers  - unless group 1
              ## e.g. Kaczor 78 - Dreßel 19, Steinkogler 50,
              ## B'schweig  2-1 Schalke    (Handschuh 38, Popivoda 55 - Fischer 82)
              ##  M'gladbach 2-1 1. FC Köln (Jensen 6, Wittkamp 35 - D.Müller 78)
              ##   Kraft 3, E.Kremers 38)
              ##  Schalke     4-0 Tasmania    (Klose 2, 78, Herrmann 40, Kreuz 82)
              ##
              ## allow name such as
              ##    USL - 1ST DIVISION (2nd Division)


             (?<header>  [^=*:,0-9\[\]\n]+?
                          ([ ] \d{1,2} \b)?   ## optional number only at the end e.g. group 1
                     )
                  :?  ## optional colon (:) e.g. final table:
                  ## cut-off everything separated by more than three spaces
                  ##   e.g. might be "inline" table heading (follow table header name)
                  ##  e.g. Group 1                  M     W     T     L    GF    GA    DIF   PTS
                  (?: [ ]{4,} (?: GP | M |Team ) [ ]  [^\n]+? )?
              [ ]*
             ## note - allow optional blank line - why? why not?
             (?:  \n ^[ ]* )?
             \n
          )?


      #### optional  table heading line
      (?:  ^(?:
          #{table_heading_( 'GP  W   L   D  GF  GA  PTS?' )}
        | #{table_heading_( 'GP  W   L   T  GF  GA  PTS?' )}
        | #{table_heading_( 'GP  W   T   L  GF  GA  PTS?' )}
        | #{table_heading_( 'GP  W   D   L  GF  GA  PTS?' )}
        ##  SW  sudden death win, SL sudden death lose
        | #{table_heading_( 'GP  W   L  SW  GF  GA  PTS?' )}
        | #{table_heading_( 'GP  W SW  SL   L   GF  GA  PTS?' )}
        | #{table_heading_( 'GP  W SOW SOL  L  GF  GA PTS?'   )}
        ##  mx/spanish
        | #{table_heading_( 'M   W   T   L  GF  GC  DIF  PTS' )}
        | #{table_heading_( 'M   W   T   L  GF  GA PTS AVGE' )}
        | #{table_heading_( 'Team  M  W  T  L  GF-GA  PTS')}
        | #{table_heading_( 'Team   M  W  T  L  GF-GA  PTS EP  TP')}
        )
       ## note - allow optional blank line - why? why not?
          (?: \n ^[ ]* )?
            \n
      )?


  ## MUST be followed by a table (standing) line
  ## e.g.  1.FC Cincinnati    34  20  9  5  57-39  69
  ##
  ##   note - allow "run-on" e.g. LB14 on first number
  ## Hudson Valley Quickstrike LB14  12   0   2   40   9   38
  ## Hudson Valley Quickstrike LB12  11   1   0   26   9   33
  ##
  ##    17    11     5     1    40    16    +24    38
  ##  or
  ###  + 1.DC United                       32 17  6/ 3  6 65-43 57

         ^
         (?:
               [^\n]+?
                 (?:
                    (?:

                      \d{1,3}
                 [ ]+ \d{1,3}  ## win
 (?: [ ]+ | [ ]* / [ ]* ) \d{1,3}  ## draw
                 [ ]+ \d{1,3}  ## lose
                 [ ]+ \d{1,3}  (?:  [ ]* [:-] [ ]*
                                  | [ ]+ )  \d{1,3}
                 [ ]+ [+-]? \d{1,3} \b  # might be diff or point allow +/-!!
                   )
                 )
               [^\n]*?
          )
         \n

         ## eat-up the rest
         .*?   ## non-greedy - match everything (incl. newline!) until
                 (?:   \n (?= \n)    ## break on blank line (\n\n) or end-of-string/file
                          | \z
                 )

}ixm

OPT_REF = lets you check for an optional ref, e.g. ‹§fin›

%q{
   (?: [ ]*
     ‹ (?<ref> §[^›]+?) ›
   )?
}

HEADER_ROUND_RE = note - allows an optional colon, e.g. `Playoff:`; `Round 21:`

%r{\A
        [ ]*
         (?<round> #{ROUND_PAT})
              :?   ## note - allow optional colon (:)  e.g. Playoff:
            #{OPT_REF}
         [ ]*
\z}ix

HEADER_DATE_RE =

%r{\A
      [ ]*
      \[  #{date_(DATE_I_RE, DATE_IB_RE,
                  DATE_II_RE,
                  DATE_RANGE_RE,
                  DATE_LIST_RE, DATE_LEGS_RE,
                  )}
      \]
      [ ]*
\z}ix

HEADER_DATE_II_RE = alternate date header (no brackets, incl. year), e.g. `Aug 7 1999`; `Sep 4 1999`; `Oct 23 1999`; `Nov 20 1999`; `Apr 1 2000`

%r{\A
      [ ]*
         #{date_(DATE_I_RE, DATE_II_RE)}
      [ ]*
\z}ix

CITY_ = Sep 16, Berchtold 26, Glasner 54, Kuljic 60 — note - exclude numbers in follow-up text!!! use a shared pattern for city-like text !! maybe allow more and make more specific later exclude comma (,) - why? why not? split in CITY_ and CITY_PLUS_ or such? or find a better name ?? allow number if: Happel-Stadion, Wien, att: 9,200 Happel-Stadion, Wien; att: 7000 Innsbruck; att: 6700 Wörthersee-Stadion, Klagenfurt; att: 30,000 Wörthersee Stadion, Klagenfurt; att: 20,500 Hayward, Calif.; att: 5.528 -- note: dot (.) NOT comma (,) Apr 30, 28 Black Arena, Klagenfurt; att: 30,000 Wörthersee Stadion, known as 28 Black Arena for sponsorship reasons Ernst-Happel-Stadion, Wien; att: 20100; ref: Hofmann

%q{   (?<city>  (?:   [^0-9:;\[\]]+?
                  | .+?
                      [ ] att: [ ] [0-9,.]+
                      (?: [;,] [ ] ref: [ ] .+?  ## w/ optional ref:
                      )?
               )
     )
}

HEADER_DATE_N_CITY_RE = e.g. `Jun 3, Ferrol`; `Apr 2, Wembley` -or- `Sat May 17 - at Millennium Stadium, Cardiff`; `Sun May 25 - at Millennium Stadium, Cardiff`

%r{\A
      [ ]*
      \[  #{date_(DATE_I_RE,
                  DATE_II_RE)}
           (?:       , [ ]*
               | [ ] - [ ] at [ ]
            )
           #{CITY_}
      \]
      [ ]*
\z}ix

HEADER_DATE_ALT_RE = alternate date header with brackets (in oost02.txt), e.g. `[31-08]` - change to `_ 31/08 _`; `[07-09]`; `[07-09]`; `[30-05, Thaur]`

%r{\A
      [ ]*
      \[  (?<date>
             (?<day> \d{1,2}) - (?<month> \d{1,2})
          )
          (?:
              , [ ]*
              #{CITY_}
          )?
      \]
      [ ]*
\z}ix

HEADER_ROUND_N_DATE_RE =

%r{\A
        [ ]*
         (?<round> #{ROUND_PAT})
         [ ]+
        \[
           #{date_(DATE_I_RE, DATE_IB_RE, DATE_II_RE,
                   DATE_RANGE_RE,
                   DATE_LIST_RE, DATE_LEGS_RE)}
        \]
        #{OPT_REF}
        [ ]*
\z}ix

HEADER_ROUND_N_DATE_N_CITY_RE = Final [May 1, Klagenfurt]

%r{\A
        [ ]*
         (?<round> #{ROUND_PAT})
         [ ]+
        \[  #{date_(DATE_I_RE, DATE_II_RE)}
             , [ ]*
           #{CITY_}
        \]
        [ ]*
\z}ix

HEADER_ROUND_N_CITY_RE = e.g. `Final [in Völs]`; `Final [in Kundl]`

%r{\A
        [ ]*
         (?<round> #{ROUND_PAT})
         [ ]+
        \[in [ ]+ #{CITY_}
        \]
        [ ]*
\z}ix

HEADER_ROUND_N_CITY_N_DATE_RE = reverse order (city before date), e.g. `Final [Graz, May 12]`; `Super Cup Final [Graz, Jul 6]`; `Final [London, Feb 27]`

%r{\A
        [ ]*
         (?<round> #{ROUND_PAT})
         [ ]+
        \[ #{CITY_}
             , [ ]*
            #{date_(DATE_I_RE, DATE_II_RE)}
        \]
        [ ]*
\z}ix

HX_RE =

%r{          ## negative lookahead
         ##   do NOT match  =-=
         ##   do NOT match  ===========  (without any heading text!!)
         ##     e.g.
         ##       Fall season
         ##       ===========

        (?! ^[ ]* (?:    =-=
                     |  ={1,} [ ]* $
                   )
         )

         ^
        [ ]*

      (?<marker> ={1,6})
         [ ]*
      (?<text> .+?)
         #{OPT_REF}
         [ ]*
$}x

TOPSCORERS_RE = e.g. `topscorer`, `topscorers`; `top scorer`, `top scorers`; `scorer`, `scorers`

%r{^     [ ]*
    (?<header>
       (?: top [ ]?)?  ## note - optional top
          scorers?      ## singular or plural
     )
      (?: [ ]* :)?    ## note - optional colon
       [ ]*
      \n{0,2}       ## note - optional leading blank line!!

    .*?             ## non-greedy - match everything until
  (?:   \n (?= \n)    ## blank line (\n\n) or end-of-string/file
      | \z
  )
}ixm

Class Method Summary collapse

.build_map(lines, downcase: false) ⇒ Object
.build_names(lines) ⇒ Object
.date_(*re) ⇒ Object

helper for inline regexes (with union) and escaped.
.fmtfix(txt, heading_patches: nil) ⇒ Object

convenience helper.
.fmtfix_pages(pages, outdir:, path:, heading_patches: nil) ⇒ Object

convenience helper.
.mkheading_regex(str) ⇒ Object
.parse_heading_patches(txt) ⇒ Object
.parse_names(txt) ⇒ Object
.read_heading_patches(path) ⇒ Object
.table_heading_(line) ⇒ Object

Instance Method Summary collapse

#_build_date(m) ⇒ Object

“internal” date helpers.
#_build_date_legs(m) ⇒ Object
#_build_date_list(m) ⇒ Object
#_build_date_range(m) ⇒ Object
#_fmt_date(date, format: nil) ⇒ Object

use format: ‘numeric’ for 23/7 or 23/7/2010 etc.
#_fmt_date_legs(legs, format: nil) ⇒ Object

use format: ‘numeric’ for 23/7 or 23/7/2010 etc.
#_fmt_date_list(list, format: nil) ⇒ Object

use format: ‘numeric’ for 23/7 or 23/7/2010 etc.
#_fmt_date_range(range, format: nil) ⇒ Object

use format: ‘numeric’ for 23/7 or 23/7/2010 etc.
#_norm_date(m, format: nil) ⇒ Object

note - line-by-line processing / matching.
#_patch_heading(txt, rxs, title) ⇒ Object
#_scan_outline(txt) ⇒ Object
#autofix(txt) ⇒ Object
#autofix_outline(txt, title:) ⇒ Object
#build_outline(txt) ⇒ Object
#fmtfix(txt, heading_patches: nil) ⇒ Object
#fmtfix_pages(pages, outdir:, path:, heading_patches: nil) ⇒ Object
#handle_errata_txt(txt) ⇒ Object
#handle_goals(txt, opts: {}) ⇒ Object
#handle_header(line) ⇒ Object
#handle_score(txt) ⇒ Object
#handle_tables(txt, tables: []) ⇒ Object
#handle_topscorers(txt, topscorers: [], opts: {}) ⇒ Object
#patch_headings(txt, patches) ⇒ Object

Class Method Details

.build_map(lines, downcase: false) ⇒ `Object`

# File 'lib/rsssf/fmtfix/dates_helpers.rb', line 43

def self.build_map( lines,
               downcase: false )
  ## builds a lookup table from every word to the 1-based number of
  ##  the line it appears on e.g.
  ##    {"january" => 1,  "jan" => 1,
  ##     "february" => 2, "feb" => 2,
  ##     "may" => 5, ...}
  ##
  ##  lines    - array of lines, each line an array of words
  ##  downcase - if true, the words get downcased before used as keys
  map = {}
  lines.each_with_index do |words, idx|
    words.each do |word|
      key = downcase ? word.downcase : word
      map[ key ] = idx + 1    ## note - numbering starts with 1 (NOT zero-based)
    end
  end
  map
end

.build_names(lines) ⇒ `Object`

# File 'lib/rsssf/fmtfix/dates_helpers.rb', line 36

def self.build_names( lines )
  ## glue the words of every line, and then all lines, with a pipe (|)
  ##  to get one big regex alternation e.g.
  ##   January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
  per_line = lines.map do |words|
    words.join('|')
  end
  per_line.join('|')
end

.date_(*re) ⇒ `Object`

helper for inline regexes (with union) and escaped

Raises:

(ArgumentError)

# File 'lib/rsssf/fmtfix/headers.rb', line 51

def self.date_( *re )
      ## helper for inline regexes - builds the union of all passed-in
      ##   date regexes and returns the (regex) source as a string,
      ##   ready for interpolation into a bigger %r{}x regex
      ##
      ## raises ArgumentError if called without any regex
      ##   (Regexp.union without patterns would never match)
      raise ArgumentError, "at least one date regex expected, got #{re}"  if re.empty?

      ## (auto-)wrap in non-capture group - why? why not?
      "(?: #{Regexp.union( *re ).source})"
end

.fmtfix(txt, heading_patches: nil) ⇒ `Object`

convenience helper

# File 'lib/rsssf/fmtfix/fmtfix.rb', line 42

def self.fmtfix( txt, heading_patches: nil )
      ## convenience helper - delegates to a "shared" built-in instance
      ##   that gets created on first use and is kept in @@fmtfix
      @@fmtfix = new    unless defined?(@@fmtfix) && @@fmtfix
      shared = @@fmtfix
      shared.fmtfix( txt, heading_patches: heading_patches )
end

.fmtfix_pages(pages, outdir:, path:, heading_patches: nil) ⇒ `Object`

convenience helper

# File 'lib/rsssf/fmtfix/fmtfix.rb', line 8

def self.fmtfix_pages( pages, outdir:, path:, heading_patches: nil )
      ## convenience helper - delegates to a "shared" built-in instance
      ##   that gets created on first use and is kept in @@fmtfix
      @@fmtfix = new    unless defined?(@@fmtfix) && @@fmtfix
      opts = { outdir:          outdir,
               path:            path,
               heading_patches: heading_patches }
      @@fmtfix.fmtfix_pages( pages, **opts )
end

.mkheading_regex(str) ⇒ `Object`

# File 'lib/rsssf/fmtfix/patch_headings.rb', line 12

def self.mkheading_regex( str )
    ## turns a (human-friendly) heading patch line into the source
    ##  of an extended-mode (x) regex; returns a string
    ##
    ## step 1 - walk over character classes, $$ markers and space runs
    ##    [..]  incl. optional quantifier and padding  => kept as is
    ##    $$    incl. padding                           => \s+
    ##    one or more spaces                            => [ ]
    tokenized = str.strip.gsub( %r{
                             (?<charclass> [ ]* \[ [^\[\]]+ \] [*?+]? [ ]*)
                          |  (?<newline>   [ ]* \$\$ [ ]*)
                          |  (?<spaces>    [ ]+)
                        }x
                       ) do
                   hit = Regexp.last_match
                   next ' [ ] '    if hit[:spaces]    ## space run => single-space class
                   next ' \s+ '    if hit[:newline]   ## $$ => \s+  (\s incl. newline)
                   hit[0]                             ## character class - keep as is
                 end

    ## step 2 - plain replacements (order matters)
    ##    ~    => [ ]?   -- that is, optional space
    ##    .    => \.
    ##    ( )  => \( \)
    ## step 3 (last) - builtins
    ##    $SEASON$ => \d{4}/(?:\d{2}|\d{4})
    ##      note - done after the escapes so its own ( ) stay unescaped
    tokenized.gsub(  '~', ' [ ]? ' )
             .gsub(  '.', '\.' )
             .gsub(  '(', '\(' )
             .gsub(  ')', '\)' )
             .gsub( '$SEASON$', '\d{4}/(?:\d{2}|\d{4})' )
end

.parse_heading_patches(txt) ⇒ `Object`

# File 'lib/rsssf/fmtfix/patch_headings.rb', line 50

def self.parse_heading_patches( txt )
   ## parses heading patches in text format into a hash that maps
   ##  the heading title to an array of (compiled) regexes e.g.
   ##
   ##    = Title
   ##    pattern line 1
   ##    pattern line 2
   ##
   ##  blank lines and lines starting with # get skipped;
   ##  a line with __END__ stops the parsing
   ##
   ##  raises ArgumentError if a pattern line shows up before any heading
   patches = {}

   header = nil    ## pattern list of the current (latest) heading

   txt.each_line do |line|

      line = line.strip
      next  if line.empty? || line.start_with?('#')
      break if line == '__END__'

      ## check if heading
      if m=%r{ ^
              [ ]* =+ [ ]*
                (?<text> .+?)
              (?: [ ]* =+ )?
               [ ]*
                $
              }x.match(line)

          header = patches[m[:text]] = []
      else
          ## note - without a heading there is no list to append to;
          ##          fail with a clear message (instead of nil << ...)
          if header.nil?
             raise ArgumentError, "heading patches - pattern line >#{line}< found before first heading (e.g. = Title)"
          end

          re =  mkheading_regex( line )
          ## note - wrap in %r{^$}ix
          header <<  %r{^ #{re} $}ix
      end


   end
   patches
end

.parse_names(txt) ⇒ `Object`

# File 'lib/rsssf/fmtfix/dates_helpers.rb', line 9

def self.parse_names( txt )
  ## splits the text into an array of lines,
  ##   with every line being an array of words
  ##
  ##  - blank lines and comment lines (starting with #) get skipped
  ##  - inline (until end-of-line) comments get stripped too
  ##      e.g. Janvier  Janv  Jan  ## check janv in use??
  ##      =>   Janvier  Janv  Jan
  ##
  ## todo/fix -- add check for duplicates
  txt.each_line.each_with_object( [] ) do |raw, rows|
    entry = raw.strip
    next if entry.empty? || entry.start_with?( '#' )

    words = entry.sub( /#.*/, '' ).strip.split( /[ \t]+/ )
    rows << words
  end
end

.read_heading_patches(path) ⇒ `Object`

# File 'lib/rsssf/fmtfix/patch_headings.rb', line 83

## passes the result of read_text( path ) on to parse_heading_patches
def self.read_heading_patches( path )
  parse_heading_patches( read_text( path ) )
end

.table_heading_(line) ⇒ `Object`

# File 'lib/rsssf/fmtfix/tables.rb', line 6

def self.table_heading_( line )
  ## turns a table heading line into a regex (source) string
  ##   with one-or-more spaces required between the columns e.g.
  ##
  ## M   W  T  L  GF  GA  PTS  AVGE
  ##  =>
  ## (?: [ ]+ M [ ]+ W [ ]+ T [ ]+ L [ ]+ GF [ ]+ GA [ ]+ PTS [ ]+ AVGE [ ]*)
  spaces  = ' [ ]+ '
  columns = line.strip.split( /[ ]+/ ).join( spaces )

  "(?: [ ]+ #{columns} [ ]*)"
end

Instance Method Details

#_build_date(m) ⇒ `Object`

“internal” date helpers

# File 'lib/rsssf/fmtfix/dates.rb', line 305

def _build_date( m )
    ## builds a date hash from the named captures of a date match
    ##   keys (all optional):  :y, :yy, :m, :d, :wday, :around
    ##
    ## note - accepts a MatchData or a hash with symbol keys
    ##   (MatchData gets converted - quick fix for undefined group name reference)
    caps = m.is_a?(MatchData) ? m.named_captures.transform_keys(&:to_sym) : m

    date = {}

    ## numeric captures   - :yy is the two-digit year (e.g. 25 or 78 etc.)
    ## check - use y too for two-digit year or keep separate - why? why not?
    { y: :year, yy: :yy, m: :month }.each do |key, name|
       date[key] = caps[name].to_i(10)   if caps[name]
    end

    ## map month & day names
    ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
    ##         a month name wins over a numeric month
    date[:m]    = MONTH_MAP[ caps[:month_name].downcase ]   if caps[:month_name]
    date[:d]    = caps[:day].to_i(10)                       if caps[:day]
    date[:wday] = DAY_MAP[ caps[:day_name].downcase ]       if caps[:day_name]

    date[:around] = true     if caps[:around]

    date
end

#_build_date_legs(m) ⇒ `Object`

# File 'lib/rsssf/fmtfix/dates.rb', line 325

def _build_date_legs( m )
    ## builds { date1: {..}, date2: {..} } from a date_legs match
    ##   note - accepts a MatchData or a hash with symbol keys
    ##   (MatchData gets converted - quick fix for undefined group name reference)
    m = m.named_captures.transform_keys(&:to_sym)  if m.is_a?(MatchData)

    ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
    first = { m: MONTH_MAP[ m[:month_name1].downcase ],
              d: m[:day1].to_i(10) }

    ## 2nd leg - month name and two-digit year (e.g. 25 or 78 etc.) are optional
    second = {}
    second[:m]  = MONTH_MAP[ m[:month_name2].downcase ]   if m[:month_name2]
    second[:d]  = m[:day2].to_i(10)
    second[:yy] = m[:yy2].to_i(10)                        if m[:yy2]

    { date1: first, date2: second }
end

#_build_date_list(m) ⇒ `Object`

# File 'lib/rsssf/fmtfix/dates.rb', line 348

def _build_date_list( m )
    ## builds { date1: {..}, date2: {..}, date3: {..} [, date4: {..}] }
    ##   from a date_list match
    ##   note - accepts a MatchData or a hash with symbol keys
    ##   (MatchData gets converted - quick fix for undefined group name reference)
    m = m.named_captures.transform_keys(&:to_sym)  if m.is_a?(MatchData)

    ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
    list = {}
    list[:date1] = { m: MONTH_MAP[ m[:month_name1].downcase ],
                     d: m[:day1].to_i(10) }

    ## dates 2,3 are required (month name optional); date 4 is optional
    (2..4).each do |no|
       day = m[:"day#{no}"]
       next  if no == 4 && !day

       date = {}
       month_name = m[:"month_name#{no}"]
       date[:m] = MONTH_MAP[ month_name.downcase ]   if month_name
       date[:d] = day.to_i(10)
       list[:"date#{no}"] = date
    end

    list
end

#_build_date_range(m) ⇒ `Object`

# File 'lib/rsssf/fmtfix/dates.rb', line 381

def _build_date_range( m )
    ## builds { date1: {..}, date2: {..} } from a date_range match
    ##   note - accepts a MatchData or a hash with symbol keys
    ##   (MatchData gets converted - quick fix for undefined group name reference)
    m = m.named_captures.transform_keys(&:to_sym)  if m.is_a?(MatchData)

    ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
    from = { m: MONTH_MAP[ m[:month_name1].downcase ],
             d: m[:day1].to_i(10) }

    ## end of range - month name is optional
    to = {}
    to[:m] = MONTH_MAP[ m[:month_name2].downcase ]   if m[:month_name2]
    to[:d] = m[:day2].to_i(10)

    { date1: from, date2: to }
end

#_fmt_date(date, format: nil) ⇒ `Object`

use format: ‘numeric’ for 23/7 or 23/7/2010 etc.

# File 'lib/rsssf/fmtfix/dates.rb', line 430

def _fmt_date( date, format: nil )
    ## formats a date hash (see _build_date) as a string
    ##   format: 'numeric'  =>  23/7 or 23/7/2010 or 23/7/98
    ##   otherwise          =>  Fri Feb 7 2025  (wday and year optional)
    out = String.new

    if format && format.downcase == 'numeric'
      out << "#{date[:d]}/#{date[:m]}"

      if date[:y]       ## (optional) four-digit year e.g. 2010
        out << "/#{date[:y]}"
      elsif date[:yy]   ## (optional) two-digit year  e.g. 98
        out << ("/%02d" % date[:yy])    ## note - make sure 0,1,2 become 00, 01, 02
      end

      return out
    end

    ## check for "canonical" convention for around/ca. date or such
    out << "c. "   if date[:around]

    out << "#{FMT_DAY_NAMES[date[:wday]]} "  if date[:wday]
    out << "#{FMT_MONTH_NAMES[date[:m]]} "
    out << "#{date[:d]}"

    if date[:y]
      out << " #{date[:y]}"
    elsif date[:yy]
      ## note - expand two-digit year to four-digit year
      ##    0..29  => 2000, 2001, .. 2029
      ##   30..99  => 1930, 1931, .. 1999
      template = date[:yy] < 30 ? " 20%02d" : " 19%02d"
      out << (template % date[:yy])
    end

    out
end

#_fmt_date_legs(legs, format: nil) ⇒ `Object`

use format: ‘numeric’ for 23/7 or 23/7/2010 etc.

# File 'lib/rsssf/fmtfix/dates.rb', line 470

def _fmt_date_legs( legs, format: nil )
    ## formats two legs e.g.  May 2 & 9  or  May 30 & Jun 6 1987
    ##   note - format is accepted but not used here
    first  = legs[:date1]
    second = legs[:date2]

    out = String.new
    out << "#{FMT_MONTH_NAMES[first[:m]]} "
    out << "#{first[:d]}"
    out << " & "
    out << "#{FMT_MONTH_NAMES[second[:m]]} "  if second[:m]
    out << "#{second[:d]}"

    if second[:yy]
      ## note - expand two-digit year to four-digit year
      ##    0..29  => 2000, 2001, .. 2029
      ##   30..99  => 1930, 1931, .. 1999
      template = second[:yy] < 30 ? " 20%02d" : " 19%02d"
      out << (template % second[:yy])
    end

    out
end

#_fmt_date_list(list, format: nil) ⇒ `Object`

use format: ‘numeric’ for 23/7 or 23/7/2010 etc.

# File 'lib/rsssf/fmtfix/dates.rb', line 493

def _fmt_date_list( list, format: nil )
    ## formats a list of three (or four) dates e.g.
    ##    May 2,3,4   or   Feb 28; Mar 1,2
    ##   note - format is accepted but not used here
    out = String.new

    out << "#{FMT_MONTH_NAMES[list[:date1][:m]]} "
    out << "#{list[:date1][:d]}"

    ## dates 2,3 are required; date 4 is optional
    [:date2, :date3, :date4].each do |key|
      entry = list[key]
      next  if key == :date4 && !entry

      ## separator - semicolon plus month name if month present, else comma only
      out << if entry[:m]
               "; #{FMT_MONTH_NAMES[entry[:m]]} "
             else
               ","
             end
      out << "#{entry[:d]}"
    end

    out
end

#_fmt_date_range(range, format: nil) ⇒ `Object`

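Intended usage: pass `format: 'numeric'` to get numeric output such as 23/7 or 23/7/2010. Note: the method body shown below accepts `format` but does not read it yet.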

# File 'lib/rsssf/fmtfix/dates.rb', line 527

## Formats a date range as "<Mon> <d>-[<Mon> ]<d>", e.g. "Mar 29-31".
##
##  range  - hash-like with :date1 and :date2; each entry is read via
##           [:m] (month key into FMT_MONTH_NAMES) and [:d] (day)
##  format - accepted for interface compatibility
##           (intended: 'numeric' for 23/7 or 23/7/2010); not read here
##
##  returns the formatted string
def _fmt_date_range( range, format: nil )   ### use format: 'numeric' for  23/7 or 23/7/2010 etc.
    from = range[:date1]
    to   = range[:date2]

    out = String.new
    out << "#{FMT_MONTH_NAMES[from[:m]]} #{from[:d]}" << "-"
    ## note - end month only gets printed if present
    out << "#{FMT_MONTH_NAMES[to[:m]]} "  if to[:m]
    out << "#{to[:d]}"
    out
end

#_norm_date(m, format: nil) ⇒ `Object`

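Normalizes a matched date (date list, date legs, date range or single date) into its formatted text. Called from the line-by-line header processing / matching in `handle_header`.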

# File 'lib/rsssf/fmtfix/headers.rb', line 245

## Builds and formats the date captured by a (header) match.
##
##  m      - a MatchData (converted to a hash of named captures with
##           symbol keys) or an already hash-like object
##  format - passed through unchanged to the _fmt_date* helper
##
##  Dispatches on the first capture present, in this order:
##    :date_list, :date_legs, :date_range; otherwise a single date is assumed.
def _norm_date( m, format: nil )
  ## quick fix for undefined group name reference
  ##   (MatchData#[] raises for unknown group names; a hash returns nil)
  caps = m.is_a?(MatchData) ? m.named_captures.transform_keys(&:to_sym) : m

  return _fmt_date_list( _build_date_list( caps ), format: format )    if caps[:date_list]
  return _fmt_date_legs( _build_date_legs( caps ), format: format )    if caps[:date_legs]
  return _fmt_date_range( _build_date_range( caps ), format: format )  if caps[:date_range]

  ## assume caps[:date]
  _fmt_date( _build_date( caps ), format: format )
end

#_patch_heading(txt, rxs, title) ⇒ `Object`

# File 'lib/rsssf/fmtfix/patch_headings.rb', line 89

## Turns the first match of each regex in rxs into a "== ... ==" style
## heading line ("== <title>\n").
##
##  txt   - text to patch
##  rxs   - list of regexes tried in order (first match per regex only)
##  title - replacement heading text; the special value '*' keeps the
##          matched text itself as the heading (generic replace)
##
##  For a regular title the loop stops after the first regex that matched.
##  For '*' every regex in rxs gets its turn.
##
##  returns the patched text
def _patch_heading( txt, rxs, title )
   keep_original = (title == '*')   ## use original title / do NOT replace/normalize

   rxs.each do |rx|
     matched = false
     txt = txt.sub( rx ) do |match|
               puts "  found heading match >#{match}< replace with >== #{title}<"
               matched = true

               if keep_original
                  ##  note - autoremove (optional) trailing colon (:)
                  ##                                    or  dot (.)
                  ##  fix - strip trailing whitespace (incl. newline) FIRST;
                  ##        otherwise "Final: \n" keeps its colon because
                  ##        the colon is not the last char on the line.
                  ##        \z (instead of $) makes sure only the very end
                  ##        of the match is touched, never the end of an
                  ##        inner line of a multi-line match.
                  heading = match.rstrip.sub( /[.:]\z/, '' ).rstrip
                  "== #{heading}\n"
               else
                  "== #{title}\n"
               end
             end

     ## note - break on first match
     ##          but only short-circuit if NOT generic replace
     break  if matched && !keep_original
   end
   txt
end

#_scan_outline(txt) ⇒ `Object`

# File 'lib/rsssf/fmtfix/outline.rb', line 171

## Returns all heading matches found in txt, that is, the result of
## scanning txt with HX_RE (one entry per heading; empty array if none).
def _scan_outline( txt )
  txt.scan( HX_RE )
end

#autofix(txt) ⇒ `Object`

# File 'lib/rsssf/fmtfix/fmtfix-base.rb', line 9

## Runs the (auto)fix pipeline on a page text and returns the fixed text.
##
##  Steps (in this order):
##    1. whitespace clean-up (tabs, windows newlines, unicode spaces)
##    2. block handlers  - handle_tables, handle_topscorers, handle_errata_txt
##    3. line-by-line header reformatting via handle_header
##    4. handle_score, handle_goals
##
##  txt - page text
def autofix( txt )

  ##
  ## make sure no tabs (expand to two spaces)
  txt = txt.gsub( "\t", '  ' )
  txt = txt.gsub( "\r\n", "\n" )  ## unify newline

  ## fix unicode space - note: uses an explicit character class
  ##   instead of an invisible literal char (that cannot be told apart
  ##   from a plain ascii space when reading or copying the source).
  ##   \p{Zs} is the unicode "space separator" category
  ##   e.g. no-break space (U+00A0), en/em space, narrow no-break space etc.
  ##   every match gets replaced one-to-one, thus, column positions stay.
  txt = txt.gsub( /\p{Zs}/, ' ' )


  txt = handle_tables( txt )     ## e.g. final/halfway table (aka standings)
  txt = handle_topscorers( txt )

  txt = handle_errata_txt( txt )


  #####
  ## line-by-line processing / matching
  newtxt = String.new
  txt.each_line do |line|
     ## note - line incl. newline here
     ##
     ## note - handle_header returns nil if no match
     ##            otherwise the reformatted (new) line (incl. newline) !!!
     newtxt << (handle_header( line.rstrip ) || line)
  end
  txt = newtxt


  txt = handle_score( txt )
  txt = handle_goals( txt )


  ###
  ## todo
  ##   fix subs in lineup  in oost00.txt
  # Salzburg: Safar - Szewczyk (97./Lipcsei) - Winklhofer, C.Jank - Laessig,
  #        Hütter (71./Meyssen) - Nikolic, Aufhauser, Kitzbichler - Struber,
  #        Polster (56./Sabitzer)

  txt
end

#autofix_outline(txt, title:) ⇒ `Object`

# File 'lib/rsssf/fmtfix/outline.rb', line 36

## Normalizes the heading outline of a page text.
##
##  txt   - page text
##  title - page title (compared to the text of the first heading)
##
##  What the code below does:
##   - scans all headings with HX_RE; returns txt unchanged if there are none
##   - counts headings per level (level = length of the heading marker)
##   - if the first heading sits on the top-most used level, is the only
##     heading on that level and its text equals title, the heading line
##     gets removed from txt (and the level is dropped)
##   - otherwise a message gets logged via log() (no change to txt)
##   - the remaining used levels get renumbered to 1,2,3.. and every
##     heading marker is rewritten to (new level + 1) "=" characters
##
##  note - terminates the process (exit 1) if a heading level has no mapping
##
##  returns the rewritten text
def autofix_outline( txt, title: )


     hx =  txt.scan( HX_RE )

     ### note - shortcircuit if no headings found!!!
     return txt    if hx.size == 0




     ## update counts/usage of h1,h2,h3,h4,h5,h6
     ##   note - index 0 is nil (unused); index 1..6 hold the counts
     ##   NOTE(review): a marker longer than six chars would hit nil here
     ##                 - presumably HX_RE caps the marker at six; confirm
     counts = [nil,0,0,0,0,0,0]
     hx.each do |marker,_|
                   level = marker.size;
                   counts[ level ] += 1
     end

      ## flatten level; only record levels with heading counts
      ##   (levels end up in ascending order e.g. [2,3,5])
      levels = []
      counts.each_with_index do  |count,level|
            levels << level    if count && count > 0
      end



      #####
      ### special case for first heading
      ##    check if heading is matching title AND the only one in top level
      htop_marker, htop_header = hx[0]
      htop_level = htop_marker.size

      ##  top heading MUST always be lowest (top)
      if htop_level == levels[0]
        if counts[htop_level] == 1
          ##  check if same as title
          ##    if yes pop (that is, remove too)
          if htop_header == title
             counts[htop_level] = 0  ## update/reset counter
             levels.shift            ## remove first level (inline op)!!!

             ### note - space in header must be replaces with [ ]!!!!
             ##                    or \\   with Regex.escape!!!
             ###  note - MUST escape string for regex e.g. [Bra..] or 1.
             ###
             ###   V COPA BRASIL - 1979 [Brazilian Championship]
             ##    check if space works with escape??

             htop_re = %r{
                            ^
                           [ ]* #{htop_marker}
                           [ ]* #{Regexp.escape(htop_header)}
                              .*?
                           $    ## or use \n - why? why not?
                         }x
             ## remove line in txt too
             ##   note - only the line content gets removed
             ##          (the regex does not consume the newline)
             ##   NOTE(review): if htop_re does not match the heading stays
             ##                 in txt and the rewrite below finds no mapping
             ##                 for its level (=> exit 1) - confirm
             txt = txt.sub( htop_re ) do |match|
                   puts "   removing top heading matching title  -- >#{match}<"
                                 ''
                              end
          else
             ## warn/log  - heading top NOT matching page title
             msg = "first top heading NOT matching page title  #{htop_header} <=> #{title}"
             puts "!! WARN #{msg}"
             log( msg )
          end
        else
          ## warn/log   - more than one top level heading!!!
          msg = "more than one (#{counts[htop_level]}) top heading #{htop_level} found " +
                "in page with title #{title}"
          ## maybe add headers in the future - why? why not?
          log( msg )
       end
      else
         ## warn/log   - top heading NOT top!!
          msg = "top heading #{htop_level} not top (#{levels[0]}) " +
                "in page with title #{title}"
          log( msg )
      end


       ## map every level in use to its position (1-based)
       ##   e.g. levels [2,3,5] => {2=>1, 3=>2, 5=>3}
       mapping = {}
       levels.each_with_index do |level,i|
            from = level
            to   = i+1
            mapping[from] = to
       end


      # rewrite headings
       txt = txt.gsub( HX_RE ) do
                 m = Regexp.last_match
                 old_marker = m[:marker]
                 old_level  = m[:marker].size

                 new_level = mapping[old_level]

                 ## no mapping - dump debug info and stop the process
                 if new_level.nil?
                    puts "!! no heading #{old_level} mapping found in page >#{title}<:"
                    puts "match:"
                    pp m
                    puts "counts:"
                    pp counts
                    puts "levels:"
                    pp levels
                    puts "mapping:"
                    pp mapping
                    exit 1
                 end

                 new_marker =  '=' * new_level

                 ## remove level diff from marker
                 ##
                 ##  maybe in the future use track trailing marker too
                 ##   and rebuild heading/header instead of gsub

                ## note -  always start at level 2 (page title like in wikipedia is level 1)
                 ##                  thus, new_level+1

                 ## NOTE(review): the condition parses as
                 ##     (old_level - new_level) + 1 > 0
                 ##   and NOT as  old_level - (new_level+1) > 0.
                 ##   mapping never maps a level to a higher number, thus,
                 ##   the condition looks always true and the marker always
                 ##   gets replaced (incl. a level 1 heading moving to "==").
                 ##   confirm that is intended before "fixing" the parentheses
                 if (old_level - new_level+1) > 0
                    ## note - will remove diff from leading (and possibly trailing) marker too
                    m[0].gsub( old_marker, new_marker+'=' )
                 else
                    m[0]
                 end
           end


    txt

end

#build_outline(txt) ⇒ `Object`

# File 'lib/rsssf/fmtfix/outline.rb', line 173

## Builds a (debug) outline report for a page text.
##
##  The report has
##    - two summary lines: which heading levels h1..h6 are in use ("-" if not)
##      and how many headings per level
##    - one line per heading: "(level) marker  text"
##    - if any anchors (‹§ ... ›) are found, an "aname" line with
##      count & comma-separated list
##
##  txt - page text
##
##  returns the report as a string
def build_outline( txt )
     headings = txt.scan( HX_RE )

     ## note - index 0 is nil
     ##  index 1 (h1) is 0 etc.
     counts = [nil,0,0,0,0,0,0]
     headings.each { |marker,_| counts[ marker.size ] += 1 }

     range = (1..6).to_a
     tags   = range.map { |level| counts[level]==0 ? '-' : "h#{level}" }.join( '/' )
     totals = range.map { |level| counts[level]==0 ? '-' : counts[level] }.join( '/' )

     out = String.new
     out << "  outline:" << " " << tags   << "\n"
     out << "           "        << totals << "\n"

     headings.each do |marker,text|
        out << ("    (%d) %-6s" % [marker.size, marker]) << "  "
        out << text
        out << "\n"
     end

     ## count anchors (aka a name)
     anchors = txt.scan( /‹§  [^›]+  ›/x )
     unless anchors.size == 0
        out << "\n" << "  aname #{anchors.size}: "
        out << anchors.join( ',' )
        out << "\n"
     end

     out
end

#fmtfix(txt, heading_patches: nil) ⇒ `Object`

# File 'lib/rsssf/fmtfix/fmtfix.rb', line 49

## Runs the complete format-fix pipeline on a single page text.
##
##  step 1 - autofix_outline (with the page title read via Page.parse_meta;
##             'n/a' if no title)
##  step 2 - if heading_patches are passed in AND the page has no headings
##             (outline is empty), patch in headings via patch_headings
##             (with at_headings.txt, de_headings.txt etc.)
##  step 3 - autofix
##
##  txt             - page text
##  heading_patches - (optional) heading patches; nil skips step 2
##
##  returns the fixed text
def fmtfix( txt,  heading_patches: nil )
    ## get title
    title = Page.parse_meta( txt )[:title] || 'n/a'

    newtxt = autofix_outline( txt, title: title )

    ## check if any headings / outline - patch only if none
    if heading_patches && _scan_outline( newtxt ).size == 0
        newtxt = patch_headings( newtxt, heading_patches )
    end

    ## note - disabled for now; kept for reference
    ##
    ##   ## add (quick) outline
    ##   outline = build_outline( newtxt )
    ##
    ##   ## add inside  <!-- source: ...  [auto-add here] -->
    ##   ## e.g.
    ##   ##   <!--
    ##   ##      source: https://rsssf.org/tableso/oost98.html
    ##   ##    -->
    ##
    ##   newtxt = newtxt.sub( %r{^[ ]*<!--
    ##                  [ \n]*
    ##                    (source: .+?)
    ##                   [ \n]*
    ##                 -->
    ##              }ix,
    ##          "<!--\n  \\1\n\n#{outline} -->" )

    autofix( newtxt )
end

#fmtfix_pages(pages, outdir:, path:, heading_patches: nil) ⇒ `Object`

# File 'lib/rsssf/fmtfix/fmtfix.rb', line 15

## Runs fmtfix on a list of pages and writes the results to outdir.
##
##  pages           - list of page configs; each config is read via ['page']
##  outdir          - output directory for the fixed <basename>.txt files
##  path            - (lookup search) path (array expected!!!)
##                      passed on to find_file!
##  heading_patches - (optional) passed on to fmtfix
##
##  For every page the input <dirname>/<basename>.txt is looked up via
##  find_file!, read via read_text, fixed, and written via write_text
##  to <outdir>/<basename>.txt.
def fmtfix_pages( pages, outdir:,
                         path:,         ## (lookup search) path (array expected!!!)
                         heading_patches: nil )

     pages.each_with_index do |config,idx|
         puts "==> #{idx+1}/#{pages.size} #{config.pretty_inspect}..."

         page = config['page']
         ext  = File.extname( page )
         stem = File.basename( page, ext )    ## file name without extension

         source = find_file!( "#{File.dirname( page )}/#{stem}.txt", path: path )

         fixed = fmtfix( read_text( source ), heading_patches: heading_patches )

         write_text( File.join( outdir, "#{stem}.txt" ), fixed )
     end
end

#handle_errata_txt(txt) ⇒ `Object`

# File 'lib/rsssf/fmtfix/errata.rb', line 33

## Applies all errata fixes to the text.
##
##  Every ERRATAS entry is a (pattern, replacement) pair; the pairs get
##  applied one after another (in ERRATAS order) with gsub.
##
##  txt - page text
##
##  returns the fixed text
def handle_errata_txt( txt )
   ERRATAS.reduce( txt ) do |fixed, (errata, replace)|
      fixed.gsub( errata, replace )
   end
end

#handle_goals(txt, opts: {}) ⇒ `Object`

# File 'lib/rsssf/fmtfix/goals.rb', line 38

## Rewrites goal (scorer) lines from square brackets to parentheses.
##
##  txt  - page text
##  opts - options hash; opts[:goals] (if truthy) prints every match
##         of the single-line rule for debugging
##
##  Three steps (in this order):
##   1. plain string swaps  [pen] => (pen)  and  [og] => (og)
##   2. goal block spanning two or three lines  [...]  =>  (...)
##   3. single goal line following a line with a score (e.g. 3-0),
##      optionally followed by a second goal line (aggregate matches)
##
##  note - GOALS_ is a regex fragment defined elsewhere (interpolated
##         below); what exactly counts as a goals line is decided there
##
##  returns the rewritten text
def handle_goals( txt, opts: {} )


##
##  quick fix - change [pen] to (pen) and
##                     [og] to (og)
##   e.g. [Parkin 57 [og] - Nogan 47]
##        [McIndoe 11 [pen] Green 20, Blundell 90 - Robinson 74]


   txt = txt.gsub( '[pen]', '(pen)')
   txt = txt.gsub( '[og]', '(og)')



=begin
   ##   [15' Barisic, 80' Gilewicz; 10' (og) Barisic]
   ##  try (simple) goal line
   ##   note keep leading spaces / indent

##  note - first line must include a score!!
###      change to named captures!! - use \k<> !!!
   txt = txt.gsub( %r{^
                        (  .+?
                             \d{1,2}-\d{1,2}
                           .*?
                          \n
                         )
                     ([ ]*)
                       \[
                        ( .*?
                           \b\d{1,3}'  ## incl. minute
                          .*?
                        )
                      \]
                     [ ]*
                    $}ix,
                    '\1\2(\3)' )
=end




  ##  try (simple double) goal line
   ##   note keep leading spaces / indent
  ## [21' Dospel, 42' and 64' Mayrleb, 51' Datoru, 72' Sobczak; 25' and
  ## 90' B.Akwuegbu]
  ##  -or-
  ###  [Jose Manuel Jurado 12, Diego Forlán 40, 63,
  ##   "Simao" Pedro Fonseca 90]
  ##  [Rubén Suárez 10; Abdoulay Konko 12, 63, Alvaro Negredo 27,
  ##   "Renato" Dirnei Florencio 87]


##  ["Edmilson" Gomes de Moraes 40, Marco Perez 68,
##   Ander Herrera 82; Fernando Fernandez 1, 27,
##   Juan Miguel Jimenez "Juanmi" 6, 28, Quincy Owusu-abeyie 35]
##  or
##  [Jose Manuel Casado 16,Emiliano Armenteros 20,
##   Jorge Andujar Moreno "Coke" 60; Jose Javier Barkero 14pen,
##   Jose Antonio Culebras 90+].
##    note - remove optional

   ## step 2 - goal block on two lines (plus an optional third line)
   ##   \1 is the indent, \2 the goals text incl. the inner newlines;
   ##   the optional trailing dot and trailing spaces get dropped
   ##   NOTE(review): the numbered back references assume that GOALS_
   ##                 adds no capture groups of its own - confirm
   txt = txt.gsub( %r{^
                     ([ ]*)
                       \[
                        (            #{GOALS_}
                              \n     #{GOALS_}
                             (?:
                                 \n  #{GOALS_}
                             )?
                        )
                      \]
                      \.?  ## optional trailing dot
                      [ ]*
                    $}ix,
                    '\1(\2)' )


## note - match for single line goes last !!


###
###    [Fernando Llorente 47]
##   [Sebastián Fernández 44; Aritz Aduriz 9, Joaquín Sanchez 71, 75]
   ##  try (simple) goal line with number only!!!
   ##   note keep leading spaces / indent

## Fluminense     3-0   0-2  São Caetano
##    [Magno Alves 70', 88', Roni 75']
##    [Daniel 15', Magrão 46'p]

## fix/fix/fix - merge with rule above!!!
##                make minute optional!!!

   ## step 3 - a line with a score (digits-digits) followed by
   ##   one bracketed goal line and an optional second bracketed goal line
   txt = txt.gsub( %r{^
                      (?<match>  .+?
                                \d{1,2}-\d{1,2}
                                .*?
                         )
                          \n
                     (?<indent1>  [ ]*)
                       \[
                        (?<goals1> #{GOALS_})
                      \]
                     [ ]*
                (?:   ## check for second goal line following
                      ##   used in br for aggregate matches
                    \n
                    (?<indent2> [ ]*)
                       \[
                        (?<goals2> #{GOALS_})
                      \]
                     [ ]*
                )?
                    $}ix ) do |match|

                     ## debug output (only if asked for via opts)
                     if opts[:goals]
                        puts "  match:"
                        puts match
                     end

                      ## rebuild the matched lines
                      ##   keeps the match line & the indent(s) as is;
                      ##   swaps [] for () and drops trailing spaces
                      m = Regexp.last_match
                      buf = String.new
                      buf += "#{m[:match]}\n"
                      buf += "#{m[:indent1]}(#{m[:goals1]})"
                      ## second goal line is optional (captures are nil if missing)
                      buf += "\n#{m[:indent2]}(#{m[:goals2]})"   if m[:indent2] && m[:goals2]
                      buf
                    end

   txt
end

#handle_header(line) ⇒ `Object`

# File 'lib/rsssf/fmtfix/headers.rb', line 261

## Tries to match the line as a (round / date / city) header.
##
##  line - a single line (trailing whitespace & newline get stripped here
##         again - expect chomp of newline "upstream" - why? why not?)
##
##  The header regexes are tried one by one; the first match wins.
##    note - the date-style headers are matched against the
##           right-stripped line; the round-with-extras headers
##           against the fully stripped line (no leading spaces)
##
##  returns the reformatted (new) line incl. newline
##            or nil (if no match!!)
def handle_header( line )
      line    = line.rstrip
      trimmed = line.strip     ## no leading spaces too

      if (m = HEADER_ROUND_RE.match( line ))
         return "▪ #{m[:round]} ▪\n"
      end

      ## e.g. [Nov 20]
      ## e.g. [April 1]
      if (m = HEADER_DATE_RE.match( line ))
         date = _norm_date( m )
         return "_ #{date} _\n"
      end

      ## e.g. [Jun 3, Ferrol]
      ## e.g. [Apr 2, Wembley]
      ##   [Sat May 17 - at Millennium Stadium, Cardiff]
      ##   [Sun May 25 - at Millennium Stadium, Cardiff]
      if (m = HEADER_DATE_N_CITY_RE.match( line ))
         date = _norm_date( m )

         ##  note - check for special case
         ##     [Dec 10, replay]
         ##  change to  ▪ Replay ▪   _ Dec 10 _
         return "▪ Replay ▪  _ #{date} _\n"   if m[:city] == 'replay'
         return "_ #{date} _ @ #{m[:city]}\n"
      end

      ##  note - no enclosing brackets []!!!
      ## e.g. Nov 20 1999  or Nov 20, 1999
      ##      Apr 1 2000   or Apr 1, 2000
      if (m = HEADER_DATE_II_RE.match( line ))
         date = _norm_date( m )
         return "_ #{date} _\n"
      end

      ## e.g. [07-09]
      ##      [30-05, Thaur]
      if (m = HEADER_DATE_ALT_RE.match( line ))
         ## date = _norm_date( m, format: 'numeric' )
         date = _norm_date( m  )
         out = String.new
         out << "_ #{date} _"
         out << " @ #{m[:city]}"    if m[:city]
         out << "\n"
         return out
      end

      if (m = HEADER_ROUND_N_DATE_RE.match( trimmed ))
         date = _norm_date( m )
         return "▪ #{m[:round]} ▪  #{date}\n"
      end

      if (m = HEADER_ROUND_N_DATE_N_CITY_RE.match( trimmed ))
         date = _norm_date( m )
         return "▪ #{m[:round]} ▪  #{date} @ #{m[:city]}\n"
      end

      if (m = HEADER_ROUND_N_CITY_RE.match( trimmed ))
         return "▪ #{m[:round]} ▪  @ #{m[:city]}\n"
      end

      if (m = HEADER_ROUND_N_CITY_N_DATE_RE.match( trimmed ))
         date = _norm_date( m )
         ## note - reverse (rotate) date & city
         return "▪ #{m[:round]} ▪  #{date} @ #{m[:city]}\n"
      end

      nil   ## no header matched
end

#handle_score(txt) ⇒ `Object`

# File 'lib/rsssf/fmtfix/score.rb', line 5

## Normalizes score notes (extra time / penalties) from square brackets
## to parentheses.
##
##   [aet]            => (aet)            -- after extra time
##   [asdet]          => (asdet)          -- after sudden death extra time
##   [aet, 2-3 pen]   => (aet, 2-3 pen)   incl. [aet, 2-3pen]
##   [aet, pen 4-3]   => (aet, 4-3 pen)
##   [5-4 pen]        => (5-4 pen)        incl. [1-3pen]
##   [Pen 4-1]        => (4-1 pen)
##   [5-3 PK]         => (5-3 pen)
##
##  All rules get applied to the complete text one after another
##  (in the order listed in the rules table below).
##
##  txt - page text
##
##  returns the rewritten text
def handle_score( txt )

    rules = [
      ## fix typos - move to errata
      [ 'paet, 3-4 pen]', '[aet, 3-4 pen]' ],

      ###  [aet]   => (aet)         -- after extra time
      ##   [asdet] => (asdet)      -- after sudden death extra time
      [ '[aet]',   '(aet)' ],
      [ '[asdet]', '(asdet)' ],

      ## [aet, 2-3 pen] => (aet, 2-3 pen)
      ##  [aet, 9-10 pen]
      ## [aet, 2-3pen]
      ## [aet, 7-6pen]
      [ %r{
                         \[
                            aet[,;.] [ ]?
                             (\d{1,2}-\d{1,2}) [ ]? pen
                         \]
                    }ix,
        '(aet, \1 pen)' ],

      ###  [aet, pen 4-3]
      ##   [aet, pen 2-4]
      ###    =>
      ##      (aet, 2-4 pen)
      [ %r{
                         \[
                            aet[,;.] [ ]?
                             pen [ ] (\d{1,2}-\d{1,2})
                         \]
                    }ix,
        '(aet, \1 pen)' ],

      ##   [5-4 pen]
      ##   [3-4 pen]
      ##   [1-3 pen], [1-3pen]
      [ %r{
                         \[
                           (\d{1,2}-\d{1,2}) [ ]? pen
                         \]
                    }ix,
        '(\1 pen)' ],

      ##   [Pen 4-1], [Pen 4-5], [Pen 1-3]
      ##      =>
      ##    (4-1 pen)
      [ %r{
                         \[
                           pen [ ] (\d{1,2}-\d{1,2})
                         \]
                    }ix,
        '(\1 pen)' ],

      ##  [5-3 PK], [6-5 PK]
      ##      =>
      ##   (6-5 pen)
      [ %r{
                           \[
                              (\d{1,2}-\d{1,2}) [ ] PK
                         \]
                        }ix,
        '(\1 pen)' ],
    ]

    ##
    ## check special case usage - uniques?
    ##    [8-7 pen(no extra time)]
    ##    [Pen 2-4 (1-3?)]

    rules.reduce( txt ) do |fixed, (from, to)|
       fixed.gsub( from, to )
    end
end

#handle_tables(txt, tables: []) ⇒ `Object`

# File 'lib/rsssf/fmtfix/tables.rb', line 282

## Replaces every table block (e.g. final/halfway table aka standings)
## with a "collapsed" comment marker.
##
##  txt    - page text
##  tables - array collecting the removed table blocks (appended in place);
##           the marker number is the block's position (1-based) in there
##
##  marker format:
##     <!-- $table1$ - header -->     or (if the header is missing)
##     <!-- $table1$ -->
##
##  note - prints every table block found (debug output)
##
##  returns the rewritten text
def handle_tables( txt, tables: [] )
   txt.gsub( TABLE_RE ) do |block|
       ## note - header might be missing
       ##   table starting w/ blank line
       header = $~[:header]

       puts "  proc table >#{header}< block:"
       puts ">>> (begin)"
       puts block
       puts "<<< (end)"

       ## remove everyting
       ##  or put in comment block later with command line option/switch!!
       ##
       ## replace with "collapsed" marker
       tables << block
       marker = "$table#{tables.size}$"

       header ? "<!-- #{marker} - #{header} -->\n" : "<!-- #{marker} -->\n"
   end
end

#handle_topscorers(txt, topscorers: [], opts: {}) ⇒ `Object`

# File 'lib/rsssf/fmtfix/topscorers.rb', line 29

## Replaces every topscorers block with a "collapsed" comment marker.
##
##  txt        - page text
##  topscorers - array collecting the removed blocks (appended in place);
##               the marker number is the block's position (1-based)
##  opts       - options hash; opts[:topscorers] (if truthy) prints
##               every block found for debugging
##
##  marker format:   <!-- $topscorers1$ -->   followed by a blank line
##
##  returns the rewritten text
def handle_topscorers( txt, topscorers: [], opts: {} )
   txt.gsub( TOPSCORERS_RE ) do |block|
       if opts[:topscorers]
          puts "  proc topscorers block:"
          puts block
       end

       ## remove everyting
       ##  or put in comment block later with command line option/switch!!
       ##
       ## replace with "collapsed" marker
       topscorers << block
       "<!-- $topscorers#{topscorers.size}$ -->\n\n"
   end
end

#patch_headings(txt, patches) ⇒ `Object`

# File 'lib/rsssf/fmtfix/patch_headings.rb', line 116

## Applies all heading patches to the text.
##
##  txt     - page text
##  patches - pairs of (title, regexes); every pair gets passed on
##            (in order) to _patch_heading
##
##  returns the patched text
def patch_headings( txt, patches )
     patches.reduce( txt ) do |patched, (title, rxs)|
         _patch_heading( patched, rxs, title )
     end
end

Class: Rsssf::Fmtfix

Overview

Constant Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.build_map(lines, downcase: false) ⇒ Object

.build_names(lines) ⇒ Object

.date_(*re) ⇒ Object

.fmtfix(txt, heading_patches: nil) ⇒ Object

.fmtfix_pages(pages, outdir:, path:, heading_patches: nil) ⇒ Object

.mkheading_regex(str) ⇒ Object

.parse_heading_patches(txt) ⇒ Object

.parse_names(txt) ⇒ Object

.read_heading_patches(path) ⇒ Object

.table_heading_(line) ⇒ Object

Instance Method Details

#_build_date(m) ⇒ Object

#_build_date_legs(m) ⇒ Object

#_build_date_list(m) ⇒ Object

#_build_date_range(m) ⇒ Object

#_fmt_date(date, format: nil) ⇒ Object

#_fmt_date_legs(legs, format: nil) ⇒ Object

#_fmt_date_list(list, format: nil) ⇒ Object

#_fmt_date_range(range, format: nil) ⇒ Object

#_norm_date(m, format: nil) ⇒ Object

#_patch_heading(txt, rxs, title) ⇒ Object

#_scan_outline(txt) ⇒ Object

#autofix(txt) ⇒ Object

#autofix_outline(txt, title:) ⇒ Object

#build_outline(txt) ⇒ Object

#fmtfix(txt, heading_patches: nil) ⇒ Object

#fmtfix_pages(pages, outdir:, path:, heading_patches: nil) ⇒ Object

#handle_errata_txt(txt) ⇒ Object

#handle_goals(txt, opts: {}) ⇒ Object

#handle_header(line) ⇒ Object

#handle_score(txt) ⇒ Object

#handle_tables(txt, tables: []) ⇒ Object

#handle_topscorers(txt, topscorers: [], opts: {}) ⇒ Object

#patch_headings(txt, patches) ⇒ Object

.build_map(lines, downcase: false) ⇒ `Object`

.build_names(lines) ⇒ `Object`

.date_(*re) ⇒ `Object`

.fmtfix(txt, heading_patches: nil) ⇒ `Object`

.fmtfix_pages(pages, outdir:, path:, heading_patches: nil) ⇒ `Object`

.mkheading_regex(str) ⇒ `Object`

.parse_heading_patches(txt) ⇒ `Object`

.parse_names(txt) ⇒ `Object`

.read_heading_patches(path) ⇒ `Object`

.table_heading_(line) ⇒ `Object`

#_build_date(m) ⇒ `Object`

#_build_date_legs(m) ⇒ `Object`

#_build_date_list(m) ⇒ `Object`

#_build_date_range(m) ⇒ `Object`

#_fmt_date(date, format: nil) ⇒ `Object`

#_fmt_date_legs(legs, format: nil) ⇒ `Object`

#_fmt_date_list(list, format: nil) ⇒ `Object`

#_fmt_date_range(range, format: nil) ⇒ `Object`

#_norm_date(m, format: nil) ⇒ `Object`

#_patch_heading(txt, rxs, title) ⇒ `Object`

#_scan_outline(txt) ⇒ `Object`

#autofix(txt) ⇒ `Object`

#autofix_outline(txt, title:) ⇒ `Object`

#build_outline(txt) ⇒ `Object`

#fmtfix(txt, heading_patches: nil) ⇒ `Object`

#fmtfix_pages(pages, outdir:, path:, heading_patches: nil) ⇒ `Object`

#handle_errata_txt(txt) ⇒ `Object`

#handle_goals(txt, opts: {}) ⇒ `Object`

#handle_header(line) ⇒ `Object`

#handle_score(txt) ⇒ `Object`

#handle_tables(txt, tables: []) ⇒ `Object`

#handle_topscorers(txt, topscorers: [], opts: {}) ⇒ `Object`

#patch_headings(txt, patches) ⇒ `Object`