Class: Pubid::Csa::Parser

Inherits:

Parslet::Parser

Object
Parslet::Parser
Pubid::Csa::Parser

show all

Defined in: lib/pubid/csa/parser.rb

Instance Method Summary collapse

#parse(input) ⇒ Object

Preprocessing to normalize input.

Instance Method Details

#parse(input) ⇒ `Object`

Preprocesses the input to normalize it, then passes the normalized string to the inherited parser.

Raises:

(Parslet::ParseFailed)

# File 'lib/pubid/csa/parser.rb', line 368

# Normalizes a raw identifier string and hands it to the inherited parser.
#
# Preprocessing, in order: drops CONSOLIDATED markers, inserts a missing
# space before a reaffirmation such as "(R2009)", rewrites CEI/IEC and CEI
# to IEC, collapses whitespace, rewrites the CAN/CSA- and CAN3- prefixes to
# "CSA ", and collapses whitespace again. The prefix the identifier started
# with is remembered and passed to inject_publisher_prefix when the parse
# result is a Hash.
#
# @param input [String] raw identifier text
# @return [Object] the value returned by the inherited parse for the
#   normalized text
# @raise [Parslet::ParseFailed] if the input is a comment line (first
#   non-blank character is "#"); presumably also raised by the inherited
#   parse for unparseable text -- confirm against Parslet
def parse(input)
  # Skip comment lines
  raise Parslet::ParseFailed, "Comment line" if input.strip.start_with?("#")

  # Remove CONSOLIDATED notation FIRST (before other processing)
  normalized = input.gsub(/\s*\(\s*CONSOLIDATED\s*\)\s*/, " ")
  normalized = normalized.gsub(/\s*\bCONSOLIDATED\b\s*/, " ")

  # Fix missing space before reaffirmation
  # Matches: XX(RYYYY), XXX(RYY), or any non-space chars before (R
  # Examples: 04(R2009) -> 04 (R2009), 16(R24) -> 16 (R24)
  normalized = normalized.gsub(/(\S)\(R(\d{2,4})\)/, '\1 (R\2)')

  # DO NOT normalize NO., let it be parsed as a separate identifier component

  # Normalize CEI/IEC to IEC (CEI is French name for IEC)
  normalized = normalized.gsub("CEI/IEC", "IEC")
  normalized = normalized.gsub(/\bCEI\b/, "IEC")

  # Collapse whitespace BEFORE looking at the prefix: leading blanks (from
  # the caller, or left behind by a removed leading CONSOLIDATED marker)
  # would otherwise make every start_with? check below fail, and the
  # original publisher prefix would be lost even though it is still
  # rewritten to "CSA " afterwards.
  normalized = normalized.gsub(/\s+/, " ").strip

  # Track original publisher prefix
  publisher_prefix = if normalized.start_with?("CAN/CSA-")
                       "CAN/CSA-"
                     elsif normalized.start_with?("CAN3-")
                       "CAN3-"
                     elsif normalized.start_with?("CSA ")
                       "CSA"
                     end

  # Normalize CAN/CSA- and CAN3- to CSA (preserving space before code)
  normalized = normalized.gsub("CAN/CSA-", "CSA ")
  normalized = normalized.gsub("CAN3-", "CSA ")

  # The prefix rewrite can introduce doubled spaces; clean up once more
  normalized = normalized.gsub(/\s+/, " ").strip

  # Parse the normalized text with the inherited parser
  result = super(normalized)

  # Inject publisher_prefix if we have one
  if publisher_prefix && result.is_a?(Hash)
    inject_publisher_prefix(result, publisher_prefix)
  end

  result
end

Class: Pubid::Csa::Parser

Instance Method Summary collapse

Instance Method Details

#parse(input) ⇒ Object

#parse(input) ⇒ `Object`