Class: Pubid::CenCenelec::Parser

Inherits:
Parslet::Parser
  • Object
show all
Defined in:
lib/pubid/cen_cenelec/parser.rb

Class Method Summary collapse

Class Method Details

.parse(input) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/pubid/cen_cenelec/parser.rb', line 126

# Cleans up a raw identifier string and hands it to a new parser instance.
#
# Clean-up happens in this order:
# 1. non-breaking hyphens (U+2011) and soft hyphens (U+00AD) become "-";
# 2. on every line, text from the first "#" through end of line is removed,
#    then surrounding whitespace is stripped from the whole string;
# 3. parenthesised notes containing "corrigendum" (any letter case) are
#    removed together with the whitespace before them;
# 4. "CEN-CLC", "CLC-CEN" and "GUIDE" are rewritten, one after another, to
#    "CEN/CLC", "CLC/CEN" and "Guide".
#
# @param input [String] raw identifier text
# @return [Object] result of calling +parse+ on a new instance with the
#   cleaned text
def self.parse(input)
  ascii_dashed = input.gsub(/[\u2011\u00AD]/, "-")
  without_hash_tail = ascii_dashed.gsub(/#.*$/, "").strip
  without_notes = without_hash_tail.gsub(/\s*\([^)]*corrigendum[^)]*\)/i, "")

  # Applied sequentially (not in a single pass) so each rule sees the output
  # of the previous one.
  rewrites = [["CEN-CLC", "CEN/CLC"], ["CLC-CEN", "CLC/CEN"], ["GUIDE", "Guide"]]
  cleaned = rewrites.reduce(without_notes) { |text, (from, to)| text.gsub(from, to) }

  new.parse(cleaned)
end