Class: Pubid::Iso::UrnParser
- Inherits:
-
Object
- Object
- Pubid::Iso::UrnParser
- Defined in:
- lib/pubid/iso/urn_parser.rb
Overview
Parses RFC 5141-bis compliant URNs into ISO identifiers
URN format: urn:iso:std:publisher:type:number:year:supplements
Examples:
-
urn:iso:std:iso:9001:2019
-
urn:iso:std:iso:tr:9001:2019
-
urn:iso:std:iso:9001:2019:stage-40.00
-
urn:iso:std:iso:9001:2019:amd:1:2020
-
urn:iso:std:iso-iec:27001:2013
Constant Summary collapse
- TYPED_STAGE_REVERSE_MAP =
Reverse mappings from URN format to PubID components
{ "WD" => :wd, "WDS" => :wds, "CD" => :cd, "CDV" => :cdv, "DIS" => :dis, "FDIS" => :fdis, "PDAM" => :pdam, "DAM" => :dam, "FDAM" => :fdamd, "DCOR" => :dcor, "FDCOR" => :fdcor, "CDTS" => :cdts, "DTS" => :dts, "FDTS" => :fdts, }.freeze
- SUPPLEMENT_TYPE_MAP =
{ "amd" => :amd, "cor" => :cor, "sup" => :sup, "add" => :add, }.freeze
- TYPE_CODE_REVERSE_MAP =
{ "tr" => :tr, "ts" => :ts, "pas" => :pas, "guide" => :guide, "dir" => :dir, "dir-sup" => :dir_sup, "iwa-sup" => :iwa_sup, "isp" => :isp, "iwa" => :iwa, "r" => :r, "data" => :data, }.freeze
Class Method Summary collapse
-
.parse(urn) ⇒ Identifier
Parse ISO URN string.
Instance Method Summary collapse
-
#parse_urn(urn) ⇒ Identifier
Parse URN string into identifier.
Class Method Details
.parse(urn) ⇒ Identifier
Parse ISO URN string
58 59 60 |
# File 'lib/pubid/iso/urn_parser.rb', line 58
#
# Parse an ISO URN string.
#
# Convenience entry point: builds a fresh parser instance and hands the
# URN to #parse_urn.
#
# @param urn [String] the URN to parse
# @return [Identifier] whatever #parse_urn returns for the URN
def self.parse(urn)
  parser = new
  parser.parse_urn(urn)
end
Instance Method Details
#parse_urn(urn) ⇒ Identifier
Parse URN string into identifier
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 |
# File 'lib/pubid/iso/urn_parser.rb', line 65
#
# Parse a URN string into an identifier.
#
# After the "urn:iso:std:" prefix is removed, the colon-separated components
# are consumed left to right: publisher, optional type, number, year and/or
# URN-style part, edition, languages, stage, then zero or more supplements.
# The collected values are handed to build_identifier.
#
# @param urn [String] URN beginning with "urn:iso:std:"
# @return [Identifier] the result of build_identifier
# @raise [ArgumentError] if the prefix is missing, or if a trailing component
#   cannot be interpreted as any supplement field
def parse_urn(urn)
  # Remove urn:iso:std: prefix
  unless urn.start_with?("urn:iso:std:")
    raise ArgumentError, "Invalid ISO URN: #{urn}"
  end

  parts = urn.sub("urn:iso:std:", "").split(":")

  # Parse publisher(s) - first part
  publishers = parse_publisher(parts.shift)

  # Parse type - optional (defaults to IS)
  type_code = nil
  type_code = parse_type(parts.first) if parts.first && TYPE_CODE_REVERSE_MAP.key?(parts.first.downcase)
  parts.shift if type_code

  # Parse number
  number_part = parts.shift
  number, part, subpart = parse_number_part(number_part)

  # Parse year if present (4-digit year)
  year = nil
  if parts.first&.match(/^\d{4}$/)
    year = parts.shift
  end

  # Handle URN-style part notation (:-22, :-5-1-1, etc.)
  # These can come before or after year/edition in URN format
  # Note: The parts are split by ':', so :-22 becomes two parts: "10164" and "-22"
  if parts.first&.start_with?("-")
    part_str = parts.shift
    # Re-parse the number part with the URN-style part
    number, part, subpart = parse_number_part("#{number}#{part_str}")
  end

  # Parse year if present (4-digit year) - may come after part
  if year.nil? && parts.first&.match(/^\d{4}$/)
    year = parts.shift
  end

  # Parse edition if present (ed-N format) - comes after year in URN
  edition = nil
  if parts.first&.start_with?("ed-")
    edition = parts.shift.sub("ed-", "").to_i
  end

  # Parse language if present (2-letter codes like "en" or comma-separated).
  # Anything that looks like a number, supplement, version, stage or edition
  # is left for the later sections.
  languages = nil
  if parts.first &&
     !parts.first.match(/^\d+$/) &&
     !parts.first.match?(/^(amd|cor|sup|add|v\d+|stage-|ed-)/i) &&
     !TYPED_STAGE_REVERSE_MAP.key?(parts.first.upcase) &&
     !parts.first.match?(/^[A-Z]+\.\d+$/i)
    languages = parse_languages(parts.shift)
  end

  # Check for stage (stage-XX.XX or typed stage like WD, CD, etc.)
  # IMPORTANT: This must come AFTER edition/part/language parsing but BEFORE supplements parsing
  stage_code = nil
  stage_iteration = nil
  harmonized_stage_code = nil # Track full harmonized code for lookup
  stage_from_abbr = nil # Track stage code from typed abbreviation

  if parts.first&.start_with?("stage-")
    stage_str = parts.shift
    stage_code, stage_iteration = parse_stage_code(stage_str)
    # Harmonized code is the stage string without the "stage-" prefix and
    # without any trailing .vX suffix
    harmonized_stage_code = stage_str.sub("stage-", "").sub(/\.v\d+$/i, "")
  elsif TYPED_STAGE_REVERSE_MAP.key?(parts.first&.upcase) ||
        (parts.first&.match?(/^[A-Za-z]+\.\d+$/) &&
         TYPED_STAGE_REVERSE_MAP.key?(parts.first.upcase.split(".").first))
    stage_abbr = parts.shift.upcase

    # Check for iteration (WD.2 format)
    if stage_abbr.include?(".")
      abbr_part, iteration_part = stage_abbr.split(".")
      stage_code = TYPED_STAGE_REVERSE_MAP[abbr_part]
      stage_from_abbr = stage_code
      stage_iteration = iteration_part.to_i
    else
      stage_code = TYPED_STAGE_REVERSE_MAP[stage_abbr]
      stage_from_abbr = stage_code
    end
  end

  # Check for supplements (amd, cor, sup, add)
  supplements = []
  while parts.any?
    # Remember how many components are left so we can detect an iteration
    # that consumed nothing (see the guard before the append below).
    remaining_before = parts.length

    supp_type = nil
    supp_number = nil
    supp_year = nil
    supp_stage = nil

    # Check for supplement stage
    if parts.first&.start_with?("stage-")
      supp_stage_data = parts.shift
      supp_stage, = parse_stage_code(supp_stage_data)
    elsif TYPED_STAGE_REVERSE_MAP.key?(parts.first&.upcase)
      supp_stage_abbr = parts.shift.upcase
      supp_stage = TYPED_STAGE_REVERSE_MAP[supp_stage_abbr]
    end

    # Check for supplement type (amd, cor, sup, add)
    if SUPPLEMENT_TYPE_MAP.key?(parts.first&.downcase)
      supp_type = SUPPLEMENT_TYPE_MAP[parts.shift.downcase]
    end

    # Check for year or supplement number
    if parts.first&.match(/^\d+$/)
      if parts.first&.match(/^\d{4}$/)
        # 4 digits = year
        supp_year = parts.shift
      else
        # Any other all-digit component = supplement number
        supp_number = parts.shift.to_i
      end
    end

    # Check for version (v1, v2, etc.) after year or number.
    # String#to_i stops at the first non-digit, so "v1.2" yields 1.
    if parts.first&.start_with?("v")
      version_str = parts.shift
      supp_number = version_str.sub("v", "").to_i
    end

    # Next part might be year if not already set
    if supp_year.nil? && parts.first&.match(/^\d{4}$/)
      supp_year = parts.shift
    end

    # Check for language after supplement
    supp_languages = nil
    if parts.first &&
       !parts.first.match(/^\d+$/) &&
       !parts.first.match?(/^(amd|cor|sup|add|v\d+|stage-)/i)
      supp_languages = parse_languages(parts.shift)
    end

    # A component such as "amdx" starts with a supplement keyword (so it is
    # excluded from language parsing) but is not an exact supplement type,
    # so no branch above consumes it. Without this guard the loop would
    # never terminate.
    if parts.length == remaining_before
      raise ArgumentError,
            "Invalid ISO URN: #{urn} (unrecognized component: #{parts.first})"
    end

    supplements << {
      type: supp_type,
      number: supp_number,
      year: supp_year,
      stage: supp_stage,
      languages: supp_languages,
    }
  end

  # Build the identifier hash
  build_identifier(publishers, number, part, subpart, type_code,
                   stage_code, stage_iteration, harmonized_stage_code,
                   stage_from_abbr, year, edition, languages, supplements)
end