Class: Pubid::Iso::Builder

Inherits:
Builder::Base show all
Defined in:
lib/pubid/iso/builder.rb

Instance Attribute Summary

Attributes inherited from Builder::Base

#identifier

Instance Method Summary collapse

Constructor Details

#initialize(scheme) ⇒ Builder

Returns a new instance of Builder.



6
7
8
# File 'lib/pubid/iso/builder.rb', line 6

# Creates a builder bound to the given scheme.
#
# @param scheme [Object] stored in +@scheme+; the other methods of this
#   class call +locate_typed_stage_by_abbr+ and
#   +locate_identifier_klass_by_type_code+ on it
def initialize(scheme)
  @scheme = scheme
end

Instance Method Details

#build(parsed_hash) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/pubid/iso/builder.rb', line 43

# Builds an identifier object from a parsed hash.
#
# The incoming hash is shallow-copied before any key is rewritten, so the
# caller's hash is left untouched and the same hash can be built repeatedly
# with the same outcome.
#
# @param parsed_hash [Hash] parser output keyed by symbols
# @return [Object] a populated instance of the class chosen by
#   +locate_identifier_klass+
def build(parsed_hash)
  # Every rewrite below (including the "SUP" default applied by
  # locate_identifier_klass) only touches top-level keys, so a shallow copy
  # is enough to isolate the caller. Without it a second build of the same
  # hash would find :publisher already merged and nest it again.
  parsed_hash = parsed_hash.dup

  # For ISO/R legacy format, split into publisher and type
  if parsed_hash[:iso_r_prefix]
    parsed_hash[:publisher] = "ISO"
    parsed_hash[:type_with_stage] = "R"
    parsed_hash.delete(:iso_r_prefix)
  end

  # For NSB-prefixed identifiers (FprISO, WD/ISO), map the NSB stage to a
  # typed stage. Any other NSB stage value is dropped without setting a type.
  if parsed_hash[:nsb_stage]
    case parsed_hash.delete(:nsb_stage)
    when "Fpr" then parsed_hash[:type_with_stage] = "PRF"
    when "WD" then parsed_hash[:type_with_stage] = "WD"
    end
  end

  # Instantiate the identifier based on the typed stage
  identifier = locate_identifier_klass(parsed_hash).new

  # For French GUIDE entries: "Guide ISO/CEI 37:1995"
  # NOTE(review): this runs after the class lookup above, so the French type
  # does not influence which identifier class is chosen - confirm intended.
  type_with_stage_fr = parsed_hash.delete(:type_with_stage_fr)
  parsed_hash[:type_with_stage] = type_with_stage_fr if type_with_stage_fr

  # For DirectivesSupplement, rename :publisher to :supplement_publisher
  if identifier.is_a?(Identifiers::DirectivesSupplement) && parsed_hash[:publisher]
    parsed_hash[:supplement_publisher] = parsed_hash.delete(:publisher)
  end

  # Merge copublishers into a single publisher hash; #cast(:publisher, ...)
  # accepts either a plain value or this { publisher:, copublisher: } shape.
  if parsed_hash[:publisher] && parsed_hash[:copublishers]
    copublisher_strings = parsed_hash[:copublishers].map { |cp| cp[:copublisher] }
    parsed_hash[:publisher] = {
      publisher: parsed_hash[:publisher],
      copublisher: copublisher_strings,
    }
  end

  assign_attributes(identifier, parsed_hash)

  # If no typed stage was assigned, fall back to the typed stage registered
  # under the empty abbreviation (the International Standard default
  # according to the original comment) and derive stage and type from it.
  if identifier.typed_stage.nil?
    default_typed_stage = @scheme.locate_typed_stage_by_abbr("")
    identifier.typed_stage = default_typed_stage
    identifier.stage = default_typed_stage.to_stage
    identifier.type = default_typed_stage.to_type
  end

  identifier
end

#cast(type, value) ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# File 'lib/pubid/iso/builder.rb', line 111

# Converts one parsed value into the object (or attribute hash) that is
# assigned to the identifier.
#
# Branches that return a Hash (+:year+, +:number_with_part+,
# +:type_with_stage+) supply several attributes at once, keyed by name.
#
# @param type [Symbol] key taken from the parsed hash
# @param value [Object] raw parsed value for that key
# @return [Object, Hash, Array, nil] converted value; +nil+ for
#   +:directives_type+, for empty +:copublishers+ and for a
#   +:joint_identifier+ whose publisher is not "IDF"
# @raise [ArgumentError] when +type+ is not one of the handled keys
def cast(type, value)
  case type
  when :base_identifier, :base_document
    # The base of a supplement or of a bundled identifier is itself a parsed
    # hash, so it is built recursively.
    build(value)

  when :publisher, :directives_supplement_body, :supplement_publisher
    # value can be either a plain value OR a hash with publisher + copublisher
    if value.is_a?(Hash)
      Pubid::Iso::Components::Publisher.new(
        publisher: value[:publisher],
        copublisher: value[:copublisher],
      )
    else
      Pubid::Iso::Components::Publisher.new(publisher: value)
    end

  when :copublishers
    # Copublishers are also merged into the publisher by #build; this yields
    # one Publisher per entry, or nil when there are none.
    if value.nil? || value.empty?
      nil
    else
      value.map do |copublisher|
        Pubid::Iso::Components::Publisher.new(publisher: copublisher[:copublisher])
      end
    end

  when :year
    # For TC documents, parser returns :year but identifier uses :date.
    # (A second, unreachable `when :year` further down was removed; this
    # clause always took precedence.)
    { date: Pubid::Components::Date.new(year: value.to_s) }

  when :number_with_part
    # "1234" (no part)
    # or "1234-1" ('1' is part)
    # or "1234-1-2" ('1' is part, '2' is subpart)
    # or "29110-5-1-1" ('5' is part, '1-1' is subpart)
    # or "105/F" ('F' is part)
    # or "5843/6" ('6' is part)
    # or "1 IEC" ('IEC' is part, as in "ISO/IEC DIR 1 IEC")
    # LEGACY: "4037-1979" (number-year, year should become date)

    # Normalise every separator (dash variants, "/", space) to "-".
    # NOTE(review): String#tr treats "a-b" as a range, so the order of
    # Parser::DASH_CHARS matters here - confirm against its definition.
    normalized_value = value.to_s
      .tr("#{Parser::DASH_CHARS.join}/", "-")
      .tr(" ", "-")

    parts = normalized_value.split("-").reject(&:empty?)
    number = parts.shift # The first part is always the number
    part = parts.shift&.strip # The second part is the part, if present
    subpart = parts.any? ? parts.join("-") : nil # Remainder forms the subpart

    # LEGACY FORMAT FIX: a 4-digit "part" within 1900..2099 is a year written
    # with a hyphen instead of a colon ("ISO 4037-1979"); values outside that
    # range (e.g. "1751") stay part numbers. Any subpart is not carried over
    # in this case.
    if part&.match?(/\A\d{4}\z/) && part.to_i.between?(1900, 2099)
      return {
        number: Pubid::Iso::Components::Code.new(number: number),
        date: Pubid::Components::Date.new(year: part),
      }
    end

    part = convert_roman_to_integer(part)

    code_hash = { number: Pubid::Iso::Components::Code.new(number: number) }
    code_hash[:part] = Pubid::Iso::Components::Code.new(number: part) if part
    code_hash[:subpart] = Pubid::Iso::Components::Code.new(number: subpart) if subpart
    code_hash

  when :directives_type
    # nothing to do here, just return nil
    nil

  when :type_with_stage
    # "WD"
    # "PAS"
    # "CD TR"
    original_value = value.to_s # Store the original parsed value

    # Drop a trailing iteration number ("CD2" -> "CD"). The substitution is
    # anchored so that only the trailing digits are removed, never an earlier
    # occurrence of the same digits.
    normalized_value = original_value.sub(/\d+$/, "")
    typed_stage = locate_typed_stage(normalized_value)

    # Work on a copy so the located typed stage itself is not modified, and
    # record the abbreviation exactly as it was parsed.
    typed_stage_with_original = typed_stage.dup
    typed_stage_with_original.original_abbr = original_value.strip

    # Always use TypedStage in an Identifier or separate Type and Stage.
    {
      stage: typed_stage_with_original.to_stage,
      type: typed_stage_with_original.to_type,
      typed_stage: typed_stage_with_original,
    }

  when :stage_iteration, :subgroup,
       :tc_type, :sc_type, :wg_type,
       :tc_number, :sc_number, :wg_number
    # Stage iteration ("1", "2"), JTC 1 subgroup in directives, and the
    # TC/SC/WG types and numbers are all wrapped as code components.
    Pubid::Iso::Components::Code.new(number: value.to_s)

  when :date
    parse_date(value)

  when :edition
    # value can be "Ed.2", "Ed 2", "ED1", "Edition 13", or just "Ed"
    original_text = value.to_s
    # Only the first run of digits becomes the edition number; nil if none.
    number_string = original_text.match(/\d+/)&.to_s
    number_code = number_string ? Pubid::Iso::Components::Code.new(number: number_string) : nil
    Pubid::Components::Edition.new(number: number_code,
                                   original_text: original_text)

  when :languages
    parse_languages(value)

  when :all_parts
    # Presence of the key is what matters; the attribute is a boolean.
    true

  # ISO 4214:2022 | IDF/RM 254:2022
  when :joint_identifier
    # Only IDF joint identifiers are built; any other publisher yields nil.
    case value[:publisher]
    when "IDF"
      # Loaded on demand, only when an IDF joint identifier is encountered.
      require_relative "../idf/builder"
      Idf::Builder.new.build(value)
    end

  when :supplements
    # Handle bundled supplements (+ operator)
    # Each supplement is a hash that needs to be built
    value.map { |supplement_hash| build(supplement_hash[:supplement]) }

  when :number
    # For TC documents, number is the document number (N number)
    # For regular identifiers, this is handled in :number_with_part
    if value.is_a?(Parslet::Slice) || value.is_a?(String) ||
        value.is_a?(Integer)
      Pubid::Iso::Components::Code.new(number: value.to_s)
    else
      value
    end

  when :stage
    # Raw stage code from URN parser (e.g., "10.00", "50.00").
    # Returns the abbreviation of the matching typed stage (the first one
    # when several are listed).
    # NOTE(review): this uses the Scheme constant while the rest of the class
    # goes through @scheme - confirm that is intended.
    typed_stage = Scheme.locate_typed_stage_by_stage_code(value.to_s)
    if typed_stage
      typed_stage.abbr.is_a?(Array) ? typed_stage.abbr.first : typed_stage.abbr
    else
      # Fallback: return the raw stage code (shouldn't happen with valid data)
      value.to_s
    end

  else
    raise ArgumentError, "Unknown parameter type: #{type}"
  end
end

#handle_key(identifier, key, value) ⇒ Object



101
102
103
104
105
106
107
108
109
# File 'lib/pubid/iso/builder.rb', line 101

# Gives the builder a chance to store a key itself instead of relying on
# plain attribute assignment.
#
# Only +:joint_identifier+ is handled: its value is appended to the
# identifier's +additional_identifiers+ list, which is created on first use.
#
# @param identifier [Object] identifier being populated
# @param key [Symbol] attribute key
# @param value [Object] value for that key
# @return [Boolean] +true+ when the key was handled here, +false+ otherwise
def handle_key(identifier, key, value)
  return false unless key == :joint_identifier

  identifier.additional_identifiers ||= []
  identifier.additional_identifiers << value
  true
end

#locate_identifier_klass(parsed_hash) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/pubid/iso/builder.rb', line 10

# Chooses the identifier class for a parsed hash.
#
# Structural keys are checked first, in this order:
# * +:joint_identifier+ -> CombinedIdentifier
# * +:supplements+      -> BundledIdentifier
# * +:tc_type+          -> Identifiers::TcDocument
# Otherwise the class is looked up through the scheme, using the type code
# of the typed stage located for +:type_with_stage+.
#
# Side effect: when +:type_with_stage+ is nil and the base identifier's
# +:type_with_stage+ is "DIR", +parsed_hash[:type_with_stage]+ is set to
# "SUP" in place (a directive supplement written without the "SUP" keyword).
#
# @param parsed_hash [Hash] parser output keyed by symbols
# @return [Class] identifier class to instantiate
def locate_identifier_klass(parsed_hash)
  return CombinedIdentifier if parsed_hash[:joint_identifier]
  return BundledIdentifier if parsed_hash[:supplements]
  return Identifiers::TcDocument if parsed_hash[:tc_type]

  # A nil :type_with_stage is left as is unless the base document is a
  # directive, in which case the missing keyword is filled in as "SUP".
  if parsed_hash[:type_with_stage].nil?
    base = parsed_hash[:base_identifier]
    parsed_hash[:type_with_stage] = "SUP" if base && base[:type_with_stage] == "DIR"
  end

  type_code = locate_typed_stage(parsed_hash[:type_with_stage]).type_code
  @scheme.locate_identifier_klass_by_type_code(type_code)
end