Class: Relaton::Nist::ModsParser

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton/nist/mods_parser.rb

Constant Summary collapse

# Lookup table used by #parse_relation: the `type` of a related item is the
# key, the relation type given to the resulting relation is the value.
RELATION_TYPES =
{
  "otherVersion" => "editionOf",
  "preceding" => "updates",
  "succeeding" => "updatedBy",
}.freeze
# Attribute names assembled by #parse; each one is produced by calling the
# method named `parse_<attr>`.
ATTRS =
%i[type docidentifier title source abstract date contributor
relation place series].freeze

Instance Method Summary collapse

Constructor Details

#initialize(doc, series, errors = {}) ⇒ ModsParser

Returns a new instance of ModsParser.



18
19
20
21
22
# File 'lib/relaton/nist/mods_parser.rb', line 18

# Stores the inputs the parse_* methods work on.
#
# @param doc [Object] record to parse; read through @doc by the parse_* methods
# @param series [Object] kept in @series as given
# @param errors [Hash] flags keyed by attribute name; every parse_* method
#   updates its own key with `&&=`
def initialize(doc, series, errors = {})
  @doc, @series, @errors = doc, series, errors
end

Instance Method Details

#create_contributor(entity, role) ⇒ Object



166
167
168
169
170
171
172
173
# File 'lib/relaton/nist/mods_parser.rb', line 166

# Wraps a person or an organization into a contributor.
#
# @param entity [Bib::Person, Bib::Organization, Object] contributing entity
# @param role [Object] passed through as the contributor's `role`
# @return [Bib::Contributor, nil] nil when entity is neither a person nor an
#   organization
def create_contributor(entity, role)
  slot =
    case entity
    when Bib::Person then :person
    when Bib::Organization then :organization
    end
  Bib::Contributor.new(role: role, slot => entity) if slot
end

#create_date(date, type) ⇒ Object



134
135
136
137
# File 'lib/relaton/nist/mods_parser.rb', line 134

# Builds a date of the given type from a source date element.
#
# @param date [Object] element handed to #decode_date
# @param type [String] date type, e.g. "issued"
# @return [Date, nil] nil when a ::Date::Error is raised while building it
def create_date(date, type)
  decoded = decode_date(date)
  Date.new(type: type, at: decoded)
rescue ::Date::Error
  nil
end

#create_entity(name) ⇒ Object



175
176
177
178
179
180
# File 'lib/relaton/nist/mods_parser.rb', line 175

# Turns a name element into an entity plus the role used when the element
# lists none.
#
# @param name [Object] name element responding to `type`
# @return [Array(Object, String), nil] `[person, "author"]` for "personal",
#   `[organization, "publisher"]` for "corporate", nil for any other type
def create_entity(name)
  kind = name.type
  if kind == "personal"
    [create_person(name), "author"]
  elsif kind == "corporate"
    [create_org(name), "publisher"]
  end
end

#create_org(name) ⇒ Object



196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/relaton/nist/mods_parser.rb', line 196

# Builds an organization from a corporate name element.
#
# Only name parts without a `type` contribute names; their text has newlines
# turned into spaces, repeated spaces collapsed and the ends stripped. The
# content of the first name identifier, when present, becomes a "uri"
# identifier.
#
# @param name [Object] name element responding to `name_part` and
#   `name_identifier`
# @return [Bib::Organization]
def create_org(name)
  untyped_parts = name.name_part.reject(&:type)
  names = untyped_parts.map do |part|
    text = part.content.tr("\n", " ").squeeze(" ").strip
    Bib::TypedLocalizedString.new(content: text)
  end

  uri = name.name_identifier&.first&.content
  identifier =
    if uri
      [Bib::OrganizationType::Identifier.new(type: "uri", content: uri)]
    else
      []
    end

  Bib::Organization.new(name: names, identifier: identifier)
end

#create_person(name) ⇒ Object



182
183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/relaton/nist/mods_parser.rb', line 182

# Builds a person from a personal name element.
#
# The complete name is the space-joined content of the name parts that have
# no `type`. The content of the first name identifier, when present, becomes
# a "uri" identifier.
#
# @param name [Object] name element responding to `name_part` and
#   `name_identifier`
# @return [Bib::Person]
def create_person(name)
  # typed name parts are not actual name parts, so they are left out
  untyped_parts = name.name_part.reject(&:type)
  full_text = untyped_parts.map(&:content).join(" ")
  fname = Bib::FullName.new(
    completename: Bib::LocalizedString.new(content: full_text, language: "en"),
  )

  first_id = name.name_identifier&.first
  identifier =
    if first_id
      [Bib::Person::Identifier.new(type: "uri", content: first_id.content)]
    else
      []
    end

  Bib::Person.new(name: fname, identifier: identifier)
end

#create_region(state) ⇒ Object



252
253
254
255
256
# File 'lib/relaton/nist/mods_parser.rb', line 252

# Wraps a state code into a one-element region list.
#
# @param state [String, nil] value passed as `iso:` to the region type
# @return [Array<Bib::Place::RegionType>] empty when building the region
#   raises ArgumentError
def create_region(state)
  region = Bib::Place::RegionType.new(iso: state)
  [region]
rescue ArgumentError
  []
end


221
222
223
224
225
226
227
228
# File 'lib/relaton/nist/mods_parser.rb', line 221

# Builds a minimal bibliographic item for a related item.
#
# The identifier comes from #related_item_id run through #get_id_from_str and
# is used both as the "NIST" document identifier and as the formatted
# reference.
#
# @param item [Object] related item element
# @return [ItemData, nil] nil when no identifier could be derived
def create_related_item(item)
  raw_id = related_item_id(item)
  item_id = get_id_from_str(raw_id)
  return if item_id.nil?
  return if item_id.empty?

  ItemData.new(
    docidentifier: [Bib::Docidentifier.new(type: "NIST", content: item_id)],
    formattedref: Bib::Formattedref.new(content: item_id),
  )
end

#create_title(title, type, non_sort = nil) ⇒ Object



100
101
102
103
104
# File 'lib/relaton/nist/mods_parser.rb', line 100

# Builds an English, Latin-script title from raw title text.
#
# Newlines become spaces, repeated spaces are collapsed and the ends are
# stripped. When a non-sort element is given its content is prepended and
# repeated spaces are collapsed once more.
#
# @param title [String] raw title text
# @param type [String] title type
# @param non_sort [Object, nil] element responding to `content`
# @return [Bib::Title]
def create_title(title, type, non_sort = nil)
  normalized = title.tr("\n", " ").squeeze(" ").strip
  text = non_sort ? "#{non_sort.content}#{normalized}".squeeze(" ") : normalized
  Bib::Title.new(content: text, type: type, language: "en", script: "Latn")
end

#decode_date(date) ⇒ Object



139
140
141
142
143
144
145
146
# File 'lib/relaton/nist/mods_parser.rb', line 139

# Converts a date element into a date string.
#
# A six-character "marc" value is read as `%y%m%d`, an "iso8601" value as
# `%Y%m%d`; both are returned in `Date#to_s` form. Anything else is returned
# untouched.
#
# @param date [Object] element responding to `encoding` and `content`
# @return [String]
def decode_date(date)
  raw = date.content
  pattern =
    if date.encoding == "marc" && raw.size == 6
      "%y%m%d"
    elsif date.encoding == "iso8601"
      "%Y%m%d"
    end
  return raw unless pattern

  ::Date.strptime(raw, pattern).to_s
end

#get_id_from_str(str) ⇒ Object



48
49
50
51
52
53
54
# File 'lib/relaton/nist/mods_parser.rb', line 48

# Derives a document identifier from a DOI-like string.
#
# The string is parsed with Pubid; when parsing fails, dots are replaced by
# spaces and the leading run of non-digit characters is upcased.
#
# @param str [String, nil]
# @return [String, nil] nil for nil or empty input
def get_id_from_str(str)
  return nil if str.nil? || str.empty?

  begin
    ::Pubid::Nist::Identifier.parse(str).to_s
  rescue ::Pubid::Core::Errors::ParseError
    spaced = str.tr(".", " ")
    spaced.sub(/^[\D]+/) { |prefix| prefix.upcase }
  end
end

#parse ⇒ Bib::ItemData

Returns:

  • (Bib::ItemData)


25
26
27
28
29
30
31
# File 'lib/relaton/nist/mods_parser.rb', line 25

# Assembles the bibliographic item for the record.
#
# Every attribute listed in ATTRS is produced by the method named
# `parse_<attr>`; the extension block (doctype and flavor) is added on top.
#
# @return [Bib::ItemData]
def parse
  args = ATTRS.each_with_object({}) do |attr, hash|
    hash[attr] = send("parse_#{attr}")
  end
  # The flavor literal used to read "nilst", a misspelling of "nist".
  args[:ext] = Ext.new(doctype: parse_doctype, flavor: "nist")
  ItemData.new(**args)
end

#parse_abstract ⇒ Object



116
117
118
119
120
121
122
123
124
# File 'lib/relaton/nist/mods_parser.rb', line 116

# Collects the abstracts of the record as English, Latin-script abstracts
# with whitespace normalized (newlines to spaces, repeated spaces collapsed,
# ends stripped).
#
# A truthy `@errors[:abstract]` is overwritten with whether the result is
# empty; a falsy or missing flag is left alone.
#
# @return [Array<Bib::Abstract>]
def parse_abstract
  abstracts = Array(@doc.abstract).map do |entry|
    text = entry.content.tr("\n", " ").squeeze(" ").strip
    Bib::Abstract.new(content: text, language: "en", script: "Latn")
  end
  @errors[:abstract] &&= abstracts.empty?
  abstracts
end

#parse_contributor ⇒ Object



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/relaton/nist/mods_parser.rb', line 150

# Builds the contributors of the record from its name elements.
#
# Names whose usage is "primary" are left out to avoid duplication, and so
# are names #create_entity cannot turn into an entity. Roles come from the
# name's role terms; when there are none the default role returned by
# #create_entity is used.
#
# A truthy `@errors[:contributor]` is overwritten with whether the result is
# empty; a falsy or missing flag is left alone.
#
# @return [Array] results of #create_contributor, without nils
def parse_contributor
  secondary_names = @doc.name.reject { |n| n.usage == "primary" }
  contributor = secondary_names.filter_map do |name|
    entity, default_role = create_entity(name)
    next unless entity

    role = (name.role || []).flat_map do |r|
      r.role_term.map { |rt| Bib::Contributor::Role.new(type: rt.content) }
    end
    role << Bib::Contributor::Role.new(type: default_role) if role.empty?
    create_contributor(entity, role)
  end
  @errors[:contributor] &&= contributor.empty?
  contributor
end

#parse_date ⇒ Object



126
127
128
129
130
131
132
# File 'lib/relaton/nist/mods_parser.rb', line 126

# Collects the "issued" dates from the first origin-info element.
#
# A record without origin info, or whose first origin info has no issued
# dates, yields an empty list instead of raising NoMethodError, so the
# missing data is reported through `@errors[:date]` like any other gap.
# Dates that #create_date cannot build (nil results) are dropped.
#
# A truthy `@errors[:date]` is overwritten with whether the result is empty;
# a falsy or missing flag is left alone.
#
# @return [Array] results of #create_date, without nils
def parse_date
  origin = Array(@doc.origin_info).first
  date = Array(origin&.date_issued).map do |di|
    create_date(di, "issued")
  end.compact
  @errors[:date] &&= date.empty?
  date
end

#parse_docidentifier ⇒ Array<Bib::Docidentifier>

Returns:

  • (Array<Bib::Docidentifier>)


36
37
38
39
40
41
42
43
# File 'lib/relaton/nist/mods_parser.rb', line 36

# Builds the document identifiers of the record: a primary "NIST" identifier
# from #pub_id and a "DOI" identifier from #parse_doi. Identifiers whose
# content is nil or empty are left out.
#
# A truthy `@errors[:docidentifier]` is overwritten with whether the result
# is empty; a falsy or missing flag is left alone.
#
# @return [Array<Bib::Docidentifier>]
def parse_docidentifier
  candidates = [
    { type: "NIST", content: pub_id, primary: true },
    { type: "DOI", content: parse_doi },
  ]
  ids = candidates.select do |candidate|
    !candidate[:content].nil? && !candidate[:content].empty?
  end
  @errors[:docidentifier] &&= ids.empty?
  ids.map { |attrs| Bib::Docidentifier.new(**attrs) }
end

#parse_doctype ⇒ Object



148
# File 'lib/relaton/nist/mods_parser.rb', line 148

# Document type of the record; always a Doctype with content "standard".
#
# @return [Doctype]
def parse_doctype
  Doctype.new(content: "standard")
end

#parse_doi ⇒ Object



68
69
70
71
72
73
74
75
76
# File 'lib/relaton/nist/mods_parser.rb', line 68

# Extracts the DOI suffix from the first URL, across all locations, whose
# usage is "primary display".
#
# The URL content goes through #remove_doi_prefix and the result through
# #replace_wrong_doi.
#
# @return [String, nil] nil when there is no such URL or it holds no DOI
def parse_doi
  primary = nil
  @doc.location.each do |loc|
    primary = loc.url.find { |u| u.usage == "primary display" }
    break if primary
  end
  return if primary.nil?

  suffix = remove_doi_prefix(primary.content)
  suffix.nil? ? nil : replace_wrong_doi(suffix)
end

#parse_place ⇒ Object



242
243
244
245
246
247
248
249
250
# File 'lib/relaton/nist/mods_parser.rb', line 242

# Builds a place for every origin-info element whose event type is
# "publisher".
#
# The first place term is matched against a "<word>, <word>" pattern; the
# first word becomes the city and the second is handed to #create_region.
# Both are nil when the term does not match.
#
# A truthy `@errors[:place]` is overwritten with whether the result is empty;
# a falsy or missing flag is left alone.
#
# @return [Array<Bib::Place>]
def parse_place
  publisher_infos = @doc.origin_info.select { |info| info.event_type == "publisher" }
  place = publisher_infos.map do |info|
    term = info.place[0].place_term[0].content
    found = /(?<city>\w+), (?<state>\w+)/.match(term)
    city = found && found[:city]
    state = found && found[:state]
    Bib::Place.new(city: city, region: create_region(state))
  end
  @errors[:place] &&= place.empty?
  place
end

#parse_relation ⇒ Object



211
212
213
214
215
216
217
218
219
# File 'lib/relaton/nist/mods_parser.rb', line 211

# Builds relations from the related items that are not of type "series".
#
# The relation type is looked up in RELATION_TYPES (nil for unlisted types);
# related items for which #create_related_item returns nothing are skipped.
#
# A truthy `@errors[:relation]` is overwritten with whether the result is
# empty; a falsy or missing flag is left alone.
#
# @return [Array<Relation>]
def parse_relation
  non_series = Array(@doc.related_item).reject { |ri| ri.type == "series" }
  relations = non_series.each_with_object([]) do |ri, found|
    type = RELATION_TYPES[ri.type]
    bibitem = create_related_item(ri)
    found << Relation.new(type: type, bibitem: bibitem) if bibitem
  end
  @errors[:relation] &&= relations.empty?
  relations
end

#parse_series ⇒ Object



258
259
260
261
262
263
264
265
266
267
# File 'lib/relaton/nist/mods_parser.rb', line 258

# Builds a series for every related item of type "series".
#
# The series title is the stripped first title of the item's first
# title-info element; the number is that element's first part number, if any.
#
# A truthy `@errors[:series]` is overwritten with whether the result is
# empty; a falsy or missing flag is left alone.
#
# @return [Array<Bib::Series>]
def parse_series
  series_items = Array(@doc.related_item).select { |ri| ri.type == "series" }
  series = series_items.map do |ri|
    info = ri.title_info[0]
    heading = Bib::Title.new(content: info.title[0].strip)
    Bib::Series.new(title: [heading], number: info.part_number&.first)
  end
  @errors[:series] &&= series.empty?
  series
end

#parse_source ⇒ Object



106
107
108
109
110
111
112
113
114
# File 'lib/relaton/nist/mods_parser.rb', line 106

# Builds a source URI from the first URL of every location.
#
# The URI type is "doi" when the URL's usage is "primary display" and "src"
# otherwise. Locations without any URL are skipped; previously such a
# location raised NoMethodError on the missing URL.
#
# A truthy `@errors[:source]` is overwritten with whether the result is
# empty; a falsy or missing flag is left alone.
#
# @return [Array<Bib::Uri>]
def parse_source
  source = @doc.location.filter_map do |location|
    url = location.url&.first
    next unless url

    type = url.usage == "primary display" ? "doi" : "src"
    Bib::Uri.new content: url.content, type: type
  end
  @errors[:source] &&= source.empty?
  source
end

#parse_title ⇒ Array<Bib::Title>

Returns:

  • (Array<Bib::Title>)


81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/relaton/nist/mods_parser.rb', line 81

# Builds the titles of the record.
#
# Title-info elements of type "alternative" are ignored. Every title becomes
# a "title-main" entry (with the first non-sort element prepended by
# #create_title) and every sub title a "title-part" entry. With more than one
# entry an extra "main" title joining all contents with " - " is appended;
# a single entry has its type switched to "main".
#
# A truthy `@errors[:title]` is overwritten with whether the result is empty;
# a falsy or missing flag is left alone.
#
# @return [Array<Bib::Title>]
def parse_title
  usable = @doc.title_info.reject { |ti| ti.type == "alternative" }
  title = usable.flat_map do |ti|
    mains = ti.title.map { |t| create_title(t, "title-main", ti.non_sort&.first) }
    parts = ti.sub_title ? ti.sub_title.map { |t| create_title(t, "title-part") } : []
    mains + parts
  end

  case title.size
  when 0
    # nothing to promote
  when 1
    title.first.instance_variable_set(:@type, "main")
  else
    joined = title.map(&:content).join(" - ")
    title << create_title(joined, "main")
  end

  @errors[:title] &&= title.empty?
  title
end

#parse_type ⇒ Object



33
# File 'lib/relaton/nist/mods_parser.rb', line 33

# Bibliographic item type of the record; always "standard".
#
# @return [String]
def parse_type
  "standard"
end

#pub_id ⇒ String

Returns:

  • (String)


46
# File 'lib/relaton/nist/mods_parser.rb', line 46

# Publication identifier of the record: the DOI suffix from #parse_doi run
# through #get_id_from_str.
#
# @return [String, nil] whatever #get_id_from_str returns for the DOI suffix
def pub_id
  get_id_from_str(parse_doi)
end


230
231
232
233
234
235
236
237
238
239
240
# File 'lib/relaton/nist/mods_parser.rb', line 230

# Finds the DOI suffix identifying a related item.
#
# The raw value is the item's `other_type` when its first seven characters
# are "10.6028", otherwise the content of the first name part of the item's
# first name. It then goes through #remove_doi_prefix and #replace_wrong_doi.
#
# @param item [Object] related item element
# @return [String, nil] nil when the raw value holds no DOI
def related_item_id(item)
  other = item.other_type
  raw =
    if other && other[0..6] == "10.6028"
      item.other_type
    else
      item.name[0].name_part[0].content
    end

  doi = remove_doi_prefix(raw)
  doi.nil? ? nil : replace_wrong_doi(doi)
end

#remove_doi_prefix(id) ⇒ Object



78
# File 'lib/relaton/nist/mods_parser.rb', line 78

# Returns what follows "10.6028/" in the given string.
#
# @param id [String] DOI or URL containing a DOI
# @return [String, nil] nil when "10.6028/" followed by at least one
#   character is not found
def remove_doi_prefix(id)
  found = id.match(/10\.6028\/(.+)/)
  found && found[1]
end

#replace_wrong_doi(id) ⇒ String

Returns:

  • (String)


57
58
59
60
61
62
63
64
65
66
# File 'lib/relaton/nist/mods_parser.rb', line 57

# Substitutes a replacement for a handful of specific DOI suffixes; every
# other value is returned unchanged.
#
# @param id [String] DOI suffix
# @return [String]
def replace_wrong_doi(id)
  corrections = {
    "NBS.CIRC.sup" => "NBS.CIRC.24e7sup",
    "NBS.CIRC.supJun1925-Jun1926" => "NBS.CIRC.24e7sup2",
    "NBS.CIRC.supJun1925-Jun1927" => "NBS.CIRC.24e7sup3",
    "NBS.CIRC.24supJuly1922" => "NBS.CIRC.24e6sup",
    "NBS.CIRC.24supJan1924" => "NBS.CIRC.24e6sup2",
  }
  corrections.fetch(id, id)
end