Class: Relaton::Nist::ModsParser
- Inherits: Object
- Inheritance chain: Object → Relaton::Nist::ModsParser
- Defined in:
- lib/relaton/nist/mods_parser.rb
Constant Summary collapse
# Lookup table used by #parse_relation: keys are the `type` values read from
# related items, values are the relation type passed to Relation.new.
RELATION_TYPES = {
  "otherVersion" => "editionOf",
  "preceding" => "updates",
  "succeeding" => "updatedBy",
}.freeze
# Attribute names assembled by #parse; for each entry the matching
# `parse_<attr>` method is invoked and its result stored under that key.
ATTRS = [
  :type, :docidentifier, :title, :source, :abstract,
  :date, :contributor, :relation, :place, :series,
].freeze
Instance Method Summary collapse
- #create_contributor(entity, role) ⇒ Object
- #create_date(date, type) ⇒ Object
- #create_entity(name) ⇒ Object
- #create_org(name) ⇒ Object
- #create_person(name) ⇒ Object
- #create_region(state) ⇒ Object
- #create_related_item(item) ⇒ Object
- #create_title(title, type, non_sort = nil) ⇒ Object
- #decode_date(date) ⇒ Object
- #get_id_from_str(str) ⇒ Object
- #initialize(doc, series, errors = {}) ⇒ ModsParser (constructor): a new instance of ModsParser.
- #parse ⇒ Bib::ItemData
- #parse_abstract ⇒ Object
- #parse_contributor ⇒ Object
- #parse_date ⇒ Object
- #parse_docidentifier ⇒ Array<Bib::Docidentifier>
- #parse_doctype ⇒ Object
- #parse_doi ⇒ Object
- #parse_place ⇒ Object
- #parse_relation ⇒ Object
- #parse_series ⇒ Object
- #parse_source ⇒ Object
- #parse_title ⇒ Array<Bib::Title>
- #parse_type ⇒ Object
- #pub_id ⇒ String
- #related_item_id(item) ⇒ Object
- #remove_doi_prefix(id) ⇒ Object
- #replace_wrong_doi(id) ⇒ String
Constructor Details
#initialize(doc, series, errors = {}) ⇒ ModsParser
Returns a new instance of ModsParser.
18 19 20 21 22 |
# File 'lib/relaton/nist/mods_parser.rb', line 18
#
# Stores the three inputs in instance variables for the parse_* methods.
#
# @param doc [Object] record the parse_* methods read from (kept in @doc)
# @param series [Object] kept in @series
# @param errors [Hash] per-attribute flags updated by the parse_* methods
def initialize(doc, series, errors = {})
  @doc, @series, @errors = doc, series, errors
end
Instance Method Details
#create_contributor(entity, role) ⇒ Object
172 173 174 175 176 177 178 179 |
# File 'lib/relaton/nist/mods_parser.rb', line 172
#
# Wraps a person or organization into a Bib::Contributor.
#
# @param entity [Bib::Person, Bib::Organization, Object]
# @param role [Object] passed through unchanged as the contributor's role
# @return [Bib::Contributor, nil] nil when entity is neither a person nor
#   an organization
def create_contributor(entity, role)
  if Bib::Person === entity
    Bib::Contributor.new(role: role, person: entity)
  elsif Bib::Organization === entity
    Bib::Contributor.new(role: role, organization: entity)
  end
end
#create_date(date, type) ⇒ Object
140 141 142 143 |
# File 'lib/relaton/nist/mods_parser.rb', line 140
#
# Builds a date object of the given type from a date element.
# `Date` is resolved in the enclosing namespace, which is not visible in
# this view; `::Date::Error` is the stdlib error raised by date parsing.
#
# @param date [Object] element handed to #decode_date
# @param type [String] date type, e.g. "issued"
# @return [Object, nil] nil when decoding raises ::Date::Error
def create_date(date, type)
  begin
    Date.new(type: type, at: decode_date(date))
  rescue ::Date::Error
    nil
  end
end
#create_entity(name) ⇒ Object
181 182 183 184 185 186 |
# File 'lib/relaton/nist/mods_parser.rb', line 181
#
# Converts a name element into an entity paired with its default role.
#
# @param name [Object] responds to #type
# @return [Array, nil] [person, "author"] for "personal" names,
#   [organization, "publisher"] for "corporate" names, nil for any other type
def create_entity(name)
  kind = name.type
  case kind
  when "personal"
    person = create_person(name)
    [person, "author"]
  when "corporate"
    org = create_org(name)
    [org, "publisher"]
  end
end
#create_org(name) ⇒ Object
202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
# File 'lib/relaton/nist/mods_parser.rb', line 202
#
# Builds a Bib::Organization from a name element.
#
# @param name [Object] responds to #name_part and #name_identifier
# @return [Bib::Organization] names come from the untyped name parts with
#   whitespace normalized; the first name identifier, when present, becomes
#   a single "uri" identifier
def create_org(name)
  untyped_parts = name.name_part.reject(&:type)
  names = untyped_parts.map do |part|
    text = part.content.gsub("\n", " ").squeeze(" ").strip
    Bib::TypedLocalizedString.new(content: text)
  end
  uri = name.name_identifier&.first&.content
  identifier =
    if uri
      [Bib::OrganizationType::Identifier.new(type: "uri", content: uri)]
    else
      []
    end
  Bib::Organization.new(name: names, identifier: identifier)
end
#create_person(name) ⇒ Object
188 189 190 191 192 193 194 195 196 197 198 199 200 |
# File 'lib/relaton/nist/mods_parser.rb', line 188
#
# Builds a Bib::Person from a name element.
#
# @param name [Object] responds to #name_part and #name_identifier
# @return [Bib::Person] complete name is the untyped name parts joined by a
#   space; the first name identifier, when present, becomes a "uri" identifier
def create_person(name)
  # Typed name parts are not actual name parts, so they are left out.
  untyped_parts = name.name_part.reject(&:type)
  joined = untyped_parts.map(&:content).join(" ")
  fname = Bib::FullName.new(
    completename: Bib::LocalizedString.new(content: joined, language: "en"),
  )
  first_id = name.name_identifier&.first
  identifier =
    if first_id
      [Bib::Person::Identifier.new(type: "uri", content: first_id.content)]
    else
      []
    end
  Bib::Person.new(name: fname, identifier: identifier)
end
#create_region(state) ⇒ Object
258 259 260 261 262 |
# File 'lib/relaton/nist/mods_parser.rb', line 258
#
# Wraps a state code into a one-element region list.
#
# @param state [String, nil] value passed as `iso:` to the region type
# @return [Array] one Bib::Place::RegionType, or an empty array when its
#   constructor raises ArgumentError
def create_region(state)
  region = Bib::Place::RegionType.new(iso: state)
  [region]
rescue ArgumentError
  []
end
#create_related_item(item) ⇒ Object
227 228 229 230 231 232 233 234 |
# File 'lib/relaton/nist/mods_parser.rb', line 227
#
# Builds a minimal bibliographic item for a related item.
# The method name and the inner #related_item_id call were missing from the
# extracted text; both are restored from the method list of this class.
#
# @param item [Object] related item handed to #related_item_id
# @return [ItemData, nil] item carrying one "NIST" docidentifier and a
#   formattedref with the same content; nil when no identifier is resolved
def create_related_item(item)
  item_id = get_id_from_str(related_item_id(item))
  return if item_id.nil? || item_id.empty?

  docid = Bib::Docidentifier.new(type: "NIST", content: item_id)
  fref = Bib::Formattedref.new(content: item_id)
  ItemData.new(docidentifier: [docid], formattedref: fref)
end
#create_title(title, type, non_sort = nil) ⇒ Object
106 107 108 109 110 |
# File 'lib/relaton/nist/mods_parser.rb', line 106
#
# Builds an English/Latin-script Bib::Title with normalized whitespace.
#
# @param title [String] raw title text
# @param type [String] title type, e.g. "title-main"
# @param non_sort [Object, nil] optional element whose #content is prepended
#   to the title
# @return [Bib::Title]
def create_title(title, type, non_sort = nil)
  # Newlines become spaces, runs of spaces collapse, and the ends are trimmed.
  text = title.gsub("\n", " ").squeeze(" ").strip
  if non_sort
    text = "#{non_sort.content}#{text}".squeeze(" ")
  end
  Bib::Title.new(content: text, type: type, language: "en", script: "Latn")
end
#decode_date(date) ⇒ Object
145 146 147 148 149 150 151 152 |
# File 'lib/relaton/nist/mods_parser.rb', line 145
#
# Normalizes a date element to a string.
#
# @param date [Object] responds to #encoding and #content
# @return [String] ISO date string for six-character "marc" values (read as
#   %y%m%d) and for "iso8601" values (read as %Y%m%d); otherwise the content
#   unchanged
# @raise [::Date::Error] when the content does not fit the selected format
def decode_date(date)
  pattern =
    if date.encoding == "marc" && date.content.size == 6
      "%y%m%d"
    elsif date.encoding == "iso8601"
      "%Y%m%d"
    end
  return date.content unless pattern

  ::Date.strptime(date.content, pattern).to_s
end
#get_id_from_str(str) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/relaton/nist/mods_parser.rb', line 48
#
# Turns a dotted identifier string into a human-readable publication id.
#
# @param str [String, nil] identifier such as "NIST.HB.135e2022-upd1"
# @return [String, nil] nil for nil or empty input; otherwise the Pubid
#   rendering in :human format, or an upcased, space-separated fallback when
#   Pubid cannot parse the string
def get_id_from_str(str)
  return if str.nil? || str.empty?

  begin
    # DOIs arrive dotted ("NIST.HB.135e2022-upd1"); that MR-style form
    # parses through a path that skips UpdateCodes date lookup and drops
    # letter suffixes. Only the separator dots of the leading "PUB.SERIES."
    # prefix are replaced by spaces (dots inside the number stay), so the
    # string parses like the canonical short form; :human rendering is then
    # forced.
    spaced = str.sub(/\A(NIST|NBS)\.([A-Z][A-Za-z]*)\./, '\1 \2 ')
    parsed = ::Pubid::Nist::Identifier.parse(spaced)
    parsed.to_s(format: :human)
  rescue Parslet::ParseFailed
    # Fallback: all dots become spaces and the leading non-digit run is upcased.
    str.gsub(".", " ").sub(/^[\D]+/, &:upcase)
  end
end
#parse ⇒ Bib::ItemData
25 26 27 28 29 30 31 |
# File 'lib/relaton/nist/mods_parser.rb', line 25
#
# Assembles the bibliographic item by calling `parse_<attr>` for every entry
# of ATTRS and adding the extension block.
#
# @return [Bib::ItemData] item built from the collected attributes
def parse
  args = ATTRS.to_h { |attr| [attr, send("parse_#{attr}")] }
  # The flavor is "nist"; the previous literal "nilst" was a typo.
  args[:ext] = Ext.new(doctype: parse_doctype, flavor: "nist")
  ItemData.new(**args)
end
#parse_abstract ⇒ Object
122 123 124 125 126 127 128 129 130 |
# File 'lib/relaton/nist/mods_parser.rb', line 122
#
# Collects the record's abstracts as English/Latin-script Bib::Abstract
# objects with normalized whitespace.
#
# @return [Array<Bib::Abstract>] empty when the record has no abstract
def parse_abstract
  abstracts = Array(@doc.abstract).map do |node|
    text = node.content.gsub("\n", " ").squeeze(" ").strip
    Bib::Abstract.new(content: text, language: "en", script: "Latn")
  end
  # An already-set flag is overwritten; a missing or falsy one is left alone.
  @errors[:abstract] = abstracts.empty? if @errors[:abstract]
  abstracts
end
#parse_contributor ⇒ Object
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/relaton/nist/mods_parser.rb', line 156
#
# Builds contributors from the record's names.
#
# @return [Array] contributors for every non-primary name that
#   #create_entity recognizes; each takes the roles listed on the name, or
#   the entity's default role when none are listed
def parse_contributor
  # Primary contributors are excluded to avoid duplication.
  secondary = @doc.name.reject { |n| n.usage == "primary" }
  contributor = secondary.filter_map do |name|
    entity, default_role = create_entity(name)
    next unless entity

    roles = (name.role || []).flat_map do |r|
      r.role_term.map { |rt| Bib::Contributor::Role.new(type: rt.content) }
    end
    roles << Bib::Contributor::Role.new(type: default_role) if roles.empty?
    create_contributor(entity, roles)
  end
  @errors[:contributor] &&= contributor.empty?
  contributor
end
#parse_date ⇒ Object
132 133 134 135 136 137 138 |
# File 'lib/relaton/nist/mods_parser.rb', line 132
#
# Builds "issued" dates from the first origin_info entry of the record.
#
# @return [Array] dates produced by #create_date, with failed ones dropped
def parse_date
  issued = @doc.origin_info[0].date_issued
  date = issued.filter_map { |di| create_date(di, "issued") }
  @errors[:date] = date.empty? if @errors[:date]
  date
end
#parse_docidentifier ⇒ Array<Bib::Docidentifier>
36 37 38 39 40 41 42 43 |
# File 'lib/relaton/nist/mods_parser.rb', line 36
#
# Builds the document identifiers: a primary "NIST" id from #pub_id and a
# "DOI" id from #parse_doi. Entries with nil or empty content are dropped.
#
# @return [Array<Bib::Docidentifier>]
def parse_docidentifier
  candidates = [
    { type: "NIST", content: pub_id, primary: true },
    { type: "DOI", content: parse_doi },
  ]
  ids = candidates.select do |id|
    !id[:content].nil? && !id[:content].empty?
  end
  @errors[:docidentifier] &&= ids.empty?
  ids.map { |id| Bib::Docidentifier.new(**id) }
end
#parse_doctype ⇒ Object
154 |
# File 'lib/relaton/nist/mods_parser.rb', line 154
#
# Document type used for the item's extension block.
#
# @return [Doctype] always built with content "standard"
def parse_doctype
  Doctype.new(content: "standard")
end
#parse_doi ⇒ Object
74 75 76 77 78 79 80 81 82 |
# File 'lib/relaton/nist/mods_parser.rb', line 74
#
# Extracts the DOI from the first location URL whose usage is
# "primary display".
#
# @return [String, nil] DOI with the prefix removed and known wrong values
#   corrected; nil when no such URL exists or it carries no DOI
def parse_doi
  url = nil
  @doc.location.each do |loc|
    url = loc.url.find { |u| u.usage == "primary display" }
    break if url
  end
  return if url.nil?

  id = remove_doi_prefix(url.content)
  id.nil? ? nil : replace_wrong_doi(id)
end
#parse_place ⇒ Object
248 249 250 251 252 253 254 255 256 |
# File 'lib/relaton/nist/mods_parser.rb', line 248
#
# Builds places from origin_info entries whose event type is "publisher".
# The first place term is matched against "<word>, <word>" to obtain city
# and state; both are nil when the text does not match.
#
# @return [Array<Bib::Place>]
def parse_place
  publisher_infos = @doc.origin_info.select { |p| p.event_type == "publisher" }
  place = publisher_infos.map do |info|
    term = info.place[0].place_term[0].content
    found = /(?<city>\w+), (?<state>\w+)/.match(term)
    city = found ? found[:city] : nil
    state = found ? found[:state] : nil
    Bib::Place.new(city: city, region: create_region(state))
  end
  @errors[:place] = place.empty? if @errors[:place]
  place
end
#parse_relation ⇒ Object
217 218 219 220 221 222 223 224 225 |
# File 'lib/relaton/nist/mods_parser.rb', line 217
#
# Builds relations from the record's related items, skipping those of type
# "series". The `related_item` accessor and the #create_related_item call
# were missing from the extracted text and are restored here.
#
# @return [Array<Relation>] one relation per related item for which
#   #create_related_item returns an item; the relation type comes from
#   RELATION_TYPES and is nil for unmapped item types
def parse_relation
  non_series = Array(@doc.related_item).reject { |ri| ri.type == "series" }
  relations = non_series.filter_map do |ri|
    type = RELATION_TYPES[ri.type]
    bibitem = create_related_item(ri)
    Relation.new(type: type, bibitem: bibitem) if bibitem
  end
  @errors[:relation] &&= relations.empty?
  relations
end
#parse_series ⇒ Object
264 265 266 267 268 269 270 271 272 273 |
# File 'lib/relaton/nist/mods_parser.rb', line 264
#
# Builds series entries from the record's related items of type "series".
# The `related_item` accessor was missing from the extracted text and is
# restored here.
#
# @return [Array<Bib::Series>] each with a single title (the first title of
#   the first title_info, stripped) and the first part number, if any
def parse_series
  series_items = Array(@doc.related_item).select { |ri| ri.type == "series" }
  series = series_items.map do |ri|
    tinfo = ri.title_info[0]
    title = Bib::Title.new(content: tinfo.title[0].strip)
    Bib::Series.new(title: [title], number: tinfo.part_number&.first)
  end
  @errors[:series] &&= series.empty?
  series
end
#parse_source ⇒ Object
112 113 114 115 116 117 118 119 120 |
# File 'lib/relaton/nist/mods_parser.rb', line 112
#
# Builds one source URI per location, using each location's first URL.
#
# @return [Array<Bib::Uri>] type is "doi" when the URL usage is
#   "primary display", otherwise "src"
def parse_source
  source = @doc.location.map do |location|
    first_url = location.url.first
    kind = if first_url.usage == "primary display"
             "doi"
           else
             "src"
           end
    Bib::Uri.new(content: first_url.content, type: kind)
  end
  @errors[:source] = source.empty? if @errors[:source]
  source
end
#parse_title ⇒ Array<Bib::Title>
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/relaton/nist/mods_parser.rb', line 87
#
# Builds titles from the record's title_info entries, skipping those of type
# "alternative". Titles become "title-main" and subtitles "title-part".
# With more than one collected title, a combined "main" title joined by
# " - " is appended; a single title is retyped to "main".
#
# @return [Array<Bib::Title>]
def parse_title
  title = @doc.title_info.each_with_object([]) do |ti, acc|
    next if ti.type == "alternative"

    acc.concat(ti.title.map { |t| create_title(t, "title-main", ti.non_sort&.first) })
    next unless ti.sub_title

    acc.concat(ti.sub_title.map { |t| create_title(t, "title-part") })
  end
  if title.size > 1
    combined = title.map(&:content).join(" - ")
    title << create_title(combined, "main")
  elsif title.size == 1
    # The type is overwritten by setting the instance variable directly.
    title.first.instance_variable_set(:@type, "main")
  end
  @errors[:title] = title.empty? if @errors[:title]
  title
end
#parse_type ⇒ Object
33 |
# File 'lib/relaton/nist/mods_parser.rb', line 33
#
# Item type stored under the :type attribute by #parse.
#
# @return [String] always "standard"
def parse_type
  "standard"
end
#pub_id ⇒ String
46 |
# File 'lib/relaton/nist/mods_parser.rb', line 46
#
# Publication identifier derived from the record's DOI.
#
# @return [String, nil] result of passing #parse_doi to #get_id_from_str
def pub_id
  doi = parse_doi
  get_id_from_str(doi)
end
#related_item_id(item) ⇒ Object
236 237 238 239 240 241 242 243 244 245 246 |
# File 'lib/relaton/nist/mods_parser.rb', line 236
#
# Resolves the DOI-style identifier of a related item. The method name was
# missing from the extracted text and is restored from the method list of
# this class.
#
# @param item [Object] responds to #other_type and #name
# @return [String, nil] identifier with the DOI prefix removed and known
#   wrong values corrected; nil when the source text carries no DOI
def related_item_id(item)
  other = item.other_type
  # Use other_type when it starts with the "10.6028" DOI prefix, otherwise
  # the content of the first name part of the first name.
  id = if other && other[0..6] == "10.6028"
         other
       else
         item.name[0].name_part[0].content
       end
  doi = remove_doi_prefix(id)
  return if doi.nil?

  replace_wrong_doi(doi)
end
#remove_doi_prefix(id) ⇒ Object
84 |
# File 'lib/relaton/nist/mods_parser.rb', line 84
#
# Returns whatever follows "10.6028/" in the given string.
#
# @param id [String] DOI or DOI URL
# @return [String, nil] nil when the string does not contain "10.6028/"
#   followed by at least one character
def remove_doi_prefix(id)
  id[/10\.6028\/(.+)/, 1]
end
#replace_wrong_doi(id) ⇒ String
63 64 65 66 67 68 69 70 71 72 |
# File 'lib/relaton/nist/mods_parser.rb', line 63
#
# Substitutes a fixed set of known wrong DOI suffixes with their corrected
# forms.
#
# @param id [String] DOI suffix
# @return [String] the corrected value, or id itself when it is not listed
def replace_wrong_doi(id)
  corrections = {
    "NBS.CIRC.sup" => "NBS.CIRC.24e7sup",
    "NBS.CIRC.supJun1925-Jun1926" => "NBS.CIRC.24e7sup2",
    "NBS.CIRC.supJun1925-Jun1927" => "NBS.CIRC.24e7sup3",
    "NBS.CIRC.24supJuly1922" => "NBS.CIRC.24e6sup",
    "NBS.CIRC.24supJan1924" => "NBS.CIRC.24e6sup2",
  }
  corrections.fetch(id, id)
end