Class: Relaton::Nist::ModsParser
- Inherits:
-
Object
- Object
- Relaton::Nist::ModsParser
- Defined in:
- lib/relaton/nist/mods_parser.rb
Constant Summary collapse
- RELATION_TYPES =
{ "otherVersion" => "editionOf", "preceding" => "updates", "succeeding" => "updatedBy", }.freeze
- ATTRS =
%i[type docidentifier title source abstract date contributor relation place series].freeze
Instance Method Summary collapse
- #create_contributor(entity, role) ⇒ Object
- #create_date(date, type) ⇒ Object
- #create_entity(name) ⇒ Object
- #create_org(name) ⇒ Object
- #create_person(name) ⇒ Object
- #create_region(state) ⇒ Object
- #create_related_item(item) ⇒ Object
- #create_title(title, type, non_sort = nil) ⇒ Object
- #decode_date(date) ⇒ Object
- #get_id_from_str(str) ⇒ Object
-
#initialize(doc, series, errors = {}) ⇒ ModsParser
constructor
A new instance of ModsParser.
- #parse ⇒ Bib::ItemData
- #parse_abstract ⇒ Object
- #parse_contributor ⇒ Object
- #parse_date ⇒ Object
- #parse_docidentifier ⇒ Array<Bib::Docidentifier>
- #parse_doctype ⇒ Object
- #parse_doi ⇒ Object
- #parse_place ⇒ Object
- #parse_relation ⇒ Object
- #parse_series ⇒ Object
- #parse_source ⇒ Object
- #parse_title ⇒ Array<Bib::Title>
- #parse_type ⇒ Object
- #pub_id ⇒ String
- #related_item_id(item) ⇒ Object
- #remove_doi_prefix(id) ⇒ Object
- #replace_wrong_doi(id) ⇒ String
Constructor Details
#initialize(doc, series, errors = {}) ⇒ ModsParser
Returns a new instance of ModsParser.
18 19 20 21 22 |
# File 'lib/relaton/nist/mods_parser.rb', line 18
#
# Stores the inputs used by the parse_* methods.
#
# @param doc [Object] parsed MODS record; read through @doc by the parse_* methods
# @param series [Object] series value, kept in @series
# @param errors [Hash] per-attribute flags, updated through @errors by the parse_* methods
def initialize(doc, series, errors = {})
  @doc, @series, @errors = doc, series, errors
end
Instance Method Details
#create_contributor(entity, role) ⇒ Object
166 167 168 169 170 171 172 173 |
# File 'lib/relaton/nist/mods_parser.rb', line 166
#
# Wraps a person or an organization into a contributor with the given roles.
#
# @param entity [Bib::Person, Bib::Organization, Object] entity to wrap
# @param role [Array] roles passed through to the contributor
# @return [Bib::Contributor, nil] nil when entity is neither a person nor an organization
def create_contributor(entity, role)
  # Pick the keyword under which the entity is handed to the contributor.
  slot = case entity
         when Bib::Person then :person
         when Bib::Organization then :organization
         end
  return unless slot

  Bib::Contributor.new(role: role, slot => entity)
end
#create_date(date, type) ⇒ Object
134 135 136 137 |
# File 'lib/relaton/nist/mods_parser.rb', line 134
#
# Builds a date of the given type from a MODS date element.
#
# NOTE(review): the bare `Date` constant is resolved in the enclosing
# namespace; presumably it is the bibliographic date model rather than
# ::Date - confirm.
#
# @param date [Object] element responding to #encoding and #content
# @param type [String] date type, e.g. "issued"
# @return [Object, nil] the date object, or nil when ::Date::Error is raised
def create_date(date, type)
  decoded = decode_date(date)
  Date.new(type: type, at: decoded)
rescue ::Date::Error
  # An unparseable date is skipped rather than failing the whole record.
  nil
end
#create_entity(name) ⇒ Object
175 176 177 178 179 180 |
# File 'lib/relaton/nist/mods_parser.rb', line 175
#
# Builds the entity for a MODS name together with its default role.
#
# @param name [Object] MODS name element responding to #type
# @return [Array, nil] [entity, default_role]: a person with "author" for
#   "personal" names, an organization with "publisher" for "corporate"
#   names, nil for any other type
def create_entity(name)
  kind = name.type
  if kind == "personal"
    [create_person(name), "author"]
  elsif kind == "corporate"
    [create_org(name), "publisher"]
  end
end
#create_org(name) ⇒ Object
196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/relaton/nist/mods_parser.rb', line 196
#
# Builds an organization from a corporate MODS name.
#
# @param name [Object] MODS name element responding to #name_part and #name_identifier
# @return [Bib::Organization] organization with whitespace-normalized names
#   and, when a name identifier is present, a single "uri" identifier
def create_org(name)
  untyped_parts = name.name_part.reject(&:type)
  names = untyped_parts.map do |part|
    # Collapse line breaks and runs of spaces into single spaces.
    text = part.content.tr("\n", " ").squeeze(" ").strip
    Bib::TypedLocalizedString.new(content: text)
  end
  url = name.name_identifier&.first&.content
  identifier = url ? [Bib::OrganizationType::Identifier.new(type: "uri", content: url)] : []
  Bib::Organization.new(name: names, identifier: identifier)
end
#create_person(name) ⇒ Object
182 183 184 185 186 187 188 189 190 191 192 193 194 |
# File 'lib/relaton/nist/mods_parser.rb', line 182
#
# Builds a person from a personal MODS name.
#
# @param name [Object] MODS name element responding to #name_part and #name_identifier
# @return [Bib::Person] person with a complete name and, when a name
#   identifier is present, a single "uri" identifier
def create_person(name)
  # Typed name parts are skipped because they are not actual name parts.
  untyped_parts = name.name_part.reject(&:type)
  joined = untyped_parts.map(&:content).join(" ")
  full_name = Bib::FullName.new(
    completename: Bib::LocalizedString.new(content: joined, language: "en"),
  )
  name_id = name.name_identifier&.first
  identifier = name_id ? [Bib::Person::Identifier.new(type: "uri", content: name_id.content)] : []
  Bib::Person.new(name: full_name, identifier: identifier)
end
#create_region(state) ⇒ Object
252 253 254 255 256 |
# File 'lib/relaton/nist/mods_parser.rb', line 252
#
# Wraps a state code into a one-element list of regions.
#
# @param state [String, nil] value passed as the region's `iso`
# @return [Array] one region, or an empty array when building the region
#   raises ArgumentError
def create_region(state)
  begin
    region = Bib::Place::RegionType.new(iso: state)
    [region]
  rescue ArgumentError
    []
  end
end
#create_related_item(item) ⇒ Object
221 222 223 224 225 226 227 228 |
# File 'lib/relaton/nist/mods_parser.rb', line 221
#
# Builds a minimal bibliographic item for a MODS related item.
#
# @param item [Object] MODS related item, passed to #related_item_id
# @return [ItemData, nil] item with a "NIST" document identifier and a
#   formatted reference, or nil when no identifier can be derived
def create_related_item(item)
  item_id = get_id_from_str(related_item_id(item))
  return if item_id.nil? || item_id.empty?

  docid = Bib::Docidentifier.new(type: "NIST", content: item_id)
  fref = Bib::Formattedref.new(content: item_id)
  ItemData.new(docidentifier: [docid], formattedref: fref)
end
#create_title(title, type, non_sort = nil) ⇒ Object
100 101 102 103 104 |
# File 'lib/relaton/nist/mods_parser.rb', line 100
#
# Builds an English, Latin-script title with normalized whitespace.
#
# @param title [String] raw title text
# @param type [String] title type, e.g. "title-main"
# @param non_sort [Object, nil] optional element whose #content is prepended to the title
# @return [Bib::Title]
def create_title(title, type, non_sort = nil)
  normalized = title.tr("\n", " ").squeeze(" ").strip
  # Squeeze again after prepending: the prefix may itself end with a space.
  content = non_sort ? "#{non_sort.content}#{normalized}".squeeze(" ") : normalized
  Bib::Title.new(content: content, type: type, language: "en", script: "Latn")
end
#decode_date(date) ⇒ Object
139 140 141 142 143 144 145 146 |
# File 'lib/relaton/nist/mods_parser.rb', line 139
#
# Converts a MODS date element into a date string.
#
# @param date [Object] element responding to #encoding and #content
# @return [String] ISO date for six-character "marc" content (yymmdd) and
#   for "iso8601" content (yyyymmdd); the raw content otherwise
# @raise [::Date::Error] when the content does not match the expected format
def decode_date(date)
  return ::Date.strptime(date.content, "%y%m%d").to_s if date.encoding == "marc" && date.content.size == 6
  return ::Date.strptime(date.content, "%Y%m%d").to_s if date.encoding == "iso8601"

  date.content
end
#get_id_from_str(str) ⇒ Object
48 49 50 51 52 53 54 |
# File 'lib/relaton/nist/mods_parser.rb', line 48
#
# Normalizes an identifier string into a NIST publication identifier.
#
# @param str [String, nil] raw identifier
# @return [String, nil] the identifier as rendered by Pubid; when Pubid
#   cannot parse it, the input with dots replaced by spaces and its
#   leading non-digit run upcased; nil for nil or empty input
def get_id_from_str(str)
  return nil if str.nil? || str.empty?

  begin
    ::Pubid::Nist::Identifier.parse(str).to_s
  rescue ::Pubid::Core::Errors::ParseError
    spaced = str.gsub(".", " ")
    spaced.sub(/^[\D]+/, &:upcase)
  end
end
#parse ⇒ Bib::ItemData
25 26 27 28 29 30 31 |
# File 'lib/relaton/nist/mods_parser.rb', line 25
#
# Parses the MODS record into a bibliographic item.
#
# Every attribute listed in ATTRS is produced by the matching parse_<attr>
# method; the extension block carries the doctype and the flavor.
#
# @return [Bib::ItemData]
def parse
  args = ATTRS.to_h { |attr| [attr, send("parse_#{attr}")] }
  # NOTE(review): the flavor was spelled "nilst"; corrected to "nist" on the
  # assumption that it was a typo - confirm against consumers of ext.flavor.
  args[:ext] = Ext.new(doctype: parse_doctype, flavor: "nist")
  ItemData.new(**args)
end
#parse_abstract ⇒ Object
116 117 118 119 120 121 122 123 124 |
# File 'lib/relaton/nist/mods_parser.rb', line 116
#
# Collects the record's abstracts as English, Latin-script abstracts with
# normalized whitespace.
#
# @return [Array<Bib::Abstract>] empty when the record has no abstract
def parse_abstract
  abstract = Array(@doc.abstract).map do |entry|
    text = entry.content.tr("\n", " ").squeeze(" ").strip
    Bib::Abstract.new(content: text, language: "en", script: "Latn")
  end
  # A flag that is already set stays set only while the result is empty.
  @errors[:abstract] &&= abstract.empty?
  abstract
end
#parse_contributor ⇒ Object
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
# File 'lib/relaton/nist/mods_parser.rb', line 150
#
# Builds contributors from the record's names.
#
# Names whose type is neither personal nor corporate are skipped. A name
# without role terms gets the default role supplied by #create_entity.
#
# @return [Array<Bib::Contributor>]
def parse_contributor
  # Primary contributors are excluded to avoid duplication.
  secondary = @doc.name.reject { |n| n.usage == "primary" }
  contributor = secondary.filter_map do |name|
    entity, default_role = create_entity(name)
    next unless entity

    roles = (name.role || []).flat_map do |r|
      r.role_term.map { |rt| Bib::Contributor::Role.new(type: rt.content) }
    end
    roles << Bib::Contributor::Role.new(type: default_role) if roles.empty?
    create_contributor(entity, roles)
  end
  @errors[:contributor] &&= contributor.empty?
  contributor
end
#parse_date ⇒ Object
126 127 128 129 130 131 132 |
# File 'lib/relaton/nist/mods_parser.rb', line 126
#
# Builds "issued" dates from the first origin info of the record.
#
# @return [Array] dates; entries that #create_date could not build are dropped
def parse_date
  issued = @doc.origin_info[0].date_issued
  date = issued.filter_map { |di| create_date(di, "issued") }
  @errors[:date] &&= date.empty?
  date
end
#parse_docidentifier ⇒ Array<Bib::Docidentifier>
36 37 38 39 40 41 42 43 |
# File 'lib/relaton/nist/mods_parser.rb', line 36
#
# Builds the document identifiers: the primary "NIST" identifier and the
# "DOI" identifier. Identifiers with nil or empty content are left out.
#
# @return [Array<Bib::Docidentifier>]
def parse_docidentifier
  candidates = [
    { type: "NIST", content: pub_id, primary: true },
    { type: "DOI", content: parse_doi },
  ]
  ids = candidates.reject do |attrs|
    content = attrs[:content]
    content.nil? || content.empty?
  end
  @errors[:docidentifier] &&= ids.empty?
  ids.map { |attrs| Bib::Docidentifier.new(**attrs) }
end
#parse_doctype ⇒ Object
148 |
# File 'lib/relaton/nist/mods_parser.rb', line 148
#
# Returns the document type; every record is typed as "standard".
#
# @return [Doctype]
def parse_doctype
  Doctype.new(content: "standard")
end
#parse_doi ⇒ Object
68 69 70 71 72 73 74 75 76 |
# File 'lib/relaton/nist/mods_parser.rb', line 68
#
# Extracts the DOI suffix from the first "primary display" URL found in the
# record's locations.
#
# @return [String, nil] the part after "10.6028/", with known wrong values
#   replaced; nil when there is no such URL or it carries no such prefix
def parse_doi
  url = nil
  @doc.location.each do |loc|
    url = loc.url.find { |u| u.usage == "primary display" }
    break if url
  end
  return if url.nil?

  id = remove_doi_prefix(url.content)
  id.nil? ? nil : replace_wrong_doi(id)
end
#parse_place ⇒ Object
242 243 244 245 246 247 248 249 250 |
# File 'lib/relaton/nist/mods_parser.rb', line 242
#
# Builds places from the origin infos whose event type is "publisher".
#
# The first place term is expected to look like "City, State"; when it does
# not, the place is built with a nil city and a nil state.
#
# @return [Array<Bib::Place>]
def parse_place
  publisher_infos = @doc.origin_info.select { |info| info.event_type == "publisher" }
  place = publisher_infos.map do |info|
    term = info.place[0].place_term[0].content
    found = /(?<city>\w+), (?<state>\w+)/.match(term)
    city, state = found ? [found[:city], found[:state]] : [nil, nil]
    Bib::Place.new(city: city, region: create_region(state))
  end
  @errors[:place] &&= place.empty?
  place
end
#parse_relation ⇒ Object
211 212 213 214 215 216 217 218 219 |
# File 'lib/relaton/nist/mods_parser.rb', line 211
#
# Builds relations from the record's related items, ignoring those of type
# "series" and those for which no bibliographic item can be built.
#
# The relation type is looked up in RELATION_TYPES and is nil for a related
# item type that has no mapping there.
#
# @return [Array<Relation>]
def parse_relation
  non_series = Array(@doc.related_item).reject { |ri| ri.type == "series" }
  relations = non_series.filter_map do |ri|
    type = RELATION_TYPES[ri.type]
    bibitem = create_related_item(ri)
    Relation.new(type: type, bibitem: bibitem) if bibitem
  end
  @errors[:relation] &&= relations.empty?
  relations
end
#parse_series ⇒ Object
258 259 260 261 262 263 264 265 266 267 |
# File 'lib/relaton/nist/mods_parser.rb', line 258
#
# Builds series from the record's related items of type "series".
#
# Each series takes its title from the first title of the first title info
# and its number from the first part number, when present.
#
# @return [Array<Bib::Series>]
def parse_series
  series_items = Array(@doc.related_item).select { |ri| ri.type == "series" }
  series = series_items.map do |ri|
    tinfo = ri.title_info[0]
    title = Bib::Title.new(content: tinfo.title[0].strip)
    Bib::Series.new(title: [title], number: tinfo.part_number&.first)
  end
  @errors[:series] &&= series.empty?
  series
end
#parse_source ⇒ Object
106 107 108 109 110 111 112 113 114 |
# File 'lib/relaton/nist/mods_parser.rb', line 106
#
# Builds one source URI per location, taken from the location's first URL.
#
# @return [Array<Bib::Uri>] URIs typed "doi" when the URL usage is
#   "primary display" and "src" otherwise
def parse_source
  source = @doc.location.map do |location|
    first_url = location.url.first
    kind = if first_url.usage == "primary display"
             "doi"
           else
             "src"
           end
    Bib::Uri.new(content: first_url.content, type: kind)
  end
  @errors[:source] &&= source.empty?
  source
end
#parse_title ⇒ Array<Bib::Title>
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/relaton/nist/mods_parser.rb', line 81
#
# Builds titles from the record's title infos, skipping those of type
# "alternative".
#
# Titles become "title-main" and subtitles "title-part". When more than one
# title results, a "main" title joining all contents with " - " is
# appended; a single title is itself retyped to "main".
#
# @return [Array<Bib::Title>]
def parse_title
  usable = @doc.title_info.reject { |ti| ti.type == "alternative" }
  title = usable.flat_map do |ti|
    mains = ti.title.map { |t| create_title(t, "title-main", ti.non_sort&.first) }
    parts = ti.sub_title ? ti.sub_title.map { |t| create_title(t, "title-part") } : []
    mains + parts
  end
  case title.size
  when 0 then nil
  when 1 then title[0].instance_variable_set(:@type, "main")
  else title << create_title(title.map(&:content).join(" - "), "main")
  end
  @errors[:title] &&= title.empty?
  title
end
#parse_type ⇒ Object
33 |
# File 'lib/relaton/nist/mods_parser.rb', line 33
#
# Returns the item type; every record is typed as "standard".
#
# @return [String]
def parse_type
  "standard"
end
#pub_id ⇒ String
46 |
# File 'lib/relaton/nist/mods_parser.rb', line 46
#
# Derives the publication identifier from the record's DOI.
#
# @return [String, nil] nil when the record has no usable DOI
def pub_id
  get_id_from_str(parse_doi)
end
#related_item_id(item) ⇒ Object
230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/relaton/nist/mods_parser.rb', line 230
#
# Extracts the DOI suffix that identifies a MODS related item.
#
# The DOI is read from the item's other_type when that starts with
# "10.6028", otherwise from the first name part of the item's first name.
#
# @param item [Object] related item responding to #other_type and #name
# @return [String, nil] the part after "10.6028/", with known wrong values
#   replaced; nil when the value carries no such prefix
def related_item_id(item)
  other = item.other_type
  id = if other&.start_with?("10.6028")
         other
       else
         item.name[0].name_part[0].content
       end
  doi = remove_doi_prefix(id)
  return if doi.nil?

  replace_wrong_doi(doi)
end
#remove_doi_prefix(id) ⇒ Object
78 |
# File 'lib/relaton/nist/mods_parser.rb', line 78
#
# Returns the part of a DOI or DOI URL that follows "10.6028/".
#
# @param id [String, nil] DOI, DOI URL, or nil
# @return [String, nil] the suffix, or nil when id is nil or does not
#   contain the "10.6028/" prefix
def remove_doi_prefix(id)
  # Callers already treat nil as "no DOI", so a missing value is not an error.
  return if id.nil?

  id[%r{10\.6028/(.+)}, 1]
end
#replace_wrong_doi(id) ⇒ String
57 58 59 60 61 62 63 64 65 66 |
# File 'lib/relaton/nist/mods_parser.rb', line 57
#
# Replaces a known wrong DOI suffix with its corrected value.
#
# @param id [String] DOI suffix
# @return [String] the corrected suffix, or id itself when no correction is listed
def replace_wrong_doi(id)
  corrections = {
    "NBS.CIRC.sup" => "NBS.CIRC.24e7sup",
    "NBS.CIRC.supJun1925-Jun1926" => "NBS.CIRC.24e7sup2",
    "NBS.CIRC.supJun1925-Jun1927" => "NBS.CIRC.24e7sup3",
    "NBS.CIRC.24supJuly1922" => "NBS.CIRC.24e6sup",
    "NBS.CIRC.24supJan1924" => "NBS.CIRC.24e6sup2",
  }
  corrections.fetch(id, id)
end