Module: Relaton::Oasis::DataParserUtils
- Included in:
- DataParser, DataPartParser
- Defined in:
- lib/relaton/oasis/data_parser_utils.rb
Overview
Common methods for document and part parsers.
Instance Method Summary collapse
- #create_contribution_info(person_node, type, description = []) ⇒ Object
- #create_ext ⇒ Object
- #create_person(name, email = nil, org = nil) ⇒ Object
- #decode_cf_email(encoded) ⇒ Object
- #page ⇒ Object
-
#parse_chairs ⇒ Object
Parse committee chairs from the document page as contributors described as "Chair".
-
#parse_contributor ⇒ Array<Bib::Contributor>
Parse contributor.
-
#parse_docid ⇒ Array<Bib::Docidentifier>
Parse document identifier.
-
#parse_doctype ⇒ Doctype
Parse document type.
-
#parse_editors ⇒ Object
Parse editors from the document page, falling back to the document text when no page is available.
-
#parse_editors_from_text ⇒ Object
Parse editors from the "Edited by ..." phrase in the document text.
-
#parse_errata(id) ⇒ String
Parse document identifier errata.
-
#parse_part(docid) ⇒ String
Parse document identifier part.
-
#parse_spec(num) ⇒ String
Parse document identifier specification.
-
#parse_technology_area(node) ⇒ Array<String>
Parse technology area.
- #person_affiliation(org) ⇒ Object
- #person_email(email) ⇒ Object
- #publisher_oasis ⇒ Object
-
#retry_page(url, agent, retries = 3) ⇒ Mechanize::Page?
Retry to get page.
Instance Method Details
#create_contribution_info(person_node, type, description = []) ⇒ Object
112 113 114 115 116 117 118 119 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 112

#
# Build a contributor entry for a person described by a page node.
#
# The person's name is the node text up to the first "(". The first two
# links with an href found under the node are passed to #create_person as
# the email link and the organization link, in that order.
#
# @param person_node [#text, #xpath] node describing the person
# @param type [String] contributor role type
# @param description [Array<String>] role descriptions
#
# @return [Bib::Contributor] contributor with a single role
#
def create_contribution_info(person_node, type, description = [])
  full_name = person_node.text.match(/^[^(]+/).to_s.strip
  email_link, org_link = person_node.xpath(".//a[@href]")
  person = create_person(full_name, email_link, org_link)
  localized = description.map do |item|
    Bib::LocalizedMarkedUpString.new(content: item)
  end
  Bib::Contributor.new(
    role: [Bib::Contributor::Role.new(type: type, description: localized)],
    person: person,
  )
end
#create_ext ⇒ Object
258 259 260 261 262 263 264 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 258

#
# Build the flavor-specific extension block.
#
# NOTE(review): #parse_technology_area is called without arguments here,
# while this module defines it with a required `node` parameter; presumably
# the including parser overrides it with a zero-argument version - confirm.
#
# @return [Ext] extension with doctype, the "oasis" flavor and technology areas
#
def create_ext
  attributes = {
    doctype: parse_doctype,
    flavor: "oasis",
    technology_area: parse_technology_area,
  }
  Ext.new(**attributes)
end
#create_person(name, email = nil, org = nil) ⇒ Object
121 122 123 124 125 126 127 128 129 130 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 121

#
# Build a person from a whitespace-separated name and optional links.
#
# NOTE(review): only the first two tokens of the name are used (forename,
# then surname); any further tokens are dropped.
#
# @param name [String] full name
# @param email [Object, nil] email link, passed on to #person_email
# @param org [Object, nil] organization link, passed on to #person_affiliation
#
# @return [Bib::Person]
#
def create_person(name, email = nil, org = nil)
  given, family = name.split
  locale = { language: "en", script: "Latn" }
  forename = Bib::FullNameType::Forename.new(content: given, **locale)
  surname = Bib::LocalizedString.new(content: family, **locale)
  Bib::Person.new(
    name: Bib::FullName.new(surname: surname, forename: [forename]),
    email: person_email(email),
    affiliation: person_affiliation(org),
  )
end
#decode_cf_email(encoded) ⇒ Object
146 147 148 149 150 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 146

#
# Decode a hex-encoded, XOR-obfuscated email address (the value of a
# `data-cfemail` attribute).
#
# The first decoded byte is the XOR key; every following byte is XOR-ed
# with it to recover one character.
#
# @param encoded [String, nil] hex string
#
# @return [String] decoded text; empty when the input is nil, empty, or
#   holds only the key byte
#
def decode_cf_email(encoded)
  key, *payload = [encoded.to_s].pack("H*").bytes
  # No key byte means there is nothing to decode (nil or empty input).
  return "" unless key

  payload.map { |byte| (byte ^ key).chr }.join
end
#page ⇒ Object
50 51 52 53 54 55 56 57 58 59 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 50

#
# Fetch, once, the HTML page the document link points to.
#
# The page is requested only when the link's href ends with ".html".
# The outcome is memoized in @page, including a nil outcome, so a missing
# or failed page is not requested again on later calls.
#
# @return [Object, nil] the response returned by #retry_page when its code
#   is "200", otherwise nil
#
def page
  return @page if defined? @page

  href = link_node && link_node[:href]
  @page =
    if href&.match?(/\.html$/)
      agent = Mechanize.new
      # 404 is an allowed error code so a missing page yields a response
      # rather than an exception.
      agent.agent.allowed_error_codes = [404]
      resp = retry_page(href, agent)
      resp if resp && resp.code == "200"
    end
end
#parse_chairs ⇒ Object
Parse committee chairs from the document page as contributors described as "Chair".
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 79

#
# Parse chairs from the document page.
#
# Selects every paragraph that has a preceding sibling paragraph starting
# with "Chair" and a following sibling paragraph starting with "Editor",
# and turns each into an "editor" contributor described as "Chair".
# Without a page the result is empty.
#
# @return [Array<Object>] results of #create_contribution_info
#
def parse_chairs # rubocop:disable Metrics/MethodLength
  doc = page
  chairs = []
  if doc
    chair_paragraphs = "//p[preceding-sibling::p[starts-with(., 'Chair')]]" \
                       "[following-sibling::p[starts-with(., 'Editor')]]"
    chairs = doc.xpath(chair_paragraphs).map do |paragraph|
      create_contribution_info(paragraph, "editor", ["Chair"])
    end
  end
  # Keeps the error flag set only while it was set and nothing was found.
  @errors[:chairs] &&= chairs.empty?
  chairs
end
#parse_contributor ⇒ Array<Bib::Contributor>
Parse contributor.
10 11 12 13 14 15 16 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 10

#
# Parse contributor.
#
# Concatenates, in order, the OASIS publisher, the editorial group
# contributors, the chairs and the editors.
#
# @return [Array<Bib::Contributor>] contributors
#
def parse_contributor
  result = publisher_oasis + parse_editorialgroup_contributor +
           parse_chairs + parse_editors
  # Keeps the error flag set only while it was set and nothing was found.
  @errors[:contributor] &&= result.empty?
  result
end
#parse_docid ⇒ Array<Bib::Docidentifier>
Parse document identifier.
216 217 218 219 220 221 222 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 216

#
# Parse document identifier.
#
# Produces one primary identifier of type "OASIS" whose content is
# "OASIS " followed by the value of #parse_docnumber.
#
# @return [Array<Bib::Docidentifier>] document identifiers
#
def parse_docid
  primary_id = Bib::Docidentifier.new(
    type: "OASIS",
    content: "OASIS #{parse_docnumber}",
    primary: true,
  )
  identifiers = [primary_id]
  @errors[:docid] &&= identifiers.empty?
  identifiers
end
#parse_doctype ⇒ Doctype
Parse document type.
229 230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 229

#
# Parse document type.
#
# The type is chosen by the first pattern that matches the document text;
# when none matches it is "standard".
#
# @return [Doctype] document type
#
def parse_doctype
  type_by_pattern = {
    /OASIS Project Specification/ => "specification",
    /Committee Specification/ => "specification",
    /Technical Memorandum/ => "memorandum",
    /Technical Resolution/ => "resolution",
  }
  source = text
  # `===` is used so that a non-string text simply matches nothing.
  _pattern, matched_type = type_by_pattern.find { |pattern, _type| pattern === source }
  doctype = Doctype.new(content: matched_type || "standard")
  @errors[:doctype] &&= doctype.nil?
  doctype
end
#parse_editors ⇒ Object
Parse editors from the document page, falling back to the document text when no page is available.
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 95

#
# Parse editors.
#
# With a page available, selects paragraphs whose class contains
# "Contributor" that sit after a paragraph starting with "Editor" and
# before a paragraph whose class contains "Title". Without a page, falls
# back to #parse_editors_from_text.
#
# @return [Array<Object>] editor contributors
#
def parse_editors # rubocop:disable Metrics/MethodLength
  doc = page
  editors =
    if doc
      editor_paragraphs = "//p[contains(@class, 'Contributor')]" \
                          "[preceding-sibling::p[starts-with(., 'Editor')]]" \
                          "[following-sibling::p[contains(@class, 'Title')]]"
      doc.xpath(editor_paragraphs).map do |paragraph|
        create_contribution_info(paragraph, "editor")
      end
    else
      parse_editors_from_text
    end
  # Keeps the error flag set only while it was set and nothing was found.
  @errors[:editors] &&= editors.empty?
  editors
end
#parse_editors_from_text ⇒ Object
Parse editors from the "Edited by ..." phrase in the document text.
35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 35

#
# Parse editors from the document text.
#
# Takes what follows "Edited by " up to the first period and splits it on
# ", ", " and " and ", and " into individual names. Each name becomes an
# "editor" contributor. Without text the result is empty.
#
# @return [Array<Bib::Contributor>] editor contributors
#
def parse_editors_from_text # rubocop:disable Metrics/MethodLength
  editors = []
  if text
    names = text.match(/(?<=Edited\sby\s)[^.]+/).to_s.split(/,?\sand\s|,\s/)
    editors = names.map do |name|
      editor_role = [Bib::Contributor::Role.new(type: "editor")]
      Bib::Contributor.new(role: editor_role, person: create_person(name))
    end
  end
  # Keeps the error flag set only while it was set and nothing was found.
  @errors[:editors] &&= editors.empty?
  editors
end
#parse_errata(id) ⇒ String
Parse document identifier errata.
201 202 203 204 205 206 207 208 209 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 201

#
# Parse document identifier errata.
#
# When the identifier already carries an errata number (matched
# case-insensitively), only the capitalization of that "errata" token is
# normalized to "Errata". Otherwise the errata number is taken from the
# title, if the title has one.
#
# @param id [String] document identifier
#
# @return [String] document identifier with errata if present
#
def parse_errata(id)
  # Normalize exactly the occurrence that is followed by digits, in
  # whatever case it was written.
  return id.sub(/errata(?=\d)/i, "Errata") if id.match?(/errata\d+/i)

  case title
  when /Plus\sErrata\s(\d+)/ then "#{id}-plus-Errata#{Regexp.last_match(1)}"
  when /Errata\s(\d+)/ then "#{id}-Errata#{Regexp.last_match(1)}"
  else id
  end
end
#parse_part(docid) ⇒ String
Parse document identifier part.
185 186 187 188 189 190 191 192 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 185

#
# Parse document identifier part.
#
# An identifier that already contains "Part<n>" or "Pt<n>" (any case) is
# returned unchanged. Otherwise "-Pt<n>" is appended when the title
# contains "Part <n>".
#
# @param docid [String] document identifier
#
# @return [String] document identifier with part if present
#
def parse_part(docid)
  return docid if docid.match?(/(?:Part|Pt)\d+/i)

  part = /Part\s(\d+)/.match(title)
  part ? "#{docid}-Pt#{part[1]}" : docid
end
#parse_spec(num) ⇒ String
Parse document identifier specification.
170 171 172 173 174 175 176 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 170

#
# Parse document identifier specification.
#
# Appends "-PS<n>" for an "OASIS Project Specification <n>" or "-CS<n>"
# for a "Committee Specification <n>" found in the document text; the
# project specification is checked first.
#
# @param num [String] document identifier
#
# @return [String] document identifier with specification if present
#
def parse_spec(num)
  source = text
  stages = {
    "PS" => /OASIS Project Specification (\d+)/,
    "CS" => /Committee Specification (\d+)/,
  }
  stages.each do |abbreviation, pattern|
    found = pattern.match(source)
    return "#{num}-#{abbreviation}#{found[1]}" if found
  end
  num
end
#parse_technology_area(node) ⇒ Array<String>
Parse technology area.
247 248 249 250 251 252 253 254 255 256 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 247

#
# Parse technology area.
#
# Reads the link texts of the "technology-areas__list" list under the
# node, replaces each whitespace character with "-" and capitalizes the
# first "development".
#
# @param node [#xpath] document node
#
# @return [Array<String>] technology areas
#
def parse_technology_area(node)
  links = node.xpath(
    "./summary/div/div/ul[@class='technology-areas__list']/li/a",
  )
  areas = links.map do |link|
    hyphenated = link.text.strip.gsub(/\s/, "-")
    hyphenated.sub("development", "Development")
  end
  # Keeps the error flag set only while it was set and nothing was found.
  @errors[:technology_area] &&= areas.empty?
  areas
end
#person_affiliation(org) ⇒ Object
152 153 154 155 156 157 158 159 160 161 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 152

#
# Build a person's affiliation from an organization link.
#
# The organization name is the link text with runs of line breaks
# replaced by a single space; the organization URI is the link's href.
#
# @param org [#text, #[], nil] organization link
#
# @return [Array<Bib::Affiliation>] one affiliation, or empty without a link
#
def person_affiliation(org)
  return [] unless org

  name = Bib::TypedLocalizedString.new(content: org.text.gsub(/[\r\n]+/, " "))
  uri = Bib::Uri.new(type: "uri", content: org[:href])
  [Bib::Affiliation.new(organization: Bib::Organization.new(name: [name], uri: [uri]))]
end
#person_email(email) ⇒ Object
132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 132

#
# Extract a person's email address from a link.
#
# A "mailto:" href yields the text between the first and second colon.
# Otherwise the address is decoded from a nested span carrying a
# `data-cfemail` attribute, when there is one.
#
# @param email [#[], #at, nil] email link
#
# @return [Array<String>] one address, or empty when none can be determined
#
def person_email(email)
  return [] unless email

  # A link without an href is treated like any non-mailto link.
  href = email[:href].to_s
  if href.start_with?("mailto:")
    address = href.split(":")[1]
    # "mailto:" with nothing after it has no address to report.
    return address.to_s.empty? ? [] : [address]
  end

  cf_email = email.at(".//span[@data-cfemail]")
  return [] unless cf_email

  decoded = decode_cf_email(cf_email["data-cfemail"])
  decoded.empty? ? [] : [decoded]
end
#publisher_oasis ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 18

#
# Build the OASIS organization contributor.
#
# OASIS gets two roles: "authorizer", described as
# "Standards Development Organization", and "publisher".
#
# @return [Array<Bib::Contributor>] a single-element array
#
def publisher_oasis
  oasis = Bib::Organization.new(
    name: [Bib::TypedLocalizedString.new(content: "OASIS")],
    uri: [Bib::Uri.new(type: "uri", content: "https://www.oasis-open.org/")],
  )
  sdo_note = Bib::LocalizedMarkedUpString.new(
    content: "Standards Development Organization",
  )
  authorizer = Bib::Contributor::Role.new(type: "authorizer", description: [sdo_note])
  publisher = Bib::Contributor::Role.new(type: "publisher")
  [Bib::Contributor.new(organization: oasis, role: [authorizer, publisher])]
end
#retry_page(url, agent, retries = 3) ⇒ Mechanize::Page?
Retry to get page.
70 71 72 73 74 75 76 77 |
# File 'lib/relaton/oasis/data_parser_utils.rb', line 70

#
# Retry to get page.
#
# Every attempt, including each retry, is preceded by a one-second pause.
# Only timeouts (Errno::ETIMEDOUT, Net::OpenTimeout) are retried; once the
# attempts are used up the failure is reported through Util.error.
#
# @param url [String] page URL
# @param agent [#get] HTTP agent
# @param retries [Integer] total number of attempts
#
# @return [Mechanize::Page, nil] the page, or nil when every attempt timed out
#
def retry_page(url, agent, retries = 3)
  sleep 1 # to avoid 429 error
  agent.get url
rescue Errno::ETIMEDOUT, Net::OpenTimeout => e
  retry if (retries -= 1).positive?
  Util.error "Failed to get page `#{url}`\n#{e.message}"
  nil
end