Class: Relaton::Ecma::StandardParser

Inherits:
Object
  • Object
show all
Includes:
ParserCommon
Defined in:
lib/relaton/ecma/standard_parser.rb

Constant Summary collapse

ATTRS =
%i[docidentifier title date source abstract relation edition ext].freeze

Instance Method Summary collapse

Methods included from ParserCommon

#contributor, #default_bib_hash, #fetch_doctype, #fetch_ext

Constructor Details

#initialize(hit:, doc:, errors: {}) ⇒ StandardParser

Returns a new instance of StandardParser.

Parameters:

  • hit (Nokogiri::XML::Element)

    document hit

  • doc (Mechanize::Page)

    fetched document page

  • errors (Hash) (defaults to: {})

    error tracking hash



11
12
13
14
15
# File 'lib/relaton/ecma/standard_parser.rb', line 11

# Build a parser for one ECMA standard.
#
# @param hit [Nokogiri::XML::Element] document hit
# @param doc [Mechanize::Page] fetched document page
# @param errors [Hash] error tracking hash, updated by the fetch_* methods
def initialize(hit:, doc:, errors: {})
  @errors = errors
  @doc = doc
  @hit = hit
end

Instance Method Details

#fetch_abstract ⇒ Array<Relaton::Bib::LocalizedMarkedUpString>

Returns:

  • (Array<Relaton::Bib::LocalizedMarkedUpString>)


46
47
48
49
50
51
52
53
54
55
# File 'lib/relaton/ecma/standard_parser.rb', line 46

# Build the abstract from the paragraphs of the item description.
#
# Each paragraph is stripped, runs of spaces are squeezed to one and CRLF
# sequences are removed; the paragraphs are then joined with "\n".
#
# @return [Array<Bib::Abstract>] a single English abstract, or an empty
#   array when the joined text is empty
def fetch_abstract
  content = @doc.xpath('//div[@class="ecma-item-content"]/p').map do |par|
    par.text.strip.squeeze(" ").gsub("\r\n", "")
  end.join("\n")

  result = if content.empty?
             []
           else
             [Bib::Abstract.new(content: content, language: "en", script: "Latn")]
           end
  # Record the outcome for the empty case as well, the same way the other
  # fetch_* methods do. `&&=` only overwrites a flag that is currently truthy.
  @errors[:standard_abstract] &&= result.empty?
  result
end

#fetch_date ⇒ Array<Relaton::Bib::Date>

Returns:

  • (Array<Relaton::Bib::Date>)


58
59
60
61
62
63
64
65
# File 'lib/relaton/ecma/standard_parser.rb', line 58

# Read the publication date(s) from the edition line(s) of the page.
#
# The date is whatever follows the last ", " in the element text; text
# without that separator is used as-is.
#
# @return [Array<Bib::Date>] dates of type "published", possibly empty
def fetch_date
  edition_lines = @doc.xpath('//p[@class="ecma-item-edition"]')
  dates = edition_lines.map do |node|
    Bib::Date.new(type: "published", at: node.text.split(", ").last)
  end
  # `&&=` only overwrites a flag that is currently truthy.
  @errors[:standard_date] &&= dates.empty?
  dates
end

#fetch_docidentifier ⇒ Array<Relaton::Bib::Docidentifier>

Returns:

  • (Array<Relaton::Bib::Docidentifier>)


30
31
32
33
34
# File 'lib/relaton/ecma/standard_parser.rb', line 30

# Build the document identifiers from the text of the hit by delegating to
# the inherited implementation, then record whether anything was produced.
#
# @return [Array<Relaton::Bib::Docidentifier>]
def fetch_docidentifier
  super(@hit.text).tap do |ids|
    # `&&=` only overwrites a flag that is currently truthy.
    @errors[:standard_docidentifier] &&= ids.empty?
  end
end

#fetch_edition ⇒ Relaton::Bib::Edition?

Returns:

  • (Relaton::Bib::Edition, nil)


102
103
104
105
106
107
# File 'lib/relaton/ecma/standard_parser.rb', line 102

# Wrap the edition text returned by fetch_edition_content in an edition object.
#
# @return [Bib::Edition, nil] nil when the edition text is missing or empty
def fetch_edition
  text = fetch_edition_content
  edition =
    if !text || text.empty?
      nil
    else
      Bib::Edition.new(content: text)
    end
  # `&&=` only overwrites a flag that is currently truthy.
  @errors[:standard_edition] &&= edition.nil?
  edition
end

#fetch_relation ⇒ Array<Relaton::Bib::Relation>

Returns:

  • (Array<Relaton::Bib::Relation>)


80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/relaton/ecma/standard_parser.rb', line 80

# Build an "updates" relation for every entry of the archives list.
#
# The text of each entry's <span> is handed to
# EditionParser#edition_id_parts, which yields reference, edition, date and
# volume. Entries with no <span>, or with no edition, are skipped.
#
# @return [Array<Bib::Relation>] one relation per usable archive entry
def fetch_relation # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity
  edition_parser = EditionParser.new(doc: @doc, bib: {}, errors: @errors)
  result = @doc.xpath("//ul[@class='ecma-item-archives']/li").filter_map do |rel|
    # An entry without a <span> has nothing to parse; skip it rather than
    # raising NoMethodError on nil and aborting the whole document.
    label = rel.at("span")
    next unless label

    ref, ed, date, vol = edition_parser.edition_id_parts label.text
    next if ed.nil? || ed.empty?

    docid = Bib::Docidentifier.new(type: "ECMA", content: ref, primary: true)
    source = rel.xpath("span/a").map { |l| Bib::Uri.new type: "pdf", content: l[:href] }
    edition = Bib::Edition.new content: ed
    extent = edition_parser.create_extent(vol)
    # `&&=` only overwrites a flag that is currently truthy.
    @errors[:standard_relation_extent] &&= extent.nil?
    bibitem = ItemData.new(
      docidentifier: [docid], formattedref: Bib::Formattedref.new(content: ref), date: date, edition: edition,
      source: source, extent: extent
    )
    Bib::Relation.new(type: "updates", bibitem: bibitem)
  end
  @errors[:standard_relation] &&= result.empty?
  result
end

#fetch_source ⇒ Array<Relaton::Bib::Uri>

Returns:

  • (Array<Relaton::Bib::Uri>)


68
69
70
71
72
73
74
75
76
77
# File 'lib/relaton/ecma/standard_parser.rb', line 68

# Collect the source links of the standard: the hit's own href ("src"), the
# first link found in the content wrapper ("pdf"), and whatever
# edition_translation_source returns for the current edition text.
#
# @return [Array<Bib::Uri>]
def fetch_source # rubocop:disable Metrics/AbcSize
  hit_href = @hit[:href]
  pdf_link = @doc.at('//div[@class="ecma-item-content-wrapper"]/span/a',
                     '//div[@class="ecma-item-content-wrapper"]/a')

  links = []
  links << Bib::Uri.new(type: "src", content: hit_href) if hit_href
  links << Bib::Uri.new(type: "pdf", content: pdf_link[:href]) if pdf_link

  all_links = links + edition_translation_source(fetch_edition_content)
  # `&&=` only overwrites a flag that is currently truthy.
  @errors[:standard_source] &&= all_links.empty?
  all_links
end

#fetch_title ⇒ Array<Relaton::Bib::Title>

Returns:

  • (Array<Relaton::Bib::Title>)


37
38
39
40
41
42
43
# File 'lib/relaton/ecma/standard_parser.rb', line 37

# Turn every short-description paragraph of the page into an English title.
#
# @return [Array<Bib::Title>] titles with surrounding whitespace stripped
def fetch_title
  nodes = @doc.xpath('//p[@class="ecma-item-short-description"]')
  titles = nodes.map do |node|
    Bib::Title.new(content: node.text.strip, language: "en", script: "Latn")
  end
  # `&&=` only overwrites a flag that is currently truthy.
  @errors[:standard_title] &&= titles.empty?
  titles
end

#to_bib_hash ⇒ Hash

Returns bibliographic item attributes.

Returns:

  • (Hash)

    bibliographic item attributes



18
19
20
21
22
# File 'lib/relaton/ecma/standard_parser.rb', line 18

# Assemble the bibliographic item attributes: start from default_bib_hash
# and fill one key per entry of ATTRS by calling the matching fetch_* method.
#
# @return [Hash] bibliographic item attributes
def to_bib_hash
  ATTRS.each_with_object(default_bib_hash) do |attr, bib|
    bib[attr] = send("fetch_#{attr}")
  end
end

#translation_source ⇒ Array

Returns precomputed translation sources.

Returns:

  • (Array)

    precomputed translation sources



25
26
27
# File 'lib/relaton/ecma/standard_parser.rb', line 25

# Translation sources, computed by parse_translation_source on first use and
# cached for later calls (a nil or false result is not cached).
#
# @return [Array] precomputed translation sources
def translation_source
  return @translation_source if @translation_source

  @translation_source = parse_translation_source
end