Module: Iev::SubjectAreas
- Defined in:
- lib/iev/subject_areas.rb
Defined Under Namespace
Classes: FetchError
Constant Summary collapse
- DATA_FILE =
File.expand_path("../../data/subject_areas.yaml", __dir__)
- AREAS_URL =
"https://electropedia.org/iev/iev.nsf/" \
  "6d6bdd8667c378f7c12581fa003d80e7?OpenForm"
- SECTIONS_URL_TEMPLATE =
"https://electropedia.org/iev/iev.nsf/" \
  "index?openform&part=%<part>s"
- MIN_PAGE_SIZE =
15_000
- FETCH_DELAY =
5
- RETRY_DELAY =
30
- MAX_RETRIES =
2
Class Method Summary collapse
-
.all ⇒ Array<SubjectArea>
Return all subject areas with their sections.
-
.area_for(ievref) ⇒ SubjectArea?
Find the subject area for any IEV reference.
-
.area_for_section(section_code) ⇒ SubjectArea?
Return the parent area for a given section code.
-
.area_uri(code) ⇒ String
URI for a subject area concept.
-
.fetch ⇒ Object
Fetch all subject areas and their sections over the network, writing the result to the bundled data file.
- .fetch_areas ⇒ Object
- .fetch_sections(part) ⇒ Object
-
.find_area(code) ⇒ SubjectArea?
Find a single subject area by its numeric code.
-
.find_section(section_code) ⇒ Section?
Find a single section by its section code.
-
.reload! ⇒ Object
Clear cached typed objects (useful after fetch updates raw data).
-
.section_for(ievref) ⇒ Section?
Find the section for any IEV reference.
-
.section_uri(code) ⇒ String
URI for a section concept.
-
.sections_for(code) ⇒ Array<Section>
Return all sections for a given area code.
Class Method Details
.all ⇒ Array<SubjectArea>
Return all subject areas with their sections.
45 46 47 |
# File 'lib/iev/subject_areas.rb', line 45

# Return all subject areas with their sections.
#
# The list is built once from the "areas" entries of the raw data and then
# memoized in @all.
#
# @return [Array<SubjectArea>]
def all
  return @all if @all

  @all = raw_data["areas"].map { |entry| build_area(entry) }
end
.area_for(ievref) ⇒ SubjectArea?
Find the subject area for any IEV reference.
83 84 85 86 |
# File 'lib/iev/subject_areas.rb', line 83

# Find the subject area for any IEV reference.
#
# The reference is parsed with IevCode and its area code is looked up
# through find_area.
#
# @param ievref the IEV reference handed to IevCode.new
# @return [SubjectArea, nil]
def area_for(ievref)
  find_area(IevCode.new(ievref).area_code)
end
.area_for_section(section_code) ⇒ SubjectArea?
Return the parent area for a given section code.
73 74 75 76 |
# File 'lib/iev/subject_areas.rb', line 73

# Return the parent area for a given section code.
#
# @param section_code the code passed to find_section
# @return [SubjectArea, nil] nil when the section itself is unknown
def area_for_section(section_code)
  section = find_section(section_code)
  return nil unless section

  find_area(section.area_code)
end
.area_uri(code) ⇒ String
URI for a subject area concept.
30 31 32 |
# File 'lib/iev/subject_areas.rb', line 30

# URI for a subject area concept: the code prefixed with "area-".
#
# @param code the area code (converted to a string)
# @return [String]
def area_uri(code)
  format("area-%s", code)
end
.fetch ⇒ Object
Fetch all subject areas and their sections over the network, writing the result to the bundled data file.
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/iev/subject_areas.rb', line 98

# Fetch every subject area together with its sections, resuming from the
# "subject_areas.yaml" cache when one exists.
#
# A cache that passes complete? is returned untouched. Otherwise the area
# list is re-fetched, cached entries are reused for areas that are still
# listed, and sections are fetched for every area not yet marked "fetched".
# The merged list is written back to the cache when an area processed at
# every 10th position finishes, and once more at the end.
#
# @return [Hash] { "areas" => [...] }, or the complete cached data exactly as
#   read_cache returned it
# @raise whatever fetch_areas raises; a FetchError from fetch_sections is
#   rescued per area and that area is left without the "fetched" flag
def fetch
  cached = read_cache("subject_areas.yaml")
  return cached if cached && complete?(cached)

  # A cache that lacks an "areas" list is treated like an empty one, so the
  # progress message below cannot fail on nil.
  cached_areas = (cached && cached["areas"]) || []
  fresh_areas = fetch_areas
  puts "Found #{fresh_areas.length} areas (#{cached_areas.length} cached)" if $stdout.tty?

  # Merge: the fresh listing decides which areas exist and in what order;
  # an entry already present in the cache (with its sections) wins.
  by_code = cached_areas.to_h { |a| [a["code"], a] }
  areas = fresh_areas.map { |fa| by_code[fa["code"]] ||= fa }

  # Index of the last area that still needs a request. No request follows
  # it, so no delay is needed after it.
  last_pending = areas.rindex { |a| !a["fetched"] }

  areas.each_with_index do |area, i|
    next if area["fetched"]

    begin
      area["sections"] = fetch_sections(area["code"])
      area["fetched"] = true
    rescue FetchError
      area["sections"] ||= []
      warn "IEV: Skipping area #{area['code']} due to WAF"
    end

    puts "[#{i + 1}/#{areas.length}] #{area['code']}: #{area['title']} — #{area['sections'].length} sections" if $stdout.tty?

    # Save progress every 10 areas so partial results survive WAF failures
    write_cache("subject_areas.yaml", { "areas" => areas }) if ((i + 1) % 10).zero?

    sleep FETCH_DELAY unless i == last_pending
  end

  result = { "areas" => areas }
  write_cache("subject_areas.yaml", result)
  result
end
.fetch_areas ⇒ Object
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# File 'lib/iev/subject_areas.rb', line 139

# Download the page at AREAS_URL and extract one entry per subject area.
#
# Every <a> element whose href carries a numeric "part=" parameter and whose
# text is non-empty yields { "code", "title", "sections" => [] }; when a code
# occurs more than once the first occurrence is kept.
#
# @return [Array<Hash>]
def fetch_areas
  page = Nokogiri::HTML(fetch_page_with_retry(AREAS_URL))

  found = page.css("a").each_with_object([]) do |anchor, acc|
    target = anchor["href"].to_s
    next unless target.include?("part=")

    part_match = target.match(/part=(\d+)/)
    next if part_match.nil?

    label = anchor.text.strip
    next if label.empty?

    acc << { "code" => part_match[1], "title" => label, "sections" => [] }
  end

  found.uniq { |entry| entry["code"] }
end
.fetch_sections(part) ⇒ Object
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
# File 'lib/iev/subject_areas.rb', line 160

# Download the section index for one part and extract its sections.
#
# The URL is built from SECTIONS_URL_TEMPLATE. Every <td> whose stripped text
# has the form "Section <code>: <title>" yields { "code", "title" }; when a
# code occurs more than once the first occurrence is kept.
#
# @param part the value substituted for %<part>s in the URL template
# @return [Array<Hash>]
def fetch_sections(part)
  page_url = format(SECTIONS_URL_TEMPLATE, part: part)
  document = Nokogiri::HTML(fetch_page_with_retry(page_url))
  heading = /\ASection\s+([\d-]+):\s*(.+)\z/

  rows = document.css("td").each_with_object([]) do |cell, acc|
    found = cell.text.strip.match(heading)
    next unless found

    acc << { "code" => found[1], "title" => found[2].strip }
  end

  rows.uniq { |row| row["code"] }
end
.find_area(code) ⇒ SubjectArea?
Find a single subject area by its numeric code. O(1) indexed.
52 53 54 |
# File 'lib/iev/subject_areas.rb', line 52

# Find a single subject area by its numeric code.
#
# The code is converted to a string and looked up in area_index.
#
# @param code the area code (any object responding to to_s)
# @return [SubjectArea, nil]
def find_area(code)
  key = code.to_s
  area_index[key]
end
.find_section(section_code) ⇒ Section?
Find a single section by its section code. O(1) indexed.
66 67 68 |
# File 'lib/iev/subject_areas.rb', line 66

# Find a single section by its section code.
#
# The code is converted to a string and looked up in section_index.
#
# @param section_code the section code (any object responding to to_s)
# @return [Section, nil]
def find_section(section_code)
  key = section_code.to_s
  section_index[key]
end
.reload! ⇒ Object
Clear cached typed objects (useful after fetch updates raw data).
177 178 179 180 181 182 |
# File 'lib/iev/subject_areas.rb', line 177

# Clear cached typed objects (useful after fetch updates raw data).
#
# Resets every memoized instance variable, including @all, which backs
# .all; without that reset .all keeps returning the objects built from the
# old raw data.
#
# @return [nil]
def reload!
  @all = nil
  @typed_areas = nil
  @area_index = nil
  @section_index = nil
  @raw_data = nil
end
.section_for(ievref) ⇒ Section?
Find the section for any IEV reference.
91 92 93 94 |
# File 'lib/iev/subject_areas.rb', line 91

# Find the section for any IEV reference.
#
# @param ievref the IEV reference handed to IevCode.new
# @return [Section, nil] nil when the parsed reference has no section code
def section_for(ievref)
  parsed = IevCode.new(ievref)
  return nil unless parsed.section_code

  find_section(parsed.section_code)
end
.section_uri(code) ⇒ String
URI for a section concept.
37 38 39 |
# File 'lib/iev/subject_areas.rb', line 37

# URI for a section concept: the code prefixed with "section-".
#
# @param code the section code (converted to a string)
# @return [String]
def section_uri(code)
  format("section-%s", code)
end
.sections_for(code) ⇒ Array<Section>
Return all sections for a given area code.
59 60 61 |
# File 'lib/iev/subject_areas.rb', line 59

# Return all sections for a given area code.
#
# @param code the area code passed to find_area
# @return [Array<Section>] empty when the area is unknown or has no sections
def sections_for(code)
  area = find_area(code)
  return [] if area.nil?

  area.sections || []
end