Module: Iev::SubjectAreas

Defined in:
lib/iev/subject_areas.rb

Defined Under Namespace

Classes: FetchError

Constant Summary

# Path of the subject-areas YAML data file, resolved relative to this
# source file's directory.
DATA_FILE =
File.expand_path("../../data/subject_areas.yaml", __dir__)
# Page downloaded by `fetch_areas` to discover the list of subject areas.
AREAS_URL =
"https://electropedia.org/iev/iev.nsf/" \
"6d6bdd8667c378f7c12581fa003d80e7?OpenForm"
# URL template for one area's index page; `fetch_sections` fills in `part`.
SECTIONS_URL_TEMPLATE =
"https://electropedia.org/iev/iev.nsf/" \
"index?openform&part=%<part>s"
# NOTE(review): presumably the smallest response size accepted as a real
# page (rather than a block page) -- its use is not visible here; confirm.
MIN_PAGE_SIZE =
15_000
# Seconds `fetch` sleeps between consecutive area requests.
FETCH_DELAY =
5
# NOTE(review): presumably the wait in seconds before retrying a failed
# page request -- confirm against `fetch_page_with_retry`.
RETRY_DELAY =
30
# NOTE(review): presumably the retry limit for a single page request --
# confirm against `fetch_page_with_retry`.
MAX_RETRIES =
2

Class Method Summary

Class Method Details

.all ⇒ Array<SubjectArea>

Return all subject areas with their sections.

Returns:



46
47
48
# File 'lib/iev/subject_areas.rb', line 46

# Return every subject area, built once from the raw data and memoised.
#
# Each entry of `raw_data["areas"]` is passed through `build_area`; the
# resulting array is kept in `@typed_areas` and reused on later calls.
#
# @return [Array<SubjectArea>]
def all
  return @typed_areas if @typed_areas

  @typed_areas = raw_data["areas"].map { |entry| build_area(entry) }
end

.area_for(ievref) ⇒ SubjectArea?

Find the subject area for any IEV reference.

Parameters:

  • ievref (String)

    e.g. “103-01-02”

Returns:



84
85
86
87
# File 'lib/iev/subject_areas.rb', line 84

# Find the subject area that an IEV reference belongs to.
#
# The reference is parsed with `IevCode` and its area code is looked up
# through `find_area`.
#
# @param ievref [String] e.g. "103-01-02"
# @return [SubjectArea, nil]
def area_for(ievref)
  find_area(IevCode.new(ievref).area_code)
end

.area_for_section(section_code) ⇒ SubjectArea?

Return the parent area for a given section code.

Parameters:

  • section_code (String)

    e.g. “102-01”

Returns:



74
75
76
77
# File 'lib/iev/subject_areas.rb', line 74

# Return the parent area of a section.
#
# @param section_code [String] e.g. "102-01"
# @return [SubjectArea, nil] nil when the section is unknown
def area_for_section(section_code)
  section = find_section(section_code)
  return nil unless section

  find_area(section.area_code)
end

.area_uri(code) ⇒ String

URI for a subject area concept.

Parameters:

  • code (String, Integer)

    e.g. “102”

Returns:

  • (String)

    e.g. “area-102”



31
32
33
# File 'lib/iev/subject_areas.rb', line 31

# Build the URI fragment for a subject area concept.
#
# @param code [String, Integer] e.g. "102"
# @return [String] e.g. "area-102"
def area_uri(code)
  suffix = code.to_s
  "area-#{suffix}"
end

.fetch ⇒ Object

— Fetching (network, writes to bundled data file) —



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/iev/subject_areas.rb', line 99

# Download the area list and each area's sections, resuming from the cache.
#
# Flow:
# 1. Read "subject_areas.yaml" through `read_cache`; return it untouched
#    when `complete?` accepts it.
# 2. Fetch the current area list and reuse the cached entry (including its
#    sections and "fetched" flag) for every area code that is already
#    cached. Cached areas missing from the fresh list are dropped.
# 3. For each area not yet marked "fetched", download its sections. A
#    FetchError leaves the area unfetched (with an empty section list if it
#    had none) so a later run can retry it.
# 4. Persist through `write_cache` every 10th position and once at the end.
#
# Progress lines are printed only when $stdout is a TTY.
#
# @return [Hash] `{ "areas" => [...] }`, the data that was written, or the
#   cached value when it was already complete
def fetch
  cached = read_cache("subject_areas.yaml")
  return cached if cached && complete?(cached)

  # Tolerate a cache file that exists but has no "areas" key.
  cached_areas = (cached && cached["areas"]) || []
  fresh_areas = fetch_areas
  puts "Found #{fresh_areas.length} areas (#{cached_areas.length} cached)" if $stdout.tty?

  # Merge: keep existing (possibly already fetched) entries, add new areas,
  # in the order of the fresh list.
  cached_by_code = cached_areas.each_with_object({}) { |a, h| h[a["code"]] = a }
  areas = fresh_areas.map { |fa| cached_by_code[fa["code"]] || fa }

  areas.each_with_index do |area, i|
    next if area["fetched"]

    begin
      area["sections"] = fetch_sections(area["code"])
      area["fetched"] = true
    rescue FetchError
      area["sections"] ||= []
      warn "IEV: Skipping area #{area["code"]} due to WAF"
    end

    if $stdout.tty?
      puts "[#{i + 1}/#{areas.length}] #{area["code"]}: #{area["title"]} " \
           "(#{area["sections"].length} sections)"
    end

    # Save progress every 10 areas so partial results survive WAF failures
    write_cache("subject_areas.yaml", { "areas" => areas }) if ((i + 1) % 10).zero?

    # Throttle requests; no need to wait after the final area.
    sleep FETCH_DELAY unless i == areas.length - 1
  end

  result = { "areas" => areas }
  write_cache("subject_areas.yaml", result)
  result
end

.fetch_areas ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/iev/subject_areas.rb', line 140

# Download the area listing page and extract one entry per area.
#
# Every `<a>` whose href contains `part=<digits>` and whose text is not
# blank becomes `{ "code", "title", "sections" => [] }`. When a code is
# linked more than once, only its first occurrence is kept.
#
# @return [Array<Hash>]
def fetch_areas
  page = Nokogiri::HTML(fetch_page_with_retry(AREAS_URL))

  found = page.css("a").each_with_object([]) do |anchor, acc|
    target = anchor["href"].to_s
    # Yields nil both when "part=" is absent and when no digits follow it.
    part = target[/part=(\d+)/, 1]
    next if part.nil?

    label = anchor.text.strip
    next if label.empty?

    acc << { "code" => part, "title" => label, "sections" => [] }
  end

  found.uniq { |entry| entry["code"] }
end

.fetch_sections(part) ⇒ Object



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/iev/subject_areas.rb', line 161

# Download one area's index page and extract its sections.
#
# A table cell counts as a section when its stripped text is exactly
# "Section <code>: <title>" on a single line. When a code appears more
# than once, only its first occurrence is kept.
#
# @param part [String, Integer] area code substituted into the URL template
# @return [Array<Hash>] entries with "code" and "title"
def fetch_sections(part)
  page = Nokogiri::HTML(fetch_page_with_retry(format(SECTIONS_URL_TEMPLATE, part: part)))
  pattern = /\ASection\s+([\d-]+):\s*(.+)\z/

  matches = page.css("td").map { |cell| pattern.match(cell.text.strip) }.compact
  found = matches.map { |hit| { "code" => hit[1], "title" => hit[2].strip } }

  found.uniq { |entry| entry["code"] }
end

.find_area(code) ⇒ SubjectArea?

Find a single subject area by its numeric code. O(1) indexed.

Parameters:

  • code (String, Integer)

    e.g. “102” or 102

Returns:



53
54
55
# File 'lib/iev/subject_areas.rb', line 53

# Look up a single subject area by its code via `area_index`.
#
# @param code [String, Integer] e.g. "102" or 102; converted with `to_s`
# @return [SubjectArea, nil]
def find_area(code)
  key = code.to_s
  area_index[key]
end

.find_section(section_code) ⇒ Section?

Find a single section by its section code. O(1) indexed.

Parameters:

  • section_code (String)

    e.g. “102-01”

Returns:



67
68
69
# File 'lib/iev/subject_areas.rb', line 67

# Look up a single section by its code via `section_index`.
#
# @param section_code [String] e.g. "102-01"; converted with `to_s`
# @return [Section, nil]
def find_section(section_code)
  key = section_code.to_s
  section_index[key]
end

.reload! ⇒ Object

Clear cached typed objects (useful after fetch updates raw data).



178
179
180
181
182
183
# File 'lib/iev/subject_areas.rb', line 178

# Drop every memoised value (typed areas, both indexes and the raw data)
# by resetting the backing instance variables to nil.
#
# @return [nil]
def reload!
  @typed_areas = @area_index = @section_index = @raw_data = nil
end

.section_for(ievref) ⇒ Section?

Find the section for any IEV reference.

Parameters:

  • ievref (String)

    e.g. “103-01-02”

Returns:



92
93
94
95
# File 'lib/iev/subject_areas.rb', line 92

# Find the section that an IEV reference belongs to.
#
# The reference is parsed with `IevCode`; when it yields no section code
# the result is nil, otherwise the code is looked up via `find_section`.
#
# @param ievref [String] e.g. "103-01-02"
# @return [Section, nil]
def section_for(ievref)
  section_code = IevCode.new(ievref).section_code
  return nil unless section_code

  find_section(section_code)
end

.section_uri(code) ⇒ String

URI for a section concept.

Parameters:

  • code (String)

    e.g. “103-01”

Returns:

  • (String)

    e.g. “section-103-01”



38
39
40
# File 'lib/iev/subject_areas.rb', line 38

# Build the URI fragment for a section concept.
#
# @param code [String] e.g. "103-01"
# @return [String] e.g. "section-103-01"
def section_uri(code)
  suffix = code.to_s
  "section-#{suffix}"
end

.sections_for(code) ⇒ Array<Section>

Return all sections for a given area code.

Parameters:

  • code (String, Integer)

    area code, e.g. “102”

Returns:



60
61
62
# File 'lib/iev/subject_areas.rb', line 60

# Return all sections of an area.
#
# @param code [String, Integer] area code, e.g. "102"
# @return [Array<Section>] empty when the area is unknown or has no
#   section list
def sections_for(code)
  area = find_area(code)
  return [] if area.nil?

  area.sections || []
end