Module: Iev::SubjectAreas

Defined in:
lib/iev/subject_areas.rb

Defined Under Namespace

Classes: FetchError

Constant Summary

# Absolute path to data/subject_areas.yaml, resolved two directories above
# the directory containing this source file.
DATA_FILE =
File.expand_path("../../data/subject_areas.yaml", __dir__)
# Electropedia page requested by fetch_areas to discover the subject areas.
AREAS_URL =
"https://electropedia.org/iev/iev.nsf/" \
"6d6bdd8667c378f7c12581fa003d80e7?OpenForm"
# Format template for an area's section index; fetch_sections fills in the
# named `part` reference with the area code.
SECTIONS_URL_TEMPLATE =
"https://electropedia.org/iev/iev.nsf/" \
"index?openform&part=%<part>s"
# NOTE(review): presumably the smallest page size accepted as a real response
# (vs. a WAF block page); its unit and use are not visible here - confirm.
MIN_PAGE_SIZE =
15_000
# Passed to `sleep` (seconds) between consecutive areas in fetch.
FETCH_DELAY =
5
# NOTE(review): presumably the pause before retrying a failed page fetch;
# the code that uses it is not visible here - confirm.
RETRY_DELAY =
30
# NOTE(review): presumably the retry limit for a single page fetch; the code
# that uses it is not visible here - confirm.
MAX_RETRIES =
2

Class Method Summary

Class Method Details

.all ⇒ Array<Hash>

Return all subject areas with their sections.

Returns:

  • (Array<Hash>)

    each hash has “code”, “title”, “sections”



46
47
48
# File 'lib/iev/subject_areas.rb', line 46

# List every subject area held in the loaded data set.
#
# @return [Array<Hash>] the value stored under the "areas" key of `data`
def all
  dataset = data
  dataset["areas"]
end

.area_for_section(section_code) ⇒ Hash?

Return the parent area for a given section code.

Parameters:

  • section_code (String)

    e.g. “102-01”

Returns:

  • (Hash, nil)


80
81
82
83
84
85
# File 'lib/iev/subject_areas.rb', line 80

# Look up the area that owns a section.
#
# @param section_code [String] section code such as "102-01"; converted
#   with #to_s before comparison
# @return [Hash, nil] the first area whose "sections" contain the code, or
#   nil when no area does
def area_for_section(section_code)
  wanted = section_code.to_s
  all.each do |area|
    sections = area["sections"]
    # Areas without a "sections" entry can never match.
    next if sections.nil?

    return area if sections.any? { |section| section["code"] == wanted }
  end
  nil
end

.area_uri(code) ⇒ String

URI for a subject area concept.

Parameters:

  • code (String, Integer)

    e.g. “102”

Returns:

  • (String)

    e.g. “area-102”



31
32
33
# File 'lib/iev/subject_areas.rb', line 31

# Build the URI fragment that identifies a subject area concept.
#
# @param code [String, Integer] area code, e.g. "102"
# @return [String] the code prefixed with "area-", e.g. "area-102"
def area_uri(code)
  format("area-%s", code)
end

.fetch ⇒ Object

— Fetching (network, writes to bundled data file) —



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/iev/subject_areas.rb', line 89

# Fetch every subject area with its sections, resuming from cached progress.
#
# The cached data is returned as-is when `complete?` accepts it. Otherwise
# the area list is fetched again and merged with the cache, and sections are
# requested for every area not yet marked "fetched". Progress is written to
# the cache after every tenth area and once more when the loop finishes.
#
# An area whose section request raises FetchError keeps its previous (or
# empty) sections and stays unmarked, so a later run requests it again.
#
# @return [Hash] { "areas" => Array<Hash> }; each area has "code", "title",
#   "sections" and, once its sections were fetched, "fetched" => true
def fetch
  cache_name = "subject_areas.yaml"
  cached = read_cache(cache_name)
  return cached if cached && complete?(cached)

  # A cache hash without an "areas" key counts as empty instead of crashing.
  areas = (cached && cached["areas"]) || []
  fresh_areas = fetch_areas
  puts "Found #{fresh_areas.length} areas (#{areas.length} cached)" if $stdout.tty?

  # Merge: the fresh list decides which areas exist and in what order; a
  # cached entry with the same code wins, so fetched sections are kept.
  existing = areas.each_with_object({}) { |a, h| h[a["code"]] = a }
  areas = fresh_areas.map { |fa| existing[fa["code"]] ||= fa }

  areas.each_with_index do |area, i|
    next if area["fetched"]

    begin
      area["sections"] = fetch_sections(area["code"])
      area["fetched"] = true
    rescue FetchError
      area["sections"] ||= []
      warn "IEV: Skipping area #{area["code"]} due to WAF"
    end

    if $stdout.tty?
      puts "[#{i + 1}/#{areas.length}] #{area["code"]}: #{area["title"]} - " \
           "#{area["sections"].length} sections"
    end

    # Save progress every 10 areas so partial results survive WAF failures
    write_cache(cache_name, { "areas" => areas }) if ((i + 1) % 10).zero?

    sleep FETCH_DELAY unless i == areas.length - 1
  end

  result = { "areas" => areas }
  write_cache(cache_name, result)
  result
end

.fetch_areas ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/iev/subject_areas.rb', line 130

# Download the areas page and extract one entry per subject area link.
#
# A link counts when its href contains "part=<digits>" and its stripped text
# is non-empty. When several links share a code, only the first is kept.
#
# @return [Array<Hash>] entries with "code", "title" and an empty "sections"
def fetch_areas
  page = Nokogiri::HTML(fetch_page_with_retry(AREAS_URL))

  collected = page.css("a").each_with_object([]) do |anchor, list|
    target = anchor["href"].to_s
    next unless target.include?("part=")

    match = target.match(/part=(\d+)/)
    next if match.nil?

    label = anchor.text.strip
    next if label.empty?

    list << { "code" => match[1], "title" => label, "sections" => [] }
  end

  collected.uniq { |entry| entry["code"] }
end

.fetch_sections(part) ⇒ Object



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/iev/subject_areas.rb', line 151

# Download the section index of one area and extract its sections.
#
# A table cell counts when its stripped text has the single-line form
# "Section <code>: <title>". When several cells share a code, only the first
# is kept.
#
# @param part [String, Integer] area code inserted into SECTIONS_URL_TEMPLATE
# @return [Array<Hash>] entries with "code" and "title"
def fetch_sections(part)
  page = Nokogiri::HTML(fetch_page_with_retry(format(SECTIONS_URL_TEMPLATE, part: part)))

  found = page.css("td").map do |cell|
    hit = cell.text.strip.match(/\ASection\s+([\d-]+):\s*(.+)\z/)
    { "code" => hit[1], "title" => hit[2].strip } if hit
  end

  found.compact.uniq { |entry| entry["code"] }
end

.find_area(code) ⇒ Hash?

Find a single subject area by its numeric code.

Parameters:

  • code (String, Integer)

    e.g. “102” or 102

Returns:

  • (Hash, nil)


53
54
55
# File 'lib/iev/subject_areas.rb', line 53

# Look up one subject area by its code.
#
# @param code [String, Integer] area code, e.g. "102" or 102; compared as a
#   string
# @return [Hash, nil] the first area whose "code" equals the code, or nil
def find_area(code)
  wanted = code.to_s
  all.find { |area| area["code"] == wanted }
end

.find_section(section_code) ⇒ Hash?

Find a single section by its section code.

Parameters:

  • section_code (String)

    e.g. “102-01”

Returns:

  • (Hash, nil)


68
69
70
71
72
73
74
75
# File 'lib/iev/subject_areas.rb', line 68

# Look up one section by its code across all areas.
#
# @param section_code [String] section code such as "102-01"; converted
#   with #to_s before comparison
# @return [Hash, nil] the first matching section hash, or nil when no area
#   contains it
def find_section(section_code)
  wanted = section_code.to_s
  all.each do |area|
    candidates = area["sections"]
    # Areas without a "sections" entry have nothing to search.
    next if candidates.nil?

    candidates.each { |section| return section if section["code"] == wanted }
  end
  nil
end

.section_uri(code) ⇒ String

URI for a section concept.

Parameters:

  • code (String)

    e.g. “103-01”

Returns:

  • (String)

    e.g. “section-103-01”



38
39
40
# File 'lib/iev/subject_areas.rb', line 38

# Build the URI fragment that identifies a section concept.
#
# @param code [String] section code, e.g. "103-01"
# @return [String] the code prefixed with "section-", e.g. "section-103-01"
def section_uri(code)
  format("section-%s", code)
end

.sections_for(code) ⇒ Array<Hash>

Return all sections for a given area code.

Parameters:

  • code (String, Integer)

    area code, e.g. “102”

Returns:

  • (Array<Hash>)

    each hash has “code”, “title”



60
61
62
63
# File 'lib/iev/subject_areas.rb', line 60

# Return the sections of one subject area.
#
# Always returns an array: an unknown area code and an area that has no
# "sections" entry both yield an empty array.
#
# @param code [String, Integer] area code, e.g. "102"
# @return [Array<Hash>] section hashes with "code" and "title"
def sections_for(code)
  area = find_area(code)
  (area && area["sections"]) || []
end