Class: Glossarist::ReferenceExtractor
- Inherits:
-
Object
- Object
- Glossarist::ReferenceExtractor
show all
- Defined in:
- lib/glossarist/reference_extractor.rb
Defined Under Namespace
Classes: IdentifierResolver, Pattern
Constant Summary
collapse
- LANG_CODES =
Glossarist::LANG_CODES
Class Method Summary
collapse
Instance Method Summary
collapse
Class Method Details
.identifier_resolvers ⇒ Object
27
28
29
|
# File 'lib/glossarist/reference_extractor.rb', line 27
# Returns a copy (+dup+) of the registered identifier resolvers rather than
# the registry object itself.
# NOTE(review): @identifier_resolvers is initialised outside this listing —
# presumably an Array on the class; confirm in the source file.
def identifier_resolvers
@identifier_resolvers.dup
end
|
.patterns ⇒ Object
23
24
25
|
# File 'lib/glossarist/reference_extractor.rb', line 23
# Returns a copy (+dup+) of the registered patterns rather than the registry
# object itself.
# NOTE(review): @patterns is initialised outside this listing — presumably an
# Array on the class; confirm in the source file.
def patterns
@patterns.dup
end
|
.register_identifier_resolver(prefix, &resolver) ⇒ Object
14
15
16
17
|
# File 'lib/glossarist/reference_extractor.rb', line 14
# Registers a resolver for identifiers that begin with +prefix+.
#
# The block is wrapped, together with the prefix, in an IdentifierResolver
# and appended to the class-level resolver registry.
#
# @param prefix [String] leading text used to select this resolver
# @param resolver [Proc] block stored as the resolver
# @return the resolver registry after the new entry has been appended
def register_identifier_resolver(prefix, &resolver)
  entry = IdentifierResolver.new(prefix: prefix, resolver: resolver)
  @identifier_resolvers << entry
end
|
.register_pattern(name:, regex:, &resolver) ⇒ Object
19
20
21
|
# File 'lib/glossarist/reference_extractor.rb', line 19
# Registers a named text pattern together with the block that turns its
# captures into a reference.
#
# @param name identifier for the pattern
# @param regex [Regexp] expression the text is scanned with
# @param resolver [Proc] block stored as the pattern's resolver
# @return the pattern registry after the new entry has been appended
def register_pattern(name:, regex:, &resolver)
  entry = Pattern.new(name: name, regex: regex, resolver: resolver)
  @patterns << entry
end
|
Instance Method Details
Extract all reference types from a managed concept.
215
216
217
218
219
|
# File 'lib/glossarist/reference_extractor.rb', line 215
# Returns the concatenation of two reference lists computed from +concept+
# (named concept_refs and asset_refs below).
# NOTE(review): the method name and the names of both helper calls are
# missing from this listing (only "(concept)" remains); restore them from
# lib/glossarist/reference_extractor.rb before using this snippet.
def (concept)
concept_refs = (concept)
asset_refs = (concept)
concept_refs + asset_refs
end
|
Extract asset references from model attributes (NonVerbRep, GraphicalSymbol).
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
|
# File 'lib/glossarist/reference_extractor.rb', line 168
# Collects AssetReference objects from every localization of +concept+:
# one per NonVerbRep whose +ref+ is non-blank, and one per
# Designation::GraphicalSymbol term whose +image+ is non-blank. Paths are
# stripped of surrounding whitespace.
# NOTE(review): the method name is missing from this listing.
def (concept)
refs = []
concept.localizations.each do |l10n|
# Array() lets a nil or single non_verb_rep value be iterated uniformly.
Array(l10n.non_verb_rep).each do |nvr|
next unless nvr.is_a?(NonVerbRep) && nvr.ref && !nvr.ref.strip.empty?
refs << AssetReference.new(path: nvr.ref.strip)
end
# A localization without data, or without terms, contributes no term images.
(l10n.data&.terms || []).each do |term|
if term.is_a?(Designation::GraphicalSymbol) && term.image && !term.image.strip.empty?
refs << AssetReference.new(path: term.image.strip)
end
end
end
refs
end
|
Extract bibliographic xrefs from model-level source citations.
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
# File 'lib/glossarist/reference_extractor.rb', line 189
# Builds BibliographicReference objects from the sources of every
# localization of +concept+. Sources without an origin, or whose origin has
# no ref, are skipped. For each remaining ref it emits:
#   * an anchor equal to ref.source (unstripped) when that text is non-blank;
#   * when both ref.source and ref.id are present, an anchor "<source> <id>"
#     and an anchor holding the id alone (as a String).
# NOTE(review): the method name is missing from this listing.
def (concept)
refs = []
concept.localizations.each do |l10n|
l10n.all_sources.each do |source|
origin = source.origin
next unless origin
ref = origin.ref
next unless ref
source_text = ref.source
if source_text && !source_text.strip.empty?
refs << BibliographicReference.new(anchor: source_text)
end
# The combined and id-only anchors need both parts of the citation.
next unless ref.source && ref.id
key = "#{ref.source} #{ref.id}"
refs << BibliographicReference.new(anchor: key)
refs << BibliographicReference.new(anchor: ref.id.to_s)
end
end
refs
end
|
50
51
52
53
54
55
56
|
# File 'lib/glossarist/reference_extractor.rb', line 50
# Iterates LANG_CODES and flattens the per-language results into one Array.
# Languages whose entry in +concept_hash+ is not a Hash contribute nothing.
# NOTE(review): the method name and the name of the helper applied to
# concept_hash[lang] are missing from this listing.
def (concept_hash)
LANG_CODES.flat_map do |lang|
next [] unless concept_hash[lang].is_a?(Hash)
(concept_hash[lang])
end
end
|
46
47
48
|
# File 'lib/glossarist/reference_extractor.rb', line 46
# Flattens the per-text results for every text returned by
# gather_texts(lc_hash) into one Array.
# NOTE(review): the method name and the name of the per-text helper are
# missing from this listing; gather_texts is defined outside it.
def (lc_hash)
gather_texts(lc_hash).flat_map { |t| (t) }
end
|
64
65
66
|
# File 'lib/glossarist/reference_extractor.rb', line 64
# Flattens the per-text results for every entry of l10n.text_content into
# one Array.
# NOTE(review): the method name and the name of the per-text helper are
# missing from this listing.
def (l10n)
l10n.text_content.flat_map { |t| (t) }
end
|
58
59
60
61
62
|
# File 'lib/glossarist/reference_extractor.rb', line 58
# Flattens the per-localization results for every localization of +concept+
# into one Array.
# NOTE(review): the method name and the name of the per-localization helper
# are missing from this listing.
def (concept)
concept.localizations.flat_map do |l10n|
(l10n)
end
end
|
32
33
34
35
36
37
38
39
40
41
42
43
44
|
# File 'lib/glossarist/reference_extractor.rb', line 32
# Scans +text+ with every registered pattern, passes each match's captures
# (preceded by this extractor) to the pattern's resolver, keeps the truthy
# results, and returns them after deduplicate(refs).
# Returns [] when +text+ is not a String.
# NOTE(review): the method name is missing from this listing; deduplicate is
# defined outside it.
def (text)
return [] unless text.is_a?(String)
refs = []
self.class.patterns.each do |pattern|
text.scan(pattern.regex).each do |captures|
# String#scan yields a bare String when the regex has no capture groups;
# wrap it so the splat below always receives an Array.
captures = [captures] unless captures.is_a?(Array)
ref = pattern.resolver.call(self, *captures)
refs << ref if ref
end
end
deduplicate(refs)
end
|
#resolve_asciidoc_xref(target) ⇒ Object
221
222
223
|
# File 'lib/glossarist/reference_extractor.rb', line 221
# Builds a bibliographic reference for an AsciiDoc xref target.
#
# @param target [String] xref target; surrounding whitespace is removed
# @return [BibliographicReference] reference anchored at the stripped target
def resolve_asciidoc_xref(target)
  anchor = target.strip
  BibliographicReference.new(anchor: anchor)
end
|
#resolve_by_identifier(identifier, display) ⇒ Object
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
# File 'lib/glossarist/reference_extractor.rb', line 83
# Chooses how to resolve a mention from the form of its identifier.
#
# Registered identifier resolvers are tried first, in the order the class
# returns them; the first one whose prefix starts +identifier+ handles the
# mention and its result is returned as-is (even when nil). Otherwise an
# identifier consisting of a digit followed by digits, dots or hyphens is
# resolved as a local concept id, and anything else as a designation.
#
# @param identifier [String] identifier part of the mention
# @param display [String, nil] optional display text
# @return the value produced by the selected resolver
def resolve_by_identifier(identifier, display)
  matching = self.class.identifier_resolvers.find do |candidate|
    identifier.start_with?(candidate.prefix)
  end
  return matching.resolver.call(self, identifier, display) if matching

  case identifier
  when /\A\d[\d.-]*\z/ then resolve_local(display || identifier, identifier)
  else resolve_designation(identifier, display)
  end
end
|
#resolve_cite_key(identifier, display) ⇒ Object
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# File 'lib/glossarist/reference_extractor.rb', line 117
# Resolves a "cite:" mention into a ConceptReference of ref_type "cite".
#
# The "cite:" prefix and surrounding whitespace are removed. A key wrapped
# in double quotes is unquoted, with doubled quotes ("") inside collapsed to
# a single quote character.
#
# @param identifier [String] mention identifier, normally starting with "cite:"
# @param display [String, nil] optional display text; falls back to the key
# @return [ConceptReference, nil] nil when no usable key remains
def resolve_cite_key(identifier, display)
  key = identifier.delete_prefix("cite:").strip
  return nil if key.empty?

  if key.length >= 2 && key.start_with?('"') && key.end_with?('"')
    key = key[1..-2].gsub('""', '"')
    # A quoted but blank key (e.g. cite:"") names nothing. Treat it like the
    # empty case above instead of emitting a reference with an empty id.
    return nil if key.strip.empty?
  end

  ConceptReference.new(
    concept_id: key,
    source: nil,
    term: display || key,
    ref_type: "cite",
  )
end
|
#resolve_designation(text, display) ⇒ Object
108
109
110
111
112
113
114
115
|
# File 'lib/glossarist/reference_extractor.rb', line 108
# Builds a designation-type concept reference, i.e. one identified only by
# its term text.
#
# @param text [String] designation text taken from the mention
# @param display [String, nil] display text; used as the term when given
# @return [ConceptReference] reference with no concept id and no source
def resolve_designation(text, display)
  label = display || text
  ConceptReference.new(term: label, concept_id: nil, source: nil, ref_type: "designation")
end
|
#resolve_generic_urn(urn, display) ⇒ Object
156
157
158
159
160
161
162
163
|
# File 'lib/glossarist/reference_extractor.rb', line 156
# Builds a "urn"-type concept reference whose source is the URN itself.
#
# @param urn [String] URN taken from the mention
# @param display [String, nil] display text; an empty string is used when nil
# @return [ConceptReference] reference with no concept id
def resolve_generic_urn(urn, display)
  label = display || ""
  ConceptReference.new(term: label, concept_id: nil, source: urn, ref_type: "urn")
end
|
#resolve_iec_urn(urn, display) ⇒ Object
133
134
135
136
137
138
139
140
141
142
|
# File 'lib/glossarist/reference_extractor.rb', line 133
# Builds a "urn"-type ConceptReference for an IEC URN. The source is always
# the fixed string "urn:iec:std:iec:60050"; the term is +display+, or an
# empty string when +display+ is nil.
# NOTE(review): in "concept_id = (urn)" the name of the helper that derives
# the concept id from the URN appears to have been dropped from this
# listing; as written, concept_id is the whole URN. Restore the call from
# lib/glossarist/reference_extractor.rb.
def resolve_iec_urn(urn, display)
concept_id = (urn)
ConceptReference.new(
term: display || "",
concept_id: concept_id,
source: "urn:iec:std:iec:60050",
ref_type: "urn",
)
end
|
#resolve_image_ref(path) ⇒ Object
225
226
227
|
# File 'lib/glossarist/reference_extractor.rb', line 225
# Builds an asset reference for an image path found in text.
#
# @param path [String] image path; surrounding whitespace is removed
# @return [AssetReference] reference to the stripped path
def resolve_image_ref(path)
  cleaned_path = path.strip
  AssetReference.new(path: cleaned_path)
end
|
#resolve_iso_urn(urn, display) ⇒ Object
144
145
146
147
148
149
150
151
152
153
154
|
# File 'lib/glossarist/reference_extractor.rb', line 144
# Builds a "urn"-type ConceptReference for an ISO URN of the form
# "urn:iso:std:iso:<digits>" optionally followed by ":<rest>". The source is
# the URN up to and including the digits; the term is +display+, or an empty
# string when +display+ is nil.
# Returns nil (the value of the unmatched +if+) when +urn+ does not have
# that form.
# NOTE(review): in "term_id = (m[2])" the name of the helper applied to the
# optional trailing part appears to have been dropped from this listing; as
# written, term_id is the raw capture (nil when absent). Restore the call
# from lib/glossarist/reference_extractor.rb.
def resolve_iso_urn(urn, display)
if (m = urn.match(/\Aurn:iso:std:iso:(\d+)(?::(.*))?\z/))
term_id = (m[2])
ConceptReference.new(
term: display || "",
concept_id: term_id,
source: "urn:iso:std:iso:#{m[1]}",
ref_type: "urn",
)
end
end
|
#resolve_local(term, concept_id) ⇒ Object
99
100
101
102
103
104
105
106
|
# File 'lib/glossarist/reference_extractor.rb', line 99
# Builds a "local" concept reference, i.e. one pointing at a concept id with
# no external source.
#
# @param term [String] term text; surrounding whitespace is removed
# @param concept_id [String] concept id; surrounding whitespace is removed
# @return [ConceptReference] reference with a nil source
def resolve_local(term, concept_id)
  clean_term = term.strip
  clean_id = concept_id.strip
  ConceptReference.new(term: clean_term, concept_id: clean_id, source: nil, ref_type: "local")
end
|
#resolve_mention(content) ⇒ Object
Unified concept mention dispatcher. Content is the text inside {…}.
70
71
72
73
74
75
76
77
78
79
80
81
|
# File 'lib/glossarist/reference_extractor.rb', line 70
# Entry point for concept mentions: splits the mention body into an
# identifier and an optional display text, then hands both to
# resolve_by_identifier.
#
# The body is split at its first comma only, so any later commas stay in the
# display text. Without a comma the whole (stripped) body is the identifier
# and the display text is nil.
#
# @param content [String] text found between the mention's braces
# @return whatever resolve_by_identifier returns
def resolve_mention(content)
  identifier, comma, display = content.strip.partition(",")
  return resolve_by_identifier(identifier, nil) if comma.empty?

  resolve_by_identifier(identifier.strip, display.strip)
end
|