Class: Glossarist::ReferenceExtractor
- Inherits:
-
Object
- Object
- Glossarist::ReferenceExtractor
show all
- Defined in:
- lib/glossarist/reference_extractor.rb
Defined Under Namespace
Classes: IdentifierResolver, Pattern
Constant Summary
collapse
- LANG_CODES =
Glossarist::LANG_CODES
Class Method Summary
collapse
Instance Method Summary
collapse
Class Method Details
.identifier_resolvers ⇒ Object
27
28
29
|
# File 'lib/glossarist/reference_extractor.rb', line 27
# Returns a shallow copy of the registered identifier resolvers, so adding to
# or removing from the returned collection leaves @identifier_resolvers as is.
#
# @return [Object] duplicate of @identifier_resolvers
def identifier_resolvers
  snapshot = @identifier_resolvers.dup
  snapshot
end
|
.patterns ⇒ Object
23
24
25
|
# File 'lib/glossarist/reference_extractor.rb', line 23
# Returns a shallow copy of the registered patterns, so adding to or removing
# from the returned collection leaves @patterns as is.
#
# @return [Object] duplicate of @patterns
def patterns
  snapshot = @patterns.dup
  snapshot
end
|
.register_identifier_resolver(prefix, &resolver) ⇒ Object
14
15
16
17
|
# File 'lib/glossarist/reference_extractor.rb', line 14
# Registers a resolver block for identifiers that start with +prefix+.
#
# The block is wrapped in an IdentifierResolver and appended to
# @identifier_resolvers.
#
# @param prefix [Object] prefix stored on the IdentifierResolver
# @param resolver [Proc] block stored as the resolver
# @return [Object] @identifier_resolvers, after the new entry was appended
def register_identifier_resolver(prefix, &resolver)
  entry = IdentifierResolver.new(prefix: prefix, resolver: resolver)
  @identifier_resolvers.push(entry)
end
|
.register_pattern(name:, regex:, &resolver) ⇒ Object
19
20
21
|
# File 'lib/glossarist/reference_extractor.rb', line 19
# Registers a named pattern together with the block that resolves its matches.
#
# The arguments are wrapped in a Pattern and appended to @patterns.
#
# @param name [Object] name stored on the Pattern
# @param regex [Object] regular expression stored on the Pattern
# @param resolver [Proc] block stored as the resolver
# @return [Object] @patterns, after the new entry was appended
def register_pattern(name:, regex:, &resolver)
  entry = Pattern.new(name: name, regex: regex, resolver: resolver)
  @patterns.push(entry)
end
|
Instance Method Details
Extract all reference types from a managed concept.
199
200
201
202
203
|
# File 'lib/glossarist/reference_extractor.rb', line 199
# Combines two reference collections computed from +concept+: the result bound
# to concept_refs followed by the result bound to asset_refs, joined with +.
#
# NOTE(review): the method name and the names of both helper calls are missing
# from this listing (only their argument lists remain). Restore them from
# lib/glossarist/reference_extractor.rb before relying on this code.
def (concept)
concept_refs = (concept)
asset_refs = (concept)
concept_refs + asset_refs
end
|
Extract asset references from model attributes (NonVerbRep, GraphicalSymbol).
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
# File 'lib/glossarist/reference_extractor.rb', line 152
# Collects an AssetReference for every asset path found on the localizations
# of +concept+:
#   * each NonVerbRep in l10n.non_verb_rep whose ref is non-blank, and
#   * each Designation::GraphicalSymbol in l10n.data.terms whose image is
#     non-blank.
# Paths are stripped of surrounding whitespace before being stored.
#
# NOTE(review): the method name is missing from this listing; restore it from
# lib/glossarist/reference_extractor.rb.
def (concept)
refs = []
concept.localizations.each do |l10n|
# Array() lets a nil or single non_verb_rep be iterated uniformly.
Array(l10n.non_verb_rep).each do |nvr|
next unless nvr.is_a?(NonVerbRep) && nvr.ref && !nvr.ref.strip.empty?
refs << AssetReference.new(path: nvr.ref.strip)
end
# Falls back to an empty list when data or its terms are nil.
(l10n.data&.terms || []).each do |term|
if term.is_a?(Designation::GraphicalSymbol) && term.image && !term.image.strip.empty?
refs << AssetReference.new(path: term.image.strip)
end
end
end
refs
end
|
Extract bibliographic xrefs from model-level source citations.
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
|
# File 'lib/glossarist/reference_extractor.rb', line 173
# Builds BibliographicReference objects from the source citations
# (l10n.all_sources) of every localization of +concept+.
#
# For each source that has an origin with a ref:
#   * a non-blank ref.source yields one reference anchored on that text
#     (the anchor is stored as-is, not stripped);
#   * when both ref.source and ref.id are present, two more references are
#     added: one anchored on "<source> <id>" and one on the id alone.
# No de-duplication happens here, so repeated citations yield repeated entries.
#
# NOTE(review): the method name is missing from this listing; restore it from
# lib/glossarist/reference_extractor.rb.
def (concept)
refs = []
concept.localizations.each do |l10n|
l10n.all_sources.each do |source|
origin = source.origin
next unless origin
ref = origin.ref
next unless ref
source_text = ref.source
if source_text && !source_text.strip.empty?
refs << BibliographicReference.new(anchor: source_text)
end
# The combined and id-only anchors need both parts to be present.
next unless ref.source && ref.id
key = "#{ref.source} #{ref.id}"
refs << BibliographicReference.new(anchor: key)
refs << BibliographicReference.new(anchor: ref.id.to_s)
end
end
refs
end
|
50
51
52
53
54
55
56
|
# File 'lib/glossarist/reference_extractor.rb', line 50
# Walks every language code in LANG_CODES and flattens the per-language results
# into one array. Languages whose entry in +concept_hash+ is not a Hash
# contribute nothing.
#
# NOTE(review): the method name and the name of the helper applied to
# concept_hash[lang] are missing from this listing; restore them from
# lib/glossarist/reference_extractor.rb.
def (concept_hash)
LANG_CODES.flat_map do |lang|
next [] unless concept_hash[lang].is_a?(Hash)
(concept_hash[lang])
end
end
|
46
47
48
|
# File 'lib/glossarist/reference_extractor.rb', line 46
# Flattens the per-text results for every text returned by
# gather_texts(lc_hash) into a single array.
#
# NOTE(review): the method name and the name of the helper applied to each
# text are missing from this listing; restore them from
# lib/glossarist/reference_extractor.rb.
def (lc_hash)
gather_texts(lc_hash).flat_map { |t| (t) }
end
|
64
65
66
|
# File 'lib/glossarist/reference_extractor.rb', line 64
# Flattens the per-text results for every entry of l10n.text_content into a
# single array.
#
# NOTE(review): the method name and the name of the helper applied to each
# text are missing from this listing; restore them from
# lib/glossarist/reference_extractor.rb.
def (l10n)
l10n.text_content.flat_map { |t| (t) }
end
|
58
59
60
61
62
|
# File 'lib/glossarist/reference_extractor.rb', line 58
# Flattens the per-localization results for every entry of
# concept.localizations into a single array.
#
# NOTE(review): the method name and the name of the helper applied to each
# localization are missing from this listing; restore them from
# lib/glossarist/reference_extractor.rb.
def (concept)
concept.localizations.flat_map do |l10n|
(l10n)
end
end
|
32
33
34
35
36
37
38
39
40
41
42
43
44
|
# File 'lib/glossarist/reference_extractor.rb', line 32
# Scans +text+ with every registered pattern (self.class.patterns) and returns
# the references produced by the patterns' resolvers, passed through
# deduplicate. Returns [] when +text+ is not a String.
#
# NOTE(review): the method name is missing from this listing; restore it from
# lib/glossarist/reference_extractor.rb.
def (text)
return [] unless text.is_a?(String)
refs = []
self.class.patterns.each do |pattern|
text.scan(pattern.regex).each do |captures|
# String#scan yields a plain String when the regex has no capture groups;
# wrap it so the splat below always receives an Array.
captures = [captures] unless captures.is_a?(Array)
ref = pattern.resolver.call(self, *captures)
# A resolver may return nil or false to signal "no reference"; skip those.
refs << ref if ref
end
end
deduplicate(refs)
end
|
#resolve_asciidoc_xref(target) ⇒ Object
205
206
207
|
# File 'lib/glossarist/reference_extractor.rb', line 205
# Turns an AsciiDoc xref target into a bibliographic reference.
#
# @param target [String] xref target; surrounding whitespace is removed
# @return [BibliographicReference] reference anchored on the stripped target
def resolve_asciidoc_xref(target)
  anchor = target.strip
  BibliographicReference.new(anchor: anchor)
end
|
#resolve_by_identifier(identifier, display) ⇒ Object
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
# File 'lib/glossarist/reference_extractor.rb', line 83
# Dispatches an identifier to the right resolver.
#
# The first registered identifier resolver whose prefix starts +identifier+
# wins and its result is returned as-is. Otherwise an identifier made only of
# digits, dots and dashes (starting with a digit) is treated as a local concept
# id, and anything else as a designation.
#
# @param identifier [String] identifier part of a mention
# @param display [String, nil] optional display text
# @return [Object] whatever the selected resolver returns
def resolve_by_identifier(identifier, display)
  matching = self.class.identifier_resolvers.find do |candidate|
    identifier.start_with?(candidate.prefix)
  end
  return matching.resolver.call(self, identifier, display) if matching

  case identifier
  when /\A\d[\d.-]*\z/ then resolve_local(display || identifier, identifier)
  else resolve_designation(identifier, display)
  end
end
|
#resolve_designation(text, display) ⇒ Object
108
109
110
111
112
113
114
115
|
# File 'lib/glossarist/reference_extractor.rb', line 108
# Builds a concept reference of type "designation".
#
# @param text [String] designation text, used as the term when no display is given
# @param display [String, nil] display text; takes precedence over +text+
# @return [ConceptReference] reference with no concept id and no source
def resolve_designation(text, display)
  attributes = {
    term: display || text,
    concept_id: nil,
    source: nil,
    ref_type: "designation",
  }
  ConceptReference.new(**attributes)
end
|
#resolve_generic_urn(urn, display) ⇒ Object
140
141
142
143
144
145
146
147
|
# File 'lib/glossarist/reference_extractor.rb', line 140
# Builds a concept reference of type "urn" that keeps the whole URN as source.
#
# @param urn [String] URN stored unchanged as the source
# @param display [String, nil] display text; the term is "" when nil
# @return [ConceptReference] reference with no concept id
def resolve_generic_urn(urn, display)
  attributes = {
    term: display || "",
    concept_id: nil,
    source: urn,
    ref_type: "urn",
  }
  ConceptReference.new(**attributes)
end
|
#resolve_iec_urn(urn, display) ⇒ Object
117
118
119
120
121
122
123
124
125
126
|
# File 'lib/glossarist/reference_extractor.rb', line 117
# Builds a concept reference of type "urn" whose source is fixed to
# "urn:iec:std:iec:60050".
#
# @param urn [String] URN from which the concept id is derived
# @param display [String, nil] display text; the term is "" when nil
# @return [ConceptReference]
def resolve_iec_urn(urn, display)
# NOTE(review): the name of the helper applied to +urn+ is missing from this
# listing, so as written concept_id is simply +urn+. Restore the call from
# lib/glossarist/reference_extractor.rb.
concept_id = (urn)
ConceptReference.new(
term: display || "",
concept_id: concept_id,
source: "urn:iec:std:iec:60050",
ref_type: "urn",
)
end
|
#resolve_image_ref(path) ⇒ Object
209
210
211
|
# File 'lib/glossarist/reference_extractor.rb', line 209
# Turns an image path into an asset reference.
#
# @param path [String] image path; surrounding whitespace is removed
# @return [AssetReference] reference holding the stripped path
def resolve_image_ref(path)
  cleaned = path.strip
  AssetReference.new(path: cleaned)
end
|
#resolve_iso_urn(urn, display) ⇒ Object
128
129
130
131
132
133
134
135
136
137
138
|
# File 'lib/glossarist/reference_extractor.rb', line 128
# Builds a concept reference of type "urn" from an ISO URN of the form
# "urn:iso:std:iso:<digits>" optionally followed by ":<rest>".
#
# The source is "urn:iso:std:iso:<digits>"; the concept id is derived from the
# optional <rest> part (nil when absent).
#
# @param urn [String] URN to parse
# @param display [String, nil] display text; the term is "" when nil
# @return [ConceptReference, nil] nil when +urn+ does not match the ISO form
def resolve_iso_urn(urn, display)
if (m = urn.match(/\Aurn:iso:std:iso:(\d+)(?::(.*))?\z/))
# NOTE(review): the name of the helper applied to m[2] is missing from this
# listing, so as written term_id is simply m[2]. Restore the call from
# lib/glossarist/reference_extractor.rb.
term_id = (m[2])
ConceptReference.new(
term: display || "",
concept_id: term_id,
source: "urn:iso:std:iso:#{m[1]}",
ref_type: "urn",
)
end
end
|
#resolve_local(term, concept_id) ⇒ Object
99
100
101
102
103
104
105
106
|
# File 'lib/glossarist/reference_extractor.rb', line 99
# Builds a concept reference of type "local" (a concept in the same dataset).
#
# @param term [String] term text; surrounding whitespace is removed
# @param concept_id [String] concept id; surrounding whitespace is removed
# @return [ConceptReference] reference with no source
def resolve_local(term, concept_id)
  clean_term = term.strip
  clean_id = concept_id.strip
  ConceptReference.new(term: clean_term, concept_id: clean_id, source: nil, ref_type: "local")
end
|
#resolve_mention(content) ⇒ Object
Unified concept mention dispatcher. Content is the text inside {…}.
70
71
72
73
74
75
76
77
78
79
80
81
|
# File 'lib/glossarist/reference_extractor.rb', line 70
# Unified concept mention dispatcher. +content+ is the text inside {...}.
#
# A mention is either a bare identifier or "display, identifier". Only the
# first comma separates the two parts, so the identifier may itself contain
# commas. Both parts are stripped of surrounding whitespace.
#
# @param content [String] raw text of the mention
# @return [Object] whatever resolve_by_identifier returns for the parsed parts
def resolve_mention(content)
  content = content.strip
  return resolve_by_identifier(content, nil) unless content.include?(",")

  display, identifier = content.split(",", 2).map(&:strip)
  # "" is truthy in Ruby, so an empty display part (e.g. "{, 3.1.2}") would
  # defeat every `display || ...` fallback downstream and yield a reference
  # with an empty term. Treat it the same as "no display given".
  display = nil if display.empty?
  resolve_by_identifier(identifier, display)
end
|