Class: Glossarist::ReferenceExtractor
- Inherits:
-
Object
- Object
- Glossarist::ReferenceExtractor
show all
- Defined in:
- lib/glossarist/reference_extractor.rb
Defined Under Namespace
Classes: IdentifierResolver, Pattern
Constant Summary
collapse
- LANG_CODES =
Glossarist::LANG_CODES
Class Method Summary
collapse
Instance Method Summary
collapse
Class Method Details
.identifier_resolvers ⇒ Object
27
28
29
|
# File 'lib/glossarist/reference_extractor.rb', line 27
# Returns a shallow copy of the registered identifier resolvers.
#
# The copy is produced with #dup, so adding to or removing from the returned
# collection leaves @identifier_resolvers untouched; the elements themselves
# are shared with the registry.
#
# @return [Object] a duplicate of @identifier_resolvers
def identifier_resolvers
  snapshot = @identifier_resolvers.dup
  snapshot
end
|
.patterns ⇒ Object
23
24
25
|
# File 'lib/glossarist/reference_extractor.rb', line 23
# Returns a shallow copy of the registered patterns.
#
# The copy is produced with #dup, so adding to or removing from the returned
# collection leaves @patterns untouched; the elements themselves are shared
# with the registry.
#
# @return [Object] a duplicate of @patterns
def patterns
  snapshot = @patterns.dup
  snapshot
end
|
.register_identifier_resolver(prefix, &resolver) ⇒ Object
14
15
16
17
|
# File 'lib/glossarist/reference_extractor.rb', line 14
# Registers a resolver block for identifiers that begin with +prefix+.
#
# The prefix and the block are wrapped in an IdentifierResolver and appended
# to @identifier_resolvers.
#
# @param prefix [String] prefix stored on the new IdentifierResolver
# @param resolver [Proc] block stored on the new IdentifierResolver
# @return [Object] the result of appending to @identifier_resolvers
def register_identifier_resolver(prefix, &resolver)
  entry = IdentifierResolver.new(prefix: prefix, resolver: resolver)
  @identifier_resolvers << entry
end
|
.register_pattern(name:, regex:, &resolver) ⇒ Object
19
20
21
|
# File 'lib/glossarist/reference_extractor.rb', line 19
# Registers a named pattern together with the block that resolves its matches.
#
# The name, regex and block are wrapped in a Pattern and appended to @patterns.
#
# @param name [Object] name stored on the new Pattern
# @param regex [Regexp] expression stored on the new Pattern
# @param resolver [Proc] block stored on the new Pattern
# @return [Object] the result of appending to @patterns
def register_pattern(name:, regex:, &resolver)
  entry = Pattern.new(name: name, regex: regex, resolver: resolver)
  @patterns << entry
end
|
Instance Method Details
50
51
52
53
54
55
56
|
# File 'lib/glossarist/reference_extractor.rb', line 50
# NOTE(review): the method name is missing from this listing, and so is the
# name of the helper called inside the block -- presumably dropped during
# extraction; restore both from lib/glossarist/reference_extractor.rb.
#
# Visits every code in LANG_CODES and flat-maps the per-language results into
# a single array.
#
# @param concept_hash [#[]] object indexed by language code
# @return [Array] the flattened per-language results
def (concept_hash)
LANG_CODES.flat_map do |lang|
# Languages whose entry is not a Hash contribute nothing.
next [] unless concept_hash[lang].is_a?(Hash)
# As written this yields concept_hash[lang] itself; the missing call
# presumably processes it -- TODO confirm.
(concept_hash[lang])
end
end
|
46
47
48
|
# File 'lib/glossarist/reference_extractor.rb', line 46
# NOTE(review): the method name is missing from this listing, and so is the
# name of the helper applied to each text -- presumably dropped during
# extraction; restore both from lib/glossarist/reference_extractor.rb.
#
# Passes lc_hash to gather_texts (defined elsewhere) and flat-maps over the
# texts it returns.
#
# @param lc_hash [Object] forwarded unchanged to gather_texts
# @return [Array] the flattened per-text results
def (lc_hash)
gather_texts(lc_hash).flat_map { |t| (t) }
end
|
64
65
66
67
68
69
70
|
# File 'lib/glossarist/reference_extractor.rb', line 64
# NOTE(review): the method name is missing from this listing, and so is the
# name of the helper applied to each text -- presumably dropped during
# extraction; restore both from lib/glossarist/reference_extractor.rb.
#
# Collects the non-nil +content+ of every definition, note and example found
# under l10n.data, then flat-maps over the collected texts.
#
# @param l10n [#data] object whose data exposes definition, notes and examples
# @return [Array] the flattened per-text results
def (l10n)
texts = []
# Each collection may be nil, hence the safe-navigation calls.
l10n.data.definition&.each { |d| texts << d.content if d.content }
l10n.data.notes&.each { |n| texts << n.content if n.content }
l10n.data.examples&.each { |e| texts << e.content if e.content }
texts.flat_map { |t| (t) }
end
|
58
59
60
61
62
|
# File 'lib/glossarist/reference_extractor.rb', line 58
# NOTE(review): the method name is missing from this listing, and so is the
# name of the helper applied to each localization -- presumably dropped during
# extraction; restore both from lib/glossarist/reference_extractor.rb.
#
# Flat-maps over concept.localizations.
#
# @param concept [#localizations] object exposing a collection of localizations
# @return [Array] the flattened per-localization results
def (concept)
concept.localizations.flat_map do |l10n|
(l10n)
end
end
|
32
33
34
35
36
37
38
39
40
41
42
43
44
|
# File 'lib/glossarist/reference_extractor.rb', line 32
# NOTE(review): the method name is missing from this listing -- presumably
# dropped during extraction; restore it from
# lib/glossarist/reference_extractor.rb.
#
# Scans +text+ with the regex of every pattern returned by
# self.class.patterns, hands each match's captures to that pattern's resolver,
# and keeps the truthy results.
#
# @param text [Object] text to scan; anything that is not a String yields []
# @return [Object] the collected references after being passed through
#   deduplicate (defined elsewhere)
def (text)
return [] unless text.is_a?(String)
refs = []
self.class.patterns.each do |pattern|
text.scan(pattern.regex).each do |captures|
# String#scan yields a plain String when the regex has no capture groups;
# wrap it so the splat below always receives an Array.
captures = [captures] unless captures.is_a?(Array)
# The resolver receives this extractor first, followed by the captures.
ref = pattern.resolver.call(self, *captures)
# Resolvers may return nil/false to signal "no reference"; skip those.
refs << ref if ref
end
end
deduplicate(refs)
end
|
#resolve_by_identifier(identifier, display) ⇒ Object
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
# File 'lib/glossarist/reference_extractor.rb', line 87
# Resolves a mention identifier into a reference.
#
# Resolution order:
# 1. the first resolver from self.class.identifier_resolvers whose prefix the
#    identifier starts with is called with (self, identifier, display);
# 2. otherwise an identifier made of a digit followed only by digits, dots
#    and hyphens is handed to #resolve_local;
# 3. anything else is handed to #resolve_designation.
#
# @param identifier [String] identifier part of the mention
# @param display [String, nil] optional display text
# @return [Object] whatever the selected resolver returns
def resolve_by_identifier(identifier, display)
  matching = self.class.identifier_resolvers.find do |candidate|
    identifier.start_with?(candidate.prefix)
  end
  return matching.resolver.call(self, identifier, display) if matching

  case identifier
  when /\A\d[\d.-]*\z/
    # Without a display text, the identifier doubles as the term.
    resolve_local(display || identifier, identifier)
  else
    resolve_designation(identifier, display)
  end
end
|
#resolve_designation(text, display) ⇒ Object
112
113
114
115
116
117
118
119
|
# File 'lib/glossarist/reference_extractor.rb', line 112
# Builds a "designation" reference, i.e. one with neither concept id nor
# source.
#
# @param text [String] designation text; used as the term when +display+ is
#   nil or false
# @param display [String, nil] preferred term text
# @return [ConceptReference]
def resolve_designation(text, display)
  label = display || text
  ConceptReference.new(term: label, concept_id: nil, source: nil, ref_type: "designation")
end
|
#resolve_generic_urn(urn, display) ⇒ Object
144
145
146
147
148
149
150
151
|
# File 'lib/glossarist/reference_extractor.rb', line 144
# Builds a "urn" reference whose source is the URN itself and which carries
# no concept id.
#
# @param urn [String] stored unchanged as the reference source
# @param display [String, nil] term text; an empty string is used when nil or
#   false
# @return [ConceptReference]
def resolve_generic_urn(urn, display)
  label = display || ""
  ConceptReference.new(term: label, concept_id: nil, source: urn, ref_type: "urn")
end
|
#resolve_iec_urn(urn, display) ⇒ Object
121
122
123
124
125
126
127
128
129
130
|
# File 'lib/glossarist/reference_extractor.rb', line 121
# Builds a "urn" reference whose source is fixed to "urn:iec:std:iec:60050".
#
# @param urn [String] the URN being resolved
# @param display [String, nil] term text; an empty string is used when nil or
#   false
# @return [ConceptReference]
def resolve_iec_urn(urn, display)
# NOTE(review): the helper name in front of (urn) is missing from this
# listing -- presumably dropped during extraction. As written, concept_id is
# the URN itself; confirm against lib/glossarist/reference_extractor.rb.
concept_id = (urn)
ConceptReference.new(
term: display || "",
concept_id: concept_id,
source: "urn:iec:std:iec:60050",
ref_type: "urn",
)
end
|
#resolve_iso_urn(urn, display) ⇒ Object
132
133
134
135
136
137
138
139
140
141
142
|
# File 'lib/glossarist/reference_extractor.rb', line 132
# Builds a "urn" reference from a URN of the form
# "urn:iso:std:iso:<digits>", optionally followed by ":<rest>".
#
# The source of the reference is "urn:iso:std:iso:<digits>".
#
# @param urn [String] the URN being resolved
# @param display [String, nil] term text; an empty string is used when nil or
#   false
# @return [ConceptReference, nil] nil when the URN does not match the pattern
def resolve_iso_urn(urn, display)
if (m = urn.match(/\Aurn:iso:std:iso:(\d+)(?::(.*))?\z/))
# NOTE(review): the helper name in front of (m[2]) is missing from this
# listing -- presumably dropped during extraction. As written, term_id is
# the raw second capture, which is nil when the URN has no ":<rest>" part.
term_id = (m[2])
ConceptReference.new(
term: display || "",
concept_id: term_id,
source: "urn:iso:std:iso:#{m[1]}",
ref_type: "urn",
)
end
end
|
#resolve_local(term, concept_id) ⇒ Object
103
104
105
106
107
108
109
110
|
# File 'lib/glossarist/reference_extractor.rb', line 103
# Builds a "local" reference, i.e. one with a concept id but no source.
#
# Both arguments have surrounding whitespace stripped before being stored.
#
# @param term [String] term text
# @param concept_id [String] concept identifier
# @return [ConceptReference]
def resolve_local(term, concept_id)
  attributes = {
    term: term.strip,
    concept_id: concept_id.strip,
    source: nil,
    ref_type: "local",
  }
  ConceptReference.new(**attributes)
end
|
#resolve_mention(content) ⇒ Object
Unified concept mention dispatcher. Content is the text inside {…}.
74
75
76
77
78
79
80
81
82
83
84
85
|
# File 'lib/glossarist/reference_extractor.rb', line 74
# Dispatches the text of a concept mention to #resolve_by_identifier.
#
# The content is stripped and split at its first comma: the part before the
# comma is the display text, the part after it is the identifier (both
# stripped). Content without a comma is treated as a bare identifier with no
# display text.
#
# @param content [String] raw mention text
# @return [Object] whatever #resolve_by_identifier returns
def resolve_mention(content)
  trimmed = content.strip
  # String#partition splits at the first separator only, so any further
  # commas stay inside the identifier part.
  before, comma, after = trimmed.partition(",")
  return resolve_by_identifier(trimmed, nil) if comma.empty?

  resolve_by_identifier(after.strip, before.strip)
end
|