Module: Metanorma::Standoc::Block

Included in:
Cleanup
Defined in:
lib/metanorma/cleanup/block.rb

Defined Under Namespace

Classes: SourcecodeMarkup

Constant Summary collapse

# Element names that note_cleanup accepts as a new parent for a note:
# a note is reparented into its preceding element only when that
# element's name is in this list.
ELEMS_ALLOW_NOTES =
%w[p formula ul ol dl figure].freeze

Instance Method Summary collapse

Instance Method Details

#align_callouts_to_annotations(xmldoc) ⇒ Object



147
148
149
150
151
152
153
154
# File 'lib/metanorma/cleanup/block.rb', line 147

# For every sourcecode element, pair its body callouts with its
# callout-annotation children. Linking is only attempted when both
# collections have the same size; otherwise the element is skipped.
#
# @param xmldoc [#xpath] document (or node) to search
def align_callouts_to_annotations(xmldoc)
  xmldoc.xpath("//sourcecode").each do |sourcecode|
    markers = sourcecode.xpath("./body/callout")
    notes = sourcecode.xpath("./callout-annotation")
    next unless markers.size == notes.size

    link_callouts_to_annotations(markers, notes)
  end
end

#blocksource_cleanup(xmldoc) ⇒ Object



282
283
284
285
286
# File 'lib/metanorma/cleanup/block.rb', line 282

# Remove the "type" attribute from every source element found anywhere
# inside a figure or a table.
#
# @param xmldoc [#xpath] document to clean
def blocksource_cleanup(xmldoc)
  sources = xmldoc.xpath("//figure//source | //table//source")
  sources.each { |source| source.delete("type") }
end

#callout_cleanup(xmldoc) ⇒ Object



164
165
166
167
# File 'lib/metanorma/cleanup/block.rb', line 164

# Run both callout passes over the document. Order matters: annotations
# are first moved inside their sourcecode element, because the alignment
# pass looks annotations up as children of sourcecode.
#
# @param xmldoc [#xpath] document to clean
def callout_cleanup(xmldoc)
  # pull callout-annotation siblings that follow a sourcecode into it
  merge_annotations_into_sourcecode(xmldoc)
  # then link callouts to annotations where the counts agree
  align_callouts_to_annotations(xmldoc)
end

#figure_cleanup(xmldoc) ⇒ Object



112
113
114
115
116
117
118
119
# File 'lib/metanorma/cleanup/block.rb', line 112

# Entry point for figure cleanup: runs each figure-related pass over the
# document, in the fixed order below.
#
# @param xmldoc [#xpath] document to clean
def figure_cleanup(xmldoc)
  figure_table_cleanup(xmldoc)
  figure_footnote_cleanup(xmldoc)
  # examples consisting only of figures are renamed to figure ...
  subfigure_cleanup(xmldoc)
  # ... before keys are folded into their figures
  figure_dl_cleanup1(xmldoc)
  figure_dl_cleanup2(xmldoc)
  # finally, a figure with exactly one subfigure absorbs that subfigure
  single_subfigure_cleanup(xmldoc)
end

#figure_dl_cleanup1(xmldoc) ⇒ Object



68
69
70
71
72
73
74
# File 'lib/metanorma/cleanup/block.rb', line 68

# Move each key element that is a following sibling of a figure into the
# element immediately preceding it. Keys that follow a subfigure (a
# figure nested directly in a figure) are excluded.
#
# @param xmldoc [#xpath] document to clean
def figure_dl_cleanup1(xmldoc)
  candidates = xmldoc.xpath("//figure/following-sibling::*[self::key]")
  after_subfigure =
    xmldoc.xpath("//figure/figure/following-sibling::*[self::key]")
  (candidates - after_subfigure).each do |key|
    target = key.previous_element
    target << key.remove
  end
end

#figure_dl_cleanup2(xmldoc) ⇒ Object

include key definition list inside figure



77
78
79
# File 'lib/metanorma/cleanup/block.rb', line 77

# Include a key definition list inside its figure: delegates to
# text_key_extract for tag "figure" and keyword "key", which folds a
# "key" paragraph directly after a figure, plus the dl after it, into
# the figure as a <key> element.
#
# @param xmldoc [#xpath] document to clean
def figure_dl_cleanup2(xmldoc)
  text_key_extract(xmldoc, "figure", "key")
end

#figure_table_cleanup(xmldoc) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/metanorma/cleanup/block.rb', line 99

# Normalise a table that is a direct child of a figure: the table is
# flagged "plain", every cell is centred and bottom-aligned, and the
# table's direct note/footnote/dl/source children are appended to the
# figure instead. Figures without a direct table child are skipped.
#
# @param xmldoc [#xpath] document to clean
def figure_table_cleanup(xmldoc)
  xmldoc.xpath("//figure").each do |figure|
    table = figure.at("./table")
    next unless table

    table["plain"] = true
    table.xpath(".//td | .//th").each do |cell|
      cell["align"] = "center"
      cell["valign"] = "bottom"
    end
    trailing = table.xpath("./note | ./footnote | ./dl | ./source")
    trailing.each { |node| figure << node }
  end
end

#form_cleanup(xmldoc) ⇒ Object



267
268
269
270
271
272
273
# File 'lib/metanorma/cleanup/block.rb', line 267

# Gather option elements into their select: while the element right
# after a select is named "option", it is appended to that select.
#
# @param xmldoc [#xpath] document to clean
def form_cleanup(xmldoc)
  xmldoc.xpath("//select").each do |select|
    loop do
      sibling = select.next_element
      break unless sibling&.name == "option"

      select << sibling
    end
  end
end

#formula_cleanup(formula) ⇒ Object

include where definition list inside stem block



42
43
44
45
# File 'lib/metanorma/cleanup/block.rb', line 42

# Include a "where" definition list inside its formula, via two passes:
# first keys directly following a formula, then a "where" paragraph
# followed by a dl.
#
# @param formula [#xpath] node the XPath queries are run from; the
#   queries are absolute ("//formula/..."), so this is presumably the
#   whole document -- TODO confirm against callers
def formula_cleanup(formula)
  formula_cleanup_where1(formula)
  formula_cleanup_where2(formula)
end

#formula_cleanup_where1(formula) ⇒ Object



47
48
49
50
51
52
# File 'lib/metanorma/cleanup/block.rb', line 47

# Move a key element that is the first following sibling of a formula
# into its previous element.
#
# @param formula [#xpath] node the absolute XPath query is run from
def formula_cleanup_where1(formula)
  keys = formula.xpath("//formula/following-sibling::*[1][self::key]")
  keys.each do |key|
    owner = key.previous_element
    owner << key.remove
  end
end

#formula_cleanup_where2(formula) ⇒ Object



54
55
56
# File 'lib/metanorma/cleanup/block.rb', line 54

# Delegates to text_key_extract for tag "formula" and keyword "where",
# which folds a "where" paragraph directly after a formula, plus the dl
# after it, into the formula as a <key> element.
#
# @param formula [#xpath] node the absolute XPath query is run from
def formula_cleanup_where2(formula)
  text_key_extract(formula, "formula", "where")
end

#inject_id(xmldoc, path) ⇒ Object



18
19
20
21
22
# File 'lib/metanorma/cleanup/block.rb', line 18

# Call add_id on every node matched by +path+ that has no "id"
# attribute yet; nodes already carrying an id are left as they are.
#
# @param xmldoc [#xpath] document to search
# @param path [String] XPath expression selecting candidate nodes
def inject_id(xmldoc, path)
  xmldoc.xpath(path).each { |node| add_id(node) unless node["id"] }
end

#key_cleanup(xmldoc) ⇒ Object



24
25
26
27
28
29
30
31
# File 'lib/metanorma/cleanup/block.rb', line 24

# Turn elements flagged with key="true" into <key> wrappers:
# 1. drop the flag and replace the element with itself wrapped in <key>;
# 2. concatenate adjacent <key> elements (key_concatenate);
# 3. unwrap any <key> nested inside another <key>.
#
# @param xmldoc [#xpath] document to clean
def key_cleanup(xmldoc)
  flagged = xmldoc.xpath("//*[@key = 'true']")
  flagged.each do |node|
    # the attribute is removed first so it is absent from the serialisation
    node.delete("key")
    wrapped = "<key>#{to_xml(node)}</key>"
    node.replace(wrapped)
  end
  key_concatenate(xmldoc)
  nested = xmldoc.xpath("//key//key")
  nested.each { |inner| inner.replace(inner.children) }
end

#key_concatenate(xmldoc) ⇒ Object



33
34
35
36
37
38
39
# File 'lib/metanorma/cleanup/block.rb', line 33

# Merge runs of adjacent key elements: while the element right after a
# key is itself a key, that follower is removed and its children are
# appended to the first key.
#
# @param xmldoc [#xpath] document to clean
def key_concatenate(xmldoc)
  xmldoc.xpath("//key").each do |key|
    loop do
      follower = key.next_element
      break unless follower&.name == "key"

      key << follower.remove.children
    end
  end
end


139
140
141
142
143
144
145
# File 'lib/metanorma/cleanup/block.rb', line 139

# Link the i-th callout to the i-th annotation: add_id is called on the
# annotation, its "anchor" is set to its "id", and the callout's
# "target" is set to that same id.
#
# @param callouts [#each_with_index] callout nodes
# @param annotations [#[]] annotation nodes, indexed in parallel
def link_callouts_to_annotations(callouts, annotations)
  callouts.each_with_index do |callout, idx|
    annotation = annotations[idx]
    add_id(annotation)
    id = annotation["id"]
    annotation["anchor"] = id
    callout["target"] = id
  end
end

#merge_annotations_into_sourcecode(xmldoc) ⇒ Object



156
157
158
159
160
161
162
# File 'lib/metanorma/cleanup/block.rb', line 156

# Pull callout annotations into their sourcecode: while the element
# right after a sourcecode is a callout-annotation, it is reparented
# under that sourcecode.
#
# @param xmldoc [#xpath] document to clean
def merge_annotations_into_sourcecode(xmldoc)
  xmldoc.xpath("//sourcecode").each do |sourcecode|
    loop do
      follower = sourcecode.next_element
      break unless follower&.name == "callout-annotation"

      follower.parent = sourcecode
    end
  end
end

#note_cleanup(xmldoc) ⇒ Object

If a note is at the end of a section, it is left alone. If a note is followed by a non-note block, it is moved inside its preceding block if that block is not delimited (so there was no way of making that block include the note).



127
128
129
130
131
132
133
134
135
136
137
# File 'lib/metanorma/cleanup/block.rb', line 127

# Attach notes to the block they follow, then strip the keep-separate
# marker.
#
# A note is reparented into its previous element when that element's
# name is listed in ELEMS_ALLOW_NOTES. Notes marked
# keep-separate="true", notes inside a table, and notes without a
# previous element are left where they are. Afterwards the
# keep-separate attribute is deleted from every note and termnote.
#
# @param xmldoc [#xpath] document to clean
def note_cleanup(xmldoc)
  xmldoc.xpath("//note").each do |note|
    next if note["keep-separate"] == "true"
    next unless note.ancestors("table").empty?

    host = note.previous_element
    next unless host

    note.parent = host if ELEMS_ALLOW_NOTES.include?(host.name)
  end
  flagged = xmldoc.xpath("//note[@keep-separate] | " \
                         "//termnote[@keep-separate]")
  flagged.each { |note| note.delete("keep-separate") }
end

#ol_cleanup(doc) ⇒ Object



275
276
277
278
279
280
# File 'lib/metanorma/cleanup/block.rb', line 275

# Strip the explicit-type attribute from ordered lists, recording a
# STANDOC_14 log entry (with display: false) for each list affected.
#
# @param doc [#xpath] document to clean
def ol_cleanup(doc)
  typed_lists = doc.xpath("//ol[@explicit-type]")
  typed_lists.each do |list|
    list.delete("explicit-type")
    @log.add("STANDOC_14", list, display: false)
  end
end

#para_cleanup(xmldoc) ⇒ Object



7
8
9
10
11
12
13
14
15
16
# File 'lib/metanorma/cleanup/block.rb', line 7

# Give ids to block elements and drop empty paragraphs.
#
# inject_id is run for p, ol, ul, quote and dl elements outside bibdata,
# and for notes outside bibitem, table and bibdata. Paragraphs with no
# child nodes at all are then removed.
#
# @param xmldoc [#xpath] document to clean
def para_cleanup(xmldoc)
  id_paths = %w[p ol ul quote dl].map do |tag|
    "//#{tag}[not(ancestor::bibdata)]"
  end
  id_paths << "//note[not(ancestor::bibitem or " \
              "ancestor::table or ancestor::bibdata)]"
  id_paths.each { |path| inject_id(xmldoc, path) }
  empty_paras = xmldoc.xpath("//p[not(text()) and not(node())]")
  empty_paras.each(&:remove)
end

#safe_noko(text, doc) ⇒ Object



188
189
190
191
192
193
# File 'lib/metanorma/cleanup/block.rb', line 188

# Serialise +text+ as an XML text node belonging to +doc+, encoded as
# US-ASCII and without an XML declaration.
#
# @param text [String] raw text
# @param doc document the temporary text node is created in
# @return [String] the serialised text node
def safe_noko(text, doc)
  text_node = Nokogiri::XML::Text.new(text, doc)
  no_declaration = Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
  text_node.to_xml(encoding: "US-ASCII", save_with: no_declaration)
end

#single_subfigure_cleanup(xmldoc) ⇒ Object



91
92
93
94
95
96
97
# File 'lib/metanorma/cleanup/block.rb', line 91

# Collapse a lone subfigure: when a figure has exactly one direct figure
# child, that child is replaced by its own children. Figures with
# several subfigures are left untouched.
#
# @param xmldoc [#xpath] document to clean
def single_subfigure_cleanup(xmldoc)
  xmldoc.xpath("//figure[figure]").each do |outer|
    subfigures = outer.xpath("./figure")
    next unless subfigures.size == 1

    only = subfigures[0]
    only.replace(only.children)
  end
end

#sourcecode_cleanup(xmldoc) ⇒ Object

{{{ … }}} injects Asciidoc markup into otherwise-verbatim sourcecode. A span’s delimiters can be split across separate text nodes by an element that subs=“macros” injected between them (e.g. an inline image), so we walk the sourcecode’s text nodes in order and carry the open state across them, rather than processing each node in isolation.



180
181
182
183
184
185
186
# File 'lib/metanorma/cleanup/block.rb', line 180

# Process markup spans in every sourcecode element.
#
# The text nodes of each sourcecode are visited in order and the "span
# still open" flag returned by sourcecode_markup is carried from one
# node to the next. If a span is still open after the last text node,
# STANDOC_65 is logged with the opening delimiter as its parameter.
#
# @param xmldoc [#xpath] document to clean
def sourcecode_cleanup(xmldoc)
  xmldoc.xpath("//sourcecode").each do |sourcecode|
    still_open = false
    sourcecode.xpath(".//text()").each do |text_node|
      still_open = sourcecode_markup(text_node, still_open)
    end
    next unless still_open

    @log.add("STANDOC_65", sourcecode, params: [@sourcecode_markup_start])
  end
end

#sourcecode_markup(node, open) ⇒ Boolean

Returns whether a {{{ span is still open after this node.

Returns:

  • (Boolean)

    whether a {{{ span is still open after this node



196
197
198
199
200
201
202
203
# File 'lib/metanorma/cleanup/block.rb', line 196

# Process one sourcecode text node for markup spans.
#
# Nodes that contain no delimiter are skipped entirely unless a span is
# already open on entry. Otherwise the text is split into tokens, each
# token is fed through sourcecode_markup_token against a fresh
# SourcecodeMarkup state, and the result is flushed back into the node.
#
# @param node text node; must respond to #text
# @param open [Boolean] whether a span was left open by earlier nodes
# @return [Boolean] whether a {{{ span is still open after this node
def sourcecode_markup(node, open)
  return open unless open || sourcecode_markup?(node.text)

  # the second argument starts false: a span carried over from an
  # earlier node was not opened inside this one
  state = SourcecodeMarkup.new(open, false, [], [], node)
  tokens = sourcecode_markup_split(node.text)
  tokens.each { |token| sourcecode_markup_token(token, state) }
  sourcecode_markup_flush(state)
  state.open
end

#sourcecode_markup?(text) ⇒ Boolean

Returns:

  • (Boolean)


211
212
213
214
# File 'lib/metanorma/cleanup/block.rb', line 211

# Whether +text+ contains the opening or the closing markup delimiter.
#
# @param text [String]
# @return [Boolean]
def sourcecode_markup?(text)
  [@sourcecode_markup_start, @sourcecode_markup_end]
    .any? { |delimiter| text.include?(delimiter) }
end

#sourcecode_markup_close(state) ⇒ Object



236
237
238
239
240
241
# File 'lib/metanorma/cleanup/block.rb', line 236

# Handle a closing delimiter. With no span open it is treated as a stray
# delimiter; otherwise the buffered span is finalised into the output
# and the open/inline flags and the buffer are reset.
#
# @param state [SourcecodeMarkup] per-node processing state
def sourcecode_markup_close(state)
  return sourcecode_markup_stray(state) unless state.open

  state.out << sourcecode_markup_closed(state)
  state.inline = false
  state.open = false
  state.buf = []
end

#sourcecode_markup_closed(state) ⇒ Object

a span closed within one node is converted; one split across nodes (already holding a processed element) is delimiter-stripped only



250
251
252
253
# File 'lib/metanorma/cleanup/block.rb', line 250

# Render the buffered content of a span that has just been closed.
# A span opened and closed within one node (inline) is converted; one
# split across nodes (already holding a processed element) only has its
# delimiters stripped and is emitted via safe_noko.
#
# @param state [SourcecodeMarkup] per-node processing state
# @return [String] markup to append to the output
def sourcecode_markup_closed(state)
  span = state.buf.join
  if state.inline
    sourcecode_markup_convert(span)
  else
    safe_noko(span, state.node.document)
  end
end

#sourcecode_markup_content(tok, state) ⇒ Object



255
256
257
258
# File 'lib/metanorma/cleanup/block.rb', line 255

# Handle a non-delimiter token: inside an open span it is buffered
# as-is; outside a span it is passed through safe_noko and appended to
# the output.
#
# @param tok [String] token text
# @param state [SourcecodeMarkup] per-node processing state
def sourcecode_markup_content(tok, state)
  if state.open
    state.buf << tok
  else
    state.out << safe_noko(tok, state.node.document)
  end
end

#sourcecode_markup_convert(span) ⇒ Object



260
261
262
263
264
265
# File 'lib/metanorma/cleanup/block.rb', line 260

# Convert the content of a markup span through the converter's
# isolated_asciidoctor_convert, as an inline document, and strip
# surrounding whitespace from the result. The backend is the
# converter's own backend as a symbol, falling back to :standoc.
#
# @param span [String] span content, without delimiters
# @return [String] converted markup
def sourcecode_markup_convert(span)
  backend = @conv.backend&.to_sym || :standoc
  converted = @conv.isolated_asciidoctor_convert(
    "{blank} #{span}", doctype: :inline, backend: backend
  )
  converted.strip
end

#sourcecode_markup_flush(state) ⇒ Object



205
206
207
208
209
# File 'lib/metanorma/cleanup/block.rb', line 205

# Finish a node: if a span is still open, its buffered content is
# emitted through safe_noko; the node is then replaced by the
# concatenated output.
#
# @param state [SourcecodeMarkup] per-node processing state
def sourcecode_markup_flush(state)
  if state.open
    pending = safe_noko(state.buf.join, state.node.document)
    state.out << pending
  end
  state.node.replace(state.out.join)
end

#sourcecode_markup_open(state) ⇒ Object

a nested {{{ is improper nesting: STANDOC_61 is fatal



230
231
232
233
234
# File 'lib/metanorma/cleanup/block.rb', line 230

# Handle an opening delimiter. A nested {{{ is improper nesting:
# STANDOC_61 (fatal) is logged with the node text and the state is left
# unchanged. Otherwise the span is marked as open and as opened inside
# the current node (inline).
#
# @param state [SourcecodeMarkup] per-node processing state
def sourcecode_markup_open(state)
  if state.open
    @log.add("STANDOC_61", state.node, params: [state.node.text])
  else
    state.inline = true
    state.open = true
  end
end

#sourcecode_markup_split(text) ⇒ Object



216
217
218
219
# File 'lib/metanorma/cleanup/block.rb', line 216

# Split +text+ on the opening and closing delimiters. The delimiters sit
# in a capture group, so they are kept as tokens of their own.
#
# @param text [String]
# @return [Array<String>] content and delimiter tokens, in order
def sourcecode_markup_split(text)
  # extended (/x) mode lets the pattern span two lines; Regexp.escape
  # escapes the delimiters so they are matched literally
  delimiter = /(#{Regexp.escape(@sourcecode_markup_start)}|
               #{Regexp.escape(@sourcecode_markup_end)})/x
  text.split(delimiter)
end

#sourcecode_markup_stray(state) ⇒ Object

a stray }}} with no opener is left as literal text



244
245
246
# File 'lib/metanorma/cleanup/block.rb', line 244

# A stray closing delimiter with no opener is left as literal text: it
# is passed through safe_noko and appended to the output.
#
# @param state [SourcecodeMarkup] per-node processing state
def sourcecode_markup_stray(state)
  literal = safe_noko(@sourcecode_markup_end, state.node.document)
  state.out << literal
end

#sourcecode_markup_token(tok, state) ⇒ Object



221
222
223
224
225
226
227
# File 'lib/metanorma/cleanup/block.rb', line 221

# Dispatch one token: the opening delimiter opens a span, the closing
# delimiter closes one, and anything else is handled as content.
#
# @param tok [String] token produced by sourcecode_markup_split
# @param state [SourcecodeMarkup] per-node processing state
def sourcecode_markup_token(tok, state)
  if @sourcecode_markup_start == tok
    sourcecode_markup_open(state)
  elsif @sourcecode_markup_end == tok
    sourcecode_markup_close(state)
  else
    sourcecode_markup_content(tok, state)
  end
end

#subfigure_cleanup(xmldoc) ⇒ Object

examples containing only figures become subfigures of figures



82
83
84
85
86
87
88
89
# File 'lib/metanorma/cleanup/block.rb', line 82

# Examples containing only figures become figures (whose figure children
# are then subfigures): an example with a figure child is renamed to
# "figure" when all of its child elements are name, figure, index, note
# or key elements.
#
# @param xmldoc [#xpath] document to clean
def subfigure_cleanup(xmldoc)
  xmldoc.xpath("//example[figure]").each do |example|
    figure_only = example.elements.all? do |child|
      %w(name figure index note key).include?(child.name)
    end
    next unless figure_only

    example.name = "figure"
  end
end

#text_key_extract(elem, tag, keywd) ⇒ Object



58
59
60
61
62
63
64
65
66
# File 'lib/metanorma/cleanup/block.rb', line 58

# Fold a "keyword paragraph + definition list" pair into the block
# before it.
#
# Looks at each paragraph that is the first following sibling of a
# +tag+ element. When the paragraph text is just +keywd+ (matched
# case-insensitively, optionally followed by non-letter characters) and
# the next element is a dl, the dl is removed, serialised inside <key>
# and appended to the paragraph's previous element; the paragraph itself
# is then removed.
#
# @param elem [#xpath] node the absolute XPath query is run from
# @param tag [String] element name of the owning block, e.g. "figure"
# @param keywd [String] keyword text; interpolated into the regexp as-is
def text_key_extract(elem, tag, keywd)
  label = /^\s*#{keywd}[^a-z]*$/i
  elem.xpath("//#{tag}/following-sibling::*[1][self::p]").each do |para|
    next unless label.match?(para.text)

    list = para.next_element
    next unless list&.name == "dl"

    para.previous_element << "<key>#{to_xml(list.remove)}</key>"
    para.remove
  end
end

#unnumbered_blocks_cleanup(xmldoc) ⇒ Object



288
289
290
291
292
293
294
295
# File 'lib/metanorma/cleanup/block.rb', line 288

# Set the "unnumbered" attribute on every element whose name is listed
# in @blockunnumbered (no-op when that list is nil).
#
# An element whose anchor does not start with an underscore is forced to
# unnumbered="false"; any element still lacking the attribute then gets
# unnumbered="true".
#
# @param xmldoc [#xpath] document to clean
def unnumbered_blocks_cleanup(xmldoc)
  return if @blockunnumbered.nil?

  @blockunnumbered.each do |block_name|
    xmldoc.xpath("//#{block_name}").each do |block|
      block["unnumbered"] = "false" if /^[^_]/.match?(block["anchor"])
      block["unnumbered"] ||= "true"
    end
  end
end