Class: Metanorma::Collection::FileLookup

Inherits:
Object
  • Object
show all
Defined in:
lib/metanorma/collection/filelookup/base.rb,
lib/metanorma/collection/filelookup/utils.rb,
lib/metanorma/collection/filelookup/filelookup.rb,
lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path, parent) ⇒ FileLookup

Builds a hash mapping each document identifier in the collection to: the document reference (fileref or id), the type of document reference, and the bibdata entry for that file.

Parameters:

  • path (String)

    path to collection



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 18

# Sets up the lookup state and then populates @files from the manifest.
#
# Collaborators (xml, isodoc, isodoc_presxml, compile, documents, manifest)
# are read off +parent+; @files and @files_to_delete start out empty.
#
# @param path [String] path to collection
# @param parent [Object] owner of this lookup; must respond to xml, isodoc,
#   isodoc_presxml, compile, documents and manifest
def initialize(path, parent)
  @c = HTMLEntities.new
  @disambig = Util::DisambigFiles.new
  @path = path
  @parent = parent
  @files = {}
  @files_to_delete = []
  @xml, @isodoc, @isodoc_presxml =
    parent.xml, parent.isodoc, parent.isodoc_presxml
  @compile = parent.compile
  @documents = parent.documents
  @manifest = parent.manifest
  # Walk the manifest tree; the manifest itself is the top-level parent.
  read_files(@manifest.entry, parent.manifest)
end

Instance Attribute Details

#files_to_deleteObject

Returns the value of attribute files_to_delete.



12
13
14
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 12

# Reader for @files_to_delete, the list of paths that other methods of this
# class append to (see cleanup_section_split_instance).
def files_to_delete
  @files_to_delete
end

#parentObject

Returns the value of attribute parent.



12
13
14
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 12

# Reader for @parent, the object handed to the constructor.
def parent
  @parent
end

Instance Method Details

#add_cover_one_doc_coll(manifest, sectionsplit_manifest, key, entry) ⇒ Object



72
73
74
75
76
77
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 72

# Copies the file at entry[:ref] to "index.html" in the directory of the
# sectionsplit manifest, and registers that copy in +manifest+ under
# "<key>:index1.html" (same attributes as +entry+, with out_path and ref
# overridden).
#
# @return [Hash] the entry stored in +manifest+
def add_cover_one_doc_coll(manifest, sectionsplit_manifest, key, entry)
  target_dir = File.dirname(sectionsplit_manifest.file)
  index_path = File.join(target_dir, "index.html")
  FileUtils.cp(entry[:ref], index_path)
  index_entry = entry.merge(out_path: "index.html", ref: index_path)
  manifest["#{key}:index1.html"] = index_entry
end

#add_document_suffix(identifier, doc) ⇒ Object



237
238
239
240
241
242
243
244
245
246
247
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 237

# Appends a per-document suffix (the NCName form of +identifier+) to the
# anchor/id attributes of +doc+, rewrites URLs in CSS styles accordingly,
# and records the suffix on the root element's "document_suffix" attribute.
#
# @param identifier [String] document identifier the suffix is derived from
# @param doc [Object] parsed XML document (must respond to xpath and root)
def add_document_suffix(identifier, doc)
  suffix = Metanorma::Utils::to_ncname(identifier)
  # Collect the ids before any attribute is rewritten.
  original_ids = doc.xpath("./@id | .//@id").map(&:value)
  Util::anchor_id_attributes.each do |(tag_name, attr_name)|
    Util::add_suffix_to_attrs(doc, suffix, tag_name, attr_name, @isodoc)
  end
  Util::url_in_css_styles(doc, original_ids, suffix)
  root = doc.root
  root["document_suffix"] ||= ""
  root["document_suffix"] += suffix
end

#add_section_splitObject



6
7
8
9
10
11
12
13
14
15
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 6

# Rebuilds @files so that every entry flagged :sectionsplit (and not an
# attachment) is preceded by the entries generated for its split sections.
# The original entry is kept, after its generated entries.
#
# @return [Hash] the rebuilt @files
def add_section_split
  rebuilt = {}
  @files.keys.each do |ident|
    entry = @files[ident]
    if entry[:sectionsplit] && !entry[:attachment]
      saved_out_path = process_section_split_instance(ident, rebuilt)
      cleanup_section_split_instance(ident, rebuilt, saved_out_path)
    end
    rebuilt[ident] = @files[ident]
  end
  @files = rebuilt
end

#add_section_split_attachments(manifest, ident) ⇒ Object



89
90
91
92
93
94
95
96
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 89

# Asks the current sectionsplit object for its attachments location (relative
# to the manifest's directory). When there is one, points the parent entry's
# :out_path at it and returns a manifest entry describing it.
#
# @return [Hash, nil] attachment entry, or nil when there are no attachments
def add_section_split_attachments(manifest, ident)
  out_dir = File.dirname(manifest.file)
  attachments = @sectionsplit.section_split_attachments(out: out_dir)
  return unless attachments

  @files[ident][:out_path] = attachments
  { attachment: true, index: false, out_path: attachments,
    ref: File.join(File.dirname(manifest.file), attachments) }
end

#add_section_split_cover(manifest, sectionsplit_manifest, ident) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 59

# Generates the cover page for a split document, registers it in +manifest+
# under "<ident>:index.html", and points the parent entry's :out_path at it.
# In a one-document collection the cover is additionally registered through
# add_cover_one_doc_coll.
def add_section_split_cover(manifest, sectionsplit_manifest, ident)
  cover = @sectionsplit.section_split_cover(
    sectionsplit_manifest, @parent.dir_name_cleanse(ident),
    one_doc_collection?
  )
  @files[ident][:out_path] = cover
  cover_entry = {
    attachment: true, index: false, out_path: cover,
    ref: File.join(File.dirname(sectionsplit_manifest.file), cover)
  }
  manifest["#{ident}:index.html"] = cover_entry
  one_doc_collection? &&
    add_cover_one_doc_coll(manifest, sectionsplit_manifest, ident, cover_entry)
end

#add_section_split_instance(file, manifest, key, idx) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 98

# Registers one split-section file in +manifest+ as a presentation-XML
# fileref entry derived from its parent entry @files[key].
#
# @param file [Hash] split-file descriptor; :url is read here (and :title by
#   add_section_split_instance_prep)
# @param manifest [Hash] manifest being built; receives the new entry
# @param key [String] key of the parent document in @files
# @param idx [Integer] position of the split file; every file but the first
#   is marked :bare
def add_section_split_instance(file, manifest, key, idx)
  presfile, newkey, xml = add_section_split_instance_prep(file, key)
  anchors = read_anchors(xml)
  parent_entry = @files[key]

  # file[:url] already has placeholders substituted; its directory part (if
  # any) is preserved in the output path, and the .xml extension is dropped.
  url = file[:url]
  stem = File.basename(url, ".xml")
  dir = File.dirname(url)
  target = dir == "." ? stem : File.join(dir, stem)

  split_entry = {
    parentid: key, presentationxml: true, type: "fileref",
    rel_path: target, out_path: target,
    anchors: anchors, anchors_lookup: anchors_lookup(anchors),
    ids: read_ids(xml), format: parent_entry[:format],
    sectionsplit_output: true, indirect_key: @sectionsplit.key,
    bibdata: parent_entry[:bibdata], ref: presfile,
    sectionsplit_filename: parent_entry[:sectionsplit_filename],
    idx: parent_entry[:idx]
  }
  split_entry[:bare] = true unless idx.zero?
  # Split output files are deliberately not queued for deletion; the original
  # parent HTML file is handled in cleanup_section_split_instance.
  manifest[newkey] = split_entry
end

#add_section_split_instance_prep(file, key) ⇒ Object



132
133
134
135
136
137
138
139
140
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 132

# Locates and parses the XML of one split section.
#
# The XML is looked up by basename only, in the directory of the parent
# entry's :ref (file[:url] may carry a directory meant for HTML output).
#
# @return [Array] [path of the section XML, manifest key for the section,
#   parsed XML document]
def add_section_split_instance_prep(file, key)
  source_dir = File.dirname(@files[key][:ref])
  presfile = File.join(source_dir, File.basename(file[:url]))
  split_key = key("#{key.strip} #{file[:title]}")
  [presfile, split_key, Nokogiri::XML(File.read(presfile), &:huge)]
end

#anchors_lookup(anchors) ⇒ Object



40
41
42
43
44
# File 'lib/metanorma/collection/filelookup/utils.rb', line 40

# Flattens a two-level anchors hash (type => { label => id }) into a set-like
# hash { id => true } for membership checks.
#
# @param anchors [Hash{Object => Hash}]
# @return [Hash{Object => true}]
def anchors_lookup(anchors)
  lookup = {}
  anchors.each_value do |by_label|
    by_label.each_value { |id| lookup[id] = true }
  end
  lookup
end

#bibdata_extract(xml) ⇒ Object



87
88
89
90
91
92
93
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 87

# Collects the per-document lookup data from a parsed document: anchors,
# the anchor membership table, all ids, the bibdata element and the root's
# "document_suffix" attribute.
#
# @return [Hash] keys :anchors, :anchors_lookup, :ids, :bibdata,
#   :document_suffix
def bibdata_extract(xml)
  anchors = read_anchors(xml)
  extracted = { anchors: anchors }
  extracted[:anchors_lookup] = anchors_lookup(anchors)
  extracted[:ids] = read_ids(xml)
  extracted[:bibdata] = xml.at(ns("//bibdata"))
  extracted[:document_suffix] = xml.root["document_suffix"]
  extracted
end

#bibdata_process(entry, ident) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 75

# Fills in the bibliographic data of a file entry.
#
# Attachments get a generated bibitem as :bibdata. Any other entry is read
# and parsed, has the document suffix applied, and is merged with the data
# returned by bibdata_extract.
def bibdata_process(entry, ident)
  if entry[:attachment]
    bibitem = Metanorma::Collection::Document.attachment_bibitem(ident)
    return entry[:bibdata] = bibitem.root
  end

  contents, = targetfile(entry, read: true)
  doc = Nokogiri::XML(contents, &:huge)
  add_document_suffix(ident, doc)
  entry.merge!(bibdata_extract(doc))
end

#bibitem_process(entry) ⇒ Object



95
96
97
98
99
100
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 95

# Derives entry[:bibitem] from a copy of entry[:bibdata]: the copy is renamed
# to "bibitem", flagged hidden="true", and has its <ext> child removed when
# one is present.
def bibitem_process(entry)
  bibitem = entry[:bibdata].dup
  entry[:bibitem] = bibitem
  bibitem.name = "bibitem"
  bibitem["hidden"] = "true"
  ext = bibitem.at("./*[local-name() = 'ext']")
  ext&.remove
end

#cleanup_section_split_instance(key, manifest, original_out_path) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 44

# Queues files made redundant by sectionsplit for deletion and links the
# parent entry to the sectionsplit key.
#
# @param key [String] key of the split parent document in @files
# @param manifest [Hash] manifest holding the "<key>:index.html" cover entry
# @param original_out_path [String, nil] parent's out_path as it was before
#   sectionsplit overwrote it; when nil only the cover source is queued
def cleanup_section_split_instance(key, manifest, original_out_path)
  # The sectionsplit index.html in the source directory.
  @files_to_delete << manifest["#{key}:index.html"][:ref]
  # The original outputs, in all formats, under the saved original name.
  unless original_out_path.nil? || original_out_path == false
    stem = original_out_path.sub(/\.xml$/, "")
    base = File.join(@parent.outdir, stem)
    %w(html xml presentation.xml).each do |ext|
      @files_to_delete << "#{base}.#{ext}"
    end
  end
  # @files[key].delete(:ids).delete(:anchors)
  @files[key][:indirect_key] = @sectionsplit.key
end

#derive_format(entry, parent) ⇒ Object



46
47
48
49
50
51
52
53
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 46

# Resolves the output formats of a manifest entry.
#
# Attachments are left alone. An entry without formats inherits a copy of
# its parent's formats, or xml/presentation/html when the parent has none.
# "xml" and "presentation" are then always added.
#
# @return [Array<String>, nil] the resolved formats; nil for attachments
def derive_format(entry, parent)
  return if entry.attachment

  if Array(entry.format).empty?
    inherited = Array(parent.format)
    entry.format = if inherited.empty? then %w(xml presentation html)
                   else inherited.dup
                   end
  end
  entry.format = entry.format | %w(xml presentation)
end

#eachObject



30
31
32
# File 'lib/metanorma/collection/filelookup/base.rb', line 30

# Iterates over the [identifier, entry] pairs of the lookup.
#
# The caller's block is forwarded to the underlying hash; previously it was
# silently dropped, so `lookup.each { ... }` never ran its block. Without a
# block an Enumerator is returned, as before.
#
# @yieldparam ident [String] key of the entry in @files
# @yieldparam entry [Hash] the file entry
# @return [Enumerator, Hash] Enumerator without a block, @files with one
def each(&block)
  @files.each(&block)
end

#each_with_indexObject



34
35
36
# File 'lib/metanorma/collection/filelookup/base.rb', line 34

# Iterates over the [identifier, entry] pairs of the lookup together with
# their position.
#
# The caller's block is forwarded to the underlying hash; previously it was
# silently dropped. Without a block an Enumerator is returned, as before, so
# `each_with_index.map { ... }` keeps working.
#
# @yieldparam pair [Array] [identifier, entry]
# @yieldparam index [Integer] zero-based position
# @return [Enumerator, Hash] Enumerator without a block, @files with one
def each_with_index(&block)
  @files.each_with_index(&block)
end

#file_entry(ref, identifier, idx) ⇒ Object

ref is the absolute source file address; rel_path is the relative source file address, relative to the YAML location; out_path is the destination file address, with any references outside the working directory (../../…) truncated, and based on the relative path; identifier is the id with only spaces, no nbsp; idx is the index of the document in the manifest.



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 108

# Builds the @files entry for a manifest reference that points at a file.
#
# The resulting hash holds:
# - :ref, the absolute source file address (taken from @documents)
# - :rel_path and :out_path, as computed by file_entry_paths
# - :idx, the index of the document in the manifest
# - attributes copied off +ref+ directly and through file_entry_copy
# Keys whose value is nil are dropped.
#
# The former `else { type: "id", ... }` branch was unreachable behind the
# `ref.file or return` guard and has been removed; behaviour is unchanged.
#
# @param ref [Object] manifest entry
# @param identifier [String] the id with only spaces, no nbsp
# @param idx [Integer] index of the document in the manifest
# @return [Hash, nil] the entry, or nil when +ref+ has no file
def file_entry(ref, identifier, idx)
  ref.file or return
  abs = @documents[Util::key(identifier)].file
  # For sectionsplit outputs from a YAML manifest, the full path is computed
  # by combining the sectionsplit_filename directory with ref.file's basename.
  sso = ref.respond_to?(:sectionsplit_output) && ref.sectionsplit_output
  out_path, rel_path = file_entry_paths(ref, idx, sso)
  ret = { type: "fileref", ref: abs, rel_path: rel_path, url: ref.url,
          out_path: out_path, idx: idx,
          output_filename: ref.output_filename,
          sectionsplit_filename: ref.sectionsplit_filename,
          pdffile: ref.pdffile, format: ref.format&.map(&:to_sym) }
  file_entry_copy(ref, ret)
  # A single compact after the copy removes every nil-valued key.
  ret.compact
end

#file_entry_copy(ref, ret) ⇒ Object



228
229
230
231
232
233
234
235
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 228

# Copies optional manifest attributes from +ref+ into the entry hash +ret+.
# Only attributes +ref+ responds to are copied; hyphens are stripped from the
# attribute name to form the hash key (e.g. "presentation-xml" is stored as
# :presentationxml).
def file_entry_copy(ref, ret)
  attrs = %w(attachment sectionsplit index presentation-xml url
             bare-after-first output_filename sectionsplit_filename
             sectionsplit_output)
  attrs.each do |attr|
    next unless ref.respond_to?(attr.to_sym)

    ret[attr.delete("-").to_sym] = ref.send(attr)
  end
end

#file_entry_paths(ref, idx, sso) ⇒ Object



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 145

# Computes [out_path, rel_path] for a manifest entry.
#
# For a sectionsplit output whose sectionsplit_filename carries a directory,
# both paths are built from that directory plus the basename of ref.file
# (rel_path keeps the .xml extension). In every other case out_path comes
# from output_file_path and rel_path is ref.file.
#
# @param sso [Boolean] whether +ref+ is a sectionsplit output
# @return [Array<String>] [out_path, rel_path]
def file_entry_paths(ref, idx, sso)
  stem = File.basename(ref.file, ".xml")
  pattern = sso && ref.respond_to?(:sectionsplit_filename) &&
    ref.sectionsplit_filename
  dir = pattern ? File.dirname(pattern) : "."
  # No usable directory in the pattern: fall back to the regular path.
  return [output_file_path(ref, idx), ref.file] if dir == "."

  joined = File.join(dir, stem)
  [joined, "#{joined}.xml"]
end

#file_entry_struct(ref, abs) ⇒ Object

ref is the absolute source file address; rel_path is the relative source file address, relative to the YAML location; out_path is the destination file address, with any references outside the working directory (../../…) truncated, and based on the relative path; identifier is the id with only spaces, no nbsp; extract_opts are the compilation options extracted as document attributes.



135
136
137
138
139
140
141
142
143
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 135

# Builds a fileref entry for +ref+, including the compilation options found
# in a sibling .adoc source when one exists next to +abs+.
#
# output_file_path requires the document index as its second argument; this
# method used to call it with +ref+ only, which raises ArgumentError. The
# index is now accepted as an optional third parameter and passed through.
#
# @param ref [Object] manifest entry (file, url, pdffile, format are read)
# @param abs [String] absolute source file address
# @param idx [Integer, nil] index of the document in the manifest
# @return [Hash] entry with nil-valued keys removed; :extract_opts is only
#   present when the .adoc source exists
def file_entry_struct(ref, abs, idx = nil)
  adoc = abs.sub(/\.xml$/, ".adoc")
  if adoc.end_with?(".adoc") && File.exist?(adoc)
    opts = Metanorma::Input::Asciidoc.new.extract_options(File.read(adoc))
  end
  { type: "fileref", ref: abs, rel_path: ref.file, url: ref.url,
    out_path: output_file_path(ref, idx), pdffile: ref.pdffile,
    format: ref.format&.map(&:to_sym), extract_opts: opts }.compact
end

#get(ident, attr = nil) ⇒ Object



20
21
22
23
24
# File 'lib/metanorma/collection/filelookup/base.rb', line 20

# Looks up the entry for an identifier, or one attribute of that entry.
#
# An unknown identifier yields nil in both forms. Previously the attribute
# form raised NoMethodError on nil for an unknown identifier, although
# callers such as url? already treat a missing entry as a normal case.
#
# @param ident [String] document identifier (normalised through #key)
# @param attr [Symbol, nil] attribute to read; nil returns the whole entry
# @return [Object, nil]
def get(ident, attr = nil)
  entry = @files[key(ident)]
  return entry unless attr

  entry && entry[attr]
end

#key(ident) ⇒ Object



11
12
13
14
# File 'lib/metanorma/collection/filelookup/base.rb', line 11

# Normalises an identifier into the key used in @files: HTML entities are
# decoded, each run of Unicode space separators becomes one plain space, and
# a leading "metanorma-collection " prefix is dropped.
#
# @param ident [String]
# @return [String]
def key(ident)
  decoded = @c.decode(ident)
  single_spaced = decoded.gsub(/(\p{Zs})+/, " ")
  single_spaced.sub(/^metanorma-collection /, "")
end

#keysObject



16
17
18
# File 'lib/metanorma/collection/filelookup/base.rb', line 16

# Returns the keys of @files, i.e. the identifiers the lookup is indexed by.
def keys
  @files.keys
end

#ns(xpath) ⇒ Object



38
39
40
# File 'lib/metanorma/collection/filelookup/base.rb', line 38

# Delegates to @isodoc.ns to process +xpath+ before it is used in a query.
def ns(xpath)
  @isodoc.ns(xpath)
end

#one_doc_collection?Boolean

Returns:

  • (Boolean)


79
80
81
82
83
84
85
86
87
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 79

# Whether the collection holds at most one document, not counting
# attachments and presentation-XML entries.
#
# @return [Boolean]
def one_doc_collection?
  documents = @files.each_value.count do |entry|
    !(entry[:attachment] || entry[:presentationxml])
  end
  documents <= 1
end

#output_file_path(ref, idx) ⇒ Object

TODO make the output file location reflect source location universally, not just for attachments: no File.basename

For files with a custom directory structure, construct the path with the directory. For files with output_filename, use that (with substitutions). For others, use the basename of ref.file.



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 187

# Computes the destination file name of a manifest entry.
#
# TODO make the output file location reflect source location universally,
# not just for attachments: no File.basename
#
# The name is chosen in this order:
# 1. basename of ref.file, when the sectionsplit_filename directory applies
#    (see below); the directory itself is not added here
# 2. ref.output_filename with its placeholders substituted
# 3. ref.file unchanged, when it contains a directory or is an attachment
# 4. basename of ref.file
# and is then passed through @disambig.source2dest_filename.
#
# A leftover debugging `warn` that wrote every computed path to stderr has
# been removed.
#
# @param ref [Object] manifest entry
# @param idx [Integer] document index, used for filename placeholders
# @return the value returned by @disambig.source2dest_filename
def output_file_path(ref, idx)
  has_custom_dir, file_has_dir, params = output_file_path_prep(ref, idx)
  # The sectionsplit_filename directory applies if the file is being split,
  # is itself a sectionsplit output, or already has a directory. Regular
  # files that merely inherit sectionsplit_filename must not use it.
  is_sectionsplit_output = ref.respond_to?(:sectionsplit_output) &&
    ref.sectionsplit_output
  use_sectionsplit_dir = ref.sectionsplit_filename && has_custom_dir &&
    (ref.sectionsplit || is_sectionsplit_output || file_has_dir)
  f = if use_sectionsplit_dir
        File.basename(ref.file)
      elsif ref.output_filename
        substitute_filename_pattern(ref.output_filename, **params)
      elsif file_has_dir || ref.attachment
        ref.file # preserve the directory structure already in ref.file
      else File.basename(ref.file)
      end
  @disambig.source2dest_filename(f, preserve_dirs: ref.attachment)
end

#output_file_path_prep(ref, idx) ⇒ Object



215
216
217
218
219
220
221
222
223
224
225
226
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 215

# Gathers the facts output_file_path decides on.
#
# @return [Array] three elements:
#   - whether the custom filename pattern (sectionsplit_filename, else
#     output_filename) contains a directory; nil when there is no pattern
#   - whether ref.file itself contains a directory
#   - placeholder values: :document_num, :basename (no extension) and
#     :basename_legacy (with extension)
def output_file_path_prep(ref, idx)
  source = ref.file
  placeholders = { document_num: idx,
                   basename: File.basename(source, ".*"),
                   basename_legacy: File.basename(source) }
  pattern = ref.sectionsplit_filename || ref.output_filename
  pattern_has_dir = pattern && File.dirname(pattern) != "."
  [pattern_has_dir, File.dirname(source) != ".", placeholders]
end

#preserve_directory_structure?(ident) ⇒ Boolean

Checks whether directory structure should be preserved for an identifier. Returns the custom filename if directory structure should be preserved, nil otherwise.

Returns:

  • (Boolean)


359
360
361
362
363
364
365
366
367
368
369
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 359

# Picks the path that would carry a custom output directory for +ident+ and
# returns it only when it actually contains a directory.
#
# The candidate is rel_path (else out_path) for sectionsplit outputs,
# sectionsplit_filename for documents being split, and output_filename for
# everything else.
#
# @return [String, nil] the candidate path, or nil when it has no directory
def preserve_directory_structure?(ident)
  candidate =
    if get(ident, :sectionsplit_output)
      get(ident, :rel_path) || get(ident, :out_path)
    elsif get(ident, :sectionsplit)
      get(ident, :sectionsplit_filename)
    else
      get(ident, :output_filename)
    end
  return nil unless candidate

  File.dirname(candidate) == "." ? nil : candidate
end

#process_section_split_instance(key, manifest) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 17

# Splits one document into sections and registers everything the split
# produces in +manifest+: one entry per section file, the attachments entry
# (when there is one) and the cover page.
#
# @return [String, nil] the document's out_path as it was before the split
#   steps overwrote it, for use in cleanup
def process_section_split_instance(key, manifest)
  saved_out_path = @files[key][:out_path]
  parts, split_manifest = sectionsplit(key)
  # section_split_instance_threads(parts, manifest, key)
  parts.each_with_index do |part, position|
    add_section_split_instance(part, manifest, key, position)
  end
  attachments = add_section_split_attachments(split_manifest, key)
  manifest["#{key}:attachments"] = attachments if attachments
  add_section_split_cover(manifest, split_manifest, key)
  saved_out_path
end

#read_anchors(xml) ⇒ Object

map locality type and label (e.g. “clause” “1”) to id = anchor for a document Note: will only key clauses, which have unambiguous reference label in locality. Notes, examples etc with containers are just plunked against UUIDs, so that their IDs can at least be registered to be tracked as existing.



20
21
22
23
24
25
26
27
# File 'lib/metanorma/collection/filelookup/utils.rb', line 20

# Maps locality type and label (e.g. "clause" "1") to the anchor id, for one
# document. Cross-references are initialised through @isodoc, parsed from
# +xml+, and each anchor is filed by read_anchors1.
#
# @return [Hash{String => Hash}] type => { label => id }
def read_anchors(xml)
  xrefs = @isodoc.xref_init(@lang, @script, @isodoc, @isodoc.i18n,
                            { locale: @locale })
  xrefs.parse(xml)
  anchors = {}
  xrefs.get.each do |id, properties|
    read_anchors1(id, properties, anchors)
  end
  anchors
end

#read_anchors1(key, val, ret) ⇒ Object



29
30
31
32
33
34
35
36
37
38
# File 'lib/metanorma/collection/filelookup/utils.rb', line 29

# Files one anchor into +ret+ under its type (defaulting to "clause", which
# is also written back into +val+).
#
# The anchor is indexed by its label with markup tags stripped; anchors in a
# container, or without a label, are indexed by a random UUID instead. When
# the anchor has a :value, it is indexed under that (tag-stripped) as well.
#
# @param key [String] anchor id
# @param val [Hash] anchor properties (:type, :container, :label, :value)
# @param ret [Hash] accumulator, type => { index => id }
def read_anchors1(key, val, ret)
  type = (val[:type] ||= "clause")
  bucket = (ret[type] ||= {})
  label = val[:label]
  labelled = !val[:container] && !label.nil? && !label.empty?
  index = if labelled then label.gsub(%r{<[^<>]+>}, "")
          else UUIDTools::UUID.random_create.to_s
          end
  bucket[index] = key
  value = val[:value]
  value && (bucket[value.gsub(%r{<[^<>]+>}, "")] = key)
end

#read_file(manifest, idx) ⇒ Object



55
56
57
58
59
60
61
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 55

# Registers one manifest entry in @files: builds its file entry, attaches
# bibdata and bibitem, and stores it under the normalised identifier.
# Does nothing when file_entry yields no entry.
def read_file(manifest, idx)
  ident, sanitised_ident = read_file_idents(manifest)
  entry = file_entry(manifest, sanitised_ident, idx)
  return unless entry

  bibdata_process(entry, ident)
  bibitem_process(entry)
  @files[key(ident)] = entry
end

#read_file_idents(manifest) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 63

# Returns the manifest identifier in two forms: as given, and sanitised
# (passed through @isodoc.docid_prefix with an empty type, then #key).
#
# The genuine identifier is deliberately NOT fished out of the bibdata:
#       if manifest.bibdata and
#         d = manifest.bibdata.docidentifier.detect { |x| x.primary } ||
#           manifest.bibdata.docidentifier.first
#         k = d.id
#         i = key(@isodoc.docid_prefix(d.type, d.id.dup))
#       end
#
# @return [Array<String>] [identifier, sanitised identifier]
def read_file_idents(manifest)
  raw_id = manifest.identifier
  prefixed = @isodoc.docid_prefix("", manifest.identifier.dup)
  [raw_id, key(prefixed)]
end

#read_files(entries, parent, idx = 0) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 34

# Walks the manifest tree depth-first, registering every entry that has a
# file and numbering those entries consecutively.
#
# @param entries [Array, Object, nil] manifest entries at this level
# @param parent [Object] entry the formats are inherited from
# @param idx [Integer] index to give the next file found
# @return [Integer] the next unused index
def read_files(entries, parent, idx = 0)
  Array(entries).reduce(idx) do |next_idx, entry|
    derive_format(entry, parent)
    if entry.file
      read_file(entry, next_idx)
      next_idx += 1
    end
    read_files(entry.entry, entry, next_idx)
  end
end

#read_ids(xml) ⇒ Object

Also parse all ids in doc (including ones which won’t be xref targets)



5
6
7
8
9
10
11
12
# File 'lib/metanorma/collection/filelookup/utils.rb', line 5

# Collects every id attribute in the document (including ones which will
# not be xref targets) into a set-like hash. Text nodes are skipped.
#
# @return [Hash{String => true}]
def read_ids(xml)
  {}.tap do |ids|
    xml.traverse do |node|
      next if node.text?

      id = node["id"]
      ids[id] = true if id
    end
  end
end

#ref_file(ref, data, read, doc) ⇒ Object



280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 280

# Returns the contents (optionally) and the output filename of a fileref.
#
# The filename is the compiled HTML output path relative to the parent's
# outdir when one has been recorded in data[:outputs][:html]; otherwise it
# is a copy of data[:out_path]. For documents with no recorded outputs at
# all, the filename is converted by ref_file_xml2html.
#
# @param ref [String] path of the file to read
# @param data [Hash] file entry
# @param read [Boolean] whether to read the file in
# @param doc [Boolean] whether the file is a Metanorma document
# @return [Array] [contents or nil, filename]
def ref_file(ref, data, read, doc)
  contents = read ? File.read(ref, encoding: "utf-8") : nil
  html_output = doc && data[:outputs] && data[:outputs][:html]
  filename =
    if html_output
      outdir_prefix = %r{^#{Regexp.escape(@parent.outdir)}/}
      data[:outputs][:html].sub(outdir_prefix, "")
    else
      data[:out_path].dup
    end
  filename = ref_file_xml2html(filename) if doc && !data[:outputs]
  [contents, filename]
end

#ref_file_xml2html(filename) ⇒ Object

Checks whether the file has a recognized MIME type (other than XML). If so, .html is not appended (e.g., .svg, .png, .jpg, etc.); the filename is only processed if it does not have a recognized non-XML extension. If the filename ends in .xml, that is replaced with .html. Otherwise (including sectionsplit files like “file.xml.0” or custom titles), .html is appended.



303
304
305
306
307
308
309
310
311
312
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 303

# Converts a source filename into the name of its HTML output.
#
# A filename with a recognised MIME type other than XML is returned
# unchanged. A filename ending in .xml has that extension replaced by .html;
# anything else (e.g. "file.xml.0" or a custom title) gets .html appended.
#
# @param filename [String]
# @return [String]
def ref_file_xml2html(filename)
  recognised = Util::mime_file_recognised?(filename)
  xml = filename.end_with?(".xml")
  return filename if recognised && !xml

  xml ? filename.sub(/\.xml$/, ".html") : "#{filename}.html"
end

#section_split_instance_threads(s, manifest, key) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 32

# Threaded variant of the per-section loop: posts one
# add_section_split_instance call per split file to a pool of 4 threads
# (Concurrent::FixedThreadPool), then shuts the pool down and blocks until
# it has terminated.
#
# NOTE(review): @mutex is created here but is not used anywhere in this
# method, so the manifest writes made by the posted jobs are not serialised
# by it — confirm whether add_section_split_instance is meant to synchronise
# on @mutex before this variant is enabled.
#
# @param s [Array] split-file descriptors
# @param manifest [Hash] manifest the jobs write into
# @param key [String] key of the parent document in @files
def section_split_instance_threads(s, manifest, key)
  @mutex = Mutex.new
  pool = Concurrent::FixedThreadPool.new(4)
  s.each_with_index do |f1, i|
    pool.post do
      add_section_split_instance(f1, manifest, key, i)
    end
  end
  pool.shutdown
  pool.wait_for_termination
end

#sectionsplit(ident) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 142

# Runs sectionsplit on one document and builds the manifest for the result.
# The Sectionsplit instance is kept in @sectionsplit for the follow-up steps.
#
# @param ident [String] key of the document in @files
# @return [Array] [split-file descriptors sorted by :order, collection
#   manifest returned by the Sectionsplit instance]
def sectionsplit(ident)
  entry = @files[ident]
  file = entry[:ref]
  source_dir = File.dirname(file)
  # base must always be just a basename, never contain directory components;
  # directory structure comes from the sectionsplit_filename pattern only.
  base = File.basename(entry[:out_path] || file)
  @sectionsplit = ::Metanorma::Collection::Sectionsplit.new(
    input: file, base: base,
    dir: source_dir, output: entry[:out_path],
    compile_opts: @parent.compile_options, ident: ident,
    fileslookup: self, isodoc: @isodoc,
    parent_idx: entry[:idx],
    sectionsplit_filename: entry[:sectionsplit_filename],
    isodoc_presxml: @isodoc_presxml,
    document_suffix: entry[:document_suffix]
  )
  coll = @sectionsplit.sectionsplit.sort_by { |part| part[:order] }
  xml = Nokogiri::XML(File.read(file, encoding: "UTF-8"), &:huge)
  manifest = @sectionsplit.collection_manifest(File.basename(file), coll,
                                               xml, nil, File.dirname(file))
  [coll, manifest]
end

#set(ident, attr, value) ⇒ Object



26
27
28
# File 'lib/metanorma/collection/filelookup/base.rb', line 26

# Sets one attribute on the entry of an identifier.
#
# @param ident [String] document identifier (normalised through #key)
# @return the assigned value
def set(ident, attr, value)
  entry = @files[key(ident)]
  entry[attr] = value
end

#substitute_filename_pattern(pattern, options = {}) ⇒ Object

Substitute special strings in filename patterns

Parameters:

  • pattern (String)

    filename pattern with placeholders

  • options (Hash) (defaults to: {})

    substitution values

Options Hash (options):

  • :document_num (Integer)

    document index

  • :basename (String)

    filename without extension

  • :basename_legacy (String)

    full filename with extension

  • :sectionsplit_num (Integer)

    sectionsplit index



168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 168

# Substitutes the special placeholders in a filename pattern.
#
# Placeholders are replaced in this order, each only when its option is set:
# {document-num}, {basename}, {basename_legacy}, {sectionsplit-num}.
# Values are inserted literally: the block form of gsub is used so that a
# backslash sequence in a file name (e.g. "\0" or "\1") is not interpreted
# as a back-reference, which the string-replacement form does.
#
# @param pattern [String, nil] filename pattern with placeholders
# @param options [Hash] substitution values
# @option options [Integer] :document_num document index
# @option options [String] :basename filename without extension
# @option options [String] :basename_legacy full filename with extension
# @option options [Integer] :sectionsplit_num sectionsplit index
# @return [String, nil] a new string with substitutions applied; +pattern+
#   itself when it is nil or false
def substitute_filename_pattern(pattern, options = {})
  return pattern unless pattern

  placeholders = { "{document-num}" => :document_num,
                   "{basename}" => :basename,
                   "{basename_legacy}" => :basename_legacy,
                   "{sectionsplit-num}" => :sectionsplit_num }
  placeholders.reduce(pattern.dup) do |result, (placeholder, option)|
    value = options[option]
    value ? result.gsub(placeholder) { value.to_s } : result
  end
end

#targetfile(data, options) ⇒ Array<String, nil>

Returns the file contents and the output filename for a file in the collection, given a docref entry. The URL should end with html, pdf, or whatever the output format is; it is formed relative to the YAML file, not the input path, and relative to the calling function.

Parameters:

  • data (Hash)

    docref entry

  • read (Boolean)

    read the file in and return it

  • doc (Boolean)

    I am a Metanorma document,

  • relative (Boolean)

    Return output path,

Returns:



266
267
268
269
270
271
272
273
274
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 266

# Returns the file contents and output filename for a docref entry.
#
# @param data [Hash] docref entry
# @param options [Hash] :read (default false) read the file in and return
#   it; :doc (default true) the file is a Metanorma document; :relative
#   (default false) use data[:rel_path] instead of data[:ref] as the source
# @return [Array<String, nil>] [contents or nil, filename]
def targetfile(data, options)
  opts = { read: false, doc: true, relative: false }.merge(options)
  source = opts[:relative] ? data[:rel_path] : data[:ref]
  return xml_file(data[:id], opts[:read]) unless data[:type] == "fileref"

  ref_file(source, data, opts[:read], opts[:doc])
end

#targetfile_id(ident, options) ⇒ Object



276
277
278
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 276

# Same as #targetfile, but takes the document identifier instead of the
# entry itself.
def targetfile_id(ident, options)
  entry = get(ident)
  targetfile(entry, options)
end

#url(ident, options) ⇒ Object

Returns the citation URL for a file; the URL should end with html, pdf, or whatever the output format is.

Parameters:

  • doc (Boolean)

    I am a Metanorma document,



49
50
51
52
# File 'lib/metanorma/collection/filelookup/utils.rb', line 49

# Returns the citation URL for a file: the entry's explicit :url when it has
# one, otherwise the output filename computed by #targetfile.
#
# @param ident [String] document identifier
# @param options [Hash] options passed on to #targetfile
def url(ident, options)
  entry = get(ident)
  explicit = entry[:url]
  return explicit if explicit

  targetfile(entry, options)[1]
end

#url?(ident) ⇒ Boolean

are references to the file to be linked to a file in the collection, or externally? Determines whether file suffix anchors are to be used

Returns:

  • (Boolean)


6
7
8
9
# File 'lib/metanorma/collection/filelookup/base.rb', line 6

# Whether references to the file are to be linked externally (the entry has
# an explicit :url) rather than to a file in the collection.
#
# @return the entry's :url value, or false when there is no entry
def url?(ident)
  entry = get(ident)
  entry ? entry[:url] : false
end

#xml_file(id, read) ⇒ Object



314
315
316
317
318
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 314

# Returns the contents (optionally) and the output filename of a document
# held inline in the collection XML as a doc-container with the given id.
#
# @param id [String] id of the doc-container
# @param read [Boolean] whether to serialise the container and return it
# @return [Array] [XML string or nil, "<id>.html"]
def xml_file(id, read)
  contents =
    if read
      container = @xml.at(ns("//doc-container[@id = '#{id}']"))
      container.to_xml
    end
  [contents, "#{id}.html"]
end