Class: Uniword::Validation::Rules::DocumentContext

Inherits:
Object
  • Object
show all
Defined in:
lib/uniword/validation/rules/document_context.rb

Overview

Provides unified access to a DOCX package for validation rules.

Lazy-loads and caches parsed XML parts. Rules use this to access the document content without knowing about ZIP internals.

Examples:

Access a parsed part

context.document_xml  # => Moxml::Document
context.part_exists?("word/styles.xml")  # => true

Constant Summary collapse

W_NS =
"http://schemas.openxmlformats.org/wordprocessingml/2006/main"
R_NS =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships"
RELS_NS =
"http://schemas.openxmlformats.org/package/2006/relationships"
CT_NS =
"http://schemas.openxmlformats.org/package/2006/content-types"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ DocumentContext

Initialize context for a DOCX file.

Parameters:

  • path (String)

    Path to .docx file



28
29
30
31
32
33
# File 'lib/uniword/validation/rules/document_context.rb', line 28

# Build a context for the DOCX file at +path+.
#
# No archive is opened here: the ZIP handle starts out nil and the
# parsed-part cache starts out empty.
#
# @param path [String] path to the .docx file
def initialize(path)
  @path, @zip, @parsed_parts = path, nil, {}
  @moxml = Moxml.new(:nokogiri)
end

Instance Attribute Details

#path ⇒ Object (readonly)

Returns the value of attribute path.



23
24
25
# File 'lib/uniword/validation/rules/document_context.rb', line 23

# Reader for the DOCX file path stored by the constructor.
#
# @return [String] the path this context was created with
def path
  @path
end

Instance Method Details

#close ⇒ Object

Close the ZIP archive.



43
44
45
46
47
# File 'lib/uniword/validation/rules/document_context.rb', line 43

# Close the ZIP handle if one is held, forget it, and empty the cache of
# parsed parts.
#
# @return [Hash] the (now empty) parsed-part cache
def close
  @zip.close unless @zip.nil?
  @zip = nil
  @parsed_parts.clear
end

#content_types ⇒ Hash

Get all declared content types.

Returns:

  • (Hash)

    { extension => content_type, part_name => content_type }



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/uniword/validation/rules/document_context.rb', line 149

# Read [Content_Types].xml and flatten its declarations into one Hash.
#
# Default elements are keyed by their Extension attribute and Override
# elements by their PartName attribute; elements lacking the key attribute
# are skipped. Overrides are processed after defaults.
#
# @return [Hash] { extension => content_type, part_name => content_type },
#   or an empty Hash when the part is absent
def content_types
  xml = part_raw("[Content_Types].xml")
  return {} unless xml

  parsed = Nokogiri::XML(xml)
  queries = [["//xmlns:Default", "Extension"], ["//xmlns:Override", "PartName"]]

  queries.each_with_object({}) do |(query, key_attribute), mapping|
    parsed.xpath(query, "xmlns" => CT_NS).each do |node|
      key = node[key_attribute]
      mapping[key] = node["ContentType"] if key
    end
  end
end

#document_xml ⇒ Object

Convenience: parsed word/document.xml



92
93
94
# File 'lib/uniword/validation/rules/document_context.rb', line 92

# Shortcut for the main document part.
#
# @return [Moxml::Document, nil] whatever #part yields for word/document.xml
def document_xml
  part_name = "word/document.xml"
  part(part_name)
end

#font_table_xml ⇒ Object

Convenience: parsed word/fontTable.xml



112
113
114
# File 'lib/uniword/validation/rules/document_context.rb', line 112

# Shortcut for the font table part.
#
# @return [Moxml::Document, nil] whatever #part yields for word/fontTable.xml
def font_table_xml
  part_name = "word/fontTable.xml"
  part(part_name)
end

#model ⇒ Uniword::Docx::Package?

Convenience: Uniword model-level access to the DOCX package. Lazy-loads via Package.from_file for model-driven validation rules.

Returns:

  • (Uniword::Docx::Package, nil)

    The loaded package model, or nil when loading fails



120
121
122
123
124
125
# File 'lib/uniword/validation/rules/document_context.rb', line 120

# Model-level view of the DOCX package, loaded lazily through
# Uniword::Docx::Package.from_file and memoised.
#
# A failed load is memoised as nil too. With a plain `||=` a nil result
# would trigger a fresh load attempt (and a fresh log line) on every call,
# so the presence of the instance variable is used as the "already tried"
# marker instead.
#
# @return [Uniword::Docx::Package, nil] the package model, or nil if
#   loading raised a StandardError
def model
  return @model if defined?(@model)

  @model = Uniword::Docx::Package.from_file(@path)
rescue StandardError => e
  Uniword.logger&.debug { "Package model load failed: #{e.message}" }
  @model = nil
end

#numbering_ids ⇒ Set<String>

Collect all numId values from numbering.xml.

Returns:

  • (Set<String>)

    numId values



184
185
186
187
188
189
190
191
192
193
# File 'lib/uniword/validation/rules/document_context.rb', line 184

# Gather the w:numId attribute value of every w:num element found under
# the root of numbering.xml.
#
# @return [Set<String>] numId values; empty when numbering.xml is
#   unavailable
def numbering_ids
  collected = Set.new
  numbering = numbering_xml

  if numbering
    numbering.root.xpath(".//w:num/@w:numId", "w" => W_NS).each do |num_id|
      collected.add(num_id.value)
    end
  end

  collected
end

#numbering_xml ⇒ Object

Convenience: parsed word/numbering.xml



102
103
104
# File 'lib/uniword/validation/rules/document_context.rb', line 102

# Shortcut for the numbering definitions part.
#
# @return [Moxml::Document, nil] whatever #part yields for word/numbering.xml
def numbering_xml
  part_name = "word/numbering.xml"
  part(part_name)
end

#part(name) ⇒ Moxml::Document?

Get a parsed (Moxml) document for a part. Results are cached.

Parameters:

  • name (String)

    Part path

Returns:

  • (Moxml::Document, nil)


80
81
82
83
84
85
86
87
88
89
# File 'lib/uniword/validation/rules/document_context.rb', line 80

# Return the parsed document for a package part, parsing it at most once.
#
# The outcome is stored in the cache under +name+ whether it is a parsed
# document or nil (part missing, no raw content, or any StandardError raised
# while checking/reading/parsing), so later calls are answered from the
# cache.
#
# @param name [String] part path
# @return [Moxml::Document, nil]
def part(name)
  cache = @parsed_parts
  return cache[name] if cache.key?(name)

  parsed = nil
  if part_exists?(name)
    xml = part_raw(name)
    parsed = @moxml.parse(xml) if xml
  end
  cache[name] = parsed
rescue StandardError => e
  Uniword.logger&.debug { "Part parse failed (#{name}): #{e.message}" }
  @parsed_parts[name] = nil
end

#part_exists?(name) ⇒ Boolean

Check if a part exists in the package.

Parameters:

  • name (String)

    Part path (e.g., “word/document.xml”)

Returns:

  • (Boolean)


60
61
62
# File 'lib/uniword/validation/rules/document_context.rb', line 60

# Tell whether the archive has an entry with the given name.
#
# @param name [String] part path (e.g., "word/document.xml")
# @return [Boolean] strictly true or false
def part_exists?(name)
  zip.find_entry(name) ? true : false
end

#part_raw(name) ⇒ String?

Get raw content of a part.

Parameters:

  • name (String)

    Part path

Returns:

  • (String, nil)

    Raw XML content



68
69
70
71
72
73
# File 'lib/uniword/validation/rules/document_context.rb', line 68

# Read the raw content of a package part.
#
# @param name [String] part path
# @return [String, nil] raw content, or nil when the archive has no such
#   entry
def part_raw(name)
  entry = zip.find_entry(name)
  return nil unless entry

  # Use the block form so the input stream is closed when the block exits
  # instead of being left open after every read. The content is captured
  # inside the block rather than taken from get_input_stream's own return
  # value, which is not guaranteed to be the block result.
  content = nil
  entry.get_input_stream { |stream| content = stream.read }
  content
end

#relationships(rels_path = "word/_rels/document.xml.rels") ⇒ Array<Hash>

Get all relationships from a .rels file.

Parameters:

  • rels_path (String) (defaults to: "word/_rels/document.xml.rels")

    Path to .rels file

Returns:

  • (Array<Hash>)
    { id:, type:, target:, target_mode: }


131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/uniword/validation/rules/document_context.rb', line 131

# Parse a .rels part and describe each Relationship element as a Hash.
#
# Attributes that are absent on an element come through as whatever the
# node's #[] returns for them.
#
# @param rels_path [String] path to the .rels file inside the package
# @return [Array<Hash>] one { id:, type:, target:, target_mode: } Hash per
#   Relationship element; empty when the .rels part is absent
def relationships(rels_path = "word/_rels/document.xml.rels")
  xml = part_raw(rels_path)
  return [] unless xml

  # Result key => XML attribute name, in the order the keys are emitted.
  attribute_for = { id: "Id", type: "Type", target: "Target", target_mode: "TargetMode" }

  nodes = Nokogiri::XML(xml).xpath("//xmlns:Relationship", "xmlns" => RELS_NS)
  nodes.map do |node|
    attribute_for.transform_values { |attribute| node[attribute] }
  end
end

#settings_xml ⇒ Object

Convenience: parsed word/settings.xml



107
108
109
# File 'lib/uniword/validation/rules/document_context.rb', line 107

# Shortcut for the document settings part.
#
# @return [Moxml::Document, nil] whatever #part yields for word/settings.xml
def settings_xml
  part_name = "word/settings.xml"
  part(part_name)
end

#style_ids ⇒ Set<String>

Collect all style IDs from styles.xml.

Returns:

  • (Set<String>)

    Style ID values



170
171
172
173
174
175
176
177
178
179
# File 'lib/uniword/validation/rules/document_context.rb', line 170

# Gather the w:styleId attribute value of every w:style element found under
# the root of styles.xml.
#
# @return [Set<String>] style ID values; empty when styles.xml is
#   unavailable
def style_ids
  collected = Set.new
  styles = styles_xml

  if styles
    styles.root.xpath(".//w:style/@w:styleId", "w" => W_NS).each do |style_id|
      collected.add(style_id.value)
    end
  end

  collected
end

#styles_xml ⇒ Object

Convenience: parsed word/styles.xml



97
98
99
# File 'lib/uniword/validation/rules/document_context.rb', line 97

# Shortcut for the style definitions part.
#
# @return [Moxml::Document, nil] whatever #part yields for word/styles.xml
def styles_xml
  part_name = "word/styles.xml"
  part(part_name)
end

#zip ⇒ Zip::File

Open the ZIP archive.

Returns:

  • (Zip::File)


38
39
40
# File 'lib/uniword/validation/rules/document_context.rb', line 38

# Hand back the ZIP handle, opening the archive at the stored path the
# first time it is needed and reusing that handle afterwards.
#
# @return [Zip::File]
def zip
  return @zip if @zip

  @zip = Zip::File.open(@path)
end

#zip_entries ⇒ Array<String>

List all entries in the ZIP.

Returns:

  • (Array<String>)

    Entry names



52
53
54
# File 'lib/uniword/validation/rules/document_context.rb', line 52

# Names of all entries in the archive, in the order the archive reports
# them.
#
# @return [Array<String>] entry names
def zip_entries
  zip.entries.map { |entry| entry.name }
end