Module: Zip::Merge

Defined in:
lib/zip/merge.rb,
lib/zip/merge/version.rb

Defined Under Namespace

Modules: Version

Constant Summary collapse

LOCAL =
0x04034b50
CENTRAL =
0x02014b50
EOCD =
0x06054b50
DOS_EPOCH =
[0, 0, 0x21, 0].pack("C*")
# Exception raised when ZIP rendering cannot proceed. Wraps the structured
# diagnostic (see +render_error+/+diagnostic+) so callers can inspect the
# category and schema path; the exception message mirrors the diagnostic's.
RenderError =
Class.new(StandardError) do
  # The structured diagnostic this error was constructed from.
  attr_reader :diagnostic

  # @param diagnostic [Object] responds to #message; retained and used as the exception message
  def initialize(diagnostic)
    @diagnostic = diagnostic
    super(diagnostic.message)
  end
end
VERSION =
Version::VERSION

Class Method Summary collapse

Class Method Details

.central_directory_record(record) ⇒ Object



235
236
237
# File 'lib/zip/merge.rb', line 235

# Serializes one central-directory file header (fixed 46 bytes + name).
# Version made-by/needed are pinned to 20; mod time/date use DOS_EPOCH;
# extra field, comment, disk number, and attributes are all zero.
#
# @param record [Hash] :name, :flags, :method, :crc32, :compressed_size,
#   :uncompressed_size, :offset
# @return [String] binary central-directory record bytes
def central_directory_record(record)
  name = record[:name]
  head = [CENTRAL, 20, 20, record[:flags], record[:method]].pack("Vvvvv")
  tail = [
    record[:crc32], record[:compressed_size], record[:uncompressed_size],
    name.bytesize, 0, 0, 0, 0, 0, record[:offset]
  ].pack("VVVvvvvvVV")
  head + DOS_EPOCH + tail + name
end

.central_record_from_entry(entry, offset) ⇒ Object



251
252
253
# File 'lib/zip/merge.rb', line 251

# Converts a preserved archive entry into the plain-hash record shape that
# +central_directory_record+ consumes, re-anchored at +offset+ in the output.
#
# @param entry [Object] responds to #path, #compression, #crc32 (hex string),
#   #compressed_size, #uncompressed_size
# @param offset [Integer] byte offset of the entry's local record in the output
# @return [Hash] central-directory record fields with flags forced to 0
def central_record_from_entry(entry, offset)
  compression_method = entry.compression == "deflate" ? 8 : 0
  checksum = entry.crc32.to_i(16)
  {
    name: entry.path,
    method: compression_method,
    crc32: checksum,
    compressed_size: entry.compressed_size,
    uncompressed_size: entry.uncompressed_size,
    offset: offset,
    flags: 0
  }
end

.compression_name(method) ⇒ Object



268
269
270
271
272
273
274
# File 'lib/zip/merge.rb', line 268

# Maps a numeric ZIP compression method to its display name.
# Known methods are 0 (stored) and 8 (deflate); anything else is labelled
# "method-<n>" so unknown codes remain visible in reports.
#
# @param method [Integer] compression method from the central directory
# @return [String]
def compression_name(method)
  known = { 0 => "stored", 8 => "deflate" }
  known.fetch(method) { "method-#{method}" }
end

.decision(report, path, operation, disposition, reason) ⇒ Object



259
260
261
262
# File 'lib/zip/merge.rb', line 259

# Records a member decision on the report and marks the member's schema
# node as rewritten in the merge report.
#
# @param report [Object] responds to #member_decisions and #merge_report
# @param path [String] normalized member path
# @param operation [String] e.g. "add", "delete", "rewrite"
# @param disposition [String] e.g. "requires_renderer"
# @param reason [String] human-readable justification
def decision(report, path, operation, disposition, reason)
  member_decision = TreeHaver::ZipMemberDecision.new(
    normalized_path: path,
    operation: operation,
    disposition: disposition,
    reason: reason
  )
  report.member_decisions << member_decision
  report.merge_report.rewritten_nodes << schema_path(path)
end

.diagnostic(category, schema_path, message) ⇒ Object



303
304
305
# File 'lib/zip/merge.rb', line 303

# Builds an error-severity diagnostic record for merge/render reporting.
#
# @param category [String] machine-readable problem category
# @param schema_path [String] schema location the problem refers to
# @param message [String] human-readable description
# @return [TreeHaver::BinaryDiagnostic] severity is always "error"
def diagnostic(category, schema_path, message)
  TreeHaver::BinaryDiagnostic.new(severity: "error", category: category, message: message, schema_path: schema_path)
end

.empty_report ⇒ Object



159
160
161
# File 'lib/zip/merge.rb', line 159

# Returns a fresh ZIP merge report with every collection empty, used as the
# starting point before planning or rendering populates it.
#
# @return [TreeHaver::BinaryMergeReport] format "zip", schema "zip.ksy"
def empty_report
  TreeHaver::BinaryMergeReport.new(format: "zip", schema: "zip.ksy", matched_schema_paths: [], preserved_ranges: [], rewritten_nodes: [], checksum_updates: [], nested_dispatches: [], diagnostics: [])
end

.entries_by_path(entries) ⇒ Object



288
289
290
# File 'lib/zip/merge.rb', line 288

# Indexes entries by their normalized path. Later entries with the same
# normalized path overwrite earlier ones.
#
# @param entries [Enumerable] items responding to #normalized_path
# @return [Hash{String=>Object}]
def entries_by_path(entries)
  entries.each_with_object({}) do |entry, index|
    index[entry.normalized_path] = entry
  end
end

.eocd_record(entries, size, offset) ⇒ Object



239
240
241
# File 'lib/zip/merge.rb', line 239

# Serializes the 22-byte end-of-central-directory record for a single-disk
# archive with no comment (disk numbers and comment length are zero; the
# per-disk and total entry counts are identical).
#
# @param entries [Integer] number of central-directory entries
# @param size [Integer] central directory size in bytes
# @param offset [Integer] central directory start offset
# @return [String] binary EOCD bytes
def eocd_record(entries, size, offset)
  fields = [EOCD, 0, 0, entries, entries, size, offset, 0]
  fields.pack("VvvvvVVv")
end

.escapes_root?(path) ⇒ Boolean

Returns:

  • (Boolean)


276
277
278
# File 'lib/zip/merge.rb', line 276

# Whether a member path escapes the archive root: absolute paths, or any
# point where ".." components outnumber preceding real components.
# Backslashes are treated as separators; "." segments are depth-neutral.
#
# @param path [String] raw member path
# @return [Boolean]
def escapes_root?(path)
  return true if path.start_with?("/")
  depth = 0
  path.tr("\\", "/").split("/").each do |segment|
    next if segment == "."
    depth += segment == ".." ? -1 : 1
    return true if depth.negative?
  end
  false
end

.nested_family(path) ⇒ Object



292
293
294
295
296
297
# File 'lib/zip/merge.rb', line 292

# Maps a member path to the nested merge family that can handle its
# content, or nil when no structured family applies. Markdown/YAML match
# case-insensitively; JSON and XML match exact lowercase suffixes only.
#
# @param path [String]
# @return [String, nil] "markdown", "json", "yaml", "xml", or nil
def nested_family(path)
  if path.match?(/\.m(?:d|arkdown)\z/i)
    "markdown"
  elsif path.end_with?(".json")
    "json"
  elsif path.match?(/\.ya?ml\z/i)
    "yaml"
  elsif path.end_with?(".xml")
    "xml"
  end
end

.new_stored_zip(entries) ⇒ Object



145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/zip/merge.rb', line 145

# Builds a complete ZIP archive from scratch with every member stored
# (uncompressed), members ordered by sorted name, followed by the central
# directory and EOCD record.
#
# @param entries [Hash{String=>String}] member name => member content
# @return [String] binary archive bytes
def new_stored_zip(entries)
  archive = +"".b
  directory = []
  entries.keys.sort.each do |name|
    entry = path_entry(name, entries[name])
    local_bytes, record = rendered_local_record(entry, entries[name].b, 0, archive.bytesize)
    archive << local_bytes
    directory << record
  end
  directory_start = archive.bytesize
  directory.each { |record| archive << central_directory_record(record) }
  directory_size = archive.bytesize - directory_start
  archive << eocd_record(directory.length, directory_size, directory_start)
  archive
end

.normalize_zip_path(path) ⇒ Object



264
265
266
# File 'lib/zip/merge.rb', line 264

# Normalizes a member path: backslashes become slashes, "." segments are
# dropped, and ".." pops the previous segment (silently ignored at the
# root — escape detection is handled separately by +escapes_root?+).
# Note a trailing "/" does not survive split/join.
#
# @param path [String]
# @return [String] normalized path joined with "/"
def normalize_zip_path(path)
  stack = []
  path.tr("\\", "/").split("/").each do |segment|
    next if segment == "."
    if segment == ".."
      stack.pop
    else
      stack << segment
    end
  end
  stack.join("/")
end

.parse_zip_inventory(source) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/zip/merge.rb', line 26

# Parses raw ZIP bytes into a full inventory report: one entry per
# central-directory record with its exact byte ranges (local header, data,
# central record), plus unsafe-entry findings and an empty merge report.
#
# @param source [String] raw ZIP archive bytes
# @return [TreeHaver::ZipFamilyReport] entries sorted by local-header offset
def parse_zip_inventory(source)
  bytes = source.b # binary-encoded copy; all offsets below are byte positions
  central = scan_central_directory(bytes)
  locals = scan_local_headers(bytes, central[:records])
  entries = central[:records].map do |name, record|
    local = locals.fetch(name)
    TreeHaver::ZipArchiveEntry.new(
      path: name,
      normalized_path: normalize_zip_path(name),
      directory: name.end_with?("/"),
      compression: compression_name(record[:method]),
      compressed_size: record[:compressed_size],
      uncompressed_size: record[:uncompressed_size],
      crc32: "%08x" % record[:crc32], # zero-padded lowercase hex
      # Local header spans from its recorded offset up to where member data begins.
      local_header_range: TreeHaver::ByteRange.new(start_byte: record[:local_offset], end_byte: local[:data_start]),
      data_range: TreeHaver::ByteRange.new(start_byte: local[:data_start], end_byte: local[:data_start] + record[:compressed_size]),
      central_directory_range: record[:range]
    )
  end.sort_by { |entry| entry.local_header_range.start_byte }

  TreeHaver::ZipFamilyReport.new(
    archive: TreeHaver::ZipArchiveInfo.new(format: "zip", schema: "zip.ksy", entry_count: entries.length, central_directory_range: central[:range]),
    entries: entries,
    member_decisions: [],
    unsafe_entries: unsafe_entries(entries, central[:records]),
    merge_report: empty_report
  )
end

.path_entry(name, content) ⇒ Object



255
256
257
# File 'lib/zip/merge.rb', line 255

# Builds a synthetic archive entry for content about to be rendered as a
# stored (uncompressed) member: sizes come from the content, the CRC is
# computed here, and all byte ranges are zeroed placeholders until the
# member is actually written.
#
# @param name [String] member path inside the archive
# @param content [String] member bytes
# @return [TreeHaver::ZipArchiveEntry]
def path_entry(name, content)
  TreeHaver::ZipArchiveEntry.new(path: name, normalized_path: normalize_zip_path(name), directory: name.end_with?("/"), compression: "stored", compressed_size: content.bytesize, uncompressed_size: content.bytesize, crc32: "%08x" % Zlib.crc32(content), local_header_range: TreeHaver::ByteRange.new(start_byte: 0, end_byte: 0), data_range: TreeHaver::ByteRange.new(start_byte: 0, end_byte: 0), central_directory_range: TreeHaver::ByteRange.new(start_byte: 0, end_byte: 0))
end

.plan_zip_merge(ancestor, current, incoming) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/zip/merge.rb', line 55

# Plans a three-way ZIP merge. Every path seen in any of the three
# inventories is classified, in branch order: reject (unsafe), add (only
# in incoming), delete (removed from incoming), preserve (identical to
# ancestor on both sides), delegate (structured nested family), or
# rewrite (anything else). Branch order matters: safety trumps all, and
# preservation is only considered when both sides match the ancestor.
#
# @param ancestor [TreeHaver::ZipFamilyReport]
# @param current [TreeHaver::ZipFamilyReport]
# @param incoming [TreeHaver::ZipFamilyReport] its archive/entries seed the result
# @return [TreeHaver::ZipFamilyReport] with member_decisions and merge_report filled in
def plan_zip_merge(ancestor, current, incoming)
  report = TreeHaver::ZipFamilyReport.new(
    archive: incoming.archive,
    entries: incoming.entries,
    member_decisions: [],
    unsafe_entries: incoming.unsafe_entries || [],
    merge_report: empty_report
  )
  ancestor_entries = entries_by_path(ancestor.entries)
  current_entries = entries_by_path(current.entries)
  incoming_entries = entries_by_path(incoming.entries)
  unsafe_by_path = report.unsafe_entries.to_h { |entry| [entry.normalized_path, entry] }

  # Union of all paths, processed in sorted order for deterministic output.
  (ancestor_entries.keys | current_entries.keys | incoming_entries.keys).sort.each do |path|
    ancestor_entry = ancestor_entries[path]
    current_entry = current_entries[path]
    incoming_entry = incoming_entries[path]
    if unsafe_by_path[path]
      unsafe = unsafe_by_path[path]
      report.member_decisions << TreeHaver::ZipMemberDecision.new(normalized_path: path, operation: "reject", disposition: "unsafe", reason: unsafe.reason)
      report.merge_report.diagnostics << diagnostic(unsafe.category, schema_path(path), unsafe.reason)
    elsif current_entry.nil? && incoming_entry
      decision(report, path, "add", "requires_renderer", "member exists only in incoming archive")
    elsif current_entry && incoming_entry.nil?
      decision(report, path, "delete", "requires_renderer", "member was removed from incoming archive")
    elsif ancestor_entry && same_entry?(current_entry, ancestor_entry) && same_entry?(incoming_entry, ancestor_entry)
      # Byte-identical on both sides: the raw local record can be copied through.
      report.member_decisions << TreeHaver::ZipMemberDecision.new(normalized_path: path, operation: "preserve", disposition: "safe", reason: "member is unchanged from ancestor")
      report.merge_report.preserved_ranges.concat([current_entry.local_header_range, current_entry.data_range])
    elsif (family = nested_family(path))
      # Structured content: hand the member body to a nested family merge.
      report.member_decisions << TreeHaver::ZipMemberDecision.new(normalized_path: path, operation: "delegate", disposition: "requires_renderer", nested_family: family, reason: "structured member can be merged by a nested family before ZIP rendering")
      report.merge_report.nested_dispatches << TreeHaver::BinaryNestedDispatch.new(schema_path: "#{schema_path(path)}/data", family: family, status: "planned")
      report.merge_report.rewritten_nodes << schema_path(path)
      report.merge_report.checksum_updates << "#{schema_path(path)}/crc32"
    else
      decision(report, path, "rewrite", "requires_renderer", "member bytes or metadata changed")
      report.merge_report.checksum_updates << "#{schema_path(path)}/crc32"
    end
    report.merge_report.matched_schema_paths << schema_path(path)
  end
  # Any rewrite/checksum change forces the central directory to be regenerated.
  unless report.merge_report.rewritten_nodes.empty? && report.merge_report.checksum_updates.empty?
    report.merge_report.rewritten_nodes << "/central_directory"
    report.merge_report.checksum_updates.concat(["/central_directory/size", "/central_directory/offset"])
  end
  report
end

.raw_local_record_ranges(source, entries) ⇒ Object



243
244
245
246
247
248
249
# File 'lib/zip/merge.rb', line 243

# Maps each entry's normalized path to the byte range of its complete raw
# local record (header + data + any trailing bytes), inferred from where
# the next local record begins.
# NOTE(review): +source+ is accepted but never read here — confirm it is
# kept only for call-site symmetry.
#
# @param entries [Hash{String=>Object}] entries responding to
#   #normalized_path, #local_header_range, #central_directory_range
# @return [Hash{String=>TreeHaver::ByteRange}]
def raw_local_record_ranges(source, entries)
  ordered = entries.values.sort_by { |entry| entry.local_header_range.start_byte }
  ordered.each_with_index.to_h do |entry, index|
    # A record ends where the next local record starts. The last record is
    # assumed to end at its own central-directory record's offset —
    # NOTE(review): this equals the central directory start only if that
    # entry's CD record comes first; verify for archives whose CD order
    # differs from local order.
    end_byte = ordered[index + 1]&.local_header_range&.start_byte || entry.central_directory_range.start_byte
    [entry.normalized_path, TreeHaver::ByteRange.new(start_byte: entry.local_header_range.start_byte, end_byte: end_byte)]
  end
end

.render_error(category, schema_path, message) ⇒ Object



307
308
309
# File 'lib/zip/merge.rb', line 307

# Builds a raisable RenderError carrying a structured diagnostic
# (see +diagnostic+) — callers `raise render_error(...)`.
#
# @param category [String] machine-readable problem category
# @param schema_path [String] schema location the problem refers to
# @param message [String] human-readable description
# @return [RenderError]
def render_error(category, schema_path, message)
  RenderError.new(diagnostic(category, schema_path, message))
end

.render_with_raw_preservation(source:, plan:, member_bytes: {}, compression: 0) ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/zip/merge.rb', line 101

# Renders a merged ZIP from +plan+'s member decisions, byte-copying
# "preserve" members verbatim from +source+ and re-encoding
# add/rewrite/delegate members from +member_bytes+. The rendered archive
# is re-parsed to produce the returned inventory report.
#
# @param source [String] original archive bytes (preserve members are sliced from here)
# @param plan [TreeHaver::ZipFamilyReport] output of +plan_zip_merge+
# @param member_bytes [Hash{String=>String}] new content keyed by normalized path;
#   required for every add/rewrite/delegate decision (fetch raises otherwise)
# @param compression [Integer] 0 (stored) or 8 (deflate) for re-encoded members
# @return [Array(String, TreeHaver::ZipFamilyReport, Object)] output bytes,
#   fresh inventory report, and the plan's merge report with preserved_ranges updated
# @raise [RenderError] on unsupported compression, rejected members, or
#   preserve members that fail +validate_raw_preserve_entry!+
def render_with_raw_preservation(source:, plan:, member_bytes: {}, compression: 0)
  raise render_error("unsupported_compression", "/render/options/compression", "unsupported raw-preserving compression method") unless [0, 8].include?(compression)

  source = source.b
  source_inventory = parse_zip_inventory(source)
  central = scan_central_directory(source)
  source_entries = entries_by_path(source_inventory.entries)
  raw_ranges = raw_local_record_ranges(source, source_entries)
  output = +"".b
  central_records = []
  entries = entries_by_path(plan.entries)
  plan.member_decisions.each do |member|
    entry = entries[member.normalized_path]
    case member.operation
    when "reject"
      raise render_error("rejected_member", schema_path(member.normalized_path), member.reason)
    when "delete"
      next # deleted members are simply not written
    when "preserve"
      source_entry = source_entries.fetch(member.normalized_path)
      validate_raw_preserve_entry!(source, central, source_entry)
      range = raw_ranges.fetch(member.normalized_path)
      offset = output.bytesize
      # Copy the raw local record bytes unchanged; only its offset moves.
      output << source.byteslice(range.start_byte...range.end_byte)
      central_records << central_record_from_entry(source_entry, offset)
    when "add", "rewrite", "delegate"
      content = member_bytes.fetch(member.normalized_path)
      rendered, record = rendered_local_record(entry, content.b, compression, output.bytesize)
      output << rendered
      central_records << record
    else
      raise "unsupported ZIP render operation #{member.operation.inspect}"
    end
  end
  central_start = output.bytesize
  central_records.each { |record| output << central_directory_record(record) }
  central_size = output.bytesize - central_start
  output << eocd_record(central_records.length, central_size, central_start)
  # Re-parse the output so the returned report reflects what was actually written.
  report = parse_zip_inventory(output)
  merge_report = plan.merge_report
  # Preserved ranges refer to SOURCE offsets, not output offsets.
  merge_report.preserved_ranges = plan.member_decisions.filter_map { |member| raw_ranges[member.normalized_path] if member.operation == "preserve" }
  [output, report, merge_report]
end

.rendered_local_record(entry, content, method, offset) ⇒ Object



228
229
230
231
232
233
# File 'lib/zip/merge.rb', line 228

# Renders a local file record (header + payload) for one member and the
# matching central-directory record hash. The CRC is always computed over
# the uncompressed content; the payload is deflated only when method == 8.
# Mod time/date use DOS_EPOCH; flags and extra-field length are zero.
#
# @param entry [Object] responds to #path
# @param content [String] uncompressed member bytes
# @param method [Integer] 0 (stored) or 8 (deflate)
# @param offset [Integer] where this record will sit in the output
# @return [Array(String, Hash)] rendered bytes and central-record fields
def rendered_local_record(entry, content, method, offset)
  payload = method == 8 ? Zlib::Deflate.deflate(content) : content
  checksum = Zlib.crc32(content)
  name = entry.path
  fixed = [LOCAL, 20, 0, method].pack("Vvvv")
  sizes = [checksum, payload.bytesize, content.bytesize, name.bytesize, 0].pack("VVVvv")
  record = {
    name: name,
    method: method,
    crc32: checksum,
    compressed_size: payload.bytesize,
    uncompressed_size: content.bytesize,
    offset: offset,
    flags: 0
  }
  [fixed + DOS_EPOCH + sizes + name + payload, record]
end

.same_entry?(left, right) ⇒ Boolean

Returns:

  • (Boolean)


284
285
286
# File 'lib/zip/merge.rb', line 284

# Whether two entries represent the same member bytes and metadata:
# both present, with matching path, compression, sizes, and CRC.
# Propagates nil when either side is missing (falsy either way).
#
# @param left [Object, nil]
# @param right [Object, nil]
# @return [Boolean, nil]
def same_entry?(left, right)
  left && right &&
    %i[path compression compressed_size uncompressed_size crc32]
      .all? { |field| left.public_send(field) == right.public_send(field) }
end

.scan_central_directory(source) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/zip/merge.rb', line 163

# Locates the end-of-central-directory record (scanning backwards so the
# last signature wins) and walks the central directory, extracting each
# record's fields and exact byte range.
# NOTE(review): records are keyed by exact member name, so an archive
# containing two identically named members keeps only the last — confirm
# this is intended (duplicate *normalized* paths are flagged elsewhere).
#
# @param source [String] binary ZIP bytes
# @return [Hash] :range (central directory span), :records (name => field
#   hash), :archive_comment (true when a trailing comment is present)
# @raise [RuntimeError] if no EOCD signature is found or a record is malformed
def scan_central_directory(source)
  # Minimal EOCD is 22 bytes from the end; walk back until the signature matches.
  eocd = source.bytesize - 22
  eocd -= 1 while eocd >= 0 && source.byteslice(eocd, 4).unpack1("V") != EOCD
  raise "missing ZIP end of central directory" if eocd.negative?
  size = source.byteslice(eocd + 12, 4).unpack1("V")   # central directory size
  offset = source.byteslice(eocd + 16, 4).unpack1("V") # central directory offset
  comment_length = source.byteslice(eocd + 20, 2).unpack1("v")
  records = {}
  cursor = offset
  while cursor < offset + size
    raise "unexpected central directory record" unless source.byteslice(cursor, 4).unpack1("V") == CENTRAL
    name_len = source.byteslice(cursor + 28, 2).unpack1("v")
    extra_len = source.byteslice(cursor + 30, 2).unpack1("v")
    comment_len = source.byteslice(cursor + 32, 2).unpack1("v")
    name = source.byteslice(cursor + 46, name_len)
    records[name] = {
      # Fixed central header is 46 bytes; name/extra/comment follow variably.
      range: TreeHaver::ByteRange.new(start_byte: cursor, end_byte: cursor + 46 + name_len + extra_len + comment_len),
      flags: source.byteslice(cursor + 8, 2).unpack1("v"),
      method: source.byteslice(cursor + 10, 2).unpack1("v"),
      crc32: source.byteslice(cursor + 16, 4).unpack1("V"),
      compressed_size: source.byteslice(cursor + 20, 4).unpack1("V"),
      uncompressed_size: source.byteslice(cursor + 24, 4).unpack1("V"),
      extra_length: extra_len,
      comment_length: comment_len,
      local_offset: source.byteslice(cursor + 42, 4).unpack1("V")
    }
    cursor = records[name][:range].end_byte
  end
  { range: TreeHaver::ByteRange.new(start_byte: offset, end_byte: offset + size), records: records, archive_comment: comment_length.positive? }
end

.scan_local_headers(source, records) ⇒ Object



194
195
196
197
198
199
200
201
202
# File 'lib/zip/merge.rb', line 194

# For each central-directory record, reads the matching local header and
# computes where the member's data begins (fixed 30-byte header plus the
# local name and extra field lengths).
#
# @param source [String] binary ZIP bytes
# @param records [Hash{String=>Hash}] central records with :local_offset
# @return [Hash{String=>Hash}] name => { data_start:, extra_length: }
# @raise [RuntimeError] if a local header signature is missing
def scan_local_headers(source, records)
  records.transform_values do |record|
    offset = record[:local_offset]
    signature = source.byteslice(offset, 4).unpack1("V")
    raise "unexpected ZIP local header" unless signature == LOCAL
    name_length = source.byteslice(offset + 26, 2).unpack1("v")
    extra_length = source.byteslice(offset + 28, 2).unpack1("v")
    { data_start: offset + 30 + name_length + extra_length, extra_length: extra_length }
  end
end

.schema_path(path) ⇒ Object



299
300
301
# File 'lib/zip/merge.rb', line 299

# Returns the schema path used in reports for a member's entry node.
#
# @param path [String] normalized member path
# @return [String] "/entries/by_path/<path>"
def schema_path(path)
  format("/entries/by_path/%s", path)
end

.signing_sensitive?(path) ⇒ Boolean

Returns:

  • (Boolean)


280
281
282
# File 'lib/zip/merge.rb', line 280

# Whether a member carries JAR-style signing data: a META-INF/ path
# (case-insensitive) ending in .RSA, .DSA, .EC, or .SF.
#
# @param path [String] normalized member path
# @return [Boolean]
def signing_sensitive?(path)
  upper = path.upcase
  upper.start_with?("META-INF/") && upper.end_with?(".RSA", ".DSA", ".EC", ".SF")
end

.unsafe_entries(entries, records) ⇒ Object



215
216
217
218
219
220
221
222
223
224
225
226
# File 'lib/zip/merge.rb', line 215

# Flags entries the default provider refuses to handle. A single entry can
# yield multiple findings: path traversal, normalized-path collision with a
# differently spelled earlier entry, encryption (general-purpose bit 0 set),
# and signing-sensitive META-INF members.
#
# @param entries [Array] archive entries (responding to #path, #normalized_path)
# @param records [Hash{String=>Hash}] central records keyed by raw path;
#   NOTE(review): assumed to contain every entry's path — a missing key
#   would raise NoMethodError on the :flags lookup.
# @return [Array<TreeHaver::ZipUnsafeEntry>]
def unsafe_entries(entries, records)
  seen = {} # normalized_path => first raw path seen, for collision detection
  entries.flat_map do |entry|
    list = []
    list << TreeHaver::ZipUnsafeEntry.new(path: entry.path, normalized_path: entry.normalized_path, category: "path_traversal", reason: "entry escapes the archive root") if escapes_root?(entry.path)
    # Only a *differently spelled* duplicate counts; an identical raw path is not flagged here.
    list << TreeHaver::ZipUnsafeEntry.new(path: entry.path, normalized_path: entry.normalized_path, category: "duplicate_normalized_path", reason: "normalized path collides with an existing entry") if seen[entry.normalized_path] && seen[entry.normalized_path] != entry.path
    # Bit 0 of the general-purpose flags marks an encrypted member.
    list << TreeHaver::ZipUnsafeEntry.new(path: entry.path, normalized_path: entry.normalized_path, category: "encrypted_member", reason: "encrypted member cannot be rendered by the default provider") unless (records[entry.path][:flags] & 0x1).zero?
    list << TreeHaver::ZipUnsafeEntry.new(path: entry.path, normalized_path: entry.normalized_path, category: "signing_sensitive_member", reason: "signature-bearing member mutation is not enabled") if signing_sensitive?(entry.normalized_path)
    seen[entry.normalized_path] = entry.path
    list
  end
end

.validate_raw_preserve_entry!(source, central, entry) ⇒ Object



204
205
206
207
208
209
210
211
212
213
# File 'lib/zip/merge.rb', line 204

# Asserts that +entry+ can be copied byte-for-byte from +source+ into a new
# archive. Rejects, in order: archives with trailing comments, encrypted
# members, unsupported compression (anything but stored/deflate), and any
# central or local extra fields or member comments, since none of those are
# reproduced by the raw-preserving renderer yet.
#
# @param source [String] original archive bytes
# @param central [Hash] result of +scan_central_directory+
# @param entry [Object] responds to #path, #normalized_path, #compression,
#   #local_header_range
# @raise [RenderError] with a categorized diagnostic on the first violation
def validate_raw_preserve_entry!(source, central, entry)
  raise render_error("archive_comment", "/archive/comment", "raw-preserving ZIP renderer does not yet preserve archive comments") if central[:archive_comment]
  record = central[:records].fetch(entry.path)
  # Bit 0 of the general-purpose flags marks an encrypted member.
  raise render_error("encrypted_member", schema_path(entry.normalized_path), "raw-preserving ZIP renderer rejects encrypted member #{entry.normalized_path}") unless (record[:flags] & 0x1).zero?
  raise render_error("unsupported_compression", schema_path(entry.normalized_path), "raw-preserving ZIP renderer rejects unsupported compression #{entry.compression.inspect}") unless [0, 8].include?(record[:method])
  raise render_error("central_directory_extra_field", schema_path(entry.normalized_path), "raw-preserving ZIP renderer does not yet preserve central-directory extra fields for #{entry.normalized_path}") unless record[:extra_length].zero?
  raise render_error("member_comment", schema_path(entry.normalized_path), "raw-preserving ZIP renderer does not yet preserve member comments for #{entry.normalized_path}") unless record[:comment_length].zero?
  # Local extra-field length lives at offset 28 within the local header.
  local_extra = source.byteslice(entry.local_header_range.start_byte + 28, 2).unpack1("v")
  raise render_error("local_header_extra_field", schema_path(entry.normalized_path), "raw-preserving ZIP renderer does not yet preserve local extra fields for #{entry.normalized_path}") unless local_extra.zero?
end