Class: Archaeo::PathSanitizer

Inherits:
Object
  • Object
show all
Defined in:
lib/archaeo/path_sanitizer.rb

Overview

Sanitizes URLs into safe filesystem paths.

Handles recursive percent-decoding, query string hashing, segment truncation, and invalid character replacement.

Constant Summary collapse

DEFAULT_MAX_SEGMENT =
200
HASH_LENGTH =
8
MAX_DECODE_ITERATIONS =
5
INVALID_CHARS =
/[<>:"|?*#]/
SEPARATOR_RE =
%r{[/\\]}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(max_segment_length: DEFAULT_MAX_SEGMENT) ⇒ PathSanitizer

Returns a new instance of PathSanitizer.



21
22
23
# File 'lib/archaeo/path_sanitizer.rb', line 21

# Builds a sanitizer, remembering the per-segment length limit.
#
# @param max_segment_length [Object] value stored in @max_segment_length;
#   defaults to DEFAULT_MAX_SEGMENT. Presumably an Integer character limit
#   used when truncating path segments — TODO confirm against #truncate,
#   which is not visible here.
def initialize(max_segment_length: DEFAULT_MAX_SEGMENT)
  @max_segment_length = max_segment_length
end

Instance Attribute Details

#max_segment_length ⇒ Object (readonly)

Returns the value of attribute max_segment_length.



19
20
21
# File 'lib/archaeo/path_sanitizer.rb', line 19

# Read-only accessor for the limit supplied to the constructor.
#
# @return [Object] the current value of @max_segment_length
def max_segment_length
  @max_segment_length
end

Instance Method Details

#file_id(archive_url) ⇒ Object



32
33
34
35
# File 'lib/archaeo/path_sanitizer.rb', line 32

# Derives a sanitized path from an archive URL.
#
# The URL is first passed through strip_archive_prefix, and whatever that
# returns is handed to #sanitize.
#
# @param archive_url [Object] value forwarded to strip_archive_prefix
#   (presumably a String URL — confirm against the helper)
# @return [Object] the result of #sanitize
def file_id(archive_url)
  sanitize(strip_archive_prefix(archive_url))
end

#sanitize(url) ⇒ Object



25
26
27
28
29
30
# File 'lib/archaeo/path_sanitizer.rb', line 25

# Runs a URL through the full sanitizing pipeline.
#
# Steps are applied in this fixed order, each receiving the previous
# step's result: strip_scheme, recursive_decode, hash_query_strings,
# clean_segments.
#
# @param url [Object] value forwarded to strip_scheme
#   (presumably a String URL — confirm against the helper)
# @return [Object] whatever clean_segments returns
def sanitize(url)
  decoded = recursive_decode(strip_scheme(url))
  clean_segments(hash_query_strings(decoded))
end

#segment_for(path_segment) ⇒ Object



37
38
39
40
# File 'lib/archaeo/path_sanitizer.rb', line 37

# Cleans a single path segment.
#
# Every character matching INVALID_CHARS is replaced with an underscore,
# and the result is then passed to truncate.
#
# @param path_segment [String] segment to clean (must respond to #gsub)
# @return [Object] whatever truncate returns for the cleaned segment
def segment_for(path_segment)
  truncate(path_segment.gsub(INVALID_CHARS, "_"))
end