Class: Archaeo::CdxFilter

Inherits:
Object
  • Object
show all
Defined in:
lib/archaeo/cdx_filter.rb

Overview

Builds and validates CDX Server API filter expressions.

CDX filter format: [!]field:regex. The optional ! prefix inverts the match. The field must be a recognized CDX field name. The regex is a Java-compatible regex pattern matched against the field value.

Constant Summary collapse

# Recognized CDX field names, stored as a frozen array of strings.
VALID_FIELDS = %w[urlkey timestamp original mimetype statuscode digest length].freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(expression) ⇒ CdxFilter

Returns a new instance of CdxFilter.



16
17
18
19
# File 'lib/archaeo/cdx_filter.rb', line 16

# Stores the textual form of +expression+ and then runs +validate!+.
#
# NOTE(review): +validate!+ is defined outside this excerpt; presumably it
# raises when the expression is malformed -- confirm against its definition.
#
# @param expression [#to_s] filter text, expected in [!]field:regex form
def initialize(expression)
  # String#to_s returns the receiver itself, so without the copy a caller
  # mutating the String it passed in would silently change this filter after
  # validation had already run.
  @expression = expression.to_s.dup
  validate!
end

Class Method Details

.by_digest(digest) ⇒ Object



61
62
63
# File 'lib/archaeo/cdx_filter.rb', line 61

# Builds a filter on the +digest+ field.
#
# @param digest [#to_s] text interpolated, unescaped, after "digest:"
# @return [Object] whatever +new+ returns for the built expression
def self.by_digest(digest)
  expression = "digest:#{digest}"
  new(expression)
end

.by_mimetype(type) ⇒ Object



53
54
55
# File 'lib/archaeo/cdx_filter.rb', line 53

# Builds a filter on the +mimetype+ field.
#
# The argument is inserted as-is, so regex metacharacters in it keep their
# regex meaning.
#
# @param type [#to_s] text interpolated after "mimetype:"
# @return [Object] whatever +new+ returns for the built expression
def self.by_mimetype(type)
  expression = "mimetype:#{type}"
  new(expression)
end

.by_mimetype_prefix(prefix) ⇒ Object



94
95
96
# File 'lib/archaeo/cdx_filter.rb', line 94

# Builds a +mimetype+ filter for values beginning with +prefix+.
#
# The prefix is regex-escaped and followed by ".*".
#
# @param prefix [String] literal leading text of the MIME type
# @return [Object] whatever +new+ returns for the built expression
def self.by_mimetype_prefix(prefix)
  literal = Regexp.escape(prefix)
  new("mimetype:#{literal}.*")
end

.by_status(code) ⇒ Object



45
46
47
# File 'lib/archaeo/cdx_filter.rb', line 45

# Builds a filter on the +statuscode+ field.
#
# @param code [#to_s] text interpolated, unescaped, after "statuscode:"
# @return [Object] whatever +new+ returns for the built expression
def self.by_status(code)
  expression = "statuscode:#{code}"
  new(expression)
end

.by_url(pattern) ⇒ Object



65
66
67
# File 'lib/archaeo/cdx_filter.rb', line 65

# Builds a filter on the +original+ field.
#
# @param pattern [#to_s] regex text interpolated, unescaped, after "original:"
# @return [Object] whatever +new+ returns for the built expression
def self.by_url(pattern)
  expression = "original:#{pattern}"
  new(expression)
end

.by_urlkey(pattern) ⇒ Object



69
70
71
# File 'lib/archaeo/cdx_filter.rb', line 69

# Builds a filter on the +urlkey+ field.
#
# @param pattern [#to_s] regex text interpolated, unescaped, after "urlkey:"
# @return [Object] whatever +new+ returns for the built expression
def self.by_urlkey(pattern)
  expression = "urlkey:#{pattern}"
  new(expression)
end

.combine(*filters) ⇒ Object



77
78
79
# File 'lib/archaeo/cdx_filter.rb', line 77

# Merges any mix of single filters and (possibly nested) arrays of filters
# into one flat array.
#
# @param filters [Array] filters and/or arrays of filters
# @return [Array] a new array with every level of nesting removed
def self.combine(*filters)
  flattened = filters.flatten
  flattened
end

.excluding_errors ⇒ Object



85
86
87
88
# File 'lib/archaeo/cdx_filter.rb', line 85

# Builds exclusion filters for status codes 404, 500, 502 and 503.
#
# @return [Array] one +excluding_status+ result per code, in that order
def self.excluding_errors
  error_codes = [404, 500, 502, 503]
  error_codes.map { |code| excluding_status(code) }
end

.excluding_mimetype(type) ⇒ Object



57
58
59
# File 'lib/archaeo/cdx_filter.rb', line 57

# Builds a negated filter on the +mimetype+ field.
#
# @param type [#to_s] text interpolated, unescaped, after "!mimetype:"
# @return [Object] whatever +new+ returns for the built expression
def self.excluding_mimetype(type)
  expression = "!mimetype:#{type}"
  new(expression)
end

.excluding_redirects ⇒ Object



98
99
100
# File 'lib/archaeo/cdx_filter.rb', line 98

# Builds exclusion filters for status codes 301, 302, 303, 307 and 308.
#
# The codes are passed to +excluding_status+ as strings.
#
# @return [Array] one +excluding_status+ result per code, in that order
def self.excluding_redirects
  redirect_codes = %w[301 302 303 307 308]
  redirect_codes.map do |code|
    excluding_status(code)
  end
end

.excluding_status(code) ⇒ Object



49
50
51
# File 'lib/archaeo/cdx_filter.rb', line 49

# Builds a negated filter on the +statuscode+ field.
#
# @param code [#to_s] text interpolated, unescaped, after "!statuscode:"
# @return [Object] whatever +new+ returns for the built expression
def self.excluding_status(code)
  expression = "!statuscode:#{code}"
  new(expression)
end

.only_html ⇒ Object



90
91
92
# File 'lib/archaeo/cdx_filter.rb', line 90

# Builds the filter list that keeps only the "text/html" MIME type.
#
# @return [Array] single-element array holding +by_mimetype("text/html")+
def self.only_html
  html_filter = by_mimetype("text/html")
  [html_filter]
end

.only_successful ⇒ Object



81
82
83
# File 'lib/archaeo/cdx_filter.rb', line 81

# Builds the filter list that keeps only status code 200.
#
# @return [Array] single-element array holding +by_status(200)+
def self.only_successful
  ok_filter = by_status(200)
  [ok_filter]
end

Instance Method Details

#and(other) ⇒ Object



73
74
75
# File 'lib/archaeo/cdx_filter.rb', line 73

# Pairs this filter with one or more other filters.
#
# +other+ may be a single filter or an array of filters; arrays (including
# nested ones) are flattened so the result is always one flat list.
#
# @param other [Object, Array] a filter, or an array of filters
# @return [Array] this filter followed by the filter(s) from +other+
def and(other)
  # Flatten so that an Array argument is merged in rather than nested as a
  # single element.
  [self, other].flatten
end

#field ⇒ Object



29
30
31
32
# File 'lib/archaeo/cdx_filter.rb', line 29

# Returns the field-name part of the expression: the text before the first
# ":" once a single leading "!" has been dropped.
#
# @return [String] the field name; the whole remaining text if there is no ":"
def field
  body = @expression.sub(/\A!/, "")
  name, = body.partition(":")
  name
end

#matches?(value) ⇒ Boolean

Returns:

  • (Boolean)


39
40
41
42
43
# File 'lib/archaeo/cdx_filter.rb', line 39

# Tests +value+ against this filter's regex, honouring negation.
#
# The pattern must match the whole value, not just a substring of it. This
# keeps prefix-style patterns such as "image/.*" from matching text that merely
# contains the prefix, and stops "20" from matching "200".
#
# @param value [#to_s] field value to test
# @return [Boolean] match result, inverted when the filter is negated
# @raise [RegexpError] if the pattern is not a valid Ruby regex
def matches?(value)
  # The non-capturing group keeps a top-level alternation ("200|301") inside
  # the anchors instead of binding each anchor to one alternative.
  regex = Regexp.new("\\A(?:#{pattern})\\z")
  matched = regex.match?(value.to_s)
  negated? ? !matched : matched
end

#negated? ⇒ Boolean

Returns:

  • (Boolean)


25
26
27
# File 'lib/archaeo/cdx_filter.rb', line 25

# Reports whether the expression begins with the "!" inversion prefix.
#
# @return [Boolean] true when the first character is "!"
def negated?
  @expression[0] == "!"
end

#pattern ⇒ Object



34
35
36
37
# File 'lib/archaeo/cdx_filter.rb', line 34

# Returns the regex part of the expression: everything after the first ":"
# once a single leading "!" has been dropped.
#
# @return [String] the regex text; if there is no ":" the whole remaining
#   text is returned
def pattern
  body = @expression.sub(/\A!/, "")
  head, separator, tail = body.partition(":")
  # With no ":" present the original text is the only segment, so it is what
  # gets returned.
  separator.empty? ? head : tail
end

#to_s ⇒ Object



21
22
23
# File 'lib/archaeo/cdx_filter.rb', line 21

def to_s
  @expression
end