Module: Iriq::Normalizer

Defined in:
lib/iriq/normalizer.rb

Overview

Produces a canonical, shape-aware string for an identifier.

Normalizer.normalize("https://Foo.com:443/users/123")
# => "https://foo.com/users/{integer_id}"

The form is intended for grouping/diffing — it is not a round-trippable URL.

Class Method Summary

Class Method Details

.normalize(input, classifier: SegmentClassifier.new) ⇒ Object



11
12
13
14
# File 'lib/iriq/normalizer.rb', line 11

# Entry point: turns +input+ into its shape-aware normalized string.
#
# An +Identifier+ is used as-is; any other input is first handed to
# +Parser.parse+. The result is then delegated to +normalize_identifier+.
#
# @param input [Identifier, Object] an identifier, or a value accepted by
#   +Parser.parse+ (presumably a String — confirm against Parser)
# @param classifier [SegmentClassifier] forwarded unchanged to
#   +normalize_identifier+
# @return [Object] whatever +normalize_identifier+ returns
def normalize(input, classifier: SegmentClassifier.new)
  identifier = input
  identifier = Parser.parse(input) unless input.is_a?(Identifier)
  normalize_identifier(identifier, classifier: classifier)
end

.normalize_identifier(iri, classifier: SegmentClassifier.new) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/iriq/normalizer.rb', line 16

# Builds the shape-aware string for an already-parsed identifier.
#
# URN handling: when +iri.urn?+ is true, the scheme is exactly "urn" and the
# NSS contains a ":", the NSS is split at its first colon into a namespace
# and a value. The value is replaced by a "{type}" placeholder if the
# classifier reports its type as variable, e.g.
#   urn:isbn:0451450523 -> urn:isbn:{integer_id}
# Any other URN is returned as +iri.canonical+.
#
# Everything else is assembled from scheme, host, port, the shaped path and
# (when non-empty) the shaped query string.
#
# @param iri [Identifier] must respond to +urn?+, +scheme+, +nss+,
#   +canonical+, +host+, +port+, +path_segments+ and +query_params+
# @param classifier [SegmentClassifier] must respond to +classify+ and
#   +variable?+
# @return [String] the normalized form (for the canonical fallback this is
#   whatever +iri.canonical+ returns)
def normalize_identifier(iri, classifier: SegmentClassifier.new)
  if iri.urn?
    # Only "urn:<ns>:<value>" can be shaped; anything else keeps its canonical form.
    shapeable = iri.scheme == "urn" && iri.nss && iri.nss.include?(":")
    return iri.canonical unless shapeable

    namespace, specific = iri.nss.split(":", 2)
    kind = classifier.classify(specific)
    tail = classifier.variable?(kind) ? "{#{kind}}" : specific
    return "urn:#{namespace}:#{tail}"
  end

  result = +""
  if iri.scheme
    result << "#{iri.scheme}://"
  end
  if iri.host
    result << iri.host
  end
  if iri.port
    result << ":#{iri.port}"
  end

  path_shaper = PathShape.new(classifier: classifier)
  result << path_shaper.for(iri.path_segments)

  # The query is emitted only when at least one parameter is present.
  query = iri.query_params
  result << "?" << shape_query(query, classifier) if query && !query.empty?

  result
end

.shape_query(params, classifier) ⇒ Object



40
41
42
43
44
45
46
47
# File 'lib/iriq/normalizer.rb', line 40

# Renders query parameters as a deterministic, shape-aware query string.
#
# Keys are visited in sorted order. Each value is classified by its string
# form; if the classifier reports the resulting type as variable, the value
# is replaced by a "{type}" placeholder, otherwise the value is emitted
# as-is. Pairs are written as "key=value" and joined with "&".
#
# @param params [Hash] query parameters; must respond to +keys+ and +[]+
# @param classifier [#classify, #variable?] segment classifier
# @return [String] the shaped query string, without a leading "?"
def shape_query(params, classifier)
  pairs = []
  params.keys.sort.each do |name|
    raw  = params[name]
    kind = classifier.classify(raw.to_s)
    rendered =
      if classifier.variable?(kind)
        "{#{kind}}"
      else
        raw
      end
    pairs << "#{name}=#{rendered}"
  end
  pairs.join("&")
end