Module: JSONP3::Path

Defined in: lib/json_p3.rb,
lib/json_p3/errors.rb,
lib/json_p3/path/node.rb,
lib/json_p3/path/lexer.rb,
lib/json_p3/path/query.rb,
lib/json_p3/path/filter.rb,
lib/json_p3/path/parser.rb,
lib/json_p3/path/segment.rb,
lib/json_p3/path/function.rb,
lib/json_p3/path/selector.rb,
lib/json_p3/path/unescape.rb,
lib/json_p3/path/serialize.rb,
lib/json_p3/path/environment.rb,
lib/json_p3/path/function_extensions/count.rb,
lib/json_p3/path/function_extensions/match.rb,
lib/json_p3/path/function_extensions/value.rb,
lib/json_p3/path/function_extensions/length.rb,
lib/json_p3/path/function_extensions/search.rb,
lib/json_p3/path/function_extensions/pattern.rb

Overview

JSONPath query expressions.

Defined Under Namespace

Classes: AbsoluteQueryExpression, BooleanLiteral, ChildSegment, Count, DescendantSegment, Environment, EqExpression, Error, Expression, FilterContext, FilterExpression, FilterExpressionLiteral, FilterSelector, FloatLiteral, FunctionExpression, FunctionExtension, GeExpression, GtExpression, IndexSelector, InfixExpression, IntegerLiteral, LeExpression, Length, LogicalAndExpression, LogicalNotExpression, LogicalOrExpression, LtExpression, Match, NameError, NameSelector, NeExpression, Node, NodeList, NullLiteral, Parser, Query, QueryExpression, RecursionError, RelativeQueryExpression, Search, Segment, Selector, SliceSelector, StringLiteral, SymbolNameSelector, SyntaxError, TypeError, Value, WildcardSelector

Constant Summary collapse

# Shared environment instance that the module-level convenience methods
# (compile, find, find_enum, first, match, match?) delegate to.
DefaultEnvironment =

JSONP3::Path::Environment.new

# Float literal pattern used by the tokenizer: digits with a fractional part and an
# optional exponent, or digits with a negative exponent.
# NOTE(review): only the first alternative is anchored by \G; the second alternative
# can match further along the subject string.
RE_FLOAT =

/\G(?:-?\d+\.\d+(?:[eE][+-]?\d+)?)|(-?\d+[eE]-\d+)/

# Plain (optionally negative) run of digits anchored at the scan position; the
# tokenizer emits :token_index for it.
RE_INDEX =

/\G-?\d+/

# Integer written with a non-negative exponent (for example 1e2 or 1E+2), anchored
# at the scan position; the tokenizer emits :token_int for it.
RE_INT =

/\G-?\d+[eE]\+?\d+/

# A \uXXXX escape sequence; the four hex digits are captured.
RE_SLASH_U =

/\\u([0-9a-fA-F]{4})/

# Replacement table used by canonical_string to switch quote escaping from
# double-quoted form (\" becomes ") to single-quoted form (' becomes \').
TRANS =

{ "\\\"" => "\"", "'" => "\\'" }.freeze

Class Method Summary collapse

.canonical_string(value) ⇒ Object
Return value formatted as a canonical string literal.
.compile(path) ⇒ Object
.eq?(left, right) ⇒ Boolean
.find(path, data) ⇒ Object
.find_enum(path, data) ⇒ Object
.first(path, data) ⇒ Object
.get_token_value(token, query) ⇒ Object
.high_surrogate?(code_point) ⇒ Boolean
.low_surrogate?(code_point) ⇒ Boolean
.lt?(left, right) ⇒ Boolean
.map_iregexp(pattern) ⇒ String
Map I-Regexp pattern to Ruby regex pattern.
.match(path, data) ⇒ Object
.match?(path, data) ⇒ Boolean
.name_ch?(ch) ⇒ Boolean
.name_first?(ch) ⇒ Boolean
.number_ch?(ch) ⇒ Boolean
.scan_string_literal(query, byte, pos) ⇒ Object
.tokenize(query) ⇒ Object
.trivia?(ch) ⇒ Boolean
.truthy?(obj) ⇒ Boolean
.unescape(value, token, query) ⇒ Object
Replace escape sequences with their equivalent Unicode code point.

Class Method Details

.canonical_string(value) ⇒ `Object`

Return value formatted as a canonical string literal.

Parameters:

value (String)



12
13
14

# File 'lib/json_p3/path/serialize.rb', line 12

# Return _value_ formatted as a canonical, single-quoted string literal.
#
# @param value [String] the string to format.
# @return [String] _value_ wrapped in single quotes, with single quotes escaped
#   and double quotes unescaped.
def self.canonical_string(value)
  # JSON.dump produces a double-quoted literal; strip the surrounding quotes and
  # keep the escaped body.
  body = JSON.dump(value)[1..-2] || raise
  # Switch quote escaping from double-quoted to single-quoted conventions.
  requoted = body.gsub(/('|\\")/, TRANS)
  "'#{requoted}'"
end

.compile(path) ⇒ `Object`



32
33
34

# File 'lib/json_p3.rb', line 32

# Compile _path_ using the shared default environment.
#
# @param path the JSONPath expression, passed through to DefaultEnvironment.compile.
# @return whatever DefaultEnvironment.compile returns for _path_.
def self.compile(path)
  env = DefaultEnvironment
  env.compile(path)
end

.eq?(left, right) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/json_p3/path/filter.rb', line 423

# Test two filter operands for equality.
#
# A NodeList holding exactly one node is replaced by that node's value before
# comparing. An empty NodeList is equal to the symbol :nothing.
#
# @param left a NodeList, :nothing or a plain value.
# @param right a NodeList, :nothing or a plain value.
# @return [Boolean]
def self.eq?(left, right)
  # Unwrap singular node lists so they compare by value.
  left, right = [left, right].map do |operand|
    operand.is_a?(NodeList) && operand.length == 1 ? operand.first.value : operand
  end

  # Normalise so that a remaining NodeList, if any, is on the left.
  left, right = right, left if right.is_a?(NodeList)

  unless left.is_a?(NodeList)
    return true if left == :nothing && right == :nothing

    return left == right
  end

  if right.is_a?(NodeList)
    left == right
  elsif left.empty?
    right == :nothing
  elsif left.length == 1
    left.first == right
  else
    false
  end
end

.find(path, data) ⇒ `Object`



24
25
26

# File 'lib/json_p3.rb', line 24

# Apply _path_ to _data_ using the shared default environment.
#
# @param path the JSONPath expression, passed through to DefaultEnvironment.find.
# @param data the target data, passed through unchanged.
# @return whatever DefaultEnvironment.find returns.
def self.find(path, data)
  env = DefaultEnvironment
  env.find(path, data)
end

.find_enum(path, data) ⇒ `Object`



28
29
30

# File 'lib/json_p3.rb', line 28

# Delegate to DefaultEnvironment.find_enum with _path_ and _data_.
#
# @param path the JSONPath expression, passed through unchanged.
# @param data the target data, passed through unchanged.
# @return whatever DefaultEnvironment.find_enum returns.
def self.find_enum(path, data)
  env = DefaultEnvironment
  env.find_enum(path, data)
end

.first(path, data) ⇒ `Object`



44
45
46

# File 'lib/json_p3.rb', line 44

# Delegate to DefaultEnvironment.first with _path_ and _data_.
#
# @param path the JSONPath expression, passed through unchanged.
# @param data the target data, passed through unchanged.
# @return whatever DefaultEnvironment.first returns.
def self.first(path, data)
  env = DefaultEnvironment
  env.first(path, data)
end

.get_token_value(token, query) ⇒ `Object`



216
217
218

# File 'lib/json_p3/path/lexer.rb', line 216

# Return the slice of _query_ covered by _token_.
#
# The element at index 1 of _token_ is used as the starting byte offset and the
# last element as the (exclusive) ending byte offset.
#
# @param token [Array] a token array whose second and last elements are byte offsets.
# @param query [String] the string the offsets refer to.
# @return [String] the bytes of _query_ between the two offsets.
# @raise [RuntimeError] if the offsets do not address a slice of _query_.
def self.get_token_value(token, query)
  from = token[1]
  width = token.last - from
  query.byteslice(from, width) || raise
end

.high_surrogate?(code_point) ⇒ `Boolean`

Returns:

(Boolean)



126
127
128

# File 'lib/json_p3/path/unescape.rb', line 126

# Is _code_point_ in the range 0xD800..0xDBFF (inclusive)?
#
# @param code_point [Integer]
# @return [Boolean]
def self.high_surrogate?(code_point)
  code_point >= 0xD800 && code_point <= 0xDBFF
end

.low_surrogate?(code_point) ⇒ `Boolean`

Returns:

(Boolean)



130
131
132

# File 'lib/json_p3/path/unescape.rb', line 130

# Is _code_point_ in the range 0xDC00..0xDFFF (inclusive)?
#
# @param code_point [Integer]
# @return [Boolean]
def self.low_surrogate?(code_point)
  code_point >= 0xDC00 && code_point <= 0xDFFF
end

.lt?(left, right) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/json_p3/path/filter.rb', line 442

# Test whether _left_ is strictly less than _right_.
#
# A NodeList holding exactly one node is replaced by that node's value first.
# Only string/string and number/number (Integer or Float) pairs are ordered;
# every other combination is not less-than.
#
# @param left a NodeList or a plain value.
# @param right a NodeList or a plain value.
# @return [Boolean]
def self.lt?(left, right)
  left, right = [left, right].map do |operand|
    operand.is_a?(NodeList) && operand.length == 1 ? operand.first.value : operand
  end

  case left
  when String
    right.is_a?(String) && left < right
  when Integer, Float
    (right.is_a?(Integer) || right.is_a?(Float)) && left < right
  else
    false
  end
end

.map_iregexp(pattern) ⇒ `String`

Map I-Regexp pattern to Ruby regex pattern.

Parameters:

pattern (String)

Returns:

(String)

# File 'lib/json_p3/path/function_extensions/pattern.rb', line 9

# Map I-Regexp pattern to Ruby regex pattern.
#
# Every unescaped "." outside a character class is rewritten to "[^\n\r]"; all
# other characters, including escape sequences, are copied through unchanged.
#
# @param pattern [String] the I-Regexp pattern.
# @return [String] a new UTF-8 string holding the mapped pattern.
def self.map_iregexp(pattern)
  out = String.new(encoding: "UTF-8")
  after_backslash = false
  in_class = false

  pattern.each_char do |char|
    if after_backslash
      # The character following a backslash is copied verbatim.
      after_backslash = false
      out << char
    elsif char == "\\"
      after_backslash = true
      out << char
    elsif char == "."
      out << (in_class ? char : "[^\\n\\r]")
    else
      # Track whether we are between an unescaped "[" and "]".
      in_class = true if char == "["
      in_class = false if char == "]"
      out << char
    end
  end

  out
end

.match(path, data) ⇒ `Object`



36
37
38

# File 'lib/json_p3.rb', line 36

# Delegate to DefaultEnvironment.match with _path_ and _data_.
#
# @param path the JSONPath expression, passed through unchanged.
# @param data the target data, passed through unchanged.
# @return whatever DefaultEnvironment.match returns.
def self.match(path, data)
  env = DefaultEnvironment
  env.match(path, data)
end

.match?(path, data) ⇒ `Boolean`

Returns:

(Boolean)



40
41
42

# File 'lib/json_p3.rb', line 40

# Delegate to DefaultEnvironment.match? with _path_ and _data_.
#
# @param path the JSONPath expression, passed through unchanged.
# @param data the target data, passed through unchanged.
# @return [Boolean] whatever DefaultEnvironment.match? returns.
def self.match?(path, data)
  env = DefaultEnvironment
  env.match?(path, data)
end

.name_ch?(ch) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/json_p3/path/lexer.rb', line 200

# Can the byte/code _ch_ appear inside a name token?
#
# Accepts ASCII digits, ASCII letters, underscore and anything in 0x80..0xffff.
#
# @param ch [Integer]
# @return [Boolean]
def self.name_ch?(ch)
  return true if ch == 95 # _

  ch.between?(48, 57) ||     # 0-9
    ch.between?(65, 90) ||   # A-Z
    ch.between?(97, 122) ||  # a-z
    ch.between?(0x80, 0xffff)
end

.name_first?(ch) ⇒ `Boolean`

Returns:

(Boolean)



196
197
198

# File 'lib/json_p3/path/lexer.rb', line 196

# Can the byte/code _ch_ start a name token?
#
# Accepts ASCII letters, underscore and anything in 0x80..0xffff; digits are
# not accepted.
#
# @param ch [Integer]
# @return [Boolean]
def self.name_first?(ch)
  return true if ch == 95 # _

  ch.between?(65, 90) ||     # A-Z
    ch.between?(97, 122) ||  # a-z
    ch.between?(0x80, 0xffff)
end

.number_ch?(ch) ⇒ `Boolean`

Returns:

(Boolean)



208
209
210

# File 'lib/json_p3/path/lexer.rb', line 208

# Is the byte _ch_ a minus sign or an ASCII digit?
#
# @param ch [Integer]
# @return [Boolean]
def self.number_ch?(ch)
  return true if ch == 45 # -

  ch.between?(48, 57)
end

.scan_string_literal(query, byte, pos) ⇒ `Object`

Raises:

(JSONP3::Path::SyntaxError)

# File 'lib/json_p3/path/lexer.rb', line 145

# Scan the body of a quoted string literal in _query_.
#
# Scanning starts at byte offset _pos_ (the byte after the opening quote) and
# stops at the first unescaped quote matching the opening one. A backslash and
# the byte after it are skipped, and switch the token kind to its "esc" variant.
#
# @param query [String] the JSONPath expression being tokenized.
# @param byte [Integer] the opening quote byte; 39 (') selects the single quoted
#   kinds, anything else the double quoted kinds.
# @param pos [Integer] byte offset of the first byte after the opening quote.
# @return [Array] a pair of the token `[kind, start, stop]` (offsets exclude both
#   quotes) and the byte offset just past the closing quote.
# @raise [JSONP3::Path::SyntaxError] if a byte <= 0x1f is found outside an escape
#   sequence, or if the closing quote is missing.
def self.scan_string_literal(query, byte, pos)
  first = pos
  limit = query.bytesize
  single = byte == 39

  # @type var kind: t_token_kind
  # @type var esc_kind: t_token_kind
  if single
    kind = :token_single_quoted_string
    esc_kind = :token_single_quoted_esc_string
  else
    kind = :token_double_quoted_string
    esc_kind = :token_double_quoted_esc_string
  end
  closing = single ? 39 : 34

  while pos < limit
    ch = query.getbyte(pos)
    break if ch.nil?

    if ch == 92 # \
      # Skip the backslash and the byte it escapes; the parser unescapes later.
      kind = esc_kind
      pos += 2
      next
    end

    return [[kind, first, pos], pos + 1] if ch == closing

    # The other kind of quote never reaches this check as a terminator and is
    # well above 0x1f, so it simply falls through to the increment.
    if ch <= 0x1f
      raise JSONP3::Path::SyntaxError.new(
        "invalid character",
        [:token_error, first, pos],
        query
      )
    end

    pos += 1
  end

  raise JSONP3::Path::SyntaxError.new(
    "unclosed string literal",
    [:token_error, first, pos],
    query
  )
end

.tokenize(query) ⇒ `Object`

# File 'lib/json_p3/path/lexer.rb', line 10

# Split the JSONPath expression _query_ into an array of tokens.
#
# Each token is a three-element array `[kind, start, stop]`, where _start_ and
# _stop_ are byte offsets into _query_ and _stop_ is exclusive. Input that does not
# start any known token produces a :token_error token instead of raising; string
# literals are handled by scan_string_literal, which raises on malformed input.
#
# @param query [String] the JSONPath expression to tokenize.
# @return [Array] tokens in source order, including :token_trivia for whitespace.
# @raise [JSONP3::Path::SyntaxError] from scan_string_literal for an unclosed string
#   literal or one containing a control character.
def self.tokenize(query)
  tokens = [] #: Array[t_token]
  length = query.bytesize
  start = 0
  pos = 0

  # Binary (ASCII-8BIT) copy of _query_, built lazily the first time a number is seen.
  # String#match takes and MatchData#end returns *character* offsets, whereas _pos_ is a
  # byte offset; in a binary string the two coincide, so multi-byte characters earlier in
  # the query can not skew number scanning.
  bytes = nil

  while pos < length
    byte = query.getbyte(pos)

    case byte
    when nil
      break
    when 42 # *
      pos += 1
      tokens << [:token_asterisk, start, pos]
    when 64 # @
      pos += 1
      tokens << [:token_at, start, pos]
    when 58 # :
      pos += 1
      tokens << [:token_colon, start, pos]
    when 44 # ,
      pos += 1
      tokens << [:token_comma, start, pos]
    when 36 # $
      pos += 1
      tokens << [:token_dollar, start, pos]
    when 40 # (
      pos += 1
      tokens << [:token_lparen, start, pos]
    when 91 # [
      pos += 1
      tokens << [:token_lbracket, start, pos]
    when 41 # )
      pos += 1
      tokens << [:token_rparen, start, pos]
    when 93 # ]
      pos += 1
      tokens << [:token_rbracket, start, pos]
    when 63 # ?
      pos += 1
      tokens << [:token_question, start, pos]
    when 38 # &
      # A lone "&" is not an operator.
      tokens << if query.getbyte(pos + 1) == 38
                  pos += 2
                  [:token_and, start, pos]
                else
                  pos += 1
                  [:token_error, start, pos]
                end
    when 124 # |
      # A lone "|" is not an operator.
      tokens << if query.getbyte(pos + 1) == 124
                  pos += 2
                  [:token_or, start, pos]
                else
                  pos += 1
                  [:token_error, start, pos]
                end
    when 46 # .
      tokens << if query.getbyte(pos + 1) == 46
                  pos += 2
                  [:token_double_dot, start, pos]
                else
                  pos += 1
                  [:token_dot, start, pos]
                end
    when 61 # =
      # A lone "=" is not an operator.
      tokens << if query.getbyte(pos + 1) == 61
                  pos += 2
                  [:token_eq, start, pos]
                else
                  pos += 1
                  [:token_error, start, pos]
                end
    when 33 # !
      tokens << if query.getbyte(pos + 1) == 61
                  pos += 2
                  [:token_ne, start, pos]
                else
                  pos += 1
                  [:token_not, start, pos]
                end
    when 62 # >
      tokens << if query.getbyte(pos + 1) == 61
                  pos += 2
                  [:token_ge, start, pos]
                else
                  pos += 1
                  [:token_gt, start, pos]
                end
    when 60 # <
      tokens << if query.getbyte(pos + 1) == 61
                  pos += 2
                  [:token_le, start, pos]
                else
                  pos += 1
                  [:token_lt, start, pos]
                end
    when 39, 34 # ' or "
      # Skip the opening quote; the returned token excludes both quotes.
      pos += 1
      token, pos = scan_string_literal(query, byte, pos)
      tokens << token
    else
      if name_first?(byte)
        pos += 1 while name_ch?(query.getbyte(pos) || 0)
        tokens << [:token_name, start, pos]
      elsif trivia?(byte)
        pos += 1 while trivia?(query.getbyte(pos) || 0)
        tokens << [:token_trivia, start, pos]
      elsif number_ch?(byte)
        bytes ||= query.b

        # RE_FLOAT's second alternative is not anchored by \G, so it can match a float
        # further along the query. Only accept a match that begins at _pos_.
        if (match = bytes.match(RE_FLOAT, pos)) && match.begin(0) == pos
          pos = match.end(0) || raise
          tokens << [:token_float, start, pos]
        elsif (match = bytes.match(RE_INT, pos))
          pos = match.end(0) || raise
          tokens << [:token_int, start, pos]
        elsif (match = bytes.match(RE_INDEX, pos))
          pos = match.end(0) || raise
          tokens << [:token_index, start, pos]
        else
          # For example a "-" that is not followed by a digit.
          pos += 1
          tokens << [:token_error, start, pos]
        end
      else
        pos += 1
        tokens << [:token_error, start, pos]
      end
    end

    start = pos
  end

  tokens
end

.trivia?(ch) ⇒ `Boolean`

Returns:

(Boolean)



212
213
214

# File 'lib/json_p3/path/lexer.rb', line 212

# Is the byte _ch_ a space, tab, line feed or carriage return?
#
# @param ch [Integer]
# @return [Boolean]
def self.trivia?(ch)
  case ch
  when 32, 9, 10, 13 then true
  else false
  end
end

.truthy?(obj) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/json_p3/path/filter.rb', line 416

# Decide whether a filter expression result counts as true.
#
# A NodeList is truthy when it is not empty. The symbol :nothing and the value
# false are falsy. Everything else, including nil, is truthy.
#
# @param obj a NodeList, :nothing or a plain value.
# @return [Boolean]
def self.truthy?(obj)
  if obj.is_a?(NodeList)
    !obj.empty?
  elsif obj == :nothing
    false
  else
    obj != false
  end
end

.unescape(value, token, query) ⇒ `Object`

Replace escape sequences with their equivalent Unicode code point.

# File 'lib/json_p3/path/unescape.rb', line 11

# Replace escape sequences with their equivalent Unicode code point.
#
# Handles \uXXXX escapes (combining a high and a low surrogate into a single code
# point), the single-character escapes \b \f \n \r \t \/ \\, and \" or \' where the
# quote matches the kind of string literal given by _token_.
#
# @param value [String] the raw contents of a string literal.
# @param token [Array] the token _value_ came from; its first element selects
#   which quote escape is permitted, and it is passed to any error raised.
# @param query [String] the complete query, passed to any error raised.
# @return [String] a new UTF-8 string with all escape sequences replaced.
# @raise [JSONP3::Path::SyntaxError] on an unpaired surrogate, a code point or
#   character <= 0x1f, a quote escape that does not match the literal's quotes, or
#   an incomplete or unknown escape sequence.
def self.unescape(value, token, query)
  # All syntax errors raised here carry the same token and query.
  syntax_error = ->(message) { raise JSONP3::Path::SyntaxError.new(message, token, query) }

  out = String.new(encoding: "UTF-8")
  scanner = StringScanner.new(value)

  until scanner.eos?
    if scanner.scan(RE_SLASH_U)
      code_point = (scanner.captures&.first || raise).to_i(16)

      # A low surrogate is only valid immediately after a high surrogate.
      syntax_error.call("unexpected low surrogate") if low_surrogate?(code_point)

      if high_surrogate?(code_point)
        syntax_error.call("expected a low surrogate") unless scanner.scan(RE_SLASH_U)

        trailing = (scanner.captures&.first || raise).to_i(16)
        syntax_error.call("expected a low surrogate") unless low_surrogate?(trailing)

        # Combine the surrogate pair into one supplementary code point.
        code_point = 0x10000 + (((code_point & 0x03FF) << 10) | (trailing & 0x03FF))
      end

      syntax_error.call("invalid character #{code_point}") if code_point <= 0x1f

      out << code_point.chr(Encoding::UTF_8)
      next
    end

    ch = scanner.getch
    break if ch.nil?

    if ch != "\\"
      syntax_error.call("invalid character #{ch.ord}") if ch.ord <= 0x1f
      out << ch
      next
    end

    # _ch_ was a backslash; the next character names the escape.
    escaped = scanner.getch

    case escaped
    when "\""
      if token.first == :token_single_quoted_esc_string
        syntax_error.call("unexpected \\\" escape in single quoted string")
      end
      out << "\""
    when "'"
      if token.first == :token_double_quoted_esc_string
        syntax_error.call("unexpected \\' escape in double quoted string")
      end
      out << "'"
    when "\\" then out << "\\"
    when "/" then out << "/"
    when "b" then out << "\x08"
    when "f" then out << "\x0c"
    when "n" then out << "\n"
    when "r" then out << "\r"
    when "t" then out << "\t"
    when "u"
      # A well-formed \uXXXX would have been consumed by RE_SLASH_U above.
      syntax_error.call("unexpected \\u escape sequence")
    when nil
      syntax_error.call("incomplete escape sequence")
    else
      syntax_error.call("unknown escape sequence")
    end
  end

  out
end

Module: JSONP3::Path

Overview

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.canonical_string(value) ⇒ Object

.compile(path) ⇒ Object

.eq?(left, right) ⇒ Boolean

.find(path, data) ⇒ Object

.find_enum(path, data) ⇒ Object

.first(path, data) ⇒ Object

.get_token_value(token, query) ⇒ Object

.high_surrogate?(code_point) ⇒ Boolean

.low_surrogate?(code_point) ⇒ Boolean

.lt?(left, right) ⇒ Boolean

.map_iregexp(pattern) ⇒ String

.match(path, data) ⇒ Object

.match?(path, data) ⇒ Boolean

.name_ch?(ch) ⇒ Boolean

.name_first?(ch) ⇒ Boolean

.number_ch?(ch) ⇒ Boolean

.scan_string_literal(query, byte, pos) ⇒ Object

.tokenize(query) ⇒ Object

.trivia?(ch) ⇒ Boolean

.truthy?(obj) ⇒ Boolean

.unescape(value, token, query) ⇒ Object

.canonical_string(value) ⇒ `Object`

.compile(path) ⇒ `Object`

.eq?(left, right) ⇒ `Boolean`

.find(path, data) ⇒ `Object`

.find_enum(path, data) ⇒ `Object`

.first(path, data) ⇒ `Object`

.get_token_value(token, query) ⇒ `Object`

.high_surrogate?(code_point) ⇒ `Boolean`

.low_surrogate?(code_point) ⇒ `Boolean`

.lt?(left, right) ⇒ `Boolean`

.map_iregexp(pattern) ⇒ `String`

.match(path, data) ⇒ `Object`

.match?(path, data) ⇒ `Boolean`

.name_ch?(ch) ⇒ `Boolean`

.name_first?(ch) ⇒ `Boolean`

.number_ch?(ch) ⇒ `Boolean`

.scan_string_literal(query, byte, pos) ⇒ `Object`

.tokenize(query) ⇒ `Object`

.trivia?(ch) ⇒ `Boolean`

.truthy?(obj) ⇒ `Boolean`

.unescape(value, token, query) ⇒ `Object`