Module: JSON::Repair::StringUtils

Included in:
JSON::Repairer
Defined in:
lib/json/repair/string_utils.rb

Constant Summary collapse

BACKSLASH =

Constants for individual characters

'\\'
SLASH =

0x5c

'/'
ASTERISK =

0x2f

'*'
OPENING_BRACE =

0x2a

'{'
CLOSING_BRACE =

0x7b

'}'
OPENING_BRACKET =

0x7d

'['
CLOSING_BRACKET =

0x5b

']'
OPEN_PARENTHESIS =

0x5d

'('
CLOSE_PARENTHESIS =

0x28

')'
SPACE =

0x29

' '
NEWLINE =

0x20

"\n"
TAB =

0xa

"\t"
RETURN =

0x9

"\r"
BACKSPACE =

0xd

"\b"
FORM_FEED =

0x08

"\f"
DOUBLE_QUOTE =

0x0c

'"'
PLUS =

0x0022

'+'
MINUS =

0x2b

'-'
QUOTE =

0x2d

"'"
ZERO =

0x27

'0'
NINE =

0x30

'9'
COMMA =

0x39

','
DOT =

0x2c

'.'
COLON =

0x2e

':'
SEMICOLON =

0x3a

';'
UPPERCASE_A =

0x3b

'A'
LOWERCASE_A =

0x41

'a'
UPPERCASE_E =

0x61

'E'
LOWERCASE_E =

0x45

'e'
UPPERCASE_F =

0x65

'F'
LOWERCASE_F =

0x46

'f'
NON_BREAKING_SPACE =

0x66

' '
MONGOLIAN_VOWEL_SEPARATOR =

0xa0

"\u180e"
EN_QUAD =

0x180e

' '
ZERO_WIDTH_SPACE =

0x2000

"\u200b"
NARROW_NO_BREAK_SPACE =

0x200b

"\u202f"
MEDIUM_MATHEMATICAL_SPACE =

0x202f

"\u205f"
IDEOGRAPHIC_SPACE =

0x205f

' '
ZERO_WIDTH_NO_BREAK_SPACE =

0x3000

"\ufeff"
DOUBLE_QUOTE_LEFT =

0xfeff

'“'
DOUBLE_QUOTE_RIGHT =

0x201c

'”'
QUOTE_LEFT =

0x201d

'‘'
QUOTE_RIGHT =

0x2018

'’'
GRAVE_ACCENT =

0x2019

'`'
ACUTE_ACCENT =

0x0060

'´'
REGEX_DELIMITER =

0x00b4

%r{^[,:\[\]/{}()\n+]+$}
REGEX_UNQUOTED_STRING_DELIMITER =
%r{^[,\[\]/{}\n+]+$}
REGEX_START_OF_VALUE =
/^[\[{\w-]$/
REGEX_URL_START =

matches “https://” and other schemas

%r{^(http|https|ftp|mailto|file|data|irc)://$}
REGEX_URL_CHAR =

matches all valid URL characters EXCEPT “[”, “]”, and “,” (important JSON delimiters)

%r{^[A-Za-z0-9\-._~:/?#@!$&'()*+;=]$}
REGEX_FUNCTION_NAME_CHAR_START =
/\A[a-zA-Z_$]\z/
REGEX_FUNCTION_NAME_CHAR =
/\A[a-zA-Z0-9_$]\z/

Instance Method Summary collapse

Instance Method Details

#control_character?(char) ⇒ Boolean

Returns:

  • (Boolean)


100
101
102
# File 'lib/json/repair/string_utils.rb', line 100

# Tells whether +char+ is one of the control characters named by this module:
# NEWLINE, RETURN, TAB, BACKSPACE or FORM_FEED.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil and for every other character
def control_character?(char)
  return false if char.nil?

  case char
  when NEWLINE, RETURN, TAB, BACKSPACE, FORM_FEED then true
  else false
  end
end

#delimiter?(char) ⇒ Boolean

Returns:

  • (Boolean)


77
78
79
# File 'lib/json/repair/string_utils.rb', line 77

# Tells whether +char+ matches REGEX_DELIMITER.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil; otherwise the result of the regex match
def delimiter?(char)
  return false if char.nil?

  REGEX_DELIMITER.match?(char)
end

#digit?(char) ⇒ Boolean

Returns:

  • (Boolean)


69
70
71
# File 'lib/json/repair/string_utils.rb', line 69

# Tells whether +char+ lies between ZERO and NINE (inclusive) under string
# comparison.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil
def digit?(char)
  return false if char.nil?

  char.between?(ZERO, NINE)
end

#double_quote?(char) ⇒ Boolean

Returns:

  • (Boolean)


138
139
140
# File 'lib/json/repair/string_utils.rb', line 138

# Tells whether +char+ equals DOUBLE_QUOTE.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] result of the +==+ comparison (false for nil)
def double_quote?(char)
  char == DOUBLE_QUOTE
end

#double_quote_like?(char) ⇒ Boolean

Returns:

  • (Boolean)


146
147
148
# File 'lib/json/repair/string_utils.rb', line 146

# Tells whether +char+ is DOUBLE_QUOTE or one of its look-alikes
# (DOUBLE_QUOTE_LEFT, DOUBLE_QUOTE_RIGHT).
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil and for every other character
def double_quote_like?(char)
  return false if char.nil?

  case char
  when DOUBLE_QUOTE, DOUBLE_QUOTE_LEFT, DOUBLE_QUOTE_RIGHT then true
  else false
  end
end

#ends_with_comma_or_newline?(text) ⇒ Boolean

Returns:

  • (Boolean)


207
208
209
# File 'lib/json/repair/string_utils.rb', line 207

# Tells whether +text+ ends with a comma or a newline, optionally followed by
# spaces, tabs or carriage returns.
#
# The pattern is anchored with +\z+ (end of string). Ruby's +$+ matches at the
# end of *every* line, so it would also accept a comma or an empty line in the
# middle of +text+ even when the text ends with something else.
#
# @param text [String, nil] the text to inspect
# @return [Boolean] true only when the very end of +text+ matches
def ends_with_comma_or_newline?(text)
  /[,\n][ \t\r]*\z/.match?(text)
end

#function_name_char?(char) ⇒ Boolean

Returns:

  • (Boolean)


92
93
94
# File 'lib/json/repair/string_utils.rb', line 92

# Tells whether +char+ matches REGEX_FUNCTION_NAME_CHAR.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil; otherwise the result of the regex match
def function_name_char?(char)
  return false if char.nil?

  REGEX_FUNCTION_NAME_CHAR.match?(char)
end

#function_name_char_start?(char) ⇒ Boolean

Returns:

  • (Boolean)


88
89
90
# File 'lib/json/repair/string_utils.rb', line 88

# Tells whether +char+ matches REGEX_FUNCTION_NAME_CHAR_START.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil; otherwise the result of the regex match
def function_name_char_start?(char)
  return false if char.nil?

  REGEX_FUNCTION_NAME_CHAR_START.match?(char)
end

#hex?(char) ⇒ Boolean

Predicates for classifying a single character

Returns:

  • (Boolean)


62
63
64
65
66
67
# File 'lib/json/repair/string_utils.rb', line 62

# Tells whether +char+ falls in one of the three inclusive ranges
# ZERO..NINE, UPPERCASE_A..UPPERCASE_F or LOWERCASE_A..LOWERCASE_F
# under string comparison.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil
def hex?(char)
  return false if char.nil?

  hex_bounds = [[ZERO, NINE], [UPPERCASE_A, UPPERCASE_F], [LOWERCASE_A, LOWERCASE_F]]
  hex_bounds.any? { |low, high| char.between?(low, high) }
end

#insert_before_last_whitespace(text, text_to_insert) ⇒ Object



168
169
170
171
172
173
174
175
176
# File 'lib/json/repair/string_utils.rb', line 168

# Inserts +text_to_insert+ into +text+ just before its trailing run of
# whitespace (as judged by +whitespace?+). When +text+ has no trailing
# whitespace the insertion is simply appended.
#
# @param text [String] the text to insert into
# @param text_to_insert [String] the text to place before the trailing whitespace
# @return [String] a new string; +text+ itself is not modified
def insert_before_last_whitespace(text, text_to_insert)
  index = text.length

  # Walk back over trailing whitespace. The lower bound matters: without it an
  # all-whitespace text would drive the index below zero, where text[index - 1]
  # wraps around to the end of the string and is scanned a second time.
  index -= 1 while index.positive? && whitespace?(text[index - 1])

  # `|| ''` only narrows String#[]'s nilable return type; index stays within
  # 0..text.length, so neither slice is nil in practice.
  (text[0...index] || '') + text_to_insert + (text[index..] || '')
end

#parse_keyword(name, value) ⇒ Object



193
194
195
196
197
198
199
200
201
# File 'lib/json/repair/string_utils.rb', line 193

# Consumes +name+ from @json at the current @index, if it is there.
#
# On a match, +value+ is appended to @output and @index is advanced by the
# length of +name+. On a mismatch nothing is changed.
#
# @param name [String] the keyword text to look for at @index
# @param value [String] the text appended to @output on a match
# @return [Boolean] true when the keyword was consumed, false otherwise
def parse_keyword(name, value)
  return false unless @json[@index, name.length] == name

  @output << value
  @index += name.length
  true
end

#parse_keywords ⇒ Object

Parse the keywords true, false and null. Repair the Python keywords True, False and None. Repair the Ruby keyword nil.



181
182
183
184
185
186
187
188
189
190
191
# File 'lib/json/repair/string_utils.rb', line 181

# Tries each known keyword spelling in order via +parse_keyword+ and stops at
# the first one that is consumed.
#
# @return [Boolean] true when one of the keywords was consumed, false otherwise
def parse_keywords
  keyword_repairs = [
    %w[true true],
    %w[false false],
    %w[null null],
    # Python spellings: True, False, None
    %w[True true],
    %w[False false],
    %w[None null],
    # Ruby spelling: nil
    %w[nil null]
  ]

  keyword_repairs.any? { |name, value| parse_keyword(name, value) }
end

#quote?(char) ⇒ Boolean

Returns:

  • (Boolean)


134
135
136
# File 'lib/json/repair/string_utils.rb', line 134

# Tells whether +char+ is any kind of quote: one accepted by
# +double_quote_like?+ or, failing that, by +single_quote_like?+.
#
# @param char [String, nil] the character to inspect
# @return [Boolean]
def quote?(char)
  return true if double_quote_like?(char)

  single_quote_like?(char)
end

#remove_at_index(text, start, count) ⇒ Object



203
204
205
# File 'lib/json/repair/string_utils.rb', line 203

# Returns +text+ with +count+ characters removed, starting at index +start+.
#
# @param text [String] the source text
# @param start [Integer] index of the first character to drop
# @param count [Integer] number of characters to drop
# @return [String] a new string made of the part before +start+ and the part
#   from +start + count+ onwards
def remove_at_index(text, start, count)
  # `|| ''` covers String#[] returning nil for an out-of-range slice.
  head = text[0...start] || ''
  tail = text[(start + count)..] || ''
  head + tail
end

#same_line_whitespace?(char) ⇒ Boolean

Returns:

  • (Boolean)


126
127
128
# File 'lib/json/repair/string_utils.rb', line 126

# Tells whether +char+ is whitespace that does not end the line: accepted by
# +whitespace_except_newline?+ or, failing that, by +special_whitespace?+.
#
# @param char [String, nil] the character to inspect
# @return [Boolean]
def same_line_whitespace?(char)
  return true if whitespace_except_newline?(char)

  special_whitespace?(char)
end

#single_quote?(char) ⇒ Boolean

Returns:

  • (Boolean)


142
143
144
# File 'lib/json/repair/string_utils.rb', line 142

# Tells whether +char+ equals QUOTE.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] result of the +==+ comparison (false for nil)
def single_quote?(char)
  char == QUOTE
end

#single_quote_like?(char) ⇒ Boolean

Returns:

  • (Boolean)


150
151
152
# File 'lib/json/repair/string_utils.rb', line 150

# Tells whether +char+ is QUOTE or one of its look-alikes
# (QUOTE_LEFT, QUOTE_RIGHT, GRAVE_ACCENT, ACUTE_ACCENT).
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil and for every other character
def single_quote_like?(char)
  return false if char.nil?

  quote_variants = [QUOTE, QUOTE_LEFT, QUOTE_RIGHT, GRAVE_ACCENT, ACUTE_ACCENT]
  quote_variants.any? { |variant| variant == char }
end

#special_whitespace?(char) ⇒ Boolean

Returns:

  • (Boolean)


112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/json/repair/string_utils.rb', line 112

# Tells whether +char+ is one of the non-ASCII whitespace characters named by
# this module: either one of the individually listed constants, or any
# character between EN_QUAD and ZERO_WIDTH_SPACE (inclusive) under string
# comparison.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false when +char+ is nil (or otherwise falsy)
def special_whitespace?(char)
  return false unless char

  case char
  when NON_BREAKING_SPACE,
       MONGOLIAN_VOWEL_SEPARATOR,
       NARROW_NO_BREAK_SPACE,
       MEDIUM_MATHEMATICAL_SPACE,
       IDEOGRAPHIC_SPACE,
       ZERO_WIDTH_NO_BREAK_SPACE
    true
  else
    char.between?(EN_QUAD, ZERO_WIDTH_SPACE)
  end
end

#start_of_value?(char) ⇒ Boolean

Returns:

  • (Boolean)


96
97
98
# File 'lib/json/repair/string_utils.rb', line 96

# Tells whether +char+ can begin a value: it matches REGEX_START_OF_VALUE or
# is accepted by +quote?+.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil
def start_of_value?(char)
  return false if char.nil?
  return true if REGEX_START_OF_VALUE.match?(char)

  quote?(char)
end

#strip_last_occurrence(text, text_to_strip, strip_remaining_text: false) ⇒ Object

Strip last occurrence of text_to_strip from text.

`|| ''` on the slices below (and in `insert_before_last_whitespace` / `remove_at_index`) is for steep's nil-narrowing: `String#[]` is typed `String?`, but every call site here keeps indices within `0..text.length`, so the slices never actually return `nil`.



160
161
162
163
164
165
166
# File 'lib/json/repair/string_utils.rb', line 160

# Removes the last occurrence of +text_to_strip+ from +text+.
#
# The whole needle is removed, however long it is; skipping only a single
# character would leave the tail of a multi-character needle behind.
#
# @param text [String] the text to search
# @param text_to_strip [String] the substring to remove
# @param strip_remaining_text [Boolean] when true, everything after the
#   occurrence is dropped as well
# @return [String] +text+ itself when the needle does not occur, otherwise a
#   new string without the occurrence
def strip_last_occurrence(text, text_to_strip, strip_remaining_text: false)
  index = text.rindex(text_to_strip)
  return text unless index

  # `|| ''` only narrows String#[]'s nilable return type; both slice bounds
  # stay within 0..text.length here.
  kept_prefix = text[0...index] || ''
  return kept_prefix if strip_remaining_text

  kept_prefix + (text[(index + text_to_strip.length)..] || '')
end

#unquoted_string_delimiter?(char) ⇒ Boolean

Returns:

  • (Boolean)


81
82
83
# File 'lib/json/repair/string_utils.rb', line 81

# Tells whether +char+ matches REGEX_UNQUOTED_STRING_DELIMITER.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil; otherwise the result of the regex match
def unquoted_string_delimiter?(char)
  return false if char.nil?

  REGEX_UNQUOTED_STRING_DELIMITER.match?(char)
end

#valid_string_character?(char) ⇒ Boolean

Returns:

  • (Boolean)


73
74
75
# File 'lib/json/repair/string_utils.rb', line 73

# Tells whether the code point of +char+ lies in 0x20..0x10ffff, i.e. it is
# not one of the control characters below U+0020.
#
# nil yields false instead of raising NoMethodError, in line with the other
# character predicates in this module.
#
# @param char [String, Integer, nil] a character (its first code point is
#   used) or a code point
# @return [Boolean]
def valid_string_character?(char)
  return false if char.nil?

  char.ord.between?(0x20, 0x10ffff)
end

#whitespace?(char) ⇒ Boolean

Returns:

  • (Boolean)


104
105
106
# File 'lib/json/repair/string_utils.rb', line 104

# Tells whether +char+ is SPACE, NEWLINE, TAB or RETURN.
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil and for every other character
def whitespace?(char)
  return false if char.nil?

  case char
  when SPACE, NEWLINE, TAB, RETURN then true
  else false
  end
end

#whitespace_except_newline?(char) ⇒ Boolean

Returns:

  • (Boolean)


108
109
110
# File 'lib/json/repair/string_utils.rb', line 108

# Tells whether +char+ is SPACE, TAB or RETURN (NEWLINE is deliberately not
# in the list).
#
# @param char [String, nil] the character to inspect
# @return [Boolean] false for nil and for every other character
def whitespace_except_newline?(char)
  return false if char.nil?

  same_line_blanks = [SPACE, TAB, RETURN]
  same_line_blanks.any? { |blank| blank == char }
end

#whitespace_or_special?(char) ⇒ Boolean

Returns:

  • (Boolean)


130
131
132
# File 'lib/json/repair/string_utils.rb', line 130

# Tells whether +char+ is accepted by +whitespace?+ or, failing that, by
# +special_whitespace?+.
#
# @param char [String, nil] the character to inspect
# @return [Boolean]
def whitespace_or_special?(char)
  return true if whitespace?(char)

  special_whitespace?(char)
end