Module: SmarterJSON::Recovery

Defined in:
lib/smarter_json/parser.rb

Class Method Summary

Class Method Details

.candidate_ranges(input) ⇒ Object



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# File 'lib/smarter_json/parser.rb', line 197

# Scans +input+ byte by byte and collects the byte ranges of top-level
# bracketed regions (`{...}` / `[...]`) that could be JSON payloads.
#
# Brackets are ignored while the scanner is inside a double-quoted,
# single-quoted or triple-single-quoted string, a `//` or `#` line comment,
# or a `/* ... */` block comment. A range is recorded each time the bracket
# stack returns to empty after a region was opened.
#
# @param input [String] text to scan
# @return [Array<Range>] exclusive byte ranges (start...end), in input order
def candidate_ranges(input)
  found = []
  open_brackets = []
  candidate_start = nil
  state = nil
  pos = 0

  while pos < input.bytesize
    byte = input.getbyte(pos)
    advance = 1 # bytes consumed by this iteration unless a branch says otherwise

    case state
    when :double, :single
      closing_quote = state == :double ? 0x22 : 0x27
      if byte == 0x5C
        # Backslash: skip the escaped byte together with the backslash.
        advance = 2
      elsif byte == closing_quote
        state = nil
      end
    when :triple
      if input.byteslice(pos, 3) == "'''"
        state = nil
        advance = 3
      end
    when :line_comment
      # LF or CR ends the comment; the newline byte itself is consumed.
      state = nil if byte == 0x0A || byte == 0x0D
    when :block_comment
      if input.byteslice(pos, 2) == "*/"
        state = nil
        advance = 2
      end
    else
      pair = input.byteslice(pos, 2)
      if pair == "//"
        state = :line_comment
        advance = 2
      elsif pair == "/*"
        state = :block_comment
        advance = 2
      elsif byte == 0x23
        state = :line_comment
      elsif byte == 0x22
        state = :double
      elsif input.byteslice(pos, 3) == "'''"
        # Must be tested before the single-quote case below.
        state = :triple
        advance = 3
      elsif byte == 0x27
        state = :single
      elsif byte == 0x7B || byte == 0x5B
        candidate_start = pos if open_brackets.empty?
        open_brackets << byte
      elsif byte == 0x7D || byte == 0x5D
        # A closer only pops when it matches the innermost opener.
        expected_opener = byte == 0x7D ? 0x7B : 0x5B
        open_brackets.pop if open_brackets.last == expected_opener
        if open_brackets.empty? && candidate_start
          found << (candidate_start...(pos + 1))
          candidate_start = nil
        end
      end
    end

    pos += advance
  end

  found
end

.emit_wrapper_warnings(payloads, handler) ⇒ Object



117
118
119
120
121
122
123
124
125
# File 'lib/smarter_json/parser.rb', line 117

# Reports, through +handler+, which kinds of wrapping were discarded around
# the recovered payloads. The flags and positions are read from the +:meta+
# hash of the first payload.
#
# Does nothing when there is no handler, when +payloads+ is empty, or when the
# first payload carries no +:meta+ entry.
#
# @param payloads [Array<Hash>] payload hashes; the first one's +:meta+ is used
# @param handler [#call, nil] warning callback, or nil to disable warnings
# @return [Object, nil] result of the last warning emitted, or nil
def emit_wrapper_warnings(payloads, handler)
  return unless handler

  first_payload = payloads.first
  meta = first_payload && first_payload[:meta]
  # Nothing to report without metadata (e.g. an empty payload list).
  return unless meta

  warn(handler, :prefix_text_ignored, "ignored non-JSON text before the payload", *meta[:first_pos]) if meta[:prefix]
  warn(handler, :code_fence_stripped, "stripped markdown code fences around the payload", *meta[:first_pos]) if meta[:fence]
  warn(handler, :wrapper_tag_stripped, "stripped wrapper tags around the payload", *meta[:first_pos]) if meta[:wrapper]
  warn(handler, :suffix_text_ignored, "ignored non-JSON text after the payload", *meta[:last_pos]) if meta[:suffix]
end

.extract_payloads(input, options) ⇒ Object



127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/smarter_json/parser.rb', line 127

# Collects every candidate region of +input+ that parses successfully.
#
# Each candidate range is sliced out and parsed once with warnings disabled
# (+on_warning: nil+); candidates that raise ParseError are dropped. All
# surviving payloads share one metadata hash computed by wrapper_meta.
#
# @param input [String] raw text that may embed payloads
# @param options [Hash] parser options
# @return [Array<Hash>] hashes with +:slice+, +:range+ and +:meta+ keys
def extract_payloads(input, options)
  accepted = candidate_ranges(input).each_with_object([]) do |range, kept|
    slice = input.byteslice(range.begin, range.end - range.begin)
    begin
      SmarterJSON.send(:process_content, slice, options.merge(on_warning: nil))
    rescue ParseError
      next # not parseable on its own; skip this candidate
    end
    kept << { slice: slice, range: range }
  end

  accepted_ranges = accepted.map { |entry| entry[:range] }
  shared_meta = wrapper_meta(input, accepted_ranges)
  accepted.each { |entry| entry[:meta] = shared_meta }
  accepted
end

.line_col_for(input, offset) ⇒ Object



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/smarter_json/parser.rb', line 159

# Converts a byte offset into a 1-based [line, column] pair.
#
# LF, CR and CRLF each count as a single line break. Every other byte
# advances the column by one, so columns are counted in bytes. Scanning stops
# early if +offset+ lies beyond the end of +input+.
#
# @param input [String] text the offset refers to
# @param offset [Integer] byte offset into +input+
# @return [Array(Integer, Integer)] line and column of the offset
def line_col_for(input, offset)
  line = 1
  col = 1
  pos = 0

  while pos < offset
    byte = input.getbyte(pos)
    break unless byte # ran past the end of the input

    pos += 1
    case byte
    when 0x0A
      line += 1
      col = 1
    when 0x0D
      line += 1
      col = 1
      # Treat CRLF as one break, but only if the LF is still before offset.
      pos += 1 if pos < offset && input.getbyte(pos) == 0x0A
    else
      col += 1
    end
  end

  [line, col]
end

.process_string(input, options, &block) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/smarter_json/parser.rb', line 78

# Parses +input+, recovering embedded payloads when the text is wrapped in
# non-JSON content.
#
# Flow:
# 1. Input with an invalid encoding is parsed directly; any error propagates.
#    Recovery is not attempted because it relies on regex scans of the input,
#    which cannot run on invalid byte sequences.
# 2. If the input looks wrapped (see wrapper_hint?), payloads are extracted
#    first and replayed when any are found.
# 3. Otherwise the whole input is parsed. If that raises a ParseError other
#    than an EncodingError, payload extraction is used as a fallback and the
#    original error is re-raised when nothing can be recovered.
#
# Only the whole-input parse is guarded by the rescue, so an error raised
# while replaying payloads (for instance by the caller's block) propagates
# instead of triggering a second replay.
#
# @param input [String] raw text to parse
# @param options [Hash] parser options (+:on_warning+ is read during replay)
# @yield results, forwarded to the underlying parse / replay calls
# @return [Object, nil] whatever the underlying parse or replay returns
# @raise [ParseError] when the input cannot be parsed or recovered
def process_string(input, options, &block)
  return SmarterJSON.send(:process_content, input, options, &block) unless input.valid_encoding?

  payloads = nil
  if wrapper_hint?(input)
    payloads = extract_payloads(input, options)
    return replay_payloads(payloads, options, &block) unless payloads.empty?
  end

  begin
    SmarterJSON.send(:process_content, input, options, &block)
  rescue ParseError => e
    raise if e.is_a?(EncodingError)

    # Reuse the extraction done above, if any, rather than scanning again.
    payloads ||= extract_payloads(input, options)
    raise if payloads.empty?

    replay_payloads(payloads, options, &block)
  end
end

.replay_payloads(payloads, options, &block) ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/smarter_json/parser.rb', line 102

# Parses each recovered payload with the caller's real options, after
# emitting the wrapper warnings to +options[:on_warning]+.
#
# All payloads are parsed before anything is yielded.
#
# @param payloads [Array<Hash>] payload hashes with a +:slice+ entry
# @param options [Hash] parser options
# @yield [result] each parsed payload, when a block is given
# @return [nil] when a block is given or there are no results
# @return [Object] the single result when exactly one payload was parsed
# @return [Array] all results when several payloads were parsed
def replay_payloads(payloads, options, &block)
  emit_wrapper_warnings(payloads, options[:on_warning])

  parsed = payloads.map { |entry| SmarterJSON.send(:process_content, entry[:slice], options) }

  if block_given?
    parsed.each(&block)
    return nil
  end

  case parsed.length
  when 0 then nil
  when 1 then parsed.first
  else parsed
  end
end

.substantive_text?(text) ⇒ Boolean

Returns:

  • (Boolean)


184
185
186
187
188
189
190
191
# File 'lib/smarter_json/parser.rb', line 184

# Tells whether +text+ holds anything other than comments, a lone markdown
# fence marker, or a lone wrapper tag.
#
# Block comments (`/* ... */`) and whole-line `#` / `//` comments are removed
# first. What remains, once stripped, is not substantive if it is empty, is a
# single fence marker (optionally followed by a language name), or is exactly
# `<json>`, `</json>`, `BEGIN_JSON` or `END_JSON` (case-insensitive).
#
# @param text [String, nil] text surrounding a payload
# @return [Boolean]
def substantive_text?(text)
  return false if text.nil? || text.empty?

  remainder = text.gsub(%r{/\*.*?\*/}m, "").gsub(/^\s*(?:#|\/\/).*$/, "").strip
  return false if remainder.empty?
  return false if remainder.match?(/\A(?:```[a-zA-Z0-9_-]*)?\z/)

  !remainder.match?(/\A(?:<\/?json>|BEGIN_JSON|END_JSON)\z/i)
end

.warn(handler, type, message, line, col) ⇒ Object



193
194
195
# File 'lib/smarter_json/parser.rb', line 193

# Builds a Warning from the given details and passes it to +handler+.
#
# @param handler [#call] warning callback
# @param type [Symbol] warning identifier
# @param message [String] human-readable description
# @param line [Integer] line the warning refers to
# @param col [Integer] column the warning refers to
# @return [Object] whatever +handler.call+ returns
def warn(handler, type, message, line, col)
  warning = Warning.new(type, message, line, col)
  handler.call(warning)
end

.wrapper_hint?(input) ⇒ Boolean

Returns:

  • (Boolean)


96
97
98
99
100
# File 'lib/smarter_json/parser.rb', line 96

# Cheap check for signs that +input+ wraps its payload in extra text.
#
# True when the input contains a markdown fence marker, a `<json` tag or a
# `BEGIN_JSON` marker anywhere, or when it starts (after optional whitespace)
# with a `JSON:` or `Final answer:` label. Matching is case-insensitive.
# Input with an invalid encoding never counts as wrapped.
#
# @param input [String] raw text to inspect
# @return [Boolean]
def wrapper_hint?(input)
  return false unless input.valid_encoding?
  return true if input.match?(/```|<json\b|BEGIN_JSON\b/i)

  input.match?(/\A[[:space:]]*(?:JSON|Final answer)[[:space:]]*:/i)
end

.wrapper_meta(input, ranges) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/smarter_json/parser.rb', line 142

# Describes what surrounds the recovered payloads inside +input+.
#
# The fence and wrapper flags are computed only from text outside the payload
# ranges (before the first, between consecutive ones, after the last), so a
# payload whose own content contains a fence marker or a `<json` / `BEGIN_JSON`
# token is not reported as having been wrapped.
#
# @param input [String] the full original text
# @param ranges [Array<Range>] exclusive byte ranges of accepted payloads,
#   assumed to be in input order
# @return [Hash] +:prefix+, +:suffix+, +:fence+ and +:wrapper+ booleans, plus
#   +:first_pos+ / +:last_pos+ ([line, col] of the start of the first and last
#   payload) when +ranges+ is not empty
def wrapper_meta(input, ranges)
  return { prefix: false, suffix: false, fence: false, wrapper: false } if ranges.empty?

  first = ranges.first
  last = ranges.last
  prefix = input.byteslice(0, first.begin)
  suffix = input.byteslice(last.end, input.bytesize - last.end)

  # Text between consecutive payloads; byteslice yields nil for a negative
  # length (out-of-order ranges), which compact drops below.
  gaps = ranges.each_cons(2).map { |left, right| input.byteslice(left.end, right.begin - left.end) }
  surrounding = [prefix, *gaps, suffix].compact

  {
    prefix: substantive_text?(prefix),
    suffix: substantive_text?(suffix),
    fence: surrounding.any? { |text| text.match?(/```/) },
    wrapper: surrounding.any? { |text| text.match?(/<json\b|BEGIN_JSON\b/i) },
    first_pos: line_col_for(input, first.begin),
    last_pos: line_col_for(input, last.begin)
  }
end