Class: IRB::RubyLex

Inherits:
Object show all
Defined in:
lib/irb/ruby-lex.rb

Overview

:stopdoc:

Defined Under Namespace

Classes: TerminateLineInput

Constant Summary collapse

# Nesting events that open a literal (heredoc, string, regexp, symbol, backtick
# and the %-array forms). ltype_from_open_nestings looks for the innermost open
# nesting whose event is listed here. Frozen so the shared list cannot be
# mutated by accident.
LTYPE_TOKENS =
%i[
  on_heredoc_beg on_tstring_beg
  on_regexp_beg on_symbeg on_backtick
  on_symbols_beg on_qsymbols_beg
  on_words_beg on_qwords_beg
].freeze
# Ruby keywords, as symbols. Not referenced by the methods shown in this
# section; presumably consumed elsewhere in IRB (verify against callers).
# Frozen so the shared list cannot be mutated by accident.
RESERVED_WORDS =
%i[
  __ENCODING__ __LINE__ __FILE__
  BEGIN END
  alias and
  begin break
  case class
  def defined? do
  else elsif end ensure
  false for
  if in
  module
  next nil not
  or
  redo rescue retry return
  self super
  then true
  undef unless until
  when while
  yield
].freeze
# Nesting events (string, backtick, regexp, symbol openers) that
# free_indent_nesting_element? treats as "free indent" nestings.
# Frozen so the shared list cannot be mutated by accident.
FREE_INDENT_NESTINGS =
%i[on_tstring_beg on_backtick on_regexp_beg on_symbeg].freeze

Instance Method Summary collapse

Instance Method Details

#assignment_expression?(code, local_variables:) ⇒ Boolean

Returns:

  • (Boolean)


71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/irb/ruby-lex.rb', line 71

# Reports whether the last of the (possibly multiple) top-level expressions in
# +code+ is an assignment.
#
# code            - Ruby source text handed to Prism.parse.
# local_variables - passed to Prism as the single entry of +scopes+.
#
# Returns true for write nodes (a = 1, @a = 1, A = 1, a += 1, a &&= 1, ...),
# attribute writes (a.b = 1) and index writes (a[b] = 1); false otherwise,
# including empty, comment-only or invalid code.
def assignment_expression?(code, local_variables:)
  program_node = Prism.parse(code, scopes: [local_variables]).value
  node = program_node.statements.body.last
  case node
  when nil
    # Empty code, comment-only code or invalid code
    false
  when Prism::CallNode
    # a.b = 1, a[b] = 1
    # Prism::CallNode#equal_loc is only available in prism >= 1.7.0
    name = node.name
    if name == :[]=
      # Distinguish between `a[k] = v` from `a.[]= k, v`, `a.[]=(k, v)`
      node.opening == '['
    else
      # Setter names end with `=`, but so do the comparison operators. Those are
      # plain method calls (`a == b`, `a <= b`) and must not count as assignments.
      name.end_with?('=') && !%i[== === != <= >=].include?(name)
    end
  when Prism::MatchWriteNode
    # /(?<lvar>)/ =~ a, Class name is *WriteNode but not an assignment.
    false
  else
    # a = 1, @a = 1, $a = 1, @@a = 1, A = 1, a += 1, a &&= 1, a.b += 1, and so on
    node.class.name.match?(/WriteNode/)
  end
end

#calc_indent_level(opens) ⇒ Object



208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'lib/irb/ruby-lex.rb', line 208

# Folds a list of open nestings into an indent level.
#
# opens - Array of nesting elements responding to +event+ and +tok+.
#
# Returns the Integer indent level (0 for an empty list).
def calc_indent_level(opens)
  opens.each_with_index.reduce(0) do |level, (nesting, position)|
    case nesting.event
    when :on_heredoc_beg
      if opens[position + 1]&.event == :on_heredoc_beg
        # Directly followed by another heredoc opener: leave the level untouched.
        level
      elsif nesting.tok.match?(/^<<[~-]/)
        # `<<~` / `<<-` heredocs add one level.
        level + 1
      else
        # Any other heredoc opener resets the level.
        0
      end
    when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick
      # No indent: "", //, :"", ``
      # Indent: %(), %r(), %i(), %x()
      nesting.tok.start_with?('%') ? level + 1 : level
    when :on_embdoc_beg
      0
    else
      # `alias` and `undef` do not add a level; every other nesting does.
      %w[alias undef].include?(nesting.tok) ? level : level + 1
    end
  end
end

#check_code_state(code, local_variables:) ⇒ Object



48
49
50
51
52
53
54
55
56
# File 'lib/irb/ruby-lex.rb', line 48

# Lexes and parses +code+ once and derives the state of the input from it.
#
# code            - Ruby source text.
# local_variables - passed to Prism as the single entry of +scopes+.
#
# Returns a three-element Array:
#   [result of should_continue?, open nestings, result of code_terminated?]
def check_code_state(code, local_variables:)
  lexed = Prism.parse_lex(code, scopes: [local_variables])
  open_nestings = NestingParser.open_nestings(lexed)

  source_lines = code.lines
  # The second entry of the parse_lex value holds the token pairs; only the token
  # itself is kept. Tokens are re-sorted by start offset -- presumably because
  # Prism does not guarantee source order here (TODO confirm).
  _ast, token_pairs = lexed.value
  ordered_tokens = token_pairs.map(&:first).sort_by { |token| token.location.start_offset }

  needs_more_input = should_continue?(ordered_tokens, source_lines.last, source_lines.size)
  terminated = code_terminated?(code, needs_more_input, open_nestings, local_variables: local_variables)
  [needs_more_input, open_nestings, terminated]
end

#check_code_syntax(code, local_variables:) ⇒ Object



136
137
138
139
140
141
142
143
144
145
# File 'lib/irb/ruby-lex.rb', line 136

# Classifies the syntax state of +code+.
#
# code            - Ruby source text handed to Prism.lex.
# local_variables - passed to Prism as the single entry of +scopes+.
#
# Returns :valid when lexing succeeds. Otherwise returns :recoverable_error or
# :unrecoverable_error according to +continuable?+, or, when the result does not
# respond to +continuable?+, whatever check_syntax_error_heuristics returns.
def check_code_syntax(code, local_variables:)
  lex_result = Prism.lex(code, scopes: [local_variables])
  return :valid if lex_result.success?

  # For Prism <= 1.9.0. Drop this branch when IRB requires Prism >= 1.10.0.
  return check_syntax_error_heuristics(lex_result) unless lex_result.respond_to?(:continuable?)

  lex_result.continuable? ? :recoverable_error : :unrecoverable_error
end

#check_syntax_error_heuristics(prism_parse_result) ⇒ Object

Prism <= 1.9.0 does not have the `ParseResult#continuable?` method. Fall back to legacy heuristics based on error messages and error locations.



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/irb/ruby-lex.rb', line 149

# Legacy classification of a failed lex result, based on error messages and
# error locations.
#
# prism_parse_result - object whose +value+ is a list of [token, ...] pairs and
#                      whose +errors+ respond to +message+ and +location+.
#
# Returns :unrecoverable_error as soon as one error is judged unrecoverable,
# :other_error when at least one message matched none of the known patterns,
# and :recoverable_error otherwise.
def check_syntax_error_heuristics(prism_parse_result)
  # Strip trailing comments, newlines, embdoc tokens and EOF so that error
  # locations can be compared with the last or second-last meaningful token.
  trailing_noise = %i[COMMENT NEWLINE IGNORED_NEWLINE EMBDOC_BEGIN EMBDOC_LINE EMBDOC_END EOF]
  tokens = prism_parse_result.value.map(&:first)
  tokens.pop until tokens.empty? || !trailing_noise.include?(tokens.last.type)
  last_token = tokens.last

  saw_unknown_error = false
  prism_parse_result.errors.each do |error|
    message = error.message
    if /unexpected character literal|incomplete expression at|unexpected .%.|too short escape sequence/i.match?(message)
      # Ignore these errors. Likely to appear only at the end of code.
      # `[a, b ?` unexpected character literal, incomplete expression at
      # `p a, %`  unexpected '%'
      # `/\u`     too short escape sequence
      next
    elsif /unexpected write target/i.match?(message)
      # `a,b` recoverable by `=v`
      # `a,b,` recoverable by `c=v`
      target = last_token
      target = tokens[-2] if target&.type == :COMMA
      return :unrecoverable_error if target && error.location.end_offset < target.location.end_offset
    elsif /(invalid|unexpected) (?:break|next|redo)/i.match?(message)
      # Hard to check correctly, so treat it as always recoverable.
      # `(break;1)` recoverable by `.f while true`
      next
    elsif / meets end of file|unexpected end-of-input|unterminated |cannot parse|could not parse/i.match?(message)
      # These are recoverable errors if there is no other unrecoverable error
      # `/aaa`    unterminated regexp meets end of file
      # `def f`   unexpected end-of-input
      # `"#{`     unterminated string
      # `:"aa`    cannot parse the string part
      # `def f =` could not parse the endless method body
      next
    elsif /is not allowed|unexpected .+ ignoring it/i.match?(message)
      # `@@` `$--` is not allowed
      # `)`, `end` unexpected ')', ignoring it
      return :unrecoverable_error
    elsif /unexpected |invalid |dynamic constant assignment|can't set variable|can't change the value|is not valid to get|variable capture in alternative pattern/i.match?(message)
      # Likely to be unrecoverable except when the error is at the last token location.
      # Unexpected: `class a`, `tap(&`, `def f(a,`
      # Invalid: `a ? b :`, `/\u{`, `"\M-`
      # `a,B`        recoverable by `.c=v` dynamic constant assignment
      # `a,$1`       recoverable by `.f=v` Can't set variable
      # `a,self`     recoverable by `.f=v` Can't change the value of self
      # `p foo?:`    recoverable by `v`    is not valid to get
      # `x in 1|{x:` recoverable by `1}`   variable capture in alternative pattern
      return :unrecoverable_error if last_token && error.location.end_offset <= last_token.location.start_offset
    else
      saw_unknown_error = true
    end
  end

  saw_unknown_error ? :other_error : :recoverable_error
end

#check_termination_in_prev_line(code, local_variables:) ⇒ Object

Checks whether code.lines[...-1] is terminated and can be evaluated immediately. Returns the last line string if terminated, otherwise false. Terminated means that the previous lines (code.lines[...-1]) are syntactically valid and that the previous lines and the last line are syntactically separated. Terminated example:

foo(
bar)
baz.

Unterminated example: previous lines are syntax invalid

foo(
bar).
baz

Unterminated example: previous lines are connected to the last line

foo(
bar)
.baz


374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
# File 'lib/irb/ruby-lex.rb', line 374

# Checks whether everything before the last line of +code+ already forms
# complete statements that are not continued by the last line.
#
# code            - Ruby source text; split with String#lines.
# local_variables - passed to Prism as the single entry of +scopes+.
#
# Returns the last line (String) when the previous lines are terminated,
# otherwise false.
def check_termination_in_prev_line(code, local_variables:)
  *head_lines, tail_line = code.lines
  # Fewer than two lines: there is no "previous" part to evaluate.
  return false if head_lines.empty?

  head_result = Prism.parse(head_lines.join, scopes: [local_variables])
  return false unless head_result.success?

  head_nodes = head_result.value.statements.body
  all_nodes = Prism.parse(code, scopes: [local_variables]).value.statements.body
  return false if all_nodes.size < head_nodes.size

  # If the last line extended a statement from the previous lines, that
  # statement would cover a different location in the full parse.
  unchanged = head_nodes.each_with_index.all? do |head_node, position|
    head_node.location == all_nodes[position].location
  end
  return false unless unchanged

  # If the last line only contain comments, treat it as not connected to handle this case:
  #   receiver
  #   # comment
  #   .method
  return false if tail_line.match?(/\A\s*#/)

  tail_line
end

#code_terminated?(code, continue, opens, local_variables:) ⇒ Boolean

Returns:

  • (Boolean)


58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/irb/ruby-lex.rb', line 58

# Decides whether the accumulated +code+ should be treated as finished.
#
# code            - Ruby source text, forwarded to check_code_syntax.
# continue        - result of should_continue? for the same code.
# opens           - list of open nestings for the same code.
# local_variables - forwarded to check_code_syntax.
#
# Returns true/false for the four known syntax states, nil for any other state.
def code_terminated?(code, continue, opens, local_variables:)
  syntax_state = check_code_syntax(code, local_variables: local_variables)

  return true if syntax_state == :unrecoverable_error
  return false if syntax_state == :recoverable_error
  # Unclassified errors: terminated only when nothing is open and no continuation is pending.
  return opens.empty? && !continue if syntax_state == :other_error

  !continue if syntax_state == :valid
end

#free_indent_nesting_element?(elem) ⇒ Boolean

Returns:

  • (Boolean)


235
236
237
# File 'lib/irb/ruby-lex.rb', line 235

# Tells whether +elem+ is a nesting whose event is listed in FREE_INDENT_NESTINGS.
#
# elem - nesting element responding to +event+, or nil.
#
# Returns true or false (false for nil).
def free_indent_nesting_element?(elem)
  event = elem&.event
  FREE_INDENT_NESTINGS.any? { |free_event| free_event == event }
end

#indent_difference(lines, line_results, line_index) ⇒ Object

Calculates the difference between the pasted code’s actual indent and the indent calculated from tokens.



240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# File 'lib/irb/ruby-lex.rb', line 240

# Calculates how far the actual leading spaces of a line deviate from the
# indent implied by its open nestings.
#
# lines        - source lines.
# line_results - per-line [prev_opens, next_opens, min_depth] triples.
# line_index   - index of the line to inspect.
#
# Returns the Integer difference (actual - calculated), or 0 when the walk
# ends inside a heredoc whose opener does not start with `<<-` or `<<~`.
def indent_difference(lines, line_results, line_index)
  current = line_index
  loop do
    opens_before, _opens_after, depth = line_results[current]
    innermost = opens_before.last

    inside_multiline_token =
      innermost && (innermost.event == :on_heredoc_beg || free_indent_nesting_element?(innermost))

    unless inside_multiline_token
      # The leading whitespace is a real indent: compare it with the calculated one.
      expected = 2 * calc_indent_level(opens_before.take(depth))
      return lines[current][/^ */].size - expected
    end

    return 0 if innermost.event == :on_heredoc_beg && innermost.tok.match?(/^<<[^-~]/)

    # The leading whitespace belongs to a multiline token, so measure the line
    # on which that token begins instead.
    current = innermost.pos[0] - 1
  end
end

#ltype_from_open_nestings(opens) ⇒ Object



328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
# File 'lib/irb/ruby-lex.rb', line 328

# Derives the literal-type marker from the innermost open literal nesting.
#
# opens - list of open nestings responding to +event+ and +tok+.
#
# Returns a one-character String (`"`, `'`, `/`, `:`, a backtick or `]`),
# or nil when no nesting listed in LTYPE_TOKENS is open or the string opener
# is not one of the recognised forms.
def ltype_from_open_nestings(opens)
  start_nesting = opens.reverse_each.find { |elem| LTYPE_TOKENS.include?(elem.event) }
  return nil unless start_nesting

  tok = start_nesting.tok
  case start_nesting.event
  when :on_tstring_beg
    case tok
    when '"', /^%.$/, /^%Q.$/ then '"'
    when "'", /^%q.$/ then "'"
    end
  when :on_regexp_beg then '/'
  when :on_symbeg     then ':'
  when :on_backtick   then '`'
  when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg then ']'
  when :on_heredoc_beg
    # A quoted heredoc identifier may contain any characters (e.g. <<~'END TEXT'),
    # so accept anything between the matching quotes rather than only \w+.
    # Unquoted identifiers behave like double-quoted strings.
    quote = tok && tok[/<<[-~]?(['"`]).+\1/, 1]
    quote || '"'
  end
end

#process_indent_level(parse_lex_result, lines, line_index, is_newline) ⇒ Object



259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/irb/ruby-lex.rb', line 259

# Computes the indent, in spaces, to use for the line at +line_index+.
#
# parse_lex_result - handed unchanged to NestingParser.call; the result is used as one
#                    [prev_opens, next_opens, min_depth] triple per line.
# lines            - source lines; leading spaces are measured with /^ */.
# line_index       - index into +lines+ and into the per-line nesting results.
# is_newline       - when true, the "preserved" indent is read from the previous line
#                    (line_index - 1) instead of the line at +line_index+.
#
# Returns an Integer number of spaces.
def process_indent_level(parse_lex_result, lines, line_index, is_newline)
  line_results = NestingParser.(parse_lex_result)
  result = line_results[line_index]
  if result
    prev_opens, next_opens, min_depth = result
  else
    # When last line is empty
    prev_opens = next_opens = line_results.last[1]
    min_depth = next_opens.size
  end

  # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation.
  # Shortest open tokens can be calculated by `opens.take(min_depth)`
  indent = 2 * calc_indent_level(prev_opens.take(min_depth))

  # Number of leading spaces already present on the relevant line.
  preserve_indent = lines[line_index - (is_newline ? 1 : 0)][/^ */].size

  # Innermost open nesting before and after the line.
  prev_open_elem = prev_opens.last
  next_open_elem = next_opens.last

  # Calculates base indent for pasted code on the line where prev_open_elem is located
  # irb(main):001:1*   if a # base_indent is 2, indent calculated from nestings is 0
  # irb(main):002:1*         if b # base_indent is 6, indent calculated from nestings is 2
  # irb(main):003:0>           c # base_indent is 6, indent calculated from nestings is 4
  if prev_open_elem
    # pos[0] - 1 turns the element's line position into an index into +lines+
    # (assumes pos[0] is a 1-based line number -- TODO confirm); negative differences clamp to 0.
    base_indent = [0, indent_difference(lines, line_results, prev_open_elem.pos[0] - 1)].max
  else
    base_indent = 0
  end

  if free_indent_nesting_element?(prev_open_elem)
    if is_newline && prev_open_elem.pos[0] == line_index
      # First newline inside free-indent token
      base_indent + indent
    else
      # Accept any number of indent inside free-indent token
      preserve_indent
    end
  elsif prev_open_elem&.event == :on_embdoc_beg || next_open_elem&.event == :on_embdoc_beg
    if prev_open_elem&.event == next_open_elem&.event
      # Accept any number of indent inside embdoc content
      preserve_indent
    else
      # =begin or =end
      0
    end
  elsif prev_open_elem&.event == :on_heredoc_beg
    tok = prev_open_elem.tok
    # Same or more nestings after the line than before it: still inside the heredoc.
    if prev_opens.size <= next_opens.size
      if is_newline && lines[line_index].empty? && line_results[line_index - 1][0].last != next_open_elem
        # First line in heredoc
        tok.match?(/^<<[-~]/) ? base_indent + indent : indent
      elsif tok.match?(/^<<~/)
        # Accept extra indent spaces inside `<<~` heredoc
        [base_indent + indent, preserve_indent].max
      else
        # Accept any number of indent inside other heredoc
        preserve_indent
      end
    else
      # Heredoc close
      # `<<~` / `<<-` terminators sit one level shallower than the full nesting list;
      # other heredoc terminators go to column 0.
      prev_line_indent_level = calc_indent_level(prev_opens)
      tok.match?(/^<<[~-]/) ? base_indent + 2 * (prev_line_indent_level - 1) : 0
    end
  else
    # Ordinary code: pasted base indent plus the indent implied by the nestings.
    base_indent + indent
  end
end

#should_continue?(tokens, line, line_num) ⇒ Boolean

Returns:

  • (Boolean)


98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/irb/ruby-lex.rb', line 98

# Decides whether IRB should keep reading because the last line continues onto the next one.
#
# tokens   - Prism tokens (responding to +type+ and +location+) in source order.
# line     - text of the last line, or nil when the code has no lines at all.
# line_num - line number of +line+, compared with the tokens' start_line / end_line.
#
# Returns true when the line ends with a backslash or when the trailing
# IGNORED_NEWLINE follows a token other than `..`, `...`, NEWLINE or SEMICOLON
# (e.g. "1 + \n", "foo.\n"); false otherwise.
def should_continue?(tokens, line, line_num)
  # `"".lines.last` is nil; treat that as an empty line instead of crashing on nil.match?.
  line ||= ''

  # Check if the line ends with \\. Then IRB should continue reading next line.
  # Space and backslash are not included in Prism token, so find trailing text after last non-newline token position.
  trailing = line
  tokens.reverse_each do |token|
    location = token.location
    break if location.start_line < line_num
    next unless location.start_line == line_num && location.end_line == line_num
    next if %i[IGNORED_NEWLINE NEWLINE EOF].include?(token.type)

    # byteslice returns nil in case end_line is wrong (e.g. `"\C-`)
    trailing = line.byteslice(location.end_column..) || ''
    break
  end
  return true if trailing.match?(/\A\s*\\\n?\z/)

  # "1 + \n" and "foo.\n" should continue.
  # Find the last token that is not a comment, embdoc, EOF or ignored newline.
  skippable = %i[EMBDOC_BEGIN EMBDOC_LINE EMBDOC_END COMMENT EOF IGNORED_NEWLINE]
  pos = tokens.rindex { |token| !skippable.include?(token.type) }
  return false unless pos

  ignored_newline_found = tokens[(pos + 1)..].any? { |token| token.type == :IGNORED_NEWLINE }

  # If IGNORED_NEWLINE token is following non-newline non-semicolon token, it should continue.
  # Special case: treat `1..` and `1...` as not continuing.
  ignored_newline_found && !%i[DOT_DOT DOT_DOT_DOT NEWLINE SEMICOLON].include?(tokens[pos].type)
end