Class: IRB::RubyLex

Inherits:

Object

Object
IRB::RubyLex

show all

Defined in:: lib/irb/ruby-lex.rb

Overview

:stopdoc:

Defined Under Namespace

Classes: TerminateLineInput

Constant Summary collapse

# Ripper s-expression node types that are treated as assignments
# (checked by assignment_expression?).
ASSIGNMENT_NODE_TYPES =

[
  # Local, instance, global, class, constant, attribute, and index assignment:
  #   "foo = bar",
  #   "@foo = bar",
  #   "$foo = bar",
  #   "@@foo = bar",
  #   "::Foo = bar",
  #   "a::Foo = bar",
  #   "Foo = bar"
  #   "foo.bar = 1"
  #   "foo[1] = bar"
  :assign,

  # Operation assignment:
  #   "foo += bar"
  #   "foo -= bar"
  #   "foo ||= bar"
  #   "foo &&= bar"
  :opassign,

  # Multiple assignment:
  #   "foo, bar = 1, 2"
  :massign,
]

# Lexer events that represent errors. ripper_lex_without_warning uses this
# list to replace an error token with a regular token when both occupy
# overlapping positions.
ERROR_TOKENS =

[
  :on_parse_error,
  :compile_error,
  :on_assign_error,
  :on_alias_error,
  :on_class_name_error,
  :on_param_error
]

# Events of opening tokens that free_indent_token? reports as "free indent"
# (string, backtick, regexp and symbol literals).
FREE_INDENT_TOKENS =

%i[on_tstring_beg on_backtick on_regexp_beg on_symbeg]

# Events of opening tokens that ltype_from_open_tokens considers when
# looking for the innermost open literal.
LTYPE_TOKENS =

%i[
  on_heredoc_beg on_tstring_beg
  on_regexp_beg on_symbeg on_backtick
  on_symbols_beg on_qsymbols_beg
  on_words_beg on_qwords_beg
]

Class Method Summary collapse

.compile_with_errors_suppressed(code, line_no: 1) ⇒ Object
.generate_local_variables_assign_code(local_variables) ⇒ Object
.interpolate_ripper_ignored_tokens(code, tokens) ⇒ Object

Some parts of the code are not included in Ripper’s tokens.
.ripper_lex_without_warning(code, local_variables: []) ⇒ Object

Instance Method Summary collapse

#assignment_expression?(code, local_variables:) ⇒ Boolean
#calc_indent_level(opens) ⇒ Object
#check_code_state(code, local_variables:) ⇒ Object
#check_code_syntax(code, local_variables:) ⇒ Object
#check_termination_in_prev_line(code, local_variables:) ⇒ Object
#code_terminated?(code, tokens, opens, local_variables:) ⇒ Boolean
#free_indent_token?(token) ⇒ Boolean
#indent_difference(lines, line_results, line_index) ⇒ Object

Calculates the difference between the pasted code’s indent and the indent calculated from tokens.
#ltype_from_open_tokens(opens) ⇒ Object
#process_indent_level(tokens, lines, line_index, is_newline) ⇒ Object
#should_continue?(tokens) ⇒ Boolean

Class Method Details

.compile_with_errors_suppressed(code, line_no: 1) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 45

# Yields +code+ and +line_no+ to the given block and returns the block's
# result.
#
# If the block raises ArgumentError, the block is invoked one more time with
# a ";\n" line prepended to the code and the line number lowered by one.
# Ruby can raise for code containing an incomplete encoding magic comment;
# putting an expression on a line before the code prevents magic comment
# handling, and decreasing the line number keeps the line numbers of the
# original code unchanged. An error raised by the second attempt propagates.
def self.compile_with_errors_suppressed(code, line_no: 1)
  yield code, line_no
rescue ArgumentError
  yield ";\n#{code}", line_no - 1
end

.generate_local_variables_assign_code(local_variables) ⇒ `Object`



71
72
73

# File 'lib/irb/ruby-lex.rb', line 71

# Builds a chained assignment such as "a=b=nil;" from the given local
# variable names. Returns nil when the list is empty.
def self.generate_local_variables_assign_code(local_variables)
  return if local_variables.empty?

  "#{local_variables.join('=')}=nil;"
end

.interpolate_ripper_ignored_tokens(code, tokens) ⇒ `Object`

Some parts of the code are not included in Ripper’s tokens. Examples: the DATA part, and the tokens after heredoc_beg when the heredoc has an unclosed embexpr. With the interpolated tokens, tokens.map(&:tok).join will be equal to code.

# File 'lib/irb/ruby-lex.rb', line 78

# Returns a copy of +tokens+ in which every stretch of +code+ that is not
# covered by a token is filled with an :on_ignored_by_ripper element, so that
# joining the +tok+ of the result reproduces +code+.
#
# Token positions are [line, byte column] pairs with 1-based line numbers.
def self.interpolate_ripper_ignored_tokens(code, tokens)
  # Byte offset at which each line of code starts.
  line_starts = code.lines.each_with_object([0]) do |line, starts|
    starts << starts.last + line.bytesize
  end

  filled = []
  cursor = 0        # byte offset just past the last emitted element
  current_line = 1  # line number at the cursor

  # Emits the bytes between the cursor and +upto+ as an ignored element.
  fill_gap = lambda do |upto|
    text = code.byteslice(cursor...upto)
    column = cursor - line_starts[current_line - 1]
    filled << Ripper::Lexer::Elem.new([current_line, column], :on_ignored_by_ripper, text, 0)
    current_line += text.count("\n")
  end

  tokens.each do |token|
    line, col = token.pos
    token_start = line_starts[line - 1] + col
    fill_gap.call(token_start) if cursor < token_start
    filled << token
    cursor = token_start + token.tok.bytesize
    current_line += token.tok.count("\n")
  end

  # Whatever remains after the last token is ignored text as well.
  fill_gap.call(code.bytesize) if cursor < code.bytesize
  filled
end

.ripper_lex_without_warning(code, local_variables: []) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 107

# Lexes +code+ with Ripper while warnings are silenced ($VERBOSE is set to nil
# for the duration of the call and restored afterwards).
#
# When +local_variables+ is not empty, an assignment line for those names is
# prepended so the lexer knows them as local variables. The prelude is placed
# on line 0 so that tokens of +code+ keep their 1-based line numbers; tokens
# that belong to the prelude are dropped from the result.
#
# Returns the tokens of +code+ with the parts Ripper ignored interpolated
# (see interpolate_ripper_ignored_tokens).
def self.ripper_lex_without_warning(code, local_variables: [])
  verbose, $VERBOSE = $VERBOSE, nil
  lvars_code = generate_local_variables_assign_code(local_variables)
  original_code = code
  if lvars_code
    code = "#{lvars_code}\n#{code}"
    line_no = 0
  else
    line_no = 1
  end

  compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, inner_line_no|
    lexer = Ripper::Lexer.new(inner_code, '-', inner_line_no)
    tokens = []
    lexer.scan.each do |t|
      # Everything before line 1 is injected prelude: the local variable
      # assignment and, on the retry path of compile_with_errors_suppressed,
      # the extra ";" line. The retry path can push the first line down to -1,
      # so compare with <= 0 rather than == 0.
      next if t.pos.first <= 0

      prev_tk = tokens.last
      position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize
      if position_overlapped
        # Of two overlapping tokens, prefer a regular token over an error token.
        tokens[-1] = t if ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event)
      else
        tokens << t
      end
    end
    interpolate_ripper_ignored_tokens(original_code, tokens)
  end
ensure
  $VERBOSE = verbose
end

Instance Method Details

#assignment_expression?(code, local_variables:) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/irb/ruby-lex.rb', line 156

# Reports whether the last of the (possibly multiple) expressions in +code+
# is an assignment.
#
# The code is parsed with Ripper.sexp while warnings are silenced. For invalid
# code Ripper.sexp returns nil and the result is false. For valid code the
# second element of the top-level s-expression is the list of parsed
# expressions, and the first element of each expression is its node type.
def assignment_expression?(code, local_variables:)
  verbose, $VERBOSE = $VERBOSE, nil
  # Always put exactly one prelude statement in front of the user code so it
  # can be dropped uniformly below.
  prelude = RubyLex.generate_local_variables_assign_code(local_variables) || 'nil;'
  statements = Ripper.sexp("#{prelude}\n#{code}")&.dig(1)
  user_statements = statements&.drop(1)
  last_node_type = user_statements&.dig(-1, 0)
  ASSIGNMENT_NODE_TYPES.include?(last_node_type)
ensure
  $VERBOSE = verbose
end

#calc_indent_level(opens) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 274

# Computes the indent level implied by the list of open tokens.
#
# Most open tokens add one level. Exceptions:
# * heredoc: ignored when directly followed by another heredoc opener;
#   otherwise `<<~` / `<<-` adds one level and a plain `<<` resets to 0
# * string, regexp, symbol and backtick literals add a level only in their
#   %-literal form (no indent for "", //, :"", ``; indent for %(), %r(), ...)
# * =begin resets the level to 0
def calc_indent_level(opens)
  opens.each_with_index.reduce(0) do |level, (open_token, position)|
    case open_token.event
    when :on_embdoc_beg
      0
    when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick
      open_token.tok.start_with?('%') ? level + 1 : level
    when :on_heredoc_beg
      if opens[position + 1]&.event == :on_heredoc_beg
        level
      elsif open_token.tok.match?(/^<<[~-]/)
        level + 1
      else
        0
      end
    else
      level + 1
    end
  end
end

#check_code_state(code, local_variables:) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 137

# Lexes +code+ and returns a three-element array: the tokens, the open tokens
# reported by NestingParser, and whether the code is terminated.
def check_code_state(code, local_variables:)
  tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables)
  opens = NestingParser.open_tokens(tokens)
  terminated = code_terminated?(code, tokens, opens, local_variables: local_variables)
  [tokens, opens, terminated]
end

#check_code_syntax(code, local_variables:) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 198

# Compiles +code+ (with an assignment prelude for +local_variables+ on the
# line before it) purely to find out whether it is syntactically valid, and
# classifies the outcome.
#
# Returns one of:
# * :valid               - the code compiled without raising
# * :recoverable_error   - the input is incomplete; more lines may fix it
# * :unrecoverable_error - the input is broken in a way more input cannot fix
# * :other_error         - any other SyntaxError
#
# Warnings are silenced while compiling; $VERBOSE is restored afterwards.
def check_code_syntax(code, local_variables:)
  lvars_code = RubyLex.generate_local_variables_assign_code(local_variables)
  code = "#{lvars_code}\n#{code}"

  begin # check whether the parser reports an error
    verbose, $VERBOSE = $VERBOSE, nil
    case RUBY_ENGINE
    when 'ruby'
      self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
        RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
      end
    when 'jruby'
      JRuby.compile_ir(code)
    else
      # Generic fallback: the BEGIN block throws before any of the code runs,
      # so eval only has to get through parsing.
      catch(:valid) do
        eval("BEGIN { throw :valid, true }\n#{code}")
        false
      end
    end
  rescue EncodingError
    # This is for a hash with invalid encoding symbol, {"\xAE": 1}
    # An explicit return is required here: without it the value of this
    # branch is discarded and the method falls through to :valid.
    return :unrecoverable_error
  rescue SyntaxError => e
    case e.message
    when /unterminated (?:string|regexp) meets end of file/
      # "unterminated regexp meets end of file"
      #
      #   example:
      #     /
      #
      # "unterminated string meets end of file"
      #
      #   example:
      #     '
      return :recoverable_error
    when /syntax error, unexpected end-of-input/
      # "syntax error, unexpected end-of-input, expecting keyword_end"
      #
      #   example:
      #     if true
      #       hoge
      #       if false
      #         fuga
      #       end
      return :recoverable_error
    when /syntax error, unexpected keyword_end/
      # "syntax error, unexpected keyword_end"
      #
      #   example:
      #     if (
      #     end
      #
      #   example:
      #     end
      return :unrecoverable_error
    when /syntax error, unexpected '\.'/
      # "syntax error, unexpected '.'"
      #
      #   example:
      #     .
      return :unrecoverable_error
    when /unexpected tREGEXP_BEG/
      # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
      #
      #   example:
      #     method / f /
      return :unrecoverable_error
    else
      return :other_error
    end
  ensure
    $VERBOSE = verbose
  end
  :valid
end

#check_termination_in_prev_line(code, local_variables:) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 431

# Checks whether +code+ without its last line is already terminated.
#
# Returns the source text of the last line when it is, and false otherwise
# (including when the code cannot be split into "previous lines" and
# "last line", or when the last line begins right after a dot).
def check_termination_in_prev_line(code, local_variables:)
  tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables)

  # Scanning backwards, the first newline-bearing token ends the last line;
  # the second one is the final token of the previous lines.
  seen_newline = false
  split_at = tokens.rindex do |t|
    has_newline = t.tok.include?("\n")
    next has_newline if seen_newline

    seen_newline = has_newline
    false
  end
  return false unless split_at

  last_line_tokens = tokens[(split_at + 1)..]
  first_token = last_line_tokens.find do |t|
    !%i[on_sp on_ignored_sp on_comment].include?(t.event)
  end
  return false if first_token.nil? || first_token.state == Ripper::EXPR_DOT

  head_tokens = tokens[0..split_at]
  head_code = head_tokens.map(&:tok).join
  head_opens = NestingParser.open_tokens(head_tokens)
  return false unless code_terminated?(head_code, head_tokens, head_opens, local_variables: local_variables)

  last_line_tokens.map(&:tok).join
end

#code_terminated?(code, tokens, opens, local_variables:) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/irb/ruby-lex.rb', line 143

# Decides whether +code+ is complete, based on the result of
# check_code_syntax:
# * unrecoverable error - terminated (more input cannot help)
# * recoverable error   - not terminated
# * other error         - terminated only if nothing is open and the last
#                         token does not ask for continuation
# * valid               - terminated unless the last token asks for
#                         continuation
def code_terminated?(code, tokens, opens, local_variables:)
  syntax_state = check_code_syntax(code, local_variables: local_variables)

  if syntax_state == :unrecoverable_error
    true
  elsif syntax_state == :recoverable_error
    false
  elsif syntax_state == :other_error
    opens.empty? && !should_continue?(tokens)
  elsif syntax_state == :valid
    !should_continue?(tokens)
  end
end

#free_indent_token?(token) ⇒ `Boolean`

Returns:

(Boolean)



301
302
303

# File 'lib/irb/ruby-lex.rb', line 301

# Reports whether +token+ (which may be nil) is an opening token whose event
# is listed in FREE_INDENT_TOKENS.
def free_indent_token?(token)
  event = token&.event
  FREE_INDENT_TOKENS.include?(event)
end

#indent_difference(lines, line_results, line_index) ⇒ `Object`

Calculates the difference between the pasted code’s indent and the indent calculated from tokens.

# File 'lib/irb/ruby-lex.rb', line 306

# Calculates the difference between the actual leading-space indent of the
# line at +line_index+ and the indent calculated from its open tokens
# (two spaces per indent level).
#
# When the line starts inside a multiline token (heredoc or free-indent
# literal), its leading whitespace is content rather than indent, so the
# calculation moves to the line on which that token begins. Inside a plain
# `<<` heredoc the difference is always 0.
def indent_difference(lines, line_results, line_index)
  loop do
    _tokens, prev_opens, _next_opens, min_depth = line_results[line_index]
    enclosing = prev_opens.last
    in_heredoc = enclosing&.event == :on_heredoc_beg

    unless in_heredoc || (enclosing && free_indent_token?(enclosing))
      # The leading whitespace is a real indent: return the difference.
      expected_indent = 2 * calc_indent_level(prev_opens.take(min_depth))
      return lines[line_index][/^ */].size - expected_indent
    end
    return 0 if in_heredoc && enclosing.tok.match?(/^<<[^-~]/)

    # Continue from the line where the multiline token begins.
    line_index = enclosing.pos[0] - 1
  end
end

#ltype_from_open_tokens(opens) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 401

# Returns a one-character string describing the innermost open literal in
# +opens+ (the last token whose event is in LTYPE_TOKENS), or nil when no
# such token is open or the string opener is not recognized.
#
# Strings yield their quote kind (" or '), regexps "/", symbols ":",
# backticks "`", %w/%W/%i/%I lists "]", and heredocs the quote around their
# identifier (defaulting to ").
def ltype_from_open_tokens(opens)
  start_token = opens.reverse_each.find { |open_token| LTYPE_TOKENS.include?(open_token.event) }
  return nil unless start_token

  opener = start_token.tok
  case start_token.event
  when :on_tstring_beg
    if opener == '"' || /^%.$/.match?(opener) || /^%Q.$/.match?(opener)
      '"'
    elsif opener == "'" || /^%q.$/.match?(opener)
      "'"
    end
  when :on_regexp_beg
    '/'
  when :on_symbeg
    ':'
  when :on_backtick
    '`'
  when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
    ']'
  when :on_heredoc_beg
    opener[/<<[-~]?(['"`])\w+\1/, 1] || '"'
  end
end

#process_indent_level(tokens, lines, line_index, is_newline) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 325

# Calculates the number of leading spaces the line at +line_index+ should have.
#
# tokens::     lexed tokens, passed to NestingParser.parse_by_line
# lines::      the source lines (strings); used to read existing indentation
# line_index:: 0-based index of the line to indent
# is_newline:: truthy when the line was just opened by a newline; in that case
#              the indent to preserve is read from the previous line
#
# Returns an Integer number of spaces.
def process_indent_level(tokens, lines, line_index, is_newline)
  line_results = NestingParser.parse_by_line(tokens)
  result = line_results[line_index]
  if result
    _tokens, prev_opens, next_opens, min_depth = result
  else
    # When last line is empty
    prev_opens = next_opens = line_results.last[2]
    min_depth = next_opens.size
  end

  # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation.
  # Shortest open tokens can be calculated by `opens.take(min_depth)`
  indent = 2 * calc_indent_level(prev_opens.take(min_depth))

  # Leading spaces already present on the line (or on the previous line for a fresh newline).
  preserve_indent = lines[line_index - (is_newline ? 1 : 0)][/^ */].size

  prev_open_token = prev_opens.last
  next_open_token = next_opens.last

  # Calculates base indent for pasted code on the line where prev_open_token is located
  # irb(main):001:1*   if a # base_indent is 2, indent calculated from tokens is 0
  # irb(main):002:1*         if b # base_indent is 6, indent calculated from tokens is 2
  # irb(main):003:0>           c # base_indent is 6, indent calculated from tokens is 4
  if prev_open_token
    # A negative difference is clamped to 0.
    base_indent = [0, indent_difference(lines, line_results, prev_open_token.pos[0] - 1)].max
  else
    base_indent = 0
  end

  if free_indent_token?(prev_open_token)
    if is_newline && prev_open_token.pos[0] == line_index
      # First newline inside free-indent token
      base_indent + indent
    else
      # Accept any number of indent inside free-indent token
      preserve_indent
    end
  elsif prev_open_token&.event == :on_embdoc_beg || next_open_token&.event == :on_embdoc_beg
    if prev_open_token&.event == next_open_token&.event
      # Accept any number of indent inside embdoc content
      preserve_indent
    else
      # =begin or =end
      0
    end
  elsif prev_open_token&.event == :on_heredoc_beg
    tok = prev_open_token.tok
    # Fewer open tokens after the line than before it means the heredoc was closed on this line.
    if prev_opens.size <= next_opens.size
      if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token
        # First line in heredoc
        tok.match?(/^<<[-~]/) ? base_indent + indent : indent
      elsif tok.match?(/^<<~/)
        # Accept extra indent spaces inside `<<~` heredoc
        [base_indent + indent, preserve_indent].max
      else
        # Accept any number of indent inside other heredoc
        preserve_indent
      end
    else
      # Heredoc close
      prev_line_indent_level = calc_indent_level(prev_opens)
      # `<<~` / `<<-` terminators may be indented; a plain `<<` terminator goes at column 0.
      tok.match?(/^<<[~-]/) ? base_indent + 2 * (prev_line_indent_level - 1) : 0
    end
  else
    base_indent + indent
  end
end

#should_continue?(tokens) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/irb/ruby-lex.rb', line 174

# Looks at the end of +tokens+ and decides whether IRB needs to keep reading
# the next line. Examples of code that should continue: `a\` `a +` `a.`
#
# Trailing spaces, newlines and comments (including =begin/=end blocks) are
# skipped; the decision is made on the last token that remains.
def should_continue?(tokens)
  trailing = tokens.last
  # A line ending in a backslash-newline always continues.
  return true if trailing&.event == :on_sp && trailing.tok == "\\\n"

  skipped_events = %i[on_sp on_nl on_ignored_nl on_comment on_embdoc_beg on_embdoc on_embdoc_end]
  last_significant = tokens.reverse_each.find { |token| !skipped_events.include?(token.event) }
  return false unless last_significant

  # State is EXPR_BEG but should not continue
  return false if %i[on_regexp_end on_heredoc_end on_semicolon].include?(last_significant.event)
  # Endless range should not continue
  return false if last_significant.event == :on_op && last_significant.tok.match?(/\A\.\.\.?\z/)

  # EXPR_DOT and most of the EXPR_BEG should continue
  last_significant.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT)
end

Class: IRB::RubyLex

Overview

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.compile_with_errors_suppressed(code, line_no: 1) ⇒ Object

.generate_local_variables_assign_code(local_variables) ⇒ Object

.interpolate_ripper_ignored_tokens(code, tokens) ⇒ Object

.ripper_lex_without_warning(code, local_variables: []) ⇒ Object

Instance Method Details

#assignment_expression?(code, local_variables:) ⇒ Boolean

#calc_indent_level(opens) ⇒ Object

#check_code_state(code, local_variables:) ⇒ Object

#check_code_syntax(code, local_variables:) ⇒ Object

#check_termination_in_prev_line(code, local_variables:) ⇒ Object

#code_terminated?(code, tokens, opens, local_variables:) ⇒ Boolean

#free_indent_token?(token) ⇒ Boolean

#indent_difference(lines, line_results, line_index) ⇒ Object

#ltype_from_open_tokens(opens) ⇒ Object

#process_indent_level(tokens, lines, line_index, is_newline) ⇒ Object

#should_continue?(tokens) ⇒ Boolean

.compile_with_errors_suppressed(code, line_no: 1) ⇒ `Object`

.generate_local_variables_assign_code(local_variables) ⇒ `Object`

.interpolate_ripper_ignored_tokens(code, tokens) ⇒ `Object`

.ripper_lex_without_warning(code, local_variables: []) ⇒ `Object`

#assignment_expression?(code, local_variables:) ⇒ `Boolean`

#calc_indent_level(opens) ⇒ `Object`

#check_code_state(code, local_variables:) ⇒ `Object`

#check_code_syntax(code, local_variables:) ⇒ `Object`

#check_termination_in_prev_line(code, local_variables:) ⇒ `Object`

#code_terminated?(code, tokens, opens, local_variables:) ⇒ `Boolean`

#free_indent_token?(token) ⇒ `Boolean`

#indent_difference(lines, line_results, line_index) ⇒ `Object`

#ltype_from_open_tokens(opens) ⇒ `Object`

#process_indent_level(tokens, lines, line_index, is_newline) ⇒ `Object`

#should_continue?(tokens) ⇒ `Boolean`