Class: Rich::PythonLexer

Inherits:

BaseLexer

Object
BaseLexer
Rich::PythonLexer

show all

Defined in: lib/rich/syntax.rb

Overview

Python lexer: splits a single line of Python source into styled segments for syntax highlighting.

Constant Summary collapse

# Words that #tokenize styles with theme[:keyword]: Python's reserved words,
# including None, True and False. Checked before BUILTINS.
KEYWORDS =

%w[
  and as assert async await break class continue def del elif else
  except finally for from global if import in is lambda None nonlocal
  not or pass raise return try while with yield True False
].freeze

# Names of Python built-ins. #tokenize styles an identifier found here (and not
# in KEYWORDS) with theme[:name_builtin], falling back to theme[:name].
BUILTINS =

%w[
  abs all any ascii bin bool breakpoint bytearray bytes callable
  chr classmethod compile complex delattr dict dir divmod enumerate
  eval exec filter float format frozenset getattr globals hasattr
  hash help hex id input int isinstance issubclass iter len list
  locals map max memoryview min next object oct open ord pow print
  property range repr reversed round set setattr slice sorted
  staticmethod str sum super tuple type vars zip
].freeze

Instance Method Summary collapse

#tokenize(line, theme) ⇒ Object

Instance Method Details

#tokenize(line, theme) ⇒ `Object`

# File 'lib/rich/syntax.rb', line 529

# Splits one line of Python source into styled segments.
#
# The lexer works on a single line and keeps no state between calls, so a
# triple-quoted string that is not closed on this line is styled through to
# the end of the line.
#
# @param line [String] a single line of Python source
# @param theme [#[]] style lookup keyed by symbols (+:comment+, +:string+,
#   +:string_doc+, +:number+, +:name+, +:name_decorator+, +:name_builtin+,
#   +:name_class+, +:keyword+, +:operator+, +:punctuation+)
# @return [Array<Segment>] segments in source order; whitespace and
#   unrecognised characters are emitted without a style
def tokenize(line, theme)
  # One Python numeric literal: hex / octal / binary integers, or a decimal
  # integer/float with optional exponent and imaginary suffix. A sign is only
  # consumed directly after the exponent marker, so "1+2" stays three tokens.
  number_re = /\G(?:0[xX][\h_]+|0[oO][0-7_]+|0[bB][01_]+|\d[\d_]*(?:\.[\d_]*)?(?:[eE][+\-]?\d[\d_]*)?[jJ]?)/

  segments = []
  pos = 0

  while pos < line.length
    # Whitespace run, emitted unstyled.
    if (blank = line.match(/\G\s+/, pos))
      segments << Segment.new(blank[0])
      pos = blank.end(0)
      next
    end

    # Comment: everything up to the end of the line.
    if line[pos] == "#"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # Docstring / triple-quoted string.
    opener = line[pos, 3]
    if opener == '"""' || opener == "'''"
      closing = line.index(opener, pos + 3)
      # No closing delimiter on this line: style through to the end of the line.
      str_end = closing ? closing + 2 : line.length - 1
      segments << Segment.new(line[pos..str_end], style: theme[:string_doc] || theme[:string])
      pos = str_end + 1
      next
    end

    # Single- or double-quoted string.
    if ['"', "'"].include?(line[pos])
      delim = line[pos]
      str_end = find_string_end(line, pos, delim)
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # Number.
    if (number = line.match(number_re, pos))
      segments << Segment.new(number[0], style: theme[:number])
      pos = number.end(0)
      next
    end

    # Decorator: "@name" as the first token on the line. Any other "@" is the
    # matrix-multiplication operator and falls through to the operator branch.
    if line[pos] == "@" && line[0...pos].match?(/\A\s*\z/) &&
       (decorator = line.match(/\G@[a-zA-Z_][\w.]*/, pos))
      segments << Segment.new(decorator[0], style: theme[:name_decorator] || theme[:name])
      pos = decorator.end(0)
      next
    end

    # Identifier: keyword, builtin, capitalised (class-like) name, or plain name.
    if (ident = line.match(/\G[a-zA-Z_]\w*/, pos))
      word = ident[0]

      style = if KEYWORDS.include?(word)
                theme[:keyword]
              elsif BUILTINS.include?(word)
                theme[:name_builtin] || theme[:name]
              elsif word[0].match?(/[A-Z]/)
                theme[:name_class] || theme[:name]
              else
                theme[:name]
              end

      segments << Segment.new(word, style: style)
      pos = ident.end(0)
      next
    end

    # Operators: a run of operator characters becomes one segment.
    if (operator = line.match(/\G[+\-*\/%&|^~<>=!@]+/, pos))
      segments << Segment.new(operator[0], style: theme[:operator])
      pos = operator.end(0)
      next
    end

    # Punctuation: one character per segment.
    if line[pos].match?(/[(){}\[\].,;:]/)
      segments << Segment.new(line[pos], style: theme[:punctuation])
      pos += 1
      next
    end

    # Anything else is passed through unstyled.
    segments << Segment.new(line[pos])
    pos += 1
  end

  segments
end

Class: Rich::PythonLexer

Overview

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#tokenize(line, theme) ⇒ Object

#tokenize(line, theme) ⇒ `Object`