Class: Rich::PythonLexer

Inherits:
BaseLexer show all
Defined in:
lib/rich/syntax.rb

Overview

Lexer for Python source. It tokenizes one line at a time into styled segments for syntax highlighting.

Constant Summary collapse

# Reserved words of the Python language (including the constants None, True
# and False). Identifiers found in this list are styled with theme[:keyword].
KEYWORDS = %w[
  and as assert async await break class continue
  def del elif else except finally for from
  global if import in is lambda None nonlocal
  not or pass raise return try while with
  yield True False
].freeze
# Names of Python's built-in functions and types. Identifiers found in this
# list (and not in KEYWORDS) are styled with theme[:name_builtin].
#
# Includes aiter/anext (added in Python 3.10) and __import__, which are part
# of the documented built-in function set.
BUILTINS = %w[
  abs aiter all anext any ascii bin bool breakpoint bytearray bytes
  callable chr classmethod compile complex delattr dict dir divmod
  enumerate eval exec filter float format frozenset getattr globals
  hasattr hash help hex id input int isinstance issubclass iter len
  list locals map max memoryview min next object oct open ord pow
  print property range repr reversed round set setattr slice sorted
  staticmethod str sum super tuple type vars zip __import__
].freeze

Instance Method Summary collapse

Instance Method Details

#tokenize(line, theme) ⇒ Array<Segment>



529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
# File 'lib/rich/syntax.rb', line 529

# Splits one line of Python source into styled segments.
#
# The line is scanned left to right. At each position the first matching rule
# wins, in this order: whitespace, comment, triple-quoted string, string,
# number, decorator, identifier, operator, punctuation. Any other character is
# emitted as a single unstyled segment, so the segment texts always
# concatenate back to the original line.
#
# Scanning is stateless per line: a triple-quoted string that is not closed on
# this line is styled to the end of the line only.
#
# @param line [String] a single line of Python source
# @param theme [#[]] style lookup keyed by token symbol (:comment, :string,
#   :string_doc, :number, :keyword, :name, :name_builtin, :name_class,
#   :name_decorator, :operator, :punctuation)
# @return [Array<Segment>] segments in source order
def tokenize(line, theme)
  segments = []
  pos = 0

  # Numeric literal anchored at the scan position (\G). A sign is accepted only
  # directly after an exponent marker, so "1+2" is three tokens while "1e-5"
  # is one. Prefixed literals accept their full digit set (e.g. 0xFF).
  number_pattern = /\G(?:
    0[xX][\da-fA-F_]+ |
    0[oO][0-7_]+ |
    0[bB][01_]+ |
    \d[\d_]*(?:\.(?:\d[\d_]*)?)?(?:[eE][+\-]?\d[\d_]*)?[jJ]?
  )/x

  while pos < line.length
    char = line[pos]

    # Whitespace run (unstyled)
    if char.match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # Comment: runs to the end of the line
    if char == "#"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # Docstring / triple-quoted string
    triple = line[pos, 3]
    if triple == '"""' || triple == "'''"
      closing = line.index(triple, pos + 3)
      # Index of the last character of the string; an unterminated string
      # extends to the end of the line.
      str_end = closing ? closing + 2 : line.length - 1
      segments << Segment.new(line[pos..str_end], style: theme[:string_doc] || theme[:string])
      pos = str_end + 1
      next
    end

    # String
    if ['"', "'"].include?(char)
      # NOTE(review): assumes find_string_end returns the inclusive index of
      # the string's last character — confirm against its definition.
      str_end = find_string_end(line, pos, char)
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # Number
    if char.match?(/\d/)
      # The decimal alternative always matches at least the current digit.
      literal = line.match(number_pattern, pos)[0]
      segments << Segment.new(literal, style: theme[:number])
      pos += literal.length
      next
    end

    # Decorator: only when "@" is the first non-whitespace character of the
    # line. Elsewhere "@" is the matrix-multiplication operator and falls
    # through to the operator rule below.
    if char == "@" && line[0...pos].match?(/\A\s*\z/)
      dec_end = pos + 1
      dec_end += 1 while dec_end < line.length && line[dec_end].match?(/[\w.]/)
      segments << Segment.new(line[pos...dec_end], style: theme[:name_decorator] || theme[:name])
      pos = dec_end
      next
    end

    # Identifier
    if char.match?(/[a-zA-Z_]/)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/\w/)
      word = line[pos...word_end]

      style = if KEYWORDS.include?(word)
                theme[:keyword]
              elsif BUILTINS.include?(word)
                theme[:name_builtin] || theme[:name]
              elsif word[0].match?(/[A-Z]/)
                # Capitalized names are styled as class names.
                theme[:name_class] || theme[:name]
              else
                theme[:name]
              end

      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Operators: a run of operator characters is emitted as one segment
    if char.match?(/[+\-*\/%&|^~<>=!@]/)
      op_end = pos + 1
      op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%&|^~<>=!@]/)
      segments << Segment.new(line[pos...op_end], style: theme[:operator])
      pos = op_end
      next
    end

    # Punctuation
    if char.match?(/[(){}\[\].,;:]/)
      segments << Segment.new(char, style: theme[:punctuation])
      pos += 1
      next
    end

    # Anything else: pass through unstyled
    segments << Segment.new(char)
    pos += 1
  end

  segments
end