Class: Odin::Parsing::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/odin/parsing/tokenizer.rb

Constant Summary collapse

MAX_DOCUMENT_SIZE =
Utils::SecurityLimits::MAX_DOCUMENT_SIZE
RE_WHITESPACE =

Pre-compiled regex patterns for StringScanner (all frozen)

/[ \t]+/
RE_NEWLINE_CRLF =
/\r\n?/
RE_IDENTIFIER =
/[a-zA-Z_][a-zA-Z0-9_\-]*/
RE_IDENT_PATH =
/[a-zA-Z_][a-zA-Z0-9_\-.]*/
RE_NUMERIC =
/[+\-]?[0-9eE.+\-]+/
RE_CURRENCY_VAL =
/[+\-]?[0-9.]+(?:[eE][+\-]?\d+)?(?::[a-zA-Z0-9_\-]+)?/
RE_WORD =
/[a-zA-Z0-9_.\-]+/
RE_HEADER_CONTENT =
/[^}\r\n]*/
RE_COMMENT_CONTENT =
/[^\r\n]*/
RE_REF_PATH =
/[a-zA-Z0-9_.\[\]()?\-@']*/
RE_BINARY_DATA =
/[^\s;\r\n]*/
RE_BARE_VALUE =
/[^\s;:\r\n]+/
RE_DATE_OR_NUM =
/[0-9eE.\-:+TZ]+/
RE_DATE_PREFIX =
/\A\d{4}-\d{2}-\d{2}T/
RE_DATE_EXACT =
/\A\d{4}-\d{2}-\d{2}\z/
RE_DURATION =
/P[0-9YMWDTHS.]+/
RE_TIME_VAL =
/T[0-9:.+\-Z]+/
RE_ARRAY_INDEX =
/\[[^\]]*\]/
ESCAPE_MAP =
{
  '"'  => '"',
  '\\' => '\\',
  'n'  => "\n",
  't'  => "\t",
  'r'  => "\r",
  '0'  => "\0",
  '/'  => '/'
}.freeze

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ Tokenizer

Returns a new instance of Tokenizer.



40
41
42
43
44
45
46
47
# File 'lib/odin/parsing/tokenizer.rb', line 40

# Sets up a tokenizer over +text+; nothing is scanned until #tokenize runs.
#
# @param text [String] source text; kept as @source and wrapped in a
#   StringScanner
def initialize(text)
  @source = text
  @scanner = StringScanner.new(text)

  # Line and column counters both start at 1.
  @line = @col = 1

  # Pre-size the token buffer: one slot per ten characters of input plus
  # 16 spare slots. @token_count starts at zero.
  initial_capacity = (text.length / 10) + 16
  @tokens = Array.new(initial_capacity)
  @token_count = 0
end

Instance Method Details

#tokenize ⇒ Object



49
50
51
52
53
54
55
# File 'lib/odin/parsing/tokenizer.rb', line 49

# Runs one full tokenization pass and returns the tokens produced.
#
# Order of work: check_document_size!, skip_bom, scan_tokens, then a final
# EOF token is emitted.
# NOTE(review): check_document_size! presumably enforces MAX_DOCUMENT_SIZE and
# skip_bom presumably skips a leading byte-order mark; neither helper is
# visible here, so confirm against their definitions.
#
# @return [Array] the first @token_count entries of @tokens
def tokenize
  check_document_size!
  skip_bom
  scan_tokens

  # Close the stream with an EOF token carrying an empty string, stamped
  # with the line/column reached after scanning.
  eof_line = @line
  eof_col = @col
  emit(TokenType::EOF, "", eof_line, eof_col)

  # Return only the leading @token_count slots of the token buffer.
  @tokens.take(@token_count)
end