Class: TreeSitter::Parser

Inherits:

Object

Object
TreeSitter::Parser

show all

Defined in: ext/tree_sitter/parser.c

Instance Method Summary collapse

#cancellation_flag ⇒ Integer

Get the parser’s current cancellation flag pointer.
#cancellation_flag=(flag) ⇒ Object

Set the parser’s current cancellation flag pointer.
#included_ranges ⇒ Array<Range>

Get the ranges of text that the parser will include when parsing.
#included_ranges=(array) ⇒ Boolean

Set the ranges of text that the parser should include when parsing.
#language ⇒ Object

Get the parser’s current language.
#language=(language) ⇒ Boolean

Set the language that the parser should use for parsing.
#logger ⇒ Logger

Get the parser’s current logger.
#logger=(logger) ⇒ Object

Set the logger that a parser should use during parsing.
#parse(old_tree, input) ⇒ Tree^?

Use the parser to parse some source code and create a syntax tree.
#parse_string(old_tree, string) ⇒ Tree^?

Use the parser to parse some source code stored in one contiguous buffer.
#parse_string_encoding(old_tree, string, encoding) ⇒ Tree^?

Use the parser to parse some source code stored in one contiguous buffer with a given encoding.
#print_dot_graphs(file) ⇒ Object

Set the file descriptor to which the parser should write debugging graphs during parsing.
#reset ⇒ Object

Instruct the parser to start the next parse from the beginning.

Instance Method Details

#cancellation_flag ⇒ `Integer`

Note:

DEPRECATED in tree-sitter 0.26+. This API was removed. Use TSParseOptions with progress_callback instead.

Get the parser’s current cancellation flag pointer.

Returns:

(Integer)

# File 'ext/tree_sitter/parser.c', line 63

/*
 * Ruby binding for Parser#cancellation_flag.
 *
 * Reads the cancellation_flag field of the struct returned by unwrap(self)
 * and converts it to a Ruby Integer with SIZET2NUM. No tree-sitter function
 * is called here; only the value kept on the wrapper struct is returned.
 */
static VALUE parser_get_cancellation_flag(VALUE self) {
  // tree-sitter 0.26+ removed cancellation_flag API
  // Return the stored value for backward compatibility
  return SIZET2NUM(unwrap(self)->cancellation_flag);
}

#cancellation_flag=(flag) ⇒ `Object`

Note:

DEPRECATED in tree-sitter 0.26+. This API was removed. Use TSParseOptions with progress_callback instead.

Set the parser’s current cancellation flag pointer.

If a non-null pointer is assigned, then the parser will periodically read from this pointer during parsing. If it reads a non-zero value, it will halt early, returning nil.

Returns:

#included_ranges ⇒ `Array<Range>`

Get the ranges of text that the parser will include when parsing.

Returns:

(Array<Range>)

# File 'ext/tree_sitter/parser.c', line 119

/*
 * Ruby binding for Parser#included_ranges.
 *
 * Asks tree-sitter for the parser's included ranges, wraps each TSRange with
 * new_range, and returns them in a freshly created Ruby array in the same
 * order tree-sitter reported them.
 */
static VALUE parser_get_included_ranges(VALUE self) {
  uint32_t count = 0;
  const TSRange *included = ts_parser_included_ranges(SELF, &count);

  // Pre-size the Ruby array to the number of ranges reported.
  VALUE result = rb_ary_new_capa(count);

  uint32_t idx = 0;
  while (idx < count) {
    rb_ary_push(result, new_range(included + idx));
    idx++;
  }

  return result;
}

#included_ranges=(array) ⇒ `Boolean`

Set the ranges of text that the parser should include when parsing.

By default, the parser will always include entire documents. This function allows you to parse only a portion of a document but still return a syntax tree whose ranges match up with the document as a whole. You can also pass multiple disjoint ranges.

The array parameter supplies the ranges to include. The parser does not take ownership of these ranges; it copies the data, so it doesn’t matter how these ranges are allocated.

If the array's length is zero, then the entire document will be parsed. Otherwise, the given ranges must be ordered from earliest to latest in the document, and they must not overlap. That is, the following must hold for all:

i < length - 1: ranges[i].end_byte <= ranges[i + 1].start_byte

If this requirement is not satisfied, the operation will fail, the ranges will not be assigned, and this function will return false. On success, this function returns true.

Parameters:

array (Array<Range>)

Returns:

(Boolean)

# File 'ext/tree_sitter/parser.c', line 156

/*
 * Ruby binding for Parser#included_ranges=.
 *
 * Converts every element of `array` to a TSRange with value_to_range, hands
 * the resulting buffer to ts_parser_set_included_ranges, and frees the buffer
 * afterwards.
 *
 * @param self  the wrapped parser.
 * @param array a Ruby Array; Check_Type raises for any other type.
 * @return Qtrue when tree-sitter accepted the ranges. Qfalse when it rejected
 *         them, when the array holds more elements than a uint32_t count can
 *         express, or when the temporary buffer could not be allocated. No
 *         ranges are assigned in any of the Qfalse cases.
 *
 * An empty array is forwarded as (NULL, 0) without allocating.
 *
 * NOTE(review): if value_to_range can raise a Ruby exception, the temporary
 * buffer is leaked because the exception unwinds past free(); confirm and
 * consider wrapping the conversion loop with rb_ensure.
 */
static VALUE parser_set_included_ranges(VALUE self, VALUE array) {
  Check_Type(array, T_ARRAY);

  long length = rb_array_len(array);

  // tree-sitter takes the count as uint32_t: refuse instead of truncating.
  if (length < 0 || (uint64_t)length > UINT32_MAX) {
    return Qfalse;
  }

  TSRange *ranges = NULL;
  if (length > 0) {
    // Guard the size computation against wrap-around on narrow size_t.
    if ((size_t)length > SIZE_MAX / sizeof *ranges) {
      return Qfalse;
    }
    ranges = malloc((size_t)length * sizeof *ranges);
    if (ranges == NULL) {
      return Qfalse;
    }
    for (long i = 0; i < length; i++) {
      ranges[i] = value_to_range(rb_ary_entry(array, i));
    }
  }

  bool res = ts_parser_set_included_ranges(SELF, ranges, (uint32_t)length);

  // free(NULL) is a no-op, so no guard is needed for the empty-array case.
  free(ranges);
  return res ? Qtrue : Qfalse;
}

#language ⇒ `Object`

Get the parser’s current language.



93
94
95

# File 'ext/tree_sitter/parser.c', line 93

/*
 * Ruby binding for Parser#language.
 *
 * Passes the result of ts_parser_language(SELF) to new_language and returns
 * whatever Ruby value that helper produces.
 */
static VALUE parser_get_language(VALUE self) {
  return new_language(ts_parser_language(SELF));
}

#language=(language) ⇒ `Boolean`

Set the language that the parser should use for parsing.

Returns a boolean indicating whether or not the language was successfully assigned. True means assignment succeeded. False means there was a version mismatch: the language was generated with an incompatible version of the Tree-sitter CLI. Check the language’s version using Language#version and compare it to this library’s LANGUAGE_VERSION and MIN_COMPATIBLE_LANGUAGE_VERSION constants.

Returns:

(Boolean)

# File 'ext/tree_sitter/parser.c', line 109

/*
 * Ruby binding for Parser#language=.
 *
 * Converts `language` with value_to_language and asks tree-sitter to assign
 * it to the parser. Returns Qtrue when ts_parser_set_language reports success
 * and Qfalse otherwise.
 */
static VALUE parser_set_language(VALUE self, VALUE language) {
  if (ts_parser_set_language(SELF, value_to_language(language))) {
    return Qtrue;
  }
  return Qfalse;
}

#logger ⇒ `Logger`

Get the parser’s current logger.

Returns:

(Logger)



176
177
178

# File 'ext/tree_sitter/parser.c', line 176

/*
 * Ruby binding for Parser#logger.
 *
 * Passes the logger returned by ts_parser_logger(SELF) to new_logger_by_val
 * and returns the Ruby value that helper produces.
 */
static VALUE parser_get_logger(VALUE self) {
  return new_logger_by_val(ts_parser_logger(SELF));
}

#logger=(logger) ⇒ `Object`

Set the logger that a parser should use during parsing.

The parser does not take ownership over the logger payload. If a logger was previously assigned, the caller is responsible for releasing any memory owned by the previous logger.

Parameters:

logger (Logger) —

or any object that has a printf, puts, or write.

Returns:

# File 'ext/tree_sitter/parser.c', line 191

/*
 * Ruby binding for Parser#logger=.
 *
 * Converts `logger` with value_to_logger, installs the result on the parser
 * via ts_parser_set_logger, then records the Ruby object in the wrapper
 * struct's logger field. Always returns nil.
 */
static VALUE parser_set_logger(VALUE self, VALUE logger) {
  ts_parser_set_logger(SELF, value_to_logger(logger));
  // NOTE(review): presumably kept so the Ruby logger object stays reachable
  // while the parser uses it — confirm against the wrapper's GC mark function.
  unwrap(self)->logger = logger;
  return Qnil;
}

#parse(old_tree, input) ⇒ `Tree`^?

Note:

This is currently incomplete, as the Input class is incomplete.

Use the parser to parse some source code and create a syntax tree.

If you are parsing this document for the first time, pass nil for the old_tree parameter. Otherwise, if you have already parsed an earlier version of this document and the document has since been edited, pass the previous syntax tree so that the unchanged parts of it can be reused. This will save time and memory. For this to work correctly, you must have already edited the old syntax tree using the Tree#edit function in a way that exactly matches the source code changes.

The input parameter lets you specify how to read the text. It has the following three fields:

read: A function to retrieve a chunk of text at a given byte offset and (row, column) position. The function should return a pointer to the text and write its length to the bytes_read pointer. The parser does not take ownership of this buffer; it just borrows it until it has finished reading it. The function should write a zero value to the bytes_read pointer to indicate the end of the document.
payload: An arbitrary pointer that will be passed to each invocation of the read function.
encoding: An indication of how the text is encoded. Either Encoding::UTF8 or Encoding::UTF16.

This function returns a syntax tree on success, and nil on failure. There are three possible reasons for failure:

The parser does not have a language assigned. Check for this using the #language function.
Parsing was cancelled due to a timeout that was set by an earlier call to the Parser#timeout_micros= function. You can resume parsing from where the parser left off by calling #parse again with the same arguments. Or you can start parsing from scratch by first calling #reset.
Parsing was cancelled using a cancellation flag that was set by an earlier call to #cancellation_flag=. You can resume parsing from where the parser left off by calling #parse again with the same arguments.

Parameters:

old_tree (Tree)
input (Input)

Returns:

(Tree, nil) —

A parse tree if parsing was successful.

# File 'ext/tree_sitter/parser.c', line 242

/*
 * Ruby binding for Parser#parse.
 *
 * Returns nil immediately when `input` is nil. Otherwise converts `old_tree`
 * (when it is not nil) with value_to_tree, converts `input` with
 * value_to_input, and runs ts_parser_parse.
 *
 * @return the tree wrapped by new_tree, or nil when ts_parser_parse
 *         returned NULL.
 */
static VALUE parser_parse(VALUE self, VALUE old_tree, VALUE input) {
  if (NIL_P(input)) {
    return Qnil;
  }

  // A nil old_tree is passed to tree-sitter as NULL.
  TSTree *previous = NIL_P(old_tree) ? NULL : value_to_tree(old_tree);

  TSTree *parsed = ts_parser_parse(SELF, previous, value_to_input(input));
  if (parsed != NULL) {
    return new_tree(parsed);
  }
  return Qnil;
}

#parse_string(old_tree, string) ⇒ `Tree`^?

Use the parser to parse some source code stored in one contiguous buffer. The old_tree parameter is the same as in the #parse function above. The string parameter supplies the buffer; its length in bytes is taken from the string itself.

Parameters:

old_tree (Tree)
string (String)

Returns:

(Tree, nil) —

A parse tree if parsing was successful.

# File 'ext/tree_sitter/parser.c', line 271

/*
 * Ruby binding for Parser#parse_string.
 *
 * Returns nil immediately when `string` is nil. Otherwise takes the string's
 * byte pointer and byte length, converts `old_tree` (when it is not nil) with
 * value_to_tree, and runs ts_parser_parse_string.
 *
 * The byte length is narrowed to uint32_t before being handed to tree-sitter.
 *
 * @return the tree wrapped by new_tree, or nil when the parse returned NULL.
 */
static VALUE parser_parse_string(VALUE self, VALUE old_tree, VALUE string) {
  if (NIL_P(string)) {
    return Qnil;
  }

  const char *source = StringValuePtr(string);
  uint32_t source_len = (uint32_t)RSTRING_LEN(string);

  // A nil old_tree is passed to tree-sitter as NULL.
  TSTree *previous = NIL_P(old_tree) ? NULL : value_to_tree(old_tree);

  TSTree *parsed = ts_parser_parse_string(SELF, previous, source, source_len);
  if (parsed != NULL) {
    return new_tree(parsed);
  }
  return Qnil;
}

#parse_string_encoding(old_tree, string, encoding) ⇒ `Tree`^?

Use the parser to parse some source code stored in one contiguous buffer with a given encoding. The old_tree and string parameters work the same as in the #parse_string method above. The final parameter indicates whether the text is encoded as Encoding::UTF8 or Encoding::UTF16.

Parameters:

old_tree (Tree)
string (String)
encoding (Encoding)

Returns:

(Tree, nil) —

A parse tree if parsing was successful.

# File 'ext/tree_sitter/parser.c', line 303

/*
 * Ruby binding for Parser#parse_string_encoding.
 *
 * Returns nil immediately when `string` is nil. Otherwise takes the string's
 * byte pointer and byte length, converts `old_tree` (when it is not nil) with
 * value_to_tree, converts `encoding` with value_to_encoding, and runs
 * ts_parser_parse_string_encoding.
 *
 * The byte length is narrowed to uint32_t before being handed to tree-sitter.
 *
 * @return the tree wrapped by new_tree, or nil when the parse returned NULL.
 */
static VALUE parser_parse_string_encoding(VALUE self, VALUE old_tree,
                                          VALUE string, VALUE encoding) {
  if (NIL_P(string)) {
    return Qnil;
  }

  const char *source = StringValuePtr(string);
  uint32_t source_len = (uint32_t)RSTRING_LEN(string);

  // A nil old_tree is passed to tree-sitter as NULL.
  TSTree *previous = NIL_P(old_tree) ? NULL : value_to_tree(old_tree);

  TSTree *parsed = ts_parser_parse_string_encoding(
      SELF, previous, source, source_len, value_to_encoding(encoding));
  if (parsed != NULL) {
    return new_tree(parsed);
  }
  return Qnil;
}

#print_dot_graphs(file) ⇒ `Object`

Set the file descriptor to which the parser should write debugging graphs during parsing. The graphs are formatted in the DOT language. You may want to pipe these graphs directly to a dot(1) process in order to generate SVG output. You can turn off this logging by passing a negative number.

passing nil or -1

Parameters:

file (Integer, String, nil) —

a file name to print to, or turn off by passing nil or -1

Returns:

# File 'ext/tree_sitter/parser.c', line 337

/*
 * Ruby binding for Parser#print_dot_graphs.
 *
 * Accepts three kinds of argument:
 *   - nil: forwards -1 to ts_parser_print_dot_graphs.
 *   - a negative Integer: forwards that integer unchanged.
 *   - anything else: must be a String (Check_Type raises otherwise); it is
 *     used as a path that is opened write-only, created if missing and
 *     truncated, and the resulting descriptor is forwarded.
 *
 * Always returns nil.
 *
 * NOTE(review): the result of open() is not checked, so a failed open
 * forwards -1 to tree-sitter without reporting any error — confirm whether
 * raising would be preferable.
 */
static VALUE parser_print_dot_graphs(VALUE self, VALUE file) {
  if (NIL_P(file)) {
    ts_parser_print_dot_graphs(SELF, -1);
    return Qnil;
  }

  if (rb_integer_type_p(file)) {
    int requested = NUM2INT(file);
    if (requested < 0) {
      ts_parser_print_dot_graphs(SELF, requested);
      return Qnil;
    }
    // Non-negative integers fall through to the String check below.
  }

  Check_Type(file, T_STRING);
  char *target = StringValueCStr(file);
  // 0644 = all read + user write
  int descriptor = open(target, O_WRONLY | O_CREAT | O_TRUNC, 0644);
  ts_parser_print_dot_graphs(SELF, descriptor);
  return Qnil;
}

#reset ⇒ `Object`

Instruct the parser to start the next parse from the beginning.

If the parser previously failed because of a timeout or a cancellation, then by default, it will resume where it left off on the next call to #parse or other parsing functions. If you don’t want to resume, and instead intend to use this parser to parse some other document, you must call #reset first.

Returns:

# File 'ext/tree_sitter/parser.c', line 363

/*
 * Ruby binding for Parser#reset.
 *
 * Calls ts_parser_reset on the underlying parser and always returns nil.
 */
static VALUE parser_reset(VALUE self) {
  ts_parser_reset(SELF);
  return Qnil;
}

Class: TreeSitter::Parser

Instance Method Summary collapse

Instance Method Details

#cancellation_flag ⇒ Integer

#cancellation_flag=(flag) ⇒ Object

#included_ranges ⇒ Array<Range>

#included_ranges=(array) ⇒ Boolean

#language ⇒ Object

#language=(language) ⇒ Boolean

#logger ⇒ Logger

#logger=(logger) ⇒ Object

#parse(old_tree, input) ⇒ Tree?

#parse_string(old_tree, string) ⇒ Tree?

#parse_string_encoding(old_tree, string, encoding) ⇒ Tree?

#print_dot_graphs(file) ⇒ Object

#reset ⇒ Object

#cancellation_flag ⇒ `Integer`

#cancellation_flag=(flag) ⇒ `Object`

#included_ranges ⇒ `Array<Range>`

#included_ranges=(array) ⇒ `Boolean`

#language ⇒ `Object`

#language=(language) ⇒ `Boolean`

#logger ⇒ `Logger`

#logger=(logger) ⇒ `Object`

#parse(old_tree, input) ⇒ `Tree`^?

#parse_string(old_tree, string) ⇒ `Tree`^?

#parse_string_encoding(old_tree, string, encoding) ⇒ `Tree`^?

#print_dot_graphs(file) ⇒ `Object`

#reset ⇒ `Object`