Class: TreeSitter::Parser

Inherits:
Object
  • Object
show all
Defined in:
ext/tree_sitter/parser.c

Instance Method Summary collapse

Instance Method Details

#cancellation_flag ⇒ Integer

Note:

DEPRECATED in tree-sitter 0.26+. This API was removed. Use TSParseOptions with progress_callback instead.

Get the parser’s current cancellation flag pointer.

Returns:

  • (Integer)


63
64
65
66
67
# File 'ext/tree_sitter/parser.c', line 63

/**
 * Return the cancellation flag value stored on this parser wrapper.
 *
 * tree-sitter 0.26+ removed the cancellation_flag API, so nothing is read
 * from the underlying parser here; the value kept on the wrapper struct is
 * handed back for backward compatibility.
 *
 * @return [Integer] the stored value, converted with SIZET2NUM.
 */
static VALUE parser_get_cancellation_flag(VALUE self) {
  const size_t stored_flag = unwrap(self)->cancellation_flag;
  return SIZET2NUM(stored_flag);
}

#cancellation_flag=(flag) ⇒ Object

Note:

DEPRECATED in tree-sitter 0.26+. This API was removed. Use TSParseOptions with progress_callback instead.

Set the parser’s current cancellation flag pointer.

If a non-null pointer is assigned, then the parser will periodically read from this pointer during parsing. If it reads a non-zero value, it will halt early, returning nil.

Returns:

  • nil

See Also:



83
84
85
86
87
88
# File 'ext/tree_sitter/parser.c', line 83

/**
 * Remember a cancellation flag value on this parser wrapper.
 *
 * tree-sitter 0.26+ removed the cancellation_flag API, so the value is only
 * stored on the wrapper struct for backward compatibility; it does not
 * influence parsing.
 *
 * @param flag [Integer] value to store, converted with NUM2SIZET.
 * @return [nil]
 */
static VALUE parser_set_cancellation_flag(VALUE self, VALUE flag) {
  unwrap(self)->cancellation_flag = (size_t)NUM2SIZET(flag);

  return Qnil;
}

#included_ranges ⇒ Array<Range>

Get the ranges of text that the parser will include when parsing.

Returns:



119
120
121
122
123
124
125
126
127
# File 'ext/tree_sitter/parser.c', line 119

/**
 * Build a Ruby array describing the ranges the parser currently includes.
 *
 * ts_parser_included_ranges() reports the ranges and their count; each one is
 * wrapped with new_range() and appended to the result in order.
 *
 * @return [Array] one element per included range.
 */
static VALUE parser_get_included_ranges(VALUE self) {
  uint32_t count = 0;
  const TSRange *included = ts_parser_included_ranges(SELF, &count);

  VALUE result = rb_ary_new_capa(count);
  uint32_t idx = 0;
  while (idx < count) {
    rb_ary_push(result, new_range(included + idx));
    idx++;
  }
  return result;
}

#included_ranges=(array) ⇒ Boolean

Set the ranges of text that the parser should include when parsing.

By default, the parser will always include entire documents. This function allows you to parse only a portion of a document but still return a syntax tree whose ranges match up with the document as a whole. You can also pass multiple disjoint ranges.

In the underlying C API, the second and third parameters specify the location and length of an array of ranges; in Ruby, both are taken from array. The parser does not take ownership of these ranges; it copies the data, so it doesn’t matter how these ranges are allocated.

If array's length is zero, then the entire document will be parsed. Otherwise, the given ranges must be ordered from earliest to latest in the document, and they must not overlap. That is, the following must hold for all:

i < length - 1: ranges[i].end_byte <= ranges[i + 1].start_byte

If this requirement is not satisfied, the operation will fail, the ranges will not be assigned, and this function will return false. On success, this function returns true.

Parameters:

Returns:

  • (Boolean)


156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'ext/tree_sitter/parser.c', line 156

/**
 * Set the ranges of text that the parser should include when parsing.
 *
 * Each element of +array+ is converted with value_to_range() into a temporary
 * buffer, which is then handed to ts_parser_set_included_ranges().
 *
 * The buffer comes from ALLOCV_N instead of malloc so that:
 *   - an allocation failure raises rather than yielding a NULL pointer that
 *     the conversion loop would write through, and
 *   - the buffer is reclaimed if a Ruby exception is raised while converting
 *     an element (an exception unwinds past any free() placed after the loop).
 *
 * @param array [Array] the ranges to include; an empty array passes a count
 *   of zero to tree-sitter.
 * @return [Boolean] true/false according to ts_parser_set_included_ranges().
 * @raise [TypeError] if +array+ is not an Array (via Check_Type).
 * @raise [ArgumentError] if +array+ has more elements than a uint32_t count
 *   can represent.
 */
static VALUE parser_set_included_ranges(VALUE self, VALUE array) {
  Check_Type(array, T_ARRAY);

  long length = rb_array_len(array);
  // The tree-sitter API takes a 32-bit count; refuse to truncate silently.
  if ((unsigned long)length > UINT32_MAX) {
    rb_raise(rb_eArgError, "too many ranges (%ld)", length);
  }

  VALUE tmp_buf = 0;
  TSRange *ranges = NULL;
  if (length > 0) {
    ranges = ALLOCV_N(TSRange, tmp_buf, length);
    for (long i = 0; i < length; i++) {
      ranges[i] = value_to_range(rb_ary_entry(array, i));
    }
  }

  bool res = ts_parser_set_included_ranges(SELF, ranges, (uint32_t)length);

  // Releases the buffer if ALLOCV_N had to allocate it on the heap.
  ALLOCV_END(tmp_buf);
  return res ? Qtrue : Qfalse;
}

#language ⇒ Object

Get the parser’s current language.



93
94
95
# File 'ext/tree_sitter/parser.c', line 93

/**
 * Get the parser's current language.
 *
 * @return the result of wrapping ts_parser_language() with new_language().
 */
static VALUE parser_get_language(VALUE self) {
  VALUE wrapped = new_language(ts_parser_language(SELF));
  return wrapped;
}

#language=(language) ⇒ Boolean

Set the language that the parser should use for parsing.

Returns a boolean indicating whether or not the language was successfully assigned. True means assignment succeeded. False means there was a version mismatch: the language was generated with an incompatible version of the Tree-sitter CLI. Check the language’s version using Language#version and compare it to this library’s LANGUAGE_VERSION and MIN_COMPATIBLE_LANGUAGE_VERSION constants.

Returns:

  • (Boolean)


109
110
111
112
# File 'ext/tree_sitter/parser.c', line 109

/**
 * Set the language that the parser should use for parsing.
 *
 * @param language the language object, converted with value_to_language().
 * @return [Boolean] true when ts_parser_set_language() reports success,
 *   false otherwise.
 */
static VALUE parser_set_language(VALUE self, VALUE language) {
  if (ts_parser_set_language(SELF, value_to_language(language))) {
    return Qtrue;
  }
  return Qfalse;
}

#logger ⇒ Logger

Get the parser’s current logger.

Returns:



176
177
178
# File 'ext/tree_sitter/parser.c', line 176

/**
 * Get the parser's current logger.
 *
 * @return the TSLogger reported by ts_parser_logger(), wrapped with
 *   new_logger_by_val().
 */
static VALUE parser_get_logger(VALUE self) {
  TSLogger current = ts_parser_logger(SELF);
  return new_logger_by_val(current);
}

#logger=(logger) ⇒ Object

Set the logger that a parser should use during parsing.

The parser does not take ownership over the logger payload. If a logger was previously assigned, the caller is responsible for releasing any memory owned by the previous logger.

Parameters:

  • logger (Logger)

    or any object that has a printf, puts, or write.

Returns:

  • nil



191
192
193
194
195
# File 'ext/tree_sitter/parser.c', line 191

/**
 * Set the logger that the parser should use during parsing.
 *
 * The Ruby object is converted with value_to_logger() and installed on the
 * underlying parser, then the Ruby object itself is recorded on the wrapper
 * struct.
 * NOTE(review): the stored reference presumably keeps the logger reachable
 * for the GC — confirm against the wrapper's mark function.
 *
 * @param logger the logger object to install.
 * @return [nil]
 */
static VALUE parser_set_logger(VALUE self, VALUE logger) {
  TSLogger converted = value_to_logger(logger);
  ts_parser_set_logger(SELF, converted);

  unwrap(self)->logger = logger;

  return Qnil;
}

#parse(old_tree, input) ⇒ Tree?

Note:

This is currently incomplete, as the Input class is incomplete.

Use the parser to parse some source code and create a syntax tree.

If you are parsing this document for the first time, pass nil for the old_tree parameter. Otherwise, if you have already parsed an earlier version of this document and the document has since been edited, pass the previous syntax tree so that the unchanged parts of it can be reused. This will save time and memory. For this to work correctly, you must have already edited the old syntax tree using the Tree#edit function in a way that exactly matches the source code changes.

The input parameter lets you specify how to read the text. It has the following three fields:

  1. read: A function to retrieve a chunk of text at a given byte offset and (row, column) position. The function should return a pointer to the text and write its length to the bytes_read pointer. The parser does not take ownership of this buffer; it just borrows it until it has finished reading it. The function should write a zero value to the bytes_read pointer to indicate the end of the document.

  2. payload: An arbitrary pointer that will be passed to each invocation of the read function.

  3. encoding: An indication of how the text is encoded. Either Encoding::UTF8 or Encoding::UTF16.

This function returns a syntax tree on success, and nil on failure. There are three possible reasons for failure:

  1. The parser does not have a language assigned. Check for this using the #language function.

  2. Parsing was cancelled due to a timeout that was set by an earlier call to the Parser#timeout_micros= function. You can resume parsing from where the parser left off by calling #parse again with the same arguments. Or you can start parsing from scratch by first calling #reset.

  3. Parsing was cancelled using a cancellation flag that was set by an earlier call to #cancellation_flag=. You can resume parsing from where the parser left off by calling #parse again with the same arguments.

Parameters:

Returns:

  • (Tree, nil)

    A parse tree if parsing was successful.



242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'ext/tree_sitter/parser.c', line 242

/**
 * Parse source code supplied through an input object.
 *
 * @param old_tree a previously parsed tree to reuse, or nil for a fresh parse.
 * @param input the input object, converted with value_to_input(); a nil
 *   input short-circuits to nil without calling the parser.
 * @return the tree wrapped with new_tree(), or nil when ts_parser_parse()
 *   returns NULL.
 */
static VALUE parser_parse(VALUE self, VALUE old_tree, VALUE input) {
  if (NIL_P(input)) {
    return Qnil;
  }

  // No previous tree means a from-scratch parse (NULL for the C API).
  TSTree *previous = NIL_P(old_tree) ? NULL : value_to_tree(old_tree);

  TSTree *parsed = ts_parser_parse(SELF, previous, value_to_input(input));
  return (parsed != NULL) ? new_tree(parsed) : Qnil;
}

#parse_string(old_tree, string) ⇒ Tree?

Use the parser to parse some source code stored in one contiguous buffer. The old_tree parameter works the same as in the #parse method above. The string parameter supplies the buffer; its length in bytes is taken from the string itself.

Parameters:

  • old_tree (Tree)
  • string (String)

Returns:

  • (Tree, nil)

    A parse tree if parsing was successful.



271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'ext/tree_sitter/parser.c', line 271

/**
 * Parse source code held in a single Ruby string.
 *
 * @param old_tree a previously parsed tree to reuse, or nil for a fresh parse.
 * @param string [String] the source text; a nil string short-circuits to nil
 *   without calling the parser.
 * @return the tree wrapped with new_tree(), or nil when
 *   ts_parser_parse_string() returns NULL.
 */
static VALUE parser_parse_string(VALUE self, VALUE old_tree, VALUE string) {
  if (NIL_P(string)) {
    return Qnil;
  }

  const char *source = StringValuePtr(string);
  uint32_t source_len = (uint32_t)RSTRING_LEN(string);

  // No previous tree means a from-scratch parse (NULL for the C API).
  TSTree *previous = NIL_P(old_tree) ? NULL : value_to_tree(old_tree);

  TSTree *parsed = ts_parser_parse_string(SELF, previous, source, source_len);
  return (parsed != NULL) ? new_tree(parsed) : Qnil;
}

#parse_string_encoding(old_tree, string, encoding) ⇒ Tree?

Use the parser to parse some source code stored in one contiguous buffer with a given encoding. The old_tree and string parameters work the same as in the #parse_string method above. The final parameter indicates whether the text is encoded as Encoding::UTF8 or Encoding::UTF16.

Parameters:

Returns:

  • (Tree, nil)

    A parse tree if parsing was successful.



303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# File 'ext/tree_sitter/parser.c', line 303

/**
 * Parse source code held in a single Ruby string with an explicit encoding.
 *
 * @param old_tree a previously parsed tree to reuse, or nil for a fresh parse.
 * @param string [String] the source text; a nil string short-circuits to nil
 *   without calling the parser.
 * @param encoding the text encoding, converted with value_to_encoding().
 * @return the tree wrapped with new_tree(), or nil when
 *   ts_parser_parse_string_encoding() returns NULL.
 */
static VALUE parser_parse_string_encoding(VALUE self, VALUE old_tree,
                                          VALUE string, VALUE encoding) {
  if (NIL_P(string)) {
    return Qnil;
  }

  const char *source = StringValuePtr(string);
  uint32_t source_len = (uint32_t)RSTRING_LEN(string);

  // No previous tree means a from-scratch parse (NULL for the C API).
  TSTree *previous = NIL_P(old_tree) ? NULL : value_to_tree(old_tree);

  TSTree *parsed = ts_parser_parse_string_encoding(
      SELF, previous, source, source_len, value_to_encoding(encoding));

  return (parsed != NULL) ? new_tree(parsed) : Qnil;
}

Set the file descriptor to which the parser should write debugging graphs during parsing. The graphs are formatted in the DOT language. You may want to pipe these graphs directly to a dot(1) process in order to generate SVG output. You can turn off this logging by passing a negative number.

Parameters:

  • file (Integer, String, nil)

    a file name to print, or turn off by passing nil or -1

Returns:

  • nil



337
338
339
340
341
342
343
344
345
346
347
348
349
350
# File 'ext/tree_sitter/parser.c', line 337

/**
 * Choose where the parser writes its DOT debugging graphs, or turn them off.
 *
 * - nil: passes -1 to ts_parser_print_dot_graphs().
 * - negative Integer: passes that value through unchanged.
 * - String: treated as a path; the file is opened write-only, created if
 *   missing and truncated, and the resulting descriptor is handed to
 *   ts_parser_print_dot_graphs().
 *
 * Any other value, including a non-negative Integer, is rejected by
 * Check_Type.
 *
 * NOTE(review): the descriptor opened here is not closed in this function;
 * presumably tree-sitter takes ownership of it — confirm.
 *
 * @param file [Integer, String, nil] a file name, or nil / a negative number
 *   to disable.
 * @return [nil]
 * @raise [TypeError] if +file+ is neither nil, a negative Integer, nor a
 *   String.
 * @raise [SystemCallError] if the file cannot be opened.
 */
static VALUE parser_print_dot_graphs(VALUE self, VALUE file) {
  if (NIL_P(file)) {
    ts_parser_print_dot_graphs(SELF, -1);
    return Qnil;
  }

  if (rb_integer_type_p(file)) {
    int requested = NUM2INT(file);
    if (requested < 0) {
      ts_parser_print_dot_graphs(SELF, requested);
      return Qnil;
    }
  }

  Check_Type(file, T_STRING);
  char *path = StringValueCStr(file);
  int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC,
                0644); // 0644 = all read + user write
  if (fd < 0) {
    // Report the failure; passing -1 on would silently disable the graphs.
    // Must run straight after open() so errno is still the open() error.
    rb_sys_fail(path);
  }
  ts_parser_print_dot_graphs(SELF, fd);
  return Qnil;
}

#reset ⇒ Object

Instruct the parser to start the next parse from the beginning.

If the parser previously failed because of a timeout or a cancellation, then by default, it will resume where it left off on the next call to #parse or other parsing functions. If you don’t want to resume, and instead intend to use this parser to parse some other document, you must call #reset first.

Returns:

  • nil



363
364
365
366
# File 'ext/tree_sitter/parser.c', line 363

/**
 * Instruct the parser to start the next parse from the beginning.
 *
 * Delegates to ts_parser_reset() on the underlying parser.
 *
 * @return [nil]
 */
static VALUE parser_reset(VALUE self) {
  TSParser *parser = SELF;
  ts_parser_reset(parser);

  return Qnil;
}