Class: TreeSitter::Parser
- Inherits:
-
Object
- Object
- TreeSitter::Parser
- Defined in:
- ext/tree_sitter/parser.c
Instance Method Summary collapse
-
#cancellation_flag ⇒ Integer
Get the parser’s current cancellation flag pointer.
-
#cancellation_flag=(flag) ⇒ Object
Set the parser’s current cancellation flag pointer.
-
#included_ranges ⇒ Array<Range>
Get the ranges of text that the parser will include when parsing.
-
#included_ranges=(array) ⇒ Boolean
Set the ranges of text that the parser should include when parsing.
-
#language ⇒ Object
Get the parser’s current language.
-
#language=(language) ⇒ Boolean
Set the language that the parser should use for parsing.
-
#logger ⇒ Logger
Get the parser’s current logger.
-
#logger=(logger) ⇒ Object
Set the logger that a parser should use during parsing.
-
#parse(old_tree, input) ⇒ Tree?
Use the parser to parse some source code and create a syntax tree.
-
#parse_string(old_tree, string) ⇒ Tree?
Use the parser to parse some source code stored in one contiguous buffer.
-
#parse_string_encoding(old_tree, string, encoding) ⇒ Tree?
Use the parser to parse some source code stored in one contiguous buffer with a given encoding.
-
#print_dot_graphs(file) ⇒ Object
Set the file descriptor to which the parser should write debugging graphs during parsing.
-
#reset ⇒ Object
Instruct the parser to start the next parse from the beginning.
Instance Method Details
#cancellation_flag ⇒ Integer
DEPRECATED in tree-sitter 0.26+. This API was removed. Use TSParseOptions with progress_callback instead.
Get the parser’s current cancellation flag pointer.
63 64 65 66 67 |
# File 'ext/tree_sitter/parser.c', line 63
static VALUE parser_get_cancellation_flag(VALUE self) {
  // The cancellation_flag API no longer exists in tree-sitter 0.26+, so the
  // value comes from the wrapper's own field rather than from the parser.
  // It is kept only so older callers can still read back what they stored.
  const size_t stored_flag = unwrap(self)->cancellation_flag;
  return SIZET2NUM(stored_flag);
}
|
#cancellation_flag=(flag) ⇒ Object
DEPRECATED in tree-sitter 0.26+. This API was removed. Use TSParseOptions with progress_callback instead.
Set the parser’s current cancellation flag pointer.
If a non-null pointer is assigned, then the parser will periodically read from this pointer during parsing. If it reads a non-zero value, it will halt early, returning nil.
83 84 85 86 87 88 |
# File 'ext/tree_sitter/parser.c', line 83
static VALUE parser_set_cancellation_flag(VALUE self, VALUE flag) {
  // The cancellation_flag API no longer exists in tree-sitter 0.26+.
  // The number is only recorded on the wrapper for backward compatibility;
  // it is not forwarded to the parser and has no effect on parsing.
  const size_t requested_flag = NUM2SIZET(flag);
  unwrap(self)->cancellation_flag = requested_flag;
  return Qnil;
}
|
#included_ranges ⇒ Array<Range>
Get the ranges of text that the parser will include when parsing.
119 120 121 122 123 124 125 126 127 |
# File 'ext/tree_sitter/parser.c', line 119
static VALUE parser_get_included_ranges(VALUE self) {
  // ts_parser_included_ranges() reports the number of ranges through `count`
  // and returns a pointer to the first one.
  uint32_t count;
  const TSRange *included = ts_parser_included_ranges(SELF, &count);

  // Build a Ruby array holding one wrapped range per native entry.
  VALUE result = rb_ary_new_capa(count);
  uint32_t idx = 0;
  while (idx < count) {
    rb_ary_push(result, new_range(included + idx));
    idx++;
  }
  return result;
}
|
#included_ranges=(array) ⇒ Boolean
Set the ranges of text that the parser should include when parsing.
By default, the parser will always include entire documents. This function allows you to parse only a portion of a document but still return a syntax tree whose ranges match up with the document as a whole. You can also pass multiple disjoint ranges.
The array parameter supplies the ranges. The parser does not take ownership of these ranges; it copies the data, so it doesn’t matter how these ranges are allocated.
If array’s length is zero, then the entire document will be parsed. Otherwise, the given ranges must be ordered from earliest to latest in the document, and they must not overlap. That is, the following must hold for all:
i < length - 1: ranges[i].end_byte <= ranges[i + 1].start_byte
If this requirement is not satisfied, the operation will fail, the ranges will not be assigned, and this function will return false. On success, this function returns true.
156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
# File 'ext/tree_sitter/parser.c', line 156
static VALUE parser_set_included_ranges(VALUE self, VALUE array) {
  // Converts a Ruby Array of ranges into a temporary TSRange buffer and hands
  // it to ts_parser_set_included_ranges(). Returns Qtrue when tree-sitter
  // accepts the ranges and Qfalse otherwise. Check_Type raises TypeError for
  // a non-Array argument; ArgumentError is raised when the element count does
  // not fit the uint32_t count that tree-sitter expects.
  Check_Type(array, T_ARRAY);
  long length = rb_array_len(array);

  // An empty array means "include the entire document": there is nothing to
  // convert, so skip the allocation and pass an empty range list.
  if (length == 0) {
    return ts_parser_set_included_ranges(SELF, NULL, 0) ? Qtrue : Qfalse;
  }

  // The count is forwarded as uint32_t; refuse oversized arrays instead of
  // silently truncating the number of ranges.
  if ((unsigned long)length > UINT32_MAX) {
    rb_raise(rb_eArgError, "too many ranges (%ld)", length);
  }

  // ALLOCV_N ties the buffer to `buffer`, so Ruby reclaims it even when
  // value_to_range() raises and unwinds past ALLOCV_END. It also raises
  // NoMemoryError itself instead of returning NULL on allocation failure.
  VALUE buffer;
  TSRange *ranges = ALLOCV_N(TSRange, buffer, length);
  for (long i = 0; i < length; i++) {
    ranges[i] = value_to_range(rb_ary_entry(array, i));
  }

  bool res = ts_parser_set_included_ranges(SELF, ranges, (uint32_t)length);
  ALLOCV_END(buffer);
  return res ? Qtrue : Qfalse;
}
|
#language ⇒ Object
Get the parser’s current language.
93 94 95 |
# File 'ext/tree_sitter/parser.c', line 93 static VALUE parser_get_language(VALUE self) { /* Passes the result of ts_parser_language(SELF) to new_language() and returns that value. */ return new_language(ts_parser_language(SELF)); } |
#language=(language) ⇒ Boolean
Set the language that the parser should use for parsing.
Returns a boolean indicating whether or not the language was successfully assigned. True means assignment succeeded. False means there was a version mismatch: the language was generated with an incompatible version of the Tree-sitter CLI. Check the language’s version using Language#version and compare it to this library’s LANGUAGE_VERSION and MIN_COMPATIBLE_LANGUAGE_VERSION constants.
109 110 111 112 |
# File 'ext/tree_sitter/parser.c', line 109
static VALUE parser_set_language(VALUE self, VALUE language) {
  // Translate tree-sitter's boolean result into Ruby true/false: a false
  // result means the parser did not accept the language.
  if (ts_parser_set_language(SELF, value_to_language(language))) {
    return Qtrue;
  }
  return Qfalse;
}
|
#logger ⇒ Logger
Get the parser’s current logger.
176 177 178 |
# File 'ext/tree_sitter/parser.c', line 176 static VALUE parser_get_logger(VALUE self) { /* Passes the result of ts_parser_logger(SELF) to new_logger_by_val() and returns that value. */ return new_logger_by_val(ts_parser_logger(SELF)); } |
#logger=(logger) ⇒ Object
Set the logger that a parser should use during parsing.
The parser does not take ownership over the logger payload. If a logger was previously assigned, the caller is responsible for releasing any memory owned by the previous logger.
191 192 193 194 195 |
# File 'ext/tree_sitter/parser.c', line 191
static VALUE parser_set_logger(VALUE self, VALUE logger) {
  // Convert the Ruby logger first, then install it on the native parser.
  TSLogger native_logger = value_to_logger(logger);
  ts_parser_set_logger(SELF, native_logger);

  // Also remember the Ruby object on the wrapper.
  // NOTE(review): presumably this keeps the logger reachable for the GC —
  // confirm against the wrapper's mark function.
  unwrap(self)->logger = logger;
  return Qnil;
}
|
#parse(old_tree, input) ⇒ Tree?
This is currently incomplete, as the Input class is incomplete.
Use the parser to parse some source code and create a syntax tree.
If you are parsing this document for the first time, pass nil for the old_tree parameter. Otherwise, if you have already parsed an earlier version of this document and the document has since been edited, pass the previous syntax tree so that the unchanged parts of it can be reused. This will save time and memory. For this to work correctly, you must have already edited the old syntax tree using the Tree#edit function in a way that exactly matches the source code changes.
The input parameter lets you specify how to read the text. It has the following three fields:
-
read: A function to retrieve a chunk of text at a given byte offset and (row, column) position. The function should return a pointer to the text and write its length to the bytes_read pointer. The parser does not take ownership of this buffer; it just borrows it until it has finished reading it. The function should write a zero value to the bytes_read pointer to indicate the end of the document. -
payload: An arbitrary pointer that will be passed to each invocation of the read function. -
encoding: An indication of how the text is encoded. Either Encoding::UTF8 or Encoding::UTF16.
This function returns a syntax tree on success, and nil on failure. There are three possible reasons for failure:
-
The parser does not have a language assigned. Check for this using the #language function.
-
Parsing was cancelled due to a timeout that was set by an earlier call to the Parser#timeout_micros= function. You can resume parsing from where the parser left off by calling #parse again with the same arguments. Or you can start parsing from scratch by first calling #reset.
-
Parsing was cancelled using a cancellation flag that was set by an earlier call to #cancellation_flag=. You can resume parsing from where the parser left off by calling #parse again with the same arguments.
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 |
# File 'ext/tree_sitter/parser.c', line 242
static VALUE parser_parse(VALUE self, VALUE old_tree, VALUE input) {
  // Without an input there is nothing to parse.
  if (NIL_P(input)) {
    return Qnil;
  }

  // A nil old_tree means no previous tree is passed to the parser.
  TSTree *previous = NIL_P(old_tree) ? NULL : value_to_tree(old_tree);

  // ts_parser_parse() yields NULL on failure, which is reported as nil.
  TSTree *parsed = ts_parser_parse(SELF, previous, value_to_input(input));
  return parsed != NULL ? new_tree(parsed) : Qnil;
}
|
#parse_string(old_tree, string) ⇒ Tree?
Use the parser to parse some source code stored in one contiguous buffer. The old_tree parameter is the same as in the #parse function above. The string parameter supplies the buffer; its length in bytes is taken from the string itself.
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 |
# File 'ext/tree_sitter/parser.c', line 271
static VALUE parser_parse_string(VALUE self, VALUE old_tree, VALUE string) {
  // A nil string short-circuits to nil without touching the parser.
  if (NIL_P(string)) {
    return Qnil;
  }

  // Borrow the string's bytes and byte length; the length is narrowed to the
  // uint32_t that ts_parser_parse_string() takes.
  const char *source = StringValuePtr(string);
  uint32_t source_len = (uint32_t)RSTRING_LEN(string);

  // A nil old_tree means no previous tree is passed to the parser.
  TSTree *previous = NIL_P(old_tree) ? NULL : value_to_tree(old_tree);

  TSTree *parsed = ts_parser_parse_string(SELF, previous, source, source_len);
  if (parsed != NULL) {
    return new_tree(parsed);
  }
  return Qnil;
}
|
#parse_string_encoding(old_tree, string, encoding) ⇒ Tree?
Use the parser to parse some source code stored in one contiguous buffer with a given encoding. The first two parameters work the same as in the #parse_string method above. The final parameter indicates whether the text is encoded as Encoding::UTF8 or Encoding::UTF16.
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 |
# File 'ext/tree_sitter/parser.c', line 303
static VALUE parser_parse_string_encoding(VALUE self, VALUE old_tree,
                                          VALUE string, VALUE encoding) {
  // A nil string short-circuits to nil without touching the parser.
  if (NIL_P(string)) {
    return Qnil;
  }

  // Borrow the string's bytes and byte length; the length is narrowed to the
  // uint32_t that the tree-sitter call takes.
  const char *source = StringValuePtr(string);
  uint32_t source_len = (uint32_t)RSTRING_LEN(string);

  // A nil old_tree means no previous tree is passed to the parser.
  TSTree *previous = NIL_P(old_tree) ? NULL : value_to_tree(old_tree);

  // The encoding argument is converted only after the tree has been resolved,
  // immediately before the parse call.
  TSTree *parsed = ts_parser_parse_string_encoding(
      SELF, previous, source, source_len, value_to_encoding(encoding));
  if (parsed == NULL) {
    return Qnil;
  }
  return new_tree(parsed);
}
|
#print_dot_graphs(file) ⇒ Object
Set the file descriptor to which the parser should write debugging graphs during parsing. The graphs are formatted in the DOT language. You may want to pipe these graphs directly to a dot(1) process in order to generate SVG output. You can turn off this logging by passing a negative number.
Note: logging is turned off by passing nil or -1.
337 338 339 340 341 342 343 344 345 346 347 348 349 350 |
# File 'ext/tree_sitter/parser.c', line 337
static VALUE parser_print_dot_graphs(VALUE self, VALUE file) {
  // Accepts nil or a negative Integer (forwarded as the descriptor, which the
  // documentation above describes as turning graph output off) or a String
  // path that is opened for writing. Any other argument is rejected by
  // Check_Type with a TypeError. Always returns nil.
  if (NIL_P(file)) {
    ts_parser_print_dot_graphs(SELF, -1);
    return Qnil;
  }

  if (rb_integer_type_p(file)) {
    // Convert once; only negative values are forwarded as-is. Non-negative
    // integers fall through to the String check below.
    int requested_fd = NUM2INT(file);
    if (requested_fd < 0) {
      ts_parser_print_dot_graphs(SELF, requested_fd);
      return Qnil;
    }
  }

  Check_Type(file, T_STRING);
  const char *path = StringValueCStr(file);
  int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC,
                0644); // 0644 = all read + user write
  if (fd < 0) {
    // Report the failure (as a SystemCallError built from errno) instead of
    // passing -1 on, which would silently disable graph output.
    rb_sys_fail(path);
  }
  ts_parser_print_dot_graphs(SELF, fd);
  return Qnil;
}
|
#reset ⇒ Object
Instruct the parser to start the next parse from the beginning.
If the parser previously failed because of a timeout or a cancellation, then by default, it will resume where it left off on the next call to #parse or other parsing functions. If you don’t want to resume, and instead intend to use this parser to parse some other document, you must call #reset first.
363 364 365 366 |
# File 'ext/tree_sitter/parser.c', line 363 static VALUE parser_reset(VALUE self) { /* Calls ts_parser_reset() on the wrapped parser and always returns nil. */ ts_parser_reset(SELF); return Qnil; } |