feat: remove tree-sitter-facade-sg

BREAKING CHANGE: this uses the native tree-sitter crate
This commit is contained in:
HerringtonDarkholme
2025-05-12 18:20:44 -04:00
parent 8dcf3d8851
commit 1e8a38d181
14 changed files with 99 additions and 112 deletions
Generated
+8 -46
View File
@@ -141,7 +141,7 @@ dependencies = [
"smallvec",
"tempfile",
"tokio",
"tree-sitter-facade-sg",
"tree-sitter",
]
[[package]]
@@ -166,7 +166,7 @@ dependencies = [
"bit-set",
"regex",
"thiserror",
"tree-sitter-facade-sg",
"tree-sitter",
"tree-sitter-typescript",
]
@@ -190,13 +190,13 @@ dependencies = [
"ast-grep-core",
"ignore",
"serde",
"tree-sitter",
"tree-sitter-bash",
"tree-sitter-c",
"tree-sitter-c-sharp",
"tree-sitter-cpp",
"tree-sitter-css",
"tree-sitter-elixir",
"tree-sitter-facade-sg",
"tree-sitter-go",
"tree-sitter-haskell",
"tree-sitter-html",
@@ -242,7 +242,7 @@ dependencies = [
"napi-build",
"napi-derive",
"serde_json",
"tree-sitter-facade-sg",
"tree-sitter",
]
[[package]]
@@ -996,7 +996,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a793df0d7afeac54f95b471d3af7f0d4fb975699f972341a4b76988d49cdf0c"
dependencies = [
"cfg-if",
"windows-targets 0.48.5",
"windows-targets 0.52.6",
]
[[package]]
@@ -1880,9 +1880,9 @@ dependencies = [
[[package]]
name = "tree-sitter"
version = "0.25.3"
version = "0.25.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9ac5ea5e7f2f1700842ec071401010b9c59bf735295f6e9fa079c3dc035b167"
checksum = "69aff09fea9a41fb061ae6b206cb87cac1b8db07df31be3ba271fbc26760f213"
dependencies = [
"cc",
"regex",
@@ -1952,20 +1952,6 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-facade-sg"
version = "0.25.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b456912926c4079fb0e6d26e4282a588f93857fbb4d79e29c5eb1244b7100d55"
dependencies = [
"js-sys",
"tree-sitter",
"tree-sitter-language",
"wasm-bindgen",
"web-sys",
"web-tree-sitter-sg",
]
[[package]]
name = "tree-sitter-go"
version = "0.23.4"
@@ -2234,19 +2220,6 @@ dependencies = [
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
"cfg-if",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
@@ -2289,17 +2262,6 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "web-tree-sitter-sg"
version = "0.25.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c769bd29dd6612783fffb7090d29ed8b390672fb9e968b9e76bbc07bf78600c"
dependencies = [
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
]
[[package]]
name = "winapi"
version = "0.3.9"
@@ -2322,7 +2284,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]
[[package]]
+2 -2
View File
@@ -33,7 +33,7 @@ ignore = { version = "0.4.22" }
regex = { version = "1.10.4" }
serde = { version = "1.0.200", features = ["derive"] }
serde_yaml = "0.9.33"
tree-sitter = { version = "0.25.3", package = "tree-sitter-facade-sg" }
tree-sitter = { version = "0.25.4" }
thiserror = "2.0.0"
schemars = "0.8.17"
anyhow = "1.0.82"
anyhow = "1.0.82"
+6 -1
View File
@@ -175,7 +175,12 @@ fn node_to_range<D: Doc>(node: &Node<D>) -> TSRange {
let end = node.end_pos();
let ep = end.byte_point();
let ep = tree_sitter::Point::new(ep.0, ep.1);
TSRange::new(r.start as u32, r.end as u32, &sp, &ep)
TSRange {
start_byte: r.start,
end_byte: r.end,
start_point: sp,
end_point: ep,
}
}
#[cfg(test)]
+3 -3
View File
@@ -84,7 +84,7 @@ fn dump_pattern(
} => {
if *is_named {
let kind = lang.node_kind_for_id(*kind_id).unwrap();
let kind = style.kind_style.paint(format!("{kind}"));
let kind = style.kind_style.paint(kind);
writeln!(ret, "{kind} {text}")?;
} else {
writeln!(ret, "{text}")?;
@@ -190,8 +190,8 @@ impl From<ts::Point> for Pos {
#[inline]
fn from(pt: ts::Point) -> Self {
Pos {
row: pt.row() as usize,
column: pt.column() as usize,
row: pt.row,
column: pt.column,
}
}
}
+3 -1
View File
@@ -56,7 +56,9 @@ mod test {
TSLanguage::from(tree_sitter_typescript::LANGUAGE_TSX).id_for_node_kind(kind, true)
}
fn field_to_id(&self, field: &str) -> Option<u16> {
TSLanguage::from(tree_sitter_typescript::LANGUAGE_TSX).field_id_for_name(field)
TSLanguage::from(tree_sitter_typescript::LANGUAGE_TSX)
.field_id_for_name(field)
.map(|f| f.get())
}
fn from_path<P: AsRef<Path>>(_path: P) -> Option<Self> {
Some(TypeScript::Tsx)
+4 -1
View File
@@ -62,7 +62,10 @@ mod test {
ts_lang.id_for_node_kind(kind, /* named */ true)
}
fn field_to_id(&self, field: &str) -> Option<u16> {
self.get_ts_language().field_id_for_name(field)
self
.get_ts_language()
.field_id_for_name(field)
.map(|f| f.get())
}
fn build_pattern(&self, builder: &PatternBuilder) -> Result<Pattern, PatternError> {
builder.build(|src| StrDoc::try_new(src, self.clone()))
+2 -2
View File
@@ -38,8 +38,8 @@ impl Position {
let source = node.get_doc().get_source();
source.get_char_column(self.byte_column, self.byte_offset)
}
pub fn byte_point(&self) -> (u32, u32) {
(self.line as u32, self.byte_column as u32)
pub fn byte_point(&self) -> (usize, usize) {
(self.line, self.byte_column)
}
}
+38 -36
View File
@@ -7,17 +7,16 @@ use crate::{node::KindId, Language, Position};
use crate::{AstGrep, Matcher};
use std::borrow::Cow;
use std::collections::HashMap;
use std::num::NonZero;
use thiserror::Error;
pub use traversal::{TsPre, Visitor};
pub use tree_sitter::Language as TSLanguage;
use tree_sitter::{InputEdit, LanguageError, Node, Parser, ParserError, Point, Tree};
use tree_sitter::{InputEdit, LanguageError, Node, Parser, Point, Tree};
pub use tree_sitter::{Point as TSPoint, Range as TSRange};
/// Represents tree-sitter related error
#[derive(Debug, Error)]
pub enum TSParseError {
#[error("web-tree-sitter parser is not available")]
Parse(#[from] ParserError),
#[error("incompatible `Language` is assigned to a `Parser`.")]
Language(#[from] LanguageError),
/// A general error when tree sitter fails to parse in time. It can be caused by
@@ -31,12 +30,12 @@ pub enum TSParseError {
#[inline]
fn parse_lang(
parse_fn: impl Fn(&mut Parser) -> Result<Option<Tree>, ParserError>,
parse_fn: impl Fn(&mut Parser) -> Option<Tree>,
ts_lang: TSLanguage,
) -> Result<Tree, TSParseError> {
let mut parser = Parser::new()?;
let mut parser = Parser::new();
parser.set_language(&ts_lang)?;
if let Some(tree) = parse_fn(&mut parser)? {
if let Some(tree) = parse_fn(&mut parser) {
Ok(tree)
} else {
Err(TSParseError::TreeUnavailable)
@@ -87,9 +86,11 @@ impl<L: LanguageExt> Doc for StrDoc<L> {
self.tree.root_node()
}
fn get_node_text<'a>(&'a self, node: &Self::Node<'a>) -> Cow<'a, str> {
node
.utf8_text(self.src.as_bytes())
.expect("invalid source text encoding")
Cow::Borrowed(
node
.utf8_text(self.src.as_bytes())
.expect("invalid source text encoding"),
)
}
}
@@ -129,7 +130,7 @@ impl<'r> SgNode<'r> for Node<'r> {
if inner.id() == self_id {
return None;
}
ancestor = inner.child_with_descendant(self.clone());
ancestor = inner.child_with_descendant(*self);
Some(inner)
})
// We must iterate up the tree to preserve backwards compatibility
@@ -142,14 +143,14 @@ impl<'r> SgNode<'r> for Node<'r> {
}
fn child(&self, nth: usize) -> Option<Self> {
// native tree-sitter indexes children by usize, so no cast is needed
Node::child(self, nth as u32)
Node::child(self, nth)
}
fn children(&self) -> impl ExactSizeIterator<Item = Self> {
let mut cursor = self.walk();
cursor.goto_first_child();
NodeWalker {
cursor,
count: self.child_count() as usize,
count: self.child_count(),
}
}
fn child_by_field_id(&self, field_id: u16) -> Option<Self> {
@@ -163,7 +164,7 @@ impl<'r> SgNode<'r> for Node<'r> {
}
fn next_all(&self) -> impl Iterator<Item = Self> {
// if root is none, use self as fallback to return a type-stable Iterator
let node = self.parent().unwrap_or_else(|| self.clone());
let node = self.parent().unwrap_or(*self);
let mut cursor = node.walk();
cursor.goto_first_child_for_byte(self.start_byte());
std::iter::from_fn(move || {
@@ -176,7 +177,7 @@ impl<'r> SgNode<'r> for Node<'r> {
}
fn prev_all(&self) -> impl Iterator<Item = Self> {
// if root is none, use self as fallback to return a type-stable Iterator
let node = self.parent().unwrap_or_else(|| self.clone());
let node = self.parent().unwrap_or(*self);
let mut cursor = node.walk();
cursor.goto_first_child_for_byte(self.start_byte());
std::iter::from_fn(move || {
@@ -199,7 +200,7 @@ impl<'r> SgNode<'r> for Node<'r> {
self.child_count() == 0
}
fn kind(&self) -> Cow<str> {
Node::kind(self)
Cow::Borrowed(Node::kind(self))
}
fn kind_id(&self) -> KindId {
Node::kind_id(self)
@@ -208,17 +209,17 @@ impl<'r> SgNode<'r> for Node<'r> {
self.id()
}
fn range(&self) -> std::ops::Range<usize> {
(self.start_byte() as usize)..(self.end_byte() as usize)
self.start_byte()..self.end_byte()
}
fn start_pos(&self) -> Position {
let pos = self.start_position();
let byte = self.start_byte() as usize;
Position::new(pos.row() as usize, pos.column() as usize, byte)
let byte = self.start_byte();
Position::new(pos.row, pos.column, byte)
}
fn end_pos(&self) -> Position {
let pos = self.end_position();
let byte = self.end_byte() as usize;
Position::new(pos.row() as usize, pos.column() as usize, byte)
let byte = self.end_byte();
Position::new(pos.row, pos.column, byte)
}
// missing node is a tree-sitter specific concept
fn is_missing(&self) -> bool {
@@ -232,6 +233,7 @@ impl<'r> SgNode<'r> for Node<'r> {
self.child_by_field_name(name)
}
fn field_children(&self, field_id: Option<u16>) -> impl Iterator<Item = Self> {
let field_id = field_id.and_then(NonZero::new);
let mut cursor = self.walk();
cursor.goto_first_child();
// if field_id is not found, iteration is done
@@ -329,14 +331,14 @@ impl ContentExt for String {
let old_end_position = position_for_offset(input, old_end_byte);
input.splice(start_byte..old_end_byte, edit.inserted_text.clone());
let new_end_position = position_for_offset(input, new_end_byte);
InputEdit::new(
start_byte as u32,
old_end_byte as u32,
new_end_byte as u32,
&start_position,
&old_end_position,
&new_end_position,
)
InputEdit {
start_byte,
old_end_byte,
new_end_byte,
start_position,
old_end_position,
new_end_position,
}
}
}
@@ -360,15 +362,15 @@ impl<L: LanguageExt> Root<StrDoc<L>> {
.filter_map(|(lang, ranges)| {
let lang = get_lang(&lang)?;
let source = self.doc.get_source();
let mut parser = tree_sitter::Parser::new().ok()?;
let mut parser = Parser::new();
parser.set_included_ranges(&ranges).ok()?;
parser.set_language(&lang.get_ts_language()).ok()?;
let tree = parser.parse(source, None).ok()?;
tree.map(|t| Self {
let tree = parser.parse(source, None)?;
Some(Self {
doc: StrDoc {
src: self.doc.src.clone(),
lang,
tree: t,
tree,
},
})
})
@@ -394,8 +396,8 @@ impl<'r, L: LanguageExt> crate::Node<'r, StrDoc<L>> {
pub fn display_context(&self, before: usize, after: usize) -> DisplayContext<'r> {
let source = self.root.doc.get_source().as_str();
let bytes = source.as_bytes();
let start = self.inner.start_byte() as usize;
let end = self.inner.end_byte() as usize;
let start = self.inner.start_byte();
let end = self.inner.end_byte();
let (mut leading, mut trailing) = (start, end);
let mut lines_before = before + 1;
while leading > 0 {
@@ -463,8 +465,8 @@ mod test {
let tree = parse("var a = 1234")?;
let root_node = tree.root_node();
assert_eq!(root_node.kind(), "program");
assert_eq!(root_node.start_position().column(), 0);
assert_eq!(root_node.end_position().column(), 12);
assert_eq!(root_node.start_position().column, 0);
assert_eq!(root_node.end_position().column, 12);
assert_eq!(
root_node.to_sexp(),
"(program (variable_declaration (variable_declarator name: (identifier) value: (number))))"
+1 -1
View File
@@ -387,7 +387,7 @@ pub struct Level<'tree, L: LanguageExt> {
impl<'tree, L: LanguageExt> Level<'tree, L> {
pub fn new(node: &Node<'tree, StrDoc<L>>) -> Self {
let mut deque = VecDeque::new();
deque.push_back(node.inner.clone());
deque.push_back(node.inner);
let cursor = node.inner.walk();
Self {
deque,
+1 -1
View File
@@ -17,7 +17,7 @@ ignore.workspace = true
libloading = "0.8.3"
serde.workspace = true
thiserror.workspace = true
tree-sitter-native = { version = "0.25.3", package = "tree-sitter" }
tree-sitter.workspace = true
[dev-dependencies]
serde_yaml.workspace = true
+4 -4
View File
@@ -6,7 +6,7 @@ use ignore::types::{Types, TypesBuilder};
use libloading::{Error as LibError, Library, Symbol};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tree_sitter_native::{Language as NativeTS, LANGUAGE_VERSION, MIN_COMPATIBLE_LANGUAGE_VERSION};
use tree_sitter::{Language as NativeTS, LANGUAGE_VERSION, MIN_COMPATIBLE_LANGUAGE_VERSION};
use std::borrow::Cow;
use std::fs::canonicalize;
@@ -133,7 +133,7 @@ unsafe fn load_ts_language(
} else {
// ATTENTION: dragon ahead
// must hold valid reference to NativeTS
Ok((lib, lang.into()))
Ok((lib, lang))
}
}
@@ -239,7 +239,7 @@ impl Language for DynamicLang {
}
fn field_to_id(&self, field: &str) -> Option<u16> {
let inner = self.inner();
inner.lang.field_id_for_name(field)
inner.lang.field_id_for_name(field).map(|f| f.get())
}
fn from_path<P: AsRef<Path>>(path: P) -> Option<Self> {
@@ -297,7 +297,7 @@ mod test {
self.0.id_for_node_kind(kind, /* named */ true)
}
fn field_to_id(&self, field: &str) -> Option<u16> {
self.0.field_id_for_name(field)
self.0.field_id_for_name(field).map(|f| f.get())
}
fn build_pattern(&self, builder: &PatternBuilder) -> Result<Pattern, PatternError> {
builder.build(|src| StrDoc::try_new(src, self.clone()))
+9 -2
View File
@@ -20,7 +20,9 @@ impl Language for Html {
crate::parsers::language_html().id_for_node_kind(kind, true)
}
fn field_to_id(&self, field: &str) -> Option<u16> {
crate::parsers::language_html().field_id_for_name(field)
crate::parsers::language_html()
.field_id_for_name(field)
.map(|f| f.get())
}
fn build_pattern(&self, builder: &PatternBuilder) -> Result<Pattern, PatternError> {
builder.build(|src| StrDoc::try_new(src, *self))
@@ -88,7 +90,12 @@ fn node_to_range<D: Doc>(node: &Node<D>) -> TSRange {
let end = node.end_pos();
let ep = end.byte_point();
let ep = tree_sitter::Point::new(ep.0, ep.1);
TSRange::new(r.start as u32, r.end as u32, &sp, &ep)
TSRange {
start_byte: r.start,
end_byte: r.end,
start_point: sp,
end_point: ep,
}
}
#[cfg(test)]
+8 -2
View File
@@ -59,7 +59,10 @@ macro_rules! impl_lang {
.id_for_node_kind(kind, /*named*/ true)
}
fn field_to_id(&self, field: &str) -> Option<u16> {
self.get_ts_language().field_id_for_name(field)
self
.get_ts_language()
.field_id_for_name(field)
.map(|f| f.get())
}
fn build_pattern(&self, builder: &PatternBuilder) -> Result<Pattern, PatternError> {
builder.build(|src| StrDoc::try_new(src, self.clone()))
@@ -107,7 +110,10 @@ macro_rules! impl_lang_expando {
.id_for_node_kind(kind, /*named*/ true)
}
fn field_to_id(&self, field: &str) -> Option<u16> {
self.get_ts_language().field_id_for_name(field)
self
.get_ts_language()
.field_id_for_name(field)
.map(|f| f.get())
}
fn expando_char(&self) -> char {
$char
+10 -10
View File
@@ -86,14 +86,14 @@ impl ContentExt for Wrapper {
let old_end_position = pos_for_byte_offset(input, old_end_byte);
input.splice(start_byte / 2..old_end_byte / 2, edit.inserted_text.clone());
let new_end_position = pos_for_byte_offset(input, new_end_byte);
InputEdit::new(
start_byte as u32,
old_end_byte as u32,
new_end_byte as u32,
&start_position,
&old_end_position,
&new_end_position,
)
InputEdit {
start_byte,
old_end_byte,
new_end_byte,
start_position,
old_end_position,
new_end_position,
}
}
}
@@ -124,10 +124,10 @@ fn parse(
lang: &NapiLang,
old_tree: Option<&Tree>,
) -> std::result::Result<Tree, TSParseError> {
let mut parser = Parser::new()?;
let mut parser = Parser::new();
let ts_lang = lang.get_ts_language();
parser.set_language(&ts_lang)?;
if let Some(tree) = parser.parse_utf16_le(source.inner.as_slice(), old_tree)? {
if let Some(tree) = parser.parse_utf16_le(source.inner.as_slice(), old_tree) {
Ok(tree)
} else {
Err(TSParseError::TreeUnavailable)