diff --git a/README.md b/README.md
index 4b71ab442..6226b649f 100644
--- a/README.md
+++ b/README.md
@@ -212,14 +212,20 @@ feature flags can be used to use the
[c2rust transpilation of tree-sitter](https://crates.io/crates/tree-sitter-c2rust)
instead of the
[official C implementation](https://crates.io/crates/tree-sitter). This is only
-supported by the `syntastica-parsers-git` parser collection, and only parsers
-that don't use an external C++ scanner are available.
+supported by the `syntastica-parsers-git` parser collection as only that
+collection has enough control over the parser build process to allow for Wasm
+support.
To use this approach, simply set `default-features = false` and enable the
`runtime-c2rust` feature for _all_ `syntastica` dependencies. An example using
this approach for use of `syntastica` in a Dioxus project can be found
[here](https://github.com/RubixDev/syntastica/tree/main/examples/wasm/dioxus).
+Note that for extra safety, you should compile this with nightly Rust and the
+`-Zwasm_c_abi=spec` option set, as otherwise the `wasm32-unknown-unknown` target
+doesn't yet conform to the C ABI. In my personal testing, though, I haven't had
+any issues on stable Rust either.
+
### 2. Using Emscripten / the `syntastica-js` package
`syntastica` can also be compiled to `wasm32-unknown-emscripten` which has much
@@ -239,7 +245,9 @@ There are three examples using `syntastica-js`:
- [Usage from JavaScript in NodeJS for console applications](https://github.com/RubixDev/syntastica/tree/main/examples/wasm/node)
- [Usage from Rust in the browser using wasm-bindgen](https://github.com/RubixDev/syntastica/tree/main/examples/wasm/wasm-pack-with-npm-pkg)
-> **Note**
+
+
+> [!WARNING]
>
> The `syntastica` NPM package is currently not being updated and uses an old
> version of `syntastica`, because the current implementation always includes
@@ -248,6 +256,8 @@ There are three examples using `syntastica-js`:
> package into multiple binaries that can be fetched from a server on-demand,
> and to provide multiple NPM packages for manual selection of the parsers.
+
+
## Examples
This section contains some basic usage examples. More specific examples can be
@@ -504,3 +514,5 @@ possible replacement for [`syntect`](https://crates.io/crates/syntect). The main
difference from `lirstings` at the start was the parser collection(s), providing
a rigid set of parsers and queries for users. Over time `syntastica` then grew
to the big project it is today.
+
+[!WARNING]: .
diff --git a/syntastica-macros/languages.toml b/syntastica-macros/languages.toml
index d981530ac..ad4190da8 100644
--- a/syntastica-macros/languages.toml
+++ b/syntastica-macros/languages.toml
@@ -18,7 +18,6 @@ locals = false
name = "bash"
group = "some"
file-types = ["sh", "bash", "ksh"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-bash", rev = "0c46d792d54c536be5ff7eb18eb95c70fccdb232" }
external-scanner = { c = true, cpp = false }
@@ -83,7 +82,6 @@ locals = false
name = "cpp"
group = "some"
file-types = ["cpp"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-cpp", rev = "e5cea0ec884c5c3d2d1e41a741a66ce13da4d945" }
external-scanner = { c = true, cpp = false }
@@ -213,7 +211,6 @@ locals = true
name = "haskell"
group = "most"
file-types = ["haskell"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-haskell", rev = "0975ef72fc3c47b530309ca93937d7d143523628" }
external-scanner = { c = true, cpp = false }
@@ -246,7 +243,6 @@ locals = false
name = "html"
group = "some"
file-types = ["html"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-html", rev = "cbb91a0ff3621245e890d1c50cc811bffb77a26b" }
external-scanner = { c = true, cpp = false }
@@ -359,7 +355,6 @@ locals = false
name = "julia"
group = "all"
file-types = ["julia"]
-wasm = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-julia", rev = "3f9870ca1f419e480f39fc181fcfb6dad480969a" }
external-scanner = { c = true, cpp = false }
@@ -394,7 +389,6 @@ locals = true
name = "llvm"
group = "all"
file-types = ["llvm"]
-wasm = false
[languages.parser]
git = { url = "https://github.com/benwilliamgraham/tree-sitter-llvm", rev = "c14cb839003348692158b845db9edda201374548" }
external-scanner = { c = false, cpp = false }
@@ -411,7 +405,6 @@ locals = false
name = "lua"
group = "some"
file-types = ["lua"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/muniftanjim/tree-sitter-lua", rev = "68d29aa745b68ae22cbbdb5dcb68c20232521ff6" }
external-scanner = { c = true, cpp = false }
@@ -428,7 +421,6 @@ locals = true
name = "markdown"
group = "most"
file-types = ["markdown"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/MDeiml/tree-sitter-markdown", rev = "192407ab5a24bfc24f13332979b5e7967518754a", path = "tree-sitter-markdown" }
external-scanner = { c = true, cpp = false }
@@ -444,9 +436,7 @@ locals = false
[[languages]]
name = "markdown_inline"
group = "most"
-# TODO: markdown filetype for both markdown parsers?
-file-types = ["markdown"]
-wasm-unknown = false
+file-types = []
[languages.parser]
git = { url = "https://github.com/MDeiml/tree-sitter-markdown", rev = "192407ab5a24bfc24f13332979b5e7967518754a", path = "tree-sitter-markdown-inline" }
external-scanner = { c = true, cpp = false }
@@ -463,7 +453,6 @@ locals = false
name = "ocaml"
group = "all"
file-types = ["ocaml"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-ocaml", rev = "91e4d9106f5da8199ab5158ba65e02da3e664597", path = "grammars/ocaml" }
external-scanner = { c = true, cpp = false }
@@ -480,7 +469,6 @@ locals = true
name = "ocaml_interface"
group = "all"
file-types = ["ocamlinterface"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-ocaml", rev = "91e4d9106f5da8199ab5158ba65e02da3e664597", path = "grammars/interface" }
external-scanner = { c = true, cpp = false }
@@ -497,7 +485,6 @@ locals = true
name = "php"
group = "most"
file-types = ["php"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-php", rev = "f3a19ab3217a6e838870fc7142fa492d1fd7a7c9", path = "php" }
external-scanner = { c = true, cpp = false }
@@ -514,7 +501,6 @@ locals = true
name = "python"
group = "some"
file-types = ["python"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-python", rev = "710796b8b877a970297106e5bbc8e2afa47f86ec" }
external-scanner = { c = true, cpp = false }
@@ -611,7 +597,6 @@ locals = true
name = "scala"
group = "most"
file-types = ["scala", "sbt"]
-wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-scala", rev = "d67fe3ba3879bd7ba74bd7f56c9a38fed241ba25" }
external-scanner = { c = true, cpp = false }
diff --git a/syntastica-parsers-git/Cargo.toml b/syntastica-parsers-git/Cargo.toml
index 66711ee7a..1c8f88507 100644
--- a/syntastica-parsers-git/Cargo.toml
+++ b/syntastica-parsers-git/Cargo.toml
@@ -20,12 +20,15 @@ document-features = { workspace = true, optional = true }
once_cell.workspace = true
strum = { workspace = true, features = ["derive", "phf"] }
+[target.wasm32-unknown-unknown.dependencies]
+# TODO: switch to official when https://github.com/trevyn/wasm32-unknown-unknown-openbsd-libc/pull/3 is merged
+wasm32-unknown-unknown-openbsd-libc = { version = "0.2.1", package = "wasm32-unknown-unknown-openbsd-libc-wctypes-fix" }
+
[build-dependencies]
syntastica-macros.workspace = true
cc.workspace = true
rustc_version = { workspace = true, optional = true }
-tree-sitter-wasm-build-tool = { version = "0.2.2", path = "../tree-sitter-wasm-build-tool/" }
[package.metadata.docs.rs]
all-features = true
diff --git a/syntastica-parsers-git/build.rs b/syntastica-parsers-git/build.rs
index b3c50c0a2..c9561fb25 100644
--- a/syntastica-parsers-git/build.rs
+++ b/syntastica-parsers-git/build.rs
@@ -67,7 +67,7 @@ fn compile_parser(
// external cpp scanners are not supported on the `wasm32-unknown-unknown` target
// plus extra cases for parsers which require additional libc features
- if target == "wasm32-unknown-unknown" && (external_cpp || !wasm_unknown) {
+ if target == "wasm32-unknown-unknown" && !wasm_unknown {
return Ok(());
}
@@ -103,7 +103,7 @@ fn compile_parser(
}
}
- // clone repo into `parsers/{name}`, if it does not already exists
+ // clone repo into `parsers/{name}/{rev}`, if it does not already exist
let repo_dir = clone_dir.join(name).join(rev);
if !repo_dir.exists() {
println!("cloning repository for {name}");
@@ -138,13 +138,18 @@ fn compile_parser(
if external_c {
let scanner_path = src_dir.join("scanner.c");
c_config.file(&scanner_path);
- println!("cargo::rerun-if-changed={}", scanner_path.to_str().unwrap());
+ println!("cargo::rerun-if-changed={}", scanner_path.display());
}
- #[cfg(feature = "runtime-c2rust")]
- tree_sitter_wasm_build_tool::add_wasm_headers(&mut c_config).unwrap();
+ if target == "wasm32-unknown-unknown" {
+ c_config.include(
+ // this is set by the `wasm32-unknown-unknown-openbsd-libc` crate
+ std::env::var_os("DEP_WASM32_UNKNOWN_UNKNOWN_OPENBSD_LIBC_INCLUDE")
+ .expect("failed to find wasm libc"),
+ );
+ }
- println!("cargo::rerun-if-changed={}", parser_path.to_str().unwrap());
+ println!("cargo::rerun-if-changed={}", parser_path.display());
c_config.compile(&c_lib_name);
println!("finished building parser for {name}");
@@ -168,7 +173,7 @@ fn compile_parser(
.flag_if_supported("-w");
let scanner_path = src_dir.join("scanner.cc");
cpp_config.file(&scanner_path);
- println!("cargo::rerun-if-changed={}", scanner_path.to_str().unwrap());
+ println!("cargo::rerun-if-changed={}", scanner_path.display());
cpp_config.compile(&cpp_lib_name);
println!("finished building cpp scanner for {name}");
diff --git a/syntastica-parsers-git/src/lib.rs b/syntastica-parsers-git/src/lib.rs
index c346271c6..1d3e67996 100644
--- a/syntastica-parsers-git/src/lib.rs
+++ b/syntastica-parsers-git/src/lib.rs
@@ -9,3 +9,118 @@
#![deny(missing_docs)]
syntastica_macros::parsers_ffi!();
+
+/// Basic implementation of some libc functions that tree-sitter parsers can link to.
+///
+/// The functions are exported unmangled so that C code compiled for
+/// `wasm32-unknown-unknown` (which ships no libc of its own) resolves these symbols
+/// against the Rust definitions below.
+#[cfg(all(
+    feature = "runtime-c2rust",
+    target_arch = "wasm32",
+    target_vendor = "unknown",
+    target_os = "unknown",
+    target_env = ""
+))]
+mod wasm_c_bridge {
+    use std::{
+        alloc::{self, Layout},
+        ffi::CStr,
+        mem::MaybeUninit,
+    };
+
+    /// Size of the bookkeeping header placed in front of every allocation, and the
+    /// alignment of every pointer handed out by [`malloc`], [`calloc`] and [`realloc`].
+    ///
+    /// The header stores the payload size so that [`realloc`] can preserve the old
+    /// contents and [`free`] can rebuild the layout that was used for the allocation.
+    const HEADER: usize = 16;
+
+    /// Returns the layout for an allocation carrying `size` payload bytes plus the
+    /// header, or `None` if that size cannot be represented.
+    fn layout_for(size: usize) -> Option<Layout> {
+        Layout::from_size_align(size.checked_add(HEADER)?, HEADER).ok()
+    }
+
+    /// Records `size` in the header at `base` and returns the payload pointer.
+    ///
+    /// A null `base` (allocation failure) is passed through unchanged.
+    ///
+    /// # Safety
+    ///
+    /// `base` must be null or point to at least `HEADER` writable bytes aligned to `HEADER`.
+    unsafe fn finish_alloc(base: *mut u8, size: usize) -> *mut u8 {
+        if base.is_null() {
+            return base;
+        }
+        // SAFETY: guaranteed by the caller, see the function contract above
+        unsafe {
+            base.cast::<usize>().write(size);
+            base.add(HEADER)
+        }
+    }
+
+    /// C `abort`: terminates execution by panicking.
+    #[no_mangle]
+    extern "C" fn abort() -> ! {
+        panic!("program aborted");
+    }
+
+    /// C `towupper`: maps `wc` to its uppercase form.
+    ///
+    /// Values that are not valid Unicode scalar values, and characters whose uppercase
+    /// mapping is not exactly one character, are returned unchanged.
+    #[no_mangle]
+    extern "C" fn towupper(wc: u32) -> u32 {
+        let Some(ch) = char::from_u32(wc) else {
+            return wc;
+        };
+        let mut upper = ch.to_uppercase();
+        match (upper.next(), upper.next()) {
+            (Some(single), None) => single as u32,
+            _ => wc,
+        }
+    }
+
+    /// C `towlower`: maps `wc` to its lowercase form.
+    ///
+    /// Values that are not valid Unicode scalar values, and characters whose lowercase
+    /// mapping is not exactly one character, are returned unchanged.
+    #[no_mangle]
+    extern "C" fn towlower(wc: u32) -> u32 {
+        let Some(ch) = char::from_u32(wc) else {
+            return wc;
+        };
+        let mut lower = ch.to_lowercase();
+        match (lower.next(), lower.next()) {
+            (Some(single), None) => single as u32,
+            _ => wc,
+        }
+    }
+
+    /// C `malloc`: allocates `size` uninitialised bytes.
+    ///
+    /// Returns a null pointer if the request cannot be satisfied.
+    #[no_mangle]
+    extern "C" fn malloc(size: usize) -> *mut MaybeUninit<u8> {
+        let Some(layout) = layout_for(size) else {
+            return std::ptr::null_mut();
+        };
+        // SAFETY: the layout always includes the header, so its size is never zero
+        let base = unsafe { alloc::alloc(layout) };
+        // SAFETY: `base` is null or a fresh allocation of `layout`, which is aligned to
+        // and at least as large as `HEADER`
+        unsafe { finish_alloc(base, size) }.cast()
+    }
+
+    /// C `calloc`: allocates zero-initialised storage for `num` objects of `size` bytes.
+    ///
+    /// Returns a null pointer if `num * size` overflows or the request cannot be satisfied.
+    #[no_mangle]
+    extern "C" fn calloc(num: usize, size: usize) -> *mut u8 {
+        let Some(layout) = num.checked_mul(size).and_then(layout_for) else {
+            return std::ptr::null_mut();
+        };
+        // SAFETY: the layout always includes the header, so its size is never zero
+        let base = unsafe { alloc::alloc_zeroed(layout) };
+        // SAFETY: `base` is null or a fresh allocation of `layout`, which is aligned to
+        // and at least as large as `HEADER`
+        unsafe { finish_alloc(base, layout.size() - HEADER) }
+    }
+
+    /// C `realloc`: resizes the allocation at `ptr` to `new_size` bytes, preserving the
+    /// contents up to the smaller of the old and new sizes.
+    ///
+    /// A null `ptr` behaves like [`malloc`]. On failure a null pointer is returned and the
+    /// original allocation is left untouched.
+    #[no_mangle]
+    extern "C" fn realloc(ptr: *mut u8, new_size: usize) -> *mut MaybeUninit<u8> {
+        if ptr.is_null() {
+            return malloc(new_size);
+        }
+        let Some(new_layout) = layout_for(new_size) else {
+            return std::ptr::null_mut();
+        };
+        // SAFETY: per the C contract `ptr` was returned by `malloc`, `calloc` or `realloc`
+        // from this module and has not been freed, so a header recording the payload size
+        // sits `HEADER` bytes in front of it and the reconstructed layout matches the one
+        // used for the allocation
+        unsafe {
+            let base = ptr.sub(HEADER);
+            let old_size = base.cast::<usize>().read();
+            let old_layout = Layout::from_size_align_unchecked(old_size + HEADER, HEADER);
+            let new_base = alloc::realloc(base, old_layout, new_layout.size());
+            finish_alloc(new_base, new_size).cast()
+        }
+    }
+
+    /// C `free`: releases an allocation made by [`malloc`], [`calloc`] or [`realloc`].
+    ///
+    /// Passing a null pointer is a no-op.
+    #[no_mangle]
+    extern "C" fn free(ptr: *mut u8) {
+        if ptr.is_null() {
+            return;
+        }
+        // SAFETY: per the C contract `ptr` was returned by `malloc`, `calloc` or `realloc`
+        // from this module and has not been freed yet, so the header in front of it holds
+        // the payload size needed to rebuild the allocation's layout
+        unsafe {
+            let base = ptr.sub(HEADER);
+            let size = base.cast::<usize>().read();
+            alloc::dealloc(base, Layout::from_size_align_unchecked(size + HEADER, HEADER));
+        }
+    }
+
+    /// Assertion-failure hook called by the C `assert` macro: panics with the location
+    /// (`file`, `line`, `func`) and the text of the failed expression (`error`).
+    #[no_mangle]
+    extern "C" fn __assert2(file: *const i8, line: i32, func: *const i8, error: *const i8) -> ! {
+        // SAFETY: the C side passes NUL-terminated strings for all three pointers
+        let file = unsafe { CStr::from_ptr(file) }.to_string_lossy();
+        let func = unsafe { CStr::from_ptr(func) }.to_string_lossy();
+        let error = unsafe { CStr::from_ptr(error) }.to_string_lossy();
+        panic!("assertion failed in {file} on line {line} in {func}: {error}");
+    }
+
+    /// C `strcmp`: compares two NUL-terminated strings bytewise.
+    ///
+    /// Returns `-1`, `0` or `1` when `lhs` sorts before, equal to or after `rhs`.
+    #[no_mangle]
+    extern "C" fn strcmp(lhs: *const i8, rhs: *const i8) -> i32 {
+        // SAFETY: the C contract requires both arguments to be NUL-terminated strings
+        let lhs = unsafe { CStr::from_ptr(lhs) };
+        let rhs = unsafe { CStr::from_ptr(rhs) };
+        lhs.cmp(rhs) as i32
+    }
+
+    /// C `strncpy`: copies at most `count` characters from `src` to `dest`.
+    ///
+    /// Copying stops after the first NUL in `src`; the remainder of `dest` (up to `count`
+    /// bytes in total) is filled with NULs. Returns `dest`.
+    #[no_mangle]
+    extern "C" fn strncpy(dest: *mut i8, src: *const i8, count: usize) -> *mut i8 {
+        let mut terminated = false;
+        for i in 0..count {
+            let byte = if terminated {
+                // never read past the terminator of `src`, pad with NULs instead
+                0
+            } else {
+                // SAFETY: no NUL was seen before index `i`, so it still lies within `src`
+                unsafe { src.add(i).read() }
+            };
+            terminated |= byte == 0;
+            // SAFETY: the C contract requires `dest` to be writable for `count` bytes
+            unsafe { dest.add(i).write(byte) };
+        }
+        dest
+    }
+
+    /// C `memchr`: finds the first occurrence of the byte `ch` in the `count` bytes
+    /// starting at `ptr`.
+    ///
+    /// Returns a pointer to the match, or a null pointer if there is none.
+    #[no_mangle]
+    extern "C" fn memchr(ptr: *const u8, ch: i32, count: usize) -> *mut u8 {
+        // C converts the searched value to `unsigned char` before comparing
+        let needle = ch as u8;
+        for i in 0..count {
+            // SAFETY: the C contract requires `ptr` to be readable for `count` bytes
+            let candidate = unsafe { ptr.add(i) };
+            if unsafe { candidate.read() } == needle {
+                return candidate as *mut u8;
+            }
+        }
+        std::ptr::null_mut()
+    }
+}