Skip to content

Commit

Permalink
feat: better wasm support
Browse files Browse the repository at this point in the history
  • Loading branch information
RubixDev committed Feb 26, 2025
1 parent 3c19a63 commit ccb3448
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 27 deletions.
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,20 @@ feature flags can be used to use the
[c2rust transpilation of tree-sitter](https://crates.io/crates/tree-sitter-c2rust)
instead of the
[official C implementation](https://crates.io/crates/tree-sitter). This is only
supported by the `syntastica-parsers-git` parser collection, and only parsers
that don't use an external C++ scanner are available.
supported by the `syntastica-parsers-git` parser collection as only that
collection has enough control over the parser build process to allow for Wasm
support.

To use this approach, simply set `default-features = false` and enable the
`runtime-c2rust` feature for _all_ `syntastica` dependencies. An example using
this approach for use of `syntastica` in a Dioxus project can be found
[here](https://github.com/RubixDev/syntastica/tree/main/examples/wasm/dioxus).

Note that for extra safety, you should compile this with nightly Rust and the
`-Zwasm_c_abi=spec` option set, as the `wasm32-unknown-unknown` target otherwise
doesn't yet conform to the C ABI. That said, in my personal testing I haven't
run into any issues on stable Rust either.

### 2. Using Emscripten / the `syntastica-js` package

`syntastica` can also be compiled to `wasm32-unknown-emscripten` which has much
Expand All @@ -239,7 +245,9 @@ There are three examples using `syntastica-js`:
- [Usage from JavaScript in NodeJS for console applications](https://github.com/RubixDev/syntastica/tree/main/examples/wasm/node)
- [Usage from Rust in the browser using wasm-bindgen](https://github.com/RubixDev/syntastica/tree/main/examples/wasm/wasm-pack-with-npm-pkg)

> **Note**
<div class="warning">

> [!WARNING]
>
> The `syntastica` NPM package is currently not being updated and uses an old
> version of `syntastica`, because the current implementation always includes
Expand All @@ -248,6 +256,8 @@ There are three examples using `syntastica-js`:
> package into multiple binaries that can be fetched from a server on-demand,
> and to provide multiple NPM packages for manual selection of the parsers.
</div>

## Examples

This section contains some basic usage examples. More specific examples can be
Expand Down Expand Up @@ -504,3 +514,5 @@ possible replacement for [`syntect`](https://crates.io/crates/syntect). The main
difference from `lirstings` at the start was the parser collection(s), providing
a rigid set of parsers and queries for users. Over time `syntastica` then grew
to the big project it is today.

[!WARNING]: .
17 changes: 1 addition & 16 deletions syntastica-macros/languages.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ locals = false
name = "bash"
group = "some"
file-types = ["sh", "bash", "ksh"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-bash", rev = "0c46d792d54c536be5ff7eb18eb95c70fccdb232" }
external-scanner = { c = true, cpp = false }
Expand Down Expand Up @@ -83,7 +82,6 @@ locals = false
name = "cpp"
group = "some"
file-types = ["cpp"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-cpp", rev = "e5cea0ec884c5c3d2d1e41a741a66ce13da4d945" }
external-scanner = { c = true, cpp = false }
Expand Down Expand Up @@ -213,7 +211,6 @@ locals = true
name = "haskell"
group = "most"
file-types = ["haskell"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-haskell", rev = "0975ef72fc3c47b530309ca93937d7d143523628" }
external-scanner = { c = true, cpp = false }
Expand Down Expand Up @@ -246,7 +243,6 @@ locals = false
name = "html"
group = "some"
file-types = ["html"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-html", rev = "cbb91a0ff3621245e890d1c50cc811bffb77a26b" }
external-scanner = { c = true, cpp = false }
Expand Down Expand Up @@ -359,7 +355,6 @@ locals = false
name = "julia"
group = "all"
file-types = ["julia"]
wasm = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-julia", rev = "3f9870ca1f419e480f39fc181fcfb6dad480969a" }
external-scanner = { c = true, cpp = false }
Expand Down Expand Up @@ -394,7 +389,6 @@ locals = true
name = "llvm"
group = "all"
file-types = ["llvm"]
wasm = false
[languages.parser]
git = { url = "https://github.com/benwilliamgraham/tree-sitter-llvm", rev = "c14cb839003348692158b845db9edda201374548" }
external-scanner = { c = false, cpp = false }
Expand All @@ -411,7 +405,6 @@ locals = false
name = "lua"
group = "some"
file-types = ["lua"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/muniftanjim/tree-sitter-lua", rev = "68d29aa745b68ae22cbbdb5dcb68c20232521ff6" }
external-scanner = { c = true, cpp = false }
Expand All @@ -428,7 +421,6 @@ locals = true
name = "markdown"
group = "most"
file-types = ["markdown"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/MDeiml/tree-sitter-markdown", rev = "192407ab5a24bfc24f13332979b5e7967518754a", path = "tree-sitter-markdown" }
external-scanner = { c = true, cpp = false }
Expand All @@ -444,9 +436,7 @@ locals = false
[[languages]]
name = "markdown_inline"
group = "most"
# TODO: markdown filetype for both markdown parsers?
file-types = ["markdown"]
wasm-unknown = false
file-types = []
[languages.parser]
git = { url = "https://github.com/MDeiml/tree-sitter-markdown", rev = "192407ab5a24bfc24f13332979b5e7967518754a", path = "tree-sitter-markdown-inline" }
external-scanner = { c = true, cpp = false }
Expand All @@ -463,7 +453,6 @@ locals = false
name = "ocaml"
group = "all"
file-types = ["ocaml"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-ocaml", rev = "91e4d9106f5da8199ab5158ba65e02da3e664597", path = "grammars/ocaml" }
external-scanner = { c = true, cpp = false }
Expand All @@ -480,7 +469,6 @@ locals = true
name = "ocaml_interface"
group = "all"
file-types = ["ocamlinterface"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-ocaml", rev = "91e4d9106f5da8199ab5158ba65e02da3e664597", path = "grammars/interface" }
external-scanner = { c = true, cpp = false }
Expand All @@ -497,7 +485,6 @@ locals = true
name = "php"
group = "most"
file-types = ["php"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-php", rev = "f3a19ab3217a6e838870fc7142fa492d1fd7a7c9", path = "php" }
external-scanner = { c = true, cpp = false }
Expand All @@ -514,7 +501,6 @@ locals = true
name = "python"
group = "some"
file-types = ["python"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-python", rev = "710796b8b877a970297106e5bbc8e2afa47f86ec" }
external-scanner = { c = true, cpp = false }
Expand Down Expand Up @@ -611,7 +597,6 @@ locals = true
name = "scala"
group = "most"
file-types = ["scala", "sbt"]
wasm-unknown = false
[languages.parser]
git = { url = "https://github.com/tree-sitter/tree-sitter-scala", rev = "d67fe3ba3879bd7ba74bd7f56c9a38fed241ba25" }
external-scanner = { c = true, cpp = false }
Expand Down
5 changes: 4 additions & 1 deletion syntastica-parsers-git/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@ document-features = { workspace = true, optional = true }
once_cell.workspace = true
strum = { workspace = true, features = ["derive", "phf"] }

[target.wasm32-unknown-unknown.dependencies]
# TODO: switch to official when https://github.com/trevyn/wasm32-unknown-unknown-openbsd-libc/pull/3 is merged
wasm32-unknown-unknown-openbsd-libc = { version = "0.2.1", package = "wasm32-unknown-unknown-openbsd-libc-wctypes-fix" }

[build-dependencies]
syntastica-macros.workspace = true

cc.workspace = true
rustc_version = { workspace = true, optional = true }
tree-sitter-wasm-build-tool = { version = "0.2.2", path = "../tree-sitter-wasm-build-tool/" }

[package.metadata.docs.rs]
all-features = true
Expand Down
19 changes: 12 additions & 7 deletions syntastica-parsers-git/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ fn compile_parser(

// external cpp scanners are not supported on the `wasm32-unknown-unknown` target
// plus extra cases for parsers which require additional libc features
if target == "wasm32-unknown-unknown" && (external_cpp || !wasm_unknown) {
if target == "wasm32-unknown-unknown" && !wasm_unknown {
return Ok(());
}

Expand Down Expand Up @@ -103,7 +103,7 @@ fn compile_parser(
}
}

// clone repo into `parsers/{name}`, if it does not already exists
// clone repo into `parsers/{name}/{rev}`, if it does not already exist
let repo_dir = clone_dir.join(name).join(rev);
if !repo_dir.exists() {
println!("cloning repository for {name}");
Expand Down Expand Up @@ -138,13 +138,18 @@ fn compile_parser(
if external_c {
let scanner_path = src_dir.join("scanner.c");
c_config.file(&scanner_path);
println!("cargo::rerun-if-changed={}", scanner_path.to_str().unwrap());
println!("cargo::rerun-if-changed={}", scanner_path.display());
}

#[cfg(feature = "runtime-c2rust")]
tree_sitter_wasm_build_tool::add_wasm_headers(&mut c_config).unwrap();
if target == "wasm32-unknown-unknown" {
c_config.include(
// this is set by the `wasm32-unknown-unknown-openbsd-libc` crate
std::env::var_os("DEP_WASM32_UNKNOWN_UNKNOWN_OPENBSD_LIBC_INCLUDE")
.expect("failed to find wasm libc"),
);
}

println!("cargo::rerun-if-changed={}", parser_path.to_str().unwrap());
println!("cargo::rerun-if-changed={}", parser_path.display());
c_config.compile(&c_lib_name);
println!("finished building parser for {name}");

Expand All @@ -168,7 +173,7 @@ fn compile_parser(
.flag_if_supported("-w");
let scanner_path = src_dir.join("scanner.cc");
cpp_config.file(&scanner_path);
println!("cargo::rerun-if-changed={}", scanner_path.to_str().unwrap());
println!("cargo::rerun-if-changed={}", scanner_path.display());
cpp_config.compile(&cpp_lib_name);
println!("finished building cpp scanner for {name}");

Expand Down
115 changes: 115 additions & 0 deletions syntastica-parsers-git/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,118 @@
#![deny(missing_docs)]

syntastica_macros::parsers_ffi!();

/// Basic implementation of some libc functions that tree-sitter parsers can link to.
#[cfg(all(
feature = "runtime-c2rust",
target_arch = "wasm32",
target_vendor = "unknown",
target_os = "unknown",
target_env = ""
))]
mod wasm_c_bridge {
use std::{ffi::CStr, mem::MaybeUninit};

/// Wasm shim for C's `abort`: terminates the program by panicking.
///
/// C declares `abort` as `_Noreturn`, so the shim returns `!` to let the compiler enforce that
/// control never flows back into the calling C code.
///
/// <https://en.cppreference.com/w/c/program/abort>
#[no_mangle]
extern "C" fn abort() -> ! {
    panic!("program aborted");
}

/// Wasm shim for C's `towupper`: converts a wide character to uppercase.
///
/// The input is returned unchanged when it is not a valid Unicode scalar value or when its
/// uppercase form does not consist of exactly one character.
///
/// <https://en.cppreference.com/w/c/string/wide/towupper>
#[no_mangle]
extern "C" fn towupper(wc: u32) -> u32 {
    match char::from_u32(wc).map(char::to_uppercase) {
        // only a one-to-one mapping can be expressed in a single return value
        Some(mut mapped) if mapped.len() == 1 => mapped.next().map_or(wc, u32::from),
        _ => wc,
    }
}

/// Wasm shim for C's `towlower`: converts a wide character to lowercase.
///
/// The input is returned unchanged when it is not a valid Unicode scalar value or when its
/// lowercase form does not consist of exactly one character.
///
/// <https://en.cppreference.com/w/c/string/wide/towlower>
#[no_mangle]
extern "C" fn towlower(wc: u32) -> u32 {
    let Some(mut lowered) = char::from_u32(wc).map(char::to_lowercase) else {
        return wc;
    };
    // only a one-to-one mapping can be expressed in a single return value
    if lowered.len() != 1 {
        return wc;
    }
    lowered.next().map_or(wc, u32::from)
}

/// Alignment, in bytes, of every pointer handed out by `malloc`, `calloc` and `realloc`.
///
/// C code may store any scalar type in the returned memory, so byte alignment (as provided by a
/// `Box<[u8]>`) is not sufficient. 16 is assumed to cover `max_align_t` on the wasm target.
const ALLOC_ALIGN: usize = 16;

/// Number of bytes reserved in front of every allocation to remember its requested size.
///
/// Kept equal to `ALLOC_ALIGN` so that the pointer handed to C stays aligned.
const ALLOC_HEADER: usize = ALLOC_ALIGN;

/// Computes the layout of the backing allocation (header + payload) for a request of `size`
/// bytes, or `None` if the total size is not representable.
fn alloc_layout(size: usize) -> Option<std::alloc::Layout> {
    let total = size.checked_add(ALLOC_HEADER)?;
    std::alloc::Layout::from_size_align(total, ALLOC_ALIGN).ok()
}

/// Allocates `size` payload bytes preceded by a header storing `size`.
///
/// Returns a pointer to the payload, or a null pointer if the allocation failed.
fn alloc_with_header(size: usize, zeroed: bool) -> *mut u8 {
    let Some(layout) = alloc_layout(size) else {
        return std::ptr::null_mut();
    };
    // SAFETY: `layout` never has a size of zero because it always includes the header.
    let base = unsafe {
        if zeroed {
            std::alloc::alloc_zeroed(layout)
        } else {
            std::alloc::alloc(layout)
        }
    };
    if base.is_null() {
        return base;
    }
    // SAFETY: `base` points to at least `ALLOC_HEADER` bytes aligned to `ALLOC_ALIGN`, which is
    // enough room and alignment for one `usize`; the payload starts right after the header.
    unsafe {
        base.cast::<usize>().write(size);
        base.add(ALLOC_HEADER)
    }
}

/// Wasm shim for C's `malloc`: allocates `size` uninitialized bytes.
///
/// Returns a null pointer if the allocation fails. The result must only be released or resized
/// through the `free` and `realloc` shims of this module.
///
/// <https://en.cppreference.com/w/c/memory/malloc>
#[no_mangle]
extern "C" fn malloc(size: usize) -> *mut MaybeUninit<u8> {
    alloc_with_header(size, false).cast()
}

/// Wasm shim for C's `calloc`: allocates zero-initialized memory for `num` objects of `size`
/// bytes each.
///
/// Returns a null pointer if `num * size` overflows or the allocation fails.
///
/// <https://en.cppreference.com/w/c/memory/calloc>
#[no_mangle]
extern "C" fn calloc(num: usize, size: usize) -> *mut u8 {
    match num.checked_mul(size) {
        Some(total) => alloc_with_header(total, true),
        None => std::ptr::null_mut(),
    }
}

/// Wasm shim for C's `realloc`: resizes the allocation at `ptr` to `new_size` bytes, preserving
/// the contents up to the smaller of the old and new sizes.
///
/// A null `ptr` behaves like `malloc(new_size)`. On failure a null pointer is returned and the
/// original allocation is left untouched. A `new_size` of zero yields a valid, empty allocation.
/// `ptr` must be null or originate from `malloc`, `calloc` or `realloc` of this module.
///
/// <https://en.cppreference.com/w/c/memory/realloc>
#[no_mangle]
extern "C" fn realloc(ptr: *mut u8, new_size: usize) -> *mut MaybeUninit<u8> {
    if ptr.is_null() {
        return malloc(new_size);
    }
    let Some(new_layout) = alloc_layout(new_size) else {
        return std::ptr::null_mut();
    };
    // SAFETY: a non-null `ptr` was produced by `alloc_with_header`, so the header directly in
    // front of it holds the payload size that the backing allocation was created with, and
    // `new_layout` has already been validated by `alloc_layout`.
    unsafe {
        let base = ptr.sub(ALLOC_HEADER);
        let old_size = base.cast::<usize>().read();
        let old_layout =
            std::alloc::Layout::from_size_align_unchecked(old_size + ALLOC_HEADER, ALLOC_ALIGN);
        let new_base = std::alloc::realloc(base, old_layout, new_layout.size());
        if new_base.is_null() {
            return std::ptr::null_mut();
        }
        // the old header was copied along with the payload, so it has to be refreshed
        new_base.cast::<usize>().write(new_size);
        new_base.add(ALLOC_HEADER).cast()
    }
}

/// Wasm shim for C's `free`: releases memory obtained from `malloc`, `calloc` or `realloc` of
/// this module. Passing a null pointer is a no-op.
///
/// <https://en.cppreference.com/w/c/memory/free>
#[no_mangle]
extern "C" fn free(ptr: *mut u8) {
    if ptr.is_null() {
        return;
    }
    // SAFETY: a non-null `ptr` was produced by `alloc_with_header`, so the header directly in
    // front of it holds the payload size that the backing allocation was created with.
    unsafe {
        let base = ptr.sub(ALLOC_HEADER);
        let size = base.cast::<usize>().read();
        let layout = std::alloc::Layout::from_size_align_unchecked(size + ALLOC_HEADER, ALLOC_ALIGN);
        std::alloc::dealloc(base, layout);
    }
}

/// Lossily decodes a NUL-terminated C string, falling back to `"<unknown>"` for null pointers.
fn c_str_or_unknown<'a>(ptr: *const i8) -> std::borrow::Cow<'a, str> {
    if ptr.is_null() {
        return std::borrow::Cow::Borrowed("<unknown>");
    }
    // SAFETY: `ptr` is non-null and is expected to point to a NUL-terminated string that stays
    // alive for the duration of the call, as is the case for the literals passed by `assert`.
    unsafe { CStr::from_ptr(ptr.cast()) }.to_string_lossy()
}

/// Wasm shim for the `__assert2` hook invoked by the C `assert` macro when an assertion fails.
///
/// Panics with a message naming the source `file`, `line`, enclosing `func` and the failed
/// expression `error`. Null string pointers are reported as `<unknown>` instead of being
/// dereferenced. This function never returns.
#[no_mangle]
extern "C" fn __assert2(file: *const i8, line: i32, func: *const i8, error: *const i8) -> ! {
    let file = c_str_or_unknown(file);
    let func = c_str_or_unknown(func);
    let error = c_str_or_unknown(error);
    panic!(
        "assertion failed in {} on line {} in {}: {}",
        file, line, func, error
    );
}

/// Wasm shim for C's `strcmp`: lexicographically compares two NUL-terminated strings byte by
/// byte.
///
/// Returns `-1`, `0` or `1` if `lhs` sorts before, equal to or after `rhs` respectively.
///
/// <https://en.cppreference.com/w/c/string/byte/strcmp>
#[no_mangle]
extern "C" fn strcmp(lhs: *const i8, rhs: *const i8) -> i32 {
    // SAFETY: the C contract of `strcmp` requires both arguments to be valid NUL-terminated
    // strings.
    let (left, right) = unsafe { (CStr::from_ptr(lhs.cast()), CStr::from_ptr(rhs.cast())) };
    match left.to_bytes().cmp(right.to_bytes()) {
        std::cmp::Ordering::Less => -1,
        std::cmp::Ordering::Equal => 0,
        std::cmp::Ordering::Greater => 1,
    }
}

/// Wasm shim for C's `strncpy`: copies at most `count` characters of the NUL-terminated string
/// `src` into `dest` and returns `dest`.
///
/// Reading from `src` stops at its terminator; if that is reached before `count` characters
/// were written, the remainder of `dest` is filled with NUL characters. When `src` holds `count`
/// or more characters, `dest` is left without a terminator, exactly as in C.
///
/// <https://en.cppreference.com/w/c/string/byte/strncpy>
#[no_mangle]
extern "C" fn strncpy(dest: *mut i8, src: *const i8, count: usize) -> *mut i8 {
    let mut terminated = false;
    for i in 0..count {
        // never read past the terminator of `src`; pad with NULs from there on
        let ch = if terminated {
            0
        } else {
            // SAFETY: no terminator has been seen yet, so index `i` still lies within the
            // NUL-terminated string that the caller must provide in `src`.
            unsafe { src.add(i).read() }
        };
        terminated |= ch == 0;
        // SAFETY: the caller guarantees that `dest` is valid for writes of `count` characters.
        unsafe { dest.add(i).write(ch) };
    }
    dest
}

/// Wasm shim for C's `memchr`: searches the first `count` bytes starting at `ptr` for `ch`.
///
/// `ch` is truncated to a byte before comparing. Returns a pointer to the first matching byte,
/// or a null pointer if none of the `count` bytes match.
///
/// <https://en.cppreference.com/w/c/string/byte/memchr>
#[no_mangle]
extern "C" fn memchr(ptr: *const u8, ch: i32, count: usize) -> *mut u8 {
    let needle = ch as u8;
    (0..count)
        // SAFETY: the caller guarantees that `ptr` is valid for reads of `count` bytes.
        .map(|offset| unsafe { ptr.add(offset) })
        .find(|&cursor| {
            // SAFETY: `cursor` lies within the `count` readable bytes starting at `ptr`.
            let byte = unsafe { cursor.read() };
            byte == needle
        })
        .map_or(std::ptr::null_mut(), |hit| hit as *mut u8)
}
}

0 comments on commit ccb3448

Please sign in to comment.