diff --git a/README.md b/README.md index 4b71ab442..6226b649f 100644 --- a/README.md +++ b/README.md @@ -212,14 +212,20 @@ feature flags can be used to use the [c2rust transpilation of tree-sitter](https://crates.io/crates/tree-sitter-c2rust) instead of the [official C implementation](https://crates.io/crates/tree-sitter). This is only -supported by the `syntastica-parsers-git` parser collection, and only parsers -that don't use an external C++ scanner are available. +supported by the `syntastica-parsers-git` parser collection as only that +collection has enough control over the parser build process to allow for Wasm +support. To use this approach, simply set `default-features = false` and enable the `runtime-c2rust` feature for _all_ `syntastica` dependencies. An example using this approach for use of `syntastica` in a Dioxus project can be found [here](https://github.com/RubixDev/syntastica/tree/main/examples/wasm/dioxus). +Note that for extra safety, you should compile this with nightly Rust and the +`-Zwasm_c_abi=spec` option set, as otherwise the `wasm32-unknown-unknown` target +doesn't yet conform with the C ABI. In my personal testing I haven't had any +issues on stable Rust either though. + ### 2. Using Emscripten / the `syntastica-js` package `syntastica` can also be compiled to `wasm32-unknown-emscripten` which has much @@ -239,7 +245,9 @@ There are three examples using `syntastica-js`: - [Usage from JavaScript in NodeJS for console applications](https://github.com/RubixDev/syntastica/tree/main/examples/wasm/node) - [Usage from Rust in the browser using wasm-bindgen](https://github.com/RubixDev/syntastica/tree/main/examples/wasm/wasm-pack-with-npm-pkg) -> **Note** +
+ +> [!WARNING] > > The `syntastica` NPM package is currently not being updated and uses an old > version of `syntastica`, because the current implementation always includes @@ -248,6 +256,8 @@ There are three examples using `syntastica-js`: > package into multiple binaries that can be fetched from a server on-demand, > and to provide multiple NPM packages for manual selection of the parsers. +
+ ## Examples This section contains some basic usage examples. More specific examples can be @@ -504,3 +514,5 @@ possible replacement for [`syntect`](https://crates.io/crates/syntect). The main difference from `lirstings` at the start was the parser collection(s), providing a rigid set of parsers and queries for users. Over time `syntastica` then grew to the big project it is today. + +[!WARNING]: . diff --git a/syntastica-macros/languages.toml b/syntastica-macros/languages.toml index d981530ac..ad4190da8 100644 --- a/syntastica-macros/languages.toml +++ b/syntastica-macros/languages.toml @@ -18,7 +18,6 @@ locals = false name = "bash" group = "some" file-types = ["sh", "bash", "ksh"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/tree-sitter/tree-sitter-bash", rev = "0c46d792d54c536be5ff7eb18eb95c70fccdb232" } external-scanner = { c = true, cpp = false } @@ -83,7 +82,6 @@ locals = false name = "cpp" group = "some" file-types = ["cpp"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/tree-sitter/tree-sitter-cpp", rev = "e5cea0ec884c5c3d2d1e41a741a66ce13da4d945" } external-scanner = { c = true, cpp = false } @@ -213,7 +211,6 @@ locals = true name = "haskell" group = "most" file-types = ["haskell"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/tree-sitter/tree-sitter-haskell", rev = "0975ef72fc3c47b530309ca93937d7d143523628" } external-scanner = { c = true, cpp = false } @@ -246,7 +243,6 @@ locals = false name = "html" group = "some" file-types = ["html"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/tree-sitter/tree-sitter-html", rev = "cbb91a0ff3621245e890d1c50cc811bffb77a26b" } external-scanner = { c = true, cpp = false } @@ -359,7 +355,6 @@ locals = false name = "julia" group = "all" file-types = ["julia"] -wasm = false [languages.parser] git = { url = "https://github.com/tree-sitter/tree-sitter-julia", rev = "3f9870ca1f419e480f39fc181fcfb6dad480969a" } 
external-scanner = { c = true, cpp = false } @@ -394,7 +389,6 @@ locals = true name = "llvm" group = "all" file-types = ["llvm"] -wasm = false [languages.parser] git = { url = "https://github.com/benwilliamgraham/tree-sitter-llvm", rev = "c14cb839003348692158b845db9edda201374548" } external-scanner = { c = false, cpp = false } @@ -411,7 +405,6 @@ locals = false name = "lua" group = "some" file-types = ["lua"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/muniftanjim/tree-sitter-lua", rev = "68d29aa745b68ae22cbbdb5dcb68c20232521ff6" } external-scanner = { c = true, cpp = false } @@ -428,7 +421,6 @@ locals = true name = "markdown" group = "most" file-types = ["markdown"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/MDeiml/tree-sitter-markdown", rev = "192407ab5a24bfc24f13332979b5e7967518754a", path = "tree-sitter-markdown" } external-scanner = { c = true, cpp = false } @@ -444,9 +436,7 @@ locals = false [[languages]] name = "markdown_inline" group = "most" -# TODO: markdown filetype for both markdown parsers? 
-file-types = ["markdown"] -wasm-unknown = false +file-types = [] [languages.parser] git = { url = "https://github.com/MDeiml/tree-sitter-markdown", rev = "192407ab5a24bfc24f13332979b5e7967518754a", path = "tree-sitter-markdown-inline" } external-scanner = { c = true, cpp = false } @@ -463,7 +453,6 @@ locals = false name = "ocaml" group = "all" file-types = ["ocaml"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/tree-sitter/tree-sitter-ocaml", rev = "91e4d9106f5da8199ab5158ba65e02da3e664597", path = "grammars/ocaml" } external-scanner = { c = true, cpp = false } @@ -480,7 +469,6 @@ locals = true name = "ocaml_interface" group = "all" file-types = ["ocamlinterface"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/tree-sitter/tree-sitter-ocaml", rev = "91e4d9106f5da8199ab5158ba65e02da3e664597", path = "grammars/interface" } external-scanner = { c = true, cpp = false } @@ -497,7 +485,6 @@ locals = true name = "php" group = "most" file-types = ["php"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/tree-sitter/tree-sitter-php", rev = "f3a19ab3217a6e838870fc7142fa492d1fd7a7c9", path = "php" } external-scanner = { c = true, cpp = false } @@ -514,7 +501,6 @@ locals = true name = "python" group = "some" file-types = ["python"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/tree-sitter/tree-sitter-python", rev = "710796b8b877a970297106e5bbc8e2afa47f86ec" } external-scanner = { c = true, cpp = false } @@ -611,7 +597,6 @@ locals = true name = "scala" group = "most" file-types = ["scala", "sbt"] -wasm-unknown = false [languages.parser] git = { url = "https://github.com/tree-sitter/tree-sitter-scala", rev = "d67fe3ba3879bd7ba74bd7f56c9a38fed241ba25" } external-scanner = { c = true, cpp = false } diff --git a/syntastica-parsers-git/Cargo.toml b/syntastica-parsers-git/Cargo.toml index 66711ee7a..1c8f88507 100644 --- a/syntastica-parsers-git/Cargo.toml +++ 
b/syntastica-parsers-git/Cargo.toml @@ -20,12 +20,15 @@ document-features = { workspace = true, optional = true } once_cell.workspace = true strum = { workspace = true, features = ["derive", "phf"] } +[target.wasm32-unknown-unknown.dependencies] +# TODO: switch to official when https://github.com/trevyn/wasm32-unknown-unknown-openbsd-libc/pull/3 is merged +wasm32-unknown-unknown-openbsd-libc = { version = "0.2.1", package = "wasm32-unknown-unknown-openbsd-libc-wctypes-fix" } + [build-dependencies] syntastica-macros.workspace = true cc.workspace = true rustc_version = { workspace = true, optional = true } -tree-sitter-wasm-build-tool = { version = "0.2.2", path = "../tree-sitter-wasm-build-tool/" } [package.metadata.docs.rs] all-features = true diff --git a/syntastica-parsers-git/build.rs b/syntastica-parsers-git/build.rs index b3c50c0a2..c9561fb25 100644 --- a/syntastica-parsers-git/build.rs +++ b/syntastica-parsers-git/build.rs @@ -67,7 +67,7 @@ fn compile_parser( // external cpp scanners are not supported on the `wasm32-unknown-unknown` target // plus extra cases for parsers which require additional libc features - if target == "wasm32-unknown-unknown" && (external_cpp || !wasm_unknown) { + if target == "wasm32-unknown-unknown" && !wasm_unknown { return Ok(()); } @@ -103,7 +103,7 @@ fn compile_parser( } } - // clone repo into `parsers/{name}`, if it does not already exists + // clone repo into `parsers/{name}/{rev}`, if it does not already exist let repo_dir = clone_dir.join(name).join(rev); if !repo_dir.exists() { println!("cloning repository for {name}"); @@ -138,13 +138,18 @@ fn compile_parser( if external_c { let scanner_path = src_dir.join("scanner.c"); c_config.file(&scanner_path); - println!("cargo::rerun-if-changed={}", scanner_path.to_str().unwrap()); + println!("cargo::rerun-if-changed={}", scanner_path.display()); } - #[cfg(feature = "runtime-c2rust")] - tree_sitter_wasm_build_tool::add_wasm_headers(&mut c_config).unwrap(); + if target == 
"wasm32-unknown-unknown" { + c_config.include( + // this is set by the `wasm32-unknown-unknown-openbsd-libc` crate + std::env::var_os("DEP_WASM32_UNKNOWN_UNKNOWN_OPENBSD_LIBC_INCLUDE") + .expect("failed to find wasm libc"), + ); + } - println!("cargo::rerun-if-changed={}", parser_path.to_str().unwrap()); + println!("cargo::rerun-if-changed={}", parser_path.display()); c_config.compile(&c_lib_name); println!("finished building parser for {name}"); @@ -168,7 +173,7 @@ fn compile_parser( .flag_if_supported("-w"); let scanner_path = src_dir.join("scanner.cc"); cpp_config.file(&scanner_path); - println!("cargo::rerun-if-changed={}", scanner_path.to_str().unwrap()); + println!("cargo::rerun-if-changed={}", scanner_path.display()); cpp_config.compile(&cpp_lib_name); println!("finished building cpp scanner for {name}"); diff --git a/syntastica-parsers-git/src/lib.rs b/syntastica-parsers-git/src/lib.rs index c346271c6..1d3e67996 100644 --- a/syntastica-parsers-git/src/lib.rs +++ b/syntastica-parsers-git/src/lib.rs @@ -9,3 +9,118 @@ #![deny(missing_docs)] syntastica_macros::parsers_ffi!(); + +/// Basic implementation of some libc functions that tree-sitter parsers can link to. 
#[cfg(all(
    feature = "runtime-c2rust",
    target_arch = "wasm32",
    target_vendor = "unknown",
    target_os = "unknown",
    target_env = ""
))]
mod wasm_c_bridge {
    use std::{
        alloc::{self, Layout},
        ffi::{c_char, CStr},
        mem::MaybeUninit,
    };

    /// Alignment of every block handed out by [`malloc`], [`calloc`] and [`realloc`].
    ///
    /// C code may store any scalar type in the returned memory, so blocks are aligned generously
    /// instead of relying on the 1-byte alignment a `[u8]` allocation would guarantee.
    const BLOCK_ALIGN: usize = 16;

    /// Number of bytes reserved in front of every block to remember its usable size.
    ///
    /// Equal to [`BLOCK_ALIGN`] so that the pointer handed to C keeps that alignment.
    const HEADER_SIZE: usize = BLOCK_ALIGN;

    /// Computes the layout of a block with `size` usable bytes plus its size header.
    ///
    /// Returns `None` when the total size overflows or is too large for a [`Layout`].
    fn block_layout(size: usize) -> Option<Layout> {
        let total = size.checked_add(HEADER_SIZE)?;
        Layout::from_size_align(total, BLOCK_ALIGN).ok()
    }

    /// Allocates a block with `size` usable bytes and records `size` in its header.
    ///
    /// Returns a pointer to the usable part of the block, or null if the request is too large or
    /// the allocator fails. The usable bytes are zeroed when `zeroed` is set.
    fn allocate(size: usize, zeroed: bool) -> *mut u8 {
        let Some(layout) = block_layout(size) else {
            return std::ptr::null_mut();
        };
        // SAFETY: `layout` always includes the header, so its size is never zero.
        let base = unsafe {
            if zeroed {
                alloc::alloc_zeroed(layout)
            } else {
                alloc::alloc(layout)
            }
        };
        if base.is_null() {
            return std::ptr::null_mut();
        }
        // SAFETY: `base` points to at least `HEADER_SIZE` bytes aligned to `BLOCK_ALIGN`, which
        // is enough room and alignment for a `usize`; the offset stays inside the allocation.
        unsafe {
            base.cast::<usize>().write(size);
            base.add(HEADER_SIZE)
        }
    }

    /// Recovers the start of the underlying allocation and its layout from a block pointer.
    ///
    /// # Safety
    ///
    /// `block` must have been returned by [`malloc`], [`calloc`] or [`realloc`] and must not
    /// have been freed or reallocated since.
    unsafe fn block_of(block: *mut u8) -> (*mut u8, Layout) {
        // SAFETY: per the contract above, a header written by `allocate` precedes `block`, and
        // the size stored in it was validated by `block_layout` when the block was created.
        unsafe {
            let base = block.sub(HEADER_SIZE);
            let size = base.cast::<usize>().read();
            (
                base,
                Layout::from_size_align_unchecked(size + HEADER_SIZE, BLOCK_ALIGN),
            )
        }
    }

    /// Applies a case conversion to the wide character `wc`.
    ///
    /// `wc` is returned unchanged when it is not a valid Unicode scalar value or when the
    /// conversion does not produce exactly one character.
    fn convert_case<I>(wc: u32, convert: impl FnOnce(char) -> I) -> u32
    where
        I: Iterator<Item = char>,
    {
        let Some(ch) = char::from_u32(wc) else {
            return wc;
        };
        let mut converted = convert(ch);
        match (converted.next(), converted.next()) {
            (Some(single), None) => single as u32,
            _ => wc,
        }
    }

    /// C `abort`: terminates the program by panicking.
    #[no_mangle]
    extern "C" fn abort() -> ! {
        panic!("program aborted");
    }

    /// C `towupper`: converts a wide character to uppercase if a single-character mapping exists.
    #[no_mangle]
    extern "C" fn towupper(wc: u32) -> u32 {
        convert_case(wc, char::to_uppercase)
    }

    /// C `towlower`: converts a wide character to lowercase if a single-character mapping exists.
    #[no_mangle]
    extern "C" fn towlower(wc: u32) -> u32 {
        convert_case(wc, char::to_lowercase)
    }

    /// C `malloc`: allocates `size` uninitialized bytes, returning null on failure.
    #[no_mangle]
    extern "C" fn malloc(size: usize) -> *mut MaybeUninit<u8> {
        allocate(size, false).cast()
    }

    /// C `calloc`: allocates zeroed memory for `num` objects of `size` bytes each.
    ///
    /// Returns null on failure or when `num * size` overflows.
    #[no_mangle]
    extern "C" fn calloc(num: usize, size: usize) -> *mut u8 {
        match num.checked_mul(size) {
            Some(total) => allocate(total, true),
            None => std::ptr::null_mut(),
        }
    }

    /// C `realloc`: resizes the block at `ptr` to `new_size` bytes, preserving its contents up to
    /// the smaller of the old and new sizes.
    ///
    /// A null `ptr` behaves like [`malloc`]. On failure null is returned and the original block
    /// stays valid.
    #[no_mangle]
    extern "C" fn realloc(ptr: *mut u8, new_size: usize) -> *mut MaybeUninit<u8> {
        if ptr.is_null() {
            return malloc(new_size);
        }
        let Some(new_layout) = block_layout(new_size) else {
            return std::ptr::null_mut();
        };
        // SAFETY: the C contract requires a non-null `ptr` to come from this allocator and to
        // still be live; `new_layout` proves the new total size is valid for `BLOCK_ALIGN`.
        let new_base = unsafe {
            let (base, old_layout) = block_of(ptr);
            alloc::realloc(base, old_layout, new_layout.size())
        };
        if new_base.is_null() {
            return std::ptr::null_mut();
        }
        // SAFETY: `new_base` points to `new_size + HEADER_SIZE` bytes aligned to `BLOCK_ALIGN`.
        unsafe {
            new_base.cast::<usize>().write(new_size);
            new_base.add(HEADER_SIZE).cast()
        }
    }

    /// C `free`: releases a block obtained from [`malloc`], [`calloc`] or [`realloc`].
    ///
    /// Passing null is a no-op.
    #[no_mangle]
    extern "C" fn free(ptr: *mut u8) {
        if ptr.is_null() {
            return;
        }
        // SAFETY: the C contract requires a non-null `ptr` to come from this allocator and to
        // still be live, so the recovered base pointer and layout match the allocation.
        unsafe {
            let (base, layout) = block_of(ptr);
            alloc::dealloc(base, layout);
        }
    }

    /// Assertion failure handler called by C's `assert` macro; panics with the failure location.
    #[no_mangle]
    extern "C" fn __assert2(
        file: *const c_char,
        line: i32,
        func: *const c_char,
        error: *const c_char,
    ) -> ! {
        // SAFETY: the caller passes pointers to NUL-terminated strings.
        let file = unsafe { CStr::from_ptr(file) }.to_string_lossy();
        let func = unsafe { CStr::from_ptr(func) }.to_string_lossy();
        let error = unsafe { CStr::from_ptr(error) }.to_string_lossy();
        panic!("assertion failed in {file} on line {line} in {func}: {error}");
    }

    /// C `strcmp`: compares two NUL-terminated strings byte-wise.
    ///
    /// Returns `-1`, `0` or `1` when `lhs` sorts before, equal to, or after `rhs`.
    #[no_mangle]
    extern "C" fn strcmp(lhs: *const c_char, rhs: *const c_char) -> i32 {
        // SAFETY: the caller passes pointers to NUL-terminated strings.
        let (lhs, rhs) = unsafe { (CStr::from_ptr(lhs), CStr::from_ptr(rhs)) };
        lhs.cmp(rhs) as i32
    }

    /// C `strncpy`: copies at most `count` characters from `src` to `dest`.
    ///
    /// Copying stops after the NUL terminator of `src`; the rest of `dest` up to `count` is
    /// filled with zeros. Returns `dest`.
    #[no_mangle]
    extern "C" fn strncpy(dest: *mut c_char, src: *const c_char, count: usize) -> *mut c_char {
        let mut terminated = false;
        for offset in 0..count {
            let ch = if terminated {
                0
            } else {
                // SAFETY: the terminator has not been reached yet, so `src + offset` is still
                // inside the source string.
                unsafe { src.add(offset).read() }
            };
            terminated |= ch == 0;
            // SAFETY: the caller guarantees `dest` has room for `count` characters.
            unsafe { dest.add(offset).write(ch) };
        }
        dest
    }

    /// C `memchr`: finds the first occurrence of the byte `ch` in the first `count` bytes at
    /// `ptr`, returning a pointer to it or null if it does not occur.
    #[no_mangle]
    extern "C" fn memchr(ptr: *const u8, ch: i32, count: usize) -> *mut u8 {
        // C converts the needle to `unsigned char` before comparing
        let needle = ch as u8;
        (0..count)
            // SAFETY: the caller guarantees the buffer is readable up to the first match or for
            // `count` bytes; offsets are visited in order and the search stops at the first hit.
            .map(|offset| unsafe { ptr.add(offset) })
            .find(|&candidate| unsafe { candidate.read() == needle })
            .map_or(std::ptr::null_mut(), |found| found.cast_mut())
    }
}