Skip to content

Commit

Permalink
levenshtein/regex: move levenshtein, remove regex
Browse files Browse the repository at this point in the history
This is where the Levenshtein automaton originally lived. I had moved it
out of the crate in order to reduce dependencies, but we can instead just
make it an optional feature.

Rather than moving fst-regex back into the crate as well, we recommend
using `regex-automata`. `regex-automata` is basically the productionized
version of "compile a regex to a DFA," which is exactly what we want.
  • Loading branch information
BurntSushi committed Feb 22, 2020
1 parent cb04b44 commit 93c17cb
Show file tree
Hide file tree
Showing 11 changed files with 511 additions and 170 deletions.
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ license = "Unlicense/MIT"
edition = "2018"

[features]
mmap = ["memmap"]
default = ["mmap"]
mmap = ["memmap"]
levenshtein = ["utf8-ranges"]

[[bench]]
name = "build"
Expand All @@ -33,11 +34,10 @@ bench = true
[dependencies]
byteorder = "1"
memmap = { version = "0.6.0", optional = true }
utf8-ranges = { version = "1", optional = true }

[dev-dependencies]
fnv = "1.0.5"
fst-levenshtein = { version = "0.2", path = "fst-levenshtein" }
fst-regex = { version = "0.2", path = "fst-regex" }
lazy_static = "0.2.8"
quickcheck = { version = "0.7", default-features = false }
rand = "0.5"
Expand Down
5 changes: 2 additions & 3 deletions fst-bin/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,10 @@ bit-set = "0.4"
chan = "0.1"
csv = "1.1.3"
docopt = "1.1"
fst = { path = "..", version = "0.3" }
fst-regex = { path = "../fst-regex", version = "0.2" }
fst-levenshtein = { path = "../fst-levenshtein", version = "0.2" }
fst = { path = "..", version = "0.3", features = ["levenshtein"] }
lines = "0.0"
num_cpus = "1.5"
regex-automata = { version = "*", path = "/home/andrew/rust/regex-automata", features = ["fst1"] }
serde = { version = "1.0.104", features = ["derive"] }
tempdir = "0.3"

Expand Down
2 changes: 1 addition & 1 deletion fst-bin/src/cmd/fuzzy.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::io;

use docopt::Docopt;
use fst::levenshtein::Levenshtein;
use fst::raw::Fst;
use fst_levenshtein::Levenshtein;
use serde::Deserialize;

use crate::util;
Expand Down
19 changes: 16 additions & 3 deletions fst-bin/src/cmd/grep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use std::io;

use docopt::Docopt;
use fst::raw::Fst;
use fst_regex::Regex;
use regex_automata::dense;
// use regex_automata::sparse::SparseDFA;
use serde::Deserialize;

use crate::util;
Expand Down Expand Up @@ -40,8 +41,20 @@ pub fn run(argv: Vec<String>) -> Result<(), Error> {
.and_then(|d| d.argv(&argv).deserialize())
.unwrap_or_else(|e| e.exit());
let fst = unsafe { Fst::from_path(&args.arg_fst) }?;
let lev = Regex::new(&args.arg_regex)?;
let mut q = fst.search(&lev);
let dense_dfa = dense::Builder::new()
.anchored(true)
.byte_classes(false)
.premultiply(false)
.build(&args.arg_regex)?;
let dfa = match dense_dfa {
dense::DenseDFA::Standard(dfa) => dfa,
_ => unreachable!(),
};
// let dfa = match dense_dfa.to_sparse()? {
// SparseDFA::Standard(dfa) => dfa,
// _ => unreachable!(),
// };
let mut q = fst.search(&dfa);
if let Some(ref start) = args.flag_start {
q = q.ge(start);
}
Expand Down
4 changes: 2 additions & 2 deletions fst-levenshtein/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[package]
name = "fst-levenshtein"
version = "0.2.1" #:version
version = "0.3.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Search finite state transducers with fuzzy queries using Levenshtein automata.
DEPRECATED. Use 'fst' crate with 'levenshtein' feature instead.
"""
documentation = "https://docs.rs/fst-levenshtein"
homepage = "https://github.com/BurntSushi/fst"
Expand Down
4 changes: 2 additions & 2 deletions fst-regex/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[package]
name = "fst-regex"
version = "0.2.2" #:version
version = "0.3.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Search finite state transducers with regular expression.
DEPRECATED. Use 'regex-automata' crate with 'fst1' feature instead.
"""
documentation = "https://docs.rs/fst-regex"
homepage = "https://github.com/BurntSushi/fst"
Expand Down
Loading

0 comments on commit 93c17cb

Please sign in to comment.