Skip to content

Commit

Permalink
polish: more polishing work
Browse files Browse the repository at this point in the history
Touching up docs, restyling code in places, etc.
  • Loading branch information
BurntSushi committed Mar 7, 2020
1 parent 8a368ba commit 650480c
Show file tree
Hide file tree
Showing 17 changed files with 154 additions and 177 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
include:
- build: pinned
os: ubuntu-18.04
rust: 1.39.0
rust: 1.40.0
- build: stable
os: ubuntu-18.04
rust: stable
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ levenshtein = ["utf8-ranges"]
utf8-ranges = { version = "1.0.4", optional = true }

[dev-dependencies]
doc-comment = "0.3.1"
fnv = "1.0.6"
memmap = "0.7"
quickcheck = { version = "0.9.2", default-features = false }
Expand Down
39 changes: 16 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,10 @@ Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
https://docs.rs/fst

The
[`fst-regex`](https://docs.rs/fst-regex)
and
[`fst-levenshtein`](https://docs.rs/fst-levenshtein)
crates provide regular expression matching and fuzzy searching on FSTs,
respectively.
[`regex-automata`](https://docs.rs/regex-automata)
crate provides implementations of the `fst::Automaton` trait when its
`transducer` feature is enabled. This permits using DFAs compiled by
`regex-automata` to search finite state transducers produced by this crate.


### Installation
Expand All @@ -36,27 +35,21 @@ Simply add a corresponding entry to your `Cargo.toml` dependency list:

```toml,ignore
[dependencies]
fst = "0.3"
fst = "0.4"
```


### Example

This example demonstrates building a set in memory and executing a fuzzy query
against it. You'll need `fst = "0.3"` and `fst-levenshtein = "0.2"` in your
`Cargo.toml`.
against it. You'll need `fst = "0.4"` with the `levenshtein` feature enabled in
your `Cargo.toml`.

```rust
extern crate fst;
extern crate fst_levenshtein;

use std::error::Error;
use std::process;

use fst::{IntoStreamer, Set};
use fst_levenshtein::Levenshtein;
use fst::automaton::Levenshtein;

fn try_main() -> Result<(), Box<Error>> {
fn main() -> Result<(), Box<dyn std::error::Error>> {
// A convenient way to create sets in memory.
let keys = vec!["fa", "fo", "fob", "focus", "foo", "food", "foul"];
let set = Set::from_iter(keys)?;
Expand All @@ -71,13 +64,13 @@ fn try_main() -> Result<(), Box<Error>> {
assert_eq!(keys, vec!["fo", "fob", "foo", "food"]);
Ok(())
}

fn main() {
if let Err(err) = try_main() {
eprintln!("{}", err);
process::exit(1);
}
}
```

Check out the documentation for a lot more examples!


### Cargo features

* `levenshtein` - **Disabled** by default. This adds the `Levenshtein`
automaton to the `automaton` sub-module. This includes an additional
dependency on `utf8-ranges`.
18 changes: 9 additions & 9 deletions fst-bin/src/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ impl<I> Merger<I>
where
I: Iterator<Item = Result<(BString, u64), Error>> + Send + 'static,
{
pub fn new<T, P>(it: T, output: P) -> Self
pub fn new<T, P>(it: T, output: P) -> Merger<I>
where
P: AsRef<Path>,
T: IntoIterator<IntoIter = I, Item = I::Item>,
Expand All @@ -49,35 +49,35 @@ where
}
}

pub fn value_merger<F>(mut self, f: F) -> Self
pub fn value_merger<F>(mut self, f: F) -> Merger<I>
where
F: Fn(u64, u64) -> u64 + Send + Sync + 'static,
{
self.value_merger = Some(Arc::new(f));
self
}

pub fn fd_limit(mut self, fd_limit: u32) -> Self {
pub fn fd_limit(mut self, fd_limit: u32) -> Merger<I> {
self.fd_limit = fd_limit;
self
}

pub fn batch_size(mut self, batch_size: u32) -> Self {
pub fn batch_size(mut self, batch_size: u32) -> Merger<I> {
self.batch_size = batch_size;
self
}

pub fn threads(mut self, threads: u32) -> Self {
pub fn threads(mut self, threads: u32) -> Merger<I> {
self.threads = threads;
self
}

pub fn tmp_dir<P: AsRef<Path>>(mut self, path: P) -> Self {
pub fn tmp_dir<P: AsRef<Path>>(mut self, path: P) -> Merger<I> {
self.tmp_dir = path.as_ref().to_path_buf();
self
}

pub fn keep_tmp_dir(mut self, yes: bool) -> Self {
pub fn keep_tmp_dir(mut self, yes: bool) -> Merger<I> {
self.keep_tmp_dir = yes;
self
}
Expand Down Expand Up @@ -117,7 +117,7 @@ where
for (i, union_batch) in batches.into_iter().enumerate() {
sorters.create_fst(UnionBatch {
tmp_dir: tmp_dir_path.clone(),
gen: gen,
gen,
index: i,
fsts: union_batch?,
value_merger: self.value_merger.clone(),
Expand Down Expand Up @@ -178,7 +178,7 @@ struct Sorters<B> {
}

impl<B: Batchable + Send + 'static> Sorters<B> {
fn new(threads: u32) -> Self {
fn new(threads: u32) -> Sorters<B> {
let (bsend, brecv) = chan::bounded::<B>(0);
let (rsend, rrecv) = chan::bounded(0);
for _ in 0..threads {
Expand Down
8 changes: 4 additions & 4 deletions fst-bin/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,14 @@ type Lines = bstr::io::ByteLines<
impl ConcatLines {
pub fn new(mut inputs: Vec<PathBuf>) -> ConcatLines {
inputs.reverse(); // treat it as a stack
ConcatLines { inputs: inputs, cur: None }
ConcatLines { inputs, cur: None }
}
}

impl Iterator for ConcatLines {
type Item = io::Result<BString>;

fn next(&mut self) -> Option<Self::Item> {
fn next(&mut self) -> Option<io::Result<BString>> {
loop {
if self.cur.is_none() {
match self.inputs.pop() {
Expand Down Expand Up @@ -143,7 +143,7 @@ type Rows = csv::DeserializeRecordsIntoIter<Reader, (BString, u64)>;
impl ConcatCsv {
pub fn new(mut inputs: Vec<PathBuf>) -> ConcatCsv {
inputs.reverse(); // treat it as a stack
ConcatCsv { inputs: inputs, cur: None }
ConcatCsv { inputs, cur: None }
}

fn read_row(&mut self) -> Option<Result<(BString, u64), Error>> {
Expand All @@ -162,7 +162,7 @@ impl ConcatCsv {
impl Iterator for ConcatCsv {
type Item = Result<(BString, u64), Error>;

fn next(&mut self) -> Option<Self::Item> {
fn next(&mut self) -> Option<Result<(BString, u64), Error>> {
loop {
if self.cur.is_none() {
match self.inputs.pop() {
Expand Down
17 changes: 6 additions & 11 deletions fst-levenshtein/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@



use std::cmp;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
Expand Down Expand Up @@ -90,13 +87,13 @@ impl Levenshtein {
/// A `Levenshtein` value satisfies the `Automaton` trait, which means it
/// can be used with the `search` method of any finite state transducer.
#[inline]
pub fn new(query: &str, distance: u32) -> Result<Self, Error> {
pub fn new(query: &str, distance: u32) -> Result<Levenshtein, Error> {
let lev = DynamicLevenshtein {
query: query.to_owned(),
dist: distance as usize,
};
let dfa = DfaBuilder::new(lev.clone()).build()?;
Ok(Levenshtein { prog: lev, dfa: dfa })
Ok(Levenshtein { prog: lev, dfa })
}
}

Expand Down Expand Up @@ -197,10 +194,10 @@ struct DfaBuilder {
}

impl DfaBuilder {
fn new(lev: DynamicLevenshtein) -> Self {
fn new(lev: DynamicLevenshtein) -> DfaBuilder {
DfaBuilder {
dfa: Dfa { states: Vec::with_capacity(16) },
lev: lev,
lev,
cache: HashMap::with_capacity(1024),
}
}
Expand Down Expand Up @@ -251,9 +248,7 @@ impl DfaBuilder {
Entry::Occupied(v) => (*v.get(), true),
Entry::Vacant(v) => {
let is_match = self.lev.is_match(lev_state);
self.dfa
.states
.push(State { next: [None; 256], is_match: is_match });
self.dfa.states.push(State { next: [None; 256], is_match });
(*v.insert(self.dfa.states.len() - 1), false)
}
})
Expand Down Expand Up @@ -314,7 +309,7 @@ impl DfaBuilder {
}

fn new_state(&mut self, is_match: bool) -> usize {
self.dfa.states.push(State { next: [None; 256], is_match: is_match });
self.dfa.states.push(State { next: [None; 256], is_match });
self.dfa.states.len() - 1
}
}
7 changes: 5 additions & 2 deletions src/automaton/levenshtein.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ impl Levenshtein {
/// A `Levenshtein` value satisfies the `Automaton` trait, which means it
/// can be used with the `search` method of any finite state transducer.
#[inline]
pub fn new(query: &str, distance: u32) -> Result<Self, LevenshteinError> {
pub fn new(
query: &str,
distance: u32,
) -> Result<Levenshtein, LevenshteinError> {
let lev = DynamicLevenshtein {
query: query.to_owned(),
dist: distance as usize,
Expand Down Expand Up @@ -216,7 +219,7 @@ struct DfaBuilder {
}

impl DfaBuilder {
fn new(lev: DynamicLevenshtein) -> Self {
fn new(lev: DynamicLevenshtein) -> DfaBuilder {
DfaBuilder {
dfa: Dfa { states: Vec::with_capacity(16) },
lev,
Expand Down
Loading

0 comments on commit 650480c

Please sign in to comment.