diff --git a/.gitignore b/.gitignore index faca1a7c7..928010443 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ src/test .commit_message *.bk *.rustfmt +Cargo.lock diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 049b051f3..000000000 --- a/Cargo.lock +++ /dev/null @@ -1,662 +0,0 @@ -[root] -name = "tokei" -version = "6.1.2" -dependencies = [ - "clap 2.26.0 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", - "env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "handlebars 0.27.0 (registry+https://github.com/rust-lang/crates.io-index)", - "hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "ignore 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "rayon 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_cbor 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_yaml 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", - "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "toml 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "aho-corasick" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "ansi_term" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - 
-[[package]] -name = "atty" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "bitflags" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "bitflags" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "byteorder" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "clap" -version = "2.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", - "atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", - "term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "textwrap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "coco" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "conv" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "custom_derive 0.1.7 
(registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "crossbeam" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "custom_derive" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "dtoa" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "either" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "encoding" -version = "0.2.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "encoding-index-japanese" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "encoding-index-korean" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "encoding-index-simpchinese" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "encoding-index-singlebyte" -version = 
"1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "encoding-index-tradchinese" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "encoding_index_tests" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "env_logger" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "fnv" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "futures" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "globset" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "handlebars" -version = "0.27.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "pest 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "quick-error 1.2.0 
(registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "hex" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "ignore" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", - "globset 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "itoa" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "kernel32-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "lazy_static" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "libc" -version = "0.2.30" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "linked-hash-map" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "linked-hash-map" -version = "0.4.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "log" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "magenta" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "conv 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "magenta-sys 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "magenta-sys" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "memchr" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "num-traits" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "num_cpus" -version = "1.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "pest" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "quick-error" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "quote" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "rand" -version = "0.3.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", - "magenta 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rayon" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - 
"rayon-core 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rayon-core" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "coco 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", - "num_cpus 1.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "regex" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "regex-syntax" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "same-file" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "scopeguard" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "serde" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "serde_bytes" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - 
"serde 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "serde_cbor" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_bytes 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "serde_derive" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive_internals 0.15.1 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "serde_derive_internals" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)", - "synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "serde_json" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "itoa 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "serde_yaml" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "linked-hash-map 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", - "yaml-rust 0.3.5 
(registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "strsim" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "syn" -version = "0.11.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", - "synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "synom" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tempdir" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rand 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "term_size" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "textwrap" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "thread_local" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "toml" -version = "0.4.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "serde 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "unicode-segmentation" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "unicode-width" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "unicode-xid" -version = "0.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "unreachable" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "utf8-ranges" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "vec_map" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "void" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "walkdir" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "winapi" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "winapi-build" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "yaml-rust" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "linked-hash-map 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[metadata] -"checksum aho-corasick 0.6.3 
(registry+https://github.com/rust-lang/crates.io-index)" = "500909c4f87a9e52355b26626d890833e9e1d53ac566db76c36faa984b889699" -"checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6" -"checksum atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d912da0db7fa85514874458ca3651fe2cddace8d0b0505571dbdcd41ab490159" -"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" -"checksum bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4efd02e230a02e18f92fc2735f44597385ed02ad8f831e7c1c1156ee5e1ab3a5" -"checksum byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff81738b726f5d099632ceaffe7fb65b90212e8dce59d518729e7e8634032d3d" -"checksum clap 2.26.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2267a8fdd4dce6956ba6649e130f62fb279026e5e84b92aa939ac8f85ce3f9f0" -"checksum coco 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c06169f5beb7e31c7c67ebf5540b8b472d23e3eade3b2ec7d1f5b504a85f91bd" -"checksum conv 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "78ff10625fd0ac447827aa30ea8b861fead473bb60aeb73af6c1c58caf0d1299" -"checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97" -"checksum custom_derive 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" -"checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab" -"checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a" -"checksum encoding 0.2.33 
(registry+https://github.com/rust-lang/crates.io-index)" = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec" -"checksum encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91" -"checksum encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81" -"checksum encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7" -"checksum encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" -"checksum encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" -"checksum encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" -"checksum env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3ddf21e73e016298f5cb37d6ef8e8da8e39f91f9ec8b0df44b7deb16a9f8cd5b" -"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344" -"checksum futures 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a82bdc62350ca9d7974c760e9665102fc9d740992a528c2254aa930e53b783c4" -"checksum globset 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "feeb1b6840809ef5efcf7a4a990bc4e1b7ee3df8cf9e2379a75aeb2ba42ac9c3" -"checksum handlebars 0.27.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ef7567daf271a32e60301e4821fcb5b51a5b535167115d1ce04f46c3f0a15f0b" -"checksum hex 0.2.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "d6a22814455d41612f41161581c2883c0c6a1c41852729b17d5ed88f01e153aa" -"checksum ignore 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b3fcaf2365eb14b28ec7603c98c06cc531f19de9eb283d89a3dff8417c8c99f5" -"checksum itoa 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ac17257442c2ed77dbc9fd555cf83c58b0c7f7d0e8f2ae08c0ac05c72842e1f6" -"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" -"checksum lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "3b37545ab726dd833ec6420aaba8231c5b320814b9029ad585555d2a03e94fbf" -"checksum libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)" = "2370ca07ec338939e356443dac2296f581453c35fe1e3a3ed06023c49435f915" -"checksum linked-hash-map 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6d262045c5b87c0861b3f004610afd0e2c851e2908d08b6c870cbb9d5f494ecd" -"checksum linked-hash-map 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7860ec297f7008ff7a1e3382d7f7e1dcd69efc94751a2284bafc3d013c2aa939" -"checksum log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "880f77541efa6e5cc74e76910c9884d9859683118839d6a1dc3b11e63512565b" -"checksum magenta 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4bf0336886480e671965f794bc9b6fce88503563013d1bfb7a502c81fe3ac527" -"checksum magenta-sys 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "40d014c7011ac470ae28e2f76a02bfea4a8480f73e701353b49ad7a8d75f4699" -"checksum memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1dbccc0e46f1ea47b9f17e6d67c5a96bd27030519c519c9c91327e31275a47b4" -"checksum num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "99843c856d68d8b4313b03a17e33c4bb42ae8f6610ea81b28abe076ac721b9b0" -"checksum num_cpus 1.6.2 
(registry+https://github.com/rust-lang/crates.io-index)" = "aec53c34f2d0247c5ca5d32cca1478762f301740468ee9ee6dcb7a0dd7a0c584" -"checksum pest 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0a6dda33d67c26f0aac90d324ab2eb7239c819fc7b2552fe9faa4fe88441edc8" -"checksum quick-error 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3c36987d4978eb1be2e422b1e0423a557923a5c3e7e6f31d5699e9aafaefa469" -"checksum quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a" -"checksum rand 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)" = "eb250fd207a4729c976794d03db689c9be1d634ab5a1c9da9492a13d8fecbcdf" -"checksum rayon 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a77c51c07654ddd93f6cb543c7a849863b03abc7e82591afda6dc8ad4ac3ac4a" -"checksum rayon-core 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7febc28567082c345f10cddc3612c6ea020fc3297a1977d472cf9fdb73e6e493" -"checksum regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b" -"checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db" -"checksum same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d931a44fdaa43b8637009e7632a02adc4f2b2e0733c08caa4cf00e8da4a117a7" -"checksum scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c79eb2c3ac4bc2507cda80e7f3ac5b88bd8eae4c0914d5663e6a8933994be918" -"checksum serde 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)" = "f7726f29ddf9731b17ff113c461e362c381d9d69433f79de4f3dd572488823e9" -"checksum serde_bytes 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "12b8ae62bf2de9844de7506deb95667943b156ac18136a5c8124cb2ac0c51e19" -"checksum serde_cbor 0.6.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "27181cf088428830792d77a40dd44f59d663f3e909bd56cef8c815403cf814ba" -"checksum serde_derive 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)" = "cf823e706be268e73e7747b147aa31c8f633ab4ba31f115efb57e5047c3a76dd" -"checksum serde_derive_internals 0.15.1 (registry+https://github.com/rust-lang/crates.io-index)" = "37aee4e0da52d801acfbc0cc219eb1eda7142112339726e427926a6f6ee65d3a" -"checksum serde_json 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "48b04779552e92037212c3615370f6bd57a40ebba7f20e554ff9f55e41a69a7b" -"checksum serde_yaml 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49d983aa39d2884a4b422bb11bb38f4f48fa05186e17469bc31e47d01e381111" -"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694" -"checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad" -"checksum synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6" -"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6" -"checksum term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2b6b55df3198cc93372e85dd2ed817f0e38ce8cc0f22eb32391bfad9c4bf209" -"checksum textwrap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f728584ea33b0ad19318e20557cb0a39097751dbb07171419673502f848c7af6" -"checksum thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14" -"checksum toml 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a7540f4ffc193e0d3c94121edb19b055670d369f77d5804db11ae053a45b6e7e" -"checksum unicode-segmentation 
1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a8083c594e02b8ae1654ae26f0ade5158b119bd88ad0e8227a5d8fcd72407946" -"checksum unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bf3a113775714a22dcb774d8ea3655c53a32debae63a063acc00a91cc586245f" -"checksum unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc" -"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" -"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" -"checksum vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "887b5b631c2ad01628bbbaa7dd4c869f80d3186688f8d0b6f58774fbe324988c" -"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" -"checksum walkdir 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "bb08f9e670fab86099470b97cd2b252d6527f0b3cc1401acdb595ffc9dd288ff" -"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" -"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" -"checksum yaml-rust 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e66366e18dc58b46801afbf2ca7661a9f59cc8c5962c29892b6039b4f86fa992" diff --git a/Cargo.toml b/Cargo.toml index 2fb79dcc2..e7141de48 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ handlebars = "0.27" lazy_static = "0.2" [dependencies] -encoding = "0.2" +encoding_rs = "0.7" ignore = "0.2" log = "0.3" rayon = "0.7" diff --git a/src/language/decoder.rs b/src/language/decoder.rs new file mode 
100644 index 000000000..619c08cea --- /dev/null +++ b/src/language/decoder.rs @@ -0,0 +1,434 @@ +// This file is vendored and modified from @BurntSushi's ripgrep repository from commit 214f2be. +// ripgrep is licensed as public domain (the "Unlicense") +// +// This file changes BOM handling to always strip BOMs from the beginning of readers. We do not +// want BOMs to count the first line of comments as code. We also are able to make a few other +// simplifications by removing support for defining which encoding to use. + +use std::cmp; +use std::io::{self, Read}; + +use encoding_rs::{Decoder, Encoding, UTF_8}; + +/// A BOM is at least 2 bytes and at most 3 bytes. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct Bom { + bytes: [u8; 3], + len: usize, +} + +impl Bom { + fn as_slice(&self) -> &[u8] { + &self.bytes[0..self.len] + } + + // tokei change: we always remove the BOM, even if it is short, and even in UTF-8 + fn decoder(&self) -> Decoder { + let bom = self.as_slice(); + if let Some((enc, _)) = Encoding::for_bom(bom) { + return enc.new_decoder_with_bom_removal(); + } + UTF_8.new_decoder_without_bom_handling() + } +} + +/// BomPeeker wraps `R` and satisfies the `io::Read` interface while also +/// providing a peek at the BOM if one exists. Peeking at the BOM does not +/// advance the reader. +struct BomPeeker { + rdr: R, + bom: Option, + nread: usize, +} + +impl BomPeeker { + /// Create a new BomPeeker. + /// + /// The first three bytes can be read using the `peek_bom` method, but + /// will not advance the reader. + fn new(rdr: R) -> BomPeeker { + BomPeeker { + rdr: rdr, + bom: None, + nread: 0, + } + } + + /// Peek at the first three bytes of the underlying reader. + /// + /// This does not advance the reader provided by `BomPeeker`. + /// + /// If the underlying reader has fewer than three bytes available, the + /// returned `Bom` holds only the bytes that were read. 
+ fn peek_bom(&mut self) -> io::Result { + if let Some(bom) = self.bom { + return Ok(bom); + } + self.bom = Some(Bom { + bytes: [0; 3], + len: 0, + }); + let mut buf = [0u8; 3]; + let bom_len = read_full(&mut self.rdr, &mut buf)?; + self.bom = Some(Bom { + bytes: buf, + len: bom_len, + }); + Ok(self.bom.unwrap()) + } +} + +impl io::Read for BomPeeker { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if self.nread < 3 { + let bom = self.peek_bom()?; + let bom = bom.as_slice(); + if self.nread < bom.len() { + let rest = &bom[self.nread..]; + let len = cmp::min(buf.len(), rest.len()); + buf[..len].copy_from_slice(&rest[..len]); + self.nread += len; + return Ok(len); + } + } + let nread = self.rdr.read(buf)?; + self.nread += nread; + Ok(nread) + } +} + +/// Like io::Read::read_exact, except it never returns UnexpectedEof and +/// instead returns the number of bytes read if EOF is seen before filling +/// `buf`. +fn read_full(mut rdr: R, mut buf: &mut [u8]) -> io::Result { + let mut nread = 0; + while !buf.is_empty() { + match rdr.read(buf) { + Ok(0) => break, + Ok(n) => { + nread += n; + let tmp = buf; + buf = &mut tmp[n..]; + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => return Err(e), + } + } + Ok(nread) +} + +/// A reader that transcodes to UTF-8. The source encoding is determined by +/// inspecting the BOM from the stream read from `R`, if one exists. If a +/// UTF-16 BOM exists, then the source stream is transcoded to UTF-8 with +/// invalid UTF-16 sequences translated to the Unicode replacement character. +/// Otherwise the stream is decoded as UTF-8, with any UTF-8 BOM stripped. +/// +/// `R` is the type of the underlying reader and `B` is the type of an internal +/// buffer used to store the results of transcoding. +/// +/// Note that not all methods on `io::Read` work with this implementation. 
+/// For example, the `bytes` adapter method attempts to read a single byte at +/// a time, but this implementation requires a buffer of size at least `4`. If +/// a buffer of size less than 4 is given, then an error is returned. +pub struct DecodeReader { + /// The underlying reader, wrapped in a peeker for reading a BOM if one + /// exists. + rdr: BomPeeker, + /// The internal buffer to store transcoded bytes before they are read by + /// callers. + buf: B, + /// The current position in `buf`. Subsequent reads start here. + pos: usize, + /// The number of transcoded bytes in `buf`. Subsequent reads end here. + buflen: usize, + /// Whether a "last" read has occurred. After this point, EOF will always + /// be returned. + last: bool, + /// The underlying text decoder derived from the BOM, if one exists. + decoder: Decoder, +} + +impl> DecodeReader { + /// Create a new transcoder that converts a source stream to valid UTF-8. + /// + /// Errors in the encoding of `rdr` are handled with the Unicode + /// replacement character. If no encoding of `rdr` is specified, then + /// errors are not handled. + pub fn new(rdr: R, buf: B) -> io::Result> { + let mut rdr = BomPeeker::new(rdr); + let bom = rdr.peek_bom()?; + let decoder = bom.decoder(); + + Ok(DecodeReader { + rdr: rdr, + buf: buf, + buflen: 0, + pos: 0, + last: false, + decoder: decoder, + }) + } + + /// Fill the internal buffer from the underlying reader. + /// + /// If there are unread bytes in the internal buffer, then we move them + /// to the beginning of the internal buffer and fill the remainder. + /// + /// If the internal buffer is too small to read additional bytes, then an + /// error is returned. + #[inline(always)] // massive perf benefit (???) 
+ fn fill(&mut self) -> io::Result<()> { + if self.pos < self.buflen { + if self.buflen >= self.buf.as_mut().len() { + return Err(io::Error::new( + io::ErrorKind::Other, + "DecodeReader: internal buffer exhausted", + )); + } + let newlen = self.buflen - self.pos; + let mut tmp = Vec::with_capacity(newlen); + tmp.extend_from_slice(&self.buf.as_mut()[self.pos..self.buflen]); + self.buf.as_mut()[..newlen].copy_from_slice(&tmp); + self.buflen = newlen; + } else { + self.buflen = 0; + } + self.pos = 0; + self.buflen += self.rdr.read(&mut self.buf.as_mut()[self.buflen..])?; + Ok(()) + } + + /// Transcode the inner stream to UTF-8 in `buf`. This assumes that there + /// is a decoder capable of transcoding the inner stream to UTF-8. This + /// returns the number of bytes written to `buf`. + /// + /// When this function returns, exactly one of the following things will + /// be true: + /// + /// 1. A non-zero number of bytes were written to `buf`. + /// 2. The underlying reader reached EOF. + /// 3. An error is returned: the internal buffer ran out of room. + /// 4. An I/O error occurred. + /// + /// Note that `buf` must have at least 4 bytes of space. + fn transcode(&mut self, buf: &mut [u8]) -> io::Result { + assert!(buf.len() >= 4); + if self.last { + return Ok(0); + } + if self.pos >= self.buflen { + self.fill()?; + } + let mut nwrite = 0; + loop { + let (_, nin, nout, _) = self.decoder.decode_to_utf8( + &self.buf.as_mut()[self.pos..self.buflen], + buf, + false, + ); + self.pos += nin; + nwrite += nout; + // If we've written at least one byte to the caller-provided + // buffer, then our mission is complete. + if nwrite > 0 { + break; + } + // Otherwise, we know that our internal buffer has insufficient + // data to transcode at least one char, so we attempt to refill it. + self.fill()?; + // Quit on EOF. 
+ if self.buflen == 0 { + self.pos = 0; + self.last = true; + let (_, _, nout, _) = self.decoder.decode_to_utf8(&[], buf, true); + return Ok(nout); + } + } + Ok(nwrite) + } +} + +impl> io::Read for DecodeReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + // When decoding UTF-8, we need at least 4 bytes of space to guarantee + // that we can decode at least one codepoint. If we don't have it, we + // can either return `0` for the number of bytes read or return an + // error. Since `0` would be interpreted as a possibly premature EOF, + // we opt for an error. + if buf.len() < 4 { + return Err(io::Error::new( + io::ErrorKind::Other, + "DecodeReader: byte buffer must have length at least 4", + )); + } + self.transcode(buf) + } +} + +#[cfg(test)] +mod tests { + use std::io::Read; + + use super::{Bom, BomPeeker, DecodeReader}; + + fn read_to_string(mut rdr: R) -> String { + let mut s = String::new(); + rdr.read_to_string(&mut s).unwrap(); + s + } + + #[test] + fn peeker_empty() { + let buf = []; + let mut peeker = BomPeeker::new(&buf[..]); + assert_eq!( + Bom { + bytes: [0; 3], + len: 0, + }, + peeker.peek_bom().unwrap() + ); + + let mut tmp = [0; 100]; + assert_eq!(0, peeker.read(&mut tmp).unwrap()); + } + + #[test] + fn peeker_one() { + let buf = [1]; + let mut peeker = BomPeeker::new(&buf[..]); + assert_eq!( + Bom { + bytes: [1, 0, 0], + len: 1, + }, + peeker.peek_bom().unwrap() + ); + + let mut tmp = [0; 100]; + assert_eq!(1, peeker.read(&mut tmp).unwrap()); + assert_eq!(1, tmp[0]); + assert_eq!(0, peeker.read(&mut tmp).unwrap()); + } + + #[test] + fn peeker_two() { + let buf = [1, 2]; + let mut peeker = BomPeeker::new(&buf[..]); + assert_eq!( + Bom { + bytes: [1, 2, 0], + len: 2, + }, + peeker.peek_bom().unwrap() + ); + + let mut tmp = [0; 100]; + assert_eq!(2, peeker.read(&mut tmp).unwrap()); + assert_eq!(1, tmp[0]); + assert_eq!(2, tmp[1]); + assert_eq!(0, peeker.read(&mut tmp).unwrap()); + } + + #[test] + fn peeker_three() { + let buf = [1, 2, 3]; + 
let mut peeker = BomPeeker::new(&buf[..]); + assert_eq!( + Bom { + bytes: [1, 2, 3], + len: 3, + }, + peeker.peek_bom().unwrap() + ); + + let mut tmp = [0; 100]; + assert_eq!(3, peeker.read(&mut tmp).unwrap()); + assert_eq!(1, tmp[0]); + assert_eq!(2, tmp[1]); + assert_eq!(3, tmp[2]); + assert_eq!(0, peeker.read(&mut tmp).unwrap()); + } + + #[test] + fn peeker_four() { + let buf = [1, 2, 3, 4]; + let mut peeker = BomPeeker::new(&buf[..]); + assert_eq!( + Bom { + bytes: [1, 2, 3], + len: 3, + }, + peeker.peek_bom().unwrap() + ); + + let mut tmp = [0; 100]; + assert_eq!(3, peeker.read(&mut tmp).unwrap()); + assert_eq!(1, tmp[0]); + assert_eq!(2, tmp[1]); + assert_eq!(3, tmp[2]); + assert_eq!(1, peeker.read(&mut tmp).unwrap()); + assert_eq!(4, tmp[0]); + assert_eq!(0, peeker.read(&mut tmp).unwrap()); + } + + #[test] + fn peeker_one_at_a_time() { + let buf = [1, 2, 3, 4]; + let mut peeker = BomPeeker::new(&buf[..]); + + let mut tmp = [0; 1]; + assert_eq!(0, peeker.read(&mut tmp[..0]).unwrap()); + assert_eq!(0, tmp[0]); + assert_eq!(1, peeker.read(&mut tmp).unwrap()); + assert_eq!(1, tmp[0]); + assert_eq!(1, peeker.read(&mut tmp).unwrap()); + assert_eq!(2, tmp[0]); + assert_eq!(1, peeker.read(&mut tmp).unwrap()); + assert_eq!(3, tmp[0]); + assert_eq!(1, peeker.read(&mut tmp).unwrap()); + assert_eq!(4, tmp[0]); + } + + // In cases where all we have is a BOM, we expect it to be stripped, so + // reading returns zero bytes. 
+ #[test] + fn trans_utf16_bom() { + let srcbuf = vec![0xFF, 0xFE]; + let mut dstbuf = vec![0; 8 * (1 << 10)]; + let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1 << 10)]).unwrap(); + let n = rdr.read(&mut dstbuf).unwrap(); + assert_eq!(n, 0); + + let srcbuf = vec![0xFE, 0xFF]; + let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1 << 10)]).unwrap(); + let n = rdr.read(&mut dstbuf).unwrap(); + assert_eq!(n, 0); + + let srcbuf = vec![0xEF, 0xBB, 0xBF]; + let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1 << 10)]).unwrap(); + let n = rdr.read(&mut dstbuf).unwrap(); + assert_eq!(n, 0); + } + + // Test basic UTF-16 decoding. + #[test] + fn trans_utf16_basic() { + let srcbuf = vec![0xFF, 0xFE, 0x61, 0x00]; + let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1 << 10)]).unwrap(); + assert_eq!("a", read_to_string(&mut rdr)); + + let srcbuf = vec![0xFE, 0xFF, 0x00, 0x61]; + let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1 << 10)]).unwrap(); + assert_eq!("a", read_to_string(&mut rdr)); + } + + // Test incomplete UTF-16 decoding. This ensures we see a replacement char + // if the stream ends with an unpaired code unit. + #[test] + fn trans_utf16_incomplete() { + let srcbuf = vec![0xFF, 0xFE, 0x61, 0x00, 0x00]; + let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1 << 10)]).unwrap(); + assert_eq!("a\u{FFFD}", read_to_string(&mut rdr)); + } +} diff --git a/src/language/languages.rs b/src/language/languages.rs index effabebba..2931cfb63 100644 --- a/src/language/languages.rs +++ b/src/language/languages.rs @@ -3,17 +3,13 @@ // found in the LICENCE-{APACHE/MIT} file. 
#![allow(unused_variables)] -use std::borrow::Cow; use std::collections::{btree_map, BTreeMap}; use std::fs::File; -use std::io::Read; +use std::io::{BufReader, BufRead}; use std::iter::IntoIterator; use std::mem; use std::ops::{AddAssign, Deref, DerefMut}; -use encoding; -use encoding::all::UTF_8; -use encoding::DecoderTrap::Replace; #[cfg(feature = "cbor")] use serde_cbor; #[cfg(feature = "json")] use serde_json; @@ -24,6 +20,7 @@ use rayon::prelude::*; use stats::Stats; use super::LanguageType::*; use super::{Language, LanguageType}; +use super::decoder::DecodeReader; use utils::fs; fn count_files((name, ref mut language): (&LanguageType, &mut Language)) { @@ -37,27 +34,35 @@ fn count_files((name, ref mut language): (&LanguageType, &mut Language)) { let stats: Vec<_> = files.into_par_iter().filter_map(|file| { let mut stack: Vec<&'static str> = Vec::new(); - let mut contents = Vec::new(); let mut quote: Option<&'static str> = None; - rs_ret_error!(rs_ret_error!(File::open(&file)).read_to_end(&mut contents)); + let f = rs_ret_error!(File::open(&file)); + let decoder = rs_ret_error!(DecodeReader::new(f, vec![0; 4<<10])); + let mut bufreader = BufReader::with_capacity(4<<10, decoder); - let text = match encoding::decode(&contents, Replace, UTF_8) { - (Ok(string), _) => Cow::Owned(string), - (Err(cow), _) => cow, - }; - - let lines = text.lines(); let mut stats = Stats::new(file); + let mut line_buf = String::new(); + if is_blank { - let count = lines.count(); - stats.lines += count; - stats.code += count; - return Some(stats); + loop { + line_buf.clear(); + if 0 == rs_ret_error!(bufreader.read_line(&mut line_buf)) { + return Some(stats); + } + stats.lines += 1; + stats.code += 1; + } } - 'line: for line in lines { + loop { + line_buf.clear(); + + if 0 == rs_ret_error!(bufreader.read_line(&mut line_buf)) { + break; + } + + let line = &*line_buf; stats.lines += 1; let no_stack = stack.is_empty(); diff --git a/src/language/mod.rs b/src/language/mod.rs index 
95146fff6..34922e81f 100644 --- a/src/language/mod.rs +++ b/src/language/mod.rs @@ -2,6 +2,8 @@ // Use of this source code is governed by the MIT/APACHE2.0 license that can be // found in the LICENCE-{APACHE - MIT} file. +mod decoder; + pub mod language; pub mod languages; pub mod language_type; diff --git a/src/lib.rs b/src/lib.rs index 83c9d8c92..db5539730 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,7 +52,7 @@ #[macro_use] extern crate log; -extern crate encoding; +extern crate encoding_rs; extern crate ignore; extern crate rayon; diff --git a/tests/accuracy.rs b/tests/accuracy.rs index 5fe285ff8..ffc99d600 100644 --- a/tests/accuracy.rs +++ b/tests/accuracy.rs @@ -1,7 +1,6 @@ #[macro_use] extern crate lazy_static; extern crate regex; extern crate tokei; -extern crate ignore; use std::io::Read; use std::fs::File;