Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move icu_uniset into collections component as codepointinvlist module #2328

Merged
merged 7 commits into from
Aug 3, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ jobs:
- components/locid
- components/plurals
- components/datetime
- utils/uniset
- components/collections
- utils/fixed_decimal


Expand Down Expand Up @@ -752,7 +752,7 @@ jobs:
icu_locid/filter_langids
icu_plurals/unread_emails
icu_plurals/elevator_floors
icu_uniset/unicode_bmp_blocks_selector
icu_collections/unicode_bmp_blocks_selector
fixed_decimal/permyriad
writeable/writeable_message
litemap/language_names_lite_map
Expand Down
27 changes: 2 additions & 25 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ members = [
"utils/pattern",
"utils/tinystr",
"utils/tzif",
"utils/uniset",
"utils/writeable",
"utils/yoke",
"utils/yoke/derive",
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ ICU4X will provide an ECMA-402-compatible API surface in the target client-side
The [performance benchmarks](docs/process/benchmarking.md) are all run on Ubuntu, and are broken out by component.

* [locid](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/components/locid)
* [uniset](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/utils/uniset)
* [collections](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/components/collections)
* [fixed_decimal](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/utils/fixed_decimal)
* [plurals](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/components/plurals)
* [datetime](https://unicode-org.github.io/icu4x-docs/benchmarks/perf/components/datetime)
Expand Down
5 changes: 2 additions & 3 deletions components/collator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ icu_provider = { version = "0.6", path = "../../provider/core", features = ["mac
icu_locid = { version = "0.6", path = "../../components/locid" }
icu_normalizer = { version = "0.6", path = "../../components/normalizer" }
icu_properties = { version = "0.6", path = "../../components/properties" }
icu_uniset = { version = "0.5", path = "../../utils/uniset" }
serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
zerovec = { version = "0.7", path = "../../utils/zerovec", features = ["serde"] }
utf8_iter = "1.0"
Expand All @@ -59,5 +58,5 @@ bench = false # This option is required for Benchmark CI

[features]
default = []
serde = ["dep:serde", "zerovec/serde", "icu_char16trie/serde", "icu_properties/serde", "icu_normalizer/serde", "icu_uniset/serde", "icu_collections/serde"]
datagen = ["serde", "databake", "zerovec/databake", "icu_char16trie/databake", "icu_properties/databake", "icu_normalizer/databake", "icu_uniset/databake", "icu_collections/databake"]
serde = ["dep:serde", "zerovec/serde", "icu_char16trie/serde", "icu_properties/serde", "icu_normalizer/serde", "icu_collections/serde"]
datagen = ["serde", "databake", "zerovec/databake", "icu_char16trie/databake", "icu_properties/databake", "icu_normalizer/databake", "icu_collections/databake"]
32 changes: 22 additions & 10 deletions components/collections/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ all-features = true

[dependencies]
displaydoc = { version = "0.2.3", default-features = false }
icu_uniset = { version = "0.5.0", path = "../../utils/uniset" }
yoke = { version = "0.6.0", path = "../../utils/yoke", features = ["derive"] }
zerofrom = { version = "0.1.0", path = "../../utils/zerofrom", features = ["derive"] }
zerovec = { version = "0.7", path = "../../utils/zerovec", features = ["yoke"] }
Expand All @@ -45,7 +44,9 @@ databake = { version = "0.1.0", path = "../../utils/databake", features = ["deri
postcard = { version = "1.0.0-alpha.4", features = ["alloc"] }
toml = "0.5"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
criterion = "0.3.4"
icu_benchmark_macros = { version = "0.6", path = "../../tools/benchmark/macros" }
iai = "0.1.1"
icu = { path = "../icu", default-features = false }

Expand All @@ -55,15 +56,26 @@ path = "src/lib.rs"

[features]
default = []
serde = ["dep:serde", "zerovec/serde", "icu_uniset/serde"]
serde = ["dep:serde", "zerovec/serde"]
bench = []
databake = ["dep:databake", "zerovec/databake"]

# [[bench]]
# name = "codepointtrie"
# harness = false
#
# [[bench]]
# name = "iai_cpt"
# harness = false
# required-features = ["bench"]
[[bench]]
name = "codepointtrie"
harness = false
path = "src/codepointtrie/benches/codepointtrie.rs"

[[bench]]
name = "iai_cpt"
harness = false
required-features = ["bench"]
path = "src/codepointtrie/benches/iai_cpt.rs"

[[bench]]
name = "inv_list"
harness = false
path = "src/codepointinvlist/benches/inv_list.rs"

[[example]]
name = "unicode_bmp_blocks_selector"
path = "src/codepointinvlist/examples/unicode_bmp_blocks_selector.rs"
5 changes: 4 additions & 1 deletion components/collections/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@ Collections of API for use in ICU.
This module is published as its own crate ([`icu_collections`](https://docs.rs/icu_collections/latest/icu_collections/))
and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.

ICU4X [`CodePointTrie`](crate::codepointtrie::CodePointTrie) is designed to provide a read-only view of CodePointTrie data that is exported
ICU4X [`CodePointTrie`](crate::codepointtrie::CodePointTrie) provides a read-only view of CodePointTrie data that is exported
from ICU4C. Detailed information about the design of the data structure can be found in the documentation
for the [`CodePointTrie`](crate::codepointtrie::CodePointTrie) struct.

ICU4X [`CodePointInversionList`](crate::codepointinvlist::CodePointInversionList) provides the functionality needed for highly efficient querying of sets of Unicode characters.
It is an implementation of the existing [ICU4C UnicodeSet API](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1UnicodeSet.html).


## More Information

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ represented by [inversion lists](http://userguide.icu-project.org/strings/proper
the [`CodePointInversionListBuilder`], or from the Properties API.

```rust
use icu_uniset::{CodePointInversionList, CodePointInversionListBuilder};
use icu_collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder};

let mut builder = CodePointInversionListBuilder::new();
builder.add_range(&('A'..'Z'));
Expand All @@ -33,7 +33,7 @@ assert!(set.contains('A'));
Currently, you can check if a character/range of characters exists in the [`CodePointInversionList`], or iterate through the characters.

```rust
use icu_uniset::{CodePointInversionList, CodePointInversionListBuilder};
use icu_collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder};

let mut builder = CodePointInversionListBuilder::new();
builder.add_range(&('A'..'Z'));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use criterion::{criterion_group, criterion_main, Criterion};
use icu_uniset::CodePointInversionList;
use icu_collections::codepointinvlist::CodePointInversionList;
use std::char;

fn uniset_bench(c: &mut Criterion) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use alloc::vec;
use alloc::vec::Vec;
use core::{char, cmp::Ordering, ops::RangeBounds};

use crate::{uniset::CodePointInversionList, utils::deconstruct_range};
use crate::codepointinvlist::{utils::deconstruct_range, CodePointInversionList};
use zerovec::{ule::AsULE, ZeroVec};

/// A builder for [`CodePointInversionList`].
Expand Down Expand Up @@ -90,7 +90,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::CodePointInversionListBuilder;
/// use icu_collections::codepointinvlist::CodePointInversionListBuilder;
/// let mut builder = CodePointInversionListBuilder::new();
/// builder.add_char('a');
/// let check = builder.build();
Expand All @@ -111,7 +111,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::CodePointInversionListBuilder;
/// use icu_collections::codepointinvlist::CodePointInversionListBuilder;
/// let mut builder = CodePointInversionListBuilder::new();
/// builder.add_u32(0x41);
/// let check = builder.build();
Expand All @@ -129,7 +129,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::CodePointInversionListBuilder;
/// use icu_collections::codepointinvlist::CodePointInversionListBuilder;
/// let mut builder = CodePointInversionListBuilder::new();
/// builder.add_range(&('A'..='Z'));
/// let check = builder.build();
Expand All @@ -145,7 +145,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::CodePointInversionListBuilder;
/// use icu_collections::codepointinvlist::CodePointInversionListBuilder;
/// let mut builder = CodePointInversionListBuilder::new();
/// builder.add_range_u32(&(0xd800..=0xdfff));
/// let check = builder.build();
Expand All @@ -163,7 +163,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::{CodePointInversionList, CodePointInversionListBuilder};
/// use icu_collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder};
/// let mut builder = CodePointInversionListBuilder::new();
/// let set = CodePointInversionList::from_inversion_list_slice(&[0x41, 0x4C]).unwrap();
/// builder.add_set(&set);
Expand Down Expand Up @@ -210,7 +210,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::CodePointInversionListBuilder;
/// use icu_collections::codepointinvlist::CodePointInversionListBuilder;
/// let mut builder = CodePointInversionListBuilder::new();
/// builder.add_range(&('A'..='Z'));
/// builder.remove_char('A');
Expand All @@ -226,7 +226,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::CodePointInversionListBuilder;
/// use icu_collections::codepointinvlist::CodePointInversionListBuilder;
/// let mut builder = CodePointInversionListBuilder::new();
/// builder.add_range(&('A'..='Z'));
/// builder.remove_range(&('A'..='C'));
Expand All @@ -242,7 +242,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::{CodePointInversionList, CodePointInversionListBuilder};
/// use icu_collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder};
/// let mut builder = CodePointInversionListBuilder::new();
/// let set = CodePointInversionList::from_inversion_list_slice(&[0x41, 0x46]).unwrap();
/// builder.add_range(&('A'..='Z'));
Expand Down Expand Up @@ -271,7 +271,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::CodePointInversionListBuilder;
/// use icu_collections::codepointinvlist::CodePointInversionListBuilder;
/// let mut builder = CodePointInversionListBuilder::new();
/// builder.add_range(&('A'..='Z'));
/// builder.retain_char('A');
Expand All @@ -291,7 +291,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::CodePointInversionListBuilder;
/// use icu_collections::codepointinvlist::CodePointInversionListBuilder;
/// let mut builder = CodePointInversionListBuilder::new();
/// builder.add_range(&('A'..='Z'));
/// builder.retain_range(&('A'..='B'));
Expand All @@ -312,7 +312,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::{CodePointInversionList, CodePointInversionListBuilder};
/// use icu_collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder};
/// let mut builder = CodePointInversionListBuilder::new();
/// let set = CodePointInversionList::from_inversion_list_slice(&[65, 70]).unwrap();
/// builder.add_range(&('A'..='Z'));
Expand Down Expand Up @@ -379,7 +379,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::{CodePointInversionList, CodePointInversionListBuilder};
/// use icu_collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder};
/// let mut builder = CodePointInversionListBuilder::new();
/// let set =
/// CodePointInversionList::from_inversion_list_slice(&[0x0, 0x41, 0x46, (std::char::MAX as u32) + 1])
Expand Down Expand Up @@ -413,7 +413,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::CodePointInversionListBuilder;
/// use icu_collections::codepointinvlist::CodePointInversionListBuilder;
/// let mut builder = CodePointInversionListBuilder::new();
/// builder.add_range(&('A'..='D'));
/// builder.complement_char('A');
Expand All @@ -434,7 +434,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::CodePointInversionListBuilder;
/// use icu_collections::codepointinvlist::CodePointInversionListBuilder;
/// let mut builder = CodePointInversionListBuilder::new();
/// builder.add_range(&('A'..='D'));
/// builder.complement_range(&('C'..='F'));
Expand All @@ -454,7 +454,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::{CodePointInversionList, CodePointInversionListBuilder};
/// use icu_collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder};
/// let mut builder = CodePointInversionListBuilder::new();
/// let set = CodePointInversionList::from_inversion_list_slice(&[0x41, 0x46, 0x4B, 0x5A]).unwrap();
/// builder.add_range(&('C'..='N')); // 67 - 78
Expand All @@ -473,7 +473,7 @@ impl CodePointInversionListBuilder {
/// # Examples
///
/// ```
/// use icu_uniset::{CodePointInversionList, CodePointInversionListBuilder};
/// use icu_collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder};
/// let mut builder = CodePointInversionListBuilder::new();
/// let check = builder.build();
/// assert!(check.is_empty());
Expand Down
Loading