Skip to content

Commit 62f1eda

Browse files
authored
Export vec_locate_sorted_groups() (#1541)
* Rename `vec_order_locs()` to `vec_locate_sorted_groups()`
* Export `vec_locate_sorted_groups()`
* NEWS bullet
1 parent 838532b commit 62f1eda

File tree

7 files changed

+180
-38
lines changed

7 files changed

+180
-38
lines changed

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ export(vec_is_empty)
563563
export(vec_is_list)
564564
export(vec_list_cast)
565565
export(vec_locate_matches)
566+
export(vec_locate_sorted_groups)
566567
export(vec_match)
567568
export(vec_math)
568569
export(vec_math_base)

NEWS.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# vctrs (development version)
22

3+
* New experimental `vec_locate_sorted_groups()` for returning the locations of
4+
groups in sorted order. This is equivalent to, but faster than, calling
5+
`vec_group_loc()` and then sorting by the `key` column of the result.
6+
37
* New experimental `vec_locate_matches()` for locating where each observation
48
in one vector matches one or more observations in another vector. It is
59
similar to `vec_match()`, but returns all matches by default (rather than just

R/order.R

+38-17
Original file line numberDiff line numberDiff line change
@@ -176,48 +176,69 @@ vec_sort_radix <- function(x,
176176
vec_slice(x, idx)
177177
}
178178

179-
#' Identify ordered groups
179+
# ------------------------------------------------------------------------------
180+
181+
#' Locate sorted groups
180182
#'
181183
#' @description
182184
#' `r lifecycle::badge("experimental")`
183185
#'
184-
#' `vec_order_locs()` returns a data frame containing a `key` column with
185-
#' sorted unique groups, and a `loc` column with the locations of each
186+
#' `vec_locate_sorted_groups()` returns a data frame containing a `key` column
187+
#' with sorted unique groups, and a `loc` column with the locations of each
186188
#' group in `x`. It is similar to [vec_group_loc()], except the groups are
187189
#' returned sorted rather than by first appearance.
188190
#'
191+
#' @details
192+
#' `vec_locate_sorted_groups(x)` is equivalent to, but faster than:
193+
#'
194+
#' ```
195+
#' info <- vec_group_loc(x)
196+
#' vec_slice(info, vec_order(info$key))
197+
#' ```
198+
#'
189199
#' @inheritParams order-radix
190200
#'
191201
#' @return
192202
#' A two-column data frame with size equal to `vec_size(vec_unique(x))`.
193-
#' * A `key` column of type `vec_ptype(x)`.
194-
#' * A `loc` column of type list, with elements of type integer.
203+
#' * A `key` column of type `vec_ptype(x)`.
204+
#' * A `loc` column of type list, with elements of type integer.
195205
#'
196-
#' @section Dependencies of `vec_order_locs()`:
206+
#' @section Dependencies of `vec_locate_sorted_groups()`:
197207
#' * [vec_proxy_order()]
198208
#'
209+
#' @export
210+
#' @keywords internal
199211
#' @examples
200212
#' df <- data.frame(
201213
#' g = sample(2, 10, replace = TRUE),
202214
#' x = c(NA, sample(5, 9, replace = TRUE))
203215
#' )
204216
#'
205-
#' # `vec_order_locs()` is similar to `vec_group_loc()`, except keys are
206-
#' # returned ordered rather than by first appearance.
207-
#' vec_order_locs(df)
217+
#' # `vec_locate_sorted_groups()` is similar to `vec_group_loc()`, except keys
218+
#' # are returned ordered rather than by first appearance.
219+
#' vec_locate_sorted_groups(df)
208220
#'
209221
#' vec_group_loc(df)
210-
#' @noRd
211-
vec_order_locs <- function(x,
212-
...,
213-
direction = "asc",
214-
na_value = "largest",
215-
nan_distinct = FALSE,
216-
chr_proxy_collate = NULL) {
222+
vec_locate_sorted_groups <- function(x,
223+
...,
224+
direction = "asc",
225+
na_value = "largest",
226+
nan_distinct = FALSE,
227+
chr_proxy_collate = NULL) {
217228
check_dots_empty0(...)
218-
.Call(vctrs_order_locs, x, direction, na_value, nan_distinct, chr_proxy_collate)
229+
230+
.Call(
231+
vctrs_locate_sorted_groups,
232+
x,
233+
direction,
234+
na_value,
235+
nan_distinct,
236+
chr_proxy_collate
237+
)
219238
}
220239

240+
# ------------------------------------------------------------------------------
241+
221242
vec_order_info <- function(x,
222243
...,
223244
direction = "asc",

man/vec_locate_sorted_groups.Rd

+99
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/init.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ extern SEXP vctrs_locate_complete(SEXP);
127127
extern SEXP vctrs_detect_complete(SEXP);
128128
extern SEXP vctrs_normalize_encoding(SEXP);
129129
extern SEXP vctrs_order(SEXP, SEXP, SEXP, SEXP, SEXP);
130-
extern SEXP vctrs_order_locs(SEXP, SEXP, SEXP, SEXP, SEXP);
130+
extern SEXP vctrs_locate_sorted_groups(SEXP, SEXP, SEXP, SEXP, SEXP);
131131
extern SEXP vctrs_order_info(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
132132
extern SEXP vctrs_unrep(SEXP);
133133
extern SEXP vctrs_fill_missing(SEXP, SEXP, SEXP);
@@ -295,7 +295,7 @@ static const R_CallMethodDef CallEntries[] = {
295295
{"vctrs_detect_complete", (DL_FUNC) &vctrs_detect_complete, 1},
296296
{"vctrs_normalize_encoding", (DL_FUNC) &vctrs_normalize_encoding, 1},
297297
{"vctrs_order", (DL_FUNC) &vctrs_order, 5},
298-
{"vctrs_order_locs", (DL_FUNC) &vctrs_order_locs, 5},
298+
{"vctrs_locate_sorted_groups", (DL_FUNC) &vctrs_locate_sorted_groups, 5},
299299
{"vctrs_order_info", (DL_FUNC) &vctrs_order_info, 6},
300300
{"vctrs_unrep", (DL_FUNC) &vctrs_unrep, 1},
301301
{"vctrs_fill_missing", (DL_FUNC) &vctrs_fill_missing, 3},

src/order.c

+31-14
Original file line numberDiff line numberDiff line change
@@ -211,28 +211,45 @@ SEXP vec_order(SEXP x,
211211

212212
// -----------------------------------------------------------------------------
213213

214-
static SEXP vec_order_locs(SEXP x,
215-
SEXP direction,
216-
SEXP na_value,
217-
bool nan_distinct,
218-
SEXP chr_proxy_collate);
214+
static SEXP vec_locate_sorted_groups(SEXP x,
215+
SEXP direction,
216+
SEXP na_value,
217+
bool nan_distinct,
218+
SEXP chr_proxy_collate);
219219

220220
// [[ register() ]]
221-
SEXP vctrs_order_locs(SEXP x,
222-
SEXP direction,
223-
SEXP na_value,
224-
SEXP nan_distinct,
225-
SEXP chr_proxy_collate) {
221+
SEXP vctrs_locate_sorted_groups(SEXP x,
222+
SEXP direction,
223+
SEXP na_value,
224+
SEXP nan_distinct,
225+
SEXP chr_proxy_collate) {
226226
bool c_nan_distinct = parse_nan_distinct(nan_distinct);
227-
return vec_order_locs(x, direction, na_value, c_nan_distinct, chr_proxy_collate);
228-
}
229227

228+
return vec_locate_sorted_groups(
229+
x,
230+
direction,
231+
na_value,
232+
c_nan_distinct,
233+
chr_proxy_collate
234+
);
235+
}
230236

231237
static
232-
SEXP vec_order_locs(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate) {
238+
SEXP vec_locate_sorted_groups(SEXP x,
239+
SEXP direction,
240+
SEXP na_value,
241+
bool nan_distinct,
242+
SEXP chr_proxy_collate) {
233243
const bool chr_ordered = true;
234244

235-
SEXP info = KEEP(vec_order_info(x, direction, na_value, nan_distinct, chr_proxy_collate, chr_ordered));
245+
SEXP info = KEEP(vec_order_info(
246+
x,
247+
direction,
248+
na_value,
249+
nan_distinct,
250+
chr_proxy_collate,
251+
chr_ordered
252+
));
236253

237254
SEXP o = r_list_get(info, 0);
238255
const int* p_o = r_int_cbegin(o);

tests/testthat/test-order.R

+5-5
Original file line numberDiff line numberDiff line change
@@ -1138,9 +1138,9 @@ test_that("can order data frames (and subclasses) with matrix columns", {
11381138
})
11391139

11401140
# ------------------------------------------------------------------------------
1141-
# vec_order_locs()
1141+
# vec_locate_sorted_groups()
11421142

1143-
test_that("`vec_order_locs()` is working", {
1143+
test_that("`vec_locate_sorted_groups()` is working", {
11441144
x <- c(1, 3, 1, 5, 2, 5, 1)
11451145

11461146
expect <- new_data_frame(
@@ -1150,7 +1150,7 @@ test_that("`vec_order_locs()` is working", {
11501150
)
11511151
)
11521152

1153-
expect_identical(vec_order_locs(x), expect)
1153+
expect_identical(vec_locate_sorted_groups(x), expect)
11541154
})
11551155

11561156
test_that("`chr_proxy_collate` can result in keys being seen as identical", {
@@ -1160,8 +1160,8 @@ test_that("`chr_proxy_collate` can result in keys being seen as identical", {
11601160
x_expect <- data_frame(key = c("A", "b"), loc = list(c(2L, 3L), 1L))
11611161
y_expect <- data_frame(key = c("a", "b"), loc = list(c(2L, 3L), 1L))
11621162

1163-
expect_identical(vec_order_locs(x, chr_proxy_collate = tolower), x_expect)
1164-
expect_identical(vec_order_locs(y, chr_proxy_collate = tolower), y_expect)
1163+
expect_identical(vec_locate_sorted_groups(x, chr_proxy_collate = tolower), x_expect)
1164+
expect_identical(vec_locate_sorted_groups(y, chr_proxy_collate = tolower), y_expect)
11651165
})
11661166

11671167
# ------------------------------------------------------------------------------

0 commit comments

Comments
 (0)