Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add "lazy_duckplyr_df" class that requires collect() #390

Merged
merged 3 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ S3method(.cstr_construct.relational_relexpr_window,relexpr_window)
S3method(add_count,duckplyr_df)
S3method(anti_join,duckplyr_df)
S3method(arrange,duckplyr_df)
S3method(as.data.frame,duckplyr_df)
S3method(as.data.frame,lazy_duckplyr_df)
S3method(as_ducktbl,default)
S3method(as_ducktbl,tbl_duckdb_connection)
S3method(as_tibble,duckplyr_df)
S3method(auto_copy,duckplyr_df)
S3method(collect,duckplyr_df)
S3method(collect,lazy_duckplyr_df)
S3method(compute,duckplyr_df)
S3method(count,duckplyr_df)
S3method(cross_join,duckplyr_df)
Expand Down
23 changes: 19 additions & 4 deletions R/ducktbl.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#'
#' @param ... For `ducktbl()`, passed on to [tibble()].
#' For `as_ducktbl()`, passed on to methods.
#' @param .lazy Logical, whether to create a lazy duckplyr frame.
#'   If `TRUE`, [collect()] must be called before the data can be accessed.
#'
#' @return An object with the following classes:
#' - `"duckplyr_df"`
Expand All @@ -36,14 +37,18 @@
#'
#' y <- ducktbl(a = 1, .lazy = TRUE)
#' y
#' try(y$a)
#' collect(y)$a
#' try(length(y$a))
#' length(collect(y)$a)
#' @export
ducktbl <- function(...) {
ducktbl <- function(..., .lazy = FALSE) {
out <- tibble::tibble(...)

out <- as_duckplyr_df_impl(out)

if (.lazy) {
out <- as_lazy_duckplyr_df(out)
}

out
}

Expand All @@ -55,7 +60,17 @@ ducktbl <- function(...) {
#' @param x The object to convert or to test.
#' @rdname ducktbl
#' @export
as_ducktbl <- function(x, ...) {
as_ducktbl <- function(x, ..., .lazy = FALSE) {
out <- as_ducktbl_dispatch(x, ...)

if (.lazy) {
out <- as_lazy_duckplyr_df(out)
}

return(out)
UseMethod("as_ducktbl")
}
# Internal dispatcher used by as_ducktbl(): selects the "as_ducktbl" S3 method
# for `x`, so that as_ducktbl() itself can post-process the
# result (apply `.lazy`) after the method has run.
#
# `x`: the object to convert; `...`: passed on to the selected method.
as_ducktbl_dispatch <- function(x, ...) {
# Dispatch happens under the generic name "as_ducktbl", not under this
# function's own name, so existing as_ducktbl.* methods are reused.
UseMethod("as_ducktbl")
}

Expand Down
49 changes: 49 additions & 0 deletions R/lazy.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Convert a data frame into a lazy duckplyr frame.
#
# The relation obtained from `x` is turned back into a data frame with
# `allow_materialization = FALSE`. The result is then reconstructed from `x`
# via dplyr_reconstruct() and tagged with the "lazy_duckplyr_df" class.
as_lazy_duckplyr_df <- function(x) {
  lazy_df <- rel_to_df(
    duckdb_rel_from_df(x),
    allow_materialization = FALSE
  )

  add_lazy_duckplyr_df_class(dplyr_reconstruct(lazy_df, x))
}

# Put "lazy_duckplyr_df" at the front of the class vector of `x`.
#
# Duplicates are dropped, so an object that already carries the class ends up
# with exactly one occurrence of it, in first position.
add_lazy_duckplyr_df_class <- function(x) {
  with_lazy <- c("lazy_duckplyr_df", class(x))
  class(x) <- unique(with_lazy)
  x
}

# Strip "lazy_duckplyr_df" from the class vector of `x`, leaving all other
# classes in their original order.
remove_lazy_duckplyr_df_class <- function(x) {
  current <- class(x)
  class(x) <- setdiff(current, "lazy_duckplyr_df")
  x
}

# collect() method for lazy duckplyr frames.
#
# Rebuilds the data frame from the relation of `x` with
# `allow_materialization = TRUE`, reconstructs it from `x`, drops the
# "lazy_duckplyr_df" class, and then calls collect() again on the result so
# that the method for the remaining classes runs.
#' @export
collect.lazy_duckplyr_df <- function(x, ...) {
  materializable <- rel_to_df(
    duckdb_rel_from_df(x),
    allow_materialization = TRUE
  )

  eager <- remove_lazy_duckplyr_df_class(dplyr_reconstruct(materializable, x))
  collect(eager)
}

# as.data.frame() method for duckplyr frames.
#
# Collects `x`, removes the "duckplyr_df", "tbl_df" and "tbl" classes, and
# hands the result to as.data.frame() with the caller's arguments.
#' @export
as.data.frame.duckplyr_df <- function(x, row.names = NULL, optional = FALSE, ...) {
  collected <- collect(x)
  dropped_classes <- c("duckplyr_df", "tbl_df", "tbl")
  class(collected) <- setdiff(class(collected), dropped_classes)

  as.data.frame(collected, row.names = row.names, optional = optional, ...)
}

# as.data.frame() method for lazy duckplyr frames: collect first, then
# convert the collected result with the caller's arguments.
#' @export
as.data.frame.lazy_duckplyr_df <- function(x, row.names = NULL, optional = FALSE, ...) {
  as.data.frame(
    collect(x),
    row.names = row.names,
    optional = optional,
    ...
  )
}

# as_tibble() method for duckplyr frames.
#
# Collects `x`, removes the "duckplyr_df" class, and converts the result with
# as_tibble().
#
# `...` is forwarded to as_tibble() so that arguments such as `.name_repair`
# or `rownames` are honored instead of being silently dropped.
#' @export
as_tibble.duckplyr_df <- function(x, ...) {
  out <- collect(x)
  class(out) <- setdiff(class(out), "duckplyr_df")
  as_tibble(out, ...)
}
2 changes: 2 additions & 0 deletions R/print.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ tbl_sum.duckplyr_df <- function(x) {
c("A duckplyr data frame" = cli::pluralize("{length(x)} variable{?s}"))
}

# dim.lazy_duckplyr_df is not called, special dispatch

#' @importFrom pillar tbl_nrow
#' @export
tbl_nrow.duckplyr_df <- function(x, ...) {
Expand Down
3 changes: 3 additions & 0 deletions R/relational-duckdb.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ duckdb_rel_from_df <- function(df) {
# FIXME: make generic
stopifnot(is.data.frame(df))

# Avoid weird recursions
class(df) <- "data.frame"

rel <- duckdb$rel_from_altrep_df(df, strict = FALSE, allow_materialized = FALSE)
if (!is.null(rel)) {
# Once we're here, we know it's an ALTREP data frame
Expand Down
10 changes: 6 additions & 4 deletions man/ducktbl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion tests/testthat/test-ducktbl.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
test_that("Can construct", {
expect_identical(
ducktbl(a = 1),
as_duckplyr_df_impl(tibble::tibble(a = 1))
as_ducktbl(tibble::tibble(a = 1))
)

expect_identical(ducktbl(a = 1)$a, 1)
})

test_that(".lazy = TRUE forbids materialization", {
  # Build the frame outside expect_error() so only the column access is tested.
  lazy <- ducktbl(a = 1, .lazy = TRUE)

  expect_error(length(lazy$a))
})

test_that(".lazy = TRUE forbids materialization for as_ducktbl()", {
  # Build the frame outside expect_error() so only the column access is tested.
  lazy <- as_ducktbl(data.frame(a = 1), .lazy = TRUE)

  expect_error(length(lazy$a))
})
39 changes: 39 additions & 0 deletions tests/testthat/test-lazy.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
test_that("lazy duckplyr frames will collect", {
  lazy <- ducktbl(a = 1, .lazy = TRUE)

  # Collecting a lazy frame yields the same object as eager construction.
  expect_identical(collect(lazy), ducktbl(a = 1))
})

test_that("eager duckplyr frames are converted to data frames", {
  eager <- ducktbl(a = 1)

  # The result must be a plain data frame without duckplyr or tibble classes.
  expect_identical(as.data.frame(eager), data.frame(a = 1))
})

test_that("lazy duckplyr frames are converted to data frames", {
  lazy <- ducktbl(a = 1, .lazy = TRUE)

  # as.data.frame() on a lazy frame works without an explicit collect().
  expect_identical(as.data.frame(lazy), data.frame(a = 1))
})

test_that("eager duckplyr frames are converted to tibbles", {
  eager <- ducktbl(a = 1)

  # The result must be a plain tibble without the duckplyr class.
  expect_identical(as_tibble(eager), tibble(a = 1))
})

test_that("lazy duckplyr frames are converted to tibbles", {
  lazy <- ducktbl(a = 1, .lazy = TRUE)

  # as_tibble() on a lazy frame works without an explicit collect().
  expect_identical(as_tibble(lazy), tibble(a = 1))
})