Skip to content

Commit 642d51b

Browse files
print.data.table Gets New Argument "show.indices" (#6187)
* new argument to print indices alongside x * news * better wording * new approach * oops * wording * review suggestions * vestigial * don't need index_names * whoops, it's not fixed=TRUE anymore * review * unneeded assignment --------- Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
1 parent de0cf94 commit 642d51b

File tree

5 files changed

+116
-0
lines changed

5 files changed

+116
-0
lines changed

NEWS.md

+2
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@
9898

9999
15. `dcast()` now issues a warning when `fun.aggregate` is used but not provided by the user. `fun.aggregate` defaults to `length` in this case. Previously, only a message was issued. However, relying on this default often signals unexpected duplicates in the data. Therefore, a stricter class of signal was deemed more appropriate, [#5386](https://github.com/Rdatatable/data.table/issues/5386). The warning is classed as `dt_missing_fun_aggregate_warning`, allowing for more targeted handling in user code. Thanks @MichaelChirico for the suggestion and @Nj221102 for the fix.
100100

101+
16. `print.data.table` gains new argument `show.indices` and option `datatable.show.indices` that allow the user to print a `data.table`'s indices as columns without having to modify the `data.table` itself. Thanks @MichaelChirico for the report and @joshhwuu for the PR.
102+
101103
## TRANSLATIONS
102104
103105
1. Fix a typo in a Mandarin translation of an error message that was hiding the actual error message, [#6172](https://github.com/Rdatatable/data.table/issues/6172). Thanks @trafficfan for the report and @MichaelChirico for the fix.

R/onLoad.R

+1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
"datatable.print.colnames"="'auto'", # for print.data.table
8080
"datatable.print.keys"="TRUE", # for print.data.table
8181
"datatable.print.trunc.cols"="FALSE", # for print.data.table
82+
"datatable.show.indices"="FALSE", # for print.data.table
8283
"datatable.allow.cartesian"="FALSE", # datatable.<argument name>
8384
"datatable.dfdispatchwarn"="TRUE", # not a function argument
8485
"datatable.warnredundantby"="TRUE", # not a function argument

R/print.data.table.R

+14
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
77
col.names=getOption("datatable.print.colnames"),
88
print.keys=getOption("datatable.print.keys"),
99
trunc.cols=getOption("datatable.print.trunc.cols"),
10+
show.indices=getOption("datatable.show.indices"),
1011
quote=FALSE,
1112
na.print=NULL,
1213
timezone=FALSE, ...) {
@@ -64,15 +65,28 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
6465
}
6566
return(invisible(x))
6667
}
68+
if (show.indices) {
69+
if (is.null(indices(x))) {
70+
show.indices = FALSE
71+
} else {
72+
index_dt <- as.data.table(attributes(attr(x, 'index')))
73+
print_names <- paste0("index", if (ncol(index_dt) > 1L) seq_len(ncol(index_dt)) else "", ":", sub("^__", "", names(index_dt)))
74+
setnames(index_dt, print_names)
75+
}
76+
}
6777
n_x = nrow(x)
6878
if ((topn*2L+1L)<n_x && (n_x>nrows || !topnmiss)) {
6979
toprint = rbindlist(list(head(x, topn), tail(x, topn)), use.names=FALSE) # no need to match names because head and tail of same x, and #3306
7080
rn = c(seq_len(topn), seq.int(to=n_x, length.out=topn))
7181
printdots = TRUE
82+
idx = c(seq_len(topn), seq(to=nrow(x), length.out=topn))
83+
toprint = x[idx, ]
84+
if (show.indices) toprint = cbind(toprint, index_dt[idx, ])
7285
} else {
7386
toprint = x
7487
rn = seq_len(n_x)
7588
printdots = FALSE
89+
if (show.indices) toprint = cbind(toprint, index_dt)
7690
}
7791
toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...) # na.encode=FALSE so that NA in character cols print as <NA>
7892
require_bit64_if_needed(x)

inst/tests/tests.Rraw

+84
Original file line numberDiff line numberDiff line change
@@ -18612,3 +18612,87 @@ test(2263.3, options=list(datatable.verbose=TRUE, datatable.optimize=0L), names(
1861218612
test(2263.4, options=list(datatable.verbose=TRUE, datatable.optimize=Inf), dt[, .N, b], data.table(b=dt$b, N=1L), output="GForce optimized j to")
1861318613
test(2263.5, options=list(datatable.verbose=TRUE, datatable.optimize=Inf), dt[, .N, .(b,c)], data.table(b=dt$b, c=dt$c, N=1L), output="GForce optimized j to")
1861418614
test(2263.6, options=list(datatable.verbose=TRUE, datatable.optimize=Inf), names(attributes(dt[, .N, b]$b)), c("class", "att"), output="GForce optimized j to")
18615+
18616+
# tests for printing indices alongside data.tables
18617+
NN = 200
18618+
set.seed(2024)
18619+
DT = data.table(
18620+
grp1 = sample(100, NN, TRUE),
18621+
grp2 = sample(90, NN, TRUE),
18622+
grp3 = sample(80, NN, TRUE))
18623+
setkey(DT, grp1, grp2)
18624+
setindex(DT, grp1, grp3)
18625+
ans = c(
18626+
" grp1 grp2 grp3 index:grp1__grp3",
18627+
" 1: 1 5 15 1",
18628+
" 2: 1 24 60 2",
18629+
" 3: 2 26 32 5",
18630+
" 4: 2 36 57 3",
18631+
" 5: 2 51 30 4",
18632+
" --- ",
18633+
"196: 98 77 45 195",
18634+
"197: 98 87 70 197",
18635+
"198: 100 18 21 198",
18636+
"199: 100 36 51 199",
18637+
"200: 100 38 56 200")
18638+
# test printing with 1 index column, no markers for order
18639+
test(2264.1, print(DT, show.indices=TRUE), output=ans)
18640+
# test that options work as well
18641+
test(2264.2, options=list(datatable.show.indices=TRUE), print(DT), output=ans)
18642+
setindex(DT, grp3, grp1)
18643+
ans = c(
18644+
" grp1 grp2 grp3 index1:grp1__grp3 index2:grp3__grp1",
18645+
" 1: 1 5 15 1 10",
18646+
" 2: 1 24 60 2 119",
18647+
" 3: 2 26 32 5 164",
18648+
" 4: 2 36 57 3 192",
18649+
" 5: 2 51 30 4 63",
18650+
" --- ",
18651+
"196: 98 77 45 195 11",
18652+
"197: 98 87 70 197 66",
18653+
"198: 100 18 21 198 31",
18654+
"199: 100 36 51 199 139",
18655+
"200: 100 38 56 200 159")
18656+
# test for two indices, with markers to show order
18657+
test(2264.3, print(DT, show.indices=TRUE), output=ans)
18658+
test(2264.4, options=list(datatable.show.indices=TRUE), print(DT), output=ans)
18659+
setindex(DT, NULL) # clear indices
18660+
# if no indices are set, simply ignore
18661+
test(2264.5, print(DT, show.indices=TRUE), notOutput="index:grp1__grp3")
18662+
test(2264.6, options=list(datatable.show.indices=TRUE), print(DT, show.indices=TRUE), notOutput="index:grp1__grp3")
18663+
setindex(DT, grp3)
18664+
ans = c(
18665+
" grp1 grp2 grp3 index:grp3",
18666+
" 1: 1 5 15 10",
18667+
" 2: 1 24 60 119",
18668+
" 3: 2 26 32 164",
18669+
" 4: 2 36 57 192",
18670+
" 5: 2 51 30 63",
18671+
" --- ",
18672+
"196: 98 77 45 11",
18673+
"197: 98 87 70 66",
18674+
"198: 100 18 21 31",
18675+
"199: 100 36 51 139",
18676+
"200: 100 38 56 159")
18677+
test(2264.7, print(DT, show.indices=TRUE), output=ans)
18678+
NN = 10
18679+
DT = data.table(
18680+
grp1 = sample(100, NN, TRUE),
18681+
grp2 = sample(90, NN, TRUE),
18682+
grp3 = sample(80, NN, TRUE))
18683+
setindex(DT, grp1, grp3)
18684+
setindex(DT, grp3, grp1)
18685+
ans = c(
18686+
" grp1 grp2 grp3 index1:grp1__grp3 index2:grp3__grp1",
18687+
" 1: 77 61 53 3 5",
18688+
" 2: 80 66 37 8 4",
18689+
" 3: 27 42 8 5 3",
18690+
" 4: 66 37 7 4 7",
18691+
" 5: 38 69 5 6 2",
18692+
" 6: 72 89 69 1 10",
18693+
" 7: 86 52 16 2 1",
18694+
" 8: 28 35 62 10 8",
18695+
" 9: 95 82 80 7 6",
18696+
"10: 83 64 41 9 9")
18697+
# test where topn isn't necessary
18698+
test(2264.8, print(DT, show.indices=TRUE), output=ans)

man/print.data.table.Rd

+15
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
col.names=getOption("datatable.print.colnames"), # default: "auto"
2626
print.keys=getOption("datatable.print.keys"), # default: TRUE
2727
trunc.cols=getOption("datatable.print.trunc.cols"), # default: FALSE
28+
show.indices=getOption("datatable.show.indices"), # default: FALSE
2829
quote=FALSE,
2930
na.print=NULL,
3031
timezone=FALSE, \dots)
@@ -46,6 +47,7 @@
4647
\item{col.names}{ One of three flavours for controlling the display of column names in output. \code{"auto"} includes column names above the data, as well as below the table if \code{nrow(x) > 20}. \code{"top"} excludes this lower register when applicable, and \code{"none"} suppresses column names altogether (as well as column classes if \code{class = TRUE}). }
4748
\item{print.keys}{ If \code{TRUE}, any \code{\link{key}} and/or \code{\link[=indices]{index}} currently assigned to \code{x} will be printed prior to the preview of the data. }
4849
\item{trunc.cols}{ If \code{TRUE}, only the columns that can be printed in the console without wrapping the columns to new lines will be printed (similar to \code{tibbles}). }
50+
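\item{show.indices}{ If \code{TRUE}, each \code{\link[=indices]{index}} currently assigned to \code{x} will be printed as an additional column alongside the columns of \code{x}. Has no effect if \code{x} has no indices. }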
4951
\item{quote}{ If \code{TRUE}, all output will appear in quotes, as in \code{print.default}. }
5052
\item{timezone}{ If \code{TRUE}, time columns of class POSIXct or POSIXlt will be printed with their timezones (if attribute is available). }
5153
\item{na.print}{ The string to be printed in place of \code{NA} values, as in \code{print.default}. }
@@ -116,6 +118,19 @@
116118
x = data.table(z = c(1 + 3i, 2 - 1i, pi + 2.718i))
117119
print(x)
118120

121+
old = options(datatable.show.indices=TRUE)
122+
NN = 200
123+
set.seed(2024)
124+
DT = data.table(
125+
grp1 = sample(100, NN, TRUE),
126+
grp2 = sample(90, NN, TRUE),
127+
grp3 = sample(80, NN, TRUE)
128+
)
129+
setkey(DT, grp1, grp2)
130+
setindex(DT, grp1, grp3)
131+
print(DT)
132+
options(old)
133+
119134
iris = as.data.table(iris)
120135
iris_agg = iris[ , .(reg = list(lm(Sepal.Length ~ Petal.Length))), by = Species]
121136
format_list_item.lm = function(x, ...) sprintf('<lm:\%s>', format(x$call$formula))

0 commit comments

Comments
 (0)