Skip to content

Commit 0b57d38

Browse files
authored
truncate wide columns in print.data.table (#7788)
excellent work, thanks!
1 parent 9fb8486 commit 0b57d38

4 files changed

Lines changed: 48 additions & 20 deletions

File tree

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656

5757
11. `between()` now supports `Date` and `IDate` bounds with default `NAbounds=TRUE`, avoiding errors like "Not yet implemented NAbounds=TRUE for this non-numeric and non-character type" when date bounds contain `NA`, [#7281](https://github.com/Rdatatable/data.table/issues/7281). Thanks @grcatlin for the report and fix, and @ben-schwen and @aitap for assistance.
5858

59+
12. `print.data.table()` now truncates long character columns and list-column summaries by default to avoid horizontal console overflow, [#7718](https://github.com/Rdatatable/data.table/issues/7718). When `datatable.prettyprint.char` is `NULL` (the default), the truncation limit is now dynamically calculated based on the available console width. Use `options(datatable.prettyprint.char=Inf)` for the old default behavior (never truncate). Thanks @tdhock for the report and @venom1204 for the fix.
60+
5961
### Notes
6062

6163
1. {data.table} now depends on R 3.5.0 (2018).

R/print.data.table.R

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,12 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
8888
}
8989
require_bit64_if_needed(x)
9090
classes = classes1(toprint)
91-
toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...) # na.encode=FALSE so that NA in character cols print as <NA>
91+
trunc.char = getOption("datatable.prettyprint.char")
92+
if (is.null(trunc.char)) {
93+
rn_w = if (isTRUE(row.names)) nchar(as.character(max(rn))) + 2L else 0L
94+
trunc.char = max(0L, getOption("width") - rn_w - 3L)
95+
}
96+
toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, trunc.char = trunc.char, ...) # na.encode=FALSE so that NA in character cols print as <NA>
9297

9398
# FR #353 - add row.names = logical argument to print.data.table
9499
if (isTRUE(row.names)) rownames(toprint)=paste0(format(rn,right=TRUE,scientific=FALSE),":") else rownames(toprint)=rep.int("", nrow(toprint))
@@ -155,12 +160,12 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
155160
invisible(x)
156161
}
157162

158-
format.data.table = function(x, ..., justify="none") {
163+
format.data.table = function(x, ..., trunc.char = getOption("datatable.prettyprint.char"), justify="none") {
159164
if (is.atomic(x) && !is.null(x)) { ## future R can use if (is.atomic(x))
160165

161166
stopf("Internal structure doesn't seem to be a list. Possibly corrupt data.table.")
162167
}
163-
do.call(cbind, lapply(x, format_col, ..., justify=justify))
168+
do.call(cbind, lapply(x, format_col, ..., trunc.char = trunc.char, justify=justify))
164169
}
165170

166171
shouldPrint = function(x) {
@@ -198,13 +203,13 @@ has_format_method = function(x) {
198203
any(vapply_1b(class(x), f))
199204
}
200205

201-
format_col.default = function(x, ...) {
206+
format_col.default = function(x, ..., trunc.char = getOption("datatable.prettyprint.char")) {
202207
if (!is.null(dim(x)))
203208
"<multi-column>"
204209
else if (is.list(x))
205-
vapply_1c(x, format_list_item, ...)
210+
vapply_1c(x, format_list_item, ..., trunc.char = trunc.char)
206211
else
207-
format(char.trunc(x), ...) # relevant to #37
212+
format(char.trunc(x, trunc.char = trunc.char), ...) # relevant to #37
208213
}
209214

210215
# #2842 -- different columns can have different tzone, so force usage in output
@@ -221,20 +226,21 @@ format_col.POSIXct = function(x, ..., timezone=FALSE) {
221226
}
222227

223228
# #3011 -- expression columns can wrap to newlines which breaks printing
224-
format_col.expression = function(x, ...) format(char.trunc(as.character(x)), ...)
229+
format_col.expression = function(x, ..., trunc.char = getOption("datatable.prettyprint.char")) {
230+
format(char.trunc(as.character(x), trunc.char = trunc.char), ...)
231+
}
225232

226-
format_list_item.default = function(x, ...) {
227-
if (is.null(x)) # NULL item in a list column
228-
"[NULL]" # not '' or 'NULL' to distinguish from those "common" string values in data
229-
else if (is.atomic(x) || inherits(x, "formula")) # FR #2591 - format.data.table issue with columns of class "formula"
230-
paste(c(format(head(x, 6L), ...), if (length(x) > 6L) sprintf("...[%d]", length(x))), collapse=",") # fix for #5435, #37, and #605 - format has to be added here...
233+
format_list_item.default = function(x, ..., trunc.char = getOption("datatable.prettyprint.char")) {
234+
res = if (is.null(x)) # NULL item in a list column
235+
"[NULL]"
236+
else if (is.atomic(x) || inherits(x, "formula"))
237+
paste(c(format(head(x, 6L), ...), if (length(x) > 6L) sprintf("...[%d]", length(x))), collapse=",")
231238
else if (has_format_method(x) && length(formatted<-format(x, ...))==1L) {
232-
# the column's class does not have a format method (otherwise it would have been used by format_col and this
233-
# format_list_item would not be reached) but this particular list item does have a format method so use it
234239
formatted
235240
} else {
236241
paste0("<", class1(x), paste_dims(x), ">")
237242
}
243+
char.trunc(res, trunc.char = trunc.char)
238244
}
239245

240246
# #6592 -- nested 1-column frames breaks printing
@@ -247,12 +253,14 @@ format_list_item.data.frame = function(x, ...) {
247253
# Current implementation may have issues when dealing with strings that mix full-width and half-width characters;
248254
# if this becomes a problem in the future, we could consider string traversal instead.
249255
char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) {
256+
if (is.null(trunc.char)) return(x)
250257
trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE)
251258
if (!is.character(x) || trunc.char <= 0L) return(x)
252-
nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096
253-
nchar_chars = nchar(x, 'char')
259+
nchar_width = nchar(x, 'width', allowNA = TRUE)
260+
nchar_chars = nchar(x, 'char', allowNA = TRUE)
254261
is_full_width = nchar_width > nchar_chars
255-
idx = !is.na(x) & pmin(nchar_width, nchar_chars) > trunc.char
262+
is_full_width[is.na(is_full_width)] = FALSE
263+
idx = !is.na(x) & !is.na(nchar_width) & pmin(nchar_width, nchar_chars) > trunc.char
256264
x[idx] = paste0(strtrim(x[idx], trunc.char * fifelse(is_full_width[idx], 2L, 1L)), "...")
257265
x
258266
}

inst/tests/tests.Rraw

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21660,3 +21660,12 @@ test(2374.08, key(DT[, .(a, a)]), NULL)
2166021660
test(2374.09, key(subset(DT, select=c(a, a))), NULL)
2166121661
DT = data.table(a=1:2, a.1=3:4, val=10:11)
2166221662
test(2374.10, key(DT[, .(a.1, sum(val)), keyby=.(a, a)]), NULL)
21663+
21664+
# issue 7718: print.data.table truncates long character columns based on width
21665+
test(2375.1, print(data.table(x="1234567890")), output="1234...", options=list(width=10, datatable.prettyprint.char=NULL))
21666+
test(2375.11, print(data.table(x="1234567890")), output="1234567890", options=list(width=10, datatable.prettyprint.char=Inf))
21667+
test(2375.2, print(data.table(L=list(1:20))), output="1,2,3,4,...", options=list(width=15, datatable.prettyprint.char=NULL))
21668+
test(2375.3, print(data.table(x=c("short", "abcdefghijklmnopqrstuvwxyz"))), output="abcdefghijklmn...", options=list(width=20, datatable.prettyprint.char=NULL))
21669+
test(2375.4, print(data.table(x="abcdefghijklmnopqrstuvwxyz")), output="abcdefghijklmnopqrstuvwxyz", options=list(width=200, datatable.prettyprint.char=NULL))
21670+
test(2375.5, print(data.table(id=1L, score=99.1, txt="abcdefghijklmnopqrstuvwxyz")), output="abcdefghijklmn...", options=list(width=20, datatable.prettyprint.char=NULL))
21671+
test(2375.6, print(data.table(x=rep("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e6)), topn=1), output="1000000: ABCDEFGHIJKLM...", options=list(width=25, datatable.prettyprint.char=NULL))

man/print.data.table.Rd

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,18 @@
3030
timezone=FALSE, \dots)
3131

3232
format_col(x, \dots)
33-
\method{format_col}{default}(x, \dots)
33+
\method{format_col}{default}(x, \dots, trunc.char = getOption("datatable.prettyprint.char"))
3434
\method{format_col}{POSIXct}(x, \dots, timezone=FALSE)
35-
\method{format_col}{expression}(x, \dots)
35+
\method{format_col}{expression}(x, \dots, trunc.char = getOption("datatable.prettyprint.char"))
3636

3737
format_list_item(x, \dots)
38-
\method{format_list_item}{default}(x, \dots)
38+
\method{format_list_item}{default}(x, \dots, trunc.char = getOption("datatable.prettyprint.char"))
3939
}
4040
\arguments{
4141
\item{x}{ A \code{data.table}. }
4242
\item{topn}{ The number of rows to be printed from the beginning and end of tables with more than \code{nrows} rows. }
4343
\item{nrows}{ The number of rows which will be printed before truncation is enforced. }
44+
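\item{trunc.char}{ The maximum number of characters shown for character columns and list-column summaries; longer values are cut at this length and suffixed with \code{"..."}. Defaults to \code{getOption("datatable.prettyprint.char")}. When this is \code{NULL} (the default), \code{print.data.table} computes the limit from \code{getOption("width")}, whereas calling the formatting helpers directly with \code{NULL} applies no truncation. }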
4445
\item{class}{ If \code{TRUE}, the resulting output will include above each column its storage class (or a self-evident abbreviation thereof). When combined with \code{col.names="auto"} and tables >20 rows, classes will also appear at the bottom.}
4546
\item{row.names}{ If \code{TRUE}, row indices will be printed alongside \code{x}. }
4647
\item{col.names}{ One of three flavours for controlling the display of column names in output. \code{"auto"} includes column names above the data, as well as below the table if \code{nrow(x) > 20} (when \code{class=TRUE}, column classes will also appear at the bottom). \code{"top"} excludes this lower register when applicable, and \code{"none"} suppresses column names altogether (as well as column classes if \code{class = TRUE}). }
@@ -134,5 +135,13 @@
134135
iris_agg = iris[ , .(reg = list(lm(Sepal.Length ~ Petal.Length))), by = Species]
135136
format_list_item.lm = function(x, ...) sprintf('<lm:\%s>', format(x$call$formula))
136137
print(iris_agg)
138+
139+
# Truncation based on console width
140+
old = options(width = 25, datatable.prettyprint.char = NULL)
141+
data.table(x = "abcdefghijklmnopqrstuvwxyz", L = list(1:25))
142+
143+
# Dynamic truncation: Content shrinks as row labels grow
144+
print(data.table(x = rep("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e6)), topn = 1)
145+
options(old)
137146
}
138147

0 commit comments

Comments
 (0)