Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
fe837d4
add doc page for data.table options
Mukulyadav2004 Jun 18, 2025
2071b3f
add script
Mukulyadav2004 Jun 19, 2025
f813423
add install remote in workflow
Mukulyadav2004 Jun 19, 2025
3a123cf
small typo
Mukulyadav2004 Jun 19, 2025
a1d66a1
Merge branch 'master' into issue_6720
Mukulyadav2004 Jun 23, 2025
d96356d
restructure script
Mukulyadav2004 Jun 23, 2025
9639787
restore (bad merge?)
MichaelChirico Jun 23, 2025
8d744fd
trailing ws
MichaelChirico Jun 23, 2025
8093860
terminal newline
MichaelChirico Jun 23, 2025
920f006
terminal newline
MichaelChirico Jun 23, 2025
88b3002
more potential aliases
MichaelChirico Jun 23, 2025
42132c9
clarification for reading outside interactive sessions
MichaelChirico Jun 23, 2025
205ebdf
Move the "See..." reference outside \describe{}
MichaelChirico Jun 23, 2025
eaa2b67
rm extra '.'
MichaelChirico Jun 23, 2025
3f4649f
style change: mention default up-front
MichaelChirico Jun 23, 2025
f234cb6
sweep '`' usage
MichaelChirico Jun 23, 2025
06e4d00
Update .ci/linters/rd/options_doc_check.R
Mukulyadav2004 Jun 23, 2025
00df531
Update .ci/linters/rd/options_doc_check.R
Mukulyadav2004 Jun 23, 2025
4262ce8
chng if to else if and _ast to _for_dt_optopns
Mukulyadav2004 Jun 23, 2025
44c9d79
remove unnecessary line
Mukulyadav2004 Jun 23, 2025
2a4941c
further simplify
MichaelChirico Jun 23, 2025
d2a2eb0
more simplification
MichaelChirico Jun 23, 2025
08a861a
simplify again: remove a helper
MichaelChirico Jun 23, 2025
9e6df34
further simplify, unify helper naming
MichaelChirico Jun 23, 2025
a706231
upd to sggsns
Mukulyadav2004 Jun 24, 2025
e28cc34
updt sgns
Mukulyadav2004 Jun 24, 2025
d522d73
consistent naming style
MichaelChirico Jun 24, 2025
6f1a16e
can't if(grepl(readLines())) b/c it's a vector
MichaelChirico Jun 24, 2025
34ede2f
fix naming at call site
MichaelChirico Jun 24, 2025
1bee980
another renaming
MichaelChirico Jun 24, 2025
f50eab3
ignore nomatch (deprecated)
MichaelChirico Jun 24, 2025
fb8831e
add nm to desc
Mukulyadav2004 Jun 24, 2025
d8c3ce4
one more [base] qualification
MichaelChirico Jun 24, 2025
d973d9f
fine-tune allow.cartesian description
MichaelChirico Jun 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .ci/check-options.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Run with: Rscript .ci/check-options.R
cat(">> Checking data.table options documentation consistency\n")

# Scan R source code for data.table options.
# An option counts as "used in code" when its name appears as a quoted string
# literal (single or double quotes), e.g. "datatable.verbose".
opt_literal_regex <- '["\'](datatable\\.[.A-Za-z0-9]+)["\']'
r_files <- list.files("R", pattern = "\\.R$", full.names = TRUE)
quoted_opts <- unlist(lapply(r_files, function(f) {
  # warn = FALSE already silences readLines' own warnings (e.g. a missing
  # final newline), so no extra suppressWarnings() wrapper is needed.
  src <- readLines(f, warn = FALSE)
  regmatches(src, gregexpr(opt_literal_regex, src))
}))
# The matches still carry their surrounding quotes; strip them, then
# de-duplicate and sort so the result is stable across runs.
code_opts <- sort(unique(gsub('["\']', '', quoted_opts)))

# Scan the documentation file for data.table options.
# A documented option is any \code{datatable.<name>} occurrence in the Rd source.
doc_file <- "man/data.table-options.Rd"
if (!file.exists(doc_file)) stop("CRITICAL: '", doc_file, "' not found.")
# Read the file once and reuse the lines as both the regmatches() subject and
# the gregexpr() input.
doc_lines <- readLines(doc_file, warn = FALSE)
# The look-behind keeps the literal "\code{" prefix out of the match, leaving
# only the option name; look-behind requires perl = TRUE.
doc_opts <- sort(unique(unlist(
  regmatches(doc_lines, gregexpr("(?<=\\\\code\\{)datatable\\.[^}]+", doc_lines, perl = TRUE))
)))

# Compare the final lists and report status
cat(sprintf(" Found %d options in code, %d in documentation.\n", length(code_opts), length(doc_opts)))

# Options referenced in the R sources but absent from the Rd page.
miss_in_doc <- setdiff(code_opts, doc_opts)
# Options listed in the Rd page that no R source file references.
# The comparison must be against code_opts: diffing doc_opts with itself is
# always empty and would silently hide stale or misspelled documented options.
miss_in_code <- setdiff(doc_opts, code_opts)

if (length(miss_in_doc) > 0L || length(miss_in_code) > 0L) {
  message(" Mismatch in data.table options documentation:")
  if (length(miss_in_doc) > 0L) {
    message(" In code but MISSING from docs:\n - ", paste(miss_in_doc, collapse="\n - "))
  }
  if (length(miss_in_code) > 0L) {
    message("\n In docs but NOT in code (check for typos/deprecation):\n - ", paste(miss_in_code, collapse="\n - "))
  }
  # Non-zero exit status so the CI step running this script is marked failed.
  quit(status = 1)
}

message(" Options documentation is perfectly in sync.")
14 changes: 14 additions & 0 deletions .github/workflows/code-quality.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,17 @@ jobs:
- name: Lint
run: for (f in list.files('.ci/linters/md', full.names=TRUE)) source(f)
shell: Rscript {0}
build:
  runs-on: ubuntu-latest
  steps:
    # checkout@v2 is an outdated major version; v4 is the maintained release.
    - uses: actions/checkout@v4
    - uses: r-lib/actions/setup-r@v2
    # devtools is called by the final step, so install it explicitly here.
    # NOTE(review): presumably install_deps() does not pull in devtools unless
    # DESCRIPTION lists it -- confirm against the package's DESCRIPTION.
    - name: Install remotes and devtools
      run: Rscript -e "install.packages(c('remotes', 'devtools'))"
    - name: Install dependencies
      run: Rscript -e 'remotes::install_deps(dependencies = TRUE)'
    # Fails the job when the options in R/ and man/data.table-options.Rd differ.
    - name: Check documentation and options consistency
      run: Rscript .ci/check-options.R
    - name: Run R CMD check
      run: Rscript -e 'devtools::check()'
Comment thread
MichaelChirico marked this conversation as resolved.
Outdated

159 changes: 159 additions & 0 deletions man/data.table-options.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
\name{data.table-options}
\alias{data.table-options}
\alias{data.table.options}
Comment thread
MichaelChirico marked this conversation as resolved.

\title{Global Options for the data.table Package}

\description{
The data.table package uses a number of global options to control its
behavior. These are regular R options that can be set with
\code{\link[base]{options}()} and retrieved with \code{\link[base]{getOption}()}.

This page provides a comprehensive, up-to-date list of all user-configurable
options.
Comment thread
MichaelChirico marked this conversation as resolved.
Outdated
}

\usage{
# Get the current value of an option
getOption("datatable.print.topn")

# Set a new value for an option
options(datatable.print.topn = 10)
}

\section{Printing Options}{
\describe{
\item{\code{datatable.print.topn}}{An integer. When a data.table is printed,
only the first topn and last topn rows are displayed.
Default: \code{5L}. See \code{\link{print.data.table}}.}
Comment thread
MichaelChirico marked this conversation as resolved.
Outdated
\item{\code{datatable.print.nrows}}{An integer. The total number of rows
to print before the topn logic is triggered.
Default: \code{100L}.}
\item{\code{datatable.print.class}}{A logical. If \code{TRUE}, the class of
each column is printed below its name.
Default: \code{FALSE}.}
\item{\code{datatable.print.keys}}{A logical. If \code{TRUE}, the table's
keys are printed above the data.
Default: \code{FALSE}.}
\item{\code{datatable.show.indices}}{A logical. A synonym for \code{datatable.print.keys}, kept for historical reasons.
Default: \code{TRUE}.}
\item{\code{datatable.print.trunc.cols}}{A logical. If \code{TRUE} and a
table has more columns than fit on the screen, it truncates the middle
columns.
Default: \code{FALSE}.}
\item{\code{datatable.prettyprint.char}}{An integer. The maximum number of
characters to display in a character column cell before truncating.
Default: \code{100L}.}
\item{\code{datatable.print.colnames}}{A logical. If \code{TRUE}, prints column names.
Default: \code{TRUE}.}
\item{\code{datatable.print.rownames}}{A logical. If \code{TRUE}, prints row numbers.
Default: \code{TRUE}.}
}
}

\section{File I/O Options (fread and fwrite)}{
\describe{
\item{\code{datatable.fread.input.cmd.message}}{A logical. If \code{TRUE},
\code{fread} will print the shell command it is using when the input is a
command (e.g., \code{fread("grep ...")}).
Default: \code{TRUE}. See \code{\link{fread}}.}
\item{\code{datatable.fread.datatable}}{A logical. If \code{TRUE}, \code{fread}
returns a \code{data.table}. If \code{FALSE}, it returns a \code{data.frame}.
Default: \code{TRUE}.}
\item{\code{datatable.integer64}}{A character string. Controls how \code{fread}
handles 64-bit integers. Can be \code{"integer64"}, \code{"double"}, or \code{"character"}.
Default: \code{"integer64"}.}
\item{\code{datatable.logical01}}{A logical. If \code{TRUE}, \code{fread} will
interpret columns containing only 0 and 1 as logical.
Default: \code{FALSE}.}
\item{\code{datatable.keepLeadingZeros}}{A logical. If \code{TRUE}, \code{fread}
keeps fields that have leading zeros as character strings, preserving the
zeros; otherwise such fields may be coerced to numeric.
Default: \code{FALSE}.}
\item{\code{datatable.logicalYN}}{A logical. If \code{TRUE}, \code{fread}
will interpret \code{"Y"} and \code{"N"} as logical.
Default: \code{FALSE}.}
\item{\code{datatable.na.strings}}{A character vector. Global default for strings that
\code{fread} should interpret as \code{NA}.
Default: \code{"NA"}.}
\item{\code{datatable.fwrite.sep}}{A character string. The default separator
used by \code{fwrite}.
Default: \code{","}.}
\item{\code{datatable.showProgress}}{An integer or logical. Controls whether
long-running operations like \code{fread} display a progress bar.
Default: \code{interactive()}.}
}
}

\section{Join and Subset Options}{
\describe{
\item{\code{datatable.allow.cartesian}}{A logical safety switch. If \code{FALSE}, a join
is not allowed if the result would have more rows than the larger of the two tables.
Default: \code{FALSE}. See \code{\link{data.table}}.}
\item{\code{datatable.nomatch}}{Controls the behavior of non-matching rows in
a join. Can be set to \code{0L} to drop non-matching rows.
Default: \code{NA}.}
}
}

\section{Performance and Indexing Options}{
\describe{
\item{\code{datatable.auto.index}}{A logical. If \code{TRUE}, \code{data.table}
automatically creates a secondary index on-the-fly when a column is first
used in a subset, speeding up all subsequent queries.
Default: \code{TRUE}.}
\item{\code{datatable.use.index}}{A logical. A global switch to control
whether existing secondary indices are used for subsetting.
Default: \code{TRUE}.}
\item{\code{datatable.forder.auto.index}}{A logical. Similar to \code{datatable.auto.index},
but applies to ordering operations (\code{forder}).
Default: \code{TRUE}.}
\item{\code{datatable.optimize}}{An integer controlling the GForce query
optimization engine. The default enables all possible optimizations.
See \code{\link{datatable.optimize}}.
Default: \code{Inf}.}
\item{\code{datatable.alloccol}}{An integer. Controls the number of column
slots to pre-allocate, improving performance when adding many columns.
See \code{\link{alloc.col}}.
Default: \code{1024L}.}
\item{\code{datatable.reuse.sorting}}{A logical. If \code{TRUE}, \code{data.table}
can reuse the sorted order of a table in joins, improving performance.
Default: \code{TRUE}.}
}
}

\section{Development and Verbosity Options}{
\describe{
\item{\code{datatable.quiet}}{A logical. The master switch to suppress all
\code{data.table} status messages, including the startup message.
Default: \code{FALSE}.}
\item{\code{datatable.verbose}}{A logical. If \code{TRUE}, \code{data.table} will
print detailed diagnostic information as it processes a query.
Default: \code{FALSE}.}
\item{\code{datatable.pedantic}}{A logical. If \code{TRUE}, \code{data.table}
enters a "pedantic" mode, issuing helpful warnings for potentially
unintentional user behavior.
Default: \code{FALSE}.}
\item{\code{datatable.dfdispatchwarn}}{A logical. If \code{TRUE}, warns
when a generic function from another package is applied to a \code{data.table}.
Default: \code{TRUE}.}
\item{\code{datatable.warnredundantby}}{A logical. If \code{TRUE}, \code{data.table}
will warn when grouping by columns that are already the key of the table.
Default: \code{TRUE}.}
\item{\code{datatable.enlist}}{Experimental feature. If set to a function
(e.g., \code{list}), the \code{j} expression can return a \code{list}, which will then
be "enlisted" into columns in the result.
Default: \code{NULL}.}
}
}

\seealso{
\code{\link[base]{options}},
\code{\link[base]{getOption}},
\code{\link{data.table}}
}

\keyword{data}
\keyword{utilities}
Loading