diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 82ca6fed617f..451ac6c05f77 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -36,7 +36,7 @@ Imports: utils, vctrs Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source") -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.1.9001 VignetteBuilder: knitr Suggests: decor, diff --git a/r/R/compression.R b/r/R/compression.R index ebd4c54cd82b..bb051b8d5356 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -63,6 +63,8 @@ Codec$create <- function(type = "gzip", compression_level = NA) { #' "zstd", "lz4", "lzo", or "bz2", case insensitive. #' @return Logical: is `type` available? #' @export +#' @examples +#' codec_is_available("gzip") codec_is_available <- function(type) { util___Codec__IsAvailable(compression_from_name(type)) } diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R index 854672b66a2a..3259ff1077c9 100644 --- a/r/R/dataset-format.R +++ b/r/R/dataset-format.R @@ -53,6 +53,18 @@ #' It returns the appropriate subclass of `FileFormat` (e.g. 
`ParquetFileFormat`) #' @rdname FileFormat #' @name FileFormat +#' @examplesIf arrow_with_dataset() +#' ## Semi-colon delimited files +#' # Set up directory for examples +#' tf <- tempfile() +#' dir.create(tf) +#' on.exit(unlink(tf)) +#' write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) +#' +#' # Create FileFormat object +#' format <- FileFormat$create(format = "text", delimiter = ";") +#' +#' open_dataset(tf, format = format) #' @export FileFormat <- R6Class("FileFormat", inherit = ArrowObject, active = list( diff --git a/r/R/dataset.R b/r/R/dataset.R index 745c39af068e..8716ef0d5c5a 100644 --- a/r/R/dataset.R +++ b/r/R/dataset.R @@ -69,6 +69,30 @@ #' @export #' @seealso `vignette("dataset", package = "arrow")` #' @include arrow-package.R +#' @examplesIf arrow_with_dataset() +#' # Set up directory for examples +#' tf <- tempfile() +#' dir.create(tf) +#' on.exit(unlink(tf)) +#' \dontrun{ +#' write_parquet(mtcars[1:10,], file.path(tf, "file1.parquet")) +#' write_parquet(mtcars[11:20,], file.path(tf, "file2.parquet")) +#' write_parquet(mtcars[21:32,], file.path(tf, "file3.parquet")) +#' +#' # You can specify a directory containing the files for your dataset and +#' # open_dataset will scan all files in your directory. +#' open_dataset(tf) +#' +#' # You can also supply a vector of paths +#' open_dataset(c(file.path(tf, "file3.parquet"), file.path(tf, "file2.parquet"))) +#' } +#' ## You must specify the file format if using a format other than parquet. 
+#' write_csv_arrow(mtcars[1:10,], file.path(tf, "file1.csv")) +#' write_csv_arrow(mtcars[11:20,], file.path(tf, "file2.csv")) +#' # This line will result in errors when you try to work with the data +#' \dontrun{open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")))} +#' # This is the correct way to open a dataset containing CSVs +#' open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")), format = "csv") open_dataset <- function(sources, schema = NULL, partitioning = hive_partition(), diff --git a/r/R/flight.R b/r/R/flight.R index 486c59a9e12d..b3b354e82f64 100644 --- a/r/R/flight.R +++ b/r/R/flight.R @@ -21,6 +21,8 @@ #' @param path file system path where the Python module is found. Default is #' to look in the `inst/` directory for included modules. #' @export +#' @examples +#' \dontrun{load_flight_server("demo_flight_server")} load_flight_server <- function(name, path = system.file(package = "arrow")) { reticulate::import_from_path(name, path) } diff --git a/r/R/ipc_stream.R b/r/R/ipc_stream.R index 4f506f3332b8..f2d56864c8ed 100644 --- a/r/R/ipc_stream.R +++ b/r/R/ipc_stream.R @@ -35,6 +35,10 @@ #' serialize data to a buffer. #' [RecordBatchWriter] for a lower-level interface. #' @export +#' @examples +#' tf <- tempfile() +#' on.exit(unlink(tf)) +#' write_ipc_stream(mtcars, tf) write_ipc_stream <- function(x, sink, ...) { x_out <- x # So we can return the data we got if (is.data.frame(x)) { diff --git a/r/R/scalar.R b/r/R/scalar.R index cbda5964a2cb..31a00f9d527f 100644 --- a/r/R/scalar.R +++ b/r/R/scalar.R @@ -23,9 +23,33 @@ #' @docType class #' #' @description A `Scalar` holds a single value of an Arrow type. 
+#' +#' @section Methods: +#' `$ToString()`: convert to a string +#' `$as_vector()`: convert to an R vector +#' `$as_array()`: convert to an Arrow `Array` +#' `$Equals(other)`: is this Scalar equal to `other` +#' `$ApproxEquals(other)`: is this Scalar approximately equal to `other` +#' `$is_valid`: is this Scalar valid +#' `$null_count`: number of invalid values - 1 or 0 +#' `$type`: Scalar type #' #' @name Scalar #' @rdname Scalar +#' @examples +#' Scalar$create(pi) +#' Scalar$create(404) +#' # If you pass a vector into Scalar$create, you get a list containing your items +#' Scalar$create(c(1, 2, 3)) +#' +#' # Comparisons +#' my_scalar <- Scalar$create(99) +#' my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE +#' my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE +#' my_scalar$Equals(Scalar$create(99.000009)) # FALSE +#' my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match) +#' +#' my_scalar$ToString() #' @export Scalar <- R6Class("Scalar", inherit = ArrowDatum, diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index b8d4dc01badf..5bc9475b4086 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -51,3 +51,18 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time) It returns the appropriate subclass of \code{FileFormat} (e.g. 
\code{ParquetFileFormat}) } +\examples{ +\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +## Semi-colon delimited files +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) +write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) + +# Create FileFormat object +format <- FileFormat$create(format = "text", delimiter = ";") + +open_dataset(tf, format = format) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd index 2ef5b02ccbe5..1c115b7c1998 100644 --- a/r/man/Scalar.Rd +++ b/r/man/Scalar.Rd @@ -7,3 +7,30 @@ \description{ A \code{Scalar} holds a single value of an Arrow type. } +\section{Methods}{ + +\verb{$ToString()}: convert to a string +\verb{$as_vector()}: convert to an R vector +\verb{$as_array()}: convert to an Arrow \code{Array} +\verb{$Equals(other)}: is this Scalar equal to \code{other} +\verb{$ApproxEquals(other)}: is this Scalar approximately equal to \code{other} +\verb{$is_valid}: is this Scalar valid +\verb{$null_count}: number of invalid values - 1 or 0 +\verb{$type}: Scalar type +} + +\examples{ +Scalar$create(pi) +Scalar$create(404) +# If you pass a vector into Scalar$create, you get a list containing your items +Scalar$create(c(1, 2, 3)) + +# Comparisons +my_scalar <- Scalar$create(99) +my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE +my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE +my_scalar$Equals(Scalar$create(99.000009)) # FALSE +my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match) + +my_scalar$ToString() +} diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd index 1b5e8278fa9b..5cda813f4167 100644 --- a/r/man/codec_is_available.Rd +++ b/r/man/codec_is_available.Rd @@ -18,3 +18,6 @@ Support for compression libraries depends on the build-time settings of the Arrow C++ library. This function lets you know which are available for use. 
} +\examples{ +codec_is_available("gzip") +} diff --git a/r/man/load_flight_server.Rd b/r/man/load_flight_server.Rd index 7e2000a9ca22..e521efa33282 100644 --- a/r/man/load_flight_server.Rd +++ b/r/man/load_flight_server.Rd @@ -15,3 +15,6 @@ to look in the \verb{inst/} directory for included modules.} \description{ Load a Python Flight server } +\examples{ +\dontrun{load_flight_server("demo_flight_server")} +} diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd index e3e361786274..7175bb132eaf 100644 --- a/r/man/open_dataset.Rd +++ b/r/man/open_dataset.Rd @@ -74,6 +74,33 @@ can accelerate queries that only touch some partitions (files). Call \code{open_dataset()} to point to a directory of data files and return a \code{Dataset}, then use \code{dplyr} methods to query it. } +\examples{ +\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) +\dontrun{ +write_parquet(mtcars[1:10,], file.path(tf, "file1.parquet")) +write_parquet(mtcars[11:20,], file.path(tf, "file2.parquet")) +write_parquet(mtcars[21:32,], file.path(tf, "file3.parquet")) + +# You can specify a directory containing the files for your dataset and +# open_dataset will scan all files in your directory. +open_dataset(tf) + +# You can also supply a vector of paths +open_dataset(c(file.path(tf, "file3.parquet"), file.path(tf, "file2.parquet"))) +} +## You must specify the file format if using a format other than parquet. 
+write_csv_arrow(mtcars[1:10,], file.path(tf, "file1.csv")) +write_csv_arrow(mtcars[11:20,], file.path(tf, "file2.csv")) +# This line will result in errors when you try to work with the data +\dontrun{open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")))} +# This is the correct way to open a dataset containing CSVs +open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")), format = "csv") +\dontshow{\}) # examplesIf} +} \seealso{ \code{vignette("dataset", package = "arrow")} } diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd index 4f742ce91786..a504a31c3048 100644 --- a/r/man/write_ipc_stream.Rd +++ b/r/man/write_ipc_stream.Rd @@ -31,6 +31,11 @@ with some nonstandard behavior, is deprecated. You should explicitly choose the function that will write the desired IPC format (stream or file) since either can be written to a file or \code{OutputStream}. } +\examples{ +tf <- tempfile() +on.exit(unlink(tf)) +write_ipc_stream(mtcars, tf) +} \seealso{ \code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to serialize data to a buffer.