Skip to content

Commit 5af2d9f

Browse files
committed
load_tree_data(): auto-detect CSV data types and force PLT_CN type
1 parent 3ed5fa8 commit 5af2d9f

2 files changed

Lines changed: 90 additions & 18 deletions

File tree

R/load_tree_data.R

Lines changed: 71 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@
55
#' .gpkg), or a PostgreSQL database connection. Other data sources are also
66
#' possible. File-based sources can be read from compressed archives without
77
#' prior extraction if desired (e.g., .zip), and network-hosted files can be
8-
#' read directly without prior download (see Details). This function provides
9-
#' certain conveniences, but tree data could also be loaded in various other
10-
#' ways (e.g., `read.csv()`).
8+
#' read directly without prior download (see Details).
119
#'
1210
#' @details
1311
#' A data source is most commonly specified as one of the following:
@@ -71,6 +69,9 @@
7169
#' @param sql Optional character string containing a SQL SELECT statement to
7270
#' execute on `src` (instead of selecting all records, potentially from a subset
7371
#' of columns, i.e., mutually exclusive with `table` and/or `columns`).
72+
#' @param quoted_cols_as_char A logical value indicating whether to auto-detect
73+
#' columns that contain quoted values as `"character"` type, `TRUE` by default.
74+
#' Only used when `src` is a CSV file.
7475
#' @return
7576
#' A data frame containing tree records fetched from `src`.
7677
#'
@@ -84,6 +85,13 @@
8485
#'
8586
#' For more details: \url{https://gdal.org/en/stable/drivers/vector/index.html}
8687
#'
88+
#' Loading tree data from a PostgreSQL database requires GDAL built with
89+
#' support for the PostgreSQL client library (can be checked with
90+
#' `gdalraster::gdal_formats("postgresql")`).
91+
#'
92+
#' Column names are case-sensitive in \pkg{FIAstemmap} functions, and are
93+
#' assumed to follow the FIADB uppercase naming convention.
94+
#'
8795
#' @seealso
8896
#' [DEFAULT_TREE_COLUMNS]
8997
#'
@@ -95,7 +103,7 @@
95103
#' head(tree)
96104
#' @export
97105
load_tree_data <- function(src, table = NULL, columns = DEFAULT_TREE_COLUMNS,
98-
sql = NULL) {
106+
sql = NULL, quoted_cols_as_char = TRUE) {
99107

100108
if (missing(src) || is.null(src))
101109
stop("'src' is required")
@@ -108,6 +116,12 @@ load_tree_data <- function(src, table = NULL, columns = DEFAULT_TREE_COLUMNS,
108116
stop("could not connect to 'src'", call. = FALSE)
109117
}
110118

119+
src_fmt <- gdalraster::ogr_ds_format(src)
120+
if (is.null(src_fmt)) {
121+
cli::cli_alert_danger("unsupported format: {.path {src}}")
122+
stop("'src' is not recognized as a supported format", call. = FALSE)
123+
}
124+
111125
if (!is.null(table) && !is.null(sql))
112126
stop("'table' and 'sql' are mutually exclusive", call. = FALSE)
113127

@@ -120,18 +134,26 @@ load_tree_data <- function(src, table = NULL, columns = DEFAULT_TREE_COLUMNS,
120134
if (!is.null(sql) && !(is.character(sql) && length(sql == 1)))
121135
stop("'sql' must be a single character string")
122136

137+
if (missing(quoted_cols_as_char) || is.null(quoted_cols_as_char)) {
138+
quoted_cols_as_char <- TRUE
139+
} else if (!(is.logical(quoted_cols_as_char) ||
140+
length(quoted_cols_as_char) != 1)) {
141+
stop("'quoted_cols_as_char' must be a single logical value",
142+
call. = FALSE)
143+
}
144+
123145
if (is.null(sql) && !is.null(columns)) {
124146
if (any(c("DIST", "AZIMUTH") %in% columns)) {
125-
tbl <- ""
147+
tbl_tmp <- ""
126148
if (!is.null(table))
127-
tbl <- table
149+
tbl_tmp <- table
128150

129-
if (gdalraster::ogr_field_index(src, tbl, "DIST") < 0 ||
130-
gdalraster::ogr_field_index(src, tbl, "AZIMUTH") < 0) {
151+
if (gdalraster::ogr_field_index(src, tbl_tmp, "DIST") < 0 ||
152+
gdalraster::ogr_field_index(src, tbl_tmp, "AZIMUTH") < 0) {
131153

132-
cli::cli_alert_warning(c(
133-
"The data source does not have ",
134-
"{.field DIST} and/or {.field AZIMUTH}"))
154+
cli::cli_alert_warning(
155+
c("The data source does not have ",
156+
"{.field DIST} and/or {.field AZIMUTH}"))
135157

136158
columns <- columns[!columns %in% c("DIST", "AZIMUTH")]
137159
if (length(columns) == 0)
@@ -141,14 +163,49 @@ load_tree_data <- function(src, table = NULL, columns = DEFAULT_TREE_COLUMNS,
141163
}
142164

143165
ds <- NULL
166+
open_options <- character(0)
167+
168+
if (src_fmt == "CSV") {
169+
# auto-detect column data types
170+
open_options <- c(open_options, "AUTODETECT_TYPE=YES")
171+
if (quoted_cols_as_char) {
172+
open_options <- c(open_options, "QUOTED_FIELDS_AS_STRING=YES")
173+
}
174+
175+
# force PLT_CN as string data type by schema override (GDAL >= 3.11)
176+
# avoids copy, versus changing it later in the data frame
177+
tbl_tmp <- table
178+
if (is.null(tbl_tmp) || tbl_tmp == "") {
179+
tbl_tmp <- gdalraster:::.cpl_get_basename(src)
180+
}
181+
182+
if (gdalraster::ogr_field_index(src, tbl_tmp, "PLT_CN") >= 0 &&
183+
gdalraster::gdal_version_num() >=
184+
gdalraster::gdal_compute_version(3, 11, 0)) {
185+
186+
schema <- 'OGR_SCHEMA={"layers": [{"name": "%s", "fields":[{
187+
"name": "PLT_CN", "type": "String" }]}]}'
188+
189+
override_schema <- sprintf(schema, tbl_tmp)
190+
open_options <- c(open_options, override_schema)
191+
}
192+
}
193+
194+
gdalraster::push_error_handler("quiet")
144195
if (is.null(table) && is.null(sql)) {
145-
ds <- try(methods::new(gdalraster::GDALVector, src), silent = TRUE)
196+
ds <- try(methods::new(gdalraster::GDALVector, src, "", TRUE,
197+
open_options),
198+
silent = TRUE)
146199
} else if (!is.null(table)) {
147-
ds <- try(methods::new(gdalraster::GDALVector, src, table),
200+
ds <- try(methods::new(gdalraster::GDALVector, src, table, TRUE,
201+
open_options),
148202
silent = TRUE)
149203
} else if (!is.null(sql)) {
150-
ds <- try(methods::new(gdalraster::GDALVector, src, sql), silent = TRUE)
204+
ds <- try(methods::new(gdalraster::GDALVector, src, sql, TRUE,
205+
open_options),
206+
silent = TRUE)
151207
}
208+
gdalraster::pop_error_handler()
152209

153210
if (!methods::is(ds, "Rcpp_GDALVector")) {
154211
cli::cli_alert_danger("Failed to access tree data in {.path {src}}")

man/load_tree_data.Rd

Lines changed: 19 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)