Skip to content

Commit bb60aec

Browse files
authored
Merge pull request #1 from pfizer-opensource/cran-release-v0.1
CRAN release v0.1
2 parents 1169cc5 + c5647db commit bb60aec

88 files changed

Lines changed: 1933 additions & 720 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.Rbuildignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
^\.github$
2121
^revdep$
2222
^CRAN-SUBMISSION$
23-
^data-raw$
23+
^inst/build-data$
24+
^inst/data-raw$
25+
^inst/examples$
2426
^\.pfizer\.yml$
2527
^LICENSE\.md$

CRAN-SUBMISSION

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Version: 0.1.0
2+
Date: 2024-07-12 15:44:43 UTC
3+
SHA: dc0ffc21f106e04a632d4488d7adca6934b3c3b4

DESCRIPTION

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: zippeR
22
Title: Working with United States ZIP Code and ZIP Code Tabulation Area Data
3-
Version: 0.1.0.9000
3+
Version: 0.1.0
44
Authors@R: c(
55
person("Christopher", "Prener", ,"Christopher.Prener@pfizer.com", c("aut", "cre"),
66
comment = c(ORCID = "0000-0002-4310-9888")),
@@ -15,6 +15,7 @@ Description: Provides a set of functions for working with American postal codes,
1515
Depends: R (>= 3.5)
1616
License: Apache License (>= 2)
1717
URL: https://github.com/pfizer-opensource/zippeR
18+
BugReports: https://github.com/pfizer-opensource/zippeR/issues
1819
Encoding: UTF-8
1920
LazyData: true
2021
RoxygenNote: 7.3.1
@@ -26,14 +27,13 @@ Imports:
2627
purrr,
2728
readr,
2829
sf,
29-
spatstat.geom,
30+
spatstat.univar,
3031
stats,
3132
stringr,
3233
tibble,
3334
tidycensus,
3435
tidyr,
35-
tigris,
36-
utils
36+
tigris
3737
Suggests:
3838
covr,
3939
knitr,

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
# Generated by roxygen2: do not edit by hand
22

33
export(zi_aggregate)
4+
export(zi_convert)
45
export(zi_crosswalk)
56
export(zi_get_demographics)
67
export(zi_get_geometry)
8+
export(zi_label)
79
export(zi_list_zctas)
810
export(zi_load_crosswalk)
11+
export(zi_load_labels)
12+
export(zi_load_labels_list)
913
export(zi_prep_hud)
1014
export(zi_repair)
1115
export(zi_validate)

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
# zippeR (development version)
1+
# zippeR 0.1.0
22

33
* Initial CRAN submission version that contains functionality for working with UDS and HUD ZIP Code crosswalk files as well as Census Bureau ZCTA geometries and demographic data

R/zi_aggregate.R

Lines changed: 47 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -53,50 +53,55 @@
5353
#' \code{tidycensus::census_api_key()} has been used to write your key to
5454
#' your \code{.Renviron} file. You can check whether an API key has been
5555
#' written to \code{.Renviron} by using \code{Sys.getenv("CENSUS_API_KEY")}.
56+
#'
5657
#' @return A tibble containing all aggregated data requested in either
5758
#' \code{"tidy"} or \code{"wide"} format.
5859
#'
5960
#' @examples
60-
#' \dontrun{
61+
#' # load sample demographic data
62+
#' mo22_demos <- zi_mo_pop
63+
#'
64+
#' # the above data can be replicated with the following code:
65+
#' # zi_get_demographics(year = 2022, variables = c("B01003_001", "B19013_001"),
66+
#' # survey = "acs5")
6167
#'
62-
#' # download geometric data
63-
#' geo <- zi_get_geometry(year = 2012, style = "zcta3",
64-
#' state = "MO", territory = NULL, method = "centroid",
65-
#' includes = c("516", "525"))
68+
#' # load sample geometric data
69+
#' mo22_zcta3 <- zi_mo_zcta3
6670
#'
67-
#' # download demographic data
68-
#' demo <- zi_get_demographics(year = 2012, variables = "B01003_001",
69-
#' survey = "acs5")
71+
#' # the above data can be replicated with the following code:
72+
#' # zi_get_geometry(year = 2022, style = "zcta3", state = "MO",
73+
#' # method = "intersect")
7074
#'
71-
#' # aggregate
72-
#' demo <- zi_aggregate(demo, year = 2012, extensive = "B01003_001",
73-
#' survey = "acs5", zcta = geo$ZCTA3)
75+
#' # aggregate a single variable
76+
#' zi_aggregate(mo22_demos, year = 2020, extensive = "B01003_001", survey = "acs5",
77+
#' zcta = mo22_zcta3$ZCTA3)
7478
#'
79+
#' \donttest{
80+
#' # aggregate multiple variables, outputting wide data
81+
#' zi_aggregate(mo22_demos, year = 2020,
82+
#' extensive = "B01003_001", intensive = "B19013_001", survey = "acs5",
83+
#' zcta = mo22_zcta3$ZCTA3, output = "wide")
7584
#' }
7685
#'
7786
#' @export
7887
zi_aggregate <- function(.data, year, extensive = NULL, intensive = NULL,
7988
intensive_method = "mean", survey,
8089
output = "tidy", zcta = NULL, key = NULL){
8190

82-
# global variables
83-
GEOID = ZCTA3 = key = variable = NULL
84-
8591
# evaluate inputs
86-
87-
if (missing(year) == TRUE & missing(survey) == TRUE){
88-
stop("Please specify arguments.")
92+
if (missing(year)){
93+
stop("The 'year' value is missing. Please provide a numeric value between 2010 and 2022.")
8994
}
9095

91-
if (is.numeric(year) == FALSE){
92-
stop("The 'year' value provided is invalid. Please provide a numeric value between 2010 and 2020.")
96+
if (!is.numeric(year)){
97+
stop("The 'year' value provided is invalid. Please provide a numeric value between 2010 and 2022.")
9398
}
9499

95100
if (length(survey) > 1){
96101
stop("One only 'survey' product may be requested at a time.")
97102
}
98103

99-
if (survey %in% c("sf1", "sf3", "acs1", "acs3", "acs5") == FALSE){
104+
if (!survey %in% c("sf1", "sf3", "acs1", "acs3", "acs5")){
100105
stop("The 'survey' requested is invalid. Please choose one of 'sf1', 'sf3', 'acs1', 'acs3', or 'acs5'.")
101106
}
102107

@@ -112,11 +117,15 @@ zi_aggregate <- function(.data, year, extensive = NULL, intensive = NULL,
112117
stop("The 'year' value provided is invalid for 3-year American Community Survey data. Please provide a year between 2010 and 2013.")
113118
}
114119

115-
if (output %in% c("tidy", "wide") == FALSE){
120+
if (!output %in% c("tidy", "wide")){
116121
stop("The 'output' requested is invalid. Please choose one of 'tidy' or 'wide'.")
117122
}
118123

119-
if (survey %in% c("sf1", "sf3") == TRUE){
124+
if (!inherits(.data, what = "data.frame")){
125+
stop("The '.data' object provided is not a dataframe or dataframe like object. Please provide a dataframe.")
126+
}
127+
128+
if (survey %in% c("sf1", "sf3")){
120129
error <- "Input data appear to be malformed - there should be three columns for Decennial Census data: 'GEOID', 'variable', and 'value'. Note that zi_aggregate() only accepts 'tidy' data."
121130

122131
if (length(names(.data)) != 3){
@@ -138,23 +147,27 @@ zi_aggregate <- function(.data, year, extensive = NULL, intensive = NULL,
138147
}
139148
}
140149

141-
if (is.null(zcta) == FALSE){
150+
if (!is.null(zcta)){
142151
valid <- zi_validate(zcta, style = "zcta3")
143152

144153
if (valid == FALSE){
145154
stop("ZCTA data passed to the 'zcta' argument are invalid. Please use 'zi_validate()' with the 'verbose = TRUE' option to investgiate further. The 'zi_repair()' function may be used to address issues.")
146155
}
147156
}
148157

158+
if (is.null(extensive) & is.null(intensive)){
159+
stop("At least one of 'extensive' or 'intensive' must be provided.")
160+
}
161+
149162
# set additional arguments
150163
## call type
151-
if (is.null(extensive) == FALSE){
164+
if (!is.null(extensive)){
152165
extensive_id <- TRUE
153166
} else {
154167
extensive_id <- FALSE
155168
}
156169

157-
if (is.null(intensive) == FALSE){
170+
if (!is.null(intensive)){
158171
intensive_id <- TRUE
159172
} else {
160173
intensive_id <- FALSE
@@ -301,7 +314,10 @@ zi_census_intensive <- function(.data, weights, method){
301314
if (method == "mean"){
302315
.data <- dplyr::summarise(.data, value = stats::weighted.mean(value, weight, na.rm = TRUE))
303316
} else if (method == "median"){
304-
.data <- dplyr::summarise(.data, value = spatstat.geom::weighted.median(value, weight, na.rm = TRUE))
317+
.data <- dplyr::summarise(
318+
.data,
319+
value = spatstat.univar::weighted.median(value, weight)
320+
)
305321
}
306322

307323
## return output
@@ -389,9 +405,11 @@ zi_acs_intensive <- function(.data, weights, method){
389405
estimate = stats::weighted.mean(estimate, weight, na.rm = TRUE),
390406
moe = stats::weighted.mean(moe, weight, na.rm = TRUE))
391407
} else if (method == "median"){
392-
.data <- dplyr::summarise(.data,
393-
estimate = spatstat.geom::weighted.median(estimate, weight, na.rm = TRUE),
394-
moe = spatstat.geom::weighted.median(moe, weight, na.rm = TRUE))
408+
.data <- dplyr::summarise(
409+
.data,
410+
estimate = spatstat.univar::weighted.median(estimate, weight),
411+
moe = spatstat.univar::weighted.median(moe, weight)
412+
)
395413
}
396414

397415
## return output

R/zi_convert.R

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#' Convert Five-digit ZIP Codes to Three-digit ZIP Codes
2+
#'
3+
#' @description This function converts five-digit ZIP Codes to three-digit ZIP
4+
#' Codes. The first three digits of a ZIP Code are known as the ZIP3 Code,
5+
#' and correspond to the sectional center facility (SCF) that processes mail
6+
#' for a region.
7+
#'
8+
#' @usage zi_convert(.data, input_var, output_var)
9+
#'
10+
#' @param .data A data frame containing a column of five-digit ZIP Codes.
11+
#' @param input_var A character scalar specifying the column name with the five-digit
12+
#' ZIP Codes in the data frame.
13+
#' @param output_var Optional; A character scalar specifying the column name to
14+
#' store the three-digit ZIP Codes in the data frame.
15+
#'
16+
#' @return A tibble containing the original data frame with a new column of
17+
#' three-digit ZIP Codes.
18+
#'
19+
#' @examples
20+
#' # add new column
21+
#' ## create sample data
22+
#' df <- data.frame(id = c(1:3), zip5 = c("63005", "63139", "63636"))
23+
#'
24+
#' ## convert ZIP Codes to ZIP3, creating a new column
25+
#' zi_convert(.data = df, input_var = zip5, output_var = zip3)
26+
#'
27+
#' # overwrite existing column
28+
#' ## create sample data
29+
#' df <- data.frame(id = c(1:3), zip = c("63005", "63139", "63636"))
30+
#'
31+
#' ## convert ZIP Codes to ZIP3, overwriting the existing column
32+
#' zi_convert(.data = df, input_var = zip)
33+
#'
34+
#' @export
35+
zi_convert <- function(.data, input_var, output_var){

  # Purpose: truncate the five-digit ZIP Codes stored in one column of '.data'
  # to their first three characters (ZIP3), writing the result either to a
  # separate column ('output_var') or, when 'output_var' is omitted, back over
  # the input column. Returns the modified data as a tibble.

  # check inputs
  if (!inherits(.data, what = "data.frame")){
    stop("The '.data' object provided is not a data frame.")
  }

  if (missing(input_var)){
    stop("A value for 'input_var' is required.")
  }

  # capture the column name without evaluating it, so that both bare
  # (zip5) and quoted ("zip5") column names are accepted
  input_varQN <- as.character(substitute(input_var))

  if (!input_varQN %in% names(.data)){
    stop("The given 'input_var' column is not found in your data object.")
  }

  # zi_validate() is defined elsewhere in the package; it is treated here as
  # returning a single TRUE/FALSE. Anything other than TRUE is rejected.
  valid <- zi_validate(x = .data[[input_varQN]])

  if (!isTRUE(valid)){
    stop(paste0("Input ZIP Code data in the '", input_varQN, "' column are invalid. Please use 'zi_validate()' with the 'verbose = TRUE' option to investigate further. The 'zi_repair()' function may be used to address issues."))
  }

  # resolve the destination column
  if (!missing(output_var)){
    # capture the name of the *output* column; the previous implementation
    # captured 'input_var' here, which silently ignored 'output_var', always
    # overwrote the input column, and always raised the warning below
    output_varQN <- as.character(substitute(output_var))

    if (output_varQN %in% names(.data)){
      warning(paste0("The given 'output_var' column, '", output_varQN , "', was found in your data object, and the column was overwritten."))
    }
  } else {
    # no destination given: overwrite the input column in place
    output_varQN <- input_varQN
  }

  # convert ZIP Codes to ZIP3
  .data[[output_varQN]] <- substr(.data[[input_varQN]], 1, 3)

  # create output
  out <- tibble::as_tibble(.data)

  # return output
  return(out)

}

0 commit comments

Comments
 (0)