Skip to content

Commit fe0a495

Browse files
authored
Merge pull request #16 from KWB-R/dev
Release v0.6.0
2 parents 1c6444d + d7d75a0 commit fe0a495

13 files changed

Lines changed: 558 additions & 21 deletions

File tree

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: kwb.utils
22
Title: General Utility Functions Developed at KWB
3-
Version: 0.5.1
3+
Version: 0.6.0
44
Authors@R:
55
c(person(given = "Hauke",
66
family = "Sonnenberg",

NAMESPACE

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ export(assignArgumentDefaults)
2727
export(assignGlobally)
2828
export(assignPackageObjects)
2929
export(atLeastOneRowIn)
30+
export(backspace)
3031
export(breakInSequence)
3132
export(callWith)
3233
export(callWithStringsAsFactors)
@@ -141,9 +142,11 @@ export(isNaOrEmpty)
141142
export(isNullOrEmpty)
142143
export(isOddNumber)
143144
export(lastElement)
145+
export(left)
144146
export(limitToRange)
145147
export(linearCombination)
146148
export(listObjects)
149+
export(listToDepth)
147150
export(loadObject)
148151
export(makeUnique)
149152
export(matchesCriteria)
@@ -161,6 +164,7 @@ export(naToLastNonNa)
161164
export(nameByElement)
162165
export(noFactorDataFrame)
163166
export(objectSize)
167+
export(orderBy)
164168
export(pairwise)
165169
export(parallelNonNA)
166170
export(pasteColumns)
@@ -195,10 +199,12 @@ export(removeExtension)
195199
export(removeSpaces)
196200
export(renameAndSelect)
197201
export(renameColumns)
202+
export(repeated)
198203
export(resetRowNames)
199204
export(resolve)
200205
export(resolveAll)
201206
export(revertListAssignments)
207+
export(right)
202208
export(roundColumns)
203209
export(rowOrColumnwisePercentage)
204210
export(rowwisePercentage)
@@ -217,6 +223,7 @@ export(setLoaded)
217223
export(setMatrixColumns)
218224
export(shorten)
219225
export(sourceScripts)
226+
export(space)
220227
export(splitAlongDim)
221228
export(splitIntoFixSizedBlocks)
222229
export(startsToEnds)

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Current
22

3+
* new: listToDepth()
4+
* new small helper functions: backspace(), left(), orderBy(), repeated(),
5+
right(), space()
6+
37
# [kwb.utils 0.5.1](https://github.com/KWB-R/kwb.utils/releases/tag/v0.5.1) <small>2020-04-20</small>
48

59
* Fix bug in moveColumnsToFront(): keep data frame structure in case that the

R/listToDepth.R

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
# listToDepth ------------------------------------------------------------------
2+
3+
#' List Elements Recursively up to Depth
4+
#'
5+
#' @param path path to the element at which to start listing
6+
#' @param max_depth maximal depth level of which to list elements. A value of
7+
#' \code{0} means non-recursive listing, a value of \code{NA} represents fully
8+
#' recursive listing.
9+
#' @param full_info return only \code{path} and \code{isdir} information or
10+
#' the full information provided by \code{FUN(full_info = TRUE)}?
11+
#' @param FUN function called to get the listing of the element given in
12+
#' \code{path}. The function must accept a path as its first argument and it
13+
#' must define the argument \code{full_info} second. It may accept further
14+
#' arguments. It must always return a data frame. For \code{full_info = FALSE}
15+
#' the data frame must have columns \code{file} and \code{isdir} (is the
16+
#' "file" a directory?). For \code{full_info = TRUE} the function may return
17+
#' further columns. The function must provide an empty data frame with the
18+
#' expected columns when being called with \code{character()} as the first
19+
#' argument. The function is expected to set the attribute "failed" to the
20+
#' given path in case that the path could not be accessed (e.g. because of a
21+
#' broken internet connection if the listing is done remotely). See
22+
#' \code{kwb.utils:::listFiles} for an example implementation that somehow
23+
#' simulates the behaviour of the \code{\link{dir}} function. See
24+
#' \code{kwb.dwd::list_url()} for a more advanced usage of this function in
25+
#' order to recursively list the files on an FTP server (FTP = file transfer
26+
#' protocol).
27+
#' @param \dots further arguments passed to \code{FUN}
28+
#' @param depth start depth of recursion if \code{max_depth > 0}. This argument
29+
#' is for internal use and not intended to be set by the user!
30+
#' @param prob_mutate probability to alter the path so that it becomes useless.
31+
#' This is zero by default. Set the value only if you want to test how the
32+
#' function behaves if the listing of a path fails.
33+
#' @return data frame containing at least the columns \code{file} and
34+
#' \code{isdir}. If \code{full_info = TRUE} the result data frame may contain
35+
#' further columns, as provided by the function given in \code{FUN} for
36+
#' \code{full_info = TRUE}.
37+
#' @export
38+
#' @examples
39+
#' # Example list function provided in this package (file listing)
40+
#' FUN <- kwb.utils:::listFiles
41+
#'
42+
#' # The list function must provide empty records when no path is given. The
43+
#' # returned data frame must provide the columns "file" and "isdir" ...
44+
#' FUN()
45+
#' FUN(full_info = TRUE)
46+
#'
47+
#' # ... even when being called with an empty character vector
48+
#' FUN(character())
49+
#' FUN(character(), full_info = TRUE)
50+
#'
51+
#' # Call the function recursively up to the given depth level
52+
#' kwb.utils:::listToDepth(".", max_depth = 1, FUN = FUN)
53+
#'
54+
listToDepth <- function(
55+
path,
56+
max_depth = 0L,
57+
full_info = FALSE,
58+
FUN = listFiles,
59+
...,
60+
depth = 0,
61+
prob_mutate = 0
62+
)
63+
{
64+
# Helper function to mutate the path with a probability of "prob"
65+
mutate_or_not <- function(x, prob = 0.1) {
66+
stopifnot(inRange(prob, 0, 1))
67+
# Add some nonsense to the path if the TRUE/FALSE coin lands on TRUE
68+
if (prob > 0 && sample(c(TRUE, FALSE), 1L, prob = c(prob, 1 - prob))) {
69+
x <- paste0(x, "blabla")
70+
}
71+
x
72+
}
73+
74+
# kwb.utils::assignPackageObjects("kwb.utils")
75+
# kwb.utils::assignArgumentDefaults(listToDepth)
76+
# max_depth = 1;full_info=TRUE;set.seed(1)
77+
78+
# Call the user-defined function FUN to list the elements at the given path
79+
info <- FUN(mutate_or_not(path, prob_mutate), full_info, ...)
80+
#info <- FUN(mutate_or_not(path, prob_mutate), full_info)
81+
82+
# Which files represent directories?
83+
is_directory <- selectColumns(info, "isdir")
84+
85+
# Are we already at maximum depth?
86+
at_max_depth <- ! is.na(max_depth) && (depth == max_depth)
87+
88+
# Return the file list if no recursive listing is requested or if we are
89+
# already at maximum depth or if there are no directories. The function is
90+
# also returned from if info is empty (! any(is_directory) is TRUE).
91+
if (at_max_depth || ! any(is_directory)) {
92+
return(info)
93+
}
94+
95+
# URLs representing directories
96+
directories <- selectColumns(info, "file")[is_directory]
97+
98+
# Number of directories
99+
n_directories <- length(directories)
100+
101+
# There must be directories if we arrive here!
102+
stopifnot(n_directories > 0L)
103+
104+
# Indices to loop through
105+
indices <- stats::setNames(seq_along(directories), directories)
106+
107+
# List all directories by calling this function recursively
108+
subdir_infos <- lapply(indices, function(i) {
109+
110+
#i <- 1L
111+
112+
# Show the progress
113+
cat(sprintf("%s%d/%d: ", space(depth), i, n_directories))
114+
115+
# Recursive call of this function
116+
listToDepth(
117+
path = paste0(assertFinalSlash(path), directories[i]),
118+
max_depth = max_depth,
119+
full_info = full_info,
120+
FUN = FUN,
121+
...,
122+
depth = depth + 1,
123+
prob_mutate = prob_mutate
124+
)
125+
})
126+
127+
# We need a template just in case that no data could be retrieved
128+
template <- FUN(full_info = full_info)
129+
130+
# Merge data frames with info on files in subdirectories
131+
subdir_info <- mergeFileInfos(subdir_infos, template)
132+
133+
# Prepend info on files at this level
134+
result <- rbind(info[! is_directory, ], subdir_info)
135+
136+
# Collect information on URLs that failed to be accessed
137+
failed <- c(attr(info, "failed"), attr(subdir_info, "failed"))
138+
139+
# Return the sorted file information with newly composed attribute "failed"
140+
structure(orderBy(result, "file"), failed = failed)
141+
}
142+
143+
# mergeFileInfos ---------------------------------------------------------------
144+
mergeFileInfos <- function(file_infos, template)
145+
{
146+
stopifnot(is.list(file_infos))
147+
148+
# Keep only non-empty data frames
149+
dfs <- file_infos[sapply(file_infos, nrow) > 0L]
150+
151+
# Function to prepend a parent name "p" to column "file" in data frame "df"
152+
prepend_parent <- function(df, p) {
153+
parent <- assertFinalSlash(p)
154+
child <- selectColumns(df, "file")
155+
setColumns(df, file = paste0(parent, child), dbg = FALSE)
156+
}
157+
158+
# Prepend the parent names to the filenames for the remaining data frames
159+
result <- do.call(rbind, mapply(
160+
prepend_parent, dfs, names(dfs), SIMPLIFY = FALSE, USE.NAMES = FALSE
161+
))
162+
163+
# If the result is NULL (no data frames to loop through) set the result to the
164+
# empty file info record
165+
result <- defaultIfNULL(result, template)
166+
167+
# Collect the information on URLs that could not be listed
168+
failed <- unlist(excludeNULL(lapply(file_infos, attr, "failed"), FALSE))
169+
170+
# Merge the file lists returned for each directory
171+
# Return the vector of files with an attribute "failed" holding the merged
172+
# URLs of directories that could not be read
173+
structure(result, failed = failed)
174+
}
175+
176+
# listFiles --------------------------------------------------------------------
177+
listFiles <- function(path = ".", full_info = FALSE, ...)
178+
{
179+
message("listing ", path)
180+
181+
# Return empty data frame if path is empty
182+
if (length(path) == 0L) {
183+
return(listFiles(full_info = full_info)[FALSE, ])
184+
}
185+
186+
files <- dir(path, include.dirs = TRUE)
187+
188+
result <- resetRowNames(file.info(file.path(path, files)))
189+
190+
result$file <- files
191+
192+
FUN <- if (full_info) moveColumnsToFront else selectColumns
193+
194+
FUN(result, c("file", "isdir"))
195+
}

R/utils.R

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# backspace --------------------------------------------------------------------
2+
3+
#' String of n Backspaces
4+
#'
5+
#' @param n number of backspace characters
6+
#' @return vector of character of length one
7+
#' @export
8+
#' @examples
9+
#' update <- function(x) cat(backspace(3), x)
10+
#' x <- "value: 123"
11+
#' cat(x)
12+
#' cat(paste0(x, backspace(3), "987"))
13+
#'
14+
backspace <- function(n)
15+
{
16+
repeated("\b", n)
17+
}
18+
19+
# left -------------------------------------------------------------------------
20+
21+
#' Left Part of a String
22+
#'
23+
#' @param x vector of character
24+
#' @param n number of characters to be kept from the beginning of each character
25+
#' string within \code{x}
26+
#' @return vector of character
27+
#' @export
28+
#' @examples
29+
#' left("Good Morning", 4)
30+
#'
31+
left <- function(x, n)
32+
{
33+
substr(x, 1L, n)
34+
}
35+
36+
# orderBy ----------------------------------------------------------------------
37+
38+
#' Order a Data Frame by One or more Columns
39+
#'
40+
#' @param df data frame
41+
#' @param by vector of column names specifying the columns by which to order
42+
#' @param \dots further arguments passed to \code{\link{order}}, such as
43+
#' \code{decreasing}
44+
#' @return \code{df} being sorted and with newly renumbered rows
45+
#' @export
46+
#' @examples
47+
#' df <- data.frame(number = 4:1, letter = LETTERS[1:4])
48+
#' df
49+
#' orderBy(df, "number")
50+
#' orderBy(df, "letter", decreasing = TRUE)
51+
#'
52+
orderBy <- function(df, by = NULL, ...)
53+
{
54+
kwb.utils::resetRowNames(
55+
df[order(kwb.utils::selectColumns(df, by), ...), , drop = FALSE]
56+
)
57+
}
58+
59+
# repeated ---------------------------------------------------------------------
60+
61+
#' Repeated Substring
62+
#'
63+
#' @param x substring to be repeated and pasted together to a new string
64+
#' @param n number of times to repeat the substring
65+
#' @return vector of character of length one
66+
#' @export
67+
#' @examples
68+
#' repeated("no ", 2)
69+
#' repeated("yes ", 3)
70+
#' repeated("yes no ", 3)
71+
#'
72+
repeated <- function(x, n)
73+
{
74+
paste(rep(x, n), collapse = "")
75+
}
76+
77+
# right ------------------------------------------------------------------------
78+
79+
#' Right Part of a String
80+
#'
81+
#' @param x vector of character
82+
#' @param n number of characters to be kept from the end of each character
83+
#' string within \code{x}
84+
#' @return vector of character
85+
#' @export
86+
#' @examples
87+
#' right("Good Morning", 7)
88+
#'
89+
right <- function(x, n)
90+
{
91+
nc <- nchar(x)
92+
substr(x, nc - n + 1L, nc)
93+
}
94+
95+
# space ------------------------------------------------------------------------
96+
97+
#' Space String Used for Indentation
98+
#'
99+
#' Chain together \code{depth * tabLength} spaces
100+
#'
101+
#' @param depth depth of indentation
102+
#' @param tabLength number of spaces per indentation level
103+
#' @return vector of character of length one consisting of \code{depth *
104+
#' tabLength} space characters
105+
#' @export
106+
#' @examples
107+
#' cat(sprintf("%s1\n%s2\n%s3\n", space(1), space(2), space(3)))
108+
#' cat(sprintf("%s1\n%s2\n%s3\n", space(1, 4), space(2, 4), space(3, 4)))
109+
#'
110+
space <- function(depth = 1L, tabLength = 2L)
111+
{
112+
repeated(" ", depth * tabLength)
113+
}

0 commit comments

Comments
 (0)