Skip to content

Commit ab616a4

Browse files
authored
Drop stringr dependency (#1841)
roxygen2 (and hence devtools) no longer depends on stringr (or transitively, stringi), making it easier to install in constrained Linux environments. Replacements: - `str_detect()` → `grepl()` - `str_trim()` → `trimws()` - `str_replace()` / `str_remove()` → `sub()` - `str_replace_all()` → `gsub()` and local `re_replace_all()` helper - `str_split()` → `strsplit()` - `str_extract()` → `regexpr()` + `regmatches()` - `str_count()` → local `re_count()` helper - `str_sub()` → `substr()` / `substring()` - `str_match()` → `regmatches()` + `regexec()` - `str_match_all()` → `regmatches()` + `gregexec()` - `str_locate_all()` → `gregexpr()` - `str_split_fixed()` → local `re_split_half()` helper (only ever needed n=2) Base R quirks worked around: - `strsplit("", pattern)` returns `character(0)`, not `""` like stringr, so `tag_words()` now guards against empty input before splitting. - `substr<-` can't change string length, so `str_sub<-` assignments were replaced with manual `paste0(left, val, right)` concatenation. - `regmatches()` + `gregexec()` returns a list-of-matrices that needs `t()` to match stringr's column-oriented `str_match_all()` layout. - `gregexpr()` returns a list with a single `-1` element (not zero-length) when there's no match, requiring an explicit `> 0` filter in `re_count()`. - `stringr::fixed()` wrappers in tests were replaced with `fixed = TRUE` arguments to base functions. - The `(?x)` free-spacing flag is used inline in `paste0()`-assembled regexps (since base R has no `regex(comments = TRUE)` wrapper), with comments placed on each `paste0()` line instead.
1 parent c35dacc commit ab616a4

35 files changed

Lines changed: 193 additions & 140 deletions

DESCRIPTION

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ Imports:
3131
pkgload (>= 1.0.2),
3232
R6 (>= 2.1.2),
3333
rlang (>= 1.1.0),
34-
stringr (>= 1.0.0),
3534
utils,
3635
withr,
3736
xml2

NAMESPACE

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,6 @@ export(update_collate)
274274
export(vignette_roclet)
275275
export(warn_roxy_tag)
276276
import(rlang)
277-
import(stringr)
278277
importFrom(R6,R6Class)
279278
importFrom(knitr,knit)
280279
importFrom(knitr,opts_chunk)

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# roxygen2 (development version)
2+
* roxygen2 no longer depends on stringr/stringi. This means that no package in the devtools constellation depends on stringr, which in turn means you no longer need stringi, making it a bit easier to install in constrained Linux environments.
23
* roxygen2 options can now be set using `Config/roxygen2/` fields in DESCRIPTION (e.g. `Config/roxygen2/markdown: TRUE`) instead of the `Roxygen` field. The old `Roxygen` field is still supported. Similarly, the roxygen2 version is now stored in `Config/roxygen2/version` instead of `RoxygenNote` (#1328).
34
* Tags that expect single-line input now warn when they span multiple lines, catching common mistakes. Affected tags: `@aliases`, `@concept`, `@encoding`, `@exportClass`, `@exportMethod`, `@exportPattern`, `@exportS3Method`, `@importFrom`, `@importClassesFrom`, `@importMethodsFrom`, `@include`, `@inheritParams`, `@keywords`, `@method`, `@name`, `@order`, `@rdname`, `@S3method`, `@template`, and `@useDynLib` (#1642, #1688). This may break some existing usage, but it prevents a wide class of otherwise silent errors.
45
* `@examplesIf` now warns when there is no example code after the condition (#1695).

R/block.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,16 +240,16 @@ parse_description <- function(tags) {
240240
}
241241

242242
intro <- tags[[1]]
243-
intro$val <- str_trim(intro$raw)
243+
intro$val <- trimws(intro$raw)
244244
if (intro$val == "") {
245245
return(tags[-1])
246246
}
247247

248248
tags <- tags[-1]
249249
tag_names <- tag_names[-1]
250250

251-
paragraphs <- str_split(intro$val, fixed('\n\n'))[[1]]
252-
lines <- str_count(paragraphs, "\n") + rep(2, length(paragraphs))
251+
paragraphs <- strsplit(intro$val, '\n\n', fixed = TRUE)[[1]]
252+
lines <- re_count(paragraphs, "\n") + rep(2, length(paragraphs))
253253
offsets <- c(0, cumsum(lines))
254254

255255
# 1st paragraph = title (unless has @title)

R/collate.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,5 +103,5 @@ base_path <- function(path, base) {
103103
path <- normalizePath(path, winslash = "/")
104104
base <- normalizePath(base, winslash = "/")
105105

106-
str_replace(path, fixed(paste0(base, "/")), "")
106+
sub(paste0(base, "/"), "", path, fixed = TRUE)
107107
}

R/markdown-code.R

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ markdown_evaluate <- function(text) {
5858
rcode_pos <- parse_md_pos(map_chr(rcode_nodes, xml_attr, "sourcepos"))
5959
rcode_pos <- work_around_cmark_sourcepos_bug(text, rcode_pos)
6060
out <- eval_code_nodes(rcode_nodes)
61-
str_set_all_pos(text, rcode_pos, out, rcode_nodes)
61+
re_set_all_pos(text, rcode_pos, out, rcode_nodes)
6262
}
6363

6464
# Work around commonmark sourcepos bug for inline R code
@@ -68,7 +68,7 @@ work_around_cmark_sourcepos_bug <- function(text, rcode_pos) {
6868
return(rcode_pos)
6969
}
7070

71-
lines <- str_split(text, fixed("\n"))[[1]]
71+
lines <- strsplit(text, "\n", fixed = TRUE)[[1]]
7272

7373
for (l in seq_len(nrow(rcode_pos))) {
7474
# Do not try to fix multi-line code, we error for that (below)
@@ -79,7 +79,7 @@ work_around_cmark_sourcepos_bug <- function(text, rcode_pos) {
7979
start <- rcode_pos$start_column[l]
8080

8181
# Maybe correct? At some point this will be fixed upstream, hopefully.
82-
if (str_sub(line, start - 1, start + 1) == "`r ") {
82+
if (substr(line, start - 1, start + 1) == "`r ") {
8383
next
8484
}
8585

@@ -91,10 +91,11 @@ work_around_cmark_sourcepos_bug <- function(text, rcode_pos) {
9191
# the real "`r " left by six characters, there happens to be another
9292
# "`r " there.
9393

94-
indent <- nchar(str_extract(line, "^[ ]+"))
94+
m <- regexpr("^[ ]+", line)
95+
indent <- attr(m, "match.length")
9596
if (
96-
!is.na(indent) &&
97-
str_sub(line, start - 1 + indent, start + 1 + indent) == "`r "
97+
m > 0L &&
98+
substr(line, start - 1 + indent, start + 1 + indent) == "`r "
9899
) {
99100
rcode_pos$start_column[l] <- rcode_pos$start_column[l] + indent
100101
rcode_pos$end_column[l] <- rcode_pos$end_column[l] + indent
@@ -106,7 +107,7 @@ work_around_cmark_sourcepos_bug <- function(text, rcode_pos) {
106107

107108
is_markdown_code_node <- function(x) {
108109
info <- xml_attr(x, "info")
109-
str_sub(xml_text(x), 1, 2) == "r " ||
110+
substr(xml_text(x), 1, 2) == "r " ||
110111
(!is.na(info) && grepl("^[{][a-zA-z]+[}, ]", info))
111112
}
112113

@@ -158,7 +159,7 @@ knitr_chunk_defaults <- function() {
158159
)
159160
}
160161

161-
str_set_all_pos <- function(text, pos, value, nodes) {
162+
re_set_all_pos <- function(text, pos, value, nodes) {
162163
# Cmark has a bug when reporting source positions for multi-line
163164
# code tags, and it does not count the indenting space in the
164165
# continuation lines: https://github.com/commonmark/cmark/issues/296
@@ -169,7 +170,7 @@ str_set_all_pos <- function(text, pos, value, nodes) {
169170

170171
# Need to split the string, because of the potential multi-line
171172
# code tags, and then also recode the positions
172-
lens <- nchar(str_split(text, fixed("\n"))[[1]])
173+
lens <- nchar(strsplit(text, "\n", fixed = TRUE)[[1]])
173174
shifts <- c(0, cumsum(lens + 1L))
174175
shifts <- shifts[-length(shifts)]
175176
start <- shifts[pos$start_line] + pos$start_column

R/markdown-escaping.R

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ find_fragile_rd_tags <- function(text, fragile) {
119119
ftags <- ftags[keep, ]
120120

121121
if (nrow(ftags)) {
122-
ftags$text <- str_sub(text, ftags$start, ftags$argend)
122+
ftags$text <- substring(text, ftags$start, ftags$argend)
123123
}
124124

125125
ftags
@@ -142,7 +142,7 @@ find_all_rd_tags <- function(text) {
142142
## Find the end of the argument list for each tag. Note that
143143
## tags might be embedded into the arguments of other tags.
144144
tags$argend <- map_int(seq_len(nrow(tags)), function(i) {
145-
tag_plus <- str_sub(text, tags$end[i], text_len)
145+
tag_plus <- substr(text, tags$end[i], text_len)
146146
findEndOfTag(tag_plus, is_code = FALSE, start = 0L) + tags$end[i]
147147
})
148148

@@ -163,12 +163,28 @@ find_all_rd_tags <- function(text) {
163163

164164
find_all_tag_names <- function(text) {
165165
## Find the tags without arguments first
166-
tag_pos <- str_locate_all(text, r"(\\[a-zA-Z][a-zA-Z0-9]*)")[[1]]
166+
m <- gregexpr(r"(\\[a-zA-Z][a-zA-Z0-9]*)", text)[[1]]
167+
if (m[[1]] == -1L) {
168+
tag_pos <- matrix(
169+
integer(),
170+
ncol = 2,
171+
dimnames = list(NULL, c("start", "end"))
172+
)
173+
} else {
174+
tag_pos <- cbind(
175+
start = as.integer(m),
176+
end = as.integer(m) + attr(m, "match.length") - 1L
177+
)
178+
}
167179

168-
data.frame(
169-
tag = str_sub(text, tag_pos[, "start"], tag_pos[, "end"]),
170-
as.data.frame(tag_pos)
171-
)
180+
if (nrow(tag_pos) == 0) {
181+
data.frame(tag = character(), start = integer(), end = integer())
182+
} else {
183+
data.frame(
184+
tag = substring(text, tag_pos[, "start"], tag_pos[, "end"]),
185+
as.data.frame(tag_pos)
186+
)
187+
}
172188
}
173189

174190
#' Replace fragile Rd tags with placeholders
@@ -184,7 +200,7 @@ find_all_tag_names <- function(text) {
184200
protect_rd_tags <- function(text, rd_tags) {
185201
id <- make_random_string()
186202

187-
text <- str_sub_same(text, rd_tags, id)
203+
text <- re_sub_same(text, rd_tags, id)
188204

189205
attr(text, "roxygen-markdown-subst") <-
190206
list(tags = rd_tags, id = id)
@@ -207,7 +223,7 @@ protect_rd_tags <- function(text, rd_tags) {
207223
#'
208224
#' @noRd
209225

210-
str_sub_same <- function(str, repl, id) {
226+
re_sub_same <- function(str, repl, id) {
211227
repl <- repl[order(repl$start), ]
212228

213229
if (is.unsorted(repl$end) || is.unsorted(repl$argend)) {
@@ -217,7 +233,11 @@ str_sub_same <- function(str, repl, id) {
217233
for (i in seq_len(nrow(repl))) {
218234
## The trailing - is needed, to distinguish between -1 and -10
219235
new_text <- paste0(id, "-", i, "-")
220-
str_sub(str, repl$start[i], repl$argend[i]) <- new_text
236+
str <- paste0(
237+
substr(str, 1, repl$start[i] - 1),
238+
new_text,
239+
substr(str, repl$argend[i] + 1, nchar(str))
240+
)
221241

222242
## Need to shift other coordinates (we shift everything,
223243
## it is just simpler).

R/markdown-link.R

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -64,26 +64,30 @@ add_linkrefs_to_md <- function(text) {
6464
}
6565

6666
get_md_linkrefs <- function(text) {
67-
refs <- str_match_all(
67+
refs <- regmatches(
6868
text,
69-
regex(
70-
comments = TRUE,
71-
"
72-
(?<=[^\\]\\\\]|^) # must not be preceded by ] or \
73-
\\[([^\\]\\[]+)\\] # match anything inside of []
74-
(?:\\[([^\\]\\[]+)\\])? # match optional second pair of []
75-
(?=[^\\[{]|$) # must not be followed by [ or {
76-
"
69+
gregexec(
70+
paste0(
71+
"(?x)",
72+
"(?<=[^\\]\\\\]|^)", # must not be preceded by ] or \
73+
"\\[([^\\]\\[]+)\\]", # match anything inside of []
74+
"(?:\\[([^\\]\\[]+)\\])?", # match optional second pair of []
75+
"(?=[^\\[{]|$)" # must not be followed by [ or {
76+
),
77+
text,
78+
perl = TRUE
7779
)
7880
)[[1]]
81+
if (length(refs) > 0) {
82+
refs <- t(refs)
83+
}
7984

8085
if (length(refs) == 0) {
8186
return(character())
8287
}
8388

8489
## For the [fun] form the link text is the same as the destination.
85-
# Need to check both NA and "" for different versions of stringr
86-
refs[, 3] <- ifelse(is.na(refs[, 3]) | refs[, 3] == "", refs[, 2], refs[, 3])
90+
refs[, 3] <- ifelse(refs[, 3] == "", refs[, 2], refs[, 3])
8791

8892
refs3encoded <- map_chr(refs[, 3], URLencode)
8993
paste0("[", refs[, 3], "]: ", "R:", refs3encoded)
@@ -126,12 +130,12 @@ parse_link <- function(destination, contents, state) {
126130

127131
is_code <- is_code || (grepl("[(][)]$", destination) && !has_link_text)
128132

129-
pkg <- str_match(destination, "^(.*)::")[1, 2]
133+
pkg <- regmatches(destination, regexec("^(.*)::", destination))[[1]][2]
130134
explicit_pkg <- !is.na(pkg)
131135
fun <- utils::tail(strsplit(destination, "::", fixed = TRUE)[[1]], 1)
132136
topic <- sub("[(][)]$", "", fun)
133-
if (!has_link_text && str_detect(destination, "-class$")) {
134-
fun <- str_match(fun, "^(.*)-class$")[1, 2]
137+
if (!has_link_text && grepl("-class$", destination)) {
138+
fun <- regmatches(fun, regexec("^(.*)-class$", fun))[[1]][2]
135139
}
136140

137141
# Standardise links: cross-package always get prefix;

R/markdown.R

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ markdown_pass1 <- function(text) {
7878
rcode_pos <- parse_md_pos(map_chr(rcode_nodes, xml_attr, "sourcepos"))
7979
rcode_pos <- work_around_cmark_sourcepos_bug(text, rcode_pos)
8080
out <- eval_code_nodes(rcode_nodes)
81-
str_set_all_pos(text, rcode_pos, out, rcode_nodes)
81+
re_set_all_pos(text, rcode_pos, out, rcode_nodes)
8282
}
8383

8484
# Work around commonmark sourcepos bug for inline R code
@@ -88,7 +88,7 @@ work_around_cmark_sourcepos_bug <- function(text, rcode_pos) {
8888
return(rcode_pos)
8989
}
9090

91-
lines <- str_split(text, fixed("\n"))[[1]]
91+
lines <- strsplit(text, "\n", fixed = TRUE)[[1]]
9292

9393
for (l in seq_len(nrow(rcode_pos))) {
9494
# Do not try to fix multi-line code, we error for that (below)
@@ -99,7 +99,7 @@ work_around_cmark_sourcepos_bug <- function(text, rcode_pos) {
9999
start <- rcode_pos$start_column[l]
100100

101101
# Maybe correct? At some point this will be fixed upstream, hopefully.
102-
if (str_sub(line, start - 1, start + 1) == "`r ") {
102+
if (substr(line, start - 1, start + 1) == "`r ") {
103103
next
104104
}
105105

@@ -111,10 +111,11 @@ work_around_cmark_sourcepos_bug <- function(text, rcode_pos) {
111111
# the real "`r " left by six characters, there happens to be another
112112
# "`r " there.
113113

114-
indent <- nchar(str_extract(line, "^[ ]+"))
114+
m <- regexpr("^[ ]+", line)
115+
indent <- attr(m, "match.length")
115116
if (
116-
!is.na(indent) &&
117-
str_sub(line, start - 1 + indent, start + 1 + indent) == "`r "
117+
m > 0L &&
118+
substr(line, start - 1 + indent, start + 1 + indent) == "`r "
118119
) {
119120
rcode_pos$start_column[l] <- rcode_pos$start_column[l] + indent
120121
rcode_pos$end_column[l] <- rcode_pos$end_column[l] + indent
@@ -126,7 +127,7 @@ work_around_cmark_sourcepos_bug <- function(text, rcode_pos) {
126127

127128
is_markdown_code_node <- function(x) {
128129
info <- xml_attr(x, "info")
129-
str_sub(xml_text(x), 1, 2) == "r " ||
130+
substr(xml_text(x), 1, 2) == "r " ||
130131
(!is.na(info) && grepl("^[{][a-zA-z]+[}, ]", info))
131132
}
132133

@@ -178,7 +179,7 @@ knitr_chunk_defaults <- function() {
178179
)
179180
}
180181

181-
str_set_all_pos <- function(text, pos, value, nodes) {
182+
re_set_all_pos <- function(text, pos, value, nodes) {
182183
# Cmark has a bug when reporting source positions for multi-line
183184
# code tags, and it does not count the indenting space in the
184185
# continuation lines: https://github.com/commonmark/cmark/issues/296
@@ -189,7 +190,7 @@ str_set_all_pos <- function(text, pos, value, nodes) {
189190

190191
# Need to split the string, because of the potential multi-line
191192
# code tags, and then also recode the positions
192-
lens <- nchar(str_split(text, fixed("\n"))[[1]])
193+
lens <- nchar(strsplit(text, "\n", fixed = TRUE)[[1]])
193194
shifts <- c(0, cumsum(lens + 1L))
194195
shifts <- shifts[-length(shifts)]
195196
start <- shifts[pos$start_line] + pos$start_column
@@ -235,13 +236,13 @@ mdxml_children_to_rd_top <- function(xml, state) {
235236
state$section_tag <- uuid()
236237
out <- map_chr(xml_children(xml), mdxml_node_to_rd, state)
237238
out <- c(out, mdxml_close_sections(state))
238-
rd <- str_trim(paste0(out, collapse = ""))
239+
rd <- trimws(paste0(out, collapse = ""))
239240
if (state$has_sections) {
240241
secs <- strsplit(rd, state$section_tag, fixed = TRUE)[[1]] %||% ""
241242
titles <- c("", state$titles)
242243
# strsplit drops trailing empty strings, so pad to match titles length
243244
secs <- c(secs, rep("", length(titles) - length(secs)))
244-
rd <- structure(str_trim(secs), names = titles)
245+
rd <- structure(trimws(secs), names = titles)
245246
}
246247
rd
247248
}

R/namespace.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -224,23 +224,23 @@ roxy_tag_ns.roxy_tag_exportS3Method <- function(x, block, env) {
224224
return()
225225
}
226226

227-
if (!str_detect(x$val, "::")) {
227+
if (!grepl("::", x$val, fixed = TRUE)) {
228228
warn_roxy_tag(x, "must have form package::generic")
229229
return()
230230
}
231231

232-
generic <- str_split(x$val, "::")[[1]]
232+
generic <- re_split_half(x$val, "::")
233233
generic_re <- paste0("^", generic[[2]], "\\.")
234234

235-
if (!str_detect(obj$alias, generic_re)) {
235+
if (!grepl(generic_re, obj$alias)) {
236236
warn_roxy_tag(
237237
x,
238238
"generic ({.str {generic[[2]]}}) doesn't match function ({.str {obj$alias}})",
239239
)
240240
return()
241241
}
242242

243-
class <- str_remove(obj$alias, generic_re)
243+
class <- sub(generic_re, "", obj$alias)
244244
method <- c(x$val, class)
245245
} else {
246246
method <- x$val

0 commit comments

Comments
 (0)