diff --git a/NEWS.md b/NEWS.md index 442e2f66..d5b30b2b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # stringr (development version) +* `str_sub<-` now gives a more informative error if `value` is not the correct length. * Add `sep` argument to `str_dup()` so that it is possible to repeat a string and add a separator between every repeated value (@edward-burn, #564). * `str_*` now errors if `pattern` includes any `NA`s (@nash-delcamp-slp, #546). diff --git a/R/sub.R b/R/sub.R index 25a9c80b..961ac94e 100644 --- a/R/sub.R +++ b/R/sub.R @@ -6,18 +6,25 @@ #' #' @inheritParams str_detect #' @param start,end A pair of integer vectors defining the range of characters -#' to extract (inclusive). +#' to extract (inclusive). Positive values count from the left of the string, +#' and negative values count from the right. In other words, if `string` is +#' `"abcdef"` then 1 refers to `"a"` and -1 refers to `"f"`. #' #' Alternatively, instead of a pair of vectors, you can pass a matrix to #' `start`. The matrix should have two columns, either labelled `start` -#' and `end`, or `start` and `length`. +#' and `end`, or `start` and `length`. This makes `str_sub()` work directly +#' with the output from [str_locate()] and friends. +#' #' @param omit_na Single logical value. If `TRUE`, missing values in any of the #' arguments provided will result in an unchanged input. -#' @param value replacement string +#' @param value Replacement string. #' @return #' * `str_sub()`: A character vector the same length as `string`/`start`/`end`. #' * `str_sub_all()`: A list the same length as `string`. Each element is #' a character vector the same length as `start`/`end`. +#' +#' If `end` comes before `start` or `start` is outside the range of `string` +#' then the corresponding output will be the empty string. #' @seealso The underlying implementation in [stringi::stri_sub()] #' @export #' @examples @@ -28,7 +35,7 @@ #' str_sub(hw, 8, 14) #' str_sub(hw, 8) #' -#' # Negative indices index from end of string +#' # Negative values index from end of string #' str_sub(hw, -1) #' str_sub(hw, -7) #' str_sub(hw, end = -7) @@ -67,8 +74,8 @@ str_sub <- function(string, start = 1L, end = -1L) { #' @export #' @rdname str_sub -"str_sub<-" <- function(string, start = 1L, end = -1L, omit_na = FALSE, value) { - vctrs::vec_size_common(string = string, start = start, end = end) +"str_sub<-" <- function(string, start = 1L, end = -1L, omit_na = FALSE, value) { + vctrs::vec_size_common(string = string, start = start, end = end, value = value) if (is.matrix(start)) { stri_sub(string, from = start, omit_na = omit_na) <- value diff --git a/man/str_sub.Rd b/man/str_sub.Rd index a329e7ad..9e5b62ba 100644 --- a/man/str_sub.Rd +++ b/man/str_sub.Rd @@ -17,16 +17,19 @@ str_sub_all(string, start = 1L, end = -1L) coercible to one.} \item{start, end}{A pair of integer vectors defining the range of characters -to extract (inclusive). +to extract (inclusive). Positive values count from the left of the string, +and negative values count from the right. In other words, if \code{string} is +\code{"abcdef"} then 1 refers to \code{"a"} and -1 refers to \code{"f"}. Alternatively, instead of a pair of vectors, you can pass a matrix to \code{start}. The matrix should have two columns, either labelled \code{start} -and \code{end}, or \code{start} and \code{length}.} +and \code{end}, or \code{start} and \code{length}. This makes \code{str_sub()} work directly +with the output from \code{\link[=str_locate]{str_locate()}} and friends.} \item{omit_na}{Single logical value. If \code{TRUE}, missing values in any of the arguments provided will result in an unchanged input.} -\item{value}{replacement string} +\item{value}{Replacement string.} } \value{ \itemize{ @@ -34,6 +37,9 @@ arguments provided will result in an unchanged input.} \item \code{str_sub_all()}: A list the same length as \code{string}. Each element is a character vector the same length as \code{start}/\code{end}. } + +If \code{end} comes before \code{start} or \code{start} is outside the range of \code{string} +then the corresponding output will be the empty string. } \description{ \code{str_sub()} extracts or replaces the elements at a single position in each @@ -48,7 +54,7 @@ str_sub(hw, end = 6) str_sub(hw, 8, 14) str_sub(hw, 8) -# Negative indices index from end of string +# Negative values index from end of string str_sub(hw, -1) str_sub(hw, -7) str_sub(hw, end = -7) diff --git a/tests/testthat/_snaps/sub.md b/tests/testthat/_snaps/sub.md new file mode 100644 index 00000000..4b76a065 --- /dev/null +++ b/tests/testthat/_snaps/sub.md @@ -0,0 +1,13 @@ +# bad vectorisation gives informative error + + Code + str_sub(x, 1:2, 1:3) + Condition + Error in `str_sub()`: + ! Can't recycle `string` (size 2) to match `end` (size 3). + Code + str_sub(x, 1:2, 1:2) <- 1:3 + Condition + Error in `str_sub<-`: + ! Can't recycle `string` (size 2) to match `value` (size 3). + diff --git a/tests/testthat/test-sub.R b/tests/testthat/test-sub.R index e33de87e..5ad31d8f 100644 --- a/tests/testthat/test-sub.R +++ b/tests/testthat/test-sub.R @@ -73,7 +73,11 @@ test_that("missing arguments give missing results", { expect_equal(str_sub("test", NA, NA), NA_character_) expect_equal(str_sub(c(NA, "test"), NA, NA), rep(NA_character_, 2)) +}) +test_that("negative length or out of range gives empty string", { + expect_equal(str_sub("abc", 2, 1), "") + expect_equal(str_sub("abc", 4, 5), "") }) test_that("replacement works", { @@ -101,3 +105,11 @@ test_that("replacement with NA works", { str_sub(x, 1, 1, omit_na = TRUE) <- NA expect_equal(x, "BBCDEF") }) + +test_that("bad vectorisation gives informative error", { + x <- "a" + expect_snapshot(error = TRUE, { + str_sub(x, 1:2, 1:3) + str_sub(x, 1:2, 1:2) <- 1:3 + }) +})