From cd818080606591ac5348005732db0cadfb4e597c Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Tue, 5 Oct 2021 19:49:35 +0200 Subject: [PATCH] droplevels for empty table and ordered factors (#5185) --- NEWS.md | 2 +- R/fdroplevels.R | 5 +++-- inst/tests/tests.Rraw | 23 +++++++++++++++-------- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index b83869e50..18b5c5c49 100644 --- a/NEWS.md +++ b/NEWS.md @@ -139,7 +139,7 @@ 25. `setcolorder()` gains `before=` and `after=`, [#4385](https://github.com/Rdatatable/data.table/issues/4358). Thanks to Matthias Gomolka for the request, and both Benjamin Schwendinger and Xianghui Dong for implementing. -26. `base::droplevels()` gains a fast method for `data.table`, [#647](https://github.com/Rdatatable/data.table/issues/647). Thanks to Steve Lianoglou for requesting, and Jan Gorecki and Benjamin Schwendinger for the PR. `fdroplevels()` for use on vectors has also been added. +26. `base::droplevels()` gains a fast method for `data.table`, [#647](https://github.com/Rdatatable/data.table/issues/647). Thanks to Steve Lianoglou for requesting, Boniface Kamgang and Martin Binder for testing, and Jan Gorecki and Benjamin Schwendinger for the PR. `fdroplevels()` for use on vectors has also been added. 27. `shift()` now also supports `type="cyclic"`, [#4451](https://github.com/Rdatatable/data.table/issues/4451). Arguments that are normally pushed out by `type="lag"` or `type="lead"` are re-introduced at this type at the first/last positions. Thanks to @RicoDiel for requesting, and Benjamin Schwendinger for the PR. diff --git a/R/fdroplevels.R b/R/fdroplevels.R index 5c53ee42f..c7025dda0 100644 --- a/R/fdroplevels.R +++ b/R/fdroplevels.R @@ -4,12 +4,13 @@ fdroplevels = function(x, exclude = if (anyNA(levels(x))) NULL else NA, ...) { lev = which(tabulate(x, length(levels(x))) & (!match(levels(x), exclude, 0L))) ans = match(as.integer(x), lev) setattr(ans, 'levels', levels(x)[lev]) - setattr(ans, 'class', 'factor') + setattr(ans, 'class', class(x)) return(ans) } droplevels.data.table = function(x, except = NULL, exclude, in.place = FALSE, ...){ - stopifnot(length(x) > 0L, is.logical(in.place)) + stopifnot(is.logical(in.place)) + if (nrow(x)==0L) return(x) ix = vapply(x, is.factor, NA) if(!is.null(except)){ stopifnot(is.numeric(except), except <= length(x)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index bfa901d31..619ab6730 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18187,17 +18187,24 @@ test(2213, identical(fread(text="A\n0.8060667366\n")$A, 0.8060667366)) # droplevels.data.table method, and fdroplevels, #647 x = factor(letters[1:10]) DT = data.table(a = x)[1:5] -test(2214.1, fdroplevels(factor()), droplevels(factor())) -test(2214.2, fdroplevels(x[1:5]), droplevels(x[1:5])) +test(2214.01, fdroplevels(factor()), droplevels(factor())) +test(2214.02, fdroplevels(x[1:5]), droplevels(x[1:5])) if (base::getRversion() >= "3.4.0") { # bug fix in R 3.4.0: "droplevels(f) now keeps levels when present." - test(2214.3, fdroplevels(x[1:5], c("b", "d")), droplevels(x[1:5], c("b", "d"))) - test(2214.4, fdroplevels(x[1:5], letters[1:5]), droplevels(x[1:5], letters[1:5])) - test(2214.5, droplevels(DT, exclude=c("b", "d"))[["a"]], droplevels(DT[1:5,a], c("b", "d"))) + test(2214.03, fdroplevels(x[1:5], c("b", "d")), droplevels(x[1:5], c("b", "d"))) + test(2214.04, fdroplevels(x[1:5], letters[1:5]), droplevels(x[1:5], letters[1:5])) + test(2214.05, droplevels(DT, exclude=c("b", "d"))[["a"]], droplevels(DT[1:5,a], c("b", "d"))) } -test(2214.6, droplevels(DT)[["a"]], droplevels(DT[1:5,a])) -test(2214.7, droplevels(DT, 1)[["a"]], x[1:5]) -test(2214.8, droplevels(DT, in.place=TRUE), DT) +test(2214.06, droplevels(DT)[["a"]], droplevels(DT[1:5,a])) +test(2214.07, droplevels(DT, 1)[["a"]], x[1:5]) +test(2214.08, droplevels(DT, in.place=TRUE), DT) +# support ordered factors in fdroplevels +o = factor(letters[1:10], ordered=TRUE) +test(2214.09, fdroplevels(o[1:5]), droplevels(o[1:5])) +# edge case for empty table #5184 +test(2214.10, droplevels(DT[0]), DT[0]) +test(2214.11, droplevels(data.table()), data.table()) + # factor i should be just like character i and work, #1632 DT = data.table(A=letters[1:3], B=4:6, key="A")