From a2c9184a992ef4c999de325431c1cc2696ec89a6 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Mon, 21 Oct 2024 13:37:28 +0300 Subject: [PATCH 01/12] Respect shouldPrint when auto-printing from knitr Implementing a method for the knitr::knit_print generic makes it possible to customise the behaviour without looking up the call stack. The current solution only works on R >= 3.6.0 because that's where delayed S3 registration has been introduced. --- NAMESPACE | 2 ++ R/print.data.table.R | 16 +++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 50b81e8e6a..60ede1d9c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -104,6 +104,8 @@ if (getRversion() >= "4.0.0") { # version of R (and that is checked in .onLoad with error if not). export(.rbind.data.table) # only export in R<4.0.0 where it is still used; R-devel now detects it is missing doc, #5600 } +if (getRversion() >= "3.6.0") S3method(knitr::knit_print, data.table) +# else manual delayed registration from the onLoad hook S3method(dim, data.table) S3method(dimnames, data.table) S3method("dimnames<-", data.table) diff --git a/R/print.data.table.R b/R/print.data.table.R index 0bebfce9a5..611880458d 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -30,13 +30,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), # Other options investigated (could revisit): Cstack_info(), .Last.value gets set first before autoprint, history(), sys.status(), # topenv(), inspecting next statement in caller, using clock() at C level to timeout suppression after some number of cycles SYS = sys.calls() - if (length(SYS) <= 2L || # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok) - ( length(SYS) >= 3L && is.symbol(thisSYS <- SYS[[length(SYS)-2L]][[1L]]) && - as.character(thisSYS) == 'source') || # suppress printing from source(echo = TRUE) calls, #2369 - ( length(SYS) > 3L && is.symbol(thisSYS <- SYS[[length(SYS)-3L]][[1L]]) && - as.character(thisSYS) %chin% mimicsAutoPrint ) ) { + if (length(SYS) <= 2L) { # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok) return(invisible(x)) - # is.symbol() temp fix for #1758. } } if (!is.numeric(nrows)) nrows = 100L @@ -158,9 +153,6 @@ format.data.table = function(x, ..., justify="none") { do.call(cbind, lapply(x, format_col, ..., justify=justify)) } -mimicsAutoPrint = c("knit_print.default") -# add maybe repr_text.default. See https://github.com/Rdatatable/data.table/issues/933#issuecomment-220237965 - shouldPrint = function(x) { ret = (identical(.global$print, "") || # to save address() calls and adding lots of address strings to R's global cache address(x)!=.global$print) @@ -288,3 +280,9 @@ trunc_cols_message = function(not_printed, abbs, class, col.names){ domain=NA ) } + +# Maybe add a method for repr::repr_text. See https://github.com/Rdatatable/data.table/issues/933#issuecomment-220237965 +knit_print.data.table <- function(x, ...) { + if (!shouldPrint(x)) return(invisible(x)) + NextMethod() +} From 5a52ad9ac852dce06e0f40ae81317143969ebf1a Mon Sep 17 00:00:00 2001 From: Ivan K Date: Mon, 21 Oct 2024 17:50:20 +0300 Subject: [PATCH 02/12] Delay S3method(knit_print, data.table) for R < 3.6 Use setHook() to ensure that registerS3method() will be called in the same session if 'knitr' is loaded later. Not needed on R >= 3.6.0 where S3method(knitr::knit_print) will do the right thing by itself. --- R/onLoad.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/onLoad.R b/R/onLoad.R index ef96849e85..cbf272f8f3 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -66,6 +66,15 @@ lockBinding("rbind.data.frame",baseenv()) } } + if (session_r_version < "3.6.0") { + # no delayed registration support for NAMESPACE; perform it manually + if (isNamespaceLoaded("knitr")) { + registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) + } + setHook(packageEvent("knitr", "onLoad"), function(...) { + registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) + }) + } # Set options for the speed boost in v1.8.0 by avoiding 'default' arg of getOption(,default=) # In fread and fwrite we have moved back to using getOption's default argument since it is unlikely fread and fread will be called in a loop many times, plus they From f9e65e230748030f0818b48e14f905aedb10e2e3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 3 Dec 2024 00:05:27 -0800 Subject: [PATCH 03/12] ws-only style --- R/onLoad.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/onLoad.R b/R/onLoad.R index cbf272f8f3..d918db152a 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -69,10 +69,10 @@ if (session_r_version < "3.6.0") { # no delayed registration support for NAMESPACE; perform it manually if (isNamespaceLoaded("knitr")) { - registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) + registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) } setHook(packageEvent("knitr", "onLoad"), function(...) { - registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) + registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) }) } From 34c7ea0fe8c244c600dc2d4339b6b4c2d0235e72 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 3 Dec 2024 00:07:32 -0800 Subject: [PATCH 04/12] put setHook() in a branch --- R/onLoad.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/onLoad.R b/R/onLoad.R index d918db152a..026327cd95 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -70,10 +70,11 @@ # no delayed registration support for NAMESPACE; perform it manually if (isNamespaceLoaded("knitr")) { registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) + } else { + setHook(packageEvent("knitr", "onLoad"), function(...) { + registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) + }) } - setHook(packageEvent("knitr", "onLoad"), function(...) { - registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) - }) } # Set options for the speed boost in v1.8.0 by avoiding 'default' arg of getOption(,default=) From bd7a35f252ef26dbcab6d84f548da9d5cad5c507 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Tue, 3 Dec 2024 21:40:18 +0300 Subject: [PATCH 05/12] Position comment on the same line --- NAMESPACE | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 60ede1d9c4..6e6343c1dd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -104,8 +104,7 @@ if (getRversion() >= "4.0.0") { # version of R (and that is checked in .onLoad with error if not). export(.rbind.data.table) # only export in R<4.0.0 where it is still used; R-devel now detects it is missing doc, #5600 } -if (getRversion() >= "3.6.0") S3method(knitr::knit_print, data.table) -# else manual delayed registration from the onLoad hook +if (getRversion() >= "3.6.0") S3method(knitr::knit_print, data.table) # else manual delayed registration from the onLoad hook S3method(dim, data.table) S3method(dimnames, data.table) S3method("dimnames<-", data.table) From 2557fa2c5e273e753f91b156cbabef8590f4527b Mon Sep 17 00:00:00 2001 From: Ivan K Date: Tue, 3 Dec 2024 22:05:55 +0300 Subject: [PATCH 06/12] Restore the still-required #2369 condition --- R/print.data.table.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/print.data.table.R b/R/print.data.table.R index 611880458d..fe9da00f68 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -30,7 +30,9 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), # Other options investigated (could revisit): Cstack_info(), .Last.value gets set first before autoprint, history(), sys.status(), # topenv(), inspecting next statement in caller, using clock() at C level to timeout suppression after some number of cycles SYS = sys.calls() - if (length(SYS) <= 2L) { # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok) + if (length(SYS) <= 2L || # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok) + ( length(SYS) >= 3L && is.symbol(thisSYS <- SYS[[length(SYS)-2L]][[1L]]) && + as.character(thisSYS) == 'source') ) { # suppress printing from source(echo = TRUE) calls, #2369 return(invisible(x)) } } From 50cc87771d3ea194da12c533c8f09afc3622926a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 4 Dec 2024 03:23:13 +0000 Subject: [PATCH 07/12] Use identical(,print) to check for autoprint --- R/print.data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/print.data.table.R b/R/print.data.table.R index fe9da00f68..c92031d932 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -30,7 +30,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), # Other options investigated (could revisit): Cstack_info(), .Last.value gets set first before autoprint, history(), sys.status(), # topenv(), inspecting next statement in caller, using clock() at C level to timeout suppression after some number of cycles SYS = sys.calls() - if (length(SYS) <= 2L || # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok) + if (identical(SYS[[1L]][[1L]], print) || ( length(SYS) >= 3L && is.symbol(thisSYS <- SYS[[length(SYS)-2L]][[1L]]) && as.character(thisSYS) == 'source') ) { # suppress printing from source(echo = TRUE) calls, #2369 return(invisible(x)) From 32e5cf228cfef0a86befb754c33935ca3cf1a4de Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 4 Dec 2024 06:32:35 +0000 Subject: [PATCH 08/12] Add tests, including updating broken test --- tests/autoprint.R | 17 ++++++++++++---- tests/autoprint.Rout.save | 41 +++++++++++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/tests/autoprint.R b/tests/autoprint.R index 1e4694668b..916b2c7139 100644 --- a/tests/autoprint.R +++ b/tests/autoprint.R @@ -10,7 +10,7 @@ DT[FALSE,a:=3L] # no DT[a==4L,a:=5L] # no DT[a %in% 4:8, a:=5L] # no DT # yes -print(DT[2,a:=4L]) # no +print(DT[2,a:=4L]) # yes, as of #6631 print(DT) # yes if (TRUE) DT[2,a:=5L] # no. used to print before v1.9.5 if (TRUE) if (TRUE) DT[2,a:=6L] # no. used to print before v1.9.5 @@ -20,7 +20,7 @@ DT # yes. 2nd time needed, or solutions below (function(){DT[2,a:=5L];NULL})() # print NULL DT[] # yes. guaranteed print (function(){DT[2,a:=5L];NULL})() # print NULL -print(DT) # no. only DT[] is guaranteed print from v1.9.6 and R 3.2.0 +print(DT) # yes. restored in #6631 behavior that had changed in 1.9.6. (function(){DT[2,a:=5L][];NULL})() # print NULL DT # yes. i) function needs to add [] after last one, so that "DT" alone is guaranteed anyway (function(){DT[2,a:=5L];DT[];NULL})() # print NULL @@ -29,9 +29,9 @@ DT2 = data.table(b=3:4) # no (function(){DT[2,a:=6L];DT2[1,b:=7L];NULL})() DT # yes. last := was on DT2 not DT {DT[2,a:=6L];invisible()} # no -print(DT) # no +print(DT) # yes (function(){print(DT[2,a:=7L]);print(DT);invisible()})() # yes*2 -{print(DT[2,a:=8L]);print(DT);invisible()} # yes*1 Not within function so as at prompt +{print(DT[2,a:=8L]);print(DT);invisible()} # yes*2 as at prompt, again as of #6631 DT[1][,a:=9L] # no (was too tricky to detect that DT[1] is a new object). Simple rule is that := always doesn't print DT[2,a:=10L][1] # yes DT[1,a:=10L][1,a:=10L] # no @@ -43,3 +43,12 @@ DT[1,a:=10L][] # yes. ...[] == oops, forgot print(...) tryCatch(DT[,foo:=ColumnNameTypo], error=function(e) e$message) # error: not found. DT # yes DT # yes + +# child class of data.table doesn't induce unintended print, #3029 +dt <- data.table(x = 1) +class(dt) <- c("foo", "data.table", "data.frame") +print.foo <- function(x, ...) { + NextMethod("print") +} + +dt[, y := 1] # no diff --git a/tests/autoprint.Rout.save b/tests/autoprint.Rout.save index 41aaa89655..3dac656c91 100644 --- a/tests/autoprint.Rout.save +++ b/tests/autoprint.Rout.save @@ -1,7 +1,7 @@ -R version 4.3.2 (2023-10-31) -- "Eye Holes" -Copyright (C) 2023 The R Foundation for Statistical Computing -Platform: x86_64-pc-linux-gnu (64-bit) +R Under development (unstable) (2024-12-01 r87412) -- "Unsuffered Consequences" +Copyright (C) 2024 The R Foundation for Statistical Computing +Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. @@ -44,7 +44,11 @@ Index: 1: 1 2: 3 -> print(DT[2,a:=4L]) # no +> print(DT[2,a:=4L]) # yes, as of #6631 + a + +1: 1 +2: 4 > print(DT) # yes a @@ -69,7 +73,11 @@ NULL 2: 5 > (function(){DT[2,a:=5L];NULL})() # print NULL NULL -> print(DT) # no. only DT[] is guaranteed print from v1.9.6 and R 3.2.0 +> print(DT) # yes. restored in #6631 behavior that had changed in 1.9.6. + a + +1: 1 +2: 5 > (function(){DT[2,a:=5L][];NULL})() # print NULL NULL > DT # yes. i) function needs to add [] after last one, so that "DT" alone is guaranteed anyway @@ -93,7 +101,11 @@ NULL 1: 1 2: 6 > {DT[2,a:=6L];invisible()} # no -> print(DT) # no +> print(DT) # yes + a + +1: 1 +2: 6 > (function(){print(DT[2,a:=7L]);print(DT);invisible()})() # yes*2 a @@ -103,7 +115,11 @@ NULL 1: 1 2: 7 -> {print(DT[2,a:=8L]);print(DT);invisible()} # yes*1 Not within function so as at prompt +> {print(DT[2,a:=8L]);print(DT);invisible()} # yes*2 as at prompt, again as of #6631 + a + +1: 1 +2: 8 a 1: 1 @@ -136,6 +152,15 @@ NULL 1: 10 2: 10 > +> # child class of data.table doesn't induce unintended print, #3029 +> dt <- data.table(x = 1) +> class(dt) <- c("foo", "data.table", "data.frame") +> print.foo <- function(x, ...) { ++ NextMethod("print") ++ } +> +> dt[, y := 1] # no +> > proc.time() user system elapsed - 0.223 0.016 0.231 + 0.209 0.053 0.288 From 843f00a6dfa52e098ffe051672f769f1399aacbf Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 3 Dec 2024 22:41:09 -0800 Subject: [PATCH 09/12] add comment --- R/print.data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/print.data.table.R b/R/print.data.table.R index c92031d932..525aebe930 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -30,7 +30,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), # Other options investigated (could revisit): Cstack_info(), .Last.value gets set first before autoprint, history(), sys.status(), # topenv(), inspecting next statement in caller, using clock() at C level to timeout suppression after some number of cycles SYS = sys.calls() - if (identical(SYS[[1L]][[1L]], print) || + if (identical(SYS[[1L]][[1L]], print) || # this is what auto-print looks like, i.e. '> DT' and '> DT[, a:=b]' in the terminal; see #3029. ( length(SYS) >= 3L && is.symbol(thisSYS <- SYS[[length(SYS)-2L]][[1L]]) && as.character(thisSYS) == 'source') ) { # suppress printing from source(echo = TRUE) calls, #2369 return(invisible(x)) From 5a7882d6c2325fbec5935a762dd44ff2a5c950e9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 4 Dec 2024 06:56:52 +0000 Subject: [PATCH 10/12] test withAutoprint() behavior too --- tests/autoprint.R | 11 ++++++++++- tests/autoprint.Rout.save | 23 +++++++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/tests/autoprint.R b/tests/autoprint.R index 916b2c7139..b752de6cce 100644 --- a/tests/autoprint.R +++ b/tests/autoprint.R @@ -50,5 +50,14 @@ class(dt) <- c("foo", "data.table", "data.frame") print.foo <- function(x, ...) { NextMethod("print") } - dt[, y := 1] # no + +# withAutoprint() testing (since R3.4.0) +if (!exists("withAutoprint", baseenv())) { + q("no") +} +if (TRUE) withAutoprint({ + DT # yes + DT[1L, 1L] # yes + DT[2L, a := 10L] # no +}) diff --git a/tests/autoprint.Rout.save b/tests/autoprint.Rout.save index 3dac656c91..19d5b801a6 100644 --- a/tests/autoprint.Rout.save +++ b/tests/autoprint.Rout.save @@ -158,9 +158,28 @@ NULL > print.foo <- function(x, ...) { + NextMethod("print") + } -> > dt[, y := 1] # no > +> # withAutoprint() testing (since R3.4.0) +> if (!exists("withAutoprint", baseenv())) { ++ q("no") ++ } +> if (TRUE) withAutoprint({ ++ DT # yes ++ DT[1L, 1L] # yes ++ DT[2L, a := 10L] # no ++ }) +> DT + a + +1: 10 +2: 10 +> DT[1L, 1L] + a + +1: 10 +> DT[2L, `:=`(a, 10L)] +> > proc.time() user system elapsed - 0.209 0.053 0.288 + 0.182 0.056 0.246 From 422e57a937e2bcec56d41948f6c7440198b9b865 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 4 Dec 2024 23:44:11 -0800 Subject: [PATCH 11/12] NEWS entry --- NEWS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 53f37d478e..6508890c59 100644 --- a/NEWS.md +++ b/NEWS.md @@ -115,7 +115,9 @@ rowwiseDT( 14. Added a `data.frame` method for `format_list_item()` to fix error printing data.tables with columns containing 1-column data.frames, [#6592](https://github.com/Rdatatable/data.table/issues/6592). Thanks to @r2evans for the bug report and fix. -15. The auto-printing suppression in `knitr` documents is now done by implementing a method for `knit_print` instead of looking up the call stack, [#6589](https://github.com/Rdatatable/data.table/pull/6589). Thanks to @jangorecki for the report [#6509](https://github.com/Rdatatable/data.table/issues/6509) and @aitap for the fix. +15. Auto-printing gets some substantial improvements + - Suppression in `knitr` documents is now done by implementing a method for `knit_print` instead of looking up the call stack, [#6589](https://github.com/Rdatatable/data.table/pull/6589). The old way was fragile and wound up broken by some implementation changes in {knitr}. Thanks to @jangorecki for the report [#6509](https://github.com/Rdatatable/data.table/issues/6509) and @aitap for the fix. + - `print()` methods for S3 subclasses of data.table (e.g. an object of class `c("my.table", "data.table", "data.frame")`) no longer print where plain data.tables wouldn't, e.g. `myDT[, y := 2]`, [#3029](https://github.com/Rdatatable/data.table/issues/3029). The improved detection of auto-printing scenarios has the added benefit of _allowing_ print in highly explicit statements like `print(DT[, y := 2])`, obviating our recommendation since v1.9.6 to append `[]` to signal "please print me". ## NOTES From 9d8f3c6b706b66131b60e403318203603502b97e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 4 Dec 2024 23:49:41 -0800 Subject: [PATCH 12/12] blank line in correct place? --- tests/autoprint.Rout.save | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/autoprint.Rout.save b/tests/autoprint.Rout.save index 48a5533ee6..4052fbe303 100644 --- a/tests/autoprint.Rout.save +++ b/tests/autoprint.Rout.save @@ -169,6 +169,7 @@ NULL > DT = data.table(a = 1) > DT[, `:=`(a, 1)] +> > # child class of data.table doesn't induce unintended print, #3029 > dt <- data.table(x = 1) > class(dt) <- c("foo", "data.table", "data.frame")