Rdatatable · MichaelChirico · Aug 28, 2024 · Aug 28, 2024 · Aug 28, 2024 · Aug 28, 2024
@@ -10,6 +10,8 @@
 
 1. Tests run again when some Suggests packages are missing, [#6411](https://github.com/Rdatatable/data.table/issues/6411). Thanks @aadler for the note and @MichaelChirico for the fix.
 
+2. All direct use of `NAMED` and some direct use of `LEVELS` has been removed from the package, reducing the use of non-API functions, see [#6420](https://github.com/Rdatatable/data.table/pull/6420) by Ivan Krylov. The work is on the overall issue [#6180](https://github.com/Rdatatable/data.table/issues/6180) reported by Michael Chirico.
+
 # data.table [v1.16.0](https://github.com/Rdatatable/data.table/milestone/30)  (25 August 2024)
 
 ## BREAKING CHANGES

@@ -549,12 +549,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
          (TYPEOF(values)!=VECSXP && i>0) // assigning the same values to a second column. Have to ensure a copy #2540
          ) {
         if (verbose) {
-          Rprintf(_("RHS for item %d has been duplicated because NAMED==%d MAYBE_SHARED==%d, but then is being plonked. length(values)==%d; length(cols)==%d)\n"),
-                  i+1, NAMED(thisvalue), MAYBE_SHARED(thisvalue), length(values), length(cols));
+          Rprintf(_("RHS for item %d has been duplicated because MAYBE_REFERENCED==%d MAYBE_SHARED==%d, but then is being plonked. length(values)==%d; length(cols)==%d)\n"),
+                  i+1, MAYBE_REFERENCED(thisvalue), MAYBE_SHARED(thisvalue), length(values), length(cols));
         }
         thisvalue = copyAsPlain(thisvalue);   // PROTECT not needed as assigned as element to protected list below.
       } else {
-        if (verbose) Rprintf(_("Direct plonk of unnamed RHS, no copy. NAMED==%d, MAYBE_SHARED==%d\n"), NAMED(thisvalue), MAYBE_SHARED(thisvalue));  // e.g. DT[,a:=as.character(a)] as tested by 754.5
+        if (verbose) Rprintf(_("Direct plonk of unnamed RHS, no copy. MAYBE_REFERENCED==%d, MAYBE_SHARED==%d\n"), MAYBE_REFERENCED(thisvalue), MAYBE_SHARED(thisvalue));  // e.g. DT[,a:=as.character(a)] as tested by 754.5
       }
       SET_VECTOR_ELT(dt, coln, thisvalue);                 // plonk new column in as it's already the correct length
       setAttrib(thisvalue, R_NamesSymbol, R_NilValue);     // clear names such as  DT[,a:=mapvector[a]]

@@ -17,9 +17,6 @@ Differences over standard binary search (e.g. bsearch in stdlib.h) :
   o non equi joins (no != yet) since 1.9.8
 */
 
-#define ENC_KNOWN(x) (LEVELS(x) & 12)
-// 12 = LATIN1_MASK (1<<2) | UTF8_MASK (1<<3)  // Would use these definitions from Defn.h, but that appears to be private to R. Hence 12.
-
 #define EQ 1
 #define LE 2
 #define LT 3

@@ -33,9 +33,10 @@
 // #include <signal.h> // the debugging machinery + breakpoint aidee
 // raise(SIGINT);
 
-#define IS_UTF8(x)  (LEVELS(x) & 8)
-#define IS_ASCII(x) (LEVELS(x) & 64)
-#define IS_LATIN(x) (LEVELS(x) & 4)
+/* we mean the encoding bits, not CE_NATIVE in a UTF-8 locale */
+#define IS_UTF8(x)  (getCharCE(x) == CE_UTF8)
+#define IS_LATIN(x) (getCharCE(x) == CE_LATIN1)
+#define IS_ASCII(x) (LEVELS(x) & 64) // API expected in R >= 4.5
 #define IS_TRUE(x)  (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==TRUE)
 #define IS_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==FALSE)
 #define IS_TRUE_OR_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]!=NA_LOGICAL)
@@ -60,14 +61,6 @@
 // for use with CPLXSXP, no macro provided by R internals
 #define ISNAN_COMPLEX(x) (ISNAN((x).r) || ISNAN((x).i)) // TRUE if either real or imaginary component is NA or NaN
 
-// Backport macros added to R in 2017 so we don't need to update dependency from R 3.0.0
-#ifndef MAYBE_SHARED
-#  define MAYBE_SHARED(x) (NAMED(x) > 1)
-#endif
-#ifndef MAYBE_REFERENCED
-#  define MAYBE_REFERENCED(x) ( NAMED(x) > 0 )
-#endif
-
 // If we find a non-ASCII, non-NA, non-UTF8 encoding, we try to convert it to UTF8. That is, marked non-ascii/non-UTF8 encodings will
 // always be checked in UTF8 locale. This seems to be the best fix Arun could think of to put the encoding issues to rest.
 // Since the if-statement will fail with the first condition check in "normal" ASCII cases, there shouldn't be huge penalty issues in