Rdatatable · mattdowle · Apr 7, 2018 · Mar 22, 2018 · Mar 23, 2018 · Mar 23, 2018
@@ -117,6 +117,8 @@ Thus, `with=` should no longer be needed in any cases. Please change to using th
 
 19. `setindexv` can now assign multiple (separate) indices by accepting a `list` in the `cols` argument.
 
+20. `as.matrix.data.table` method now has an additional `rownames` argument allowing for a single column to be used as the `rownames` after conversion to a `matrix`. Thanks to @sritchie73 for the suggestion, use cases, [#2692](https://github.com/Rdatatable/data.table/issues/2692) and implementation [PR#2702](https://github.com/Rdatatable/data.table/pull/2702) and @MichaelChirico for additional use cases.
+
 #### BUG FIXES
 
 1. The new quote rules handles this single field `"Our Stock Screen Delivers an Israeli Software Company (MNDO, CTCH)<\/a> SmallCapInvestor.com - Thu, May 19, 2011 10:02 AM EDT<\/cite><\/div>Yesterday in \""Google, But for Finding

@@ -1881,17 +1881,59 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
 #    x
 #}
 
-
-as.matrix.data.table <- function(x,...)
-{
-  dm <- dim(x)
-  cn <- names(x)
+as.matrix.data.table <- function(x, rownames, ...) {
+  rn <- NULL
+  rnc <- NULL
+  if (!missing(rownames)) { # Convert rownames to a column index if possible
+    if (length(rownames) == nrow(x)) {
+      # rownames argument is a vector of row names, no column in x to drop.
+      rn <- rownames
+      rnc <- NULL 
+    } else if (!is.null(rownames) && length(rownames) != 1L) { # vector(0) will throw an error, but NULL will pass through
+      stop(sprintf("rownames must be a single column in x or a vector of row names of length nrow(x)=%d", nrow(x)))
+    } else if (!(is.null(rownames) || is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) {
+      # E.g. because rownames is some sort of object that can't be converted to a column index
+      stop("rownames must be TRUE, a column index, a column name in x, or a vector of row names")
+    } else if (!is.null(rownames) && !is.na(rownames) && !identical(rownames, FALSE)) { # Handles cases where rownames is a column name, or key(x) from TRUE
+      if (identical(rownames, TRUE)) {
+        if (haskey(x)) { 
+          rownames <- key(x) 
+          if (length(rownames) > 1L) {
+            warning(sprintf("rownames is TRUE but multiple keys [%s] found for x; defaulting to first column x[,1]",
+                            paste(rownames, collapse = ','), rownames[1L]))
+            rownames <- 1L
+          }
+        } else { 
+          rownames <- 1L
+        }
+      }
+      if (is.character(rownames)) { 
+        rnc <- chmatch(rownames, names(x))
+        if (is.na(rnc)) stop(rownames, " is not a column of x")
+      } else { # rownames is an index already
+        if (rownames < 1L || rownames > ncol(x))
+          stop(sprintf("rownames is %d which is outside the column number range [1,ncol=%d]", rownames, ncol(x)))
+        rnc <- rownames
+      }
+    }
+  }
+  # If the rownames argument has been used, and is a single column,
+  # extract that column's index (rnc) and drop it from x
+  if (!is.null(rnc)) { 
+    rn <- x[[rnc]]
+    dm <- dim(x) - c(0, 1)
+    cn <- names(x)[-rnc]
+    X <- x[, .SD, .SDcols = cn]
+  } else {
+    dm <- dim(x) 
+    cn <- names(x) 
+    X <- x
+  }
   if (any(dm == 0L))
-    return(array(NA, dim = dm, dimnames = list(NULL, cn)))
+    return(array(NA, dim = dm, dimnames = list(rn, cn)))
   p <- dm[2L]
   n <- dm[1L]
   collabs <- as.list(cn)
-  X <- x
   class(X) <- NULL
   non.numeric <- non.atomic <- FALSE
   all.logical <- TRUE
@@ -1936,7 +1978,7 @@ as.matrix.data.table <- function(x,...)
   }
   X <- unlist(X, recursive = FALSE, use.names = FALSE)
   dim(X) <- c(n, length(X)/n)
-  dimnames(X) <- list(NULL, unlist(collabs, use.names = FALSE))
+  dimnames(X) <- list(rn, unlist(collabs, use.names = FALSE))
   X
 }
 

@@ -11573,6 +11573,31 @@ test(1898.2, set2keyv(DT, "a"), error="deprecated. Please use setindexv() instea
 test(1898.3, key2(DT), error="deprecated. Please use indices() instead.")
 
 
+# Allow column to be used as rownames when converting to matrix #2702
+DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8)
+mat <- matrix(1:8, ncol = 2, dimnames=list(letters[1:4], c("X", "Y")))
+mat2 <- matrix(c(letters[1:4], 1:8), ncol=3, dimnames=list(NULL, c("id", "X", "Y")))
+mat3 <- matrix(c(letters[1:4], 1:8), ncol=3, dimnames=list(1:4, c("id", "X", "Y")))
+test(1899.01, as.matrix(DT, 1), mat)
+test(1899.02, as.matrix(DT, "id"), mat)
+test(1899.03, as.matrix(DT, TRUE), mat)
+setkey(DT, id)
+test(1899.04, as.matrix(DT, TRUE), mat)
+test(1899.05, as.matrix(DT, 1:4), mat3)
+# errors
+test(1899.06, as.matrix(DT, -1), error="rownames is -1 which is outside the column number range")
+test(1899.07, as.matrix(DT, "Z"), error="Z is not a column of x")
+test(1899.08, as.matrix(DT, c(1,2)), error="rownames must be a single column in x or a vector of row names of length nrow(x)")
+test(1899.09, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, a column name in x, or a vector of row names")
+# values that pass through (rownames ignored)
+test(1899.10, as.matrix(DT, NA), mat2)
+test(1899.11, as.matrix(DT, NULL), mat2)
+test(1899.12, as.matrix(DT, FALSE), mat2)
+# Warnings:
+setkey(DT, id, X)
+test(1899.13, as.matrix(DT, TRUE), mat, warning="rownames is TRUE but multiple keys")
+
+
 ###################################
 #  Add new tests above this line  #
 ###################################

@@ -0,0 +1,63 @@
+\name{as.matrix}
+\alias{as.matrix}
+\alias{as.matrix.data.table}
+\title{Convert a data.table to a matrix}
+\description{
+Converts a \code{data.table} into a \code{matrix}, optionally using one
+of the columns in the \code{data.table} as the \code{matrix} \code{rownames}.
+}
+\usage{
+\method{as.matrix}{data.table}(x, rownames, ...)}
+
+\arguments{
+\item{x}{a \code{data.table}}
+\item{rownames}{optional, a single column name or column index to use as 
+the \code{rownames} in the returned \code{matrix}. If \code{TRUE} the
+\code{\link{key}} of the \code{data.table} will be used if it is a 
+single column, otherwise the first column in the \code{data.table} will
+be used. Alternative a vector of length \code{nrow(x)} to assign as the
+row names of the returned \code{matrix}.}
+\item{\dots}{additional arguments to be passed to or from methods.}
+}
+
+\details{
+\code{\link{as.matrix}} is a generic function in base R. It dispatches to
+\code{as.matrix.data.table} if its \code{x} argument is a \code{data.table}.
+
+The method for \code{data.table}s will return a character matrix if there 
+are only atomic columns and any non-(numeric/logical/complex) column, 
+applying \code{\link{as.vector}} to factors and \code{\link{format}} to other 
+non-character columns. Otherwise, the usual coercion hierarchy (logical < 
+integer < double < complex) will be used, e.g., all-logical data frames 
+will be coerced to a logical matrix, mixed logical-integer will give an 
+integer matrix, etc.
+
+An additional argument \code{rownames} is provided for \code{as.matrix.data.table}
+to facilitate conversions to matrices where the \code{\link{rownames}} are stored
+in a single column of \code{x}, e.g. the first column after using 
+\code{\link{dcast.data.table}}.
+}
+
+\value{
+A new \code{matrix} containing the contents of \code{x}.
+}
+
+\seealso{
+\code{\link{data.table}}, \code{\link{as.matrix}}, \code{\link{data.matrix}}
+\code{\link{array}}
+}
+
+\examples{
+(dt1 <- data.table(A = letters[1:10], X = 1:10, Y = 11:20))
+as.matrix(dt1) # character matrix
+as.matrix(dt1, rownames = "A")
+as.matrix(dt1, rownames = 1)
+as.matrix(dt1, rownames = TRUE)
+
+(dt1 <- data.table(A = letters[1:10], X = 1:10, Y = 11:20))
+setkey(dt1, A)
+as.matrix(dt1, rownames = TRUE)
+}
+
+\keyword{ array }
+