#' Install any missing packages, then attach all requested packages.
#'
#' @param pkgs Character vector of package names.
#' @return Invisibly, a list with one element per entry of `pkgs`, each being
#'   the value returned by `library()` for that package.
check.and.install.pkgs <- function(pkgs) {
  # system.file() returns "" when a package cannot be found. Checking each
  # name this way avoids scanning every installed package with
  # installed.packages(), which its documentation advises against for
  # presence checks.
  is_installed <- vapply(
    pkgs,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
  new.packages <- pkgs[!is_installed]
  if (length(new.packages) > 0) {
    install.packages(new.packages, dependencies = TRUE)
  }
  # Attach quietly; invisible() is outermost so the invisible return is
  # explicit rather than relying on visibility leaking through the wrapper.
  invisible(suppressPackageStartupMessages(
    lapply(pkgs, library, character.only = TRUE)
  ))
}
# Make sure the three packages used in this post are installed and attached.
check.and.install.pkgs(
  c("data.table", "reshape2", "scatterplot3d")
)
Happy New Year!
Recently I spent some time working with arrays in R.
I believe it is a bad idea to work with arrays using `for` loops, which is both slow and error-prone. We can just melt the array into long-format data, do the work, and arrange it back into an array at the end if needed.
Example: a 3-D array with dimension 4x3x2
Notice how the values (`1:24`) fill the three dimensions: the values `1:12` go into the 1st 4x3 matrix by column, then the rest go into the 2nd 4x3 matrix.
# Build a 4 x 3 x 2 array (group x year x sex) holding the values 1 to 24.
arr <- array(
  1:24,
  dim = c(4, 3, 2),
  dimnames = list(
    group = LETTERS[1:4],
    year = 2001:2003,
    sex = c("F", "M")
  )
)
arr
## , , sex = F
##
## year
## group 2001 2002 2003
## A 1 5 9
## B 2 6 10
## C 3 7 11
## D 4 8 12
##
## , , sex = M
##
## year
## group 2001 2002 2003
## A 13 17 21
## B 14 18 22
## C 15 19 23
## D 16 20 24
Melt into long
Melt directly, and in the long-format data, the values are sorted exactly from 1 to 24
# Melt the array into long format; later code reads the `group`, `year`,
# `sex` and `value` columns of the result.
arr_long <- reshape2::melt(arr)
# Display the long-format data (presumably as a paged table in the rendered
# document -- confirm against the rmarkdown documentation).
rmarkdown::paged_table(arr_long)
Thus if we create an array with the same dimension, it reverts back to the original one
# Rebuild an array of the same shape straight from the melted values.
arr2 <- array(
  data = arr_long$value,
  dim = c(4, 3, 2)
)
# arr2 was created without dimnames, so strip them from arr before comparing
identical(unname(arr), arr2)
## [1] TRUE
Recover the array
But the problem is that we usually need to work on the data, and the row order will change.
For example, we may need to add another year of data. And in many other cases, it is just easier to work with the long-format data.
# Long-format rows for one extra year (2004): every group/sex combination,
# with values 101 to 108.
arr_long_extra <- expand.grid(
  group = LETTERS[1:4],
  year = 2004,
  sex = c("F", "M")
)
arr_long_extra[["value"]] <- 100 + seq_len(8)
arr_long_extra
## group year sex value
## 1 A 2004 F 101
## 2 B 2004 F 102
## 3 C 2004 F 103
## 4 D 2004 F 104
## 5 A 2004 M 105
## 6 B 2004 M 106
## 7 C 2004 M 107
## 8 D 2004 M 108
# Stack the extra rows underneath the original long-format data.
arr_long_new <- rbind(arr_long, arr_long_extra)
Setting the rows in the right order is the key to producing the right array
Since the original dimension 4x3x2 corresponds to group x year x sex, and the values fill the first dimension fastest, the rows should be ordered by sex, then year, then group if we want to recover the original array. (Thus arr_long is already in the right order if we don’t re-sort it.)
# Sort the rows by sex, then year, then group
arr_long_new <- arr_long_new[
  order(arr_long_new$sex, arr_long_new$year, arr_long_new$group),
]
# These values are about to fill a 4 groups x 4 years x 2 sex categories
# array; in this order they land in the right cells:
arr_long_new$value
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 101 102 103 104 13 14 15
## [20] 16 17 18 19 20 21 22 23 24 105 106 107 108
# 4 groups x 4 years x 2 sexes, filled from the sorted long-format values
array(
  arr_long_new$value,
  dim = c(4, 4, 2),
  dimnames = list(
    group = LETTERS[1:4],
    year = 2001:2004,
    sex = c("F", "M")
  )
)
## , , sex = F
##
## year
## group 2001 2002 2003 2004
## A 1 5 9 101
## B 2 6 10 102
## C 3 7 11 103
## D 4 8 12 104
##
## , , sex = M
##
## year
## group 2001 2002 2003 2004
## A 13 17 21 105
## B 14 18 22 106
## C 15 19 23 107
## D 16 20 24 108
Notice that the extra values from `arr_long_extra` have been added correctly into the array.
# Alternatively, if you use data.table, this does the same thing as sorting
# with order(sex, year, group). The result is not assigned back: setorder()
# is called only for its effect on arr_long_new (presumably it reorders the
# rows by reference -- confirm against the data.table documentation).
data.table::setorder(arr_long_new, sex, year, group)
Similarly, we can set one dimension of the array by a specific order or using another vector.
Suppose I want to reorder the `year` variable as `year_order`:
year_order <- c("2003", "2001", "2002")
# Sort by sex, then by each year's position in year_order, then by group
arr_long2 <- arr_long[
  order(
    arr_long$sex,
    match(arr_long$year, year_order),
    arr_long$group
  ),
]
# array() only places the values; no names travel with them, so the
# dimnames (in the new year order) have to be supplied by hand
arr3 <- array(
  arr_long2$value,
  dim = c(4, 3, 2),
  dimnames = list(
    group = LETTERS[1:4],
    year = year_order,
    sex = c("F", "M")
  )
)
arr3
## , , sex = F
##
## year
## group 2003 2001 2002
## A 9 1 5
## B 10 2 6
## C 11 3 7
## D 12 4 8
##
## , , sex = M
##
## year
## group 2003 2001 2002
## A 21 13 17
## B 22 14 18
## C 23 15 19
## D 24 16 20
It is the same as reordering the 2nd dimension of array:
# Reorder the year dimension of arr directly and compare the result with arr3
identical(
  arr3,
  arr[, order(match(dimnames(arr)[["year"]], year_order)), ]
)
## [1] TRUE
To visualize the array, the long data is required as well
library("scatterplot3d")
# Pick one of the plotting symbols 15 to 18 for each row, indexed by the
# numeric code of that row's group
shapes <- (15:18)[as.integer(arr_long$group)]
# haven't figured out how to round the axis annotation
scatterplot3d(
  arr_long,
  pch = shapes,
  cex.symbols = 2,
  xlab = "Group",
  ylab = "Year",
  zlab = "Sex",
  highlight.3d = TRUE
)