| merge.Matrix {Matrix.utils} | R Documentation |
Implementation of merge for Matrix. By explicitly
calling merge.Matrix it will also work for matrix, for
data.frame, and vector objects as a much faster alternative to
the built-in merge.
## S3 method for class 'Matrix'
merge(x, y, by.x, by.y, all.x = TRUE, all.y = TRUE, out.class = class(x),
  fill.x = ifelse(is(x, "sparseMatrix"), FALSE, NA), fill.y = fill.x, ...)

join.Matrix(x, y, by.x, by.y, all.x = TRUE, all.y = TRUE, out.class = class(x),
  fill.x = ifelse(is(x, "sparseMatrix"), FALSE, NA), fill.y = fill.x, ...)
x, y |
Matrix or matrix-like object |
by.x |
vector indicating the names to match from Matrix x |
by.y |
vector indicating the names to match from Matrix y |
all.x |
logical; if TRUE, then each value in x will be included even if it has no matching values in y |
all.y |
logical; if TRUE, then each value in y will be included even if it has no matching values in x |
out.class |
the class of the output object. Defaults to the class of x. Note that some output classes are not possible due to R coercion capabilities, such as converting a character matrix to a Matrix. |
fill.x, fill.y |
the value to put in merged columns where there is no match. Defaults to 0/FALSE for sparse matrices in order to preserve sparsity, NA for all other classes |
... |
arguments to be passed to or from methods. Currently ignored |
all.x/all.y correspond to the four types of database joins in the
following way:
left: all.x=TRUE, all.y=FALSE
right: all.x=FALSE, all.y=TRUE
inner: all.x=FALSE, all.y=FALSE
full: all.x=TRUE, all.y=TRUE
Note that NA values will match other NA values.
# Three dense Matrix objects that share an 'orderNum' key column.
orders <- Matrix(as.matrix(data.frame(
  orderNum = 1:1000,
  customer = sample(100, 1000, replace = TRUE)
)))
cancelledOrders <- Matrix(as.matrix(data.frame(
  orderNum = sample(1000, 100),
  cancelled = 1
)))
skus <- Matrix(as.matrix(data.frame(
  orderNum = sample(1000, 10000, replace = TRUE),
  sku = sample(1000, 10000, replace = TRUE),
  amount = runif(10000)
)))

# all.x and all.y are left at their defaults (TRUE).
a <- merge(
  orders, cancelledOrders,
  by.x = orders[, 'orderNum'],
  by.y = cancelledOrders[, 'orderNum']
)
# all.x = FALSE while all.y keeps its default of TRUE.
b <- merge(
  orders, cancelledOrders,
  by.x = orders[, 'orderNum'],
  by.y = cancelledOrders[, 'orderNum'],
  all.x = FALSE
)
# The key values in skus are sampled with replacement, so they can repeat.
c <- merge(
  orders, skus,
  by.x = orders[, 'orderNum'],
  by.y = skus[, 'orderNum']
)

# The Matrices above could be converted to matrices or data.frames and merged
# by other methods. That is not possible in the sparse case, which this
# function handles directly:
sm <- cbind2(
  1:200000,
  rsparsematrix(200000, 10000, density = 0.0001)
)
sm2 <- cbind2(
  sample(1:200000, 50000, replace = TRUE),
  rsparsematrix(200000, 10, density = 0.01)
)
sm3 <- merge.Matrix(sm, sm2, by.x = sm[, 1], by.y = sm2[, 1])
## Not run:
# merge.Matrix can also handle many other data types, such as data frames,
# and is generally fast.
# NOTE: data.table is attached with library() rather than require() so that a
# missing installation stops with a clear error instead of failing later on
# the data.table() call. The attach happens inside system.time(), so the first
# data.table timing also includes the cost of loading the package.

# Case 1: character keys.
orders <- data.frame(
  orderNum = as.character(sample(1e5, 1e6, replace = TRUE)),
  sku = sample(1e3, 1e6, replace = TRUE),
  customer = sample(1e4, 1e6, replace = TRUE),
  stringsAsFactors = FALSE
)
cancelledOrders <- data.frame(
  orderNum = as.character(sample(1e5, 1e4)),
  cancelled = 1,
  stringsAsFactors = FALSE
)
system.time(a <- merge.Matrix(
  orders, cancelledOrders,
  by.x = orders[, 'orderNum'],
  by.y = cancelledOrders[, 'orderNum']
))
system.time(b <- merge.data.frame(
  orders, cancelledOrders,
  all.x = TRUE, all.y = TRUE
))
system.time(c <- dplyr::full_join(orders, cancelledOrders))
system.time({
  library(data.table)
  d <- merge(
    data.table(orders), data.table(cancelledOrders),
    by = 'orderNum', all = TRUE, allow.cartesian = TRUE
  )
})

# Case 2: the same comparison with numeric keys.
orders <- data.frame(
  orderNum = sample(1e5, 1e6, replace = TRUE),
  sku = sample(1e3, 1e6, replace = TRUE),
  customer = sample(1e4, 1e6, replace = TRUE),
  stringsAsFactors = FALSE
)
cancelledOrders <- data.frame(
  orderNum = sample(1e5, 1e4),
  cancelled = 1,
  stringsAsFactors = FALSE
)
system.time(b <- merge.Matrix(
  orders, cancelledOrders,
  by.x = orders[, 'orderNum'],
  by.y = cancelledOrders[, 'orderNum']
))
system.time(e <- dplyr::full_join(orders, cancelledOrders))
system.time({
  library(data.table)
  d <- merge(
    data.table(orders), data.table(cancelledOrders),
    by = 'orderNum', all = TRUE, allow.cartesian = TRUE
  )
})

# In certain cases, merge.Matrix can be much faster than alternatives.
# Case 3: plain character vectors used both as the data and as the keys.
one <- as.character(1:1000000)
two <- as.character(sample(1:1000000, 1e5, replace = TRUE))
system.time(b <- merge.Matrix(one, two, by.x = one, by.y = two))
system.time(c <- dplyr::full_join(data.frame(key = one), data.frame(key = two)))
system.time({
  library(data.table)
  d <- merge(
    data.table(data.frame(key = one)), data.table(data.frame(key = two)),
    by = 'key', all = TRUE, allow.cartesian = TRUE
  )
})
## End(Not run)