无法使用r data.table包执行计算

时间:2013-05-03 14:24:59

标签: r data.table

我有一个庞大的数据框,最后30行在下面:

libary(data.table)

dput(p)的

structure(list(DATE = structure(c(1367516015, 1367516045, 1367516075, 
1367516105, 1367516135, 1367516165, 1367516195, 1367516225, 1367516255, 
1367516285, 1367516315, 1367516345, 1367516375, 1367516405, 1367516435, 
1367516465, 1367516495, 1367516525, 1367516555, 1367516585, 1367516615, 
1367516645, 1367516675, 1367516705, 1367516735, 1367516765, 1367516795, 
1367516825, 1367516855, 1367516885), class = c("POSIXct", "POSIXt"
), tzone = ""), LPAR = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("cigp01a4a004", "cigp01b4a002", 
"cigp01b4a004", "cigp04a4a002", "cigp04a4a004", "cigp04b4a002", 
"cigp04b4a004"), class = "factor"), ENT = c(0.5, 0.5, 0.5, 0.5, 
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5
), USR_SYS_CPU_PCT = c(79L, 80L, 77L, 77L, 77L, 76L, 79L, 82L, 
81L, 80L, 79L, 77L, 77L, 77L, 79L, 79L, 80L, 82L, 82L, 83L, 80L, 
81L, 80L, 78L, 78L, 83L, 86L, 87L, 88L, 87L), ENT_PCT = c(706.8, 
693.8, 570.1, 641.5, 558.5, 601.5, 674.3, 742.3, 668.9, 722.6, 
679.1, 677.2, 548.5, 644.6, 689.3, 716.1, 709.5, 767.3, 753.7, 
786.4, 684.2, 735.1, 688.2, 676.6, 645.6, 788, 859.5, 832.6, 
883.1, 872.2), PHYSICAL_CPU_USED = c(3.53, 3.47, 2.85, 3.21, 
2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 2.74, 3.22, 3.45, 
3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 3.38, 3.23, 3.94, 
4.3, 4.16, 4.42, 4.36), PROC_QUE = c(12L, 13L, 19L, 16L, 11L, 
13L, 17L, 14L, 9L, 10L, 12L, 13L, 16L, 14L, 22L, 17L, 17L, 17L, 
26L, 26L, 15L, 43L, 9L, 11L, 12L, 7L, 31L, 26L, 27L, 23L), RELATIVE_CORES = c(3.53, 
3.47, 2.85, 3.21, 2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 
2.74, 3.22, 3.45, 3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 
3.38, 3.23, 3.94, 4.3, 4.16, 4.42, 4.36), USED_CORES = c(2.7887, 
2.776, 2.1945, 2.4717, 2.1483, 2.2876, 2.6623, 3.0422, 2.7054, 
2.888, 2.686, 2.6103, 2.1098, 2.4794, 2.7255, 2.8282, 2.84, 3.1488, 
3.0914, 3.2619, 2.736, 2.9808, 2.752, 2.6364, 2.5194, 3.2702, 
3.698, 3.6192, 3.8896, 3.7932)), .Names = c("DATE", "LPAR", "ENT", 
"USR_SYS_CPU_PCT", "ENT_PCT", "PHYSICAL_CPU_USED", "PROC_QUE", 
"RELATIVE_CORES", "USED_CORES"), class = "data.frame", row.names = c(NA, 
-30L))

当我喜欢使用data.table计算一些值时,如下所示:

p<-data.table(p)
p<-p[,RELATIVE_PERCENT:=ifelse(ENT_PCT>100, (USED_CORES/ENT)*100, USR_SYS_CPU_PCT), by= c("DATE", "LPAR")]

我收到此错误:

Error in `[.data.table`(x, , `:=`(RELATIVE_PERCENT, ifelse(ENT_PCT > 100,  : 
  Type of RHS ('integer') must match LHS ('double'). To check and coerce would
  impact performance too much for the fastest cases. Either change the type of
  the target column, or coerce the RHS of := yourself (e.g. by using 1L instead
  of 1)

这个错误是什么意思?我怎样才能解决这个错误?

2 个答案:

答案 0 :(得分:10)

问题是您的ifelse语句对于某些值返回integer类型,对于其他一些条目则返回numeric(double)。并且data.table抱怨列类型不匹配,因为它期望用户执行强制(出于性能原因,如错误中给出)。所以,只需用as.numeric包裹它,以便所有值都转换为double。

p <- p[,RELATIVE_PERCENT := as.numeric(ifelse(ENT_PCT>100, (USED_CORES/ENT)*100, 
                      USR_SYS_CPU_PCT)), by= c("DATE", "LPAR")]

答案 1 :(得分:3)

我这样做了:

sapply(p, class)

并注意到我的一个列是整数。然后我这样做了:

x<-x[,RELATIVE_PERCENT:=ifelse(ENT_PCT>100, ((USED_CORES/ENT)*100), as.numeric(USR_SYS_CPU_PCT)), by= c("DATE", "LPAR")]

这是金钱