Question

假设我有以下数据框

# Example inputs: one data frame of ids per experimental group.
treatment1 <- data.frame(id = c(1, 2, 7))
treatment2 <- data.frame(id = c(3, 7, 10))
control <- data.frame(id = c(4, 5, 8, 9))

我想创建一个新数据框，它包含这些数据框中所有id的并集，并为每个原始数据框添加一个指示列：若某个id出现在该数据框中，则对应列取值为1，否则为0。

# Desired shape of the result: one row per id and one flag column per group,
# all flags initialised to 0.
experiment <- data.frame(
  id = 1:10,
  treatment1 = 0,
  treatment2 = 0,
  control = 0
)

其中experiment$treatment1[1]=1等等

在R中执行此操作的最佳方式是什么？

谢谢！

Answer 1

根据 @Flodel 的建议更新：

    # Labels for the source data frames, in the order they are stacked below.
    var1 <- c("treatment1", "treatment2", "control")

    # Stack all ids into one data frame and tag each row with the name of the
    # data frame it came from (each label repeated once per source row).
    kk <- rbind(treatment1, treatment2, control)
    kk$df <- rep(
      var1,
      times = vapply(list(treatment1, treatment2, control), nrow, integer(1))
    )
    kk

    id         df
1   1 treatment1
2   2 treatment1
3   7 treatment1
4   3 treatment2
5   7 treatment2
6  10 treatment2
7   4    control
8   5    control
9   8    control
10  9    control

如果您想要1和0的形式，可以使用table

# Cross-tabulate id against source label: each cell counts how often that id
# occurs in that source data frame.
ll <- table(id = kk$id, df = kk$df)
ll


  df
id   control treatment1 treatment2
  1        0          1          0
  2        0          1          0
  3        0          0          1
  4        1          0          0
  5        1          0          0
  7        0          1          1
  8        1          0          0
  9        1          0          0
  10       0          0          1

如果您想将其作为data.frame，那么您可以使用reshape：

# Turn the contingency table into long form (id, df, Freq), then spread it
# back out so that every source data frame gets its own column.
kk2 <- reshape(
  data.frame(ll),
  timevar = "df",
  idvar = "id",
  direction = "wide"
)

# reshape() names the widened columns "Freq.<label>"; strip that prefix so the
# columns carry the plain labels regardless of their order.
names(kk2) <- sub("^Freq\\.", "", names(kk2))
kk2
  id control treatment1 treatment2
1  1       0          1          0
2  2       0          1          0
3  3       0          0          1
4  4       1          0          0
5  5       1          0          0
6  7       0          1          1
7  8       1          0          0
8  9       1          0          0
9 10       0          0          1

Answer 2

#' Build a 0/1 membership table from several data frames of ids
#'
#' @param ... Data frames, each with an `id` column of positive whole numbers.
#'   The expression written for each argument becomes that input's column
#'   name in the result.
#' @return A data frame with one row for every id from 1 to the largest id
#'   seen, and one integer column per input holding 1 where the id occurs in
#'   that input and 0 otherwise.
df.bind <- function(...) {
  inputs <- list(...)
  if (length(inputs) == 0L) {
    stop("df.bind() needs at least one data frame", call. = FALSE)
  }

  # Deparse each argument expression so that a non-symbol argument such as
  # `lst$a` still contributes exactly one name per input.
  arg.exprs <- as.list(substitute(list(...)))[-1L]
  df.names <- vapply(
    arg.exprs,
    function(e) paste(deparse(e), collapse = ""),
    character(1),
    USE.NAMES = FALSE
  )

  ids.list <- setNames(lapply(inputs, `[[`, "id"), df.names)
  num.ids <- max(unlist(ids.list))

  # tabulate() counts occurrences; clamp to 0/1 so that an id repeated within
  # one input is still reported as an indicator rather than a count.
  tabs <- lapply(ids.list, function(ids) {
    as.integer(tabulate(ids, num.ids) > 0L)
  })

  data.frame(id = seq_len(num.ids), tabs)
}

# Example call: the result columns are named after the arguments, and there is
# one row for every id from 1 up to the largest id.
df.bind(treatment1, treatment2, control)

#    id treatment1 treatment2 control
# 1   1          1          0       0
# 2   2          1          0       0
# 3   3          0          1       0
# 4   4          0          0       1
# 5   5          0          0       1
# 6   6          0          0       0
# 7   7          1          1       0
# 8   8          0          0       1
# 9   9          0          0       1
# 10 10          0          1       0

（请注意，结果中还包含了 id == 6 这一行，尽管该 id 不属于任何一个数据框。）

Answer 3

对于以下数据：

# Sample inputs: the ids that belong to each group.
treatment1 <- data.frame(id = c(1, 2, 7))
treatment2 <- data.frame(id = c(3, 7, 10))
control <- data.frame(id = c(4, 5, 8, 9))

您可以使用：

# Names of the data frames to combine; each name also becomes a flag column.
x <- c("treatment1", "treatment2", "control")

# Look up the data frame called `s` and add a column, also called `s`, that is
# 1 on every row.
f <- function(s) {
  flagged <- get(s)
  flagged[[s]] <- 1
  flagged
}

# Full outer join of the flagged data frames on the column they share (`id`).
# A flag left missing by the join means the id is not in that group, so it is
# set to 0.
r <- Reduce(
  function(left, right) merge(left, right, all = TRUE),
  lapply(x, f)
)
r[is.na(r)] <- 0

结果：

> r
  id treatment1 treatment2 control
1  1          1          0       0
2  2          1          0       0
3  3          0          1       0
4  4          0          0       1
5  5          0          0       1
6  7          1          1       0
7  8          0          0       1
8  9          0          0       1
9 10          0          1       0

Answer 4

这说明了我想象的rbind策略：

# Stack the three groups, then attach a 0/1 indicator matrix (one column per
# group) as a single matrix-valued column named `grps`.
alldf <- rbind(treatment1, treatment2, control)

# Group number 1/2/3 repeated once per row of the matching data frame; the
# "- 1" drops the intercept so every group gets its own indicator column.
alldf$grps <- model.matrix(
  ~ factor(rep(1:3, times = c(nrow(treatment1), nrow(treatment2),
                              nrow(control)))) - 1
)

# Replace the auto-generated column names with short group labels.
colnames(alldf$grps) <- c("trt1", "trt2", "ctrl")
alldf
#-------------------
   id grps.trt1 grps.trt2 grps.ctrl
1   1         1         0         0
2   2         1         0         0
3   7         1         0         0
4   3         0         1         0
5   7         0         1         0
6  10         0         1         0
7   4         0         0         1
8   5         0         0         1
9   8         0         0         1
10  9         0         0         1

使用现有数据框创建新数据框

4 个答案: