什么是最好的方法?

时间:2014-09-09 19:36:21

标签: r

我有一个这样的文件:

"1" 4 10 "t" "t" "t" "t" "t" "s" "t" "t" "t" "t" "t" "t" "t" "s" "t" "t" "t" "t" "t" "0" "0" "0" "0" "0" "0" "0" "0" "0"

2 个答案:

答案 0 :(得分:2)

尝试:(从所显示的输入数据来看,不清楚数据是matrix还是data.frame)

# Example input as a character matrix: three id columns followed by flag
# columns, where "0" means "absent" and any other value means "present".
txt <- rbind(
  c("1", 4, 10, "t", "t", "t", "t", "s", "t", "t", "0", "0", "0"),
  c("2", 10, 22, "r", "t", "t", "t", "t", "t", "0", "0", "0", "0")
)

# Recode every non-zero flag as "1" (the matrix is character, so the
# comparison and the assignment both happen on strings).
txt[, -(1:3)][txt[, -(1:3)] != "0"] <- "1"

# Keep the id columns plus one slot for the collapsed flag string.
# drop = FALSE keeps the matrix shape even when there is only one row.
txt1 <- txt[, 1:4, drop = FALSE]

# Concatenate the flags row by row. A plain paste is used instead of
# interaction(), which builds the full product of all column levels and
# therefore grows exponentially with the number of flag columns.
txt1[, 4] <- apply(txt[, -(1:3), drop = FALSE], 1, paste, collapse = "")
txt1
#    [,1] [,2] [,3] [,4]
#[1,] "1"  "4"  "10" "1111111000"
#[2,] "2"  "10" "22" "1111110000"

更新

假设您的数据同时包含numeric列和character列

 # Example data (dput output): columns V1-V3 are integer id/position columns,
 # V4-V22 are character flags and V23-V31 are integer flags.
 d1 <- structure(list(V1 = 1:3, V2 = c(4L, 10L, 10L), V3 = c(10L, 22L, 
 295L), V4 = c("t", "r", "s"), V5 = c("t", "t", "t"), V6 = c("t", 
 "t", "s"), V7 = c("t", "t", "t"), V8 = c("t", "t", "t"), V9 = c("s", 
 "t", "s"), V10 = c("t", "t", "t"), V11 = c("t", "t", "t"), V12 = c("t", 
 "t", "s"), V13 = c("t", "t", "q"), V14 = c("t", "0", "r"), V15 = c("t", 
 "t", "t"), V16 = c("t", "s", "0"), V17 = c("s", "s", "0"), V18 = c("t", 
 "t", "0"), V19 = c("t", "t", "0"), V20 = c("t", "t", "0"), V21 = c("t", 
 "s", "0"), V22 = c("t", "t", "0"), V23 = c(0L, 0L, 0L), V24 = c(0L, 
 0L, 0L), V25 = c(0L, 0L, 0L), V26 = c(0L, 0L, 0L), V27 = c(0L, 
 0L, 0L), V28 = c(0L, 0L, 0L), V29 = c(0L, 0L, 0L), V30 = c(0L, 
 0L, 0L), V31 = c(0L, 0L, 0L)), .Names = c("V1", "V2", "V3", "V4", 
 "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13", "V14", 
 "V15", "V16", "V17", "V18", "V19", "V20", "V21", "V22", "V23", 
 "V24", "V25", "V26", "V27", "V28", "V29", "V30", "V31"), class = "data.frame", row.names = c(NA, 
 -3L))

 # Recode every non-zero flag as 1, in place. Character columns end up
 # holding "1"; the all-zero integer columns are left untouched.
 d1[, -(1:3)][d1[, -(1:3)] != 0] <- 1

 # Keep the three id columns plus V4, which will receive the flag string.
 d2 <- d1[, 1:4]

 # Concatenate the flag columns row by row. paste0() is used instead of
 # interaction(), which builds the full product of all column levels and
 # therefore grows exponentially with the number of flag columns.
 # unname() stops a column name from being matched to a paste0() argument;
 # drop = FALSE keeps a data frame even if only one flag column exists.
 flag_list <- unname(as.list(d1[, -(1:3), drop = FALSE]))
 d2[, 4] <- do.call(paste0, flag_list)
 d2
 #  V1 V2  V3                           V4
 #1  1  4  10 1111111111111111111000000000
 #2  2 10  22 1111111111011111111000000000
 #3  3 10 295 1111111111110000000000000000

UPDATE2

或者您可以使用qdap,以更少的字符完成此操作:
 library(qdap)
 # Bind the three id columns to a single V4 column. The comparison gives a
 # logical matrix of the flag columns (TRUE where the flag is not zero);
 # adding 0 converts it to 0/1 before paste2() joins each row with no
 # separator.
 cbind(
   d1[, 1:3],
   V4 = paste2(
     (d1[, -(1:3)] != 0) + 0,
     sep = ""
   )
 )
 #  V1 V2  V3                           V4
 #1  1  4  10 1111111111111111111000000000
 #2  2 10  22 1111111111011111111000000000
 #3  3 10 295 1111111111110000000000000000

答案 1 :(得分:1)

这非常简单。假设数据框名为df,那么:

# `df` is the caller's data frame: three leading id columns followed by the
# flag columns. The flag range is derived from ncol(df) rather than being
# hard-coded, so it works whatever the number of flag columns is.
stopifnot(is.data.frame(df), ncol(df) >= 4)
flag_cols <- seq(4, ncol(df))

# Recode each flag column to 0/1: 0 stays 0, anything else becomes 1.
flags <- lapply(df[flag_cols], function(x) as.integer(x != 0))

# Paste the recoded columns together row by row. do.call(paste0, ...) works
# column-wise on the list and avoids apply() coercing the data frame to a
# matrix; unname() stops column names being matched to paste0() arguments.
longstring <- do.call(paste0, unname(flags))

# Replace the flag columns by the single concatenated string in column 4.
df <- df[, 1:4, drop = FALSE]
df[[4]] <- longstring