根据单个单元格值更改多个单元格值

时间:2017-07-13 06:19:44

标签: r if-statement

我有一个数据框:

# Example data: one vector per column, assembled straight into a data frame
# that already carries its final column names.
a <- c("yes", "yes", "no", "yes", "no")
b <- c("brown", "grey", "white", "grey", NA)
c <- c(7, 6, NA, 10, 8)
d <- c("male", "female", "female", "male", "female")
Zoo <- data.frame(animal = a, colour = b, age = c, gender = d)

   animal colour  age  gender
    yes    brown   7   male
    yes    grey    6 female
    no     white  NA female
    yes    grey   10   male
    no     NA      8 female

如果 animal 列的值为 "no",我想将该行其他列中的所有非 NA 值更改为 "NL"(non logical,即"不合逻辑")。我可以像下面这样一次处理一列:

# Work on a character column so that "NL" can be stored in it.
Zoo[["colour"]] <- as.character(Zoo[["colour"]])

# Rows where animal is "no" and colour is present get "NL"; every other row
# keeps its current colour value.
Zoo[["colour"]] <- ifelse(
  !is.na(Zoo[["colour"]]) & Zoo[["animal"]] == "no",
  "NL",
  Zoo[["colour"]]
)

并最终得到:

   animal colour  age  gender
    yes    brown   7   male
    yes    grey    6 female
    no     NL     NA     NL
    yes    grey   10   male
    no     NA     NL     NL

我相信一定有更高效的方法来完成这项工作。有吗?谢谢!

2 个答案:

答案 0 :(得分:3)

这是另一种方式。请注意,我使用stringsAsFactors = FALSE创建了一个data.frame,因为在此设置中使用因子级别非常繁琐。完成后,您可以自由地将字符列转换为因子。

基本上,这段代码遍历每一行:对于 animal 为 "no" 的行,找出值不是 NA 的列,并把这些位置替换为 "NL"。

# Example data. Strings are kept as character (not factor) so that "NL" can be
# written into any column without first adding a factor level.
a <- c("yes", "yes", "no", "yes", "no")
b <- c("brown", "grey", "white", "grey", NA)
c <- c(7, 6, NA, 10, 8)
d <- c("male", "female", "female", "male", "female")
zoo <- data.frame(
  animal = a, color = b, age = c, gender = d,
  stringsAsFactors = FALSE
)

# Every column except the key column is a candidate for replacement. Selecting
# by name for both the NA mask and the assignment keeps the two aligned even
# when "animal" is not the first column.
other.cols <- colnames(zoo)[colnames(zoo) != "animal"]

# seq_len() gives an empty sequence for a zero-row data frame, whereas
# 1:nrow(zoo) would iterate over c(1, 0).
for (i in seq_len(nrow(zoo))) {
  # isTRUE() treats a missing animal value as "not no" instead of letting
  # if() fail on an NA condition.
  if (isTRUE(zoo[["animal"]][i] == "no")) {
    # One logical per candidate column: TRUE where row i holds a value.
    find.el <- as.vector(!is.na(zoo[i, other.cols, drop = FALSE]))
    if (any(find.el)) {
      # Writing a string into a numeric column (age) converts that whole
      # column to character.
      zoo[i, other.cols[find.el]] <- "NL"
    }
  }
}

  animal color  age gender
1    yes brown    7   male
2    yes  grey    6 female
3     no    NL <NA>     NL
4    yes  grey   10   male
5     no  <NA>   NL     NL

答案 1 :(得分:0)

对于多列的情况,我们可以使用 data.table 中的 set,这是一种高效的做法:
library(data.table)
# Turn Zoo into a data.table so that set() can be used on it.
setDT(Zoo)
# Visit every column except the first one (animal).
for (col_nm in names(Zoo)[-1]) {
  # Store the column as character so that "NL" fits into it.
  set(Zoo, j = col_nm, value = as.character(Zoo[[col_nm]]))
  # Overwrite the non-NA cells of this column in the rows where animal is "no".
  set(
    Zoo,
    i = which(!is.na(Zoo[[col_nm]]) & Zoo[["animal"]] == "no"),
    j = col_nm,
    value = "NL"
  )
}

Zoo
#   animal colour age gender
#1:    yes  brown   7   male
#2:    yes   grey   6 female
#3:     no     NL  NA     NL
#4:    yes   grey  10   male
#5:     no     NA  NL     NL

注意:由于使用了 set,这种方法应该非常高效。

或者,我们可以使用优雅的tidyverse语法

library(dplyr)
# For columns 2 to 4, overwrite every non-NA value with "NL" in the rows where
# animal is "no". mutate(across(...)) is used in place of the superseded
# mutate_at() and the deprecated funs().
Zoo %>%
  mutate(across(
    2:4,
    ~ replace(.x, Zoo[["animal"]] == "no" & !is.na(.x), "NL")
  ))
#   animal colour  age gender
#1    yes  brown    7   male
#2    yes   grey    6 female
#3     no     NL <NA>     NL
#4    yes   grey   10   male
#5     no   <NA>   NL     NL

基准

# Enlarge the example for benchmarking: repeat the rows of Zoo 1e5 times, then
# make separate copies so that each timed approach gets its own input.
Zoo1 <- Zoo[rep(1:nrow(Zoo), times = 1e5), ]
Zoo2 <- copy(Zoo1)
Zoo3 <- copy(Zoo2)

# Time the data.table set() approach on the enlarged table Zoo2.
system.time({
  setDT(Zoo2)
  for (nm in names(Zoo2)[-1]) {
    # Store the column as character so that "NL" fits into it.
    set(Zoo2, i = NULL, j = nm, value = as.character(Zoo2[[nm]]))
    # The row mask is built from Zoo2's own animal column, so it has exactly
    # one entry per row of Zoo2 and does not depend on vector recycling.
    set(Zoo2, i = which(Zoo2[["animal"]] == "no" & !is.na(Zoo2[[nm]])),
        j = nm, value = "NL")
  }
})
# user  system elapsed 
#   0.40    0.01    0.42 

# Time the dplyr approach on the enlarged table Zoo3. mutate(across(...)) is
# used in place of the superseded mutate_at() and the deprecated funs().
system.time({
  Zoo3 %>%
    mutate(across(
      2:4,
      ~ replace(.x, Zoo3[["animal"]] == "no" & !is.na(.x), "NL")
    ))
})
 #user  system elapsed 
 #  0.42    0.03    0.46 


# Time the row-by-row loop on the enlarged table Zoo1.
system.time({
  # Candidate columns are selected by name for both the NA mask and the
  # assignment, so the two stay aligned wherever "animal" sits.
  other.cols <- colnames(Zoo1)[colnames(Zoo1) != "animal"]
  # seq_len() gives an empty sequence when Zoo1 has no rows.
  for (i in seq_len(nrow(Zoo1))) {
    # isTRUE() treats a missing animal value as "not no" instead of letting
    # if() fail on an NA condition.
    if (isTRUE(Zoo1[["animal"]][i] == "no")) {
      find.el <- as.vector(!is.na(Zoo1[i, other.cols, drop = FALSE]))
      if (any(find.el)) {
        Zoo1[i, other.cols[find.el]] <- "NL"
      }
    }
  }
})
#     user  system elapsed 
#  2086.49  577.51 2686.83 

数据

# Rebuild the example data frame from the vectors a, b, c and d, keeping the
# string columns as character rather than factor.
Zoo <- data.frame(
  animal = a,
  colour = b,
  age = c,
  gender = d,
  stringsAsFactors = FALSE
)