在大型数据框中使用多个 ifelse 条件创建新变量

时间:2018-08-16 16:12:55

标签: r

样本数据 (仅一行)

CNTRCT_NBR   year CF_ANNUAL_1   CF_ANNUAL_2 CF_ANNUAL_3 CF_ANNUAL_4 CF_ANNUAL_5 CF_ANNUAL_6 CF_ANNUAL_7 CF_ANNUAL_8 CF_ANNUAL_9 CF_ANNUAL_10    CF_ANNUAL_11    CF_ANNUAL_12    CF_ANNUAL_13    CF_ANNUAL_14    CF_ANNUAL_15    CF_ANNUAL_16    CF_ANNUAL_17    CF_ANNUAL_18    CF_ANNUAL_19    CF_ANNUAL_20    CF_ANNUAL_21    CF_ANNUAL_22    CF_ANNUAL_23    CF_ANNUAL_24    CF_ANNUAL_25    CF_ANNUAL_26    CF_ANNUAL_27    CF_ANNUAL_28    CF_ANNUAL_29    CF_ANNUAL_30
00222L 2 351.1 175.55   175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55

我正在处理一个非常大的数据框(400,000 多行),并试图根据条件创建一个新变量:新变量应根据 year 的值取对应的已有列的值(例如 year 为 2 时取 CF_ANNUAL_2)。

# Intent: for every row, set CF_FINAL to the CF_ANNUAL_<year> column that
# matches the row's `year` (1..30), falling back to 0 when no year matches.
aux304$CF_FINAL <- NA
# NOTE(review): the value returned by within() is not assigned to anything,
# so even a successful call would leave aux304$CF_FINAL as the NA set above.
# NOTE(review): inside within() the columns are already visible by name; the
# `aux304$` prefixes make the expression read and assign through the data frame
# object instead of the bare columns.
# NOTE(review): the reported error shows `value = logical(0)`, i.e. ifelse()
# returned a zero-length result; presumably aux304$year is NULL (column missing
# or spelled differently) -- verify with names(aux304).
within(aux304, aux304$CF_FINAL <- ifelse(aux304$year == 1, aux304     $CF_ANNUAL_1, 
                                  ifelse(aux304$year == 2, aux304$CF_ANNUAL_2,
                                  ifelse(aux304$year == 3, aux304$CF_ANNUAL_3,
                                  ifelse(aux304$year == 4, aux304$CF_ANNUAL_4,
                                  ifelse(aux304$year == 5, aux304$CF_ANNUAL_5, 
                                  ifelse(aux304$year == 6, aux304$CF_ANNUAL_6, 
                                  ifelse(aux304$year == 7, aux304$CF_ANNUAL_7, 
                                  ifelse(aux304$year == 8, aux304$CF_ANNUAL_8, 
                                  ifelse(aux304$year == 9, aux304$CF_ANNUAL_9, 
                                  ifelse(aux304$year == 10, aux304$CF_ANNUAL_10, 
                                  ifelse(aux304$year == 11, aux304$CF_ANNUAL_11, 
                                  ifelse(aux304$year == 12, aux304$CF_ANNUAL_12, 
                                  ifelse(aux304$year == 13, aux304$CF_ANNUAL_13, 
                                  ifelse(aux304$year == 14, aux304$CF_ANNUAL_14, 
                                  ifelse(aux304$year == 15, aux304$CF_ANNUAL_15, 
                                  ifelse(aux304$year == 16, aux304$CF_ANNUAL_16, 
                                  ifelse(aux304$year == 17, aux304$CF_ANNUAL_17, 
                                  ifelse(aux304$year == 18, aux304$CF_ANNUAL_18, 
                                  ifelse(aux304$year == 19, aux304$CF_ANNUAL_19, 
                                  ifelse(aux304$year == 20, aux304$CF_ANNUAL_20, 
                                  ifelse(aux304$year == 21, aux304$CF_ANNUAL_21, 
                                  ifelse(aux304$year == 22, aux304$CF_ANNUAL_22, 
                                  ifelse(aux304$year == 23, aux304$CF_ANNUAL_23, 
                                  ifelse(aux304$year == 24, aux304$CF_ANNUAL_24, 
                                  ifelse(aux304$year == 25, aux304$CF_ANNUAL_25, 
                                  ifelse(aux304$year == 26, aux304$CF_ANNUAL_26, 
                                  ifelse(aux304$year == 27, aux304$CF_ANNUAL_27, 
                                  ifelse(aux304$year == 28, aux304$CF_ANNUAL_28, 
                                  ifelse(aux304$year == 29, aux304$CF_ANNUAL_29, 
                                  ifelse(aux304$year == 30, aux304$CF_ANNUAL_30, 0)))))))))))))))))))))))))))))))

# Keep only the contract id, the year and the derived value.
aux30 <- subset(aux304, select=c("CNTRCT_NBR", "year", "CF_FINAL" ))

我遇到一个错误:Error in `$<-.data.frame`(`*tmp*`, CF_FINAL, value = logical(0)) : replacement has 0 rows, data has 441300

然后,当我尝试对该数据框取子集时,它告诉我新建的变量(CF_FINAL)不存在。我猜是嵌套的 ifelse 语句出了问题:嵌套层数太多,导致变量没有被正确创建。

2 个答案:

答案 0 :(得分:0)

# generate example data: row i has year == i and the value 100 + i in column
# "annual<i>"; every other annual column is 0
year <- 1:30
mat  <- diag(101:130)
df   <- as.data.frame(cbind(year, mat))
annual_cols <- paste0("annual", 1:30)
colnames(df) <- c("year", annual_cols)

# solution: one vectorized lookup instead of 30 passes over the data frame.
# match() maps each year to the position of its annual column and yields NA for
# a missing or out-of-range year, so such rows end up NA instead of raising an
# error in a subscripted assignment.
col_idx <- match(df$year, seq_along(annual_cols))
annual_values <- as.matrix(df[annual_cols])
# Indexing a matrix with a two-column (row, column) matrix picks one cell per
# row; index rows that contain NA produce NA in the result.
df$final <- annual_values[cbind(seq_len(nrow(df)), col_idx)]

答案 1 :(得分:0)

正如 @AndS. 所建议的,您可以使用 mutate 和 case_when:

# Attaches the tidyverse packages; this snippet uses %>%, mutate() and
# case_when() from it.
library(tidyverse)

# Template: overwrite aux304 with a copy that carries the new CF_FINAL column.
aux304 <- aux304 %>% # pipe: passes aux304 on as the first argument of mutate()
     mutate( # adds (or replaces) a column
       CF_FINAL = 
        case_when( # vectorized if / else-if chain; conditions are tried in order
           year == 1 ~ CF_ANNUAL_1, # pattern: condition ~ value to use
           year == 2 ~ CF_ANNUAL_2,
           # NOTE(review): the `...` below presumably stands in for the omitted
           # branches year == 3 through year == 30; write them out before running.
           ...,
           TRUE ~ 0 # fallback when none of the conditions above is TRUE
     )
   )

由于这里有明显的规律,您也可以考虑使用 map;但如果这种情况只出现一次,或者您只想直接把各个条件逐条写出来,case_when 是一种更简洁的选择,有助于减少代码错误。

祝你好运!