样本数据 (仅一行)
CNTRCT_NBR year CF_ANNUAL_1 CF_ANNUAL_2 CF_ANNUAL_3 CF_ANNUAL_4 CF_ANNUAL_5 CF_ANNUAL_6 CF_ANNUAL_7 CF_ANNUAL_8 CF_ANNUAL_9 CF_ANNUAL_10 CF_ANNUAL_11 CF_ANNUAL_12 CF_ANNUAL_13 CF_ANNUAL_14 CF_ANNUAL_15 CF_ANNUAL_16 CF_ANNUAL_17 CF_ANNUAL_18 CF_ANNUAL_19 CF_ANNUAL_20 CF_ANNUAL_21 CF_ANNUAL_22 CF_ANNUAL_23 CF_ANNUAL_24 CF_ANNUAL_25 CF_ANNUAL_26 CF_ANNUAL_27 CF_ANNUAL_28 CF_ANNUAL_29 CF_ANNUAL_30
00222L 2 351.1 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55
我正在处理一个非常大的数据框(400,000+行),并试图创建一个基于条件的新变量。根据年份值,此新变量将等于已经存在的列值。
aux304$CF_FINAL <- NA
within(aux304, aux304$CF_FINAL <- ifelse(aux304$year == 1, aux304 $CF_ANNUAL_1,
ifelse(aux304$year == 2, aux304$CF_ANNUAL_2,
ifelse(aux304$year == 3, aux304$CF_ANNUAL_3,
ifelse(aux304$year == 4, aux304$CF_ANNUAL_4,
ifelse(aux304$year == 5, aux304$CF_ANNUAL_5,
ifelse(aux304$year == 6, aux304$CF_ANNUAL_6,
ifelse(aux304$year == 7, aux304$CF_ANNUAL_7,
ifelse(aux304$year == 8, aux304$CF_ANNUAL_8,
ifelse(aux304$year == 9, aux304$CF_ANNUAL_9,
ifelse(aux304$year == 10, aux304$CF_ANNUAL_10,
ifelse(aux304$year == 11, aux304$CF_ANNUAL_11,
ifelse(aux304$year == 12, aux304$CF_ANNUAL_12,
ifelse(aux304$year == 13, aux304$CF_ANNUAL_13,
ifelse(aux304$year == 14, aux304$CF_ANNUAL_14,
ifelse(aux304$year == 15, aux304$CF_ANNUAL_15,
ifelse(aux304$year == 16, aux304$CF_ANNUAL_16,
ifelse(aux304$year == 17, aux304$CF_ANNUAL_17,
ifelse(aux304$year == 18, aux304$CF_ANNUAL_18,
ifelse(aux304$year == 19, aux304$CF_ANNUAL_19,
ifelse(aux304$year == 20, aux304$CF_ANNUAL_20,
ifelse(aux304$year == 21, aux304$CF_ANNUAL_21,
ifelse(aux304$year == 22, aux304$CF_ANNUAL_22,
ifelse(aux304$year == 23, aux304$CF_ANNUAL_23,
ifelse(aux304$year == 24, aux304$CF_ANNUAL_24,
ifelse(aux304$year == 25, aux304$CF_ANNUAL_25,
ifelse(aux304$year == 26, aux304$CF_ANNUAL_26,
ifelse(aux304$year == 27, aux304$CF_ANNUAL_27,
ifelse(aux304$year == 28, aux304$CF_ANNUAL_28,
ifelse(aux304$year == 29, aux304$CF_ANNUAL_29,
ifelse(aux304$year == 30, aux304$CF_ANNUAL_30, 0)))))))))))))))))))))))))))))))
aux30 <- subset(aux304, select=c("CNTRCT_NBR", "year", "CF_FINAL" ))
我遇到一个错误:$<-.data.frame
(*tmp*
,CF_FINAL,值= logical(0))中的错误:
替换有0行,数据有441300
然后,当我尝试对该数据库进行子集设置时,它告诉我创建的变量(CF_FINAL)不存在。我想我的嵌套ifelse语句遇到了问题,因为有很多问题,所以它不能正确创建变量。
答案 0 :(得分:0)
# generate example data
year <- 1:30
mat <- diag(101:130)
df <- as.data.frame(cbind(year, mat))
colnames(df) <- c("year", paste0("annual",1:30))
# solution
df$final <- NA
for(i in 1:30) {
df[df$year==i, "final"] <- df[df$year==i, paste0("annual",i)]
}
答案 1 :(得分:0)
就像@AndS。建议您使用mutate
和case_when
。
library(tidyverse)
aux304 <- aux304 %>% # a pipe moves to next command
mutate( #to create a new variable
CF_FINAL =
case_when( #runs a series of ifelse statements
year == 1 ~ CF_ANNUAL_1, #framework: if ~ then
year == 2 ~ CF_ANNUAL_2,
...,
TRUE ~ 0
)
)
由于这里有一个清晰的模式,您可以查看map
,但是如果这种情况仅发生一次或您想对其进行暴力破解,则case_when
提供了一种更简洁的选项,有助于减少代码错误
祝你好运!