这是我的起始数据集:
> data
ID Record Value
A 1 100
A 3 200
A 4 300
B 1 800
对于每个ID,我希望编号1到4的每条记录都存在。如果某条记录缺失,则用该ID此前最近的一条记录来创建它。
最终数据集应如下所示:
> newdata
ID Updt_Record Value
A 1 100
A 2 100
A 3 200
A 4 300
B 1 800
B 2 800
B 3 800
B 4 800
要执行此操作,我目前正在使用dplyr:
library(dplyr)

# Record numbers that every ID must end up with. Kept as doubles so that
# Updt_Record has the same type as when it was assigned from literals.
target_records <- c(1, 2, 3, 4)

# For one target record number: per ID, keep the most recent existing record
# at or before that number and label the row with the target number.
# An ID with no record at or before the target contributes no row.
carry_forward <- function(target) {
  data %>%
    group_by(ID) %>%
    # Two separate filters on purpose: max() must only see the rows that
    # survived the `Record <= target` cut.
    filter(Record <= target) %>%
    filter(Record == max(Record)) %>%
    ungroup() %>%
    mutate(Updt_Record = target)
}

# Build one slice per target record, stack them, and order by the record
# number that was actually generated rather than by the source record.
newdata <- lapply(target_records, carry_forward) %>%
  bind_rows() %>%
  arrange(ID, Updt_Record) %>%
  select(ID, Updt_Record, Value)
有更有效的方法吗?谢谢!
答案 0 :(得分:4)
library(tidyr)
library(dplyr)

# Expand `data` so that every ID has a row for each Record 1..4, carrying the
# most recent earlier Value of the same ID into the rows that were missing.
data %>%
  # Making Record a factor with levels 1:4 lets complete() also generate
  # record numbers that never occur in the data (e.g. 2).
  mutate(Record = factor(Record, levels = 1:4)) %>%
  complete(ID, Record) %>%
  # Fill within each ID so that one ID's values never leak into the next ID
  # when that ID's first record is missing.
  group_by(ID) %>%
  fill(Value) %>%
  ungroup() %>%
  # Go through character so the result holds the level labels as numbers,
  # not the internal factor codes, and the column is numeric.
  mutate(Record = as.numeric(as.character(Record)))
# # A tibble: 8 x 3
# ID Record Value
# <fctr> <dbl> <int>
# 1 A 1 100
# 2 A 2 100
# 3 A 3 200
# 4 A 4 300
# 5 B 1 800
# 6 B 2 800
# 7 B 3 800
# 8 B 4 800
# Example input: one row per existing (ID, Record) pair with its Value.
# ID is a factor with levels "A" and "B"; Record and Value are integers.
data <- data.frame(
  ID = factor(c("A", "A", "A", "B"), levels = c("A", "B")),
  Record = c(1L, 3L, 4L, 1L),
  Value = c(100L, 200L, 300L, 800L)
)