按下周的日期求和

时间:2017-07-19 12:01:59

标签: r dplyr

考虑以下数据集:

# Example data for the question: a data.frame literal built with structure()
# (the layout looks like dput() output -- TODO confirm).
# Columns:
#   date  - class "Date", given as day counts; contains NA entries
#   tdiff - numeric; the visible values are 0, 1, 3 and NA
# Row names are character strings; rows with a missing date carry "NA.xxx"
# style row names.
data_example = structure(list(date = structure(c(16764, 16700, 17080, 16969, 
16714, 17002, 16713, 17010, NA, 16729, 16756, 16892, 16891, 17135, 
17094, 17108, 16861, 16765, NA, 16974, 16960, NA, 17105, 17084, 
17086, 17155, 17158, 17157, 17157, 16742, 16741, 17051, 17128, 
NA, 17133, 16790, 16743, 17109, 16924, 17158, 17049, 17158, 16806, 
16773, 17143, 16799, 16770, 17142, 17021, 16902, 16995, 16862, 
16848, NA, 16833, 17065, NA, 16933, 17148, 16721, 16798, 16792, 
16724, 17099, 16687, 17049, 16920, 16905, 16745, 17039, 16699, 
16840, 16778, 17016, 16709, 16741, 17137, 16821, 16833, 16792, 
16948, 16757, 16983, 16632, 16902, 17164, 16653, 16994, 16986, 
16986, 17049, 17072, 16874, 16757, 16941, 16988, 16706, 17000, 
16745, 16710), class = "Date"), tdiff = c(0, 0, 0, 0, 0, 1, 0, 
0, NA, 0, 0, 0, 0, 0, 0, 0, 1, 0, NA, 0, 0, NA, 0, 0, 0, 0, 0, 
0, 0, 0, 3, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 1, NA, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("date", "tdiff"
), row.names = c("1506", "705", "5585", "4182", "894", "4583", 
"851", "4628", "NA.169", "1079", "1423", "3263", "3244", "6181", 
"5730", "5902", "2786", "1529", "NA.268", "4229", "4061", "NA.166", 
"5870", "5606", "5643", "6438", "6494", "6475", "6478", "1276", 
"1235", "5180", "6074", "NA.286", "6121", "1800", "1310", "5923", 
"3703", "6485", "5137", "6483", "2030", "1617", "6311", "1928", 
"1577", "6282", "4763", "3379", "4505", "2828", "2614", "NA.283", 
"2416", "5364", "NA.279", "3777", "6355", "994", "1903", "1856", 
"1019", "5804", "545", "5140", "3674", "3431", "1342", "5003", 
"675", "2509", "1658", "4699", "805", "1260", "6217", "2244", 
"2409", "1852", "3894", "1445", "4375", "243", "3386", "6530", 
"319", "4502", "4385", "4399", "5147", "5452", "3003", "1444", 
"3848", "4443", "761", "4552", "1336", "835"), class = "data.frame")

因此,要获得当前周的tdiff值的总和,我会这样做:

# Tag every row with the start of its calendar week and attach that week's
# total tdiff to each row.
#
# - cut(date, "week") bins each Date into its week (weeks start on Monday by
#   default); rows with a missing date get week = NA.
# - The per-week total is added with a grouped mutate(), so the row count is
#   unchanged; na.rm = TRUE keeps a missing tdiff from turning a week's
#   total into NA.
# - A single pipeline with plain `<-` replaces the `%<>%` step: that operator
#   comes from magrittr and is not available with only dplyr attached.
# - ungroup() drops the grouping so later steps do not silently operate
#   per week.
data_example_0 <- data_example %>%
  mutate(week = as.POSIXct(cut(date, "week"))) %>%
  group_by(week) %>%
  mutate(week_sum = sum(tdiff, na.rm = TRUE)) %>%
  ungroup()

但是我还需要创建一个变量(next_week_sum),用来保存下一周的 tdiff 值的总和。该怎么做?

1 个答案:

答案 0 :(得分:1)

如果直接添加 lead(week_sum, 1),它只会按行的当前顺序取下一行的值,既不排序,也不考虑周的分组。因此,我们必须先按周汇总出每周的总和,然后在汇总数据上添加 lead,最后再把结果连接回原始数据。

# Build a one-row-per-week lookup first, so that lead() steps from week to
# week instead of from row to row, then attach it to the original rows.
#
# Result: every row of data_example_0 plus a next_week_sum column holding the
# tdiff total of the following calendar week. It is NA when that week has no
# rows in the data, and NA for rows whose week is NA.
data_example_0 %>%
  ungroup() %>%
  # Rows without a date have no week; keep them out of the weekly sequence so
  # they cannot become the "next week" of the last real week.
  filter(!is.na(week)) %>%
  group_by(week) %>%
  # na.rm = TRUE keeps a single missing tdiff from blanking a whole week.
  summarise(week_sum = sum(tdiff, na.rm = TRUE)) %>%
  arrange(week) %>%
  mutate(
    # Distance to the next observed week, rounded to whole days so that a
    # daylight-saving shift of one hour does not break the 7-day check.
    days_to_next = round(
      as.numeric(difftime(lead(week), week, units = "days"))
    ),
    # Use the following row only when it really is the next calendar week;
    # %in% treats the NA gap of the last week as "no next week".
    next_week_sum = replace(lead(week_sum), !(days_to_next %in% 7), NA)
  ) %>%
  # data_example_0 already carries week_sum; bring over only the new column
  # so the join does not create week_sum.x / week_sum.y duplicates.
  select(week, next_week_sum) %>%
  ### and then we join the original data
  right_join(data_example_0, by = "week")