多个群组的滚动均值

时间:2017-09-19 08:54:25

标签: r dplyr mean zoo

我想按多个分组计算一个滚动均值变量。基本上,就是嵌套在国家内的各政党家族(party family)每年的平均得票份额和席位份额。下面是一个数据集样本,包含三个国家以及最近若干年份的数据:

structure(list(vote_share = c(23.97, 4.5, 4.79, 2.82, 62.51, 
28.98, 10.8, 0.61, 57.12, 15.11, 0.41, 12.85, 18.61, 52.73, 27.26, 
0.49, 11.46, 5.7, 54.28, 21.19, 2.47, 21.18, 54.56, 19.63, 1.86, 
16.49, 61.48, 24.84, 2.25, 11.98, 60.3, 5.7, 3.6, 18.2, 71.5, 
36.9, 17.7, 9, 6.3, 28.5, 35, 7.9, 7.7, 15.3, 32.9, 24.3, 5.6, 
13.7, 14.7, 39.3, 32.7, 6.5, 10, 22.1, 26.8, 35.4, 6.2, 7.6, 
22.9, 26.6, 30.8, 5.5, 8, 16.3, 36.6, 37.7, 8.5, 7.9, 6.7, 38.1, 
45.3, 7.7, 11.2, 1.2, 33.7, 36.38, 6.12, 16.48, 39.37, 39.9, 
6.2, 13, 1.44, 37.8, 35.2, 7.9, 11, 2.9, 40.2, 30.66, 7.21, 12.94, 
5.7, 42.72, 31.01, 9.23, 12.61, 12.86, 33.32), seat_share = c(24.6666660308838, 
4.66666668653488, 4.66666662693024, 2, 63.9999988079071, 30, 
10.6666667461395, 0, 59.3333343267441, 15.3333330154419, 0, 12.6666665077209, 
18.666667342186, 53.333332657814, 28, 0, 11.3333334922791, 5.33333349227905, 
55.3333333730698, 22, 1.33333337306976, 21.3333325386047, 55.3333343267441, 
20, 1.33333337306976, 16.6666665077209, 61.999999165535, 25.3333339691162, 
1.33333337306976, 12.6666667461395, 60.6666675806046, 6, 3.33333325386047, 
18.6666660308838, 72.0000003576279, 40.6060600280762, 19.3939399719238, 
8.48484861850739, 6.06060600280762, 25.4545446634293, 39.3939399719238, 
6.66666650772095, 5.45454549789429, 15.151515007019, 33.3333333730698, 
26.0606060028076, 6.06060600280762, 13.9393939971924, 15.7575759887695, 
38.1818188428879, 36.0946731567383, 6.50887584686279, 8.87574005126953, 
22.4852066040039, 26.035502910614, 37.8698234558105, 6.50887584686279, 
6.50887584686279, 24.2603549957275, 24.8520716428757, 32.5443801879883, 
5.91715955734253, 4.73372763395309, 17.1597633361816, 39.6449699401855, 
39.5415458679199, 8.8825216293335, 4.58452701568604, 7.16332387924194, 
39.8280801773071, 46.1318054199219, 7.73638963699341, 11.4613180160522, 
0, 34.6704869270325, 37.535816192627, 5.15759325027466, 16.9054441452026, 
40.4011468887329, 41.2607460021973, 6.30372476577759, 13.4670486450195, 
0, 38.9684820175171, 37.2492828369141, 8.30945587158203, 11.747850894928, 
0, 42.6934089660645, 32.0916900634766, 6.59025764465332, 12.6074500083923, 
5.73065900802612, 42.9799423217773, 32.3782234191895, 6.30372476577759, 
13.1805157661438, 14.040114402771, 34.0974206924438), el_date = structure(c(8888, 
8888, 8888, 8888, 8888, 10352, 10352, 10352, 10352, 11822, 11822, 
11822, 11822, 11822, 12074, 12074, 12074, 12074, 12074, 13474, 
13474, 13474, 13474, 14769, 14769, 14769, 14769, 15595, 15595, 
15595, 15595, 17240, 17240, 17240, 17240, 8656, 8656, 8656, 8656, 
8656, 10120, 10120, 10120, 10120, 10120, 11575, 11575, 11575, 
11575, 11575, 13038, 13038, 13038, 13038, 13038, 14501, 14501, 
14501, 14501, 14501, 15957, 15957, 15957, 15957, 15957, 7927, 
7927, 7927, 7927, 7927, 9026, 9026, 9026, 9026, 9026, 10489, 
10489, 10489, 10489, 11945, 11945, 11945, 11945, 11945, 13408, 
13408, 13408, 13408, 13408, 14871, 14871, 14871, 14871, 14871, 
16327, 16327, 16327, 16327, 16327), class = "Date"), family = c("Left", 
"Other", "Radical Left", "Radical Right", "Right", "Left", "Radical Left", 
"Radical Right", "Right", "Left", "Other", "Radical Left", "Radical Right", 
"Right", "Left", "Other", "Radical Left", "Radical Right", "Right", 
"Left", "Other", "Radical Left", "Right", "Left", "Other", "Radical Left", 
"Right", "Left", "Other", "Radical Left", "Right", "Left", "Other", 
"Radical Left", "Right", "Left", "Other", "Radical Left", "Radical Right", 
"Right", "Left", "Other", "Radical Left", "Radical Right", "Right", 
"Left", "Other", "Radical Left", "Radical Right", "Right", "Left", 
"Other", "Radical Left", "Radical Right", "Right", "Left", "Other", 
"Radical Left", "Radical Right", "Right", "Left", "Other", "Radical Left", 
"Radical Right", "Right", "Left", "Other", "Radical Left", "Radical Right", 
"Right", "Left", "Other", "Radical Left", "Radical Right", "Right", 
"Left", "Other", "Radical Left", "Right", "Left", "Other", "Radical Left", 
"Radical Right", "Right", "Left", "Other", "Radical Left", "Radical Right", 
"Right", "Left", "Other", "Radical Left", "Radical Right", "Right", 
"Left", "Other", "Radical Left", "Radical Right", "Right"), country = c("NLD", 
"NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NLD", 
"NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NLD", 
"NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NLD", 
"NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NLD", "NOR", "NOR", 
"NOR", "NOR", "NOR", "NOR", "NOR", "NOR", "NOR", "NOR", "NOR", 
"NOR", "NOR", "NOR", "NOR", "NOR", "NOR", "NOR", "NOR", "NOR", 
"NOR", "NOR", "NOR", "NOR", "NOR", "NOR", "NOR", "NOR", "NOR", 
"NOR", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE", 
"SWE", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE", 
"SWE", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE", 
"SWE", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE", "SWE"), year = c(1994, 
1994, 1994, 1994, 1994, 1998, 1998, 1998, 1998, 2002, 2002, 2002, 
2002, 2002, 2003, 2003, 2003, 2003, 2003, 2006, 2006, 2006, 2006, 
2010, 2010, 2010, 2010, 2012, 2012, 2012, 2012, 2017, 2017, 2017, 
2017, 1993, 1993, 1993, 1993, 1993, 1997, 1997, 1997, 1997, 1997, 
2001, 2001, 2001, 2001, 2001, 2005, 2005, 2005, 2005, 2005, 2009, 
2009, 2009, 2009, 2009, 2013, 2013, 2013, 2013, 2013, 1991, 1991, 
1991, 1991, 1991, 1994, 1994, 1994, 1994, 1994, 1998, 1998, 1998, 
1998, 2002, 2002, 2002, 2002, 2002, 2006, 2006, 2006, 2006, 2006, 
2010, 2010, 2010, 2010, 2010, 2014, 2014, 2014, 2014, 2014)), .Names = c("vote_share", 
"seat_share", "el_date", "family", "country", "year"), row.names = c(NA, 
-99L), class = c("tbl_df", "tbl", "data.frame"))

尽管类似的问题已经被问过很多次,我还是不太清楚该怎么做。我先尝试按照这个建议(this suggestion)来做:

# Attempted rolling mean of vote_share with a right-aligned window of width 5.
# NOTE(review): the pipeline groups by country, family AND year. In the sample
# data above every country/family/year combination occurs exactly once, so each
# group holds a single row and the window (5) is wider than the group -- this
# matches the "k <= n is not TRUE" error quoted below.
# NOTE(review): even when grouping by country and family only, the sample has
# a group with just 4 rows (NLD / "Radical Right"), so a width-5 window may
# still be too wide for it -- confirm against the installed zoo version.
countries = countries %>%
  group_by(country, family, year) %>%
  # Sort so that, within each country/family, rows are in ascending year order.
  arrange(country, family, year) %>%
  # align = "right": the window ends at the current row; fill = NA pads the
  # positions for which no full window is available.
  mutate(vote.avg = rollmean(x = vote_share, 5, align = "right", fill = NA))

这没有成功。然后我又尝试按照另一个建议(this suggestion)来做:

# Second attempt: base R ave() applying a width-5 rolling mean per group.
# NOTE(review): c(countries$country, countries$family) concatenates the two
# columns into ONE vector twice as long as vote_share; it does not define a
# country-by-family grouping. ave() takes each grouping variable as its own
# argument -- confirm against the ave() documentation.
# NOTE(review): T is the reassignable shorthand for TRUE, and na.pad appears
# to be deprecated in zoo in favour of fill -- verify for the installed version.
countries$avg_vote <- ave(countries$vote_share, c(countries$country, countries$family),
                    FUN= function(x) rollmean(x, k=5, na.pad=T) )

这同样没有成功,两次尝试都报出了类似的错误:

Error in mutate_impl(.data, dots) : k <= n is not TRUE

所以我猜是我没有正确指定分组。我还尝试了其他多种方法,但都没能解决。重要的是,这些均值不能跨越不同的国家和政党家族分组“滚动”,也就是说,每个滚动均值只能用同一国家、同一政党家族内的观测值来计算。

如能得到任何帮助,我将不胜感激!谢谢!

0 个答案:

没有答案
相关问题