我有一个非常初学者的问题。我想使用时间序列包 xts。因此,要将我的数据转换为 xts 格式。
我当前的数据集“data”是“data.table”“data.frame”格式。 Year 列是一个“整数”。 过滤后的 data$Year 列包含 1999-2018 年的数据。 在我的代码底部,我提供了我的数据示例。
我面临以下问题:
as.Date 无法将我的年度数据识别为日期。它会自动将它们转换为每日数据,从 1975 年初开始......
我尝试了以下命令:
data$Year <- as.Date(data$Year)
data_xts <- as.xts(data, data[, -1], order.by = data$Year)
# now xts format
class(data_xts)
# here is the problem: > 0 seconds periodicity
# from 1975-06-23 to 1975-07-12
periodicity(data_xts$year)
这里是错误输出的头部
> head(data_xts)
Year ReporterName PartnerName TradeValue in 1000 USD year_group total_average_period_in_1000USD
1975-06-23 "1975-06-23" "Comoros" "France" " 1360.758" "1999-2002" " 524.8275"
1975-06-23 "1975-06-23" "Comoros" "United States" " 1392.263" "1999-2002" " 524.8275"
1975-06-23 "1975-06-23" "Comoros" "Germany" " 633.666" "1999-2002" " 524.8275"
1975-06-23 "1975-06-23" "Comoros" "United Kingdom" " 152.029" "1999-2002" " 524.8275"
1975-06-23 "1975-06-23" "Comoros" "Singapore" " 450.452" "1999-2002" " 524.8275"
1975-06-23 "1975-06-23" "Comoros" "Indonesia" " 194.580" "1999-2002" " 524.8275"
total_average_period_byPartner_in_1000USD percentage_of_group
1975-06-23 "3.638645e+03" "6.933030e+00"
1975-06-23 "1.449703e+03" "2.762247e+00"
1975-06-23 "6.692080e+02" "1.275101e+00"
1975-06-23 "4.821123e+02" "9.186109e-01"
1975-06-23 "4.325665e+02" "8.242070e-01"
1975-06-23 "1.945800e+02" "3.707504e-01"
这是我的初始数据示例
dput(head(data, n = 100))
structure(list(Year = structure(c(2015, 2016, 2017, 2018, 2011,
2012, 2013, 2014, 2007, 2009, 2010, 2015, 2016, 2017, 2018, 2007,
2009, 2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015,
2016, 2017, 2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015, 2016, 2017,
2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014, 2007, 2009,
2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2007, 2009,
2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2015, 2016,
2017, 2018, 2007, 2009, 2010, 2012, 2013, 2014, 2015, 2016, 2017,
2018, 2007, 2009, 2010, 2015, 2016, 2017), class = "Date"), ReporterName = c("Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola"), PartnerName = c("China", "China", "China", "China",
"China", "China", "China", "China", "China", "China", "China",
"India", "India", "India", "India", "United States", "United States",
"United States", "Spain", "Spain", "Spain", "Spain", "United States",
"United States", "United States", "United States", "South Africa",
"South Africa", "South Africa", "South Africa", "Other Asia, nes",
"Other Asia, nes", "Other Asia, nes", "Other Asia, nes", "United States",
"United States", "United States", "United States", "Canada",
"Canada", "Canada", "Canada", "France", "France", "France", "France",
"Portugal", "Portugal", "Portugal", "Portugal", "United Arab Emirates",
"United Arab Emirates", "United Arab Emirates", "United Arab Emirates",
"India", "India", "India", "India", "India", "India", "India",
"Italy", "Italy", "Italy", "Italy", "United Kingdom", "United Kingdom",
"United Kingdom", "United Kingdom", "Canada", "Canada", "Canada",
"Other Asia, nes", "Other Asia, nes", "Other Asia, nes", "Other Asia, nes",
"Indonesia", "Indonesia", "Indonesia", "Indonesia", "Netherlands",
"Netherlands", "Netherlands", "Netherlands", "France", "France",
"France", "Canada", "Canada", "Canada", "Malaysia", "Malaysia",
"Malaysia", "Malaysia", "Other Asia, nes", "Other Asia, nes",
"Other Asia, nes", "Singapore", "Singapore", "Singapore"), `TradeValue in 1000 USD` = c(14320565.527,
13923091.96, 19487066.539, 24517058.342, 24360792.847, 33710030.023,
31947235.081, 27527110.851, 13459326.563, 15954060.922, 20963245.476,
2676339.583, 1948845.077, 2890061.159, 3768940.47, 10875646.624,
7708378.359, 9965785.888, 2245976.426, 882089.095, 1025777.275,
1250554.873, 1265801.316, 1525650.265, 1079503.617, 1470132.736,
1376041.349, 1309031.634, 1342549.642, 1161852.097, 1410793.303,
1136068.366, 1388765.375, 145025.028, 16475024.144, 6594525.851,
5018390.939, 2548807.59, 1035618.609, 873616.866, 1079684.282,
647164.297, 1599581.068, 910864.068, 330799.771, 734551.345,
1199355.049, 851431.606, 334787.698, 1074137.369, 665253.613,
801908.541, 1016519.507, 884725.078, 6842018.3, 6932060.8, 6764232.765,
4507416.181, 2376843.352, 3659557.185, 5117824.926, 1105765.643,
488551.728, 460504.642, 468914.918, 1011126.417, 411203.618,
373206.578, 425616.975, 2913186.035, 2324861.006, 4039116.578,
5386493.281, 4699797.618, 4007020.057, 2329013.301, 566597.802,
376715.236, 415879.351, 575477.283, 1107123.4, 507950.826, 93143.377,
162760.789, 2592972.627, 3030206.205, 2213064.563, 3519981.595,
3305027.169, 2719654.992, 94484.681, 480779.397, 571648.578,
242727.975, 1913906.941, 1154653.223, 2699439.575, 456600.595,
114849.93, 273956.82), year_group = structure(c(5L, 5L, 5L, 5L,
4L, 4L, 4L, 4L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 5L, 5L
), .Label = c("1999-2002", "2003-2007", "2008-2010", "2011-2014",
"2015-2018"), class = "factor"), total_average_period_in_1000USD = c(251327.404028933,
251327.404028933, 251327.404028933, 251327.404028933, 1938789.42919853,
1938789.42919853, 1938789.42919853, 1938789.42919853, 1416797.00579381,
1416797.00579381, 1416797.00579381, 251327.404028933, 251327.404028933,
251327.404028933, 251327.404028933, 1416797.00579381, 1416797.00579381,
1416797.00579381, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 1938789.42919853, 1938789.42919853, 1938789.42919853,
1938789.42919853, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 1938789.42919853, 1938789.42919853, 1938789.42919853,
1938789.42919853, 1416797.00579381, 1416797.00579381, 1416797.00579381,
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933,
1416797.00579381, 1416797.00579381, 1416797.00579381, 1938789.42919853,
1938789.42919853, 1938789.42919853, 1938789.42919853, 251327.404028933,
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 251327.404028933, 251327.404028933, 1416797.00579381,
1416797.00579381, 1416797.00579381, 1938789.42919853, 1938789.42919853,
1938789.42919853, 251327.404028933, 251327.404028933, 251327.404028933,
251327.404028933, 1416797.00579381, 1416797.00579381, 1416797.00579381,
251327.404028933, 251327.404028933, 251327.404028933), total_average_period_byPartner_in_1000USD = c(18061945.592,
18061945.592, 18061945.592, 18061945.592, 29386292.2005, 29386292.2005,
29386292.2005, 29386292.2005, 16792210.987, 16792210.987, 16792210.987,
2821046.57225, 2821046.57225, 2821046.57225, 2821046.57225, 9516603.62366667,
9516603.62366667, 9516603.62366667, 1351099.41725, 1351099.41725,
1351099.41725, 1351099.41725, 1335271.9835, 1335271.9835, 1335271.9835,
1335271.9835, 1297368.6805, 1297368.6805, 1297368.6805, 1297368.6805,
1020163.018, 1020163.018, 1020163.018, 1020163.018, 7659187.131,
7659187.131, 7659187.131, 7659187.131, 909021.0135, 909021.0135,
909021.0135, 909021.0135, 893949.063, 893949.063, 893949.063,
893949.063, 864927.9305, 864927.9305, 864927.9305, 864927.9305,
842101.68475, 842101.68475, 842101.68475, 842101.68475, 6261432.0115,
6261432.0115, 6261432.0115, 6261432.0115, 3718075.15433333, 3718075.15433333,
3718075.15433333, 630934.23275, 630934.23275, 630934.23275, 630934.23275,
555288.397, 555288.397, 555288.397, 555288.397, 3092387.873,
3092387.873, 3092387.873, 4105581.06425, 4105581.06425, 4105581.06425,
4105581.06425, 483667.418, 483667.418, 483667.418, 483667.418,
467744.598, 467744.598, 467744.598, 467744.598, 2612081.13166667,
2612081.13166667, 2612081.13166667, 3181554.58533333, 3181554.58533333,
3181554.58533333, 347410.15775, 347410.15775, 347410.15775, 347410.15775,
1922666.57966667, 1922666.57966667, 1922666.57966667, 339847.48225,
339847.48225, 339847.48225), percentage_of_group = c(71.8662004320097,
71.8662004320097, 71.8662004320097, 71.8662004320097, 15.1570313711933,
15.1570313711933, 15.1570313711933, 15.1570313711933, 11.8522349485003,
11.8522349485003, 11.8522349485003, 11.2245880354744, 11.2245880354744,
11.2245880354744, 11.2245880354744, 6.71698456783132, 6.71698456783132,
6.71698456783132, 5.3758539482406, 5.3758539482406, 5.3758539482406,
5.3758539482406, 5.31287858822702, 5.31287858822702, 5.31287858822702,
5.31287858822702, 5.16206613247255, 5.16206613247255, 5.16206613247255,
5.16206613247255, 4.05909981023223, 4.05909981023223, 4.05909981023223,
4.05909981023223, 3.95049973743988, 3.95049973743988, 3.95049973743988,
3.95049973743988, 3.61687981066861, 3.61687981066861, 3.61687981066861,
3.61687981066861, 3.55691042309532, 3.55691042309532, 3.55691042309532,
3.55691042309532, 3.4414390020136, 3.4414390020136, 3.4414390020136,
3.4414390020136, 3.35061625294572, 3.35061625294572, 3.35061625294572,
3.35061625294572, 3.22955753585287, 3.22955753585287, 3.22955753585287,
3.22955753585287, 2.62428219365846, 2.62428219365846, 2.62428219365846,
2.51040763018969, 2.51040763018969, 2.51040763018969, 2.51040763018969,
2.2094224032015, 2.2094224032015, 2.2094224032015, 2.2094224032015,
2.18266121424175, 2.18266121424175, 2.18266121424175, 2.11760029347137,
2.11760029347137, 2.11760029347137, 2.11760029347137, 1.92445157291451,
1.92445157291451, 1.92445157291451, 1.92445157291451, 1.8610966830587,
1.8610966830587, 1.8610966830587, 1.8610966830587, 1.84365235173768,
1.84365235173768, 1.84365235173768, 1.64100058387906, 1.64100058387906,
1.64100058387906, 1.38230114257658, 1.38230114257658, 1.38230114257658,
1.38230114257658, 1.35705155488342, 1.35705155488342, 1.35705155488342,
1.35221021186721, 1.35221021186721, 1.35221021186721)), row.names = c(NA,
-100L), groups = structure(list(ReporterName = c("Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola",
"Angola", "Angola", "Angola", "Angola"), PartnerName = c("Canada",
"Canada", "Canada", "China", "China", "China", "France", "France",
"India", "India", "India", "Indonesia", "Italy", "Malaysia",
"Netherlands", "Other Asia, nes", "Other Asia, nes", "Other Asia, nes",
"Portugal", "Singapore", "South Africa", "Spain", "United Arab Emirates",
"United Kingdom", "United States", "United States", "United States"
), year_group = structure(c(3L, 4L, 5L, 3L, 4L, 5L, 3L, 5L, 3L,
4L, 5L, 5L, 5L, 5L, 5L, 3L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L,
4L, 5L), .Label = c("1999-2002", "2003-2007", "2008-2010", "2011-2014",
"2015-2018"), class = "factor"), .rows = structure(list(70:72,
88:90, 39:42, 9:11, 5:8, 1:4, 85:87, 43:46, 59:61, 55:58,
12:15, 77:80, 62:65, 91:94, 81:84, 95:97, 73:76, 31:34, 47:50,
98:100, 27:30, 19:22, 51:54, 66:69, 16:18, 35:38, 23:26), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 27L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
答案 0 :(得分:3)
首先,在主数据集中,您似乎应该将 dput 数据中 Year 变量的 class = "Date" 更改为 class = "Integer",以防止它生成“1975”年的日期:
structure(list(Year = structure(c(2015, 2016, 2017, 2018, 2011,
2012, 2013, 2014, 2007, 2009, 2010, 2015, 2016, 2017, 2018, 2007,
2009, 2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015,
2016, 2017, 2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015, 2016, 2017,
2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014, 2007, 2009,
2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2007, 2009,
2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2015, 2016,
2017, 2018, 2007, 2009, 2010, 2012, 2013, 2014, 2015, 2016, 2017,
2018, 2007, 2009, 2010, 2015, 2016, 2017), class = "Integer"), ...
R 无法把单独的年份识别为日期:as.Date 会把数字解释为自 1970-01-01 起的天数(例如 2007 会变成 1975-07-01),因此您应该将年份转换为 y/m/d 格式。
例如,您可以将“2018”转换为“2018-12-31”(即用年末这一天代表该年份,也可以选用您想要的任何其他日期)来实现。
所以下面的代码会做到这一点:
library(xts)
data_new <- data #creating a new dataset to preserve original
class(data_new$Year) #now class of year is integer
## [1] "Integer"
#formatting integer year to a Date format
data_new$Year <- as.Date(paste(data_new$Year, 12, 31, sep = "-"))
class(data_new$Year) #check changed format of year
## [1] "Date"
#creating xts object
data_xts <- as.xts(data_new, data_new[,-c("Year")], order.by = data_new$Year)
head(data_new)
## Year ReporterName PartnerName TradeValue in 1000 USD year_group
## 1 2015-12-31 Angola China 14320566 2015-2018
## 2 2016-12-31 Angola China 13923092 2015-2018
## 3 2017-12-31 Angola China 19487067 2015-2018
## 4 2018-12-31 Angola China 24517058 2015-2018
## 5 2011-12-31 Angola China 24360793 2011-2014
## 6 2012-12-31 Angola China 33710030 2011-2014
## total_average_period_in_1000USD total_average_period_byPartner_in_1000USD
## 1 251327.4 18061946
## 2 251327.4 18061946
## 3 251327.4 18061946
## 4 251327.4 18061946
## 5 1938789.4 29386292
## 6 1938789.4 29386292
## percentage_of_group
## 1 71.86620
## 2 71.86620
## 3 71.86620
## 4 71.86620
## 5 15.15703
## 6 15.15703
# check periodicity
periodicity(data_xts$Year)
## 0 seconds periodicity from 2007-12-31 to 2018-12-31
答案 1 :(得分:1)
仅在数据框中保留数值列,然后使用 xts:
library(xts)
library(dplyr)
new_data <- data %>% ungroup() %>% select(where(is.numeric))
data_xts <- xts(new_data, order.by = data$Year)
class(data_xts)
#[1] "xts" "zoo"
head(data_xts)
# TradeValue in 1000 USD total_average_period_in_1000USD
#1975-07-01 13459327 1416797
#1975-07-01 10875647 1416797
#1975-07-01 2376843 1416797
#1975-07-01 2913186 1416797
#1975-07-01 2592973 1416797
#1975-07-01 1913907 1416797
# total_average_period_byPartner_in_1000USD percentage_of_group
#1975-07-01 16792211 11.852235
#1975-07-01 9516604 6.716985
#1975-07-01 3718075 2.624282
#1975-07-01 3092388 2.182661
#1975-07-01 2612081 1.843652
#1975-07-01 1922667 1.357052
答案 2 :(得分:0)
我们可以将 base R 与 xts 一起使用:
xts::xts(Filter(is.numeric, data), order.by = data$Year)
-输出
TradeValue in 1000 USD total_average_period_in_1000USD total_average_period_byPartner_in_1000USD percentage_of_group
1975-07-01 13459326.56 1416797.0 16792211.0 11.852235
1975-07-01 10875646.62 1416797.0 9516603.6 6.716985
1975-07-01 2376843.35 1416797.0 3718075.2 2.624282
1975-07-01 2913186.04 1416797.0 3092387.9 2.182661
1975-07-01 2592972.63 1416797.0 2612081.1 1.843652
1975-07-01 1913906.94 1416797.0 1922666.6 1.357052
1975-07-03 15954060.92 1416797.0 16792211.0 11.852235