问题描述
我有一个非常初学者的问题。我想使用时间序列包 xts。因此,要将我的数据转换为 xts 格式。
我当前的数据集“data”是“data.table”“data.frame”格式。 Year 列是一个“整数”。 过滤后的 data$Year 列包含 1999-2018 年的数据。 在我的代码底部,我提供了我的数据示例。
我面临以下问题:
as.Date 无法将我的年度数据识别为日期。它会自动将它们转换为每日数据,从 1975 年初开始......
我尝试了以下命令:
data$Year <- as.Date(data$Year)
data_xts <- as.xts(data,data[,-1],order.by = data$Year)
# Now xts format
class(data_xts)
# here is the problem: > 0 seconds periodicity
# from 1975-06-23 to 1975-07-12
periodicity(data_xts$year)
> head(data_xts)
Year ReporterName PartnerName TradeValue in 1000 USD year_group total_average_period_in_1000USD
1975-06-23 "1975-06-23" "Comoros" "France" " 1360.758" "1999-2002" " 524.8275"
1975-06-23 "1975-06-23" "Comoros" "United States" " 1392.263" "1999-2002" " 524.8275"
1975-06-23 "1975-06-23" "Comoros" "Germany" " 633.666" "1999-2002" " 524.8275"
1975-06-23 "1975-06-23" "Comoros" "United Kingdom" " 152.029" "1999-2002" " 524.8275"
1975-06-23 "1975-06-23" "Comoros" "Singapore" " 450.452" "1999-2002" " 524.8275"
1975-06-23 "1975-06-23" "Comoros" "Indonesia" " 194.580" "1999-2002" " 524.8275"
total_average_period_byPartner_in_1000USD percentage_of_group
1975-06-23 "3.638645e+03" "6.933030e+00"
1975-06-23 "1.449703e+03" "2.762247e+00"
1975-06-23 "6.692080e+02" "1.275101e+00"
1975-06-23 "4.821123e+02" "9.186109e-01"
1975-06-23 "4.325665e+02" "8.242070e-01"
1975-06-23 "1.945800e+02" "3.707504e-01"
这是我的初始数据示例
dput(head(data,n = 100))
structure(list(Year = structure(c(2015,2016,2017,2018,2011,2012,2013,2014,2007,2009,2010,2015,2017),class = "Date"),ReporterName = c("Angola","Angola","Angola"),PartnerName = c("China","China","India","United States","Spain","South Africa","Other Asia,nes","Canada","France","Portugal","United arab Emirates","Italy","United Kingdom","Indonesia","Netherlands","Malaysia","Singapore","Singapore"),`TradeValue in 1000 USD` = c(14320565.527,13923091.96,19487066.539,24517058.342,24360792.847,33710030.023,31947235.081,27527110.851,13459326.563,15954060.922,20963245.476,2676339.583,1948845.077,2890061.159,3768940.47,10875646.624,7708378.359,9965785.888,2245976.426,882089.095,1025777.275,1250554.873,1265801.316,1525650.265,1079503.617,1470132.736,1376041.349,1309031.634,1342549.642,1161852.097,1410793.303,1136068.366,1388765.375,145025.028,16475024.144,6594525.851,5018390.939,2548807.59,1035618.609,873616.866,1079684.282,647164.297,1599581.068,910864.068,330799.771,734551.345,1199355.049,851431.606,334787.698,1074137.369,665253.613,801908.541,1016519.507,884725.078,6842018.3,6932060.8,6764232.765,4507416.181,2376843.352,3659557.185,5117824.926,1105765.643,488551.728,460504.642,468914.918,1011126.417,411203.618,373206.578,425616.975,2913186.035,2324861.006,4039116.578,5386493.281,4699797.618,4007020.057,2329013.301,566597.802,376715.236,415879.351,575477.283,1107123.4,507950.826,93143.377,162760.789,2592972.627,3030206.205,2213064.563,3519981.595,3305027.169,2719654.992,94484.681,480779.397,571648.578,242727.975,1913906.941,1154653.223,2699439.575,456600.595,114849.93,273956.82),year_group = structure(c(5L,5L,4L,3L,5L
),.Label = c("1999-2002","2003-2007","2008-2010","2011-2014","2015-2018"),class = "factor"),total_average_period_in_1000USD = c(251327.404028933,251327.404028933,1938789.42919853,1416797.00579381,251327.404028933),total_average_period_byPartner_in_1000USD = c(18061945.592,18061945.592,29386292.2005,16792210.987,2821046.57225,9516603.62366667,1351099.41725,1335271.9835,1297368.6805,1020163.018,7659187.131,909021.0135,893949.063,864927.9305,842101.68475,6261432.0115,3718075.15433333,630934.23275,555288.397,3092387.873,4105581.06425,483667.418,467744.598,2612081.13166667,3181554.58533333,347410.15775,1922666.57966667,339847.48225,339847.48225),percentage_of_group = c(71.8662004320097,71.8662004320097,15.1570313711933,11.8522349485003,11.2245880354744,6.71698456783132,5.3758539482406,5.31287858822702,5.16206613247255,4.05909981023223,3.95049973743988,3.61687981066861,3.55691042309532,3.4414390020136,3.35061625294572,3.22955753585287,2.62428219365846,2.51040763018969,2.2094224032015,2.18266121424175,2.11760029347137,1.92445157291451,1.8610966830587,1.84365235173768,1.64100058387906,1.38230114257658,1.35705155488342,1.35221021186721,1.35221021186721)),row.names = c(NA,-100L),groups = structure(list(ReporterName = c("Angola",PartnerName = c("Canada","United States"
),year_group = structure(c(3L,5L),.rows = structure(list(70:72,88:90,39:42,9:11,5:8,1:4,85:87,43:46,59:61,55:58,12:15,77:80,62:65,91:94,81:84,95:97,73:76,31:34,47:50,98:100,27:30,19:22,51:54,66:69,16:18,35:38,23:26),ptype = integer(0),class = c("vctrs_list_of","vctrs_vctr","list"))),27L),class = c("tbl_df","tbl","data.frame"),.drop = TRUE),class = c("grouped_df","tbl_df","data.frame"))
解决方法
首先,在主数据集中,您似乎应该将 class = "Date"
数据中的 class = "Integer"
变量的 Year
更改为 dput
,以防止它创建年份“ 1975"
structure(list(Year = structure(c(2015,2016,2017,2018,2011,2012,2013,2014,2007,2009,2010,2015,2017),class = "Integer"),...
R 不能仅将年份识别为日期格式,因此您应该将年份转换为 y/m/d 格式。
您可以通过将例如“2018”转换为“2018-12-31”(将年末视为年份(或任何您想要的)来实现。
所以下面的代码会做到这一点:
library(xts)
data_new <- data #creating a new dataset to preserve original
class(data_new$Year) #now class of year is integer
## [1] "Integer"
#formatting integer year to a Date format
data_new$Year <- as.Date(paste(data_new$Year,12,31,sep = "-"))
class(data_new$Year) #check changed format of year
## [1] "Date"
#creating xts object
data_xts <- as.xts(data_new,data_new[,-c("Year")],order.by = data_new$Year)
head(data_new)
## Year ReporterName PartnerName TradeValue in 1000 USD year_group
## 1 2015-12-31 Angola China 14320566 2015-2018
## 2 2016-12-31 Angola China 13923092 2015-2018
## 3 2017-12-31 Angola China 19487067 2015-2018
## 4 2018-12-31 Angola China 24517058 2015-2018
## 5 2011-12-31 Angola China 24360793 2011-2014
## 6 2012-12-31 Angola China 33710030 2011-2014
## total_average_period_in_1000USD total_average_period_byPartner_in_1000USD
## 1 251327.4 18061946
## 2 251327.4 18061946
## 3 251327.4 18061946
## 4 251327.4 18061946
## 5 1938789.4 29386292
## 6 1938789.4 29386292
## percentage_of_group
## 1 71.86620
## 2 71.86620
## 3 71.86620
## 4 71.86620
## 5 15.15703
## 6 15.15703
# check periodicity
periodicity(data_xts$Year)
## 0 seconds periodicity from 2007-12-31 to 2018-12-31
,
仅在数据框中保留数字列并使用 xts
:
library(xts)
library(dplyr)
new_data <- data %>% ungroup() %>% select(where(is.numeric))
data_xts <- xts(new_data,order.by = data$Year)
class(data_xts)
#[1] "xts" "zoo"
head(data_xts)
# TradeValue in 1000 USD total_average_period_in_1000USD
#1975-07-01 13459327 1416797
#1975-07-01 10875647 1416797
#1975-07-01 2376843 1416797
#1975-07-01 2913186 1416797
#1975-07-01 2592973 1416797
#1975-07-01 1913907 1416797
# total_average_period_byPartner_in_1000USD percentage_of_group
#1975-07-01 16792211 11.852235
#1975-07-01 9516604 6.716985
#1975-07-01 3718075 2.624282
#1975-07-01 3092388 2.182661
#1975-07-01 2612081 1.843652
#1975-07-01 1922667 1.357052
,
我们可以将 base R
与 xts
一起使用
xts::xts(Filter(is.numeric,data),order.by = data$Year)
-输出
TradeValue in 1000 USD total_average_period_in_1000USD total_average_period_byPartner_in_1000USD percentage_of_group
1975-07-01 13459326.56 1416797.0 16792211.0 11.852235
1975-07-01 10875646.62 1416797.0 9516603.6 6.716985
1975-07-01 2376843.35 1416797.0 3718075.2 2.624282
1975-07-01 2913186.04 1416797.0 3092387.9 2.182661
1975-07-01 2592972.63 1416797.0 2612081.1 1.843652
1975-07-01 1913906.94 1416797.0 1922666.6 1.357052
1975-07-03 15954060.92 1416797.0 16792211.0 11.852235