将年度数据转换为 xts 格式

问题描述

我有一个非常基础的初学者问题。我想使用时间序列包 xts,因此需要先将我的数据转换为 xts 格式。

我当前的数据集“data”的类是“data.table”和“data.frame”。Year 列的类型是整数(integer)。过滤后的 data$Year 列包含 1999-2018 年的数据。在本问题的末尾,我提供了我的数据示例。

我面临以下问题:

as.Date 无法将我的年度数据识别为日期。它会自动将它们转换为每日数据,从 1975 年初开始......

我尝试了以下命令:

data$Year <- as.Date(data$Year)
data_xts <- as.xts(data,data[,-1],order.by = data$Year)

# Now xts format
class(data_xts)

# here is the problem: > 0 seconds periodicity
# from 1975-06-23 to 1975-07-12
periodicity(data_xts$year) 

这里是错误输出的头部

> head(data_xts)
           Year         ReporterName PartnerName      TradeValue in 1000 USD year_group  total_average_period_in_1000USD
1975-06-23 "1975-06-23" "Comoros"    "France"         "    1360.758"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "United States"  "    1392.263"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "Germany"        "     633.666"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "United Kingdom" "     152.029"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "Singapore"      "     450.452"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "Indonesia"      "     194.580"         "1999-2002" "    524.8275"                 
           total_average_period_byPartner_in_1000USD percentage_of_group
1975-06-23 "3.638645e+03"                            "6.933030e+00"     
1975-06-23 "1.449703e+03"                            "2.762247e+00"     
1975-06-23 "6.692080e+02"                            "1.275101e+00"     
1975-06-23 "4.821123e+02"                            "9.186109e-01"     
1975-06-23 "4.325665e+02"                            "8.242070e-01"     
1975-06-23 "1.945800e+02"                            "3.707504e-01" 

这是我的初始数据示例

dput(head(data,n = 100))
structure(list(Year = structure(c(2015,2016,2017,2018,2011,2012,2013,2014,2007,2009,2010,2015,2017),class = "Date"),ReporterName = c("Angola","Angola","Angola"),PartnerName = c("China","China","India","United States","Spain","South Africa","Other Asia,nes","Canada","France","Portugal","United arab Emirates","Italy","United Kingdom","Indonesia","Netherlands","Malaysia","Singapore","Singapore"),`TradeValue in 1000 USD` = c(14320565.527,13923091.96,19487066.539,24517058.342,24360792.847,33710030.023,31947235.081,27527110.851,13459326.563,15954060.922,20963245.476,2676339.583,1948845.077,2890061.159,3768940.47,10875646.624,7708378.359,9965785.888,2245976.426,882089.095,1025777.275,1250554.873,1265801.316,1525650.265,1079503.617,1470132.736,1376041.349,1309031.634,1342549.642,1161852.097,1410793.303,1136068.366,1388765.375,145025.028,16475024.144,6594525.851,5018390.939,2548807.59,1035618.609,873616.866,1079684.282,647164.297,1599581.068,910864.068,330799.771,734551.345,1199355.049,851431.606,334787.698,1074137.369,665253.613,801908.541,1016519.507,884725.078,6842018.3,6932060.8,6764232.765,4507416.181,2376843.352,3659557.185,5117824.926,1105765.643,488551.728,460504.642,468914.918,1011126.417,411203.618,373206.578,425616.975,2913186.035,2324861.006,4039116.578,5386493.281,4699797.618,4007020.057,2329013.301,566597.802,376715.236,415879.351,575477.283,1107123.4,507950.826,93143.377,162760.789,2592972.627,3030206.205,2213064.563,3519981.595,3305027.169,2719654.992,94484.681,480779.397,571648.578,242727.975,1913906.941,1154653.223,2699439.575,456600.595,114849.93,273956.82),year_group = structure(c(5L,5L,4L,3L,5L
),.Label = c("1999-2002","2003-2007","2008-2010","2011-2014","2015-2018"),class = "factor"),total_average_period_in_1000USD = c(251327.404028933,251327.404028933,1938789.42919853,1416797.00579381,251327.404028933),total_average_period_byPartner_in_1000USD = c(18061945.592,18061945.592,29386292.2005,16792210.987,2821046.57225,9516603.62366667,1351099.41725,1335271.9835,1297368.6805,1020163.018,7659187.131,909021.0135,893949.063,864927.9305,842101.68475,6261432.0115,3718075.15433333,630934.23275,555288.397,3092387.873,4105581.06425,483667.418,467744.598,2612081.13166667,3181554.58533333,347410.15775,1922666.57966667,339847.48225,339847.48225),percentage_of_group = c(71.8662004320097,71.8662004320097,15.1570313711933,11.8522349485003,11.2245880354744,6.71698456783132,5.3758539482406,5.31287858822702,5.16206613247255,4.05909981023223,3.95049973743988,3.61687981066861,3.55691042309532,3.4414390020136,3.35061625294572,3.22955753585287,2.62428219365846,2.51040763018969,2.2094224032015,2.18266121424175,2.11760029347137,1.92445157291451,1.8610966830587,1.84365235173768,1.64100058387906,1.38230114257658,1.35705155488342,1.35221021186721,1.35221021186721)),row.names = c(NA,-100L),groups = structure(list(ReporterName = c("Angola",PartnerName = c("Canada","United States"
),year_group = structure(c(3L,5L),.rows = structure(list(70:72,88:90,39:42,9:11,5:8,1:4,85:87,43:46,59:61,55:58,12:15,77:80,62:65,91:94,81:84,95:97,73:76,31:34,47:50,98:100,27:30,19:22,51:54,66:69,16:18,35:38,23:26),ptype = integer(0),class = c("vctrs_list_of","vctrs_vctr","list"))),27L),class = c("tbl_df","tbl","data.frame"),.drop = TRUE),class = c("grouped_df","tbl_df","data.frame"))

解决方法

首先,在主数据集中,您似乎应该将 dput 数据中 Year 变量的 class = "Date" 更改为 class = "Integer",以防止它生成“1975”这样的年份:

structure(list(Year = structure(c(2015,2016,2017,2018,2011,2012,2013,2014,2007,2009,2010,2015,2017),class = "Integer"),...

R 无法仅凭年份就将其识别为日期格式,因此您应该将年份转换为“年-月-日”(y/m/d)格式。
例如,您可以将“2018”转换为“2018-12-31”(即用年末日期代表该年份,也可以选用您想要的任何其他日期)。
所以下面的代码会做到这一点:

library(xts)

data_new <- data #creating a new dataset to preserve original  
class(data_new$Year)  #now class of year is integer

## [1] "Integer"


#formatting integer year to a Date format
data_new$Year <- as.Date(paste(data_new$Year,12,31,sep = "-")) 

class(data_new$Year) #check changed format of year

## [1] "Date"

#creating xts object
data_xts <- as.xts(data_new,data_new[,-c("Year")],order.by = data_new$Year)

head(data_new)

##         Year ReporterName PartnerName TradeValue in 1000 USD year_group
## 1 2015-12-31       Angola       China               14320566  2015-2018
## 2 2016-12-31       Angola       China               13923092  2015-2018
## 3 2017-12-31       Angola       China               19487067  2015-2018
## 4 2018-12-31       Angola       China               24517058  2015-2018
## 5 2011-12-31       Angola       China               24360793  2011-2014
## 6 2012-12-31       Angola       China               33710030  2011-2014
##   total_average_period_in_1000USD total_average_period_byPartner_in_1000USD
## 1                        251327.4                                  18061946
## 2                        251327.4                                  18061946
## 3                        251327.4                                  18061946
## 4                        251327.4                                  18061946
## 5                       1938789.4                                  29386292
## 6                       1938789.4                                  29386292
##   percentage_of_group
## 1            71.86620
## 2            71.86620
## 3            71.86620
## 4            71.86620
## 5            15.15703
## 6            15.15703


# check periodicity
periodicity(data_xts$Year)

## 0 seconds periodicity from 2007-12-31 to 2018-12-31
另一种方法:

仅在数据框中保留数值列,然后使用 xts:

library(xts)
library(dplyr)

new_data <- data %>% ungroup() %>% select(where(is.numeric))
data_xts <- xts(new_data,order.by = data$Year)
class(data_xts)
#[1] "xts" "zoo"

head(data_xts)

#           TradeValue in 1000 USD total_average_period_in_1000USD
#1975-07-01               13459327                         1416797
#1975-07-01               10875647                         1416797
#1975-07-01                2376843                         1416797
#1975-07-01                2913186                         1416797
#1975-07-01                2592973                         1416797
#1975-07-01                1913907                         1416797

#           total_average_period_byPartner_in_1000USD percentage_of_group
#1975-07-01                                  16792211           11.852235
#1975-07-01                                   9516604            6.716985
#1975-07-01                                   3718075            2.624282
#1975-07-01                                   3092388            2.182661
#1975-07-01                                   2612081            1.843652
#1975-07-01                                   1922667            1.357052
另一种方法:

我们可以将 base R 与 xts 一起使用:

xts::xts(Filter(is.numeric,data),order.by = data$Year)

输出:

           TradeValue in 1000 USD total_average_period_in_1000USD total_average_period_byPartner_in_1000USD percentage_of_group
1975-07-01            13459326.56                       1416797.0                                16792211.0           11.852235
1975-07-01            10875646.62                       1416797.0                                 9516603.6            6.716985
1975-07-01             2376843.35                       1416797.0                                 3718075.2            2.624282
1975-07-01             2913186.04                       1416797.0                                 3092387.9            2.182661
1975-07-01             2592972.63                       1416797.0                                 2612081.1            1.843652
1975-07-01             1913906.94                       1416797.0                                 1922666.6            1.357052
1975-07-03            15954060.92                       1416797.0                                16792211.0           11.852235