此数据适合ARIMA建模吗?

问题描述

我使用以下数据建立了一个自动ARIMA(auto.arima)模型。但是在查看结果之后,我不确定此数据是否适合用ARIMA建模。根据平稳性检验给出的p值,我对变量COUNT做了三阶差分,auto.arima建议的阶数为(3,0)。但是得到的预测值并不符合预期,它们大多是负值,而实际数据不包含任何负值。我不明白问题出在哪里。该模型在统计上看起来是正确的,但是预测值看起来不合理。非常感谢任何帮助。

数据:

dput(Enrollment_Data)
structure(list(COUNT = c(17L,1L,5L,8L,45L,21L,18L,43L,82L,116L,192L,289L,242L,254L,335L,138L,71L,98L,91L,175L,232L,155L,376L,197L,271L,421L),Enrolment_date = structure(c(25L,20L,10L,16L,14L,12L,3L,26L,23L,6L,11L,9L,17L,2L,19L,15L,13L,4L,27L,24L,22L,7L),.Label = c("APR2018","APR2019","AUG2018","AUG2019","DEC2017","DEC2018","DEC2019","FEB2018","FEB2019","JAN2018","JAN2019","JUL2018","JUL2019","JUN2018","JUN2019","MAR2018","MAR2019","MAY2018","MAY2019","NOV2017","NOV2018","NOV2019","OCT2018","OCT2019","SEP2017","SEP2018","SEP2019"),class = "factor")),class = "data.frame",row.names = c(NA,-27L))

代码

# Exploratory ARIMA workflow on monthly enrollment counts.
# Reads the data, differences COUNT three times, fits auto.arima() models to
# the differenced series, and produces 12-step-ahead forecasts.
# NOTE(review): every model below is fitted to d.COUNT (the differenced
# series), so all forecasts are forecasts of the differences, not of COUNT.

# Read the enrollment data from CSV into a data frame.
Enrollment_Data <- read.csv('EnrollmentRateT0.csv')

# Show the data, then emit a reproducible text representation of it.
print(Enrollment_Data)
dput(Enrollment_Data)
# Load packages
library("tseries")
library("ggplot2")
library("forecast")
library(FitAR)
library("fUnitRoots")
library(lmtest)
library(fpp2)


# Put the data frame's columns on the search path so COUNT can be referenced
# by bare name below.
# NOTE(review): attach() is generally discouraged; Enrollment_Data$COUNT is the
# explicit alternative.
attach(Enrollment_Data)
# Step 1: Model Identification
# Stationarity check - Dickey-Fuller test

# P-value > 0.5, hence the data is non-stationary
# NOTE(review): no test on the undifferenced COUNT is run in this script; the
# p-value above presumably came from an earlier interactive run - confirm.

# Third-order difference of COUNT (differencing applied three times).
d.COUNT <- diff(COUNT,differences = 3)
# Compare the distribution of the raw and the differenced series.
summary(COUNT)
summary(d.COUNT)

plot(d.COUNT)

# Augmented Dickey-Fuller test on the differenced series.
adf.test(d.COUNT,alternative="stationary")

# Autocorrelation and partial autocorrelation of the differenced series.
acf(d.COUNT)
pacf(d.COUNT)

# Step 2: Model Estimation


# Step 4: Diagnosis
# Two exploratory fits on the differenced series; the results are printed but
# not stored.
auto.arima(d.COUNT)
auto.arima(d.COUNT,stepwise = FALSE,approximation = FALSE)

# The model actually used for forecasting, again fitted to d.COUNT.
# NOTE(review): D is the seasonal differencing order in auto.arima(); d.COUNT
# is a plain vector produced by diff(), so confirm that D=1 has any effect.
arima.final <-auto.arima(d.COUNT,approximation = FALSE,D=1)

# Residual diagnostic plots for the fitted model.
tsdiag(arima.final)

# Print the fitted model.
arima.final

# The next line is a bare string expression used as a note, not a comment.
'Choose the one that has least AIC and significant co-efficients'

#arima.final <-arima(COUNT,c(3,3,1))

# 12-step-ahead forecast from the model fitted to d.COUNT. These are forecasts
# of the third-order differences, so negative values are possible even though
# COUNT itself is non-negative.
forecast1 <- forecast(arima.final,h = 12)

forecast1

# NOTE(review): futurVal is not assigned anywhere in this script, so this call
# fails as written; presumably forecast1 was intended - confirm.
plot.forecast(futurVal)
plot(forecast1)
class(forecast1)
print(forecast1)
summary(forecast1)
# Accuracy measures for the forecast object.
accuracy(forecast1)
plot(d.COUNT)

# Cross-check: predict() and forecast() on the same model, compared via
# all.equal() on the point forecasts.
p <- predict(arima.final,n.ahead = 12); 
f <- forecast(arima.final,h = 12); 
all.equal(f$mean,p$pred)

accuracy(f)
p
f

结果:

Point    Forecast      Lo 80    Hi 80     Lo 95     Hi 95
25  -234.78798559 -376.20497 -93.3710 -451.0666 -18.50937
26   248.28301149  -21.68036 518.2464 -164.5903 661.15636
27    38.07516814 -281.53132 357.6817 -450.7208 526.87112
28  -278.77782716 -600.00425  42.4486 -770.0513 212.49560
29   251.40378400  -74.76879 577.5764 -247.4341 750.24168
30   -31.49668698 -359.73170 296.7383 -533.4888 470.49545
31  -144.02466378 -474.75484 186.7055 -649.8328 361.78350
32   130.22859430 -211.26598 471.7232 -392.0423 652.49947
33    13.52166802 -332.92417 359.9675 -516.3215 543.36485
34  -123.35180366 -469.81119 223.1076 -653.2157 406.51210
35   103.92492852 -244.63788 452.4877 -429.1559 637.00574
36    -0.06911659 -349.40010 349.2619 -534.3247 534.18651

解决方法

您是在对d.COUNT运行auto.arima(),而d.COUNT是原始Enrollment_Data$COUNT的三阶差分。d.COUNT确实包含许多负值,因此对它做出的预测也会出现负值。我认为您应该改为对Enrollment_Data$COUNT运行auto.arima();auto.arima()会自行选择所需的差分阶数,无需事先手动差分。