问题描述
我使用以下数据建立了一个自动ARIMA模型。但是在查看结果之后,我不确定这份数据是否适合用ARIMA模型来建模。根据p值,我对变量COUNT做了三阶差分,auto.arima建议的阶数为(3,0)。但是得到的预测值并不符合预期,它们大多是负值,而实际数据不包含任何负值。我不明白问题出在哪里。该模型在统计上看起来是正确的,但预测值看起来并不合理。感谢任何帮助。
数据:
dput(Enrollment_Data)
structure(list(COUNT = c(17L,1L,5L,8L,45L,21L,18L,43L,82L,116L,192L,289L,242L,254L,335L,138L,71L,98L,91L,175L,232L,155L,376L,197L,271L,421L),Enrolment_date = structure(c(25L,20L,10L,16L,14L,12L,3L,26L,23L,6L,11L,9L,17L,2L,19L,15L,13L,4L,27L,24L,22L,7L),.Label = c("APR2018","APR2019","AUG2018","AUG2019","DEC2017","DEC2018","DEC2019","FEB2018","FEB2019","JAN2018","JAN2019","JUL2018","JUL2019","JUN2018","JUN2019","MAR2018","MAR2019","MAY2018","MAY2019","NOV2017","NOV2018","NOV2019","OCT2018","OCT2019","SEP2017","SEP2018","SEP2019"),class = "factor")),class = "data.frame",row.names = c(NA,-27L))
代码:
# Read the enrollment data from CSV, print it, and emit its dput() form
# (the dput() output is what is pasted in the data section of this post).
Enrollment_Data <- read.csv('EnrollmentRateT0.csv')
print(Enrollment_Data)
dput(Enrollment_Data)
# Load packages
library("tseries")
library("ggplot2")
library("forecast")
library(FitAR)
library("fUnitRoots")
library(lmtest)
library(fpp2)
# attach() is what lets COUNT be referenced as a bare name below.
# NOTE(review): attach() is generally discouraged; Enrollment_Data$COUNT
# would be explicit.
attach(Enrollment_Data)
# Step 1: Model identification
# Stationarity check - Dickey-Fuller test
# P-value > 0.5, hence the data is non-stationary
# NOTE(review): "0.5" is presumably meant to be the 0.05 significance
# level - confirm.
# d.COUNT is the third-order difference of COUNT. Differences can be
# negative even when every value of COUNT is non-negative.
d.COUNT <- diff(COUNT,differences = 3)
summary(COUNT)
summary(d.COUNT)
plot(d.COUNT)
# The test and the ACF/PACF plots are all applied to the differenced series.
adf.test(d.COUNT,alternative="stationary")
acf(d.COUNT)
pacf(d.COUNT)
# Step 2: Model estimation
# Step 4: Diagnosis
# NOTE(review): step numbering skips 3 and both labels sit above the
# estimation calls; kept as in the original.
# Every auto.arima() call below is fitted to d.COUNT (already differenced
# three times), not to COUNT, so the model describes the differenced series.
auto.arima(d.COUNT)
auto.arima(d.COUNT,stepwise = FALSE,approximation = FALSE)
# NOTE(review): D is auto.arima's seasonal-differencing order; no seasonal
# period is set on d.COUNT in this script, so D=1 presumably has no
# effect - confirm.
arima.final <-auto.arima(d.COUNT,approximation = FALSE,D=1)
tsdiag(arima.final)
arima.final
# The next line is a bare string literal used as a note; it is an
# expression, not a comment.
'Choose the one that has least AIC and significant co-efficients'
# Disabled alternative: fit ARIMA(3,3,1) directly on the undifferenced COUNT.
#arima.final <-arima(COUNT,c(3,3,1))
# 12-step-ahead forecast from the model fitted to d.COUNT. The values are
# forecasts of the third difference, not of COUNT itself, which is why
# they can be negative.
forecast1 <- forecast(arima.final,h = 12)
forecast1
# NOTE(review): futurVal is not defined anywhere in the visible script, so
# this call is expected to fail; forecast1 was presumably intended. Also
# verify that the installed forecast version still exports plot.forecast.
plot.forecast(futurVal)
plot(forecast1)
class(forecast1)
print(forecast1)
summary(forecast1)
accuracy(forecast1)
plot(d.COUNT)
# Cross-check: produce the same 12-step forecast via predict() and via
# forecast(), then compare the point forecasts with all.equal().
p <- predict(arima.final,n.ahead = 12);
f <- forecast(arima.final,h = 12);
all.equal(f$mean,p$pred)
accuracy(f)
p
f
结果:
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
25 -234.78798559 -376.20497 -93.3710 -451.0666 -18.50937
26 248.28301149 -21.68036 518.2464 -164.5903 661.15636
27 38.07516814 -281.53132 357.6817 -450.7208 526.87112
28 -278.77782716 -600.00425 42.4486 -770.0513 212.49560
29 251.40378400 -74.76879 577.5764 -247.4341 750.24168
30 -31.49668698 -359.73170 296.7383 -533.4888 470.49545
31 -144.02466378 -474.75484 186.7055 -649.8328 361.78350
32 130.22859430 -211.26598 471.7232 -392.0423 652.49947
33 13.52166802 -332.92417 359.9675 -516.3215 543.36485
34 -123.35180366 -469.81119 223.1076 -653.2157 406.51210
35 103.92492852 -244.63788 452.4877 -429.1559 637.00574
36 -0.06911659 -349.40010 349.2619 -534.3247 534.18651
解决方法
您是在 d.COUNT 上运行 auto.arima(),而 d.COUNT 是原始 Enrollment_Data$COUNT 的三阶差分。d.COUNT 确实包含许多负值,因此对它做出的预测也会出现负值。我认为您应该改为在 Enrollment_Data$COUNT 上运行 auto.arima()。