问题描述
我一直在尝试在R中绘制时间序列数据。我在线咨询了多个不同的资源,但在创建此图时我仍然遇到问题。我在下面模拟了一些数据,这些数据代表了一家虚拟公司从2014年到2016年收到的每日信息:
#create data
date_decision_made = seq(as.Date("2014/1/1"),as.Date("2016/1/1"),by="day")
date_decision_made <- format(as.Date(date_decision_made),"%Y/%m/%d")
property_damages_in_dollars <- rnorm(731,100,10)
car_damages_in_dollars <- rnorm(731,105,8)
other_damages_in_dollars <- rnorm(731,104,9)
location <- c("canada","usa")
location <- sample(location,731,replace=TRUE,prob=c(0.3,0.7))
type_of_house <- c("single","townhome","rental" )
type_of_house<- sample(type_of_house,prob=c(0.5,0.3,0.2))
response_variable <- c("claim_approved","claim_rejected")
response_variable<- sample(response_variable,prob=c(0.4,0.6))
final_dataset <- cbind(date_decision_made,property_damages_in_dollars,car_damages_in_dollars,other_damages_in_dollars,location,type_of_house,response_variable)
final_dataset <- as.data.frame(final_dataset)
final_dataset$other_damages_in_dollars = as.numeric(final_dataset$other_damages_in_dollars)
final_dataset$property_damages_in_dollars = as.numeric(final_dataset$property_damages_in_dollars)
final_dataset$car_damages_in_dollars = as.numeric(final_dataset$car_damages_in_dollars)
prop_damage <-subset(final_dataset,select = c(date_decision_made,property_damages_in_dollars))
car_damage <-subset(final_dataset,car_damages_in_dollars))
other_damage <-subset(final_dataset,other_damages_in_dollars))
new <-subset(final_dataset,other_damages_in_dollars))
基于此数据,我尝试将这些数据绘制为R中的时间序列。我尝试了几种方法,所有这些方法都产生错误。我试图解决这些问题,但似乎无法解决。有人可以帮我吗?
#first way (error)
library(ggplot2)
library(reshape2) library(dplyr)
ggplot() + geom_line(data = prop_damage,aes(x = date_decision_made,y = property_damages_in_dollars,group = 1),color = "red")
+ scale_x_date(date_breaks = "days",date_labels = "%b %d %a")+
geom_line(data = car_damage,y = car_damages_in_dollars,group =1 ),color = "blue")
+ geom_line(data = other_damage,y = other_damages_in_dollars,group =1),color = "green")
+ xlab('data_date') + ylab('percent.change')
#second way (error)
ggplot(data = new,aes(x = date_decision_made)) +
geom_line(aes(y = property_damages_in_dollars,colour = "property_damages_in_dollars")) +
geom_line(aes(y = car_damages_in_dollars,colour = "car_damages_in_dollars")) +
geom_line(aes(y =other_damages_in_dollars,colour = "other_damages_in_dollars")) +
scale_colour_manual("",breaks = c("property_damages_in_dollars","car_damages_in_dollars","other_damages_in_dollars"),values = c("red","green","blue")) +
xlab(" ") +
scale_y_continuous("Dollars",limits = c(0,10000)) +
labs(title="demo graph")
#3rd way error
##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub,id=c("date_decision_made"))
ggplot(dd) + geom_line(aes(x=date_decision_made,y=value,colour=variable,group=1)) + scale_x_date(date_breaks = "days",date_labels = "%b %d %a")+ scale_colour_manual(values=c("red","blue"))
#4th error
mymts = ts(new,frequency = 1,start = c(2014,1))
autoplot(mymts) +
ggtitle("Time Series Plot") +
theme(plot.title = element_text(hjust = 0.5))
#5th Method error
x1 = ts(prop_damage,1))
x2 = ts(other_damage,1))
ts.plot(x,y,gpars = list(col = c("black","red")))
ts.plot(date_decision_made,gpars= list(col=rainbow(10)))
#6th method error
##Subset the necessary columns
dd_sub = new[,id=c("date_decision_made"))
qplot(date_decision_made,value,data=dd,geom='line',color=variable)
#7th way error
x1 = ts(prop_damage,1))
comb_ts <- cbind(x1,x2)
plot.ts(comb_ts,plot.type = "single")
有人可以告诉我这些代码在做什么错吗?谢谢
解决方法
尝试这种方法,并注意日期。由于年份的缘故,日期很多:
FETCH_UNIQUE
输出:
,另一种选择是使用pivot_longer
中的tidyr
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
dd_sub %>%
pivot_longer(cols = -date_decision_made) %>%
mutate(date_decision_made = ymd(date_decision_made)) %>%
ggplot() +
geom_line(aes(x = date_decision_made,y=value,colour = name,group = 1)) +
scale_x_date(date_breaks = "months",breaks = '12 months',date_labels = "%b %d %a")+
scale_colour_manual(values=c("red","green","blue"))+
theme(axis.text.x = element_text(angle = 90))