问题描述
我决定修改我的问题以使其更清楚 [25.03.2021]:
我有一个数据集,其中包含一条潮汐河沿岸 12 个测量站一年多的水位测量数据。每个测量站都有一个河流公里值,作为表示空间位置的变量。水位在时间和空间上都呈现周期性/正弦变化的模式。现在我需要对水位在时间和空间上的变化进行建模。
由于这个数据集太大,而且我无权分享,我根据波函数 $\psi(x,t) = A_0 \sin(\omega t - kx + \varphi_0)$ 模拟了一些数据。真实数据要更复杂一些,但我尝试的两种方法(nlsLM 和 GAM)对真实数据和模拟数据都不起作用。因此,模拟数据足以说明我的问题。
nlsLM 只有在我几乎完美地预先给定所有模型参数时才有效;只要参数初值稍有偏差,这种方法就完全失败(见此处,as can be seen here)。
GAM 分别对时间分量和空间分量都能很好地拟合,但对两者的组合不起作用(见此处,as can be seen here)。
###### Simulate data (similar to original dataset but less complicated) ######

### Create Long-Table: one row per (station, time step) combination
stations <- seq(0, 350, 30) # station positions (spatial variable)
start_time <- "2020-01-01 00:00:00"
end_time <- "2020-01-31 00:00:00"
time_interval <- "30 mins"
time_vector <- seq.POSIXt(
  from = as.POSIXct(start_time),
  to = as.POSIXct(end_time),
  by = time_interval
)
df <- data.frame(
  time = rep(time_vector, times = length(stations)),
  place = rep(stations, each = length(time_vector)),
  # Minutes elapsed since the first time stamp. The time vector has to be
  # repeated once per station so this column lines up with `time`.
  timediff = as.numeric(difftime(
    rep(time_vector, times = length(stations)),
    as.POSIXct(start_time),
    units = "mins"
  ))
)

### Parameter according to a wave function
A0 <- 200 # amplitude
k0 <- 0.023 # coefficient of the spatial term
w0 <- 0.005 # coefficient of the temporal term
phi0 <- 10 # phase offset

### Simulate water level values
df$level <- A0 * sin(k0 * df$place + w0 * df$timediff + phi0)

### Plot simulated data
par(mfrow = c(1, 2))
# Time series at a single station (place == 30)
plot(level ~ timediff,
  data = df[df$place == 30, ], type = "l",
  main = "Water level over time"
)
# Spatial profile at a single time step (timediff == 30)
plot(level ~ place,
  data = df[df$timediff == 30, ], type = "l",
  main = "Water level over space"
)
par(mfrow = c(1, 1))
###### Try to estimate model function parameters using nlsLM ######

### Modelling using nlsLM
library(minpack.lm)

# NOTE: the formulas refer to the bare column names `place` and `timediff`
# (not `df$place` / `df$timediff`) so that `predict(nlsmod, newdata)` actually
# evaluates the model on `newdata` instead of silently reusing `df`.

# Attempt 1: box-constrained search. The bounds are chosen so that both the
# starting values and the values used in the simulation (A0 = 200, k0 = 0.023,
# w0 = 0.005, phi0 = 10) lie inside the box.
nlsmod <- nlsLM(
  level ~ A * sin(k * place + w * timediff + phi),
  data = df,
  start = c(A = 200, k = 0.023, w = 0.001, phi = 10),
  lower = c(A = 100, k = 0.001, w = 0.001, phi = 0),
  upper = c(A = 1000, k = 0.1, w = 0.1, phi = 1000),
  control = nls.lm.control(maxiter = 1000)
)
# defining an area to search for parameters did not work although the real
# values are included

# Attempt 2: start exactly at the values used in the simulation.
# NOTE(review): the `data`/`start` arguments of this call were truncated in the
# source; the start vector is reconstructed from the comment below - confirm.
nlsmod <- nlsLM(
  level ~ A * sin(k * place + w * timediff + phi),
  data = df,
  start = c(A = 200, k = 0.023, w = 0.005, phi = 10),
  control = nls.lm.control(maxiter = 1000)
)
# defining exactly the real model values worked but this makes no sense since
# I would like to estimate them

# Attempt 3: perturb two of the starting values slightly.
# NOTE(review): only `A = 150` survived in the source; `k = 0.02` is an assumed
# stand-in for the second perturbed value - confirm against the original post.
nlsmod <- nlsLM(
  level ~ A * sin(k * place + w * timediff + phi),
  data = df,
  start = c(A = 150, k = 0.02, w = 0.005, phi = 10),
  control = nls.lm.control(maxiter = 1000)
)
# when changing just two values slightly the nlsLM (and nls) function does not
# work anymore
summary(nlsmod)
nlsmod

### Create new dataset
df.new <- data.frame(timediff = df$timediff, place = df$place)
df.new$pred <- predict(nlsmod, df.new)

### Plot simulated and predicted data
par(mfrow = c(1, 2))
# Over time at station place == 30: simulated (black) vs. predicted (red)
plot(level ~ timediff, data = df[df$place == 30, ], type = "l")
lines(pred ~ timediff, data = df.new[df.new$place == 30, ], col = "red")
# Over space at timediff == 0: simulated (black) vs. predicted (red)
plot(level ~ place, data = df[df$timediff == 0, ], type = "l")
lines(pred ~ place, data = df.new[df.new$timediff == 0, ], col = "red")
par(mfrow = c(1, 1))
###### Modeling using GAM ######

### Create one time and one space dataset for testing fit separately
df.time <- df[df$place == 30, ] # all time steps at one station
df.place <- df[df$timediff == 0, ] # all stations at the first time step

### Load package
library(mgcv)

### Test modeling space
plot(level ~ place, data = df.place, type = "p")
bam_mod <- bam(level ~ s(place), data = df.place)
plot(bam_mod)
df.new <- data.frame(place = df.place$place)
df.new$pred <- predict(bam_mod, df.new)
plot(level ~ place, data = df.place, type = "p")
lines(pred ~ place, data = df.new, type = "p", col = "red")
# works well

### Test modeling time
plot(level ~ timediff, data = df.time, type = "l")
# Cyclic smooth over time, fitted to the single-station subset.
bam_mod <- bam(level ~ s(timediff, k = 400, bs = "cc"),
  data = df.time, discrete = TRUE, nthreads = 10
)
plot(bam_mod)
df.new <- data.frame(timediff = df.time$timediff)
df.new$pred <- predict(bam_mod, df.new)
plot(level ~ timediff, data = df.time, type = "l")
lines(pred ~ timediff, data = df.new, col = "red")
# works well

### Test modeling place and time
par(mfrow = c(1, 2))
plot(level ~ timediff, data = df[df$place == 30, ], type = "l") # ,xlim=c(0,5000))
plot(level ~ place, data = df[df$timediff == 0, ], type = "l")
par(mfrow = c(1, 1))

# Attempt 1: cyclic main effects, a 2-D smooth and linear terms.
bam_mod <- bam(
  level ~ s(place, k = 7, bs = "cc") +
    s(timediff, bs = "cc") +
    s(timediff, place, k = 400) +
    place +
    timediff,
  data = df
) # ,nthreads=10)
# takes a while but did not work

# Attempt 2: additive cyclic smooths only.
# NOTE(review): this call was truncated in the source; `bs = "cc"` on timediff,
# `data = df` and `discrete = TRUE` are reconstructed - confirm.
bam_mod <- bam(level ~ s(place, bs = "cc") + s(timediff, bs = "cc"),
  data = df, discrete = TRUE, nthreads = 10
)
# faster but also did not work
plot(bam_mod)

### Create new dataset
df.new <- data.frame(timediff = df$timediff, place = df$place)
df.new$pred <- predict(bam_mod, df.new)

### Plot simulated and predicted data
# NOTE(review): the plotting calls below were truncated in the source and are
# rebuilt to mirror the simulated-data plots above - confirm.
par(mfrow = c(1, 2))
plot(level ~ timediff, data = df[df$place == 30, ], type = "l") # ,xlim=c(0,5000))
lines(pred ~ timediff, data = df.new[df.new$place == 30, ], col = "red")
plot(level ~ place, data = df[df$timediff == 0, ], type = "l")
lines(pred ~ place, data = df.new[df.new$timediff == 0, ], col = "red")
par(mfrow = c(1, 1))
解决方法
如此处(here)所示,
我使用了一个简单的线性模型,并直接采用了您自己的公式,如下所示:
### Simulate the data: one row per (station, time step) combination
stations <- seq(0, 350, 30)
start_time <- "2020-01-01 00:00:00"
end_time <- "2020-01-31 00:00:00"
time_interval <- "30 mins"
time_vector <- seq.POSIXt(
  from = as.POSIXct(start_time),
  to = as.POSIXct(end_time),
  by = time_interval
)
df <- data.frame(
  time = rep(time_vector, times = length(stations)),
  place = rep(stations, each = length(time_vector)),
  # Minutes since the first time stamp, repeated once per station so the
  # column has the same length as `time` and `place`.
  timediff = as.numeric(difftime(
    rep(time_vector, times = length(stations)),
    as.POSIXct(start_time),
    units = "mins"
  ))
)

### Wave parameters used for the simulation
A <- 200
w <- 0.001
phi <- 10
k <- 1
df$level <- A * sin(k * df$place + w * df$timediff + phi)

### Linear model
# k, w and phi are taken from the workspace as fixed, known constants, so the
# sine term is an ordinary regressor; lm() only estimates an intercept and the
# coefficient of that term.
mod <- lm(level ~ sin(k * place + w * timediff + phi), data = df)

# Store the fitted values next to the data so that row subsets of `df` keep
# observations and predictions aligned.
df$pred <- fitted(mod)

### Plot simulated (points) and fitted (lines) values
# Over time at a single station
at_station <- df[df$place == 210, ]
plot(level ~ timediff, data = at_station, type = "p", pch = 16)
lines(pred ~ timediff, data = at_station, col = "green")

# Over space at a single time step
at_time <- df[df$timediff == 0, ]
plot(level ~ place, data = at_time, type = "p", pch = 16)
lines(pred ~ place, data = at_time, type = "l", col = "red")
这样可以得到 100% 的拟合(图:模拟数据与线性模型预测数据的对比,Plot of simulated and LM predicted data)。