问题描述
我用 0.5 毫米的翻斗式雨量计记录降雨数据,得到的是一个时间间隔不规则的连续时间序列。
例如
日期和时间 | 降雨 |
---|---|
11/05/2021 11:05:17 | 0.5 |
11/05/2021 11:15:10 | 0.5 |
11/05/2021 11:20:04 | 0.5 |
11/05/2021 11:28:22 | 0.5 |
11/05/2021 11:33:25 | 0.5 |
11/05/2021 11:36:39 | 0.5 |
11/05/2021 11:39:50 | 0.5 |
11/05/2021 11:41:43 | 0.5 |
11/05/2021 11:43:35 | 0.5 |
11/05/2021 11:44:57 | 0.5 |
11/05/2021 11:47:02 | 0.5 |
11/05/2021 11:48:42 | 0.5 |
11/05/2021 11:53:04 | 0.5 |
11/05/2021 11:58:33 | 0.5 |
11/05/2021 12:01:27 | 0.5 |
11/05/2021 12:02:52 | 0.5 |
11/05/2021 12:07:35 | 0.5 |
11/05/2021 12:10:32 | 0.5 |
11/05/2021 12:12:55 | 0.5 |
11/05/2021 12:16:22 | 0.5 |
11/05/2021 12:17:45 | 0.5 |
11/05/2021 12:20:14 | 0.5 |
11/05/2021 12:22:26 | 0.5 |
对于一次降雨事件,我希望能够计算:
- 滚动的 60 分钟累计降雨量
- 事件持续期间内最大的 60 分钟累计降雨量(窗口不按整点划分,即不是 11 点到 12 点、12 点到 1 点这样)
即在上面的示例中:一个 60 分钟的窗口可以从 12:07:35 起向前回溯,一直包含到 11:15:10(窗口的边界落在某次翻斗翻转的精确 hh:mm:ss 时刻,而不是整点)。
了解最大总数有助于我们比较事件和预测。
到目前为止我的思考过程:
我一直在使用滞后函数计算水位上升率(以 5 分钟为间隔记录)。
# Rate of rise: Stage minus its value 12 rows earlier
# (12 rows at the 5-minute logging interval).
River_Hour_RoR <- mutate(
  River,
  RoR = Stage - lag(Stage, n = 12)
)
我想也许我可以将 sum/cumulative-sum 函数与滞后函数结合使用(以类似于 WL 上升率的方式),但我不确定如何指定 60 分钟的时间间隔。
任何关于如何执行此操作的想法或不同的方法将不胜感激!谢谢:)!
解决方法
library(lubridate)
library(slider)
# Parse the timestamp strings into POSIXct, reading them as month/day/year.
# NOTE(review): mdy_hms() treats "11/05/2021" as 5 November; switch to
# dmy_hms() if the logger writes the day first.
df1[["Date.and.time"]] <- lubridate::mdy_hms(df1[["Date.and.time"]])

# Rolling total: for each tip, add up every tip recorded in the 60 minutes
# up to and including that tip's own timestamp.
df1[["hourly_total"]] <- slider::slide_index_dbl(
  .x = df1[["Rainfall"]],
  .i = df1[["Date.and.time"]],
  .f = sum,
  .before = lubridate::minutes(60)
)

# Keep the row(s) where the rolling 60-minute total peaks.
df1[df1[["hourly_total"]] == max(df1[["hourly_total"]]), ]
# Date.and.time Rainfall hourly_total
#23 2021-11-05 12:22:26 0.5 10
或者,这里有一种 dplyr 方法:把原始数据与一份"时间后移一小时、降雨量取负"的副本合并,再按时间排序并求累积和。这样每条记录在 60 分钟后会被对应的负值抵消,累积和就只包含过去 60 分钟内的总量。
library(dplyr)
# Rolling 60-minute total as a running sum over a signed event stream:
# every tip appears once with its real amount and once more, one hour later,
# with the amount negated, so it drops out of the running sum after an hour.
bind_rows(
  df1,
  mutate(
    df1,
    Date.and.time = Date.and.time + dhours(1),
    Rainfall = -Rainfall
  )
) %>%
  arrange(Date.and.time) %>%
  mutate(Rainfall_60m = cumsum(Rainfall))
如果我们将其输入 ggplot,我们可以直观地看到它是如何工作的:
# Feed the pipeline above (elided here as `...`) into ggplot: the step line
# traces the running 60-minute total, the bars show the individual positive
# and negative rainfall entries.
... %>%
  ggplot(mapping = aes(x = Date.and.time, y = Rainfall_60m)) +
  geom_step() +
  geom_col(mapping = aes(y = Rainfall))
另一种做法是使用 data.table:对表做非等值自连接,为每条记录匹配其之前 60 分钟内(含自身)的所有记录并求和:
library(data.table)
# Turn dat into a data.table by reference so := can add columns in place.
setDT(dat)

# Parse the day/month/year timestamp strings as POSIXct in UTC.
dat[, Datetime := as.POSIXct(Datetime, format = "%d/%m/%Y %H:%M:%S", tz = "UTC")]

# Temporary helper column: start of each row's one-hour look-back window.
dat[, subthour := Datetime - as.difftime(1, units = "hours")]

# Non-equi self-join: for every row, match all rows whose Datetime lies
# between that row's subthour and its own Datetime (both ends inclusive),
# then sum their Rainfall once per joining row (by = .EACHI).
dat[, Sum_Rainfall := dat[
  dat,
  sum(Rainfall),
  by = .EACHI,
  on = c("Datetime>=subthour", "Datetime<=Datetime")
][["V1"]]]

# Drop the helper column again.
dat[, subthour := NULL]
结果:
# Datetime Rainfall Sum_Rainfall
# 1: 2021-05-11 11:05:17 0.5 0.5
# 2: 2021-05-11 11:15:10 0.5 1.0
# 3: 2021-05-11 11:20:04 0.5 1.5
# 4: 2021-05-11 11:28:22 0.5 2.0
# 5: 2021-05-11 11:33:25 0.5 2.5
# 6: 2021-05-11 11:36:39 0.5 3.0
# 7: 2021-05-11 11:39:50 0.5 3.5
# 8: 2021-05-11 11:41:43 0.5 4.0
# 9: 2021-05-11 11:43:35 0.5 4.5
#10: 2021-05-11 11:44:57 0.5 5.0
#11: 2021-05-11 11:47:02 0.5 5.5
#12: 2021-05-11 11:48:42 0.5 6.0
#13: 2021-05-11 11:53:04 0.5 6.5
#14: 2021-05-11 11:58:33 0.5 7.0
#15: 2021-05-11 12:01:27 0.5 7.5
#16: 2021-05-11 12:02:52 0.5 8.0
#17: 2021-05-11 12:07:35 0.5 8.0
#18: 2021-05-11 12:10:32 0.5 8.5
#19: 2021-05-11 12:12:55 0.5 9.0
#20: 2021-05-11 12:16:22 0.5 9.0
#21: 2021-05-11 12:17:45 0.5 9.5
#22: 2021-05-11 12:20:14 0.5 9.5
#23: 2021-05-11 12:22:26 0.5 10.0
# Datetime Rainfall Sum_Rainfall
# Print the table, which carries the Sum_Rainfall column after the join above.
dat
其中 `dat` 的定义如下:
# Sample data: 23 tipping-bucket readings of 0.5 each, with the timestamps
# kept as character strings (they are parsed later).
dat <- structure(
  list(
    Datetime = c(
      "11/05/2021 11:05:17", "11/05/2021 11:15:10", "11/05/2021 11:20:04",
      "11/05/2021 11:28:22", "11/05/2021 11:33:25", "11/05/2021 11:36:39",
      "11/05/2021 11:39:50", "11/05/2021 11:41:43", "11/05/2021 11:43:35",
      "11/05/2021 11:44:57", "11/05/2021 11:47:02", "11/05/2021 11:48:42",
      "11/05/2021 11:53:04", "11/05/2021 11:58:33", "11/05/2021 12:01:27",
      "11/05/2021 12:02:52", "11/05/2021 12:07:35", "11/05/2021 12:10:32",
      "11/05/2021 12:12:55", "11/05/2021 12:16:22", "11/05/2021 12:17:45",
      "11/05/2021 12:20:14", "11/05/2021 12:22:26"
    ),
    # One 0.5 reading per timestamp. The column must have the same length as
    # Datetime and as the 23 rows declared in row.names; a shorter vector
    # leaves the data frame malformed.
    Rainfall = rep(0.5, 23)
  ),
  class = "data.frame",
  row.names = c(NA, -23L)
)