问题描述
基本上,这是来自How to calculate daily average from ERA5 hourly netCDF data?的转帖或跟进 我有四个大气变量,即。露点温度、长波辐射、短波辐射和风速(根据 u10 和 v10 风分量计算)。所有都是小时刻度,我需要将它们转换为日刻度(请注意,这里我仅给出了露点温度的示例)。为了首先做到这一点,我应用了以下命令
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
import math as math
import pandas as pd
####measured lab data - nh2cl decay
#data set 1
t_data1 = [0,0.08333,0.5,1,4,22.6167] #nh2cl
x_data1 = [0,4.91e-5,4.57e-5,4.74e-5,4.17e-5,2.76e-5] #8.01e-5
x_data1mgl = [0,3.48,3.24,3.36,2.96,1.96]#5.68
#data set 2
t_data2 = [0,22.8167]
x_data2 = [0,5.92e-5,5.7e-5,5.64e-5,5.30e-5,4.6e-5] #8.01e-5
x_data2mgl = [0,4.2,4.04,3.76,2.88]#5.68
####Preparing lab data and time dataframe
#combine data and time in the same dataframe
data1 = pd.DataFrame({'time':t_data1,'x1':x_data1,'x1mgl': x_data1mgl})
data2 = pd.DataFrame({'time':t_data2,'x2':x_data2,'x2mgl': x_data2mgl})
data1.set_index('time',inplace=True)
data2.set_index('time',inplace=True)
# merge dataframes
data = data1.join(data2,how='outer')
# simulation time points
dftp = pd.DataFrame({'time':np.linspace(0,5,100)})
dftp.set_index('time',inplace=True)
# merge dataframes
dff = data.join(dftp,how='outer')
z1 = (dff['x1']==dff['x1']).astype(int)
z2 = (dff['x2']==dff['x2']).astype(int)
# replace NaN with zeros
df0 = dff.fillna(value=0)
####Estimator Model
m = GEKKO(remote=False)
#m = GEKKO()
m.time = df0.index.values
# measurements - Gekko arrays to store measured data
xm = m.Array(m.Param,2)
xm[0].value = df0['x1'].values
xm[1].value = df0['x2'].values
####inital measured values
hocl_init_val=m.Array(m.Const,2)
hocl_init_val[0].value= 8.01e-5
hocl_init_val[1].value= 8.01e-5
nh3_init_val=m.Array(m.Const,2)
nh3_init_val[0].value= 1.37e-3
nh3_init_val[1].value= 6.82e-4
h_init_val=m.Array(m.Const,2)
h_init_val[0].value= 6.31e-8
h_init_val[1].value= 2e-8
h2co3_init_val=m.Array(m.Const,2)
h2co3_init_val[0].value= 5.39e-4
h2co3_init_val[1].value= 1.88e-4
hco3_init_val=m.Array(m.Const,2)
hco3_init_val[0].value= 3.46e-3
hco3_init_val[1].value= 3.80e-3
co32_init_val=m.Array(m.Const,2)
co32_init_val[0].value= 2.16e-6
co32_init_val[1].value= 7.5e-6
alk_init_val=m.Array(m.Const,2)
alk_init_val[0].value= 3.46e-3
alk_init_val[1].value= 3.82-3
# hocl=m.Array(m.Var,2)
# nh3=m.Array(m.Var,2)
# nh2cl=m.Array(m.Var,2,lb=1e-10)
# nh2cl_meas=m.Array(m.Param,2)
# nhcl2=m.Array(m.Var,lb=1e-10)
# h=m.Array(m.Var,2)
# oh=m.Array(m.Var,2)
# I=m.Array(m.Var,lb=1e-10)
# ocl=m.Array(m.Var,2)
# nh4=m.Array(m.Var,2)
# h2co3=m.Array(m.Var,2)
# hco3=m.Array(m.Var,2)
# co32=m.Array(m.Var,2)
# alk=m.Array(m.Var,2)
# #DOC1=m.Array(m.Var,2)
# #DOC2=m.Array(m.Var,2)
# cnh3=m.Array(m.Var,2)
# cnh2cl=m.Array(m.Var,2)
r1=m.Array(m.Var,2)
r2=m.Array(m.Var,2)
r3=m.Array(m.Var,2)
r4=m.Array(m.Var,2)
r5=m.Array(m.Var,2)
r6=m.Array(m.Var,2)
r7=m.Array(m.Var,2)
r8=m.Array(m.Var,2)
r9=m.Array(m.Var,2)
r10=m.Array(m.Var,2)
r11=m.Array(m.Var,2)
r12=m.Array(m.Var,2)
r13=m.Array(m.Var,2)
r14=m.Array(m.Var,2)
r15=m.Array(m.Var,2)
r16=m.Array(m.Var,2)
#Define GEKKO variables that determine if time point contains data to be used in regression
#index for objective (0=not measured,1=measured)
zm = m.Array(m.Param,2)
zm[0].value=z1
zm[1].value=z2
# fit to measurement
x=m.Array(m.Param,2)
x[0].value=x_data1[0]
x[1].value=x_data2[0]
meas=m.Array(m.Param,2)
#### adjustable parameters
kdoc1 = m.FV(1.2334e6,lb=1,ub=100000000000) #m-1h-1
kdoc2 = m.FV(3.8809e9,ub=10000000000) #m-1h-1
x10=3.1684e-5
x20=3.1308e-5
#DOC10 = m.FV(x10,lb=1e-6,ub=1)
#DOC20 = m.FV(x20,ub=1)
#constrains
DOC10=m.Array(m.FV,2)
DOC10[0].value=x10
DOC10[1].value=x10*0.5
DOC20=m.Array(m.FV,2)
DOC20[0].value=x20
DOC20[1].value=x20*0.5
##variables connected to the parameters DOC20 and DOC10: DOC10 is DOC1 when t=0 and DOC20 is DOC2 when t=0
#DOC1=m.Array(m.SV,fixed_initial=False)
##DOC1[0].value=x10
##DOC1[1].value=x10*0.5
#DOC2=m.Array(m.SV,fixed_initial=False)
##DOC2[0].value=x20
##DOC2[1].value=x20*0.5
#t = m.Param(value=m.time)
####Rate constants
k1 = m.Const(1.5e10)
k2 = m.Const(7.6e-2)
k3 = m.Const(1e6)
k4 = m.Const(2.3e-3)
k6 = m.Const(2.2e8)
k7 = m.Const(4e5)
k8 = m.Const(1e8)
k9 = m.Const(3e7)
k10 = m.Const(55)
k11 = m.Const(3.16e-8*1e10)
k12 = m.Const(1e10)
k13 = m.Const(5.01e-10*1e10)
k14 = m.Const(1e10)
k5=m.Array(m.Var,2)
for i in range(2):
m.Connection(DOC2[i],DOC20[i],pos1=1,pos2=1,node1=1,node2=1)
m.Connection(DOC1[i],DOC10[i],node2=1)
#variables - initial values
hocl[i] = m.Var(value=hocl_init_val[i])
nh3[i] = m.Var(value=nh3_init_val[i])
nh2cl[i] = m.Var(value=x[i])
nh2cl_meas[i] = m.Param(xm[i])
meas[i] = m.Param(zm[i])
nhcl2[i] = m.Var(value=0)
h[i] = m.Var(value=h_init_val[i])
oh[i] = m.Var(value=1e-14/h_init_val[i])
I[i] = m.Var(value=0)
ocl[i]= m.Var(value=0)
nh4[i] = m.Var(value=0)
h2co3[i] = m.Var(value=h2co3_init_val[i])
hco3[i] = m.Var(value=hco3_init_val[i])
co32[i] = m.Var(value=co32_init_val[i])
alk[i] = m.Var(value=alk_init_val[i])
DOC1[i] = m.SV(value=DOC10[i],fixed_initial=False)
DOC2[i] = m.SV(value=DOC20[i],fixed_initial=False)
cnh3[i] = m.Var(value=0)
cnh2cl[i] = m.Var(value=0)
###Equations 1
m.Equation(k5[i] == 2.5e7*h[i]+4e4*h2co3[i]+800*hco3[i])
###Equations 2
m.Equation(r1[i] == k1 * hocl[i] * nh3[i])
m.Equation(r2[i] == k2 * nh2cl[i])
m.Equation(r3[i] == k3 * hocl[i] * nh2cl[i])
m.Equation(r4[i] == k4 * nhcl2[i])
m.Equation(r5[i] == k5[i] * nh2cl[i] * nh2cl[i])
m.Equation(r6[i] == k6 * nhcl2[i] * nh3[i]* h[i])
m.Equation(r7[i] == k7 * nhcl2[i] * oh[i])
m.Equation(r8[i] == k8 * I[i] * nhcl2[i])
m.Equation(r9[i] == k9 * I[i] * nh2cl[i])
m.Equation(r10[i] == k10 * nh2cl[i] * nhcl2[i])
m.Equation(r11[i] == k11*hocl[i])
m.Equation(r12[i] == k12*h[i]*ocl[i])
m.Equation(r13[i] == k13*nh4[i])
m.Equation(r14[i] == k14*h[i]*nh3[i])
m.Equation(r15[i] == kdoc1*DOC1[i]*nh2cl[i])
m.Equation(r16[i] == kdoc2*DOC2[i]*hocl[i])
####Equations 3
m.Equation(hocl[i].dt() == -r1[i] + r2[i] - r3[i] + r4[i] + r8[i] - r16[i] - r11[i] + r12[i])
m.Equation(nh3[i].dt() == -r1[i] + r2[i] + r5[i] - r6[i] + r13[i] - r14[i])
m.Equation(nh2cl[i].dt() == r1[i] - r2[i] - r3[i] + r4[i] - r5[i] + r6[i] - r9[i] - r10[i] - r15[i])
m.Equation(nhcl2[i].dt() == r3[i] - r4[i] + r5[i] - r6[i] - r7[i] - r8[i] - r10[i])
m.Equation(h[i].dt() == 0)
m.Equation(oh[i] == 1e-14/h[i])
m.Equation(I[i].dt() == r7[i] - r8[i] - r9[i])
m.Equation(ocl[i].dt() == r11[i] - r12[i])
m.Equation(nh4[i].dt() == -r13[i] + r14[i])
m.Equation(h2co3[i] == (hco3[i]*h[i])/5.01e-7)
m.Equation(hco3[i] == alk[i] - 2*co32[i] - oh[i] + h[i])
m.Equation(co32[i] == (5.01e-11*hco3[i])/h[i])
m.Equation(alk[i].dt() == 0)
m.Equation(DOC1[i].dt() == -r15[i])
m.Equation(DOC2[i].dt() == -r16[i])
m.Equation(cnh3[i] == 17000*nh3[i])
m.Equation(cnh2cl[i] == 70900*nh2cl[i])
###obj function
m.Minimize(meas[i]*((nh2cl[i]-nh2cl_meas[i]))**2)
#Application options
m.options.soLVER = 1 #IPOPT solver=3 #APOPT solver=1
m.options.IMODE = 8 #Dynamic Simultaneous - estimation = 5 # Dynamic sequential - estimation = 8
m.options.RTOL = 1E-3
m.options.EV_TYPE = 2 #absolute error
m.options.NODES = 2 #collocation nodes (2 up to 6)
#m.options.MAX_ITER = 500
m.open_folder()
if True:
kdoc1.STATUS=1
kdoc2.STATUS=1
DOC10[i].STATUS=1
DOC20[i].STATUS=1
m.options.TIME_SHIFT = 0
try:
m.solve(disp=True)
except:
print("don't stop when not finding Doc10")
print('Final SSE Objective: ' + str(m.options.objfcnval))
print('Solution')
print('kdoc1 = ' + str(kdoc1.value[0]))
print('kdoc2 = ' + str(kdoc2.value[0]))
print('DOC10_1 = ' + str(DOC10[0].value[0]))
print('DOC20_1 = ' + str(DOC20[0].value[0]))
print('DOC10_2 = ' + str(DOC10[1].value[0]))
print('DOC20_2 = ' + str(DOC20[1].value[0]))
####Graphics
plt.figure(1,figsize=(8,5))
plt.subplot(2,1)
plt.plot(m.time,nh2cl[0],'m',label='Predicted nh2cl_1')
plt.plot(m.time,nh2cl[1],'c',label='Predicted nh2cl_2')
plt.plot(t_data1,x_data1,'mo',label='Meas 1')
plt.plot(t_data2,x_data2,'co',label='Meas 2')
plt.legend(loc='best')
plt.ylabel('mol/L')
plt.legend(loc='center right',bBox_to_anchor=(1.45,0.5),ncol=2) #shadow=True,plt.subplot(2,2)
plt.plot(m.time,cnh2cl[0].value,label ='Predicted nh2cl_1')
plt.plot(m.time,cnh2cl[1].value,label ='Predicted nh2cl_2')
plt.plot(t_data1,x_data1mgl,x_data2mgl,label='Meas 2')
plt.legend(loc='best')
plt.ylabel('mg Cl2/L')
plt.xlabel('time (h)')
plt.legend(loc='right',bBox_to_anchor=(1.4,plt.figure(2,figsize=(12,8))
plt.subplot(4,3,hocl[i].value,label ='hocl')
plt.legend()
plt.subplot(4,nh3[i].value,label ='nh3')
plt.legend()
plt.subplot(4,3)
plt.plot(m.time,nhcl2[i].value,label ='nhcl2')
plt.legend()
plt.subplot(4,4)
plt.plot(m.time,h[i].value,label ='h')
plt.legend()
plt.subplot(4,5)
plt.plot(m.time,oh[i].value,label ='oh')
plt.legend()
plt.subplot(4,6)
plt.plot(m.time,I[i].value,label ='I')
plt.legend()
plt.subplot(4,7)
plt.plot(m.time,ocl[i].value,label ='ocl')
plt.legend()
plt.subplot(4,8)
plt.plot(m.time,nh4[i].value,label ='nh4')
plt.legend()
plt.subplot(4,9)
plt.plot(m.time,h2co3[i].value,label ='h2co3')
plt.legend()
plt.xlabel('time (h)')
plt.subplot(4,10)
plt.plot(m.time,hco3[i].value,label ='hco3')
plt.legend()
plt.xlabel('time (h)')
plt.subplot(4,11)
plt.plot(m.time,co32[i].value,label ='co32')
plt.legend()
plt.xlabel('time (h)')
plt.subplot(4,12)
plt.plot(m.time,alk[i].value,label ='alk')
plt.legend()
plt.xlabel('time (h)')
plt.show()
并得到以下结果。
cdo daymean -shifttime,-1hour d2m.nc d2m_wb.nc
结果(日均值)对我来说似乎很可疑(因为在时间戳中它有 cdo sinfo d2m_wb.nc
File format : NetCDF2
-1 : Institut Source T Steptype Levels Num Points Num Dtype : Parameter ID
1 : unkNown unkNown v instant 1 1 475 1 F64 : -1
Grid coordinates :
1 : lonlat : points=475 (19x25)
lon : 85.5 to 90 by 0.25 degrees_east
lat : 21.5 to 27.5 by 0.25 degrees_north
Vertical coordinates :
1 : surface : levels=1
Time coordinate : 25904 steps
RefTime = 1900-01-01 00:00:00 Units = hours Calendar = gregorian Bounds = true
YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss
1949-12-31 23:00:00 1950-01-01 11:00:00 1950-01-02 11:00:00 1950-01-03 11:00:00
1950-01-04 11:00:00 1950-01-05 11:00:00 1950-01-06 11:00:00 1950-01-07 11:00:00
1950-01-08 11:00:00 1950-01-09 11:00:00 1950-01-10 11:00:00 1950-01-11 11:00:00
1950-01-12 11:00:00 1950-01-13 11:00:00 1950-01-14 11:00:00 1950-01-15 11:00:00
1950-01-16 11:00:00 1950-01-17 11:00:00 1950-01-18 11:00:00 1950-01-19 11:00:00
1950-01-20 11:00:00 1950-01-21 11:00:00 1950-01-22 11:00:00 1950-01-23 11:00:00
1950-01-24 11:00:00 1950-01-25 11:00:00 1950-01-26 11:00:00 1950-01-27 11:00:00
1950-01-28 11:00:00 1950-01-29 11:00:00 1950-01-30 11:00:00 1950-01-31 11:00:00
..................................................................................
................................................................................
................................................................................
.................
2020-10-31 11:00:00 2020-11-01 11:00:00 2020-11-02 11:00:00 2020-11-03 11:00:00
2020-11-04 11:00:00 2020-11-05 11:00:00 2020-11-06 11:00:00 2020-11-07 11:00:00
2020-11-08 11:00:00 2020-11-09 11:00:00 2020-11-10 11:00:00 2020-11-11 11:00:00
2020-11-12 11:00:00 2020-11-13 11:00:00 2020-11-14 11:00:00 2020-11-15 11:00:00
2020-11-16 11:00:00 2020-11-17 11:00:00 2020-11-18 11:00:00 2020-11-19 11:00:00
2020-11-20 11:00:00 2020-11-21 11:00:00 2020-11-22 11:00:00 2020-11-23 11:00:00
2020-11-24 11:00:00 2020-11-25 11:00:00 2020-11-26 11:00:00 2020-11-27 11:00:00
2020-11-28 11:00:00 2020-11-29 11:00:00 2020-11-30 11:00:00 2020-12-31 23:00:00
cdo sinfo: Processed 1 variable over 25904 timesteps [6.03s 37MB
)。因此,我在 CDO 论坛上问了同样的问题,他们建议将 1 分钟而不是 1 小时后移。
11:00:00
此处,(cdo daymean -shifttime,-1min d2m.nc d2m_wb.nc
的)时间戳显示 d2m_wb.nc
而不是 12:00:00
。我不确定结果是否正确。因此,我尝试在 python (3.8.5) 中使用以下脚本进行交叉检查。
11:00:00
在计算 import xarray as xr
hourly_d2m = xr.open_dataset(r'E:\Data\Rda\d2m\d2m.nc')
d2m = hourly_d2m['d2m']
d2m= hourly_d2m.shift(time=-1).dropna(dim='time',how='all')
sds = d2m.resample(time='D').mean(dim='time')
sds.to_netcdf('E:\Data\Rda\d2m\daily\d2m_wb.nc')
的每日平均值后,我得到了 00:00:00
时间戳。现在,问题是不同的软件和命令正在为一个共同的目标(即日均值)产生不同的结果。
我完全困惑,正在寻找合适的解决方案或说明。谢谢。
解决方法
考虑重新阅读我对您上一个问题的回答。正如我当时所说,时间步长软件在聚合输出中使用哪个有点武断。这就是 CDO 为用户提供更改它的选项的原因。例如,如果需要,您可以将其设置为第一个或最后一个时间步。 xarray 对选择哪个时间步使用了稍微不同的假设。在选择的时间步长方面没有对错。它们都实现了相同的目标,即为您提供正确日期的时间平均值。