为什么不同的软件和命令在从 ERA5 小时数据计算每日平均值时会产生不同的结果?

问题描述

基本上,这是来自How to calculate daily average from ERA5 hourly netCDF data?的转帖或跟进 我有四个大气变量,即。露点温度、长波辐射、短波辐射和风速(根据 u10 和 v10 风分量计算)。所有都是小时刻度,我需要将它们转换为日刻度(请注意,这里我仅给出了露点温度的示例)。为了首先做到这一点,我应用了以下命令

from gekko import GEKKO import numpy as np import matplotlib.pyplot as plt import math as math import pandas as pd ####measured lab data - nh2cl decay #data set 1 t_data1 = [0,0.08333,0.5,1,4,22.6167] #nh2cl x_data1 = [0,4.91e-5,4.57e-5,4.74e-5,4.17e-5,2.76e-5] #8.01e-5 x_data1mgl = [0,3.48,3.24,3.36,2.96,1.96]#5.68 #data set 2 t_data2 = [0,22.8167] x_data2 = [0,5.92e-5,5.7e-5,5.64e-5,5.30e-5,4.6e-5] #8.01e-5 x_data2mgl = [0,4.2,4.04,3.76,2.88]#5.68 ####Preparing lab data and time dataframe #combine data and time in the same dataframe data1 = pd.DataFrame({'time':t_data1,'x1':x_data1,'x1mgl': x_data1mgl}) data2 = pd.DataFrame({'time':t_data2,'x2':x_data2,'x2mgl': x_data2mgl}) data1.set_index('time',inplace=True) data2.set_index('time',inplace=True) # merge dataframes data = data1.join(data2,how='outer') # simulation time points dftp = pd.DataFrame({'time':np.linspace(0,5,100)}) dftp.set_index('time',inplace=True) # merge dataframes dff = data.join(dftp,how='outer') z1 = (dff['x1']==dff['x1']).astype(int) z2 = (dff['x2']==dff['x2']).astype(int) # replace NaN with zeros df0 = dff.fillna(value=0) ####Estimator Model m = GEKKO(remote=False) #m = GEKKO() m.time = df0.index.values # measurements - Gekko arrays to store measured data xm = m.Array(m.Param,2) xm[0].value = df0['x1'].values xm[1].value = df0['x2'].values ####inital measured values hocl_init_val=m.Array(m.Const,2) hocl_init_val[0].value= 8.01e-5 hocl_init_val[1].value= 8.01e-5 nh3_init_val=m.Array(m.Const,2) nh3_init_val[0].value= 1.37e-3 nh3_init_val[1].value= 6.82e-4 h_init_val=m.Array(m.Const,2) h_init_val[0].value= 6.31e-8 h_init_val[1].value= 2e-8 h2co3_init_val=m.Array(m.Const,2) h2co3_init_val[0].value= 5.39e-4 h2co3_init_val[1].value= 1.88e-4 hco3_init_val=m.Array(m.Const,2) hco3_init_val[0].value= 3.46e-3 hco3_init_val[1].value= 3.80e-3 co32_init_val=m.Array(m.Const,2) co32_init_val[0].value= 2.16e-6 co32_init_val[1].value= 7.5e-6 alk_init_val=m.Array(m.Const,2) alk_init_val[0].value= 3.46e-3 alk_init_val[1].value= 3.82-3 # hocl=m.Array(m.Var,2) # nh3=m.Array(m.Var,2) # nh2cl=m.Array(m.Var,2,lb=1e-10) # nh2cl_meas=m.Array(m.Param,2) # nhcl2=m.Array(m.Var,lb=1e-10) # h=m.Array(m.Var,2) # oh=m.Array(m.Var,2) # I=m.Array(m.Var,lb=1e-10) # ocl=m.Array(m.Var,2) # nh4=m.Array(m.Var,2) # h2co3=m.Array(m.Var,2) # hco3=m.Array(m.Var,2) # co32=m.Array(m.Var,2) # alk=m.Array(m.Var,2) # #DOC1=m.Array(m.Var,2) # #DOC2=m.Array(m.Var,2) # cnh3=m.Array(m.Var,2) # cnh2cl=m.Array(m.Var,2) r1=m.Array(m.Var,2) r2=m.Array(m.Var,2) r3=m.Array(m.Var,2) r4=m.Array(m.Var,2) r5=m.Array(m.Var,2) r6=m.Array(m.Var,2) r7=m.Array(m.Var,2) r8=m.Array(m.Var,2) r9=m.Array(m.Var,2) r10=m.Array(m.Var,2) r11=m.Array(m.Var,2) r12=m.Array(m.Var,2) r13=m.Array(m.Var,2) r14=m.Array(m.Var,2) r15=m.Array(m.Var,2) r16=m.Array(m.Var,2) #Define GEKKO variables that determine if time point contains data to be used in regression #index for objective (0=not measured,1=measured) zm = m.Array(m.Param,2) zm[0].value=z1 zm[1].value=z2 # fit to measurement x=m.Array(m.Param,2) x[0].value=x_data1[0] x[1].value=x_data2[0] meas=m.Array(m.Param,2) #### adjustable parameters kdoc1 = m.FV(1.2334e6,lb=1,ub=100000000000) #m-1h-1 kdoc2 = m.FV(3.8809e9,ub=10000000000) #m-1h-1 x10=3.1684e-5 x20=3.1308e-5 #DOC10 = m.FV(x10,lb=1e-6,ub=1) #DOC20 = m.FV(x20,ub=1) #constrains DOC10=m.Array(m.FV,2) DOC10[0].value=x10 DOC10[1].value=x10*0.5 DOC20=m.Array(m.FV,2) DOC20[0].value=x20 DOC20[1].value=x20*0.5 ##variables connected to the parameters DOC20 and DOC10: DOC10 is DOC1 when t=0 and DOC20 is DOC2 when t=0 #DOC1=m.Array(m.SV,fixed_initial=False) ##DOC1[0].value=x10 ##DOC1[1].value=x10*0.5 #DOC2=m.Array(m.SV,fixed_initial=False) ##DOC2[0].value=x20 ##DOC2[1].value=x20*0.5 #t = m.Param(value=m.time) ####Rate constants k1 = m.Const(1.5e10) k2 = m.Const(7.6e-2) k3 = m.Const(1e6) k4 = m.Const(2.3e-3) k6 = m.Const(2.2e8) k7 = m.Const(4e5) k8 = m.Const(1e8) k9 = m.Const(3e7) k10 = m.Const(55) k11 = m.Const(3.16e-8*1e10) k12 = m.Const(1e10) k13 = m.Const(5.01e-10*1e10) k14 = m.Const(1e10) k5=m.Array(m.Var,2) for i in range(2): m.Connection(DOC2[i],DOC20[i],pos1=1,pos2=1,node1=1,node2=1) m.Connection(DOC1[i],DOC10[i],node2=1) #variables - initial values hocl[i] = m.Var(value=hocl_init_val[i]) nh3[i] = m.Var(value=nh3_init_val[i]) nh2cl[i] = m.Var(value=x[i]) nh2cl_meas[i] = m.Param(xm[i]) meas[i] = m.Param(zm[i]) nhcl2[i] = m.Var(value=0) h[i] = m.Var(value=h_init_val[i]) oh[i] = m.Var(value=1e-14/h_init_val[i]) I[i] = m.Var(value=0) ocl[i]= m.Var(value=0) nh4[i] = m.Var(value=0) h2co3[i] = m.Var(value=h2co3_init_val[i]) hco3[i] = m.Var(value=hco3_init_val[i]) co32[i] = m.Var(value=co32_init_val[i]) alk[i] = m.Var(value=alk_init_val[i]) DOC1[i] = m.SV(value=DOC10[i],fixed_initial=False) DOC2[i] = m.SV(value=DOC20[i],fixed_initial=False) cnh3[i] = m.Var(value=0) cnh2cl[i] = m.Var(value=0) ###Equations 1 m.Equation(k5[i] == 2.5e7*h[i]+4e4*h2co3[i]+800*hco3[i]) ###Equations 2 m.Equation(r1[i] == k1 * hocl[i] * nh3[i]) m.Equation(r2[i] == k2 * nh2cl[i]) m.Equation(r3[i] == k3 * hocl[i] * nh2cl[i]) m.Equation(r4[i] == k4 * nhcl2[i]) m.Equation(r5[i] == k5[i] * nh2cl[i] * nh2cl[i]) m.Equation(r6[i] == k6 * nhcl2[i] * nh3[i]* h[i]) m.Equation(r7[i] == k7 * nhcl2[i] * oh[i]) m.Equation(r8[i] == k8 * I[i] * nhcl2[i]) m.Equation(r9[i] == k9 * I[i] * nh2cl[i]) m.Equation(r10[i] == k10 * nh2cl[i] * nhcl2[i]) m.Equation(r11[i] == k11*hocl[i]) m.Equation(r12[i] == k12*h[i]*ocl[i]) m.Equation(r13[i] == k13*nh4[i]) m.Equation(r14[i] == k14*h[i]*nh3[i]) m.Equation(r15[i] == kdoc1*DOC1[i]*nh2cl[i]) m.Equation(r16[i] == kdoc2*DOC2[i]*hocl[i]) ####Equations 3 m.Equation(hocl[i].dt() == -r1[i] + r2[i] - r3[i] + r4[i] + r8[i] - r16[i] - r11[i] + r12[i]) m.Equation(nh3[i].dt() == -r1[i] + r2[i] + r5[i] - r6[i] + r13[i] - r14[i]) m.Equation(nh2cl[i].dt() == r1[i] - r2[i] - r3[i] + r4[i] - r5[i] + r6[i] - r9[i] - r10[i] - r15[i]) m.Equation(nhcl2[i].dt() == r3[i] - r4[i] + r5[i] - r6[i] - r7[i] - r8[i] - r10[i]) m.Equation(h[i].dt() == 0) m.Equation(oh[i] == 1e-14/h[i]) m.Equation(I[i].dt() == r7[i] - r8[i] - r9[i]) m.Equation(ocl[i].dt() == r11[i] - r12[i]) m.Equation(nh4[i].dt() == -r13[i] + r14[i]) m.Equation(h2co3[i] == (hco3[i]*h[i])/5.01e-7) m.Equation(hco3[i] == alk[i] - 2*co32[i] - oh[i] + h[i]) m.Equation(co32[i] == (5.01e-11*hco3[i])/h[i]) m.Equation(alk[i].dt() == 0) m.Equation(DOC1[i].dt() == -r15[i]) m.Equation(DOC2[i].dt() == -r16[i]) m.Equation(cnh3[i] == 17000*nh3[i]) m.Equation(cnh2cl[i] == 70900*nh2cl[i]) ###obj function m.Minimize(meas[i]*((nh2cl[i]-nh2cl_meas[i]))**2) #Application options m.options.soLVER = 1 #IPOPT solver=3 #APOPT solver=1 m.options.IMODE = 8 #Dynamic Simultaneous - estimation = 5 # Dynamic sequential - estimation = 8 m.options.RTOL = 1E-3 m.options.EV_TYPE = 2 #absolute error m.options.NODES = 2 #collocation nodes (2 up to 6) #m.options.MAX_ITER = 500 m.open_folder() if True: kdoc1.STATUS=1 kdoc2.STATUS=1 DOC10[i].STATUS=1 DOC20[i].STATUS=1 m.options.TIME_SHIFT = 0 try: m.solve(disp=True) except: print("don't stop when not finding Doc10") print('Final SSE Objective: ' + str(m.options.objfcnval)) print('Solution') print('kdoc1 = ' + str(kdoc1.value[0])) print('kdoc2 = ' + str(kdoc2.value[0])) print('DOC10_1 = ' + str(DOC10[0].value[0])) print('DOC20_1 = ' + str(DOC20[0].value[0])) print('DOC10_2 = ' + str(DOC10[1].value[0])) print('DOC20_2 = ' + str(DOC20[1].value[0])) ####Graphics plt.figure(1,figsize=(8,5)) plt.subplot(2,1) plt.plot(m.time,nh2cl[0],'m',label='Predicted nh2cl_1') plt.plot(m.time,nh2cl[1],'c',label='Predicted nh2cl_2') plt.plot(t_data1,x_data1,'mo',label='Meas 1') plt.plot(t_data2,x_data2,'co',label='Meas 2') plt.legend(loc='best') plt.ylabel('mol/L') plt.legend(loc='center right',bBox_to_anchor=(1.45,0.5),ncol=2) #shadow=True,plt.subplot(2,2) plt.plot(m.time,cnh2cl[0].value,label ='Predicted nh2cl_1') plt.plot(m.time,cnh2cl[1].value,label ='Predicted nh2cl_2') plt.plot(t_data1,x_data1mgl,x_data2mgl,label='Meas 2') plt.legend(loc='best') plt.ylabel('mg Cl2/L') plt.xlabel('time (h)') plt.legend(loc='right',bBox_to_anchor=(1.4,plt.figure(2,figsize=(12,8)) plt.subplot(4,3,hocl[i].value,label ='hocl') plt.legend() plt.subplot(4,nh3[i].value,label ='nh3') plt.legend() plt.subplot(4,3) plt.plot(m.time,nhcl2[i].value,label ='nhcl2') plt.legend() plt.subplot(4,4) plt.plot(m.time,h[i].value,label ='h') plt.legend() plt.subplot(4,5) plt.plot(m.time,oh[i].value,label ='oh') plt.legend() plt.subplot(4,6) plt.plot(m.time,I[i].value,label ='I') plt.legend() plt.subplot(4,7) plt.plot(m.time,ocl[i].value,label ='ocl') plt.legend() plt.subplot(4,8) plt.plot(m.time,nh4[i].value,label ='nh4') plt.legend() plt.subplot(4,9) plt.plot(m.time,h2co3[i].value,label ='h2co3') plt.legend() plt.xlabel('time (h)') plt.subplot(4,10) plt.plot(m.time,hco3[i].value,label ='hco3') plt.legend() plt.xlabel('time (h)') plt.subplot(4,11) plt.plot(m.time,co32[i].value,label ='co32') plt.legend() plt.xlabel('time (h)') plt.subplot(4,12) plt.plot(m.time,alk[i].value,label ='alk') plt.legend() plt.xlabel('time (h)') plt.show() 并得到以下结果。

cdo daymean -shifttime,-1hour d2m.nc d2m_wb.nc

结果(日均值)对我来说似乎很可疑(因为在时间戳中它有 cdo sinfo d2m_wb.nc File format : NetCDF2 -1 : Institut Source T Steptype Levels Num Points Num Dtype : Parameter ID 1 : unkNown unkNown v instant 1 1 475 1 F64 : -1 Grid coordinates : 1 : lonlat : points=475 (19x25) lon : 85.5 to 90 by 0.25 degrees_east lat : 21.5 to 27.5 by 0.25 degrees_north Vertical coordinates : 1 : surface : levels=1 Time coordinate : 25904 steps RefTime = 1900-01-01 00:00:00 Units = hours Calendar = gregorian Bounds = true YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss 1949-12-31 23:00:00 1950-01-01 11:00:00 1950-01-02 11:00:00 1950-01-03 11:00:00 1950-01-04 11:00:00 1950-01-05 11:00:00 1950-01-06 11:00:00 1950-01-07 11:00:00 1950-01-08 11:00:00 1950-01-09 11:00:00 1950-01-10 11:00:00 1950-01-11 11:00:00 1950-01-12 11:00:00 1950-01-13 11:00:00 1950-01-14 11:00:00 1950-01-15 11:00:00 1950-01-16 11:00:00 1950-01-17 11:00:00 1950-01-18 11:00:00 1950-01-19 11:00:00 1950-01-20 11:00:00 1950-01-21 11:00:00 1950-01-22 11:00:00 1950-01-23 11:00:00 1950-01-24 11:00:00 1950-01-25 11:00:00 1950-01-26 11:00:00 1950-01-27 11:00:00 1950-01-28 11:00:00 1950-01-29 11:00:00 1950-01-30 11:00:00 1950-01-31 11:00:00 .................................................................................. ................................................................................ ................................................................................ ................. 2020-10-31 11:00:00 2020-11-01 11:00:00 2020-11-02 11:00:00 2020-11-03 11:00:00 2020-11-04 11:00:00 2020-11-05 11:00:00 2020-11-06 11:00:00 2020-11-07 11:00:00 2020-11-08 11:00:00 2020-11-09 11:00:00 2020-11-10 11:00:00 2020-11-11 11:00:00 2020-11-12 11:00:00 2020-11-13 11:00:00 2020-11-14 11:00:00 2020-11-15 11:00:00 2020-11-16 11:00:00 2020-11-17 11:00:00 2020-11-18 11:00:00 2020-11-19 11:00:00 2020-11-20 11:00:00 2020-11-21 11:00:00 2020-11-22 11:00:00 2020-11-23 11:00:00 2020-11-24 11:00:00 2020-11-25 11:00:00 2020-11-26 11:00:00 2020-11-27 11:00:00 2020-11-28 11:00:00 2020-11-29 11:00:00 2020-11-30 11:00:00 2020-12-31 23:00:00 cdo sinfo: Processed 1 variable over 25904 timesteps [6.03s 37MB )。因此,我在 CDO 论坛上问了同样的问题,他们建议将 1 分钟而不是 1 小时后移。

11:00:00

此处,(cdo daymean -shifttime,-1min d2m.nc d2m_wb.nc 的)时间戳显示 d2m_wb.nc 而不是 12:00:00。我不确定结果是否正确。因此,我尝试在 python (3.8.5) 中使用以下脚本进行交叉检查。

11:00:00

在计算 import xarray as xr hourly_d2m = xr.open_dataset(r'E:\Data\Rda\d2m\d2m.nc') d2m = hourly_d2m['d2m'] d2m= hourly_d2m.shift(time=-1).dropna(dim='time',how='all') sds = d2m.resample(time='D').mean(dim='time') sds.to_netcdf('E:\Data\Rda\d2m\daily\d2m_wb.nc') 的每日平均值后,我得到了 00:00:00 时间戳。现在,问题是不同的软件和命令正在为一个共同的目标(即日均值)产生不同的结果。 我完全困惑,正在寻找合适的解决方案或说明。谢谢。

解决方法

考虑重新阅读我对您上一个问题的回答。正如我当时所说,时间步长软件在聚合输出中使用哪个有点武断。这就是 CDO 为用户提供更改它的选项的原因。例如,如果需要,您可以将其设置为第一个或最后一个时间步。 xarray 对选择哪个时间步使用了稍微不同的假设。在选择的时间步长方面没有对错。它们都实现了相同的目标,即为您提供正确日期的时间平均值。