Python问题-相关随机样本和Cholesky分解

问题描述

import numpy as np
from scipy.linalg import cholesky

X1_temp = np.random.normal(0,1,(40,10)) # with mean 0 and std. dev. 1
C = cholesky(C0,lower = False)
X1 = X1_temp @ np.transpose(C)

非常感谢。这只是一个非常简单的任务。我得到了期望的协方差矩阵C0。我想生成随机相关数据矩阵X1,以使X1的协方差矩阵为C0。我使用Cholesky分解。代码使用Python。

但是,如果我随后手动使用np.cov()来计算X1的协方差矩阵,则结果与C0完全不同。结果请看下面。

C0
array([[0.00119545,0.00055428,0.00094478,0.00057466,0.00039038,0.0004846,0.00047505,0.00039403,0.00041767,0.00053985],[0.00055428,0.00055869,0.0005272,0.00046757,0.0002733,0.00037907,0.00034639,0.00030749,0.00034975,0.00041578],[0.00094478,0.00163294,0.00049306,0.00032083,0.0004176,0.000403,0.00038588,0.00037359,0.00050818],[0.00057466,0.00161212,0.00065667,0.00080889,0.00075579,0.00048425,0.00066279,0.00044288],[0.00039038,0.00076096,0.00054659,0.00061203,0.00032021,0.00047247,0.00025952],[0.0004846,0.00116606,0.00056961,0.00040392,0.00053751,0.00033647],[0.00047505,0.0008417,0.0003747,0.00054147,0.00033309],[0.00039403,0.00042903,0.00035186,0.00030802],[0.00041767,0.00061011,0.00033744],[0.00053985,0.00041578,0.00050818,0.00044288,0.00025952,0.00033647,0.00033309,0.00030802,0.00033744,0.00048086]])

np.cov(X1,rowvar=False)
array([[ 2.73579114e-03,1.11107458e-03,7.39905489e-04,8.80909471e-04,2.67940563e-04,3.00456896e-04,2.78558784e-04,1.95630006e-04,-8.84664656e-07,3.07873704e-04],[ 1.11107458e-03,7.67420975e-04,6.32049491e-05,7.29592024e-04,2.30491196e-04,2.16890868e-04,1.67582787e-04,8.49125674e-05,-8.34866957e-06,1.68615033e-04],[ 7.39905489e-04,1.14805709e-03,-2.24580150e-04,8.49680219e-05,-1.42327782e-05,-1.19349618e-04,-1.75276890e-05,3.43158455e-05,6.89819740e-05],[ 8.80909471e-04,1.44999733e-03,1.27584928e-04,3.49017835e-04,1.89078071e-04,4.95665628e-05,-3.51667683e-05,1.25940711e-04],[ 2.67940563e-04,4.63007494e-04,1.58792113e-04,5.37907654e-05,5.71734068e-05,2.18693890e-05,7.16343773e-05],[ 3.00456896e-04,4.23799861e-04,1.37080444e-05,9.08977322e-06,-6.28273606e-05,6.64415798e-06],[ 2.78558784e-04,2.33866264e-04,6.65816973e-05,1.84881869e-05,5.54885576e-05],[ 1.95630006e-04,2.04723105e-04,3.60286054e-05,4.11557090e-05],[-8.84664656e-07,1.43719872e-04,9.74047167e-06],[ 3.07873704e-04,1.68615033e-04,6.89819740e-05,1.25940711e-04,7.16343773e-05,6.64415798e-06,5.54885576e-05,4.11557090e-05,9.74047167e-06,1.24753756e-04]])

解决方法

计算C时不要转置X1

In [33]: C0
Out[33]: 
array([[0.00119545,0.00055428,0.00094478,0.00057466,0.00039038,0.0004846,0.00047505,0.00039403,0.00041767,0.00053985],[0.00055428,0.00055869,0.0005272,0.00046757,0.0002733,0.00037907,0.00034639,0.00030749,0.00034975,0.00041578],[0.00094478,0.00163294,0.00049306,0.00032083,0.0004176,0.000403,0.00038588,0.00037359,0.00050818],[0.00057466,0.00161212,0.00065667,0.00080889,0.00075579,0.00048425,0.00066279,0.00044288],[0.00039038,0.00076096,0.00054659,0.00061203,0.00032021,0.00047247,0.00025952],[0.0004846,0.00116606,0.00056961,0.00040392,0.00053751,0.00033647],[0.00047505,0.0008417,0.0003747,0.00054147,0.00033309],[0.00039403,0.00042903,0.00035186,0.00030802],[0.00041767,0.00061011,0.00033744],[0.00053985,0.00041578,0.00050818,0.00044288,0.00025952,0.00033647,0.00033309,0.00030802,0.00033744,0.00048086]])

In [34]: C = cholesky(C0,lower=False)

In [35]: n = 40

In [36]: X1_temp = np.random.normal(0,1,(n,10)) # with mean 0 and std. dev. 1

In [37]: X1 = X1_temp @ C

In [38]: np.cov(X1,rowvar=False)
Out[38]: 
array([[0.00170434,0.00091094,0.00134416,0.00056663,0.00063199,0.00050691,0.00052851,0.00058113,0.00070497,0.00094468],[0.00091094,0.00076975,0.00085689,0.00042849,0.00033691,0.00022588,0.00031262,0.00043736,0.00043965,0.00068883],[0.00134416,0.00177699,0.00060582,0.00059343,0.00035828,0.00047125,0.00062899,0.00069686,0.00082439],[0.00056663,0.00107095,0.0004217,0.00037837,0.00038153,0.00043858,0.00043721,0.00037525],[0.00063199,0.00079374,0.00042106,0.00051381,0.00035244,0.00050983,0.00024706],[0.00050691,0.00080684,0.0003886,0.00021881,0.00042829,0.00023707],[0.00052851,0.00057768,0.00029707,0.00039979,0.0002629 ],[0.00058113,0.00042199,0.00034844,0.00040239],[0.00070497,0.00058689,0.00043979],[0.00094468,0.00068883,0.00082439,0.00037525,0.00024706,0.00023707,0.0002629,0.00040239,0.00043979,0.00077079]])

对于测试,如果使用更多的样本,您将获得更具说服力的结果。

In [39]: n = 400

In [40]: X1_temp = np.random.normal(0,10)) # with mean 0 and std. dev. 1

In [41]: X1 = X1_temp @ C

In [42]: np.cov(X1,rowvar=False)
Out[42]: 
array([[0.00123199,0.00055531,0.00096428,0.0006604,0.00046758,0.00060183,0.00056823,0.00041429,0.00046345,0.00056343],[0.00055531,0.00054127,0.00055571,0.00048491,0.00028983,0.00038863,0.0003752,0.00029274,0.0003503,0.0004217 ],[0.00096428,0.00154136,0.00062761,0.00042062,0.00050582,0.0004616,0.00043355,0.00044465,0.00056019],[0.0006604,0.00152187,0.00065816,0.0008483,0.0007723,0.00051182,0.00069478,0.00049743],[0.00046758,0.00081247,0.00057251,0.00069877,0.00033519,0.00050191,0.00030041],[0.00060183,0.00130518,0.00059381,0.00044213,0.00057237,0.00039522],[0.00056823,0.00097244,0.00039577,0.00059776,0.00038547],[0.00041429,0.00044904,0.00036922,0.00033238],[0.00046345,0.00062956,0.00037302],[0.00056343,0.00056019,0.00049743,0.00030041,0.00039522,0.00038547,0.00033238,0.00037302,0.00050632]])