这个错误是什么意思? statsmodels -> Lowess回归

问题描述

我正在尝试使用statsmodels的非参数Lowess回归。

import statsmodels.api as sm
lowess = sm.nonparametric.lowess
x = # see array below
y = # array below

我要回归的两个数组如下:

y = array([0.39460831,0.3943878,0.42335204,0.45601645,0.35705666,0.43106245,0.40876906,0.40865107,0.44079581,0.40809921,0.34715472,0.28673869,0.41429625,0.33814808,0.36065362,0.42323234,0.43075707,0.39036774,0.45624754,0.4264153,0.40854912,0.38919367,0.47063437,0.41819225,0.43655294,0.39817544,0.41817408,0.38579407,0.3758216,0.39295075,0.35279793,0.41396078,0.35835224,0.39877632,0.41840176,0.42235122,0.31909751,0.41690713,0.40415778,0.36172602,0.34810291,0.45065063,0.44980572,0.4362235,0.4041436,0.34270833,0.45133976,0.43398212,0.4352659,0.27949522,0.47130217,0.35789634,0.35249196,0.35665419,0.45161807,0.43424503,0.35939123,0.45345458,0.3410767,0.43481544,0.33007379,0.27520451,0.35824199,0.35671327,0.39329133,0.3978407,0.38899924,0.44263212,0.36393358,0.26942863,0.47378067,0.41692978,0.31262075,0.45270572,0.45358348,0.46485582,0.38381883,0.40847049,0.4552592,0.33233801,0.37616339,0.45312021,0.34311701,0.45405103,0.33026848,0.35512348,0.35781538,0.34892051,0.39946672,0.45352116,0.3457032,0.39038526,0.45294301,0.33150022,0.41658717,0.45179918,0.44187217,0.45275743,0.42749683,0.43431399,0.41056891,0.34365128,0.31107728,0.45103128,0.45603937,0.45043726,0.44908607,0.44996669,0.44877551,0.2674405,0.4367155,0.37446672,0.29808357,0.41506218,0.34848782,0.26261556,0.3925812,0.39168392,0.39917386,0.43770031,0.34058531,0.43773453,0.39221085,0.45325688,0.39165179,0.38675654,0.33180298,0.42533256,0.38089742,0.33567492,0.45202054,0.46304505,0.24622921,0.38733682,0.43539081,0.29633964,0.37827505,0.38701604,0.38291542,0.4413525,0.43415077,0.19365321,0.26597312,0.46805815,0.38402756,0.344355,0.38900422,0.34102323,0.43035007,0.30790008,0.30624349,0.45190784,0.32628674,0.41108733,0.41423588,0.3024584,0.42424687,0.26391213,0.31816119,0.36106708,0.1717933,0.29266398,0.25844219,0.46372781,0.46637148,0.3663237,0.45297022,0.44975711,0.28564902,0.42772618,0.37050513,0.26967221,0.32682098,0.31635641,0.33347002,0.32393267,0.37002245,0.32407268,0.41696097,0.32144044,0.32856049,0.33036114,0.3018
5272,0.40384943,0.35919559,0.23375244,0.31330424,0.31227119,0.42390002,0.44658582,0.18798846,0.47670001,0.47980779,0.40565634,0.2525325,0.41990849,0.44037665,0.45987354,0.25672615,0.23839431,0.43833015,0.3100529,0.31093935,0.44371944,0.2827502,0.39270912,0.42325646,0.35252627,0.36903986,0.46089718,0.28396175,0.43911932,0.26672449,0.41892012,0.30693675,0.43318908,0.37950292,0.46129589,0.25452025,0.30712549,0.41901032,0.36828848,0.30788856,0.36771637,0.26494857,0.47713128,0.47389691,0.31507796,0.29791308,0.3593851,0.37455814,0.30016467,0.38229718,0.25025446,0.45963692,0.45522095,0.32985042,0.43283525,0.31217255,0.41351679,0.34508365,0.30047853,0.30118257,0.37682165,0.32832871,0.27933756,0.43506952,0.35988311,0.32919213,0.39700601,0.17783712,0.25290452,0.30147635,0.43682787,0.3481169,0.36671704,0.28059754,0.46192216,0.34731488,0.26537987,0.34751195,0.26354371,0.38429764,0.34672167,0.40366341,0.36729379,0.43119706,0.34186555,0.39534354,0.29777454,0.36771012,0.39062032,0.45505276,0.34036781,0.34268798,0.30934533,0.30802175,0.34121105,0.29611571,0.2990357,0.33412808,0.40805029,0.33208352,0.3091976,0.31022855,0.29812804,0.30051468,0.37954714,0.34171002,0.31469502,0.38749887,0.34095206,0.31293642,0.30649068,0.38043712,0.33196421,0.33587843,0.25991555,0.31701787,0.31000817,0.3177085,0.37388784,0.30877847,0.35030022,0.27287812,0.3431596,0.35110244,0.34890499,0.3495343,0.34234006,0.44562415,0.30204603,0.31174037,0.29278232,0.29226819,0.32403962,0.31514662,0.32231812,0.3897924,0.29584331,0.36638174,0.30530051,0.36568886,0.35720481,0.30552954,0.35051081,0.35642678,0.29582181,0.32978801,0.35005709,0.27489794,0.33016554,0.31645336,0.34399874,0.27676855,0.35875935,0.3608851,0.35956166,0.28778531,0.26424722,0.31621874,0.45565476,0.45541966,0.34187107,0.45180643,0.46396924,0.28593057,0.46273672,0.4631243,0.27556483,0.46390976,0.45542472,0.45565796,0.34237884,0.46306743,0.37166394,0.37553933,0.46451886,0.46732794,0.46331317,0.46787095,0.4561402,0.46620574,0.46676643,0.35720607,0.38720
735,0.46666564,0.35809937,0.38879081,0.43231008,0.43338316,0.2767611,0.31506268,0.43249034,0.42334298,0.41633699,0.27455114,0.31500147,0.42727391,0.43358162,0.43269304,0.37385574,0.27834975,0.43168928,0.42550945,0.43847614,0.42730983,0.37523301,0.42752101,0.42323596,0.35279942,0.43681325,0.3589688,0.42756389,0.33666363,0.34794648,0.35665194,0.26856497,0.34522123,0.26620059,0.43770414,0.36892801,0.3282393,0.3285013,0.43613956,0.43342386,0.4316186,0.19697687,0.23229651,0.36771552,0.3666904,0.36076434,0.35960867,0.32850539,0.27185207,0.19697687])

x = array([5,4,5,3,2,3])

当我对这些数据运行lowess时,会收到如下警告:

lowess(t5.F1.values,t5.nterms.values)
/anaconda3/lib/python3.7/site-packages/numpy/lib/function_base.py:3405: RuntimeWarning: Invalid value encountered in median
  r = func(a,**kwargs)
/anaconda3/lib/python3.7/site-packages/statsmodels/nonparametric/smoothers_lowess.py:165: RuntimeWarning: invalid value encountered in greater_equal
  res = _lowess(y,x,frac=frac,it=it,delta=delta)

输出结果如下:

array([[ 2.,nan],[ 2.,nan],...,[ 5.,nan]])

我不太确定为什么这行不通。(我可以在R中完成此操作,但我不想那样做 :-)

解决方法

statsmodels lowess不适用于具有许多相同x值的情况。参见https://github.com/statsmodels/statsmodels/issues/2449

问题出在邻域集的计算上:按照目前的实现方式,当存在大量相同的x值时,它无法正常工作。Lowess是为没有或只有少量重复值(ties)的连续变量而设计的。

当x只有少数几个不同的取值时,使用箱线图或类似图形更为合适。

,

您可以尝试使用np.array(data,dtype = np.float64)代替常规array()吗?

我的印象是数据类型有些奇怪。如果没有帮助,请提供更多信息,例如代码。