如何使用数据集中的特定列,通过贝叶斯岭回归绘制拟合曲线

问题描述

为了绘制贝叶斯回归曲线,我参考了 scikit-learn 的这个示例:https://scikit-learn.org/stable/auto_examples/linear_model/plot_bayesian_ridge_curvefit.html

我无法把数据集中的列对应到绘图所需的 x、y 轴数据,再把它传给本应生成曲线的函数 def func(x)。终端输出如下错误:

Traceback (most recent call last):
  File "/home/...../......./....py",line 26,in <module>
    y_train = func(x_train) + ['Low'] ##.(scale=0.1,size=size)
  File "/home/....../....../......py",line 10,in func
    def func(x): return (np.sin(2*np.pi*x))
TypeError: can't multiply sequence by non-int of type 'float'

在下面的代码中,我试图用数据表中的列来绘制贝叶斯回归拟合曲线;而 scikit-learn 的示例使用的是随机生成的数据,但我认为这并不是主要问题。

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as pdr
from sklearn.linear_model import BayesianRidge
import matplotlib.dates as mdates
import datetime as dt


def func(x): return (np.sin(2*np.pi*x))


rng = pdr.get_data_yahoo('GM',start='3/14/2009',end='4/14/2016')

# Add a 0..n-1 integer 'Ticks' column, then turn the index into a regular column
rng['Ticks'] = range(0,len(rng.index.values))
rng = rng.reset_index()
# Keep only the High, Low and Ticks columns
rng = rng[['High','Low','Ticks']]
print(rng.head(25))
# #############################################################################
# Training data. NOTE(review): x_train is the list ['Ticks'], not the rng['Ticks'] column, so func(x_train) raises the TypeError
size = 25
rng = rng[['High','Ticks']]  # NOTE(review): this drops the 'Low' column
x_train = ['Ticks']##(0.,1.,size) ##rng.uniform(0.,size)
y_train = func(x_train) + ['Low'] ##.(scale=0.1,size=size)
x_test = np.linspace(0.,100)


# #############################################################################
# Cubic polynomial features. NOTE(review): X_test omits n_order + 1, so np.vander defaults to len(x_test) columns
n_order = 3
X_train = np.vander(x_train,n_order + 1,increasing=True)
X_test = np.vander(x_test,increasing=True)

# #############################################################################
# Plot func(x_test), the observations and the predicted mean/std; annotate each panel with alpha, lambda and the last score (L)
reg = BayesianRidge(tol=1e-6,fit_intercept=False,compute_score=True)
fig,axes = plt.subplots(1,2,figsize=(8,4))
for i,ax in enumerate(axes):
    # First panel keeps the estimator's default initial values; second panel sets alpha_init/lambda_init explicitly
    if i == 0:
        init = [1 / np.var(y_train),1.]  # Default values
    elif i == 1:
        init = [1.,1e-3]
        reg.set_params(alpha_init=init[0],lambda_init=init[1])
    reg.fit(X_train,y_train)
    ymean,ystd = reg.predict(X_test,return_std=True)

    ax.plot(x_test,func(x_test),color="blue",label="sin($2\\pi x$)")
    ax.scatter(x_train,y_train,s=50,alpha=0.5,label="observation")
    ax.plot(x_test,ymean,color="red",label="predict mean")
    ax.fill_between(x_test,ymean-ystd,ymean+ystd,color="pink",label="predict std")
    ax.set_ylim(-1.3,1.3)
    ax.legend()
    title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(
            init[0],init[1])
    if i == 0:
        title += " (Default)"
    ax.set_title(title,fontsize=12)
    text = "$\\alpha={:.1f}$\n$\\lambda={:.3f}$\n$L={:.1f}$".format(
           reg.alpha_,reg.lambda_,reg.scores_[-1])
    ax.text(0.05,-1.0,text,fontsize=12)

plt.tight_layout()
plt.show()

解决方法

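报错的原因是:`x_train = ['Ticks']` 只是一个包含字符串 'Ticks' 的普通列表,并不是数据列,因此 `2*np.pi*x` 变成了"浮点数乘以列表",从而抛出 TypeError。应改用 `rng['Ticks']`(`'Low'` 同理写成 `rng['Low']`)从 DataFrame 中取出对应的列。尝试一下: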

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as pdr
from sklearn.linear_model import BayesianRidge
import matplotlib.dates as mdates
import datetime as dt

def func(x):
    """Return sin(2*pi*x) for a number or a NumPy-compatible numeric array."""
    angle = 2 * np.pi * x
    return np.sin(angle)

# Fetch the 'GM' quotes for the given date range through pandas_datareader.
rng = pdr.get_data_yahoo('GM', start='3/14/2009', end='4/14/2016')

# Number the rows 0..n-1 so they can serve as a numeric x axis, then move the
# index into an ordinary column.
rng['Ticks'] = range(len(rng.index))
rng = rng.reset_index()

# Keep 'Low' together with 'High' and 'Ticks'. The previous version narrowed
# the frame to ['High', 'Ticks'] and then read rng['Low'], which raises
# KeyError.
rng = rng[['High', 'Low', 'Ticks']]

# #############################################################################
# Build the training data from the DataFrame columns
size = 25  # not used by the code below
x_train = rng['Ticks']
# 'Ticks' holds whole numbers, so sin(2*pi*x) is ~0 there and y_train is
# essentially the 'Low' column.
y_train = func(x_train) + rng['Low']
# Evaluate the fitted curve on 100 evenly spaced points across the x range.
x_test = np.linspace(x_train.min(), x_train.max(), 100)


# #############################################################################
# Fit by cubic polynomial
n_order = 3
# Both design matrices must have the same n_order + 1 columns (1, x, x^2, x^3).
# Without an explicit column count np.vander produces len(x_test) columns,
# which does not match the features the model is fitted on.
X_train = np.vander(x_train, n_order + 1, increasing=True)
X_test = np.vander(x_test, n_order + 1, increasing=True)

# #############################################################################
# Plot the true and predicted curves with log marginal likelihood (L)
# A single estimator is reused for both panels; its last recorded score is the
# value shown as L in each panel's annotation.
reg = BayesianRidge(tol=1e-6, fit_intercept=False, compute_score=True)
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for i, ax in enumerate(axes):
    # Panel 0 keeps the estimator's default initial values; panel 1 sets
    # alpha_init / lambda_init explicitly before fitting.
    if i == 0:
        init = [1 / np.var(y_train), 1.]  # Default values
    elif i == 1:
        init = [1., 1e-3]
        reg.set_params(alpha_init=init[0], lambda_init=init[1])

    reg.fit(X_train, y_train)
    ymean, ystd = reg.predict(X_test, return_std=True)
    lower = ymean - ystd
    upper = ymean + ystd

    # Reference curve, observations, predicted mean and a one-std band.
    ax.plot(x_test, func(x_test), color="blue", label="sin($2\\pi x$)")
    ax.scatter(x_train, y_train, s=50, alpha=0.5, label="observation")
    ax.plot(x_test, ymean, color="red", label="predict mean")
    ax.fill_between(x_test, lower, upper, color="pink", label="predict std")
    ax.set_ylim(-50, 50)
    ax.legend()

    # Title shows the initial values used for this panel.
    title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(*init)
    if i == 0:
        title += " (Default)"
    ax.set_title(title, fontsize=12)

    # Annotation shows the fitted alpha, lambda and the last score.
    text = "$\\alpha={:.1f}$\n$\\lambda={:.3f}$\n$L={:.1f}$".format(
        reg.alpha_, reg.lambda_, reg.scores_[-1])
    ax.text(0.05, -1.0, text, fontsize=12)

plt.tight_layout()
plt.show()

(此处原为运行结果的图片)

相关问答

错误1:Request method ‘DELETE‘ not supported 错误还原:...
错误1:启动docker镜像时报错:Error response from daemon:...
错误1:private field ‘xxx‘ is never assigned 按Alt...
报错如下,通过源不能下载,最后警告pip需升级版本 Requirem...