Problem description
I am trying to understand how to use a black-box likelihood function in pymc. Basically, this is explained here. I tried to implement this myself for a very simple Python model (a double logistic function), without gradients. Apart from the code from that link, which sets up a Theano wrapper (a LogLike Op) around the log-likelihood function, I have the following code.
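The LogLike wrapper itself is not reproduced here; adapted from the linked tutorial to match the two-argument call LogLike(log_lklhood, sigma) used below, it would look roughly like this (a sketch only, not the exact tutorial class; data is picked up from the enclosing scope):

class LogLike(tt.Op):
    itypes = [tt.dvector]  # takes a vector of parameter values
    otypes = [tt.dscalar]  # returns a scalar log-likelihood

    def __init__(self, loglike, sigma):
        self.likelihood = loglike
        self.sigma = sigma

    def perform(self, node, inputs, outputs):
        (theta,) = inputs
        outputs[0][0] = np.array(self.likelihood(theta, data, self.sigma))

The rest of my code is: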
# Define the model
def my_model(p, t, m_m, m_M):
    """A simple double logistic model."""
    return m_m + (m_M - m_m) * (
        1.0 / (1 + np.exp(-p[0] * (t - p[1] * 365)))
        + 1.0 / (1 + np.exp(p[2] * (t - p[3] * 365)))
        - 1
    )

# The log-likelihood function
def log_lklhood(theta, data, sigma):
    """Normal log-likelihood."""
    y_pred = my_model(theta, t, data.min(), data.max())
    retval = -(0.5 / sigma ** 2) * np.sum((data - y_pred) ** 2)
    return retval
## Experimental data
data = np.array([0.104, 0.133, 0.131, 0.329, 0.669, 0.822, 0.847, 0.807, 0.638, 0.486, 0.162, 0.125])
t = np.array([1, 32, 61, 92, 122, 153, 183, 214, 245, 275, 306, 336])
sigma = 0.02  # Uncertainty in the observations

# Straight outta internet...
logl = LogLike(log_lklhood, sigma)
ndraws = 500  # number of draws from the distribution
nburn = 100   # number of "burn-in" points (which we'll discard)
# use PyMC3 to sample from the log-likelihood
with pm.Model():
    theta1 = pm.Normal("theta_1", mu=0, sigma=1, testval=0.1)
    theta3 = pm.Normal("theta_3", testval=0.1)
    theta2 = pm.Uniform("theta_2", lower=0.01, upper=0.5, testval=120.0 / 365)
    theta4 = pm.Uniform("theta_4", lower=0.51, upper=0.99, testval=250.0 / 365)
    # convert theta1...theta4 to a tensor vector
    theta = tt.as_tensor_variable([theta1, theta2, theta3, theta4])
    # use a DensityDist (use a lambda function to "call" the Op)
    pm.DensityDist("likelihood", lambda v: logl(v), observed={"v": theta})
    # Use simple Metropolis
    step = pm.Metropolis()
    trace = pm.sample(ndraws, tune=nburn, step=step, discard_tuned_samples=True)
Running this starts sampling, but then fails:

Multiprocess sampling (2 chains in 2 jobs)
CompoundStep
>Metropolis: [theta_4]
>Metropolis: [theta_2]
>Metropolis: [theta_3]
>Metropolis: [theta_1]
100.00% [1010/1010 00:01<00:00 Sampling 2 chains, 0 divergences]
Sampling 2 chains for 5 tune and 500 draw iterations (10 + 1_000 draws total) took 2 seconds.
---------------------------------------------------------------------------
MissingInputError                         Traceback (most recent call last)
<ipython-input-14-3e5ab1ff0971> in <module>
     17     pm.DensityDist('likelihood', lambda v: logl(v), observed={'v': theta})
     18     step = pm.Metropolis()
---> 19     trace = pm.sample(ndraws, tune=nburn, step=step, discard_tuned_samples=True)
[...]
MissingInputError: Input 0 of the graph (indices start from 0), used to compute sigmoid(theta_4_interval__), was not provided and not given a value. Use the Theano flag exception_verbosity='high' for more information on this error.
I am new to pymc, so I really don't know what this error means or what I am doing wrong. There are a number of SO questions (e.g. this one) suggesting that the underlying problem is calling a Python function with a Theano array as argument, but I can't see how that is happening here.
Solution
So, there is a catch with the black-box likelihood example: do not use pm.DensityDist, use pm.Potential instead (see this arviz issue). With that change the example works, even when using scipy.optimize.approx_fprime to approximate the gradient of the log-likelihood.
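The crucial change relative to the question is a single line: instead of feeding the Op to a DensityDist through an observed dict, its output is added directly to the model log-probability (loglike is the Op defined in the full listing below):

# broken: parameters passed as "observed" to a DensityDist
# pm.DensityDist("likelihood", lambda v: logl(v), observed={"v": theta})
# working: add the black-box log-likelihood as a Potential
pm.Potential("like", loglike(theta))

The full, working example: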
import numpy as np
import pymc3 as pm
import theano.tensor as tt
import arviz as az
import scipy.optimize  # Can't be bothered calculating derivatives by hand

## Experimental data
data = np.array([0.104, 0.133, 0.131, 0.329, 0.669, 0.822, 0.847, 0.807, 0.638, 0.486, 0.162, 0.125])
t = np.array([1, 32, 61, 92, 122, 153, 183, 214, 245, 275, 306, 336])
## Log-likelihood and gradient of the log-likelihood
def log_likelihood(theta, data, t):
    if type(theta) == list:
        theta = theta[0]
    theta1, theta2, theta3, theta4, sigma = theta
    m_m = data.min()
    m_M = data.max()
    y_pred = m_m + (m_M - m_m) * (
        1.0 / (1 + np.exp(-theta1 * (t - theta2)))
        + 1.0 / (1 + np.exp(theta3 * (t - theta4)))
        - 1
    )
    logp = -len(data) * np.log(np.sqrt(2.0 * np.pi) * sigma)
    logp += -np.sum((data - y_pred) ** 2.0) / (2.0 * sigma ** 2.0)
    return logp

def der_log_likelihood(theta, data, t):
    def lnlike(values):
        return log_likelihood(values, data, t)
    eps = np.sqrt(np.finfo(float).eps)
    grads = scipy.optimize.approx_fprime(theta[0], lnlike, eps * np.ones(len(theta[0])))
    return grads
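# Optional sanity check before wrapping anything in Theano: the log-likelihood
# should return a finite scalar and the numerical gradient a length-5 vector
# (the test values below are arbitrary).
theta_test = np.array([0.1, 120.0, 0.1, 250.0, 0.02])
print(log_likelihood(theta_test, data, t))
print(der_log_likelihood([theta_test], data, t))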
## Wrapper classes to theano-ize the log-likelihood and its gradient
class Loglike(tt.Op):
    itypes = [tt.dvector]  # input: vector of parameter values
    otypes = [tt.dscalar]  # output: scalar log-likelihood

    def __init__(self, data, t):
        self.data = data
        self.t = t
        self.loglike_grad = LoglikeGrad(self.data, self.t)

    def perform(self, node, inputs, outputs):
        logp = log_likelihood(inputs, self.data, self.t)
        outputs[0][0] = np.array(logp)

    def grad(self, inputs, grad_outputs):
        (theta,) = inputs
        grads = self.loglike_grad(theta)
        return [grad_outputs[0] * grads]
class LoglikeGrad(tt.Op):
    itypes = [tt.dvector]
    otypes = [tt.dvector]

    def __init__(self, data, t):
        self.der_likelihood = der_log_likelihood
        self.data = data
        self.t = t

    def perform(self, node, inputs, outputs):
        grads = self.der_likelihood(inputs, self.data, self.t)
        outputs[0][0] = grads
## Sample!
loglike = Loglike(data, t)
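# Optional sanity check: Theano should be able to evaluate the Op and
# differentiate through its custom grad() method (the test point is arbitrary).
import theano
theta_var = tt.dvector("theta")
check = theano.function([theta_var], [loglike(theta_var), tt.grad(loglike(theta_var), theta_var)])
print(check(np.array([0.07, 105.0, 0.05, 270.0, 0.04])))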
with pm.Model() as model:
    theta1 = pm.Normal("theta_1", mu=0, sigma=1, testval=0.1)
    theta3 = pm.Normal("theta_3", testval=0.1)
    theta2 = pm.DiscreteUniform("theta_2", lower=1, upper=180, testval=120)
    theta4 = pm.DiscreteUniform("theta_4", lower=181, upper=365, testval=250)
    sigma = pm.HalfNormal("sigma", sigma=0.6, testval=0.01)
    # convert parameters to a tensor vector
    theta = tt.as_tensor_variable([theta1, theta2, theta3, theta4, sigma])
    # Do not use a DensityDist!
    pm.Potential("like", loglike(theta))

with model:
    trace = pm.sample(step=pm.NUTS())
    print(pm.summary(trace).to_string())
Happily, the code above produces:
Multiprocess sampling (2 chains in 2 jobs)
CompoundStep
>NUTS: [sigma, theta_3, theta_1]
>CompoundStep
>>Metropolis: [theta_4]
>>Metropolis: [theta_2]
100.00% [4000/4000 00:16<00:00 Sampling 2 chains, 0 divergences]
Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 17 seconds.
The number of effective samples is smaller than 25% for some parameters.
            mean     sd   hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_mean  ess_sd  ess_bulk  ess_tail  r_hat
theta_1    0.073  0.019    0.049    0.095      0.001    0.001     258.0   230.0     641.0     305.0   1.01
theta_3    0.052  0.010    0.037    0.070      0.000    0.000     404.0   361.0     681.0     379.0   1.01
theta_2  104.616  3.004  100.000  110.000      0.178    0.126     285.0   283.0     307.0     312.0   1.01
theta_4  270.178  3.139  264.000  275.000      0.147    0.104     455.0   455.0     454.0     606.0   1.01
sigma      0.040  0.013    0.020    0.063      0.001    0.001     324.0   298.0     410.0     422.0   1.01
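Note that because theta_2 and theta_4 are DiscreteUniform variables, PyMC3 assigns them Metropolis steps while NUTS handles the continuous parameters, which is exactly what the CompoundStep output above shows. Since arviz is already imported, the trace can also be inspected graphically, for example with a standard trace plot (an optional extra, not part of the run above):

import matplotlib.pyplot as plt
az.plot_trace(trace)  # per-parameter trace and posterior density plots
plt.show()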