创建正常均值和标准差的 2 层级估计

问题描述

我有一个正态分布变量 x(如产品需求)、一个索引 id_1(如产品编号)和第二个索引 id_2(如产品组)。我的目标是分层估计 x 的均值和标准差(所有 > 产品组 > 产品)。 这是我的数据:

import numpy as np
import pymc3 as pm
import arviz as az

# data
my_step_1 = 0.4
my_step_2 = 4.1
sd_step_1 = 0.1
sd_step_2 = 0.2
my = 10
sd = .1
grp_n = 8
grps_1 = 5
grps_2 = 4

x = np.round(np.concatenate([np.random.normal(my + i * my_step_1 + j * my_step_2,\
                      sd + i * sd_step_1 + j * sd_step_2,grp_n) \
    for i in range(grps_1) for j in range(grps_2)]),1) # demand
id_1 = np.repeat(np.arange(grps_1 * grps_2),grp_n) # group,product number
id_2 = np.tile(np.repeat(np.arange(grps_2),grp_n),grps_1) # super-group,product group
shape_1 = len(np.unique(id_1))
shape_2 = len(np.unique(id_2))

我管理了一个层次结构:

with pm.Model() as model_h1:
    #
    mu_mu_hyper = pm.normal('mu_mu_hyper',mu = 0,sd = 10)
    mu_sd_hyper = pm.Halfnormal('mu_sd_hyper',10)
    sd_hyper = pm.Halfnormal('sd_hyper',10)
    #
    mu = pm.normal('mu',mu = mu_mu_hyper,sd = mu_sd_hyper,shape = shape_1)
    sd = pm.Halfnormal('sd',sd = sd_hyper,shape = shape_1)

    y = pm.normal('y',mu = mu[id_1],sd = sd[id_1],observed = x)

    trace_h1 = pm.sample(1000)

#az.plot_forest(trace_h1,var_names=['mu','sd'],combined = True)

但是我如何编码 2 个层次结构?

# 2 hierarchies .. doesn't work
with pm.Model() as model_h2:  
    #
    mu_mu_hyper2 = pm.normal('mu_mu_hyper2',sd = 10)
    mu_sd_hyper2 = pm.Halfnormal('mu_sd_hyper2',sd = 10)
    sd_mu_sd_hyper2 = pm.Halfnormal('sd_mu_sd_hyper2',sd = 10)
    sd_hyper2 = pm.Halfnormal('sd_hyper2',sd = 10)    
    #
    mu_mu_hyper1 = pm.normal('mu_hyper1',mu = mu_mu_hyper2,sd = mu_sd_hyper2,shape = shape_2)
    mu_sd_hyper1 = pm.Halfnormal('mu_sd_hyper1',sd = sd_mu_sd_hyper2,shape = shape_2)
    sd_hyper1 = pm.Halfnormal('sd_hyper1',sd = sd_hyper2,shape = shape_2)
    #sd_hyper1 = pm.Halfnormal('sd_hyper1',sd = sd_hyper2[id_2],shape = shape_2)??
    #
    mu = pm.normal('mu',mu = mu_mu_hyper1,sd = mu_sd_hyper1,sd = sd_hyper1,shape = shape_1)
    
   y = pm.normal('y',observed = x)

    trace_h2 = pm.sample(1000)

解决方法

您可以尝试遍历产品组并使用组的均值和标准差作为属于该特定组的产品的约束。

# sample product group to product mapping
group_product_mapping = {0: [1,2,3],1: [4,5,6]}
total_groups = len(group_product_mapping.keys())
with pm.model() as model_h:
    mu_all = pm.Normal('mu_all',10)
    sd_all = pm.HalfNormal('sd_all',10)
    sd_mu_group = pm.HalfNormal('sd_mu_group',10)
    
    # group parameters constrained to mu,sd from all
    mu_group = pm.Normal('mu_group',mu_all,sd_all,shape=total_groups) 
    sd_group = pm.HalfNormal('sd_group',sd_mu_group,shape=total_groups)
    
    mu_products = dict()
    # iterate through groups and constrain product parameters to the product group they belong to
    for idx,group in enumerate(group_product_mapping.keys()):
        mu_products[group] = pm.Normal(f'mu_products_{group}',mu_group[idx],sd_group[idx],shape=len(group_product_mapping[group]))
        sd_produtcs[group] = pm.HalfNormal(f'sd_mu_products_{group}',10)