问题描述
我正在尝试实现此 paper 中给出的以下算法:
这是我的代码:
import numpy as np
import torch
import sklearn
from torch.autograd import Variable
from torch.optim import RMSprop,Adadelta,Adam,Adagrad,SGD
from torch.distributions import normal,Categorical,Poisson,Bernoulli,ContinuousBernoulli,Uniform
from torch.distributions.mixture_same_family import MixtureSameFamily
from torch.distributions import Multivariatenormal
from scipy.stats import multivariate_normal
def get_median(tensor):
"""
torch.median works differently than np.median
see https://stackoverflow.com/questions/54310861/pytorch-median-is-it-bug-or-am-i-using-it-wrong
"""
tensor = tensor.detach().flatten()
tensor_max = torch.tensor([tensor.max()])
return (torch.cat((tensor,tensor_max)).median() + tensor.median()) / 2.
def kernel_rbf(x,y,h=-1):
"""
returns the RBF kernel matrix k(x,y) = exp(-(1/h^2)|x-y|^2)
x: [num_samples,dimension]
y: [num_samples,dimension]
h: bandwidth parameter
"""
if x.dim() == 1:
x = x.unsqueeze(1)
if y.dim() == 1:
y = y.unsqueeze(1)
n = x.size()[0]
x_i = x.unsqueeze(1)
y_j = y.unsqueeze(0)
dnorm2 = ((x_i-y_j)**2).sum(2)
if h < 0:
h = get_median(dnorm2)
h = torch.sqrt(h**2/(2*np.log(n+1)))
kernel_matrix = torch.exp(-dnorm2 / h)
return kernel_matrix
def get_gf_optimal_direction(target_model,surrogate_model,inputs,h=-1,debug=False):
"""
returns phi(x) the optimal direction of perturbation for gradient descent
model: target model distribution (torch object)
inputs: [num_samples,dimension]
"""
n = inputs.shape[0]
inputs = inputs.detach().requires_grad_(True)
# compute the score function of the surrogate model
log_prob = surrogate_model.log_prob(inputs)
log_prob_grad = torch.autograd.grad(log_prob.sum(),inputs)[0]
# compute the gradient of the kernel
kernel = kernel_rbf(inputs,h=h)
kernel_grad = -0.5 * torch.autograd.grad(kernel.sum(),inputs)[0]
# compute weights
dummy_input = inputs.clone().detach().squeeze().numpy()
# evaluate rho(x)
surrogate_temp = multivariate_normal(mean=surrogate_model.mean.numpy(),cov=surrogate_model.variance.numpy())
weights_top = torch.tensor(surrogate_temp.pdf(dummy_input))
# evaluate p(x)
target_temp = multivariate_normal(mean=target_model.mean.numpy(),cov=target_model.variance.numpy())
weights_bottom = torch.tensor(target_temp.pdf(dummy_input))
weights = torch.unsqueeze(torch.div(weights_top,weights_bottom),1)
weight_sum = weights.sum()
# optimal pertubation direction for discrete distribution
phi_x = (weights/weight_sum) * (kernel.mm(log_prob_grad) + kernel_grad)
return phi_x
def update_gf_Adam(target_model,x0,n_iter=10):
optimizer = Adam([x0],lr=0.0001)
for iter in range(1,n_iter + 1):
optimizer.zero_grad()
x0.grad = (-1.) * get_gf_optimal_direction(target_model,debug=True)
optimizer.step()
return x0
# target distribution
d = 2
number_of_particles = 5
mu = torch.tensor(np.zeros(d))
A = torch.tensor(np.eye(d))
target_model = Multivariatenormal(mu,2 * A)
# surrogate distribution
surrogate_model = Multivariatenormal(mu + 5,6 * A)
# initial distribution
initial_model = Multivariatenormal(mu + 2,10*A)
x0 = initial_model.sample([number_of_particles])
theta = x0.clone()
theta = update_gf_Adam(target_model,theta)
expected_mean = torch.mean(theta,dim=0)
expected_covariance = get_covariance(theta)
print("expected mean = {}\n expected covariance = {}".format(expected_mean,expected_covariance))
print("actual mean = {}\n actual covariance = {}".format(target_model.mean,target_model.covariance_matrix))
我正在尝试使算法适用于以下简单情况:
- 目标分布 $p$ 是二维多元正态分布,$p(x)=\mathcal{N}(x;\mu,2*A)$ 其中均值 $\mu=(0,0)$ 和 矩阵 $A=\operatorname{Id}.$
- 代理密度函数 $\rho(x) = \mathcal{N}(x;\mu+5,6*A).$
- 初始粒子${x_i^0}_{i=1}^{n}$从分布$\mathcal{N}(x;\mu+2,10*A).$
- 核$k$被选为RBF核,形式为$k(x,x')=\exp(-\frac{1}{h^2}||x-x'||) $ 其中 $h$ 根据中值启发式设置,即 $h=\operatorname{med}^2/(2\log(n+1))$ 其中 $n$ 是粒子数。
更新规则是在函数get_gf_optimal_direction
中实现的,我想我已经在PyTorch中正确计算了数量,但是算法没有收敛。任何意见/建议将不胜感激。
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)