问题描述
我想学习如何在 stan 中使用 R 拟合神经网络模型。我正在按照本网站上的示例进行操作:http://srmart.in/neural-networks-in-stan-or-how-i-was-utterly-surprised-that-it-worked-at-all/
我的第一步是从字面上复制和粘贴适合分类模型所需的代码,然后运行它。它立即失败了。我不知道为什么它会失败。
当我运行模型时,它给了我一个输出。但是当我想读取后验样本以获取验证集的预测时,我收到以下错误消息:
Warning message:
In matrix(.,N_test,3,byrow = TRUE) :
data length [151] is not a sub-multiple or multiple of the number of rows [50]
R 代码:
library(rstan)
library(magrittr)
sm <- stan_model("./stan_model.stan")
fit_nn_cat <- function(x_train,y_train,x_test,y_test,H,n_H,method = "optimizing",...) {
stan_data <- list(
N = nrow(x_train),P = ncol(x_train),x = x_train,labels = y_train,H = H,n_H = n_H,N_test = length(y_test)
)
if(method == "optimizing") {
optOut <- optimizing(sm,data = stan_data)
test_char <- paste0("output_test[",1:length(y_test),",rep(1:max(y_train),each = length(y_test)),"]")
y_test_pred <- matrix(optOut$par[test_char],stan_data$N_test,max(y_train))
y_test_cat <- apply(y_test_pred,1,which.max)
out <- list(y_test_pred = y_test_pred,y_test_cat = y_test_cat,conf = table(y_test_cat,y_test),fit = optOut)
return(out)
} else if(method == "sampling") {
out <- sampling(sm,data = stan_data,pars = "output_test",...)
return(out)
}
}
data(iris)
x <- iris[,1:4]
y <- as.numeric(as.factor((iris[,"Species"])))
N_test <- 50
test_indices <- sample(1:nrow(x),N_test)
x_train <- x[-test_indices,]
y_train <- y[-test_indices]
x_test <- x[test_indices,]
y_test <- y[test_indices]
fit_nuts <- fit_nn_cat(x_train,2,50,method = "sampling",cores = 4,iter = 1000)
cat_nuts <- summary(fit_nuts)$summary[,"mean"] %>%
matrix(N_test,byrow = TRUE) %>%
apply(1,which.max)
table(cat_nuts,y_test)
倒数第二行是发生错误的地方。
STAN 代码:
functions {
vector[] nn_predict(matrix x,matrix d_t_h,matrix[] h_t_h,matrix h_t_d,row_vector[] hidden_bias,row_vector y_bias) {
int N = rows(x);
int n_H = cols(d_t_h);
int H = size(hidden_bias);
int num_labels = cols(y_bias) + 1;
matrix[N,n_H] hidden_layers[H];
vector[num_labels] output_layer_logit[N];
vector[N] ones = rep_vector(1.,N);
hidden_layers[1] = inv_logit(x * d_t_h + ones * hidden_bias[1]);
for(h in 2:H) {
hidden_layers[h] = inv_logit(hidden_layers[h-1] * h_t_h[h - 1] + ones * hidden_bias[h]);
}
for(n in 1:N) {
output_layer_logit[n,1] = 0.0;
output_layer_logit[n,2:num_labels] = (hidden_layers[H,n] * h_t_d + y_bias)';
}
return(output_layer_logit);
}
}
data {
int N; // Number of training samples
int P; // Number of predictors (features)
matrix[N,P] x; // Feature data
int labels[N]; // Outcome labels
int H; // Number of hidden layers
int n_H; // Number of nodes per layer (All get the same)
int N_test; // Number of test samples
matrix[N_test,P] x_test; // Test predictors
}
transformed data {
int num_labels = max(labels); // How many labels are there
}
parameters {
matrix[P,n_H] data_to_hidden_weights; // Data -> Hidden 1
matrix[n_H,n_H] hidden_to_hidden_weights[H - 1]; // Hidden[t] -> Hidden[t+1]
matrix[n_H,num_labels - 1] hidden_to_data_weights; // Hidden[T] -> Labels. Base class gets 0.
// ordered[n_H] hidden_bias[H]; // Use ordered if using NUTS
row_vector[n_H] hidden_bias[H]; // Hidden layer biases
row_vector[num_labels - 1] labels_bias; // Labels biases. Base class gets 0.
}
transformed parameters {
vector[num_labels] output_layer_logit[N]; // Predicted output layer logits
output_layer_logit = nn_predict(x,data_to_hidden_weights,hidden_to_hidden_weights,hidden_to_data_weights,hidden_bias,labels_bias);
}
model {
// Priors
to_vector(data_to_hidden_weights) ~ std_normal();
for(h in 1:(H-1)) {
to_vector(hidden_to_hidden_weights[h]) ~ std_normal();
}
to_vector(hidden_to_data_weights) ~ std_normal();
for(h in 1:H) {
to_vector(hidden_bias[h]) ~ std_normal();
}
labels_bias ~ std_normal();
for(n in 1:N) { // Likelihood
labels[n] ~ categorical_logit(output_layer_logit[n]);
}
}
generated quantities {
vector[num_labels] output_layer_logit_test[N_test] = nn_predict(x_test,labels_bias);
matrix[N_test,num_labels] output_test;
for(n in 1:N_test) {
output_test[n] = softmax(output_layer_logit_test[n])';
}
}
起初我以为参数初始化有问题。但我是 STAN 的新手,我可能不完全了解正在发生的事情。在最后的 for 循环中,我希望 output_test 变量获得 N_test
列。并且 N_test
= 50。然而在返回的模型中,output_test 有 151 列。
这里出了什么问题?
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)