手动在R

问题描述

我正在尝试使用代码和优化(不使用程序包)在R中实现多项式回归(mlogit或多项程序包)。

rm(list= ls())
data = read.table("~/Desktop/R Code/textfiles/keane.csv",sep = ",",header = T)

data1 = data[,c("educ","exper","expersq","black","status")]
data1 = na.omit(data1)

data2 = as.matrix(data1)


y_1 = rep(0,nrow(data2))
y_2 = rep(0,nrow(data2))
y_3 = rep(0,nrow(data2))


data2 = cbind(data2[,1:5],y_1,y_2,y_3)

data2[,6] = ifelse(data2[,5] == 1,1,0)
data2[,7] = ifelse(data2[,5] == 2,8] = ifelse(data2[,5] == 3,0)


int = rep(1,nrow(data2)) #intercept

data2 = cbind(int,data2[,c(1:4,6,7,8)]) 


X = as.matrix(data2[,c(1:5)])
y_1 = as.matrix(data2[,6]) #replace y values(status = 1)
y_2 = as.matrix(data2[,7]) #replace y values(status = 2)
y_3 = as.matrix(data2[,8]) #replace y values(status = 3)


Y = cbind(y_1,y_3) 


##beta


beta = solve(t(X) %*% X) %*% t(X) %*% Y #LPM coefficient 


logit.nll = function (beta,X,Y) {
    
    
    
    
    P = as.matrix(rowSums(exp(X %*% beta))); #Sum_(h=1)^3 exp(X * Beta_(h))
    
    
    
    Pr_1 = exp(X %*% beta[,2])/(1 + P); #P(y = 2 | X)
    Pr_2 = exp(X %*% beta[,3])/(1 + P); #P(y = 3 | X)
    
    Pr_0 = 1/(1+P);#P(y = 1 | X)
    
 
 
 (colSums(Y[,1] * log(Pr_0)) + colSums(Y[,2] * log(Pr_1)) + colSums(Y[,3] * log(Pr_2))) #log-likelihood
    
    
    
}

optim(beta,logit.nll,X = X,Y = Y,method = "BFGS")

当我执行此代码时,它会提示我“ X%*%beta中的错误:参数不一致”。我的方法可能根本上是错误的,或者对数似然函数的实现是错误的。我可以得到一些帮助来修复此代码吗?

解决方法

对svm优化或要执行的操作不是很熟悉,optim所带来的错误与向量一起使用。您需要将其强制转换为函数内部的矩阵,假设您的数据是这样的:

set.seed(111)
data = iris

X = model.matrix(~.,data=data[,1:4])
Y = model.matrix(~0+Species,data=data)

beta = solve(t(X) %*% X) %*% t(X) %*% Y

现在我们添加矩阵部分,还要注意默认情况下optim会执行最小化(https://stat.ethz.ch/R-manual/R-devel/library/stats/html/optim.html),因此您需要返回对数似然的负数:

logit.nll = function (beta,X,Y) {
    
    beta = matrix(beta,ncol=3)    
    P = as.matrix(rowSums(exp(X %*% beta))); #Sum_(h=1)^3 exp(X * Beta_(h))    
    Pr_1 = exp(X %*% beta[,2])/(1 + P); #P(y = 2 | X)
    Pr_2 = exp(X %*% beta[,3])/(1 + P); #P(y = 3 | X)    
    Pr_0 = 1/(1+P);#P(y = 1 | X)
     
LL = (colSums(Y[,1] * log(Pr_0)) + colSums(Y[,2] * log(Pr_1)) + colSums(Y[,3] * log(Pr_2))) #log-likelihood
print(LL)
return(-LL)

}

res = optim(beta,logit.nll,X = X,Y = Y,method = "BFGS")

res
$par
             Speciessetosa Speciesversicolor Speciesvirginica
(Intercept)      -2.085162         15.040679        -27.60634
Sepal.Length     -4.649971         -8.971237        -11.43702
Sepal.Width      -9.286757         -5.016616        -11.69764
Petal.Length     12.803070         17.125483         26.55641
Petal.Width       6.025760          3.342659         21.63200