问题描述
所以我在学习了 Andrew Ng 的 Coursera 机器学习课程后,尝试用 Swift 实现逻辑回归。我遇到了一些麻烦:当我对数据集进行标准化时,我的预测类别与数据未标准化时不同。我还降低了标准化版本的学习率,看看这是否能得到更准确的结果;虽然准确度确实有所提高,但耗时更长,而非标准化版本在迭代次数更少的情况下结果仍然好得多。我知道只有一个特征时进行标准化并不是真正必要的,但我只是想熟悉一下流程,所以遇到这个问题时想试一试。我之前也遇到过一个问题:我对线性回归执行了错误的正则化,导致了一些问题;这次我同样检查了数学推导中的错误,但对于这个逻辑回归问题仍然找不到任何原因。
这是代码
//
// LogisticRegression.swift
// MachineLearning
//
// Created by Sahil Srivastava on 7/2/21.
//
import Foundation
class LogisticRegression {

    // MARK: - Stored state

    // Per-feature maxima and minima captured during training.
    // Non-nil only when the model was trained with `normalize: true`;
    // `solve(for:)` uses them to scale new inputs the same way.
    // NOTE(review): built as column vectors but written with [0, j]
    // subscripts and combined with a row vector in solve(for:) — this
    // relies on the Matrix type's flat-grid indexing; verify against the
    // Matrix implementation.
    private var maxs: Matrix?
    private var mins: Matrix?
    // One fitted theta vector per class (one-vs-all classifiers),
    // ordered by class label 1, 2, ... n.
    private var thetas: [Matrix] = [Matrix]()

    /// Trains a one-vs-all logistic regression model with batch gradient descent.
    /// - Parameters:
    ///   - inputs: Training rows of raw features (the bias column of 1s is added here).
    ///   - outputs: Class labels; expected to be the values 1...numberOfClasses.
    ///   - normalize: When true, min-max scales every feature into [0, 1].
    ///   - lr: Gradient-descent learning rate.
    ///   - lm: Regularization strength lambda (used only when `regularization` is true).
    ///   - error: Convergence threshold on the absolute change in cost per iteration.
    ///   - regularization: Whether to apply L2 regularization to cost and updates.
    init(inputs: [[Double]], outputs: [Double], normalize: Bool, lr: Double, lm: Double, error: Double, regularization: Bool) {
        // First convert our inputs to X with column 0 made of 1s for theta_0.
        var mutateInputs = inputs
        for i in 0..<mutateInputs.count {
            mutateInputs[i].insert(1, at: 0)
        }
        // Use this to create X, and outputs to create y.
        let X = Matrix(mutateInputs)
        let y = Matrix(outputs, isColumnVector: true)
        if normalize {
            // Placeholders sized for the number of features plus the bias
            // column; every slot is overwritten in the loop below.
            self.maxs = Matrix(0..<X.columns, isColumnVector: true)
            self.mins = Matrix(0..<X.columns, isColumnVector: true)
            // Set up our normalized X.
            var Xn = X
            // Iterate through each feature and min-max normalize it.
            for j in 0..<Xn.columns {
                var max = Xn.max(column: j).0
                var min = Xn.min(column: j).0
                // If the feature has no range, use max = 1 / min = 0 so the
                // division below is a no-op. This always triggers for the
                // bias column, where every x_0 = 1.
                if max - min == 0 {
                    max = 1
                    min = 0
                }
                // Remember the scale for normalizing future inputs in solve(for:).
                self.maxs![0, j] = max
                self.mins![0, j] = min
                // Now normalize this feature column.
                Xn[column: j] = (Xn[column: j] - min) / (max - min)
            }
            // Fit our thetas to the normalized data.
            self.thetas = fit(X: Xn, y: y, lr: lr, lm: lm, error: error, regularization: regularization)
        } else {
            // Fit our thetas to the raw data.
            // BUG FIX: the original call here passed only X and
            // regularization, which does not match fit's parameter list.
            self.thetas = fit(X: X, y: y, lr: lr, lm: lm, error: error, regularization: regularization)
        }
    }

    /// One-vs-all training: fits one theta vector per distinct class label.
    /// Labels in `y` are assumed to be exactly 1...classes.
    /// BUG FIX: the original declaration omitted lr/lm/error even though the
    /// call site at init passed them; they are now threaded through to
    /// GradientDescent.
    private func fit(X: Matrix, y: Matrix, lr: Double, lm: Double, error: Double, regularization: Bool) -> [Matrix] {
        var thetas = [Matrix]()
        // Number of distinct labels = number of one-vs-all classifiers to train.
        let classes = y.uniqued().count
        let start = CFAbsoluteTimeGetCurrent()
        for i in 1...classes {
            // Build a binary label vector for class i: 1 for rows of this
            // class (positive), 0 for every other class (negative).
            var y_i = Matrix(y.grid, isColumnVector: true)
            for j in 0..<y.count {
                if y_i[j] == Double(i) {
                    y_i[j] = 1
                } else {
                    y_i[j] = 0
                }
            }
            // Run gradient descent against the binary labels and keep the
            // resulting theta for use in solve(for:).
            thetas.append(GradientDescent(X: X, y: y_i, lr: lr, lm: lm, error: error, regularization: regularization).theta)
        }
        let diff = CFAbsoluteTimeGetCurrent() - start
        print("---------------------")
        print("LogisticRegression.fit: Gradient Descent time taken: \(diff)")
        return thetas
    }

    /// Predicts the class label for one raw (unnormalized) feature vector.
    /// - Throws: `LogisticRegressionError.incorrectNumberOfFeatures` when the
    ///   input length does not match the trained feature count.
    public func solve(for input: [Double]) throws -> Double {
        // theta includes the bias weight, so callers supply one fewer value.
        if input.count != self.thetas.first!.count - 1 {
            throw LogisticRegressionError.incorrectNumberOfFeatures(needed: self.thetas.first!.count - 1)
        }
        // First convert our input to x with column 0 set to 1 for theta_0.
        var mutateFeatures = input
        mutateFeatures.insert(1, at: 0)
        var x = Matrix(mutateFeatures)
        // If the model was trained on normalized data, scale this input with
        // the stored training mins/ranges before predicting.
        if self.maxs != nil, self.mins != nil {
            x = (x - self.mins!) / (self.maxs! - self.mins!)
            print(self.mins!, self.maxs!)
            return target(x: x, thetas: self.thetas)
        } else {
            return target(x: x, thetas: self.thetas)
        }
    }

    /// Scores `x` against every class's theta and returns the label of the
    /// classifier with the highest sigmoid output.
    private func target(x: Matrix, thetas: [Matrix]) -> Double {
        print("---------------------")
        let start = CFAbsoluteTimeGetCurrent()
        var outputs = [(class: Double, prediction: Double)]()
        // Score each one-vs-all classifier; class labels are 1-based.
        var i = 1.0
        for theta in thetas {
            // Sigmoid of x · theta; .sum() collapses the 1x1 result to a scalar.
            let output = (1 / (1 + (-(x <*> theta)).exp())).sum()
            outputs.append((i, output))
            i += 1.0
        }
        // Predict the class whose classifier is most confident.
        let output = outputs.max(by: { $0.prediction < $1.prediction })!
        print("LogisticRegression.target: Outputs list \(outputs)")
        print("LogisticRegression.target: For input \(x.grid),found \(output)")
        let diff = CFAbsoluteTimeGetCurrent() - start
        print("LogisticRegression.target: Time taken: \(diff)")
        return output.class
    }

    /// Batch gradient descent for binary logistic regression, run in init
    /// until the cost function converges.
    private class GradientDescent {
        // The fitted parameter vector, available once init returns.
        public private(set) var theta: Matrix

        /// Parameters mirror LogisticRegression.init; `y` must be a 0/1
        /// column vector.
        /// BUG FIX: the original init declared only X and regularization,
        /// but its body referenced y, lr, lm and error and the call site in
        /// fit passed y — they are now real parameters.
        init(X: Matrix, y: Matrix, lr: Double, lm: Double, error: Double, regularization: Bool) {
            print("---------------------")
            // History of costs, used to test convergence.
            var Js = [Double]()
            // Start theta at all zeros; X.columns == feature count incl. bias.
            var theta = Matrix(Array(repeating: 0, count: X.columns)).transpose()
            // Computes the logistic cost J(theta) and returns true once the
            // change in cost between iterations drops below `error`.
            let cost_minimum: () -> Bool = {
                // Row vector of sigmoid hypotheses for every training row.
                let hypothesis = (1 / (1 + (-(X <*> theta)).exp())).transpose()
                // Cross-entropy terms log(h)·y + log(1-h)·(1-y); the matrix
                // product already sums over the training rows.
                let ssqe_vector = hypothesis.log() <*> y + (1 - hypothesis).log() <*> (1 - y)
                let ssqe = ssqe_vector.sum() // already a single value thanks to the matrix math above
                // -1/m coefficient of the logistic cost.
                let coeff = -1.0 / (Double(X.rows))
                // Add the L2 penalty when regularizing.
                // NOTE(review): the penalty includes theta_0, which standard
                // regularized logistic regression excludes — confirm intent.
                let J = regularization ? coeff * ssqe + (lm / (2.0 * Double(X.rows))) * theta.pow(2).sum() : coeff * ssqe
                Js.append(J)
                print("GradienDescent.init: Cost: \(J)")
                // Cost must decrease monotonically; an increase means the
                // learning rate is too large for this data.
                if Js.count >= 2 { if Js[Js.count - 2] - Js[Js.count - 1] < 0 { fatalError() } }
                // Converged when the improvement falls below the threshold.
                return Js.count < 2 ? false : abs(Js[Js.count - 2] - Js[Js.count - 1]) < error
            }
            // Gradient-descent loop: update theta, then check convergence.
            repeat {
                // 1/m coefficient of the gradient.
                let coeff = 1.0 / (Double(X.rows))
                // Sigmoid predictions for the current theta.
                let hypothesis = (1 / (1 + (-(X <*> theta)).exp()))
                // Prediction error per training row.
                let predicted = hypothesis - y
                // Gradient vector: one partial derivative per theta_j.
                var S = Matrix(0..<theta.count)
                for j in 0..<theta.count {
                    S[0, j] = (predicted * X[column: j]).sum()
                }
                // Simultaneous update; the regularized form shrinks theta first.
                // NOTE(review): the shrink factor also applies to theta_0,
                // which is conventionally left unregularized — confirm intent.
                theta = regularization ? theta * (1 - lr * lm / Double(X.rows)) - (lr * coeff * S) : theta - (lr * coeff * S)
            } while !cost_minimum()
            print("GradienDescent.init: Iterations: \(Js.count)")
            self.theta = theta
        }
    }
}
/// Errors thrown by `LogisticRegression.solve(for:)`.
enum LogisticRegressionError: Error {
// Thrown when the supplied feature vector's length does not match the number
// of features the model was trained on; `needed` carries the expected count.
case incorrectNumberOfFeatures(needed: Int)
}
extension Sequence where Element: Hashable {
    /// Returns the sequence's elements with duplicates removed, preserving
    /// the order of each element's first occurrence.
    func uniqued() -> [Element] {
        var seen: Set<Element> = []
        var unique: [Element] = []
        // Set.insert reports whether the element was newly inserted, which
        // doubles as the "first time seen" test.
        for element in self where seen.insert(element).inserted {
            unique.append(element)
        }
        return unique
    }
}
这就是我运行它的地方(我的样本数据将数字分组在 0-10、10-20、20-30 之间)
//
// LogisticView.swift
// MachineLearning
//
// Created by Sahil Srivastava on 7/4/21.
//
import SwiftUI
/// Demo view: on appear, builds a synthetic 1-feature, 3-class data set
/// (values grouped 0-10, 10-20, 20-30), trains a LogisticRegression model,
/// and prints the predicted class for a sample input.
struct LogisticView: View {
    var body: some View {
        Text("Hello,World!")
            .onAppear {
                // 3000 single-feature rows: thirds drawn from [0,10), [10,20), [20,30].
                var inputs = [[Double]]()
                for i in 0..<3000 {
                    let val = Double(i) / 100.0
                    if val <= 10.0 {
                        inputs.append([Double.random(in: 0..<10.0)])
                    } else if val > 10.0 && val <= 20.0 {
                        inputs.append([Double.random(in: 10.0..<20.0)])
                    } else {
                        inputs.append([Double.random(in: 20.0...30.0)])
                    }
                }
                // Matching labels, aligned with the input thirds above.
                var outputs = [Double]()
                for j in 0..<3000 {
                    let val = Double(j) / 100.0
                    if val <= 10.0 {
                        outputs.append(1) // 0 - 10 is CLASS 1
                    } else if val > 10.0 && val <= 20.0 {
                        outputs.append(2) // 10 - 20 is CLASS 2
                    } else {
                        outputs.append(3) // 20 - 30 is CLASS 3
                    }
                }
                let features = [11.0] // expected to classify as class 2
                let lr = 0.01
                // normalized logistic regression is slightly more inaccurate despite having higher iterations
                // Regularization is not a variable factor
                // BUG FIX: the original call omitted the required `lr:`
                // argument of LogisticRegression.init.
                let lrgd = LogisticRegression(inputs: inputs, outputs: outputs, normalize: true, lr: lr, lm: 0.01, error: lr * lr * lr, regularization: true)
                let target = try! lrgd.solve(for: features)
                print("---------------------")
                print("LogisticView.body: Target \(target) found for features \(features)")
            }
    }
}
/// Xcode canvas preview provider for `LogisticView`.
struct LogisticView_Previews: PreviewProvider {
    static var previews: some View { LogisticView() }
}
这是我使用的矩阵结构,它的 GitHub 地址是 https://github.com/hollance/Matrix/blob/master/Matrix.swift
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)