Normalized logistic regression gives incorrect results

Problem Description

After taking Andrew Ng's machine learning course on Coursera, I tried implementing logistic regression in Swift. I've run into trouble: when I normalize my data set, the predicted class differs from the prediction on the unnormalized data. I also lowered the learning rate for the normalized version to see whether that would give more accurate results; it did, but it took much longer, and the unnormalized version was still considerably better with far fewer iterations. I know normalizing a single feature isn't really necessary, but I'm just trying to get the hang of things, so I figured I'd try it anyway, and that's when I hit this problem. I previously had an issue where I applied regularization incorrectly in linear regression, which caused some problems, so I've double-checked my math for mistakes here, but I still can't find anything wrong with this logistic regression.
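For context, min-max scaling maps each feature into [0, 1] using the training set's minimum and maximum, and the identical min and max must be reused on any query point at prediction time. Below is a minimal sketch of that idea on a plain [Double] (my own toy example, not taken from the code that follows):

import Foundation

func minMaxParams(_ values: [Double]) -> (min: Double, max: Double) {
    let lo = values.min()!, hi = values.max()!
    // Guard against a zero range, as the class below also does
    return hi - lo == 0 ? (0, 1) : (lo, hi)
}

func scale(_ v: Double, _ p: (min: Double, max: Double)) -> Double {
    return (v - p.min) / (p.max - p.min)
}

let train = [1.0, 5.0, 30.0]
let params = minMaxParams(train)
let scaledTrain = train.map { scale($0, params) } // [0.0, ~0.138, 1.0]
// The SAME min/max must be reused at prediction time:
let scaledQuery = scale(11.0, params)             // ~0.345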

Here is the code:

//
//  LogisticRegression.swift
//  MachineLearning
//
//  Created by Sahil Srivastava on 7/2/21.
//

import Foundation

class LogisticRegression {
    
    private var maxs: Matrix?
    private var mins: Matrix?
    private var thetas: [Matrix] = [Matrix]()
    
    init(inputs: [[Double]], outputs: [Double], normalize: Bool, lr: Double, lm: Double, error: Double, regularization: Bool) {
        // First convert our inputs to X with column 0 made of 1s for theta_0
        var mutateInputs = inputs
        for i in 0..<mutateInputs.count {
            mutateInputs[i].insert(1, at: 0)
        }
        // Use this to create X
        let X = Matrix(mutateInputs)
        // Use outputs to create y
        let y = Matrix(outputs, isColumnVector: true)
        // Check for normalized optimization
        if normalize {
            // Init maxs and mins for the number of features plus theta_0
            self.maxs = Matrix(0..<X.columns, isColumnVector: true)
            self.mins = Matrix(0..<X.columns, isColumnVector: true)
            // Set up our normalized X
            var Xn = X
            // Iterate through each feature and normalize
            for j in 0..<Xn.columns {
                // Get the max and min for this feature
                var max = Xn.max(column: j).0
                var min = Xn.min(column: j).0
                // If the feature has no range, set max to 1 and min to 0 so they don't affect normalization
                // This branch will always trigger for feature_0 (added for theta_0), since x_0 = 1
                // for every training example i
                if max - min == 0 {
                    max = 1
                    min = 0
                }
                // Store the max and min at the appropriate index for future normalizations
                self.maxs![0, j] = max
                self.mins![0, j] = min
                // Now normalize this feature set for our matrix
                Xn[column: j] = (Xn[column: j] - min) / (max - min)
            }
            // Fit our thetas to normalized data
            self.thetas = fit(X: Xn, y: y, lr: lr, lm: lm, error: error, regularization: regularization)
        } else {
            // Fit our thetas to the raw data
            self.thetas = fit(X: X, y: y, lr: lr, lm: lm, error: error, regularization: regularization)
        }
    }
    
    private func fit(X: Matrix, y: Matrix, lr: Double, lm: Double, error: Double, regularization: Bool) -> [Matrix] {
        // Set up empty thetas
        var thetas = [Matrix]()
        // Get the number of classes we are solving for
        let classes = y.uniqued().count
        // Iterate for the number of classes
        let start = CFAbsoluteTimeGetCurrent()
        for i in 1...classes {
            // First set up the y vector we are about to relabel
            var y_i = Matrix(y.grid, isColumnVector: true)
            // Relabel to 0/1, with 1 for this class and 0 for all other classes
            for j in 0..<y.count {
                // If this target is our current class, set it to 1 (the positive class);
                // otherwise set it to 0 (the negative class)
                if y_i[j] == Double(i) {
                    y_i[j] = 1
                } else {
                    y_i[j] = 0
                }
            }
            // Now that the relabeled y vector for this class is set up, run gradient descent for it
            // Then add the result to our thetas to be used in solve
            thetas.append(GradientDescent(X: X, y: y_i, lr: lr, lm: lm, error: error, regularization: regularization).theta)
        }
        let diff = CFAbsoluteTimeGetCurrent() - start
        print("---------------------")
        print("LogisticRegression.fit: Gradient Descent time taken: \(diff)")
        // Return our thetas
        return thetas
    }
    
    public func solve(for input: [Double]) throws -> Double {
        if input.count != self.thetas.first!.count - 1 {
            throw LogisticRegressionError.incorrectNumberOfFeatures(needed: self.thetas.first!.count - 1)
        }
        // First convert our input to x with column 0 set to 1 for theta_0
        var mutateFeatures = input
        mutateFeatures.insert(1, at: 0)
        var x = Matrix(mutateFeatures)
        // Check to see if we are normalizing
        if self.maxs != nil, self.mins != nil {
            // Normalize the features with the stored mins and maxs
            x = (x - self.mins!) / (self.maxs! - self.mins!)
            print(self.mins!, self.maxs!)
            // Solve
            let target = target(x: x, thetas: self.thetas)
            return target
        } else {
            // Solve
            let target = target(x: x, thetas: self.thetas)
            return target
        }
        }
    }
    
    private func target(x: Matrix, thetas: [Matrix]) -> Double {
        print("---------------------")
        let start = CFAbsoluteTimeGetCurrent()
        // Solve
        var outputs = [(class: Double, prediction: Double)]()
        // Iterate through and solve for each class
        var i = 1.0
        for theta in thetas {
            let output = (1 / (1 + (-(x <*> theta)).exp())).sum()
            outputs.append((i, output))
            i += 1.0
        }
        // Choose the output to be the max
        let output = outputs.max(by: { $0.prediction < $1.prediction })!
        print("LogisticRegression.target: Outputs list \(outputs)")
        print("LogisticRegression.target: For input \(x.grid),found \(output)")
        let diff = CFAbsoluteTimeGetCurrent() - start
        print("LogisticRegression.target: Time taken: \(diff)")
        // Return the class found for this feature set
        return output.class
    }
    
    private class GradientDescent {
        
        public private(set) var theta: Matrix

        init(X: Matrix, y: Matrix, lr: Double, lm: Double, error: Double, regularization: Bool) {
            print("---------------------")
            // Array of costs
            var Js = [Double]()
            // Init theta with all elements 0; X.columns equals the number of features
            // including theta_0
            var theta = Matrix(Array(repeating: 0, count: X.columns)).transpose()
            // Set up the convergence check: computes the current cost and returns
            // true once the cost stops changing by more than our error threshold
            let cost_minimum: () -> Bool = {
                // Compute the logistic (cross-entropy) cost, not a sum of squared errors:
                // J(theta) = -(1/m) * sum[ y*log(h) + (1-y)*log(1-h) ]

                // First get the hypothesis vector h = sigmoid(X * theta)
                let hypothesis = (1 / (1 + (-(X <*> theta)).exp())).transpose()
                // The matrix products below pair each prediction with its label and
                // collapse the per-example terms into a single value
                let cost_vector = hypothesis.log() <*> y + (1 - hypothesis).log() <*> (1 - y)
                let cost_sum = cost_vector.sum() // Already a single value because of the matrix math above
                // The coefficient -1 / m, where m is the number of training examples, i.e. the rows of X
                let coeff = -1.0 / (Double(X.rows))
                // J(theta) is the coefficient times the summed cost, plus the regularization term if enabled
                let J = regularization ? coeff * cost_sum + (lm / (2.0 * Double(X.rows))) * theta.pow(2).sum() : coeff * cost_sum
                // Add this cost to our array of costs
                Js.append(J)
                // Print out this cost
                print("GradientDescent.init: Cost: \(J)")
                // Make sure the cost is decreasing each iteration
//                if Js.count >= 2 { print("GradientDescent.init: Difference: \(Js[Js.count - 2] - Js[Js.count - 1])") }
                if Js.count >= 2 { if Js[Js.count - 2] - Js[Js.count - 1] < 0 { fatalError() } }
                // Converged once the difference between the last two costs is below our error threshold
                return Js.count < 2 ? false : abs(Js[Js.count - 2] - Js[Js.count - 1]) < error
//                return J < 0.01
            }
            // Run the gradient descent algorithm to find our final theta value
            repeat {
                // The gradient coefficient 1 / m
                let coeff = 1.0 / (Double(X.rows))
                // First get the hypothesis vector h = sigmoid(X * theta)
                let hypothesis = (1 / (1 + (-(X <*> theta)).exp()))
                // Then the error vector h - y
                let predicted = hypothesis - y
                // Set up the gradient vector: every theta_j has its own partial derivative
                // dJ/dtheta_j = (1/m) * sum over examples of (h - y) * x_j
                var S = Matrix(0..<theta.count)
                for j in 0..<theta.count {
                    // Each element in S is the summed error term for its theta_j
                    S[0, j] = (predicted * X[column: j]).sum()
                }
                // Update theta (with weight decay when regularizing), then run the convergence check
                theta = regularization ? theta * (1 - lr * lm / Double(X.rows)) - (lr * coeff * S) : theta - (lr * coeff * S)
            } while !cost_minimum()
            // Now we set our final theta
            print("GradienDescent.init: Iterations: \(Js.count)")
            self.theta = theta
        }
    }
}

enum LogisticRegressionError: Error {
    case incorrectNumberOfFeatures(needed: Int)
}

extension Sequence where Element: Hashable {
    func uniqued() -> [Element] {
        var set = Set<Element>()
        return filter { set.insert($0).inserted }
    }
}
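For reference, the quantity GradientDescent above minimizes is the logistic (cross-entropy) cost. Here is a minimal plain-Swift sketch of the same sigmoid hypothesis and cost on [Double] values, which can be useful for checking the Matrix version in isolation (the function names here are mine, not part of the code above):

import Foundation

// Sigmoid hypothesis: h(x) = 1 / (1 + e^(-theta . x))
func hypothesis(_ theta: [Double], _ x: [Double]) -> Double {
    let z = zip(theta, x).reduce(0) { $0 + $1.0 * $1.1 }
    return 1.0 / (1.0 + exp(-z))
}

// Cross-entropy cost: J(theta) = -(1/m) * sum[ y*log(h) + (1-y)*log(1-h) ]
func cost(_ theta: [Double], _ X: [[Double]], _ y: [Double]) -> Double {
    let m = Double(X.count)
    var total = 0.0
    for (x, yi) in zip(X, y) {
        let h = hypothesis(theta, x)
        total += yi * log(h) + (1 - yi) * log(1 - h)
    }
    return -total / m
}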

This is where I run it (my sample data groups numbers into the ranges 0-10, 10-20, and 20-30):

//
//  LogisticView.swift
//  MachineLearning
//
//  Created by Sahil Srivastava on 7/4/21.
//

import SwiftUI

struct LogisticView: View {
    var body: some View {
        Text("Hello,World!")
            .onAppear {
                var inputs = [[Double]]()
                for i in 0..<3000 {
                    let val = Double(i) / 100.0
                    if val <= 10.0 {
                        inputs.append([Double.random(in: 0..<10.0)])
                    } else if val > 10.0 && val <= 20.0 {
                        inputs.append([Double.random(in: 10.0..<20.0)])
                    } else {
                        inputs.append([Double.random(in: 20.0...30.0)])
                    }
                }
                var outputs = [Double]()
                for j in 0..<3000 {
                    let val = Double(j) / 100.0
                    if val <= 10.0 {
                        outputs.append(1) // 0 - 10 is CLASS 1
                    } else if val > 10.0 && val <= 20.0 {
                        outputs.append(2) // 10 - 20 is CLASS 2
                    } else {
                        outputs.append(3) // 20 - 30 is CLASS 3
                    }
                }
                
                let features = [11.0]
                let lr = 0.01
                // Normalized logistic regression is slightly less accurate despite running more iterations
                // Regularization is not a variable factor
                let lrgd = LogisticRegression(inputs: inputs, outputs: outputs, normalize: true, lr: lr, lm: 0.01, error: lr * lr * lr, regularization: true)
                let target = try! lrgd.solve(for: features)
                print("---------------------")
                print("LogisticView.body: Target \(target) found for features \(features)")
            }
    }
}

struct LogisticView_Previews: PreviewProvider {
    static var previews: some View {
        LogisticView()
    }
}
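As a rough sanity check on what solve should see for this data: the single feature spans roughly 0 to 30, so the query feature 11.0 should normalize to about (11 - 0) / (30 - 0) ≈ 0.37, while the bias entry stays 1 because max = 1 and min = 0 were substituted for the constant column. A minimal sketch with assumed, approximate min/max values (not taken from an actual run):

let mins = [0.0, 0.0]    // [bias column, feature], as the class stores them
let maxs = [1.0, 30.0]   // approximate max values for this data set
let query = [1.0, 11.0]  // bias inserted at index 0, as solve() does
let normalized = (0..<query.count).map { j in
    (query[j] - mins[j]) / (maxs[j] - mins[j])
}
print(normalized)        // ≈ [1.0, 0.3667]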

Here is the Matrix struct I'm using, from GitHub: https://github.com/hollance/Matrix/blob/master/Matrix.swift
