用遗传算法选择重要特征的几个错误:

问题描述

代码

import numpy as np
import pandas as pd
import math
import target as target
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


dataset = pd.read_csv('Tehran_hies98.csv')

to_drop = ['Address','weight','WH','inc3','Income','exp1','exp4','exp5','exp6','exp7','exp8','exp11','exp12','exp13','Income_Mis','exp2','exp3','exp9','inc2','inc1','exp14']

dataset.drop(to_drop,inplace=True,axis=1)

dataset = pd.get_dummies(dataset,columns=['HSize','SSex','SAge','SMadrak','SActivity','SMarital','Tasarrof','Otagh','ZirBana'],drop_first=True)

target,feature_list = 'DV',[i for i in dataset.columns if i not in target]


def init_population(n,c):
     return np.array([[math.ceil(e) for e in pop] for pop in (np.random.rand(n,c) - 0.5)]),np.zeros((2,c))-1

def single_poin_crossover(population):
       r,c,n = population.shape[0],population.shape[1],np.random.randint(1,population.shape[1])
       for i in range(0,r,2):
             population[i],population[i + 1] = np.append(population[i][0:n],population[i + 1] 
             [n:c]),np.append(
             population[i + 1][0:n],population[i][n:c])
       return population

def flip_mutation(population):
      return population.max() - population

def random_selection(population):
   r = population.shape[0]
   new_population = population.copy()
   for i in range(r):
       new_population[i] = population[np.random.randint(0,r)]
   return new_population

def get_fitness(data,feature_list,target,population):
   fitness = []
   for i in range(population.shape[0]):
       columns = [feature_list[j] for j in range(population.shape[1]) if population[i,j] == 1]
       fitness.append(predictive_model(data[columns],data[target]))
   return fitness

def predictive_model(X,y):
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=7)
    lr = LogisticRegression(solver='liblinear',max_iter=100,random_state=7)
    lr.fit(X_train,y_train)
    return accuracy_score(y_test,lr.predict(X_test))


def genetic_algorithm(data,n,max_iter):
  c = len(feature_list)

  population,memory = init_population(n,c)
  temp1 = population
  temp2 = memory
  population,memory = temp2,temp1

  fitness = get_fitness(data,population)

  optimal_value = max(fitness)
  optimal_solution = population[np.where(fitness == optimal_value)][0]

  for i in range(max_iter):
      population = random_selection(population)
      population = single_poin_crossover(population)
      if np.random.rand() < 0.3:
          population = flip_mutation(population)
      temp1 = population
      temp2 = memory
      population,temp1

      fitness = get_fitness(data,population)

      if max(fitness) > optimal_value:
          optimal_value = max(fitness)
          optimal_solution = population[np.where(fitness == optimal_value)][0]

  return optimal_solution,optimal_value


feature_set,acc_score = genetic_algorithm(dataset,10,1000)
feature_set = [feature_list[i] for i in range(len(feature_list)) if feature_set[i] == 1]

print('Optimal Feature Set\n',feature_set,'\noptimal Accuracy = ',round(acc_score * 100),'%')

一个错误: target,[i for i in dataset.columns if i not in target] 类型错误:“模块”类型的参数不可迭代

代码

target,[i for i in dataset.columns if i not in target]

第二个错误: 如果我删除上面代码的这部分:

 if i not in target

出现这个错误

从 err 提高 KeyError(key) 密钥错误:'DV'

我写了这段代码,到目前为止出现了这个问题。

如果您发现任何其他错误,请帮助我修复。

如果可能,请帮助我或输入正确的代码

谢谢。

解决方法

暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!

如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@)