问题描述
我正在从头开始构建决策树，而不使用 scikit-learn 库。我目前用来划分样本的方法是 K 折交叉验证。我想从 K 折方法切换到 train_test_split 方法。我该如何计算准确度？预测部分我已经完成了。
我的 K 折方法
def cross_validation_split(dataset, n_folds):
    """Partition *dataset* into ``n_folds`` random, equally sized folds.

    Rows are drawn without replacement using ``randrange``.  When
    ``len(dataset)`` is not divisible by ``n_folds``, the leftover rows
    are simply discarded (same behaviour as the original).
    """
    remaining = list(dataset)          # working copy; rows are popped out
    per_fold = len(dataset) // n_folds
    folds = []
    for _ in range(n_folds):
        fold = []
        while len(fold) < per_fold:
            pick = randrange(len(remaining))
            fold.append(remaining.pop(pick))
        folds.append(fold)
    return folds
# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
    """Return classification accuracy as a percentage in [0.0, 100.0].

    Parameters
    ----------
    actual : sequence of ground-truth labels
    predicted : sequence of predicted labels (same length as *actual*)

    Robustness fix: the original raised ZeroDivisionError on empty input;
    an empty sequence now yields 0.0.
    """
    if not actual:
        return 0.0
    # zip pairs each prediction with its ground truth; sum counts matches.
    correct = sum(1 for a, p in zip(actual, predicted) if a == p)
    return correct / len(actual) * 100.0
# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
    """Score *algorithm* via k-fold cross validation.

    For each fold: train on the remaining folds, predict the held-out
    fold with its labels masked, and record accuracy.  Returns the list
    of per-fold accuracy percentages.
    """
    folds = cross_validation_split(dataset, n_folds)
    scores = []
    for held_out in folds:
        # Training set = every fold except the held-out one, flattened.
        others = list(folds)
        others.remove(held_out)
        training = [row for block in others for row in block]
        # Copy the held-out rows and blank the label so the algorithm
        # cannot peek at the answer.
        masked = []
        for row in held_out:
            clone = list(row)
            clone[-1] = None
            masked.append(clone)
        predictions = algorithm(training, masked, *args)
        truth = [row[-1] for row in held_out]
        scores.append(accuracy_metric(truth, predictions))
    return scores
我的 train_test_split 方法
def train_test_split(X, y, test_size=0.33):
    """Sequentially split arrays X and y into train/test partitions.

    The cut point keeps ``int((1 - test_size) * n) + 1`` rows for
    training (note the deliberate ``+ 1``, preserved from the original).
    No shuffling happens here — callers are expected to shuffle first.

    Returns ``(X_train, X_test, y_train, y_test)``.
    """
    cut = int((1.0 - test_size) * X.shape[0]) + 1
    # Plain slicing is equivalent to np.split(arr, [cut]) for 1 cut point.
    return X[:cut], X[cut:], y[:cut], y[cut:]
数据是如何拆分的
# Load the data, subsample it, and split into train/test partitions.
df = pd.read_csv("corrected.csv")
# NOTE(review): replace=True samples WITH replacement, so duplicate rows can
# appear in the subsample — confirm that is intended.
df = df.sample(frac=0.33, random_state=255, replace=True)
data = df.to_numpy()
X = data[:, :-1]        # every column except the last is a feature
y = data[:, -1] - 1     # last column is the label, shifted to start at 0
# BUG FIX: train_test_split returns FOUR arrays; the original unpacked them
# into two names (X_train, y_test), which raises
# "ValueError: too many values to unpack".
X_train, X_test, y_train, y_test = train_test_split(X, y)
决策树
def decision_tree(train, test, max_depth, min_size):
    """Fit a decision tree on *train* and return predictions for *test*.

    Parameters
    ----------
    train : list of rows used to build the tree
    test : list of rows to predict
    max_depth : maximum tree depth passed to ``build_tree``
    min_size : minimum node size passed to ``build_tree``

    BUG FIX: the original called ``build_tree(train, min_size)``, silently
    ignoring ``max_depth``; the depth limit is now forwarded.  (Assumes the
    canonical ``build_tree(train, max_depth, min_size)`` signature — TODO
    confirm against the project's build_tree definition.)
    """
    tree = build_tree(train, max_depth, min_size)
    predictions = []
    for row in test:
        predictions.append(predict(tree, row))
    return predictions
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)