问题描述
用于计算熵的代码:
def entropy(column):
    """Return the Shannon entropy, in bits, of the values in ``column``.

    Args:
        column: 1-D array-like of discrete values accepted by
            ``np.unique`` (for example a DataFrame column such as ``df[0]``).

    Returns:
        ``-sum(p * log2(p))`` taken over the relative frequency ``p`` of
        each distinct value. Returns ``0`` when ``column`` is empty.
    """
    _, counts = np.unique(column, return_counts=True)
    # Computed once up front; it is the same for every distinct value.
    total = counts.sum()
    entropy_val = 0
    for count in counts:
        p = count / total
        entropy_val -= p * math.log2(p)
    return entropy_val
其中 column 是数据框中的一个特征列,例如 df[0]。我不太确定接下来该怎么做。有人能给我指个方向吗?我的最终目标是找出信息增益(information gain)最大的特征。
# Entropy of feature columns 0-3 followed by the entropy of ``y``.
# (The former ``entropy_vals = {}`` initialiser was dropped: it was
# overwritten by this tuple before ever being read.)
entropy_vals = (
    entropy(X[0]),
    entropy(X[1]),
    entropy(X[2]),
    entropy(X[3]),
    entropy(y),
)
print(entropy_vals)
# Read the data file; header=None because it has no header row.
df = pd.read_csv('data_banknote_authentication.txt', header=None)
print(df)
# X takes the first four columns, y the last column of the frame.
X = df.iloc[:, :4]
y = df.iloc[:, -1]
def count_labels(rows):
    """Count how many times each distinct value occurs in ``rows``.

    Args:
        rows: Iterable of hashable values.

    Returns:
        A ``dict`` mapping each distinct value to its number of
        occurrences, with keys in first-seen order.
    """
    counts = {}
    for label in rows:
        # ``get`` folds the "first time seen" and "seen before" cases
        # into a single lookup.
        counts[label] = counts.get(label, 0) + 1
    return counts
def entropy(column):
    """Return the Shannon entropy, in bits, of the values in ``column``.

    Args:
        column: 1-D array-like of discrete values accepted by
            ``np.unique`` (for example a DataFrame column such as ``X[0]``).

    Returns:
        ``-sum(p * log2(p))`` taken over the relative frequency ``p`` of
        each distinct value. Returns ``0`` when ``column`` is empty.
    """
    # Only the per-value counts are needed; the distinct values themselves
    # are discarded.
    _, counts = np.unique(column, return_counts=True)
    # Computed once up front; it is the same for every distinct value.
    total = counts.sum()
    entropy_val = 0
    for count in counts:
        p = count / total
        entropy_val -= p * math.log2(p)
    return entropy_val
# Entropy of feature column 0 followed by the entropy of ``y``.
# (The former ``entropy_vals = {}`` initialiser was dropped: it was
# overwritten by this tuple before ever being read.)
entropy_vals = (entropy(X[0]), entropy(y))
print(entropy_vals)
def check_unique(data):
    """Report whether the last column of ``data`` holds one distinct value.

    Args:
        data: Table-like object exposing ``.columns`` and column lookup by
            label (presumably a pandas DataFrame whose last column is the
            class label -- confirm against callers).

    Returns:
        ``True`` when the last column contains exactly one distinct value,
        ``False`` otherwise.
    """
    label_col = data[data.columns[-1]]
    # Echoes the inspected column to stdout on every call.
    print(label_col)
    return len(np.unique(label_col)) == 1
def categorize_data(data):
    """Return the most frequent value in the last column of ``data``.

    Args:
        data: Table-like object exposing ``.columns`` and column lookup by
            label (presumably a pandas DataFrame whose last column is the
            class label -- confirm against callers).

    Returns:
        The value with the highest count. ``np.unique`` returns the values
        sorted and ``argmax`` picks the first maximum, so a tie resolves to
        the smallest tied value.
    """
    label_col = data[data.columns[-1]]
    values, counts = np.unique(label_col, return_counts=True)
    # Echoes the distinct values and their counts to stdout on every call.
    print(values, counts)
    return values[counts.argmax()]
def split(data):
    """Partition ``data`` around its mean.

    Args:
        data: Numeric container that supports boolean-mask indexing
            (for example a NumPy array or a pandas Series).

    Returns:
        A ``(x_less, x_greater)`` pair: the elements less than or equal to
        the mean, and the elements strictly greater than the mean.
    """
    # The mean is the shared threshold for both masks, so compute it once.
    threshold = np.mean(data)
    x_less = data[data <= threshold]
    x_greater = data[data > threshold]
    return x_less, x_greater
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)