问题描述
我想根据一组唯一值及其各自的出现次数绘制正态分布图,如下所示。下面的代码片段可以完成这项工作,但应该有更 Pythonic 的实现方式,而不必把每个唯一值按出现次数重复多次。
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Unique sample values and the number of times each one was observed.
uniqueVals = [1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0]
occurrences = [14989.0, 13386.0, 14419.0, 8368.0, 11019.0, 11573.0, 7989.0, 7170.0]
# Avoid this portion to create multiple occurrence of same data
# _________________________________
# Expand the (value, count) pairs into one flat list holding every observation.
vals = []
for v, occr in zip(uniqueVals, occurrences):
    # Counts are stored as floats, so cast before using them as a repeat count.
    for _ in range(int(occr)):
        vals.append(v)
# ------------------------------
# Summary statistics of the expanded sample.
mean = np.mean(vals)
sd = np.std(vals)
# Normal density evaluated at every observation, using the sample mean and sd.
pdf = stats.norm.pdf(vals, mean, sd)
# Standard score of every observation.
zscore = stats.zscore(vals)
# Draw the normal density against the sample values on the bottom x-axis.
fig = plt.figure(figsize=(6, 5))
ax = fig.add_subplot(111)
ax.plot(vals, pdf, c='b', marker='.')
ax.set_xlabel("vals", fontsize=10)
ax.set_ylabel("p", fontsize=10)
# The on/off flag is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is rejected by newer releases, while the
# positional form works on both old and new versions.
ax.grid(True, which='major', color='0.5', linestyle='--')
# Second x-axis (sharing the y-axis) on top of the figure for the z-scores.
ax2 = ax.twiny()
ax2.plot(zscore, marker='.')
ax2.grid(True, color='r', linestyle='-')
ax2.set_xlabel("zscore", fontsize=10)
plt.show()
解决方法
作为一个想法:
# Cast both input lists to integer NumPy arrays.
v = np.asarray(uniqueVals, dtype=int)
occr = np.asarray(occurrences, dtype=int)
#---- we take smaller numbers for the example
v = np.array([1, 3, 5])
occr = np.array([2, 4, 6])
#---- now we need the list again
vL = v.tolist()
oL = occr.tolist()
#---- try this just to check: each value repeated `count` times
# Loop names differ from `v` so the array defined above is not overwritten.
for val, cnt in zip(vL, oL):
    print([val] * cnt)
#---- now let's build the array
# Start from an empty array and append one run of repeated values per pass.
A = np.array([])
for val, cnt in zip(vL, oL):
    B = np.array([val] * cnt)
    # The empty seed array is float, so cast back to int after concatenating.
    A = np.r_[A, B].astype(int)
# NOTE: np.repeat(vL, oL) builds the same array in a single call.
A
输出:
[1,1]
[3,3,3,3]
[5,5,5,5,5,5]
array([1,1,3,3,3,3,5,5,5,5,5,5])