问题描述
我想生成一个由两个数组组成的人工数据集:X 的形状应为 (100,2),两列分别是收入和年龄;y 的形状应为 (100,1)。
但在循环中逐条追加数据之后,我得到的 X 形状只有 (200,1)。
感谢您的支持。
from scipy.stats import norm
import random
from numpy import *
import numpy as np
from ast import literal_eval
from ast import literal_eval
from pandas import DataFrame
# Function for N points in k clusters to generate artificial data
def create_clustered_data(N, k):
    """Generate artificial (income, age) samples grouped into k clusters.

    Each cluster gets a random income centroid in [20000, 200000) and a
    random age centroid in [20, 70); every point is drawn from normal
    distributions around those centroids (std 10000 for income, 2 for age).

    Args:
        N: Total number of points requested. Each cluster receives
            ``int(N / k)`` points, so any remainder is dropped.
        k: Number of clusters.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``y`` holds one cluster index
        per point. ``X`` is a FLAT 1-D array of length ``2 * len(y)`` with
        income and age interleaved (income0, age0, income1, age1, ...):
        ``np.append`` without an ``axis`` argument flattens its inputs, so
        the result is not the (n_points, 2) matrix one might expect.
        Use ``X.reshape(-1, 2)`` to recover the two-column layout.

    Note:
        Re-seeds NumPy's global random generator on every call, so repeated
        calls return identical data.
    """
    # Seed NumPy's global RNG -- the generator actually used below -- so
    # that the generated data are reproducible.
    np.random.seed(10)
    points_per_cluster = int(float(N) / k)
    X = np.array([])
    y = np.array([])
    for i in range(k):
        income_centroid = np.random.uniform(20000, 200000)
        age_centroid = np.random.uniform(20, 70)
        for _ in range(points_per_cluster):
            # No axis is given, so the pair is appended to the flattened X.
            X = np.append(
                X,
                [
                    np.random.normal(income_centroid, 10000),
                    np.random.normal(age_centroid, 2),
                ],
            )
            y = np.append(y, i)
    return X, y
# Build the sample data set: 100 points spread over 5 clusters.
X, y = create_clustered_data(100, 5)
# Show the first four entries; income and age come out appended together
# in a single dimension.
print(X[:4])
X.shape  # wanted: (100, 2) -- instead the array holds 200 values in one dimension
X.ndim  # wanted: 2 -- currently 1
解决方法
只需将 np.array 换成标准的 Python 列表,然后使用 += 运算符把值追加到列表中。
您还需要把每条记录再用一层方括号 [] 括起来,例如:
[[np.random.normal(income_centroid,10000),np.random.normal(age_centroid,2)]]
# Function for N points in k clusters to generate artificial data
def create_clustered_data(N, k):
    """Generate artificial (income, age) samples grouped into k clusters.

    Each cluster gets a random income centroid in [20000, 200000) and a
    random age centroid in [20, 70); every point is drawn from normal
    distributions around those centroids (std 10000 for income, 2 for age).

    Args:
        N: Total number of points requested. Each cluster receives
            ``int(N / k)`` points, so any remainder is dropped.
        k: Number of clusters.

    Returns:
        A tuple ``(X, y)`` of plain Python lists. ``X`` contains one
        ``[income, age]`` pair per point and ``y`` the matching cluster
        index, so ``np.array(X)`` has shape (n_points, 2).

    Note:
        Re-seeds NumPy's global random generator on every call, so repeated
        calls return identical data.
    """
    # Seed NumPy's global RNG -- the generator actually used below -- so
    # that the generated data are reproducible.
    np.random.seed(10)
    points_per_cluster = int(float(N) / k)
    X = []
    y = []
    for i in range(k):
        income_centroid = np.random.uniform(20000, 200000)
        age_centroid = np.random.uniform(20, 70)
        for _ in range(points_per_cluster):
            # The extra pair of brackets makes each record ONE list element,
            # which is what gives the two-column layout after conversion.
            X += [[
                np.random.normal(income_centroid, 10000),
                np.random.normal(age_centroid, 2),
            ]]
            y += [i]
    return X, y
# Generate the data (100 points, 5 clusters) and turn both returned
# sequences into NumPy arrays in one step.
X, y = (np.array(part) for part in create_clustered_data(100, 5))
在您执行 X = np.array(X) 完成转换之后,X.shape 将返回 (100,2)。