问题描述
我想使用 Faker 库为我的研究生成一份假数据集,但我无法让生成的名字与这个人的性别对应起来(例如性别为 "M" 时应生成男性名字)。能否在这方面给我一些帮助?函数如下:
# Question's version, kept exactly as posted (indentation was lost in the paste).
# Seeds numpy's global RNG and `fake` with `seed`, then builds a list of `num`
# dicts, one fake person record each, and returns that list.
# NOTE(review): `fake` is not defined in this snippet -- presumably a
# faker.Faker() instance created elsewhere; confirm against the full script.
def faker_categorical(num=1,seed=None):
np.random.seed(seed)
fake.seed_instance(seed)
output = [
{
# BUG (the subject of the question): `"gender"=="M"` compares two string
# literals, not the value drawn for the "gender" key, so the condition is
# always False and fake.first_name_female() is used for every record.
# The drawn gender is never bound to a name, so the "Givenname" expression
# has nothing it could refer to.
"gender": np.random.choice(["M","F"],p=[0.5,0.5]),"Givenname": fake.first_name_male() if "gender"=="M" else fake.first_name_female(),"Surname": fake.last_name(),"Zipcode": fake.zipcode(),"Date of Birth": fake.date_of_birth(),"country": np.random.choice(["United Kingdom","France","Belgium"]),}
for x in range(num)
]
return output
# Generate 1000 records (unseeded) and load the list of dicts into a DataFrame.
df = pd.DataFrame(data=faker_categorical(num=1000))
解决方法
您的问题描述得不太清楚,但我想您要找的是一种在代码中两个不同位置引用同一次 np.random.choice() 结果的方法。原代码中的 "gender"=="M" 比较的是两个字符串字面量,结果永远为 False,因此总是生成女性名字。解决办法很简单:先把 np.random.choice() 的结果赋给一个临时变量,然后在这两个位置都引用该变量。
def faker_categorical(num=1, seed=None):
    """Return a list of ``num`` fake person records with gender-matched names.

    Both numpy's global RNG and the module-level ``fake`` object are seeded
    with ``seed`` before any record is generated.

    NOTE(review): ``fake`` is not defined in this snippet -- presumably a
    ``faker.Faker()`` instance; confirm against the full script.

    Args:
        num: Number of records to generate.
        seed: Value passed to ``np.random.seed`` and ``fake.seed_instance``.

    Returns:
        A list of dicts with the keys "gender", "GivenName", "Surname",
        "Zipcode", "Date of Birth" and "country".
    """
    np.random.seed(seed)
    fake.seed_instance(seed)

    def build_record():
        # Draw the gender once and bind it to a name so that the stored value
        # and the given-name choice below use the same draw.
        gender = np.random.choice(["M", "F"], p=[0.5, 0.5])
        if gender == "M":
            given_name = fake.first_name_male()
        else:
            given_name = fake.first_name_female()
        return {
            "gender": gender,
            "GivenName": given_name,
            "Surname": fake.last_name(),
            "Zipcode": fake.zipcode(),
            "Date of Birth": fake.date_of_birth(),
            "country": np.random.choice(["United Kingdom", "France", "Belgium"]),
        }

    return [build_record() for _ in range(num)]