问题描述
我正在 Jupyter Notebook 中使用 Faker 库模拟数据集。
import numpy as np
import pandas as pd
from faker import Faker
# Shared Faker instance used by the dataset-simulation functions.
fake = Faker()
import random
# Faker draws from its own random.Random instance, so seeding NumPy alone
# does not make the generated values reproducible; seed Faker explicitly too.
Faker.seed(42)
np.random.seed(42)
def example_dataset_simulation(samples):
    """Build a DataFrame of fake person records.

    Args:
        samples: Number of rows to generate.

    Returns:
        A DataFrame indexed 0..samples-1 with the columns 'ID' (the strings
        "1".."samples"), 'Prefix', 'Forename' and 'Surname', the last three
        filled by calling the module-level ``fake`` once per row.
    """
    df = pd.DataFrame(index=np.arange(samples))
    # IDs are 1-based and stored as strings, not integers.
    df['ID'] = [str(i) for i in range(1, samples + 1)]
    df['Prefix'] = [fake.prefix_male() for _ in range(samples)]
    df['Forename'] = [fake.first_name_male() for _ in range(samples)]
    df['Surname'] = [fake.last_name_nonbinary() for _ in range(samples)]
    return df
# Generate a 500-row dataset; the bare `df` expression below is what the
# notebook cell renders as its output.
df = example_dataset_simulation(500)
df
现在，我希望修改这个函数，使其能够根据传入的整数参数 cols 添加 n 列。
所需的循环代码:
# (column name, Faker expression) pairs, in the order the columns are added.
# Named COLUMN_GENERATORS rather than `list` so the builtin is not shadowed.
COLUMN_GENERATORS = [
    ["Prefix", "fake.prefix_male()"],
    ["Forename", "fake.first_name_male()"],
    ["Surname", "fake.last_name_nonbinary()"],
    ["Suffix", "fake.suffix_male()"],
    ["dob", "fake.date()"],
    ["e-mail", "fake.company_email()"],
    ["Telephone", "fake.phone_number()"],
]


def example_dataset_simulation(samples, cols):
    """Build a DataFrame of fake records from the column table.

    Creates the 'Prefix' column once, then assigns one column for each of
    the first ``cols - 1`` entries of COLUMN_GENERATORS.

    NOTE: the first table entry is also 'Prefix', so the loop overwrites the
    column created up front instead of adding a new one. For
    ``2 <= cols <= 8`` the result therefore has ``cols - 1`` columns.

    Args:
        samples: Number of rows to generate.
        cols: Upper bound used to slice the column table (``[:cols - 1]``).

    Returns:
        A DataFrame indexed 0..samples-1.
    """
    df = pd.DataFrame(index=np.arange(samples))
    df['Prefix'] = [fake.prefix_male() for _ in range(samples)]  # once
    for col_name, method in COLUMN_GENERATORS[:cols - 1]:
        # Each expression string is evaluated once per row against the
        # module-level `fake`; the strings are hard-coded in the table above.
        df[str(col_name)] = [eval(method) for _ in range(samples)]
    return df
输出:
500 rows × 3 columns
解决方法
`eval()` 正是我想要的。
# (column name, Faker expression) pairs, in the order the columns are added.
# Named COLUMN_GENERATORS rather than `list` so the builtin is not shadowed.
COLUMN_GENERATORS = [
    ["Prefix", "fake.prefix_male()"],
    ["Forename", "fake.first_name_male()"],
    ["Surname", "fake.last_name_nonbinary()"],
    ["Suffix", "fake.suffix_male()"],
    ["DOB", "fake.date()"],
    ["e-mail", "fake.company_email()"],
    ["Telephone", "fake.phone_number()"],
]


def example_dataset_simulation(samples, cols):
    """Build a DataFrame of fake records with the first ``cols`` columns.

    Args:
        samples: Number of rows to generate.
        cols: Number of columns to take from the start of COLUMN_GENERATORS.
            Values larger than the table yield every column in the table.

    Returns:
        A DataFrame indexed 0..samples-1 with one column per selected table
        entry, in table order.

    Raises:
        ValueError: If ``cols`` is negative (a negative slice bound would
            silently select columns counted from the end of the table).
    """
    if cols < 0:
        raise ValueError(f"cols must be non-negative, got {cols}")
    df = pd.DataFrame(index=np.arange(samples))
    # 'Prefix' is the first table entry, so it needs no separate up-front
    # assignment; the loop alone produces exactly `cols` columns.
    for col_name, method in COLUMN_GENERATORS[:cols]:
        # eval() is acceptable here only because the expression strings are
        # hard-coded in the table above; never feed it external input.
        df[str(col_name)] = [eval(method) for _ in range(samples)]
    return df
# Generate 500 rows with cols=6; the bare `df` expression below is what the
# notebook cell renders as its output.
df = example_dataset_simulation(500,6)
df
>>> 500 rows × 6 columns