问题描述
我有一个 `.dbf` 文件,我想将其转换为 Pandas `DataFrame`,但 `DataFrame` 无法直接转换该数据。
解决方法
使用我的 `dbf` 库,以下函数可以完成这项工作:
def dbf_to_dataframe(filename):
    """
    Convert the dbf table at ``filename`` into a pandas DataFrame.

    Field names are preserved and every dbf field type is mapped to a
    pandas dtype:

    * ``B``, ``F``, ``Y`` and ``N`` with decimals -> ``float64``
    * ``I`` and ``N`` without decimals -> ``int64`` (``Int64`` when the
      column contains nulls, because ``int64`` cannot store them)
    * ``C``, ``M`` -> ``string``
    * ``L`` -> ``boolean``
    * ``D``, ``T`` -> ``datetime64[ns]``

    ``G`` and ``P`` fields are skipped and do not appear in the result.

    Parameters
    ----------
    filename
        Location of the dbf file; passed unchanged to ``dbf.Table``.

    Returns
    -------
    pandas.DataFrame
        One column per retained field, one row per record.

    Raises
    ------
    KeyError
        If the table contains a field type that has no dtype mapping.
    """
    from datetime import date, datetime, time

    import dbf
    import numpy as np
    import pandas as pd

    dtype_for = {
        'B': 'float64',
        'C': 'string',
        'D': 'datetime64[ns]',
        'F': 'float64',
        'I': 'int64',
        'L': 'boolean',
        'M': 'string',
        'N': 'int64',
        'T': 'datetime64[ns]',
        'Y': 'float64',
    }

    names = []
    types = []
    columns = []
    # One entry per table field, in field order: the list collecting that
    # field's values, or None when the field is skipped.  A record yields a
    # value for every field, so this keeps each value aligned with its column
    # even when skipped fields sit between retained ones.
    targets = []

    table = dbf.Table(filename)
    for name in table.field_names:
        ftype, _size, decimals, _ = table.field_info(name)
        ftype = chr(ftype)
        if ftype in ('G', 'P'):
            targets.append(None)
            continue
        if ftype == 'N' and decimals:
            # numeric fields with decimal places hold floats, not integers
            ftype = 'F'
        dtype = dtype_for[ftype]
        values = []
        names.append(name)
        types.append(dtype)
        columns.append(values)
        targets.append(values)

    midnight = time()
    with table:
        for rec in table:
            for values, value in zip(targets, rec):
                if values is None:
                    continue
                if value is None:
                    value = np.nan
                elif isinstance(value, date) and not isinstance(value, datetime):
                    # Only plain dates are promoted to midnight datetimes.
                    # datetime is a subclass of date, and combine() would
                    # discard the time of day of an existing datetime.
                    value = datetime.combine(value, midnight)
                values.append(value)

    data_recs = {}
    for name, values, dtype in zip(names, columns, types):
        if dtype == 'int64' and any(v is np.nan for v in values):
            # int64 cannot represent missing values; fall back to the
            # nullable integer dtype only for columns that need it.
            dtype = 'Int64'
        data_recs[name] = pd.Series(values, dtype=dtype)
    return pd.DataFrame(data_recs)