如何将 .dbf 文件转换为 Pandas DataFrame?

问题描述

我有一个 .dbf 文件,想将其转换为 Pandas DataFrame,但 Pandas 无法直接读取这种格式的数据。

解决方法

使用我编写的 dbf 库,以下函数可以完成这项工作:

def dbf_to_dataframe(filename):
    """
    Convert the dbf table at `filename` into a pandas DataFrame.

    Field names become column names and each dbf field type code is mapped
    to a pandas dtype.  Fields whose type code is 'G' or 'P' are left out
    of the result.  'N' fields that declare decimals are treated as floats.

    Missing values (None in the table) become NaN/NaT/<NA> in the frame.
    Integer columns use 'int64' when fully populated and switch to the
    nullable 'Int64' dtype only when they contain a missing value, because
    plain 'int64' cannot hold one.

    Raises KeyError if a field has a type code with no dtype mapping.
    """
    import dbf
    import numpy as np
    import pandas as pd
    from datetime import date
    from datetime import datetime
    from datetime import time

    dtype_by_field_type = {
        'B': 'float64',
        'C': 'string',
        'D': 'datetime64[ns]',
        'F': 'float64',
        'I': 'int64',
        'L': 'boolean',
        'M': 'string',
        'N': 'int64',
        'T': 'datetime64[ns]',
        'Y': 'float64',
    }

    names = []
    types = []
    table = dbf.Table(filename)
    for name in table.field_names:
        ftype, _size, decimals, _ = table.field_info(name)
        ftype = chr(ftype)
        if ftype in 'GP':
            continue
        if ftype == 'N' and decimals:
            ftype = 'F'
        dtype = dtype_by_field_type[ftype]
        names.append(name)
        types.append(dtype)

    series = [[] for _ in names]
    has_missing = [False] * len(names)
    with table:
        for rec in table:
            # Look values up by field name: iterating the record would also
            # yield the skipped 'G'/'P' fields and shift every later column.
            for i, name in enumerate(names):
                value = rec[name]
                if value is None:
                    value = np.nan
                    has_missing[i] = True
                elif isinstance(value, date) and not isinstance(value, datetime):
                    # Only plain dates are promoted to midnight datetimes;
                    # datetime is a subclass of date, and combining one with
                    # time() would discard its time of day.
                    value = datetime.combine(value, time())
                series[i].append(value)

    data_recs = {}
    for name, values, dtype, missing in zip(names, series, types, has_missing):
        if missing and dtype == 'int64':
            # NaN cannot be stored in 'int64'; fall back to nullable 'Int64'.
            dtype = 'Int64'
        data_recs[name] = pd.Series(values, dtype=dtype)
    return pd.DataFrame(data_recs)