Pandas 数据框中 .diff() 函数只返回 NaN 值

问题描述

我想在 for 循环中对 log_price 列使用 .diff() 函数。我想得到的是 df_DC_product 数据框中旧的 log_price 值减去新的 log_price 值。但当我在 for 循环中使用 .diff() 时,它只返回 NaN 值。有人知道这是为什么吗?感谢您的帮助。

# Distinct geographies (DCs) and products found in data4, in first-seen order.
DC_list = data4['Geography'].drop_duplicates().tolist()
Product_List = data4['Product'].drop_duplicates().tolist()

# One independent empty list per metric name.
# NOTE(review): neither my_dict nor Product_List is read by the visible code
# below -- confirm whether they are still needed.
my_dict = {
    metric_name: []
    for metric_name in (
        "Product",
        "Geography",
        "Base Dollar Sales",
        "Base Unit Sales",
        "Price Numerator",
        "Price Denominator",
        "Demand Numerator",
        "Demand Denominator",
        "% Change in Price",
        "% Change in Demand",
        "Price Elasticity of Demand",
    )
}

# Accumulates one summary row (a plain list) per DC/Product pair.
dc_product_ped_with_metrics_all = []

# Build one summary row per (DC, Product) pair and collect the rows in
# dc_product_ped_with_metrics_all.
for DC in DC_list:

    # Rows belonging to the current DC. Filtering first and copying only the
    # subset avoids duplicating all of data4 on every iteration.
    df_DC = data4.loc[data4['Geography'] == DC].copy()
    # Products that occur for this DC, in first-seen order.
    Product_list = df_DC['Product'].drop_duplicates().tolist()

    for Product in Product_list:

        # Rows for this DC/Product pair. The copy lets the derived columns
        # below be added without writing into a slice of df_DC.
        df_DC_product = df_DC.loc[df_DC['Product'] == Product].copy()

        # Source columns are picked by position; presumably 5 is log_price and
        # 6 is log_units -- TODO confirm against data4's column order. The
        # derived columns are appended at the end, so these positions do not
        # shift while they are being added.
        price_col = df_DC_product.iloc[:, 5]
        demand_col = df_DC_product.iloc[:, 6]

        # .diff() is "current row minus previous row": the first row of every
        # group is always NaN, so a DC/Product pair with a single row yields
        # NaN for pn/dn and for everything derived from them. Negate the
        # result if "previous minus current" is what is wanted.
        df_DC_product['pn'] = price_col.diff()
        df_DC_product['price_d'] = np.divide(price_col.cumsum(), 2)
        df_DC_product['dn'] = demand_col.diff()
        df_DC_product['dd'] = np.divide(demand_col.cumsum(), 2)
        df_DC_product['% Change in Demand'] = np.divide(df_DC_product['dn'], df_DC_product['dd']) * 100
        df_DC_product['% Change in Price'] = np.divide(df_DC_product['pn'], df_DC_product['price_d']) * 100
        df_DC_product['ped'] = np.divide(df_DC_product['% Change in Demand'], df_DC_product['% Change in Price'])

        # Aggregate the group into scalars. Product and DC are taken directly
        # from the loop variables and must not be rebound inside the loop.
        sales = df_DC_product['Base_Dollar_Sales'].sum()
        qty = df_DC_product['Base_Unit_Sales'].sum()
        price = df_DC_product['Price'].mean()
        price_numerator = df_DC_product['pn'].mean()
        price_denominator = df_DC_product['price_d'].sum()
        demand_numerator = df_DC_product['dn'].mean()
        demand_denominator = df_DC_product['dd'].sum()
        delta_demand = df_DC_product['% Change in Demand'].sum()
        delta_price = df_DC_product['% Change in Price'].mean()
        ped = df_DC_product['ped'].mean()

        # Twelve values per row: two labels followed by ten aggregates.
        dc_product_ped_with_metrics = [
            Product, DC, sales, qty, price,
            price_numerator, price_denominator,
            demand_numerator, demand_denominator,
            delta_demand, delta_price, ped,
        ]

        # list.append mutates the list in place; no reassignment is needed.
        dc_product_ped_with_metrics_all.append(dc_product_ped_with_metrics)

        
# Column labels for the summary table; one label per value in each
# collected row.
columns = [
    'Product',
    'Geography',
    'Sales',
    'Qty',
    'Price',
    'Price Numerator',
    'Price Denominator',
    'Demand Numerator',
    'Demand Denominator',
    '% Change in Demand',
    '% Change in Price',
    'Price Elasticity of Demand',
]

# Turn the accumulated list of rows into a DataFrame. From here on the name
# refers to the DataFrame, no longer to the list.
dc_product_ped_with_metrics_all = pd.DataFrame(
    dc_product_ped_with_metrics_all,
    columns=columns,
)
# Bare expression: displays the frame when run as the last line of a notebook cell.
dc_product_ped_with_metrics_all

enter image description here

解决方法

.append() 不会就地更新您的数据框。您需要把它的返回值重新赋值给该数据框变量。

# NOTE(review): reassigning the result of .append() is only correct when
# dc_product_ped_with_metrics_all is a pandas DataFrame (DataFrame.append
# returned a new frame; it was removed in pandas 2.0). In the question's code
# the variable is a plain list while these loops run: list.append() mutates
# the list in place and returns None, so this reassignment would replace the
# list with None -- confirm which type is intended before applying this.
for DC in DC_list:
    # your code
    for Product in Product_list:
        # your code
        dc_product_ped_with_metrics_all = dc_product_ped_with_metrics_all.append(dc_product_ped_with_metrics)