DPLYR 等价于多列的 n_distinct

问题描述

我有一个包含日期和位置的数据框,我需要的是位置天数的总和。同时总结销售的单位数量。我从 sql 数据库提取此数据,因此首选与 DBPLYR 兼容的数据,但不是必需的。

寻找看起来像这样的东西:

[![在此处输入图片描述][1]][1]

可重现的 DF

foo <- structure(list(Location_ID = c(5L,5L,38L,9003L,9003L),Date = structure(c(1624147200,1624233600,1624320000,1624406400,1624492800,1624579200,1624147200,1624579200),tzone = "UTC",class = c("POSIXct","POSIXt")),units = c(11,4,10,13,17,7,14,10)),row.names = c(NA,-10L),groups = structure(list(Location_ID = c(5L,.rows = structure(list(1:6,7L,8:10),ptype = integer(0),class = c("vctrs_list_of","vctrs_vctr","list"))),-3L),class = c("tbl_df","tbl","data.frame"),.drop = TRUE),class = c("grouped_df","tbl_df","data.frame")) 

原始代码

foo <- item_sales_fact %>%
  left_join(product,by = "Product_Key") %>%
  left_join(calendar,by = "Calendar_Key") %>%
  left_join(organization,by = "Organization_Key") %>%
  filter(Product_Key == 32976,Date >= as.Date("2021-06-20"),Date <= as.Date("2021-06-25")) %>%
  select(Location_ID,Date,Qty_Sold) %>%
  collect()
SELECT "Location_ID","Date","Qty_Sold"
FROM (SELECT "Calendar_Key","LHS"."Organization_Key" AS "Organization_Key","Product_Key","Promotion_Key","Shift_Key","ETL_source_system_key","Pack_Size","Qty_Sold","Inv_Unit_Qty","Extended_Cost","Extended_Purchase_Rebate","Extended_Sales_Rebate","Extended_Sales","Ent_Source_Hdr_Key","Ent_Source_Dtl_Key","UPC_Sell_Unit_Desc","UPC","Sell_Unit_Desc","Sell_Unit_Qty","Item_ID","Item_Desc","Sub_Category_ID","Sub_Category_Desc","Category_ID","Category_Desc","Department_ID","Department_Desc","Size_Desc","Item_Type","GrP_level_4_ID","GrP_level_4_Desc","Alt_GrP_level_1_ID","Alt_GrP_level_1_Desc","Alt_GrP_level_2_ID","Alt_GrP_level_2_Desc","Alt_GrP_level_3_ID","Alt_GrP_level_3_Desc","Create_Date","Ent_Item_Number","UPC_discontinue_Date","Scan_Modifier","IsPurchasable","Purchase_discontinue_Date","IsSellable","Sales_discontinue_Date","Audit_Flag","Day_Of_Week_ID","Day_Of_Week","Holiday","Type_Of_Day","Calendar_Month_No","Calendar_Month_Name","Calendar_Qtr_No","Calendar_Qtr_Desc","Calendar_Year","Fiscal_Week","Fiscal_Period_No","Fiscal_Period_Desc","Fiscal_Year","Location_ID","Location_Desc","Last_Tank_Reading_Date","First_Transaction_Date","Last_Remodel_Date","Closing_Date","GPS_Latitude","GPS_Longitude","GPS_Address1","GPS_Address2","GPS_City","GPS_State","GPS_Zip","Corp_site","TimeZone_ID","Site_ID_Formatted","Business Entity_desc","Business Entity_id","PB OTP Pricing zones_desc","PB OTP Pricing zones_id","PB Entity ID_desc","PB Entity ID_id","PB Cigarette Pricing_desc","PB Cigarette Pricing_id","PB Ice Cream_desc","PB Ice Cream_id","PB Uses Pricebook_desc","PB Uses Pricebook_id","AD State_desc","AD State_id","PB Reward Programs_desc","PB Reward Programs_id","PB PKG BEV_desc","PB PKG BEV_id","PB Cooler DR P_desc","PB Cooler DR P_id","PB COOLER 3105_desc","PB COOLER 3105_id","PB Sugar Tax_desc","PB Sugar Tax_id","PB Tobacco Min Age_desc","PB Tobacco Min Age_id","PB Seasonal Promos Beach_desc","PB Seasonal Promos Beach_id","PB Region_desc","PB Region_id","PB WIC level_desc","PB WIC level_id","AU Paperwork Set_desc","AU Paperwork Set_id","AU Armored Car Pickups Per Week_desc","AU Armored Car Pickups Per Week_id","AU Trucker Cash_desc","AU Trucker Cash_id","IT Desktop Assignment_desc","IT Desktop Assignment_id","IT POS Type_desc","IT POS Type_id","AD C Store Brand_desc","AD C Store Brand_id","AD Site Status_desc","AD Site Status_id","AD Site Type_desc","AD Site Type_id","AD Duns Number 10 Digits_desc","AD Duns Number 10 Digits_id","AD Turnpikes TollRoads Airports_desc","AD Turnpikes TollRoads Airports_id","AD Duns Number 8 Digits_desc","AD Duns Number 8 Digits_id","RE Real Estate Type of Thing_desc","RE Real Estate Type of Thing_id","RE 3rd Party Lease Name_desc","RE 3rd Party Lease Name_id","MK Grocery Loyalty Martins_desc","MK Grocery Loyalty Martins_id","MK Plenti Loyalty_desc","MK Plenti Loyalty_id","MK APlus Rewards Loyalty_desc","MK APlus Rewards Loyalty_id","RE Acquisition Name_desc","RE Acquisition Name_id","RE Property Ownership_desc","RE Property Ownership_id","RE Property Owner_desc","RE Property Owner_id","RE Property Operator_desc","RE Property Operator_id","RE Lease Type_desc","RE Lease Type_id","RE Lease Company_desc","RE Lease Company_id","AD County_desc","AD County_id","CM Site Bank Name_desc","CM Site Bank Name_id","OP Market Sales Area_desc","OP Market Sales Area_id","AD Car Wash Type_desc","AD Car Wash Type_id","AD FF Subway Franchisee_desc","AD FF Subway Franchisee_id","FP Same Store_desc","FP Same Store_id","MK ATM_desc","MK ATM_id","MK Air Machine_desc","MK Air Machine_id","MK Vacuum_desc","MK Vacuum_id","CM Telecheck_desc","CM Telecheck_id","RM MACS Altria MLP_desc","RM MACS Altria MLP_id","AD Site Number_desc","AD Site Number_id","AD Cash Acceptor Fuel_desc","AD Cash Acceptor Fuel_id","AD Class of Trade_desc","AD Class of Trade_id","AD Tax Authority_desc","AD Tax Authority_id","FP Store Square Feet_desc","FP Store Square Feet_id","FP Store Open Time_desc","FP Store Open Time_id","FP Store Close Time_desc","FP Store Close Time_id","FP Acquisition Name_desc","FP Acquisition Name_id","MK Grocery Loyalty Redners_desc","MK Grocery Loyalty Redners_id","MK Grocery Loyalty Fuelperks_desc","MK Grocery Loyalty Fuelperks_id","MK Grocery Loyalty Price Chopper_desc","MK Grocery Loyalty Price Chopper_id","MK Grocery Loyalty SuperValu_desc","MK Grocery Loyalty SuperValu_id","MK Grocery Loyalty SuperValu WV_desc","MK Grocery Loyalty SuperValu WV_id","MK USC Loyalty_desc","MK USC Loyalty_id","AD Alcohol Sales Permit _desc","AD Alcohol Sales Permit _id","FP Daily Hours of Operation_desc","FP Daily Hours of Operation_id","PB Wholesale_desc","PB Wholesale_id","PB Linked Fee_desc","PB Linked Fee_id","AD Old Site Number_desc","AD Old Site Number_id","AD Lottery vending Machine_desc","AD Lottery vending Machine_id","AD FF Laredo Taco Company_desc","AD FF Laredo Taco Company_id","PB Flat Grill Ladson Legacy _desc","PB Flat Grill Ladson Legacy _id","RM SUN LOR RDA_desc","RM SUN LOR RDA_id","RM SUN States_desc","RM SUN States_id","AD Fiscal Systems_desc","AD Fiscal Systems_id","RM State with Entity Parcing _desc","RM State with Entity Parcing _id","PB City_desc","PB City_id","PB Beer Cooler Promo_desc","PB Beer Cooler Promo_id","RM Aplus Reverse Locs for Feb 2016 _desc","RM Aplus Reverse Locs for Feb 2016 _id","MK Fresh Made Smoothies_desc","MK Fresh Made Smoothies_id","MK Gourmet Latte Espresso_desc","MK Gourmet Latte Espresso_id","RM Mid Atlantic State Parcing UM_desc","RM Mid Atlantic State Parcing UM_id","AU On Line Lottery Income _desc","AU On Line Lottery Income _id","AU Lottery Paid Out Income _desc","AU Lottery Paid Out Income _id","PB LTC Kitchen_desc","PB LTC Kitchen_id","RM John Middleton Store Groupings_desc","RM John Middleton Store Groupings_id","PB McLane Fresh_desc","PB McLane Fresh_id","PB STRIPES WATER_desc","PB STRIPES WATER_id","PB TROLLI PROMOTION EXPIRES 4 4 2017_desc","PB TROLLI PROMOTION EXPIRES 4 4 2017_id","PB STRIPES DR PEPPER NO COKE OR PEPSI_desc","PB STRIPES DR PEPPER NO COKE OR PEPSI_id","PB STRIPES BIG RED_desc","PB STRIPES BIG RED_id","AD FF Godfather s Pizza Franchise_desc","AD FF Godfather s Pizza Franchise_id","AD CAT Scale_desc","AD CAT Scale_id","AD Foreign Currency Accepted_desc","AD Foreign Currency Accepted_id","PB Beer_desc","PB Beer_id","MK Grocery Loyalty Safeway_desc","MK Grocery Loyalty Safeway_id","CM Shadow Account Number_desc","CM Shadow Account Number_id","RM Altria Groups_desc","RM Altria Groups_id","RM ITG Rebates_desc","RM ITG Rebates_id","RM Cigar Grouping_desc","RM Cigar Grouping_id","RM Usst CANS_desc","RM Usst CANS_id","PB Acquisition_desc","PB Acquisition_id","MK Grocery Loyalty Acme_desc","MK Grocery Loyalty Acme_id","MK Grocery Loyalty Shaws_desc","MK Grocery Loyalty Shaws_id","MK Grill 44_desc","MK Grill 44_id","MK Desserts_desc","MK Desserts_id","MK Beer On Tap_desc","MK Beer On Tap_id","MK Oven Fried_desc","MK Oven Fried_id","RE Lot Size_desc","RE Lot Size_id","RE Construction Year_desc","RE Construction Year_id","RE Remodel Year_desc","RE Remodel Year_id","AD Subway Sales POS_desc","AD Subway Sales POS_id","AD FF Subway Number_desc","AD FF Subway Number_id","PB STRIPES PKG BEV_desc","PB STRIPES PKG BEV_id","PB FS Fresh On Site_desc","PB FS Fresh On Site_id","MK Roller Grill_desc","MK Roller Grill_id","AD Open Date_desc","AD Open Date_id","AD Close Date_desc","AD Close Date_id","MK US Food Div_desc","MK US Food Div_id","MK McLane DC_desc","MK McLane DC_id","RM Altria MST_desc","RM Altria MST_id","AD FF Chicken Deli_desc","AD FF Chicken Deli_id","IT Kalibrate Import_desc","IT Kalibrate Import_id","MK Grocery Loyalty AmBucks_desc","MK Grocery Loyalty AmBucks_id","AD DUNS 7 Eleven_desc","AD DUNS 7 Eleven_id","AD Co Op divestment_desc","AD Co Op divestment_id","AD RGIS _desc","AD RGIS _id","RM Altria Numark_desc","RM Altria Numark_id","Site Association_desc","Site Association_id","PB Restricted Cigar_desc","PB Restricted Cigar_id","Corporate_desc","Corporate_id","Director_desc","Director_id","Region Manager_desc","Region Manager_id","Area_desc","Area_id","Site_desc","Site_id","DNU PB Cooler Coke_id","PB Grand opening_id","DNU PB Cooler Coke_desc","DNU PB disp Bev_id","DNU PB Aloha Kitchen_desc","DNU PB Highway Turnpike Optima Kiosk_id","DNU PB Cooler 3110_id","DNU PB Dairy_desc","DNU PB Highway Turnpike Optima Kiosk_desc","DNU PB disp Bev_desc","DNU PB Aloha Kitchen_id","DNU PB Dairy_id","PB Grand opening_desc","DNU PB Cooler 3110_desc","T_Site_id","T_Site_desc","PB Beer NEW_id","PB Beer NEW_desc","FIN Rest Region_id","FIN Rest Region_desc","PB PROMO DAY 0 DAY 1_id","PB PROMO DAY 0 DAY 1_desc","PB Fees_id","PB Fees_desc"
FROM (SELECT "LHS"."Calendar_Key" AS "Calendar_Key","Organization_Key","Fiscal_Year"
FROM (SELECT "Calendar_Key","LHS"."Product_Key" AS "Product_Key","Audit_Flag"
FROM (SELECT "Calendar_key" AS "Calendar_Key","Organization_key" AS "Organization_Key","Ent_Source_Dtl_Key"
FROM "item_sales_fact") "LHS"
LEFT JOIN (SELECT "Product_Key","Audit_Flag"
FROM "product") "RHS"
ON ("LHS"."Product_Key" = "RHS"."Product_Key")
) "LHS"
LEFT JOIN (SELECT "calendar_key" AS "Calendar_Key","Day_Date" AS "Date","Fiscal_Year"
FROM "calendar") "RHS"
ON ("LHS"."Calendar_Key" = "RHS"."Calendar_Key")
) "LHS"
LEFT JOIN (SELECT "organization_key" AS "Organization_Key","PB Fees_desc"
FROM "organization") "RHS"
ON ("LHS"."Organization_Key" = "RHS"."Organization_Key")
) "q01"
WHERE (("Product_Key" = 32976.0) AND ("Date" >= TRY_CAST('2021-06-20' AS DATE)) AND ("Date" <= TRY_CAST('2021-06-25' AS DATE)))

解决方法

n_distinct() 支持多列。

foo %>%
    dplyr::group_by(
        Location_ID
    ) %>% 
    dplyr::summarise(
        count = dplyr::n_distinct(Date,units,na.rm = TRUE)
    )

您提供的示例数据生成以下 df

> foo
# A tibble: 10 x 3
# Groups:   Location_ID [3]
   Location_ID Date                units
         <int> <dttm>              <dbl>
 1           5 2021-06-20 00:00:00    11
 2           5 2021-06-21 00:00:00     4
 3           5 2021-06-22 00:00:00    10
 4           5 2021-06-23 00:00:00    13
 5           5 2021-06-24 00:00:00    17
 6           5 2021-06-25 00:00:00    13
 7          38 2021-06-20 00:00:00     7
 8        9003 2021-06-23 00:00:00    13
 9        9003 2021-06-24 00:00:00    14
10        9003 2021-06-25 00:00:00    10

我提供的代码生成以下计数

# A tibble: 3 x 2
  Location_ID count
        <int> <int>
1           5     6
2          38     1
3        9003     3
,

直接使用 dbplyr 执行此操作要求数据库对多列的不同值进行计数。我在使用过的任何 SQL 数据库中都没有看到此功能(SELECT COUNT(DISTINCT col1,col2) AS num_dist FROM table 会出错)。

我建议的解决方法是将您的列连接到一个列中,如下所示:

remote_table %>%
  mutate(conc = paste0(Date,"_",units)) %>%
  group_by(Location_ID) %>%
  summarise(num = n_distinct(conc))