问题描述
我希望能够使用Upsert,在这里我可以更新条件更改的现有行或在新行可用时追加新行。我想从R studio中进行编码,并与MS SQL Server配对。我使用虹膜数据集复制了以下工作流程。我想我快到了,但是不能完全完成SQL查询。另外,欢迎就工作流程提出建议。
> pacman::p_load(DBI,dbplyr,dplyr,odbc)
>
> # Connection
> con <- DBI::dbConnect(odbc::odbc(),+ Driver = "SQL Server",+ Server = "localhost\\SQLEXPRESS",+ Database = "master",+ Trusted_Connection = "True")
>
> Data.DWH <- dbReadTable(con,"iris") %>%
+ unite("Lookup_Key",colnames(select(.,- ID)),remove = FALSE)
>
> # Data in data warehouse
> head(Data.DWH)
Lookup_Key Sepal.Length Sepal.Width Petal.Length Petal.Width Species ID
1 5.1_3.5_1.4_0.2_setosa 5.1 3.5 1.4 0.2 setosa 1
2 4.9_3_1.4_0.2_setosa 4.9 3.0 1.4 0.2 setosa 2
3 4.7_3.2_1.3_0.2_setosa 4.7 3.2 1.3 0.2 setosa 3
4 4.6_3.1_1.5_0.2_setosa 4.6 3.1 1.5 0.2 setosa 4
5 5_3.6_1.4_0.2_setosa 5.0 3.6 1.4 0.2 setosa 5
6 5.4_3.9_1.7_0.4_setosa 5.4 3.9 1.7 0.4 setosa 6
>
> # New data example (created),1 entry to append,1 entry to ignore,1 entry to update
>
> New.Data.Raw <- data.frame(stringsAsFactors=FALSE,+ Sepal.Length = c(5.1,1,4.9),+ Sepal.Width = c(3.5,2,3),+ Petal.Length = c(1.4,3,1.4),+ Petal.Width = c(2,4,0.2),+ Species = c("setosa","setosa","setosa"),+ ID = c(1,151,2)
+ ) %>% unite("Lookup_Key",remove = FALSE)
>
> head(New.Data.Raw)
Lookup_Key Sepal.Length Sepal.Width Petal.Length Petal.Width Species ID
1 5.1_3.5_1.4_2_setosa 5.1 3.5 1.4 2.0 setosa 1
2 1_2_3_4_setosa 1.0 2.0 3.0 4.0 setosa 151
3 4.9_3_1.4_0.2_setosa 4.9 3.0 1.4 0.2 setosa 2
>
> # Ready for insert/update
> # check for changes in the look up key (mash up of row values) or new entries according to ID
> New.Data <- New.Data.Raw %>%
+ filter(!ID %in% Data.DWH$ID |
+ (ID %in% Data.DWH$ID & !Lookup_Key %in% Data.DWH$Lookup_Key))
>
> head(New.Data)
Lookup_Key Sepal.Length Sepal.Width Petal.Length Petal.Width Species ID
1 5.1_3.5_1.4_2_setosa 5.1 3.5 1.4 2 setosa 1
2 1_2_3_4_setosa 1.0 2.0 3.0 4 setosa 151
>
> # Construct sql query for ms sql ===========================================================================
>
> # construct columns for query - produces 1 string
> cols <- paste0('(',paste0(colnames(New.Data),collapse=','),')')
>
> # construct values for query - produce 1 string
> vals <- paste0(
+ apply(New.Data,function(x) paste0("('",paste0(x,collapse = "','"),"')")),collapse = ",")
>
> # construct update values for query
> insertVals <- paste0('(',paste0('s.',colnames(New.Data),')')
>
> # construct update set for query
> updateSet <- paste0(colnames(New.Data%>%select(-ID)),+ ' = s.',colnames(New.Data%>%select(-ID)),')
>
> # construct upsert query (does not currently work!)
> queryNew.Data <- paste0('MERGE iris AS t ',+ 'USING (VALUES ',vals,') AS s',cols,+ ' ON t.ID = s.ID ',+ 'WHEN MATCHED THEN ',+ 'UPDATE SET ',updateSet,+ ' WHEN NOT MATCHED THEN ',+ 'INSERT',+ ' VALUES',insertVals,';')
>
> queryNew.Data <- gsub("\\b'\\b","",queryNew.Data)
> queryNew.Data <- gsub("'NA'",'NULL',queryNew.Data)
>
> # send the query to the database (no lucky currently)
> DBI::dbGetQuery(con,queryNew.Data)
解决方法
解决了这个问题,代码工作正常。只需修复列的命名约定。 SQL无法处理'。'在名称中输入,因此将其更改为“ _”,代码即可按需工作。