问题描述
我正在尝试比较两列,并且只需要得到两者之间不同的部分(即第二列中有、而第一列中没有的单词)。例如:
-- Show the sample rows of table1; the listing that follows has the two text columns Column_1 and column_2.
select * from table1
Column_1 column_2
---------------- ------------------
Swetha working Swetha is working in Chennai
Raju 10th Raju is studying 10th std
ranjith Ranjith played yesterday
how to play how to play Cricket
My name is my name is john
输出:
如果第二列在第一列的单词之间还插入了其他单词,第一列中的单词同样应当被去除(如第1行和第2行所示):
Column_1 column_2 column_3
---------------- ------------------ ------------------------
Swetha working Swetha is working in Chennai is in Chennai
Raju 10th Raju is studying 10th std is studying std
ranjith Ranjith played yesterday played yesterday
how to play how to play Cricket Cricket
My name is my name is john john
解决方法
这比您之前的问题要复杂得多。您可以将第一列分解为单词,然后在第二列中分别替换它们。为此,您需要递归CTE:
-- Strip every word of col1 out of col2, one word per recursion step.
-- Expects a table t(id, col1, col2) where id identifies each row.
with col1_words as (
    -- One row per (id, word of col1). seqnum numbers the words of a row in
    -- arbitrary order (order by (select null)); it only drives the recursion.
    select
        t.id,
        t.col1,
        t.col2,
        parts.word,
        parts.seqnum,
        max(parts.seqnum) over (partition by t.id) as max_seqnum
    from t
    cross apply (
        select
            sp.value as word,
            row_number() over (order by (select null)) as seqnum
        from string_split(t.col1, ' ') sp
    ) parts
),
stripped as (
    -- Anchor: pad col2 with a space on both sides so that every word,
    -- including the first and the last, is surrounded by spaces; then
    -- remove word number 1.
    select
        id,
        col1,
        col2,
        replace(' ' + col2 + ' ',' ' + word + ' ',' ') as result,
        word,
        seqnum,
        max_seqnum
    from col1_words
    where seqnum = 1

    union all

    -- Step: remove the next word from the running result.
    select
        prev.id,
        prev.col1,
        prev.col2,
        replace(prev.result,' ' + nxt.word + ' ',' '),
        nxt.word,
        nxt.seqnum,
        prev.max_seqnum
    from stripped prev
    inner join col1_words nxt
        on nxt.id = prev.id
       and nxt.seqnum = prev.seqnum + 1
)
-- Keep only the row produced after the last word was removed, and drop the
-- padding spaces added in the anchor.
select
    id,
    ltrim(rtrim(result)) as result
from stripped
where seqnum = max_seqnum
order by id;
这里是一个 db<>fiddle 在线演示。
我添加了 id 列,因此每一行都可以被唯一标识。如果您的 SQL Server 版本没有内置的 string_split() 函数,也可以很容易地找到具有相同功能的自定义实现。
此方法使用的一个技巧是处理第二列中的第一个和最后一个单词。该代码在开头和结尾添加空格。这样,字符串中的所有单词都被空格包围,从而使替换完整单词变得更加容易。
,SQL Server 2016 确实具有字符串拆分函数(string_split)。这种方法在第2列以及每个拆分出的单词两侧各添加了一个空格。
数据
-- Sample data: Column_1 holds the words to remove, Column_2 the full sentence.
drop table if exists #strings;
go
create table #strings(
    Id int,
    Column_1 varchar(200),
    Column_2 varchar(200));
go
-- One tuple per row. Row 3 previously lacked its Column_2 value and closing
-- parenthesis, which made the whole VALUES list a syntax error.
insert #strings(Id,Column_1,Column_2) values
(1,'Swetha','Swetha is working in Chennai'),
(2,'Raju','Raju is studying 10 std'),
(3,'Swetha working','Swetha is working in Chennai'),
(4,'Raju 10th','Raju is studying 10th std');
查询
-- For every row of #strings, rebuild Column_2 without the words that occur
-- in the same row's Column_1, keeping the words in their Column_2 order.
declare
    @add_delim char(1)=' ';

;with
-- Words of Column_1, kept per Id so that one row's words are never used to
-- filter another row's sentence.
c1_cte(Id,split_str) as (
    select
        st.Id,
        ltrim(rtrim(s.[value]))
    from #strings st
    cross apply string_split(st.Column_1,' ') s),
-- Words of Column_2 with ndx = position of the word inside Column_2. Both the
-- word and the sentence are wrapped in the delimiter so only whole words match.
c2_cte(Id,ndx,split_str) as (
    select
        st.Id,
        charindex(@add_delim + s.[value] + @add_delim,@add_delim + st.Column_2 + @add_delim),
        s.[value]
    from #strings st
    cross apply string_split(st.Column_2,' ') s
    -- Skip the empty tokens that consecutive spaces produce.
    where s.[value] <> '')
select
    c2.Id,
    -- FOR XML PATH('') concatenates ' ' + word in ndx order; TYPE/.value()
    -- avoids XML entity-encoding of characters such as & or <; STUFF(..., 1, 1, '')
    -- removes the single leading space.
    stuff((select ' ' + c.split_str
           from c2_cte c
           where c.Id = c2.Id
             and not exists(select 1
                            from c1_cte c1
                            where c1.Id = c.Id
                              and c1.split_str = c.split_str)
           order by c.ndx
           for xml path(''),type).value('.','varchar(max)'),1,1,'') [new_str]
from c2_cte c2
group by c2.Id;
-- With the four sample rows this yields:
--   1  is working in Chennai   (only 'Swetha' is in Column_1 of row 1)
--   2  is studying 10 std
--   3  is in Chennai
--   4  is studying std
结果
Id new_str
1 is in Chennai
2 is studying 10 std
3 is in Chennai
4 is studying std
,
这是使用 FOR XML PATH 和 STRING_SPLIT 的解决方案
DBFIDDLE工作链接
STRING_AGG
-- NOTE(review): the source text introduced this query as "for SQL Server 2017+,
-- where ... is supported"; the construct name is missing there (presumably
-- STRING_AGG). The query below itself relies on STRING_SPLIT and FOR XML PATH.
;WITH split_words AS (
    -- One row per word of Column_2 that is not one of the words of Column_1.
    SELECT
        src.Column_1,
        src.Column_2,
        kept.VALUE
    FROM dbo.Strings AS src
    CROSS APPLY (
        SELECT w2.VALUE
        FROM STRING_SPLIT(src.Column_2, ' ') AS w2
        WHERE NOT EXISTS (
            SELECT 1
            FROM STRING_SPLIT(src.Column_1, ' ') AS w1
            WHERE w1.VALUE = w2.VALUE
        )
    ) AS kept
)
SELECT
    s.*,
    -- Concatenate the remaining words of the matching row; every word is
    -- followed by a space, so the result carries one trailing space.
    (
        SELECT sw.VALUE + ' ' [text()]
        FROM split_words AS sw
        WHERE sw.Column_1 = s.Column_1
          AND sw.Column_2 = s.Column_2
        FOR XML PATH(''), TYPE
    ).value('.','NVARCHAR(MAX)') [difference]
FROM dbo.Strings s
STRING_AGG
结果:
-- List, per input row, the words of column_2 that are not words of column_1.
-- NOTE(review): TOKEN(string, delimiter, n) is not a SQL Server built-in; it is
-- presumably a helper returning the n-th delimited token ('' when there is
-- none) - confirm before running.
WITH
-- your input (column_3 is the expected result, kept for comparison only)
input(column_1,column_2,column_3) AS (
          SELECT 'Swetha working','Swetha is working in Chennai','is in Chennai'
UNION ALL SELECT 'Raju 10th','Raju is studying 10th std','is studying std'
-- 'Ranjith' was misspelled 'Rantith', so it could never match column_1
UNION ALL SELECT 'ranjith','Ranjith played yesterday','played yesterday'
UNION ALL SELECT 'how to play','how to play Cricket','Cricket'
UNION ALL SELECT 'My name is','my name is john','john'
),
-- need a series of integers: one per token position (sentences here have at most 5 words)
-- you can also try to play with the STRING_SPLIT() function
i(i) AS (
          SELECT 1
UNION ALL SELECT 2
UNION ALL SELECT 3
UNION ALL SELECT 4
UNION ALL SELECT 5
),
-- every token of column_2 that does not occur as a whole word in column_1
unfound_tokens AS (
    SELECT
        i,
        column_1,
        column_2,
        TOKEN(column_2,' ',i) AS token
    FROM input CROSS JOIN i
    WHERE TOKEN(column_2,' ',i) <> ''
      -- Compare case-insensitively and wrap both sides in spaces so that only
      -- whole words match ('in' must not be found inside 'working').
      AND CHARINDEX(
            ' ' + UPPER(TOKEN(column_2,' ',i)) + ' ',
            ' ' + UPPER(column_1) + ' '
          ) = 0
)
SELECT
    column_1,
    column_2,
    -- order the tokens by their position in column_2
    STRING_AGG(token,' ') WITHIN GROUP (ORDER BY i) AS column_3
FROM unfound_tokens
GROUP BY
    column_1,column_2
-- expected output (row order is not guaranteed without an ORDER BY):
-- out column_1       | column_2                     | column_3
-- out ----------------+------------------------------+------------------
-- out My name is     | my name is john              | john
-- out Swetha working | Swetha is working in Chennai | is in Chennai
-- out how to play    | how to play Cricket          | Cricket
-- out Raju 10th      | Raju is studying 10th std    | is studying std
-- out ranjith        | Ranjith played yesterday     | played yesterday
,
我不确定在使用STRING_AGG或STRING_SPLIT时,结果是否会保留单词的顺序...
只需运行下面这个查询,就可以看到它返回的单词顺序与原句不同:
-- Per row of #strings: the words of Column_2 that are not words of Column_1.
-- This query deliberately uses EXCEPT and an unordered STRING_AGG to show that
-- the original word order is not preserved (EXCEPT also removes duplicate words).
WITH
-- words of Column_1, one row per (Id, word)
SS1 AS
(SELECT Id,SS.value AS COL1
 FROM #strings
      CROSS APPLY STRING_SPLIT(Column_1,' ') AS SS
),
-- words of Column_2, one row per (Id, word)
SS2 AS
(SELECT Id,SS.value AS COL2
 FROM #strings
      CROSS APPLY STRING_SPLIT(Column_2,' ') AS SS
),
-- set difference: words of Column_2 that do not appear in Column_1 of the same Id
DIF AS
(
SELECT Id,COL2 AS COL
FROM SS2
EXCEPT
SELECT Id,COL1
FROM SS1
)
SELECT DIF.Id,Column_2,STRING_AGG(COL,' ') AS Column_3
FROM DIF
     JOIN #strings AS S ON S.Id = DIF.Id
GROUP BY DIF.Id,Column_2;
您必须用大量数据进行测试,以确认所给出的查询不会产生诸如顺序不一致之类的副作用(我很确定,由于并行执行,结果的顺序不会保持一致...)
因此,保持顺序一致的唯一方法是编写一个递归查询,为句子中的每个单词附加一个表示其位置的序号...