将具有匹配数据的行合并为字符串

时间:2018-12-05 16:08:28

标签: sql sql-server

我正在寻找有关我无法解决的编程问题的任何帮助/反馈。我有一个具有多个客户的表,我可以根据数据以3种不同的方式将这些客户“分组”。我需要做的就是根据分组值找出哪些客户实际上是同一个人。我简化了以下8个原始客户记录的一些分组数据。

**Var1  Var2    Var3**  
NULL    222     NULL  
NULL    222     444  
NULL    555     333  
NULL    666     333  
NULL    666     777  
111     NULL    333  
111     555     333  
111     666     333  

我想要的结果是:把取值相等的行中的所有数据合并在一起,包括通过传递性关联起来的行(例如 a = b 且 b = c,则 a = c)。

对于上面的示例,我希望看到2个字符串(即有2个不同的客户)作为流程的输出:

'222, 444'  
'111, 333, 555, 666, 777'

1 个答案:

答案 0 :(得分:0)

我针对您的问题做了一个小测试,假设源表(SO_Test)包含以下列:ID1 varchar(10)、ID2 varchar(10)、ID3、UID uniqueidentifier。下面是在 MS SQL Server 上运行的 SQL 脚本。

最后,您应该按 UID 列进行分组。(原帖此处附有一张结果截图。)

-- Working variables for the pair scan that follows:
--   @cnt   = number of distinct ID pairs in #temp
--   @cnt_i = 1-based cursor over #temp.ROW_NUM
--   @ID1 / @ID2 = the pair currently being processed
declare @cnt int, @cnt_i int = 0, @ID1 varchar(100), @ID2 varchar(100)

--Build up association mapping table
-- Every row of SO_Test contributes up to three "these two values belong to the
-- same customer" pairs: (ID1,ID2), (ID1,ID3) and (ID2,ID3). Pairs with a NULL
-- member are skipped. UNION (not UNION ALL) is intentional: it removes
-- duplicate pairs so each association is scanned only once.
-- ROW_NUM is ordered by both columns so that ties on ID1 are broken
-- deterministically and the scan order is the same on every run.
select  x.ID1,
        x.ID2,
        ROW_NUMBER() OVER (order by x.ID1, x.ID2) as ROW_NUM
into #temp
from
(
    select ID1, ID2
    from SO_Test
    where (ID1 IS NOT NULL AND ID2 IS NOT NULL)
    UNION
    select ID1, ID3
    from SO_Test
    where (ID1 IS NOT NULL AND ID3 IS NOT NULL)
    UNION
    select ID2, ID3
    from SO_Test
    where (ID2 IS NOT NULL AND ID3 IS NOT NULL)
) x

-- ROW_NUM runs 1..n without gaps, so the row count equals the highest ROW_NUM;
-- count(*) also yields 0 (instead of NULL) when there are no pairs at all.
select @cnt = count(*) from #temp

-- One row per discovered group: PKID identifies the group, [str] holds the
-- comma-separated list of values that belong to it.
create table #final
(
    PKID uniqueidentifier,
    [str] varchar(1000)
)

--Scan mapping and put together
-- Walks the pairs in #temp one by one and maintains #final so that every value
-- lives in exactly one group ([str] is a comma-separated list of values).
-- Membership is tested on whole items by wrapping both sides in commas, so a
-- value such as '11' is not mistaken for a member of a group containing '111'.
-- NOTE: this relies on the values themselves not containing a comma.
declare @grp1 uniqueidentifier, @grp2 uniqueidentifier

while (@cnt > @cnt_i)
begin
    set @cnt_i = @cnt_i + 1

    select  @ID1 = ID1,
            @ID2 = ID2
    from #temp
    where ROW_NUM = @cnt_i

    -- A SELECT that matches no row leaves the variable untouched, so the
    -- group ids must be cleared before each lookup.
    set @grp1 = NULL
    set @grp2 = NULL

    select top (1) @grp1 = PKID
    from #final
    where charindex(',' + @ID1 + ',', ',' + [str] + ',') > 0

    select top (1) @grp2 = PKID
    from #final
    where charindex(',' + @ID2 + ',', ',' + [str] + ',') > 0

    if (@grp1 is null and @grp2 is null)
    begin
        -- Neither value has been seen yet: start a new group with both
        -- (or with the single value when the pair is (x, x)).
        insert into #final (PKID, [str])
        values (NewID(), case when @ID1 = @ID2 then @ID1 else @ID1 + ',' + @ID2 end)
    end
    else if (@grp1 is null)
    begin
        -- Only @ID2 is known: @ID1 joins @ID2's group.
        update #final
        set [str] = [str] + ',' + @ID1
        where PKID = @grp2
    end
    else if (@grp2 is null)
    begin
        -- Only @ID1 is known: @ID2 joins @ID1's group.
        update #final
        set [str] = [str] + ',' + @ID2
        where PKID = @grp1
    end
    else if (@grp1 <> @grp2)
    begin
        -- The pair bridges two existing groups (a = b, b = c => a = c):
        -- fold the second group into the first and remove it. The groups are
        -- disjoint, so the concatenation cannot introduce duplicates.
        update g1
        set g1.[str] = g1.[str] + ',' + g2.[str]
        from #final g1
        inner join #final g2
            on g2.PKID = @grp2
        where g1.PKID = @grp1

        delete from #final
        where PKID = @grp2
    end
    -- else: both values already sit in the same group; nothing to do.
end

--New Code to process single item case
-- Adds a one-value group for every value that only ever appears alone in a row
-- (the other two ID columns are NULL) and is not already part of a group.

-- Snapshot of the groups built so far. The table keeps the name #temp2
-- because the cleanup section of this script drops it.
select PKID, [str]
into #temp2
from #final

-- One row per value that is already a member of some group.
-- varchar(100) matches the width of the "value" column returned by
-- dbo.StringToRow_varchar, so split items are never truncated on insert.
create table #final_table
(
    [str]   varchar(100)
)

-- Split every group's comma-separated list into individual values in one
-- set-based statement instead of looping over the groups row by row.
insert into #final_table ([str])
select s.value
from #temp2 g
cross apply [dbo].[StringToRow_varchar](g.[str]) s

-- Stand-alone values: exactly one of the three ID columns is populated.
-- UNION removes duplicates so each value yields at most one new group.
insert into #final (PKID, [str])
select NewID(), z.ID1
from
(
    select ID1
    from SO_Test
    where (ID1 IS NOT NULL AND ID2 IS NULL AND ID3 IS NULL)
    UNION
    select ID2
    from SO_Test
    where (ID1 IS NULL AND ID2 IS NOT NULL AND ID3 IS NULL)
    UNION
    select ID3
    from SO_Test
    where (ID1 IS NULL AND ID2 IS NULL AND ID3 IS NOT NULL)
) z
-- NOT EXISTS instead of NOT IN: the result stays correct even if the
-- subquery were ever to produce a NULL.
where not exists (
    select *
    from #final_table ft
    where ft.[str] = z.ID1
)

--View final result
select PKID, [str] from #final

--Update original source table for uniqueID (group by column)
-- A source row belongs to a group when any of its three IDs is an item of the
-- group's comma-separated list. Both sides are wrapped in commas so only whole
-- items match ('11' does not match a list containing '111'). A NULL ID makes
-- its charindex() evaluate to NULL, so that branch simply does not match.
update src
set src.UID = f.PKID
from SO_Test src
inner join #final f
    on  charindex(',' + src.ID1 + ',', ',' + f.[str] + ',') > 0
    or  charindex(',' + src.ID2 + ',', ',' + f.[str] + ',') > 0
    or  charindex(',' + src.ID3 + ',', ',' + f.[str] + ',') > 0

select * from SO_Test

drop table #temp
drop table #temp2
drop table #final
drop table #final_table

-- Intentionally unfiltered: resets the UID column on the whole table so the
-- script can be re-run from a clean state. Remove this statement if the
-- computed UID values should be kept in SO_Test.
update SO_Test set [UID] = NULL

这是新结果

(原帖此处附有一张新结果的截图。)

-- Splits a comma-separated string into one row per item.
--
-- Parameters:
--   @input  the comma-separated list to split (up to 2000 characters).
--
-- Returns a table with one row per item, in input order:
--   id          1-based position of the item in the list
--   value       the item text (converted to varchar(100); longer items are cut)
--   columnName  'Col' followed by the position, e.g. 'Col1', 'Col2', ...
--
-- A NULL @input returns no rows; an empty string returns a single row whose
-- value is the empty string.
CREATE FUNCTION  [dbo].[StringToRow_varchar]
(
    @input  varchar(2000)
)
RETURNS @temp TABLE 
(
    id int,
    value varchar(100),
    columnName varchar(10)
)
AS
Begin
    -- One character wider than @input: a delimiter is appended below, and with
    -- a 2000-character buffer it would be truncated away for a full-length
    -- input, silently dropping the last item.
    declare @after_input varchar(2001)
    declare @delimiter varchar(2), @i int, @pos int
    set @i = 0
    set @delimiter = ','
    -- Terminate the list with a delimiter so the last item is handled by the
    -- same loop as all the others.
    set @after_input = @input + @delimiter

    declare @item varchar(100)

    set @pos = charindex(@delimiter, @after_input)

    while (@pos > 0)
    begin
        set @i = @i + 1
        -- Text in front of the first remaining delimiter is the next item.
        set @item = convert(varchar(100), SUBSTRING(@after_input, 1, @pos - 1))

        insert into @temp (id, value, columnName)
        values (@i, @item, 'Col' + CONVERT(varchar(9), @i))

        -- Drop the consumed item and its delimiter; SUBSTRING stops at the end
        -- of the string, so the buffer size is a safe upper bound for the length.
        set @after_input = SUBSTRING(@after_input, @pos + 1, 2001)
        set @pos = charindex(@delimiter, @after_input)
    end
    return 
END