左加入建议

时间:2018-12-21 00:23:10

标签: sql sql-server

我需要在SQL Server中使用共同的列日期联接多个表,但是我想避免合并时重复来自不同表的值。

drop table if exists #d, #t1, #t2

create table #d  (DataDate date)
create table #t1 (DataDate date, Value1 float, Value2 float)
create table #t2 (DataDate date, Value3 float, Value4 float)

insert into #d values ('20181201'),('20181202'),('20181203')
insert into #t1 values 
    ('20181201', 3.14, 1.18),
    ('20181201', 3.135, 1.185),
    ('20181202', 3.15, 1.19),
    ('20181203', 3.16, 1.195)

insert into #t2 values 
    ('20181201', 4.14, 2.18),
    ('20181203', 4.15, 2.19),
    ('20181203', 4.1, 2.195)

select #d.DataDate,#t1.Value1,#t1.Value2,#t2.Value3,#t2.Value4
from #d
    left join #t1 on #d.DataDate = #t1.DataDate
    left join #t2 on #d.DataDate = #t2.DataDate

实际结果

DataDate   Value1  Value2  Value3  Value4
12/1/2018  3.14    1.18    4.14    2.18
12/1/2018  3.135   1.185   4.14    2.18
12/2/2018  3.15    1.19    NULL    NULL
12/3/2018  3.16    1.195   4.15    2.19
12/3/2018  3.16    1.195   4.1     2.195

所需结果

DataDate    Value1  Value2  Value3  Value4
12/1/2018   3.14    1.18    4.14    2.18
12/1/2018   3.135   1.185   NULL    NULL
12/2/2018   3.15    1.19    NULL    NULL
12/3/2018   3.16    1.195   4.15    2.19
12/3/2018   NULL    NULL    4.1     2.195

2 个答案:

答案 0 :(得分:1)

这是一个建议的解决方案。
我添加了第三个表,只是为了证明可以解决具有公共列的N个表。

准备演示数据:

/* Prepare demo objects */
DROP TABLE IF EXISTS #d, #t1, #t2
CREATE TABLE #d  (DataDate date)
CREATE TABLE #t1 (DataDate date, Value1 float, Value2 float)
CREATE TABLE #t2 (DataDate date, Value3 float, Value4 float)
CREATE TABLE #t3 (DataDate date, Value5 float, Value6 float)

/* Insert demo data */
INSERT INTO #d VALUES ('20181201'),('20181202'),('20181203')
INSERT INTO #t1 VALUES 
    ('20181201', 3.14, 1.18),
    ('20181201', 3.135, 1.185),
    ('20181202', 3.15, 1.19),
    ('20181203', 3.16, 1.195)
INSERT INTO #t2 VALUES 
    ('20181201', 4.14, 2.18),
    ('20181203', 4.15, 2.19),
    ('20181203', 4.1, 2.195)
INSERT INTO #t3 VALUES 
    ('20181201', 3.14, 1.18),
    ('20181201', 3.135, 1.185),
    ('20181202', 3.16, 1.195)

拟议的查询解决方案:

SELECT
    COALESCE(d.DataDate, t1.datadate, t2.datadate, t3.datadate) AS DataDate
    , t1.Value1
    , t1.Value2
    , t2.Value3
    , t2.Value4
    , t3.Value5
    , t3.Value6
FROM
    (SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
    FROM #d) AS d
FULL JOIN
    (SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
    FROM #t1) AS t1
    ON (t1.DataDate = d.DataDate AND t1.rn = d.rn)
FULL JOIN
    (SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY datadate ORDER BY (SELECT NULL)) AS rn
    FROM #t2) AS t2
    ON (t2.DataDate = d.DataDate AND t2.rn = d.rn)
    OR (t2.DataDate = t1.DataDate AND t2.rn = t1.rn)
FULL JOIN
    (SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY datadate ORDER BY (SELECT NULL)) AS rn
    FROM #t3) AS t3
    ON (t3.DataDate = d.DataDate AND t3.rn = d.rn)
    OR (t3.DataDate = t1.DataDate AND t3.rn = t1.rn)
    OR (t3.DataDate = t2.DataDate AND t3.rn = t2.rn)
ORDER BY DataDate;  

演示小提琴发布在 db <>小提琴here

结果:

DataDate            | Value1 | Value2 | Value3 | Value4 | Value5 | Value6
:------------------ | -----: | -----: | -----: | -----: | -----: | -----:
01/12/2018 00:00:00 |   3.14 |   1.18 |   4.14 |   2.18 |   3.14 |   1.18
01/12/2018 00:00:00 |  3.135 |  1.185 |   null |   null |  3.135 |  1.185
02/12/2018 00:00:00 |   3.15 |   1.19 |   null |   null |   3.16 |  1.195
03/12/2018 00:00:00 |   3.16 |  1.195 |   4.15 |   2.19 |   null |   null
03/12/2018 00:00:00 |   null |   null |    4.1 |  2.195 |   null |   null

注意(可选):

通过引入索引,可以极大地提高性能。
作为一个演示,我在DateData列上添加了CLUSTERED INDEXES,性能的提高非常明显。

/* Add to improve performance */
CREATE CLUSTERED INDEX CI_DataDate ON #d (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t1 (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t2 (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t3 (DataDate);

答案 1 :(得分:0)

如果要通过日期和t1.value1关联最小值(或最大值取决于所要查找的内容),请使用min。在您的示例中,值不是重复项,因此区别将不起作用

select #d.DataDate,#t1.Value1,min(#t1.Value2),max(#t2.Value3),min(#t2.Value4)
from #d
    left join #t1 on #d.DataDate = #t1.DataDate
    left join #t2 on #d.DataDate = #t2.DataDate
group by 1,2

如果有确切的重复项要删除,请使用以下内容

select distinct #d.DataDate,#t1.Value1,#t1.Value2,#t2.Value3,#t2.Value4
    from #d
        left join #t1 on #d.DataDate = #t1.DataDate
        left join #t2 on #d.DataDate = #t2.DataDate