如何找到两个表之间序列的差异?

时间:2017-01-09 16:28:56

标签: sql sql-server

我有两个表(一个旧存档和一个新存档),各有 3 列:第一列表示一种层次结构,另外两列是不能重复的值。对于每个层次结构,我需要检查这两列的值在两个表中的排列顺序是否完全相同。如果不相同,我希望把发生变化的整个层次结构都显示出来。例如:

表1:

 colum1     colum2    colum3
--------------------------------
| folder1 | detail1 | detail10 |
--------------------------------
| folder1 | detail2 | detail20 |
--------------------------------
| folder1 | detail3 | detail30 |
--------------------------------
| folder2 | detail1 | detail10 |
--------------------------------
| folder2 | detail2 | detail20 |
--------------------------------
| folder2 | detail3 | detail30 |
--------------------------------
| folder3 | detail1 | detail10 |
--------------------------------
| folder3 | detail2 | detail20 |
--------------------------------
| folder3 | detail3 | detail30 |
--------------------------------
| folder4 | detail1 | detail10 |
--------------------------------
| folder4 | detail2 | detail20 |
--------------------------------
| folder4 | detail3 | detail30 |
--------------------------------

表2:

 colum1     colum2    colum3
--------------------------------
| folder1 | detail3 | detail30 |
--------------------------------
| folder1 | detail2 | detail20 |
--------------------------------
| folder1 | detail1 | detail10 |
--------------------------------
| folder2 | detail1 | detail10 |
--------------------------------
| folder2 | detail2 | detail20 |
--------------------------------
| folder2 | detail3 | detail30 |
--------------------------------
| folder3 | detail1 | detail10 |
--------------------------------
| folder3 | detail3 | detail30 |
--------------------------------
| folder3 | detail2 | detail20 |
--------------------------------
| folder4 | detail1 | detail10 |
--------------------------------
| folder4 | detail3 | detail30 |
--------------------------------

因此,只要某个层次结构内有值被调换了位置或丢失了,就应显示整个层次结构。按照上面的例子,结果应如下:

             table1                        table2

 colum1     colum2    colum3     colum1    colum2    colum3
---------------------------------------------------------------
| folder1 | detail1 | detail10 | folder1 | detail3 | detail30 |  <--- was switched
---------------------------------------------------------------
| folder1 | detail2 | detail20 | folder1 | detail2 | detail20 |
---------------------------------------------------------------
| folder1 | detail3 | detail30 | folder1 | detail1 | detail10 |  <--- was switched
---------------------------------------------------------------
| folder3 | detail1 | detail10 | folder3 | detail1 | detail10 |
---------------------------------------------------------------
| folder3 | detail2 | detail20 | folder3 | detail3 | detail30 |  <--- was switched
---------------------------------------------------------------
| folder3 | detail3 | detail30 | folder3 | detail2 | detail20 |  <--- was switched
---------------------------------------------------------------
| folder4 | detail1 | detail10 | folder4 | detail1 | detail10 |
---------------------------------------------------------------
| folder4 | detail2 | detail20 |         |         |          |  <---missing
---------------------------------------------------------------
| folder4 | detail3 | detail30 | folder4 | detail3 | detail30 |
---------------------------------------------------------------

这有办法实现吗?我想用 SQL 来做,不过也许用另一种语言会更方便?欢迎任何提示。

提前致谢

1 个答案:

答案 0 :(得分:1)

如果我们为每个表添加一个 id 列,并依靠插入顺序来表示行的先后(或者存在其他可用的序号列或排序依据),那么你可以这样做:

已更新:改为按文件夹分组、基于 row_number() 进行检查:

示例测试设置:http://rextester.com/VXZIP45848

/* Test table a. id is an identity column starting at 1, so it numbers the
   rows as they are inserted; the comparison query orders by it to get each
   row's position within its folder (column one). */
create table a (
    id    int identity(1,1) not null,
    one   varchar(32),
    two   varchar(32),
    three varchar(32)
);

insert into a (one, two, three)
values
    ('folder1', 'detail1',   'detail10'),
    ('folder1', 'detail2',   'detail20'),
    ('folder1', 'detail3',   'detail30'),
    ('folder1', 'detail 98', 'detail 198'),
    ('folder2', 'detail1',   'detail10'),
    ('folder2', 'detail2',   'detail20'),
    ('folder2', 'detail3',   'detail30'),
    ('folder1', 'detail99',  'detail 199'),
    ('folder3', 'detail1',   'detail10'),
    ('folder3', 'detail2',   'detail20'),
    ('folder3', 'detail3',   'detail30'),
    ('folder4', 'detail1',   'detail10'),
    ('folder4', 'detail2',   'detail20'),
    ('folder4', 'detail3',   'detail30');


/* Test table b. Same layout as table a: id is an identity column starting
   at 1, so it numbers the rows as they are inserted. */
create table b (
    id    int identity(1,1) not null,
    one   varchar(32),
    two   varchar(32),
    three varchar(32)
);

insert into b (one, two, three)
values
    ('folder1', 'detail3', 'detail30'),
    ('folder1', 'detail2', 'detail20'),
    ('folder1', 'detail1', 'detail10'),
    ('folder2', 'detail1', 'detail10'),
    ('folder2', 'detail2', 'detail20'),
    ('folder2', 'detail3', 'detail30'),
    ('folder3', 'detail1', 'detail10'),
    ('folder3', 'detail3', 'detail30'),
    ('folder3', 'detail2', 'detail20'),
    ('folder4', 'detail1', 'detail10'),
    ('folder4', 'detail3', 'detail30'),
    ('folder5', 'detail5', 'detail50');

/* Show every row of a and every row of b, paired on (one, two, three),
   whether or not a match exists.

   rn is the row's position within its folder (column one), in id order.

   diff:
     ''          the row is at the same position in both tables
     'not in a'  the row exists only in b
     'not in b'  the row exists only in a
     'switched'  the row exists in both tables, at different positions

   The ORDER BY keeps each folder's rows together and in sequence; without
   it the server is free to return the rows in any order. */
select a.*, b.*
  , case
      when a.rn = b.rn  then ''
      when a.id is null then 'not in a'
      when b.id is null then 'not in b'
      when a.rn <> b.rn then 'switched'
      else '?'  -- not expected: id is not null, so the branches above cover every joined row
    end as diff
  from (select *, row_number() over (partition by one order by id) as rn
          from a) as a
      full outer join
        (select *, row_number() over (partition by one order by id) as rn
          from b) as b
            on a.one   = b.one
           and a.two   = b.two
           and a.three = b.three
  order by coalesce(a.one, b.one)                     -- folder, from whichever side has the row
         , coalesce(a.rn, b.rn)                       -- position within the folder
         , case when a.id is null then 1 else 0 end;  -- on a tie, rows present in a come first

结果:

+------+---------+-----------+------------+------+------+---------+---------+----------+------+----------+
|  id  |   one   |    two    |   three    |  rn  |  id  |   one   |   two   |  three   |  rn  |   diff   |
+------+---------+-----------+------------+------+------+---------+---------+----------+------+----------+
| 1    | folder1 | detail1   | detail10   | 1    | 3    | folder1 | detail1 | detail10 | 3    | switched |
| 2    | folder1 | detail2   | detail20   | 2    | 2    | folder1 | detail2 | detail20 | 2    |          |
| 3    | folder1 | detail3   | detail30   | 3    | 1    | folder1 | detail3 | detail30 | 1    | switched |
| 4    | folder1 | detail 98 | detail 198 | 4    | NULL | NULL    | NULL    | NULL     | NULL | not in b |
| 8    | folder1 | detail99  | detail 199 | 5    | NULL | NULL    | NULL    | NULL     | NULL | not in b |
| 5    | folder2 | detail1   | detail10   | 1    | 4    | folder2 | detail1 | detail10 | 1    |          |
| 6    | folder2 | detail2   | detail20   | 2    | 5    | folder2 | detail2 | detail20 | 2    |          |
| 7    | folder2 | detail3   | detail30   | 3    | 6    | folder2 | detail3 | detail30 | 3    |          |
| 9    | folder3 | detail1   | detail10   | 1    | 7    | folder3 | detail1 | detail10 | 1    |          |
| 10   | folder3 | detail2   | detail20   | 2    | 9    | folder3 | detail2 | detail20 | 3    | switched |
| 11   | folder3 | detail3   | detail30   | 3    | 8    | folder3 | detail3 | detail30 | 2    | switched |
| 12   | folder4 | detail1   | detail10   | 1    | 10   | folder4 | detail1 | detail10 | 1    |          |
| 13   | folder4 | detail2   | detail20   | 2    | NULL | NULL    | NULL    | NULL     | NULL | not in b |
| 14   | folder4 | detail3   | detail30   | 3    | 11   | folder4 | detail3 | detail30 | 2    | switched |
| NULL | NULL    | NULL      | NULL       | NULL | 12   | folder5 | detail5 | detail50 | 1    | not in a |
+------+---------+-----------+------------+------+------+---------+---------+----------+------+----------+

如果表中没有序号列或其他可用于排序的列,就无法找出“被调换”的情况。

示例测试设置:http://rextester.com/YEIAN6814

/* These tables have no ordinal or other column to sort by,
   so cases of 'switching' cannot be found with this setup. */

create table a (
    one   varchar(32),
    two   varchar(32),
    three varchar(32)
);

insert into a (one, two, three)
values
    ('folder1', 'detail1', 'detail10'),
    ('folder1', 'detail2', 'detail20'),
    ('folder1', 'detail3', 'detail30'),
    ('folder2', 'detail1', 'detail10'),
    ('folder2', 'detail2', 'detail20'),
    ('folder2', 'detail3', 'detail30'),
    ('folder3', 'detail1', 'detail10'),
    ('folder3', 'detail2', 'detail20'),
    ('folder3', 'detail3', 'detail30'),
    ('folder4', 'detail1', 'detail10'),
    ('folder4', 'detail2', 'detail20'),
    ('folder4', 'detail3', 'detail30');

/* Test table b, without an id column. */
create table b (
    one   varchar(32),
    two   varchar(32),
    three varchar(32)
);

insert into b (one, two, three)
values
    ('folder1', 'detail3', 'detail30'),
    ('folder1', 'detail2', 'detail20'),
    ('folder1', 'detail1', 'detail10'),
    ('folder2', 'detail1', 'detail10'),
    ('folder2', 'detail2', 'detail20'),
    ('folder2', 'detail3', 'detail30'),
    ('folder3', 'detail1', 'detail10'),
    ('folder3', 'detail3', 'detail30'),
    ('folder3', 'detail2', 'detail20'),
    ('folder4', 'detail1', 'detail10'),
    ('folder4', 'detail3', 'detail30'),
    ('folder5', 'detail5', 'detail50'); /* added this value for example */

完全外连接(full outer join):无论是否匹配,都显示 a 和 b 的所有行

/* Show every row of a and every row of b, matched or not.

   two and three are compared NULL-safely: NULL matches NULL, but NULL does
   not match ''. Comparing isnull(x,'') on both sides would treat NULL and
   '' as equal and hide that difference.

   diff is 'not in a' / 'not in b' for rows found on one side only.
   NOTE(review): the join on one and the diff test both assume one is never
   NULL - confirm against the real data. */
select a.*, b.*
  , case
      when a.one is null then 'not in a'
      when b.one is null then 'not in b'
      else ''
    end as diff
  from a
    full outer join b
      on a.one = b.one
     and (a.two   = b.two   or (a.two   is null and b.two   is null))
     and (a.three = b.three or (a.three is null and b.three is null));

+---------+---------+----------+---------+---------+----------+----------+
|   one   |   two   |  three   |   one   |   two   |  three   |   diff   |
+---------+---------+----------+---------+---------+----------+----------+
| folder1 | detail1 | detail10 | folder1 | detail1 | detail10 |          |
| folder1 | detail2 | detail20 | folder1 | detail2 | detail20 |          |
| folder1 | detail3 | detail30 | folder1 | detail3 | detail30 |          |
| folder2 | detail1 | detail10 | folder2 | detail1 | detail10 |          |
| folder2 | detail2 | detail20 | folder2 | detail2 | detail20 |          |
| folder2 | detail3 | detail30 | folder2 | detail3 | detail30 |          |
| folder3 | detail1 | detail10 | folder3 | detail1 | detail10 |          |
| folder3 | detail2 | detail20 | folder3 | detail2 | detail20 |          |
| folder3 | detail3 | detail30 | folder3 | detail3 | detail30 |          |
| folder4 | detail1 | detail10 | folder4 | detail1 | detail10 |          |
| folder4 | detail2 | detail20 | NULL    | NULL    | NULL     | not in b |
| folder4 | detail3 | detail30 | folder4 | detail3 | detail30 |          |
| NULL    | NULL    | NULL     | folder5 | detail5 | detail50 | not in a |
+---------+---------+----------+---------+---------+----------+----------+

左连接(left join):显示 a 的所有行,以及 b 中与之匹配的行

/* Show every row of a, with the matching row of b beside it when one
   exists; the b columns are NULL when there is no match.

   two and three are compared NULL-safely: NULL matches NULL, but NULL does
   not match ''. Comparing isnull(x,'') on both sides would treat NULL and
   '' as equal and report a match that is not one.
   NOTE(review): a.one = b.one assumes one is never NULL - confirm. */
select a.*, b.*
  from a
    left join b
      on a.one = b.one
     and (a.two   = b.two   or (a.two   is null and b.two   is null))
     and (a.three = b.three or (a.three is null and b.three is null));

+---------+---------+----------+---------+---------+----------+
|   one   |   two   |  three   |   one   |   two   |  three   |
+---------+---------+----------+---------+---------+----------+
| folder1 | detail1 | detail10 | folder1 | detail1 | detail10 |
| folder1 | detail2 | detail20 | folder1 | detail2 | detail20 |
| folder1 | detail3 | detail30 | folder1 | detail3 | detail30 |
| folder2 | detail1 | detail10 | folder2 | detail1 | detail10 |
| folder2 | detail2 | detail20 | folder2 | detail2 | detail20 |
| folder2 | detail3 | detail30 | folder2 | detail3 | detail30 |
| folder3 | detail1 | detail10 | folder3 | detail1 | detail10 |
| folder3 | detail2 | detail20 | folder3 | detail2 | detail20 |
| folder3 | detail3 | detail30 | folder3 | detail3 | detail30 |
| folder4 | detail1 | detail10 | folder4 | detail1 | detail10 |
| folder4 | detail2 | detail20 | NULL    | NULL    | NULL     |
| folder4 | detail3 | detail30 | folder4 | detail3 | detail30 |
+---------+---------+----------+---------+---------+----------+

not exists():a 中所有不在 b 中的行

/* Every row of a that has no matching row in b.

   two and three are compared NULL-safely: NULL matches NULL, but NULL does
   not match ''. Comparing isnull(x,'') on both sides would treat NULL and
   '' as equal, so such a row would wrongly be left out of this list.
   NOTE(review): a.one = b.one assumes one is never NULL - confirm. */
select 'a not in b' as src, a.*
  from a
  where not exists (
      select 1
        from b
        where a.one = b.one
          and (a.two   = b.two   or (a.two   is null and b.two   is null))
          and (a.three = b.three or (a.three is null and b.three is null))
      );

+------------+---------+---------+----------+
|    src     |   one   |   two   |  three   |
+------------+---------+---------+----------+
| a not in b | folder4 | detail2 | detail20 |
+------------+---------+---------+----------+

not exists():b 中所有不在 a 中的行

/* Every row of b that has no matching row in a.

   two and three are compared NULL-safely: NULL matches NULL, but NULL does
   not match ''. Comparing isnull(x,'') on both sides would treat NULL and
   '' as equal, so such a row would wrongly be left out of this list.
   NOTE(review): a.one = b.one assumes one is never NULL - confirm. */
select 'b not in a' as src, b.*
  from b
  where not exists (
      select 1
        from a
        where a.one = b.one
          and (a.two   = b.two   or (a.two   is null and b.two   is null))
          and (a.three = b.three or (a.three is null and b.three is null))
      );

+------------+---------+---------+----------+
|    src     |   one   |   two   |  three   |
+------------+---------+---------+----------+
| b not in a | folder5 | detail5 | detail50 |
+------------+---------+---------+----------+

except:a 中不在 b 中的不同值
/* Distinct (one, two, three) rows of a for which b has no identical row. */
select one, two, three
  from a
except
select one, two, three
  from b;


+---------+---------+----------+
|   one   |   two   |  three   |
+---------+---------+----------+
| folder4 | detail2 | detail20 |
+---------+---------+----------+

intersect:a 中同样存在于 b 中的不同值
/* Distinct (one, two, three) rows that appear in both a and b. */
select one, two, three
  from a
intersect
select one, two, three
  from b;


+---------+---------+----------+
|   one   |   two   |  three   |
+---------+---------+----------+
| folder1 | detail1 | detail10 |
| folder1 | detail2 | detail20 |
| folder1 | detail3 | detail30 |
| folder2 | detail1 | detail10 |
| folder2 | detail2 | detail20 |
| folder2 | detail3 | detail30 |
| folder3 | detail1 | detail10 |
| folder3 | detail2 | detail20 |
| folder3 | detail3 | detail30 |
| folder4 | detail1 | detail10 |
| folder4 | detail3 | detail30 |
+---------+---------+----------+