使用SQL Server匹配/分组几个键值对

时间:2013-03-15 00:33:10

标签: sql sql-server-2008 tsql select

我正在构建一个“匹配”配置文件的应用程序。 例如,这是我的schema ::

的简化版本

用户 标识
名字
姓氏

用户配置 标识
用户ID
SomeOtherFields

UserProfileFields 标识
UserProfileId
重点
价值

UserProfile用于保存一些标准信息(出生日期等...)

UserProfileFields基本上是一个键及其值的列表,以便建立一个看起来像这样的字典(再次,为了这个问题的目的简化)

UserProfileID | Key       | Value       
123           | food      | Pizza
123           | food      | Indian
4453          | drink     | Coke
44850         | drink     | Orange Juice
88493         | food      | Pizza
448382        | food      | Chinese

所以,从上面我们可以看到,配置文件123与食物上的88493相匹配 - 它们都有食物|披萨

有没有办法有效地查询此表以获取“匹配”列表

我会设想每天运行一次,结果存储在一个单独的表中

例如:

匹配

MatchID | ProfileID
1       | 123
1       | 88493

我正在猜测

的内容
SELECT * FROM UserProfileFields
GROUP BY Key

查询类型......但不确定这会有多少效率,例如100万行?

3 个答案:

答案 0 :(得分:2)

这应该照顾你。

-- ============================================================================
-- BEGIN: SETUP TEST DATA
-- ============================================================================
CREATE TABLE UserProfileFields (
    UserProfileID   int
   ,[Key]           varchar(5)
   ,Value           varchar(12)
);


INSERT UserProfileFields (UserProfileID, [Key], Value)
SELECT A.*
  FROM (
        SELECT * FROM UserProfileFields WHERE 1=2
        UNION ALL SELECT 123,       'food',     'Pizza'
        UNION ALL SELECT 123,       'food',     'Indian'
        UNION ALL SELECT 4453,      'drink',    'Coke'
        UNION ALL SELECT 44850,     'drink',    'Orange Juice'
        UNION ALL SELECT 88493,     'food',     'Pizza'
        UNION ALL SELECT 448382,    'food',     'Chinese'
        UNION ALL SELECT 88493,     'drink',    'Coke'
        UNION ALL SELECT 88493,     'drink',    'Orange Juice'
       ) A;

--/*
-- Turn 8 records into 1,048,576
DECLARE @Count int; SELECT @Count = 0;
WHILE @Count < 17
  BEGIN
    INSERT UserProfileFields
    SELECT * FROM UserProfileFields

    SELECT @Count = (@Count + 1)
END
--*/
-- SELECT COUNT(*) FROM UserProfileFields WITH (NOLOCK)
-- ============================================================================
-- END: SETUP TEST DATA
-- ============================================================================




-- ============================================================================
-- BEGIN: Solution if Key, Value, and UserProfileID do NOT make up a unique key
-- ============================================================================
SET NOCOUNT ON
IF OBJECT_ID('tempdb..#DistinctValues', 'U') IS NOT NULL DROP TABLE #DistinctValues;
IF OBJECT_ID('tempdb..#Matches', 'U') IS NOT NULL DROP TABLE #Matches;

SELECT [Key], UserProfileID, Value 
  INTO #DistinctValues
  FROM UserProfileFields WITH (NOLOCK)
 GROUP BY [Key], UserProfileID, Value;

SELECT A.[Key], A.Value, A.UserProfileID
  INTO #Matches
  FROM #DistinctValues A
  JOIN #DistinctValues B
    ON A.[Key]           = B.[Key]
   AND A.Value           = B.Value
   AND A.UserProfileID  <> B.UserProfileID;

SELECT DENSE_RANK() OVER(ORDER BY A.[Key], A.Value) [MatchID]
      ,A.UserProfileID
      ,A.[Key]
      ,A.Value
  FROM #Matches A;
-- ============================================================================
-- END: Solution if Key, Value, and UserProfileID do NOT make up a unique key
-- ============================================================================




-- ============================================================================
-- BEGIN: Solution if Key, Value, and UserProfileID make up a unique key
-- ============================================================================
IF OBJECT_ID('tempdb..#Matches', 'U') IS NOT NULL DROP TABLE #Matches;

SELECT A.[Key], A.Value, A.UserProfileID
  INTO #Matches
  FROM UserProfileFields A WITH (NOLOCK)
  JOIN UserProfileFields B WITH (NOLOCK)
    ON A.[Key]           = B.[Key]
   AND A.Value           = B.Value
   AND A.UserProfileID  <> B.UserProfileID;

SELECT DENSE_RANK() OVER(ORDER BY A.[Key], A.Value) [MatchID]
      ,A.UserProfileID
      ,A.[Key]
      ,A.Value
  FROM #Matches A;
-- ============================================================================
-- END: Solution if Key, Value, and UserProfileID make up a unique key
-- ============================================================================

答案 1 :(得分:1)

WITH Matches
AS
(
    SELECT  a.UserProfileID,
            a.[Key],
            a.Value,
            DENSE_RANK() OVER(ORDER BY a.[Key]) MatchID
    FROM    UserProfileFields a
            INNER JOIN
            (
                SELECT  [Key], Value
                FROM    UserProfileFields 
                GROUP   BY [Key], Value
                HAVING  COUNT(DISTINCT UserProfileID) > 1
            ) b ON  a.[Key] = b.[Key] AND
                    a.Value = b.Value
)
SELECT  MatchID, UserProfileID
FROM    Matches

答案 2 :(得分:1)

使用EXISTS()运算符选项并覆盖索引。这有助于避免过多的数据排序。

CREATE INDEX ix_Key_Value_UserProfileFields ON dbo.UserProfileFields([Key], Value) INCLUDE(UserProfileID)

SELECT DENSE_RANK() OVER(ORDER BY t.[Key], t.Value) AS MatchID, t.UserProfileID
FROM dbo.UserProfileFields t
WHERE EXISTS (
              SELECT 1
              FROM dbo.UserProfileFields t2
              WHERE t.[Key] = t2.[Key]
                AND t.Value = t2.Value
              HAVING COUNT(*) > 1  
              )

enter image description here

SQLFiddle上的演示