SQL查询计数频率

时间:2011-04-08 11:34:27

标签: sql sql-server tsql pivot

我有一张表,其中包含如下条目:
id     keywords
1      cat, dog, man, mouse
2      man, pen, pencil, eraser
3      dog, man, friends
4      dog, leash,......

我想生成类似下面这样的表:
id  cat  dog  man  mouse  pen  pencil  eraser  friends  leash ......
1   1    1    1    1      0    0       0       0        0
2   0    0    1    0      1    1       1       0        0
3   0    1    1    0      0    0       0       1        0

等等。

4 个答案:

答案 0 :(得分:5)

-- Splits each comma-separated keyword list into one row per (id, keyword),
-- then pivots the distinct keywords into per-id count columns via dynamic SQL.

-- A previous run that failed before its final DROP would leave #Split behind
-- and make SELECT ... INTO fail, so clear it first.
IF OBJECT_ID('tempdb..#Split') IS NOT NULL
    DROP TABLE #Split;

WITH basedata(id, keywords) AS
(
    -- Inline sample rows standing in for the real table.
    SELECT 1, 'cat, dog, man, mouse' UNION ALL
    SELECT 2, 'man, pen, pencil, eraser' UNION ALL
    SELECT 3, 'dog, man, friends' UNION ALL
    SELECT 4, 'dog, leash'
),
cte(id, x) AS
(
    SELECT id,
           -- FOR XML PATH('') entity-escapes &, < and > in the raw text, so the
           -- CAST cannot fail on keywords that contain XML special characters.
           -- Each comma then becomes an element boundary.
           CAST('<foo>'
                + REPLACE((SELECT keywords AS [*] FOR XML PATH('')), ',', '</foo><foo>')
                + '</foo>' AS XML)
    FROM   basedata
),
split_rows(id, keyword) AS
(
    -- One row per <foo> element; .value() un-escapes the text again.
    SELECT id,
           LTRIM(RTRIM(w.value('.', 'nvarchar(max)')))
    FROM   cte
           CROSS APPLY x.nodes('//foo') AS word(w)
)
SELECT id,
       keyword
INTO   #Split
FROM   split_rows
-- An empty token (e.g. from a trailing comma) would become the invalid
-- identifier [] in the pivot column list.
WHERE  keyword <> '';

DECLARE @ColList nvarchar(max);

-- Build "[cat],[dog],..." with FOR XML PATH instead of SELECT @v = @v + ...,
-- whose result is not guaranteed. QUOTENAME makes every keyword a safely
-- delimited identifier before it is spliced into the dynamic SQL.
SELECT @ColList = STUFF(
           (SELECT ',' + QUOTENAME(k.keyword)
            FROM   (SELECT DISTINCT keyword FROM #Split) AS k
            ORDER BY k.keyword
            FOR XML PATH(''), TYPE).value('.', 'nvarchar(max)'),
           1, 1, '');

-- With no keywords at all there is nothing to pivot.
IF @ColList IS NOT NULL
    EXEC(N'
SELECT id, ' + @ColList + N'
FROM #Split
PIVOT (COUNT(keyword) FOR keyword IN (' + @ColList + N')) AS P
ORDER BY id');

DROP TABLE #Split;

给出

id          cat         dog         eraser      friends     leash       man         mouse       pen         pencil
----------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- -----------
1           1           1           0           0           0           1           1           0           0
2           0           0           1           0           0           1           0           1           1
3           0           1           0           1           0           1           0           0           0
4           0           1           0           0           1           0           0           0           0

答案 1 :(得分:2)

您必须使用数据透视表吗?您的最终结果是每个 id 的频率——这看起来很奇怪,因为那样每个单元格的频率最多只会是 1。

看看这是否适合你。

示例数据

-- Sample table: one row per id holding a comma-separated keyword list.
create table basedata(id int, keywords varchar(max));

-- The explicit column list keeps the insert valid if columns are later
-- added to or reordered in basedata.
insert into basedata (id, keywords)
select 1, 'cat, dog, man, mouse' union all
select 2, 'man, pen, pencil, eraser' union all
select 3, 'dog, man, friends' union all
select 4, 'dog, leash';

查询

-- Recursively peels one keyword at a time off each comma-separated list in
-- basedata, then counts the occurrences of every keyword.
;with cte(id, list, word) as (
    -- Anchor: "word" is the first keyword, "list" is whatever follows the
    -- first comma. Appending ',' guarantees charindex finds a comma even for
    -- the last keyword. Both columns are cast so their types match the
    -- recursive member exactly.
    select id,
        cast(stuff(keywords, 1, charindex(',', keywords + ','), '') as varchar(max)),
        cast(ltrim(rtrim(left(keywords, charindex(',', keywords + ',') - 1))) as varchar(max))
    from basedata
    where keywords > ''
    union all
    -- Recursive member: repeat the same peel on the remainder until it is empty.
    select id,
        stuff(list, 1, charindex(',', list + ','), ''),
        ltrim(rtrim(left(list, charindex(',', list + ',') - 1)))
    from cte
    where list > ''
)
select word, count(*) as frequency
from cte
group by word
order by word
-- The default limit of 100 recursion levels would abort on a row with more
-- than about 100 keywords. Lifting it is safe because every step removes at
-- least one character from "list", so the recursion always terminates.
option (maxrecursion 0);

输出

word       frequency
---------- -----------
cat        1
dog        3
eraser     1
friends    1
leash      1
man        3
mouse      1
pen        1
pencil     1

答案 2 :(得分:1)

 -- One 0/1 flag column per known keyword. Each keyword is matched as a whole
 -- list element rather than as a substring: spaces are stripped and the list
 -- is wrapped in commas, so 'man' does not match inside 'woman'.
 -- NOTE: stripping spaces also collapses multi-word keywords ('hot dog'
 -- becomes 'hotdog'); write such patterns without the space.
 SELECT id,
 CAST(CASE WHEN ',' + REPLACE(keywords, ' ', '') + ',' LIKE '%,dog,%' THEN 1 ELSE 0 END AS bit) AS DOG,
 CAST(CASE WHEN ',' + REPLACE(keywords, ' ', '') + ',' LIKE '%,cat,%' THEN 1 ELSE 0 END AS bit) AS CAT,
 ...
 FROM yourtable

答案 3 :(得分:0)

如果您使用的是SQL Server 2008,则可以使用全文解析器来拆分字符串:

-- Counts keyword frequency by letting the full-text parser
-- (sys.dm_fts_parser) tokenize each keyword list.

-- Sample input: one comma-separated keyword list per Id.
Declare @Inputs Table ( Id int not null Primary Key, Keywords nvarchar(max ) )
Insert @Inputs( Id, Keywords )
Values ( 1, 'cat, dog, man, mouse' )
    , ( 2, 'man, pen, pencil, eraser' )
    , ( 3, 'dog, man, friends' )
    , ( 4, 'dog, leash' )

Declare @LCID int
Declare @StopListId int
Declare @AccentSensitive int

-- Parse with the locale of the master database.
Set @LCID = Cast( DatabasePropertyEx('master','LCID') As int )
-- NOTE(review): per the sys.dm_fts_parser documentation 0 selects the system
-- stoplist and 1 requests accent-sensitive parsing - confirm for your version.
Set @StopListId = 0
Set @AccentSensitive = 1

Select S.display_term, Count(*) As Frequency
From @Inputs As I
    Cross Apply (
                Select display_term
                -- The list is wrapped in double quotes by hand (doubling any
                -- embedded quote, as QUOTENAME would). QUOTENAME itself only
                -- accepts sysname input and returns NULL for lists longer
                -- than 128 characters.
                From sys.dm_fts_parser( '"' + Replace( I.Keywords, '"', '""' ) + '"'
                    , @LCID, @StopListId, @AccentSensitive)
                ) As S
Group By S.display_term

但是,如果您使用的不是 SQL Server 2008,则需要一个拆分函数(我已在本回答的末尾给出)。这样您的查询就是:

-- Counts how often each trimmed keyword occurs across all rows of @Inputs.
-- The trim is computed once in a Cross Apply so the select list and the
-- Group By cannot drift apart, and the result column gets an explicit name.
Select K.Keyword, Count(*) As Frequency
From @Inputs As I
    Cross Apply dbo.Split( I.Keywords, ',' ) As S
    Cross Apply ( Select LTrim(RTrim(S.Value)) As Keyword ) As K
Group By K.Keyword

拆分函数:

-- Inline table-valued function that splits @DelimitedList on @Delimiter.
-- Returns one row per element:
--   Position - start index of the element within the delimiter-padded list
--   Value    - the element text exactly as it appears (not trimmed)
Create Function [dbo].[Split]
(
    @DelimitedList nvarchar(max)
    , @Delimiter varchar(2) = ','
)
Returns Table
As
Return
    (
    With PaddedInput As
        (
        -- Make sure the list both starts and ends with a delimiter so that
        -- every element sits between two delimiters.
        Select
            Case
                When Left(@DelimitedList, DataLength(@Delimiter)) <> @Delimiter Then @Delimiter
                Else ''
            End
            + @DelimitedList
            + Case
                When Right(@DelimitedList, DataLength(@Delimiter)) <> @Delimiter Then @Delimiter
                Else ''
            End As List
            , DataLength(@Delimiter) As DelimiterLen
        )
        , Tally As
        (
        -- Candidate character positions 1..Len(@DelimitedList). The catalog
        -- views serve only as a convenient source of enough rows.
        Select Top (Coalesce(Len(@DelimitedList), 1))
            Row_Number() Over ( Order By o.object_id ) As Value
        From sys.objects As o
            Cross Join sys.columns As c
        )
    Select
        CharIndex(@Delimiter, P.List, T.Value) + P.DelimiterLen As Position
        , Substring (
                    P.List
                    -- the element starts right after the delimiter found at T.Value
                    , CharIndex(@Delimiter, P.List, T.Value) + P.DelimiterLen
                    -- and runs up to the next delimiter
                    , CharIndex(@Delimiter, P.List, T.Value + 1)
                        - ( CharIndex(@Delimiter, P.List, T.Value) + P.DelimiterLen )
                    ) As Value
    From PaddedInput As P
        Cross Join Tally As T
    -- Keep only positions where a delimiter begins, excluding the closing one.
    Where T.Value < Len(P.List)
        And Substring(P.List, T.Value, P.DelimiterLen) = @Delimiter
    )