按天统计各状态的ID数量

时间:2019-05-16 15:44:05

标签: sql sql-server

情况:

我有三张表。表1包含ID和订阅日期。表2包含ID、当前的活动状态以及最近一次状态变更的日期。表3包含ID以及所有状态变更的日志。注意:在订阅日期当天,所有ID均处于活跃(active)状态。如果同一天内有不止一次状态变更,则以最后一次变更后的状态为准。

目标:

我需要统计每天处于每种状态的ID数量,即每天有多少人处于活跃(active)、不活跃(inactive)和有风险(risky)状态。我的问题是:即使某个ID在某一天没有任何数据,也要确保它的状态在这一天被计入。例如:ID 1(请参阅下面的示例数据)自5月2日(加入日期)起一直处于活跃状态,且没有发生过状态变更,因此直到现在,它每天都应被计为活跃。

在其他地方咨询了此问题之后,有人建议创建函数、使用 CROSS APPLY 并把计数结果存储到表中。我不具备这样做的技能,但这样做能解决这个问题吗?

所需的输出:

+------------+----------+-------+
|    date    |  status  | count |
+------------+----------+-------+
| 1-May-2019 | active   |     0 |
| 1-May-2019 | inactive |     0 |
| 1-May-2019 | risky    |     1 |
| 2-May-2019 | active   |     1 |
| 2-May-2019 | inactive |     0 |
| 2-May-2019 | risky    |     1 |
| 3-May-2019 | active   |     1 |
| 3-May-2019 | inactive |     0 |
| 3-May-2019 | risky    |     1 |
| 4-May-2019 | active   |     1 |
| 4-May-2019 | inactive |     0 |
| 4-May-2019 | risky    |     1 |
| 5-May-2019 | active   |     3 |
| 5-May-2019 | inactive |     0 |
| 5-May-2019 | risky    |     1 |
| ...        | ...      |   ... |
+------------+----------+-------+

示例数据(Fiddle):

-- Calendar helper table: one row per day, from 2019-05-01 through yesterday.
CREATE TABLE #dates ([date] date);

DECLARE @current_day date = '2019-05-01';
-- GETDATE() minus one day; the time part is dropped when stored in a date variable.
DECLARE @last_day date = DATEADD(day, -1, GETDATE());

-- Append one day per iteration until the last day has been inserted.
WHILE @current_day <= @last_day
BEGIN
    INSERT INTO #dates ([date])
    SELECT @current_day;

    SET @current_day = DATEADD(day, 1, @current_day);
END;
GO

-- Subscribers: id and subscription date (an id counts as 'active' from this date).
CREATE TABLE #t1 (
    id int,
    [subdate] date
);

INSERT INTO #t1 (id, [subdate])
VALUES
    (9, '2019-01-01'),
    (1, '2019-05-02'),
    (2, '2019-05-05'),
    (3, '2019-05-05'),
    (4, '2019-05-10');
GO

-- Latest known status per id, with the date on which that status was set.
CREATE TABLE #t2 (
    id int,
    [status] varchar(max),
    [datestatus] date
);

INSERT INTO #t2 (id, [status], [datestatus])
VALUES
    (9, 'risky', '2019-03-01'),
    (1, 'active', '2019-05-02'),
    (2, 'inactive', '2019-05-13'),
    (3, 'active', '2019-05-14'),
    (4, 'risky', '2019-05-15');
GO

-- Full status-change log: one row per change. Dates carry no time part, so two
-- changes on the same day (see id 4) have no recorded order.
CREATE TABLE #t3 (
    id int,
    [statuschange] varchar(max),
    [datechange] date
);

INSERT INTO #t3 (id, [statuschange], [datechange])
VALUES
    (9, 'inactive', '2019-01-01'),
    (9, 'active', '2019-02-01'),
    (9, 'risky', '2019-03-01'),
    (2, 'risky', '2019-05-08'),
    (2, 'inactive', '2019-05-13'),
    (3, 'inactive', '2019-05-08'),
    (3, 'active', '2019-05-14'),
    (4, 'inactive', '2019-05-15'),
    (4, 'risky', '2019-05-15');
GO

我目前的查询:

-- Pairs every logged status change with the change that preceded it for the
-- same id. StartDate / PreviousStatusChange fall back to the subscription date
-- and 'active' when there is no earlier change. Ids without any log row yield
-- a single row whose EndDate is the '2099-01-01' placeholder.
-- The join to #t2 contributes no columns; it only requires the id to exist there.
;WITH status_periods AS (
    SELECT
        sub.id,
        COALESCE(
            LAG(chg.datechange) OVER (PARTITION BY sub.id ORDER BY chg.datechange),
            sub.subdate
        ) AS StartDate,
        chg.datechange,
        COALESCE(
            LAG(chg.statuschange) OVER (PARTITION BY sub.id ORDER BY chg.datechange),
            'active'
        ) AS PreviousStatusChange,
        chg.statuschange
    FROM #t1 AS sub
    INNER JOIN #t2 AS cur
        ON sub.id = cur.id
    LEFT JOIN #t3 AS chg
        ON sub.id = chg.id
)
SELECT
    p.id,
    p.StartDate,
    COALESCE(p.datechange, '2099-01-01') AS EndDate,
    p.PreviousStatusChange,
    COALESCE(p.statuschange, p.PreviousStatusChange) AS NewStatus
FROM status_periods AS p

4 个答案:

答案 0 :(得分:2)

使用日期表是实现此目的的正确方法。您需要基础(种子)数据才能得到所需的输出。我把您的日期表的起始日期提前了,以便更早订阅的用户也能被填充进来。

我还添加了一个状态表,因为您的输出要求每个状态的每个日期都需要一行。

-- Calendar table: one row per day from 2019-01-01 through yesterday.
-- The range starts in January so that ids subscribed before the May reporting
-- window (id 9) also have calendar days.
DROP TABLE IF EXISTS #dates;
CREATE TABLE #dates ([date] date);

-- ISO 8601 (YYYY-MM-DD) is read the same way under every DATEFORMAT / language
-- setting when converted to the date type.
DECLARE @dIncr DATE = '2019-01-01';
DECLARE @dEnd DATE = DATEADD(day, -1, GETDATE());

-- Set-based fill instead of a row-by-row loop.
WITH day_series AS
(
    SELECT @dIncr AS [date]
    WHERE @dIncr <= @dEnd            -- empty range => no rows, as with the loop
    UNION ALL
    SELECT DATEADD(day, 1, [date])
    FROM day_series
    WHERE [date] < @dEnd
)
INSERT INTO #dates ([date])
SELECT [date]
FROM day_series
OPTION (MAXRECURSION 0);             -- the range is longer than the default limit of 100 levels
GO

-- Status lookup: guarantees one output row per status per day, even when no id holds it.
DROP TABLE IF EXISTS #status;
CREATE TABLE #status (
    status varchar(20)
);

INSERT INTO #status (status)
VALUES
    ('active'),
    ('inactive'),
    ('risky');
GO

-- Subscribers: id and subscription date.
DROP TABLE IF EXISTS #t1;
CREATE TABLE #t1 (
    id int,
    [subdate] date
);

INSERT INTO #t1 (id, [subdate])
VALUES
    (9, '2019-01-01'),
    (1, '2019-05-02'),
    (2, '2019-05-05'),
    (3, '2019-05-05'),
    (4, '2019-05-10');
GO

-- Latest status per id and the date it was set.
DROP TABLE IF EXISTS #t2;
CREATE TABLE #t2 (
    id int,
    [status] varchar(max),
    [datestatus] date
);

INSERT INTO #t2 (id, [status], [datestatus])
VALUES
    (9, 'risky', '2019-03-01'),
    (1, 'active', '2019-05-02'),
    (2, 'inactive', '2019-05-13'),
    (3, 'active', '2019-05-14'),
    (4, 'risky', '2019-05-15');
GO

-- Status-change log: one row per change, date only (no time of day).
DROP TABLE IF EXISTS #t3;
CREATE TABLE #t3 (
    id int,
    [statuschange] varchar(max),
    [datechange] date
);

INSERT INTO #t3 (id, [statuschange], [datechange])
VALUES
    (9, 'inactive', '2019-01-01'),
    (9, 'active', '2019-02-01'),
    (9, 'risky', '2019-03-01'),
    (2, 'risky', '2019-05-08'),
    (2, 'inactive', '2019-05-13'),
    (3, 'inactive', '2019-05-08'),
    (3, 'active', '2019-05-14'),
    (4, 'inactive', '2019-05-15'),
    (4, 'risky', '2019-05-15');
GO

-- Daily head-count per status for the reporting window [@From, @Thru].
-- Result: one row per (date, status) with the number of ids holding that status
-- on that day; combinations held by nobody are returned with a count of 0.
DECLARE
    @From DATE
    , @Thru DATE;

-- ISO 8601 literals are read the same way under every DATEFORMAT / language
-- setting; 'MM/DD/YYYY' literals are not (under dmy, '05/19/2019' fails to convert).
SET @From = '2019-05-01';
SET @Thru = '2019-05-19';

WITH
output_foundation AS
(
    -- Every (date, status) pair that must appear in the result.
    SELECT D.date, S.status
    FROM #dates AS D CROSS JOIN #status AS S
    WHERE
        D.date >= @From
        AND D.date <= @Thru
)
, id_stat AS
(
    -- All status events known for an id. src_rank resolves several events on the
    -- same day: the latest status in #t2 (3) beats a logged change in #t3 (2),
    -- which beats the implicit 'active' at subscription in #t1 (1).
    SELECT id, datechange, statuschange, 2 AS src_rank FROM #t3
    UNION ALL
    SELECT id, subdate, 'active', 1 FROM #t1
    UNION ALL
    SELECT id, datestatus, status, 3 FROM #t2
)
, id_day AS
(
    -- Exactly one status per id per day, so an id is never counted twice.
    -- Among log rows of equal rank the order within the day is not recorded;
    -- statuschange is only a deterministic tie-breaker.
    SELECT
        id
        , datechange
        , statuschange
        , ROW_NUMBER() OVER
            (
                PARTITION BY id, datechange
                ORDER BY src_rank DESC, statuschange
            ) AS rn
    FROM id_stat
    WHERE statuschange IS NOT NULL
)
, id_period AS
(
    -- Each status holds from its own date until the day before the next change
    -- (next_start is NULL for the status that is still current).
    SELECT
        id
        , statuschange
        , datechange AS start_date
        , LEAD(datechange) OVER (PARTITION BY id ORDER BY datechange) AS next_start
    FROM id_day
    WHERE rn = 1
)

SELECT
    OFDN.date
    , OFDN.status
    , COUNT(IDP.statuschange) AS count
FROM
    output_foundation AS OFDN
    LEFT OUTER JOIN id_period AS IDP
        ON OFDN.status = IDP.statuschange
            AND OFDN.date >= IDP.start_date
            AND (IDP.next_start IS NULL OR OFDN.date < IDP.next_start)
GROUP BY
    OFDN.date
    , OFDN.status
ORDER BY
    OFDN.date
    , OFDN.status;

答案 1 :(得分:0)

下面的查询也许能帮到您。我不确定它是否与您想要的结果相同,因为您给出的所需输出与临时表查询中提供的示例数据不匹配。目前,我假设您需要的是截至每一天、每种状态的累计总和。

-- Running total, per status, of the status-change events logged in #t3 up to
-- and including each calendar day.
-- NOTE(review): this accumulates change events; it does not subtract an id when
-- it later leaves the status and does not use the subscription dates in #t1.
;WITH day_status AS
(
    -- Every calendar day paired with every status that occurs in #t2.
    SELECT D.[date], S.[status]
    FROM #dates AS D
    CROSS JOIN (SELECT DISTINCT [status] FROM #t2) AS S
)
, daily_changes AS
(
    -- One row per (status, day): several ids switching to the same status on the
    -- same day are summed here instead of duplicating output rows.
    SELECT
        [statuschange]
        , [datechange]
        , COUNT(*) AS StatusValue
    FROM #t3
    GROUP BY
        [statuschange]
        , [datechange]
)
SELECT
    DS.[date]
    , DS.[status]
    , SUM(COALESCE(DC.StatusValue, 0)) OVER
        (
            PARTITION BY DS.[status]
            ORDER BY DS.[date]
            ROWS UNBOUNDED PRECEDING
        ) AS Count
FROM day_status AS DS
LEFT JOIN daily_changes AS DC
    ON DC.[statuschange] = DS.[status]
    AND DC.[datechange] = DS.[date]
ORDER BY
    DS.[date] ASC
    , DS.[status] ASC

答案 2 :(得分:0)

我认为您还需要在解决方案中添加两点:

  1. 一个包含所有日期的表,称为 DimDate(它能让最终结果更容易得到)。
  2. 一个记录每个ID状态历史(slowly changing dimension type 2)的表,即: enter image description here

下面是一个示例解决方案。它并不理想,但我想给您一个大致的思路。例如,我没有处理一个ID在同一天内两次更改状态的情况。基本上,我为每个ID创建了一张SCD2表,又创建了一张DimDate表,并将它们连接在一起。

-- Step 1: status history per id as a slowly-changing-dimension (type 2) table.
-- One row per status version, valid from OpenDate through CloseDate inclusive;
-- the open-ended current version carries CloseDate 9999-12-31 and IsCurrent = 1.
drop table if exists #t4;
create table #t4 (id int, [Status] varchar(20), OpenDate date, CloseDate date, IsCurrent int);

;with events as (
    -- Every status event known for an id. src_rank resolves several events on the
    -- same day: latest status in #t2 (3) > logged change in #t3 (2) > implicit
    -- 'active' at subscription in #t1 (1).
    select id, cast('active' as varchar(max)) as [Status], [subdate] as OpenDate, 1 as src_rank
    from #t1
    union all
    select id, statuschange, datechange, 2
    from #t3
    union all
    select id, [status], datestatus, 3
    from #t2
)
, one_per_day as (
    -- Keep a single status per id per day so versions never overlap. Among log
    -- rows of equal rank the order within the day is not recorded; [Status] is
    -- only a deterministic tie-breaker.
    select
         id
        ,[Status]
        ,OpenDate
        ,row_number() over (partition by id, OpenDate order by src_rank desc, [Status]) as rn
    from events
    where [Status] is not null
      and OpenDate is not null
)
, versions as (
    -- The next version's OpenDate (if any) closes the current one.
    select
         id
        ,[Status]
        ,OpenDate
        ,lead(OpenDate) over (partition by id order by OpenDate) as NextOpenDate
    from one_per_day
    where rn = 1
)
insert into #t4(id, [Status], OpenDate, CloseDate, IsCurrent)
select
     id
    ,[Status]
    ,OpenDate
    ,coalesce(dateadd(day, -1, NextOpenDate), '9999-12-31') --CloseDate is unknown for the current version
    ,case when NextOpenDate is null then 1 else 0 end
from versions;

-- Step 2: example DimDate, one row per day from 2019-01-01 through 2019-05-31.
drop table if exists #DimDate;
create table #DimDate (
dat date,
dateFormatted as FORMAT(dat, 'dd-MMM-yyyy') --computed display value, e.g. 01-May-2019
);

declare @i date = '2019-01-01';

while @i < '2019-06-01'
begin
    insert into #DimDate(dat)
    select @i;

    set @i = DATEADD(day, 1, @i);
end;

-- Step 3: per day and status, count the versions whose validity range covers the day.
-- count(t4.Status) yields 0 for day/status pairs that no id holds.
select
     d.dateFormatted
    ,v.statuses
    ,count(t4.Status) as [count]
from #DimDate as d
cross join (values ('inactive'), ('active'), ('risky')) as v(statuses)
left join #t4 as t4
    on v.statuses = t4.Status
    and d.dat between t4.OpenDate and t4.CloseDate
group by
     d.dateFormatted
    ,v.statuses
    ,d.dat
order by
     d.dat
    ,v.statuses

答案 3 :(得分:0)

我省略了这一部分:“当一天中有多个状态更改时,以最近的状态为准。”您需要找到一种方法来选出一天中的最后一个状态;在当前的设计下这是做不到的,也许可以在#t3上添加时间列或自增ID。... 它对我有用...请复制整个代码,然后重试。 enter image description here

-- Calendar table covering 2019-05-01 up to and including yesterday, one row per day.
CREATE TABLE #dates ([date] date);

DECLARE @day date = '2019-05-01';
DECLARE @final_day date = DATEADD(day, -1, GETDATE());

WHILE @day <= @final_day
BEGIN
    INSERT INTO #dates ([date])
    SELECT @day;

    -- advance to the next calendar day
    SET @day = DATEADD(day, 1, @day);
END;
GO

-- Subscribers: id and the date on which the id joined (status is 'active' from then on).
CREATE TABLE #t1 (
    id int,
    [subdate] date
);

INSERT INTO #t1 (id, [subdate])
VALUES
    (9, '2019-01-01'),
    (1, '2019-05-02'),
    (2, '2019-05-05'),
    (3, '2019-05-05'),
    (4, '2019-05-10');
GO

-- ID + Latest activity date
/*create  table #t2 (id int, [status] int, [datestatus] date)
insert into #t2 values 
(9,'risky', '2019-03-01'),
(1, 1, '2019-05-02'),
(2, 'inactive', '2019-05-13'),
(3, 'active', '2019-05-14'),
(4, 'risky', '2019-05-15')
GO*/

-- Status-change log with the status stored as a numeric code; the codes are
-- matched against the id column of the #t4 status table by the final query.
CREATE TABLE #t3 (
    id int,
    [statuschange] int,
    [datechange] date
);

INSERT INTO #t3 (id, [statuschange], [datechange])
VALUES
    (9, 2, '2019-01-01'),
    (9, 1, '2019-02-01'),
    (9, 3, '2019-03-01'),
    (2, 3, '2019-05-08'),
    (2, 2, '2019-05-13'),
    (3, 2, '2019-05-08'),
    (3, 1, '2019-05-14'),
    (4, 2, '2019-05-15'),
    (4, 3, '2019-05-15');
GO
-- Status lookup table: numeric status code and its name.
CREATE TABLE #t4 (
    id int,
    [status] varchar(max)
);

INSERT INTO #t4 (id, [status])
VALUES
    (1, 'active'),
    (2, 'inactive'),
    (3, 'risky');




-- Number of ids per calendar day and status code.
-- Result columns: [date], id (the status code from #t4), [count].
;WITH unionall AS
(
    -- Every status event per id: the implicit 'active' (code 1) on the
    -- subscription date plus all logged changes. eventorder places the
    -- subscription row before a logged change dated the same day.
    SELECT
        id
        , 1 AS [statuschange]   -- starts active at join date
        , [subdate] AS datechange
        , 0 AS eventorder
    FROM #t1
    UNION ALL
    SELECT
        id
        , [statuschange]
        , [datechange]
        , 1
    FROM #t3
)
, userstatuslog AS
(
    -- Validity range of each event: from its own date until the day before the
    -- next event of the same id, or until now for the latest event.
    -- When several events share a day, all but the last in this ordering get an
    -- end date before their begin date and therefore cover no day. The log has
    -- no time column, so ordering same-day changes by status code is only a
    -- deterministic tie-breaker, not the true sequence.
    SELECT
        id
        , [statuschange]
        , datechange AS beginingdate
        , COALESCE(
            DATEADD(DAY, -1, LEAD(datechange) OVER
                (
                    PARTITION BY id
                    ORDER BY [datechange], eventorder, [statuschange]
                )),
            GETDATE()
          ) AS enddate
    FROM unionall
)
, datestatus AS
(
    -- One row per id and calendar day on which the id holds a status.
    SELECT
        L.id
        , D.[date]
        , L.[statuschange] AS newstatus
    FROM userstatuslog AS L
    INNER JOIN #dates AS D
        ON D.[date] >= L.beginingdate
        AND D.[date] <= L.enddate
)
, crossjoin AS
(
    -- Every (day, status code) pair, so pairs held by nobody are still reported.
    SELECT D.[date], S.id
    FROM #dates AS D
    CROSS JOIN #t4 AS S
)
SELECT
    crossjoin.[date]
    , crossjoin.id
    , COUNT(datestatus.newstatus) AS [count]   -- 0 when no id holds the status that day
FROM crossjoin
LEFT JOIN datestatus
    ON crossjoin.[date] = datestatus.[date]
    AND crossjoin.id = datestatus.newstatus
GROUP BY
    crossjoin.[date]
    , crossjoin.id
ORDER BY
    crossjoin.[date]
    , crossjoin.id
相关问题