我有一个类似于:
的数据集asset_id,date_logged
1234,2018-02-01
1234,2018-02-02
1234,2018-02-03
1234,2018-02-04
1234,2018-02-05
1234,2018-02-06
1234,2018-02-07
1234,2018-02-08
1234,2018-02-09
1234,2018-02-10
9876,2018-02-01
9876,2018-02-02
9876,2018-02-03
9876,2018-02-07
9876,2018-02-08
9876,2018-02-09
9876,2018-02-10
就本练习而言,假设今天的日期是2018 - 02 - 10(2018年2月10日)。对于表中的所有asset_ids,我试图确定date_logged的最新连续条纹的开始。
对于asset_id = 1234,这将是2018-02-01。 asset_id连续10天都记录在案。对于asset_id = 9876,这将是2018-02-07。由于asset_id未在2018-02-04,2018-02-05和2018-02-06上登录,因此最新的连续爆发始于2018-02-07。
所以,我的结果集有望看起来像:
asset_id,Number_of_days_in_most_recent_logging_streak
1234,10
9876,4
或者,或者:
asset_id,Date_Begin_Most_Recent_Streak
1234,2018-02-01
9876,2018-02-07
我无法解决任何让我感到惊讶的事情 - 到目前为止,我的最大努力是获取自第一个日志日期和今天以来的天数,以及asset_id在数据集中出现的天数并比较这些以识别条纹比它们出现的第一天更新的情况。对于我的真实数据集,这不是特别有问题,但这是一个丑陋的解决方案,我想了解更好的方法来获得结果。
答案 0 :(得分:2)
也许是这样的。在WITH子句中的每个内联视图和最近的内联视图中的SELECT * FROM之后中断查询,以查看每个步骤的作用。
-- Start date and length of each asset's most recent run of consecutive
-- logged days (Oracle syntax).
WITH
    -- Sample rows standing in for the real table.
    inputs (asset_id, date_logged) AS (
        SELECT 1234, DATE '2018-02-01' FROM dual UNION ALL
        SELECT 1234, DATE '2018-02-02' FROM dual UNION ALL
        SELECT 1234, DATE '2018-02-03' FROM dual UNION ALL
        SELECT 1234, DATE '2018-02-04' FROM dual UNION ALL
        SELECT 1234, DATE '2018-02-05' FROM dual UNION ALL
        SELECT 1234, DATE '2018-02-06' FROM dual UNION ALL
        SELECT 1234, DATE '2018-02-07' FROM dual UNION ALL
        SELECT 1234, DATE '2018-02-08' FROM dual UNION ALL
        SELECT 1234, DATE '2018-02-09' FROM dual UNION ALL
        SELECT 1234, DATE '2018-02-10' FROM dual UNION ALL
        SELECT 9876, DATE '2018-02-01' FROM dual UNION ALL
        SELECT 9876, DATE '2018-02-02' FROM dual UNION ALL
        SELECT 9876, DATE '2018-02-03' FROM dual UNION ALL
        SELECT 9876, DATE '2018-02-07' FROM dual UNION ALL
        SELECT 9876, DATE '2018-02-08' FROM dual UNION ALL
        SELECT 9876, DATE '2018-02-09' FROM dual UNION ALL
        SELECT 9876, DATE '2018-02-10' FROM dual
    ),
    -- Subtracting the per-asset row number from the date yields the same
    -- value for every day of an unbroken run, so grp labels each run.
    -- NOTE(review): relies on at most one row per asset per day -- confirm
    -- against the real table.
    islands AS (
        SELECT
            asset_id,
            date_logged,
            date_logged
                - ROW_NUMBER() OVER (PARTITION BY asset_id ORDER BY date_logged) AS grp
        FROM inputs
    ),
    -- One row per run: its first day and its length in rows.
    streaks AS (
        SELECT
            asset_id,
            MIN(date_logged) AS streak_start,
            COUNT(*)         AS cnt
        FROM islands
        GROUP BY asset_id, grp
    ),
    -- Number each asset's runs from the latest start date backwards.
    ranked AS (
        SELECT
            asset_id,
            streak_start,
            cnt,
            ROW_NUMBER() OVER (PARTITION BY asset_id ORDER BY streak_start DESC) AS recency
        FROM streaks
    )
SELECT
    asset_id,
    streak_start AS date_start_recent_streak,
    cnt
FROM ranked
WHERE recency = 1      -- keep only the latest run of each asset
ORDER BY asset_id      -- If needed
;
ASSET_ID DATE_START_RECENT_STREAK CNT
---------- ------------------------ ----------
1234 2018-02-01 10
9876 2018-02-07 4
答案 1 :(得分:1)
你可以试试这个,
-- Start date and length of each asset's most recent run of consecutive
-- logged days (Oracle syntax).
with
    -- Sample rows standing in for the real table.
    test (asset_id, date_logged) as (
        select 1234, date '2018-02-01' from dual union all
        select 1234, date '2018-02-02' from dual union all
        select 1234, date '2018-02-03' from dual union all
        select 1234, date '2018-02-04' from dual union all
        select 1234, date '2018-02-05' from dual union all
        select 1234, date '2018-02-06' from dual union all
        select 1234, date '2018-02-07' from dual union all
        select 1234, date '2018-02-08' from dual union all
        select 1234, date '2018-02-09' from dual union all
        select 1234, date '2018-02-10' from dual union all
        select 9876, date '2018-02-01' from dual union all
        select 9876, date '2018-02-02' from dual union all
        select 9876, date '2018-02-03' from dual union all
        select 9876, date '2018-02-07' from dual union all
        select 9876, date '2018-02-08' from dual union all
        select 9876, date '2018-02-09' from dual union all
        select 9876, date '2018-02-10' from dual union all
        select 9876, date '2018-02-11' from dual union all
        select 9876, date '2018-02-12' from dual
    ),
    -- For every row: the day after the asset's latest date, and the row's
    -- rank counting back from that latest date (1 = latest).
    ranked as (
        select
            asset_id,
            date_logged,
            max(date_logged) over (partition by asset_id) + 1 as max_date_logged_plus_one,
            dense_rank() over (partition by asset_id order by date_logged desc) as rown
        from test
    )
-- While no day is missing, the distance in days from the day after the latest
-- date equals the rank; the first gap breaks the equality for all older rows,
-- so only the most recent run passes the filter.
-- min()/count(1) are left unaliased so the result column headings stay
-- MIN(DATE_LOGGED) and COUNT(1).
select asset_id, min(date_logged), count(1)
from ranked
where max_date_logged_plus_one - date_logged = rown
group by asset_id
-- Sort in the outermost query: an ORDER BY inside a subquery does not
-- guarantee the order of the grouped result.
order by asset_id;
ASSET_ID MIN(DATE_LOGGED) COUNT(1)
---------- ---------------- ----------
1234 01-FEB-18 10
9876 07-FEB-18 6
如果评论下面的数据,输出
select 9876, date '2018-02-10' from dual union all
ASSET_ID MIN(DATE_LOGGED) COUNT(1)
---------- ---------------- ----------
1234 01-FEB-18 10
9876 11-FEB-18 2
答案 2 :(得分:0)
这会有意义吗?
SQL> with test (asset_id, date_logged) as
2 (select 1234, date '2018-02-01' from dual union all
3 select 1234, date '2018-02-02' from dual union all
4 select 1234, date '2018-02-03' from dual union all
5 select 1234, date '2018-02-04' from dual union all
6 select 1234, date '2018-02-05' from dual union all
7 select 1234, date '2018-02-06' from dual union all
8 select 1234, date '2018-02-07' from dual union all
9 select 1234, date '2018-02-08' from dual union all
10 select 1234, date '2018-02-09' from dual union all
11 select 1234, date '2018-02-10' from dual union all
12 select 9876, date '2018-02-01' from dual union all
13 select 9876, date '2018-02-02' from dual union all
14 select 9876, date '2018-02-03' from dual union all
15 select 9876, date '2018-02-07' from dual union all
16 select 9876, date '2018-02-08' from dual union all
17 select 9876, date '2018-02-09' from dual union all
18 select 9876, date '2018-02-10' from dual
19 ),
20 inter as
21 -- difference in days between DATE_LOGGED and its previous DATE_LOGGED:
22 -- NULL on an asset's first row, 1 inside a streak, > 1 right after a gap
23 -- NOTE(review): assumes DATE_LOGGED carries no time-of-day part -- confirm
24 (select asset_id,
25 date_logged,
26 date_logged - lag(date_logged) over (partition by asset_id order by date_logged) diff
27 from test
28 )
29 -- a row starts a streak when it has no predecessor or follows a gap;
30 -- the latest such row per asset is the start of the most recent streak
31 -- (comparing against max(diff) would pick the LARGEST gap instead, and
32 -- would drop assets that have only one row)
33 select i.asset_id, max(i.date_logged) date_logged
34 from inter i
35 where i.diff is null or i.diff > 1
36 group by i.asset_id
37 order by i.asset_id;
ASSET_ID DATE_LOGGE
---------- ----------
1234 2018-02-01
9876 2018-02-07
SQL>