让我先说一下,我知道这不是一个特别有效或优雅的代码。我正在查询一个名为INSIDE的临时表,如下所示:
CREATE TEMP TABLE INSIDE (CONNECT_DATE DATE, DAILY_CONNECTIONS INT);`
然后我尝试在INSIDE上运行以下查询,试图测试我一直在研究的模型。
SELECT *
, q5.DAN_PREDICTION - q5.LINEAR_PREDICTION AS PREDICTION_COMPARISON
, q5.DAN_PREDICTION - q5.ACTUAL_MONTH_END_AMOUNT AS DAN_VARIANCE
, q5.LINEAR_PREDICTION - q5.ACTUAL_MONTH_END_AMOUNT AS LINEAR_VARIANCE
FROM (SELECT *
, q4.mtd + q4.last_yr_remainder + q4.run_rate * q4.days_remaining AS DAN_PREDICTION
, q4.mtd + q4.curr_yr_7_day * days_remaining AS LINEAR_PREDICTION
FROM(
SELECT
*
, q3.curr_yr_7_day - q3.last_yr_7_day AS RUN_RATE
FROM(
SELECT
CONNECT_DATE
, DAILY_CONNECTIONS
, (cur_yr_1_prev + cur_yr_2_prev + cur_yr_3_prev + cur_yr_4_prev + cur_yr_5_prev + cur_yr_6_prev + cur_yr_7_prev)/7 AS CURR_YR_7_DAY
, (last_yr_1_prev + last_yr_2_prev + last_yr_3_prev + last_yr_4_prev + last_yr_5_prev + last_yr_6_prev + last_yr_7_prev)/7 AS LAST_YR_7_DAY
, (SELECT ISNULL(SUM(ins.DAILY_CONNECTIONS), 0)
FROM INSIDE ins
WHERE DATEPART(MONTH, ins.CONNECT_DATE) = DATEPART(MONTH, q2.CONNECT_DATE)
AND DATEPART(YEAR, ins.CONNECT_DATE) = DATEPART(YEAR, q2.CONNECT_DATE)
AND ins.CONNECT_DATE <= q2.CONNECT_DATE) AS MTD
, (SELECT ISNULL(SUM(ins.DAILY_CONNECTIONS), 0)
FROM INSIDE ins
WHERE DATEPART(MONTH, ins.CONNECT_DATE) = DATEPART(MONTH, q2.CONNECT_DATE)
AND DATEPART(YEAR, ins.CONNECT_DATE) = DATEPART(YEAR, q2.CONNECT_DATE)-1
AND ins.CONNECT_DATE > DATEADD(YEAR, -1, q2.CONNECT_DATE)) AS LAST_YR_REMAINDER
, (SELECT TOP 1 DATEPART(DAY, last_day(CONNECT_DATE))
FROM INSIDE
WHERE CONNECT_DATE = q2.CONNECT_DATE)-DATEPART(DAY, q2.CONNECT_DATE) DAYS_REMAINING
, (SELECT ISNULL(SUM(ins.DAILY_CONNECTIONS), 0)
FROM INSIDE ins
WHERE DATEPART(MONTH, ins.CONNECT_DATE) = DATEPART(MONTH, q2.CONNECT_DATE)
AND DATEPART(YEAR, ins.CONNECT_DATE) = DATEPART(YEAR, q2.CONNECT_DATE)) AS ACTUAL_MONTH_END_AMOUNT
FROM
(SELECT
q1.CONNECT_DATE CONNECT_DATE
, q1.DAILY_CONNECTIONS DAILY_CONNECTIONS
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-1,q1.connect_date)), 0) CUR_YR_1_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-2,q1.connect_date)), 0) CUR_YR_2_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-3,q1.connect_date)), 0) CUR_YR_3_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-4,q1.connect_date)), 0) CUR_YR_4_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-5,q1.connect_date)), 0) CUR_YR_5_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-6,q1.connect_date)), 0) CUR_YR_6_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-7,q1.connect_date)), 0) CUR_YR_7_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-1,q1.connect_date)))), 0) LAST_YR_1_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-2,q1.connect_date)))), 0) LAST_YR_2_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-3,q1.connect_date)))), 0) LAST_YR_3_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-4,q1.connect_date)))), 0) LAST_YR_4_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-5,q1.connect_date)))), 0) LAST_YR_5_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-6,q1.connect_date)))), 0) LAST_YR_6_PREV
, ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-7,q1.connect_date)))), 0) LAST_YR_7_PREV
FROM INSIDE q1 ORDER BY q1.CONNECT_DATE
) q2 ORDER BY q2.connect_date
) q3
) q4
) q5
运行内部q1查询似乎工作得很好;当我在q2中运行子查询时,问题就开始了。一次运行不止一个(MTD,LAST_YR_REMAINDER等)会产生以下错误:
Amazon无效操作:由于内部错误,不支持此类型的相关子查询模式;
我一直在审查Redshift中不受支持的子查询类型的文档,并且不明白这些规则正在破坏。任何帮助将不胜感激。
答案 0 :(得分:0)
你有太多的内联子查询。尝试使用公用表表达式(CTE
)以Redshift可以高效运行的方式分解逻辑。
大多数内联子查询可以重写为笛卡尔积的聚合。
WITH cte1 AS (
SELECT i1.CONNECT_DATE CONNECT_DATE
,i1.DAILY_CONNECTIONS DAILY_CONNECTIONS
-- Sub-selects converted to an aggregate over a sparse matrix
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(DAY, -1, i1.connect_date) THEN DAILY_CONNECTIONS ELSE NULL END) CUR_YR_1_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(DAY, -2, i1.connect_date) THEN DAILY_CONNECTIONS ELSE NULL END) CUR_YR_2_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(DAY, -3, i1.connect_date) THEN DAILY_CONNECTIONS ELSE NULL END) CUR_YR_3_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(DAY, -4, i1.connect_date) THEN DAILY_CONNECTIONS ELSE NULL END) CUR_YR_4_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(DAY, -5, i1.connect_date) THEN DAILY_CONNECTIONS ELSE NULL END) CUR_YR_5_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(DAY, -6, i1.connect_date) THEN DAILY_CONNECTIONS ELSE NULL END) CUR_YR_6_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(DAY, -7, i1.connect_date) THEN DAILY_CONNECTIONS ELSE NULL END) CUR_YR_7_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(YEAR, -1, (DATEADD(DAY,-1, i1.connect_date))) THEN DAILY_CONNECTIONS ELSE NULL END) LAST_YR_1_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(YEAR, -1, (DATEADD(DAY,-2, i1.connect_date))) THEN DAILY_CONNECTIONS ELSE NULL END) LAST_YR_2_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(YEAR, -1, (DATEADD(DAY,-3, i1.connect_date))) THEN DAILY_CONNECTIONS ELSE NULL END) LAST_YR_3_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(YEAR, -1, (DATEADD(DAY,-4, i1.connect_date))) THEN DAILY_CONNECTIONS ELSE NULL END) LAST_YR_4_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(YEAR, -1, (DATEADD(DAY,-5, i1.connect_date))) THEN DAILY_CONNECTIONS ELSE NULL END) LAST_YR_5_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(YEAR, -1, (DATEADD(DAY,-6, i1.connect_date))) THEN DAILY_CONNECTIONS ELSE NULL END) LAST_YR_6_PREV
,MAX(CASE WHEN i2.CONNECT_DATE = DATEADD(YEAR, -1, (DATEADD(DAY,-7, i1.connect_date))) THEN DAILY_CONNECTIONS ELSE NULL END) LAST_YR_7_PREV
,SUM(CASE WHEN DATEPART(MONTH, i2.CONNECT_DATE) = DATEPART(MONTH, i1.CONNECT_DATE)
AND DATEPART(YEAR, i2.CONNECT_DATE) = DATEPART(YEAR, i1.CONNECT_DATE)
AND i2.CONNECT_DATE <= i1.CONNECT_DATE
THEN i2.DAILY_CONNECTIONS
ELSE NULL END) AS MTD
,SUM(CASE WHEN DATEPART(MONTH, i2.CONNECT_DATE) = DATEPART(MONTH, i1.CONNECT_DATE)
AND DATEPART(YEAR, i2.CONNECT_DATE) = DATEPART(YEAR, i1.CONNECT_DATE)-1
AND i2.CONNECT_DATE > DATEADD(YEAR, -1, i1.CONNECT_DATE)
THEN i2.DAILY_CONNECTIONS
ELSE NULL END) AS LAST_YR_REMAINDER
,MAX(CASE WHEN i2.CONNECT_DATE = i1.CONNECT_DATE-DATEPART(DAY, i1.CONNECT_DATE)
THEN DATEPART(DAY, last_day(CONNECT_DATE))
ELSE NULL END) AS DAYS_REMAINING
,SUM(CASE WHEN DATEPART(MONTH, i2.CONNECT_DATE) = DATEPART(MONTH, i1.CONNECT_DATE)
AND DATEPART(YEAR, i2.CONNECT_DATE) = DATEPART(YEAR, i1.CONNECT_DATE)
THEN i2.DAILY_CONNECTIONS
ELSE NULL END) AS ACTUAL_MONTH_END_AMOUNT
FROM INSIDE i1
-- Create an intentional cartesian product
CROSS JOIN INSIDE i2
/* Consider limiting the cartesian to a specific overlap range. E.g.
WHERE i2.CONNECT_DATE <= DATEADD(YEAR, -1, (DATEADD(DAY,-7, i1.connect_date)))
*/
-- Use group by to collapse the cartesian back to the original size
GROUP BY 1, 2
ORDER BY 1
), cte2 AS (
SELECT CONNECT_DATE
, DAILY_CONNECTIONS
, (cur_yr_1_prev + cur_yr_2_prev + cur_yr_3_prev + cur_yr_4_prev + cur_yr_5_prev + cur_yr_6_prev + cur_yr_7_prev)/7 AS CURR_YR_7_DAY
, (last_yr_1_prev + last_yr_2_prev + last_yr_3_prev + last_yr_4_prev + last_yr_5_prev + last_yr_6_prev + last_yr_7_prev)/7 AS LAST_YR_7_DAY
, MTD, LAST_YR_REMAINDER, DAYS_REMAINING, ACTUAL_MONTH_END_AMOUNT
FROM cte1
ORDER BY connect_date
), cte3 AS (
SELECT *, curr_yr_7_day - last_yr_7_day AS RUN_RATE
FROM cte2
), cte4 AS (
SELECT *
, mtd + last_yr_remainder + run_rate * days_remaining AS DAN_PREDICTION
, mtd + curr_yr_7_day * days_remaining AS LINEAR_PREDICTION
FROM cte3
)
SELECT *
, DAN_PREDICTION - LINEAR_PREDICTION AS PREDICTION_COMPARISON
, DAN_PREDICTION - ACTUAL_MONTH_END_AMOUNT AS DAN_VARIANCE
, LINEAR_PREDICTION - ACTUAL_MONTH_END_AMOUNT AS LINEAR_VARIANCE
FROM cte4