在SQL中计算Box和Whiskers值-使用INSERT INTO与WITH WITH <tablename> AS

时间:2018-12-10 19:19:42

标签: sql sql-server tsql

全部

我正在尝试使用this forum post's方法来计算不同表中多列的箱形图值。

在上面的链接中,该过程仅使用WITH SourceData AS方法定义源表来计算一栏的箱形图值。因此,为了将其应用于需要箱形图值的所有列,我编写了第二个过程以重复第一个过程,但使用INSERT INTO @SourceData方法更改了第一个操作所依据的SourceData表。

致电计算器:

ALTER PROCEDURE [usp_Get_SKU_Type_Eff_BPVals] 
-- Add the parameters for the stored procedure here

AS
BEGIN
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;
DECLARE @Query1 nvarchar(4000);
DECLARE @Query2 nvarchar(4000);
DECLARE @BoxAndWhiskerVals table(
[Dur_Type][nvarchar](3),
[GroupID][nvarchar](50),
[Quartile1][int],
[Median][int],
[Quartile3][int],
[maxMeasure][int],
[minMeasure][int]);

-- Insert statements for procedure here
SET @Query1 = 
'
SELECT ''PRS'' as Dur_Type, L.SKU_Type AS GroupID,
    DATEDIFF(SECOND, POL.Pick_Release, POL.Pick_Start) AS Measure      
FROM nelc.Pick_Order_Lines POL LEFT JOIN
     nelc.Line_ID L ON L.ID = POL.Line_ID
WHERE L.SKU_Type IS NOT NULL
 '
SET @Query2 = 
'
SELECT ''PSC'' as Dur_Type, L.SKU_Type AS GroupID,
       DATEDIFF(SECOND, POL.Pick_Start, POL.Pick_Complete) AS Measure
FROM nelc.Pick_Order_Lines POL LEFT JOIN
     nelc.Line_ID L on L.ID = POL.Line_ID
WHERE L.SKU_Type IS NOT NULL
'

EXEC nelc.usp_Calculate_BoxPlot_Vals @SQL = @Query1;

EXEC nelc.usp_Calculate_BoxPlot_Vals @SQL = @Query2;

进行计算:

ALTER PROCEDURE [nelc].[usp_Calculate_BoxPlot_Vals]
-- Add the parameters for the stored procedure here
@SQL nvarchar(4000)
AS
BEGIN
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;
DECLARE @SourceData table(
[Dur_Type][nvarchar](3),
[GroupID][nvarchar](50),
[Measure][int])

INSERT INTO @SourceData EXEC(@SQL);

-- Insert statements for procedure here

--Aggregate into a single record for each group, using MAX to select the non-null 
--detail value for each column.
SELECT Dur_Type,
    GroupID,
(Max(Q1NextVal) - MAX(Q1Val)) * Max(Q1Frac) + Max(Q1Val) Quartile1,
(Max(MidVal1) + Max(MidVal2)) / 2 Median,
(Max(Q3NextVal) - MAX(Q3Val)) * Max(Q3Frac) + Max(Q3Val) Quartile3,
    Max(measure) maxMeasure,
    Min(measure) minMeasure

  --Save into a separate table if desired.
 --INTO dbo.PreCalculatedQuartiles
FROM (
--Expose the detail values for only the records at the index values 
--generated by the summary subquery. All other values are left as NULL.
SELECT detail.GroupID, detail.Dur_Type,
    CASE WHEN RowNum = Q1Idx THEN Measure ELSE NULL END Q1Val,
    CASE WHEN RowNum = Q1Idx + 1 THEN Measure ELSE NULL END Q1NextVal,
    CASE WHEN RowNum = Q3Idx THEN Measure ELSE NULL END Q3Val,
    CASE WHEN RowNum = Q3Idx + 1 THEN Measure ELSE NULL END Q3NextVal,
    Q1Frac,
    Q3Frac,
    CASE WHEN RowNum = MidPt1 THEN Measure ELSE NULL END MidVal1,
    CASE WHEN RowNum = MidPt2 THEN Measure ELSE NULL END MidVal2,
    Detail.measure
FROM
    --Calculate a row number sorted by measure for each group.
    (SELECT *,  ROW_NUMBER() OVER (PARTITION BY GroupID ORDER BY Measure) RowNum
FROM @SourceData) AS detail

INNER JOIN (
    --Summarize to find index numbers and fractions we need to use to locate 
    --the values at the quartile points.
    SELECT GroupID, Dur_Type,
        FLOOR((COUNT(*) + 3) / 4.0) Q1Idx,
        ((COUNT(*) + 3) / 4.0) - FLOOR((COUNT(*) + 3) / 4.0) Q1Frac,
        (COUNT(*) + 1) / 2 AS MidPt1,
        (COUNT(*) + 2) / 2 AS Midpt2,
        FLOOR((COUNT(*) * 3 + 1) / 4.0) Q3Idx,
        ((COUNT(*) * 3 + 1) / 4.0) - FLOOR((COUNT(*) * 3 + 1) / 4.0) Q3Frac
    FROM @SourceData
    GROUP BY GroupID, Dur_Type
    HAVING COUNT(*) > 1

) AS summary ON detail.GroupID  = summary.GroupID 

) AS combined
GROUP BY GroupID, Dur_Type;
END

所以问题在于,当我使用INSERT INTO @SourceData而不是WITH SourceData AS时,查询需要花费更长的时间来执行(我实际上没有等到看到多长时间,而是花了4个小时以上的时间)。在使用WITH SourceData AS的地方,查询会在1-2分钟内结束。

所以我的问题是:如果两个都充当从中提取数据的表,为什么要这样做?以及如何不使用我的INSERT INTO方法在不同列上重复计算过程?

编辑:这是使用WITH语句。工作正常,但我不知道如何使用此方法多次运行该过程。

ALTER PROCEDURE [nelc].[usp_Calculate_BP_PRS]
-- Add the parameters for the stored procedure here

AS
BEGIN
WITH SourceData AS (
SELECT 'PRS' as Dur_Type, L.SKU_Type AS GroupID,
       DATEDIFF(SECOND, POL.Pick_Release, POL.Pick_Start) AS Measure      
   FROM nelc.Pick_Order_Lines POL LEFT JOIN
        nelc.Line_ID L ON L.ID = POL.Line_ID
   WHERE L.SKU_Type IS NOT NULL
)

    SELECT Dur_Type,
    GroupID,
(Max(Q1NextVal) - MAX(Q1Val)) * Max(Q1Frac) + Max(Q1Val) Quartile1,
(Max(MidVal1) + Max(MidVal2)) / 2 Median,
(Max(Q3NextVal) - MAX(Q3Val)) * Max(Q3Frac) + Max(Q3Val) Quartile3,
    Max(measure) maxMeasure,
    Min(measure) minMeasure

--Save into a separate table if desired.
--INTO dbo.PreCalculatedQuartiles
 FROM (
 --Expose the detail values for only the records at the index values 
 --generated by the summary subquery. All other values are left as NULL.
 SELECT detail.GroupID, detail.Dur_Type,
    CASE WHEN RowNum = Q1Idx THEN Measure ELSE NULL END Q1Val,
    CASE WHEN RowNum = Q1Idx + 1 THEN Measure ELSE NULL END Q1NextVal,
    CASE WHEN RowNum = Q3Idx THEN Measure ELSE NULL END Q3Val,
    CASE WHEN RowNum = Q3Idx + 1 THEN Measure ELSE NULL END Q3NextVal,
    Q1Frac,
    Q3Frac,
    CASE WHEN RowNum = MidPt1 THEN Measure ELSE NULL END MidVal1,
    CASE WHEN RowNum = MidPt2 THEN Measure ELSE NULL END MidVal2,
    Detail.measure
FROM
    --Calculate a row number sorted by measure for each group.
    (SELECT *,  ROW_NUMBER() OVER (PARTITION BY GroupID ORDER BY Measure) RowNum
    FROM SourceData) AS detail

INNER JOIN (
    --Summarize to find index numbers and fractions we need to use to locate 
    --the values at the quartile points.
    SELECT GroupID, Dur_Type,
        FLOOR((COUNT(*) + 3) / 4.0) Q1Idx,
        ((COUNT(*) + 3) / 4.0) - FLOOR((COUNT(*) + 3) / 4.0) Q1Frac,
        (COUNT(*) + 1) / 2 AS MidPt1,
        (COUNT(*) + 2) / 2 AS Midpt2,
        FLOOR((COUNT(*) * 3 + 1) / 4.0) Q3Idx,
        ((COUNT(*) * 3 + 1) / 4.0) - FLOOR((COUNT(*) * 3 + 1) / 4.0) Q3Frac
    FROM SourceData
    GROUP BY GroupID, Dur_Type
    HAVING COUNT(*) > 1

) AS summary ON detail.GroupID  = summary.GroupID 

) AS combined
GROUP BY GroupID, Dur_Type;
END

编辑#2: 这是我的sqlplan文件的链接:

The WITH AS Execution Plan

The INSERT INTO Estimated Execution Plan

编辑#3: 添加了存储过程上下文

即使仅尝试一个INSERT INTO语句,INSERT INTO SP也不会完成执行。

0 个答案:

没有答案