查询执行时间过长

时间:2019-10-28 13:53:15

标签: postgresql

我有一个大查询,执行一次要花 10 多分钟,我想进一步优化它。我已经添加了几个索引,但对执行时间没有太大影响。

下面是查询计划。对于如何提高执行速度,有什么建议吗?

任何指导都会有很大帮助。

PostgreSQL版本11

"Hash Left Join  (cost=564692523.83..564692533.09 rows=200 width=1032)"
"  Hash Cond: (fo.customer_email = loc.customer_email)"
"  CTE base"
"    ->  Unique  (cost=2818612.53..2915284.49 rows=798528 width=309)"
"          ->  Sort  (cost=2818612.53..2823006.71 rows=1757672 width=309)"
"                Sort Key: (first_value(((cube.customer_first_name || ' '::text) || cube.customer_last_name)) OVER (?)), cube.customer_email, (first_value(cube.customer_first_name) OVER (?)), (first_value(cube.customer_last_name) OVER (?)), cube.store_id, (first_value(cube.customer_group_name) OVER (?)), cube.order_id, cube.cat_level_1_name, cube.cat_level_2_name, cube.cat_level_3_name, cube.item_sku, cube.product_name_sku, cube.shipping_country_id, cube.default_channel_grouping, cube.first_order_of_customer, cube.created_at_order, cube.order_subtotal_net_after_discount, cube.row_number, cube.customer_city, cube.order_number, (row_number() OVER (?))"
"                ->  WindowAgg  (cost=2348817.00..2383970.44 rows=1757672 width=309)"
"                      ->  Sort  (cost=2348817.00..2353211.18 rows=1757672 width=323)"
"                            Sort Key: cube.customer_email, cube.created_at_order DESC"
"                            ->  WindowAgg  (cost=1862615.29..1902162.91 rows=1757672 width=323)"
"                                  ->  Sort  (cost=1862615.29..1867009.47 rows=1757672 width=291)"
"                                        Sort Key: cube.customer_email, ((cube.customer_group_name IS NULL)), cube.created_at_order DESC NULLS LAST"
"                                        ->  WindowAgg  (cost=1400441.09..1439988.71 rows=1757672 width=291)"
"                                              ->  Sort  (cost=1400441.09..1404835.27 rows=1757672 width=259)"
"                                                    Sort Key: cube.customer_email, ((cube.customer_last_name IS NULL)), cube.created_at_order DESC NULLS LAST"
"                                                    ->  WindowAgg  (cost=962297.88..1001845.50 rows=1757672 width=259)"
"                                                          ->  Sort  (cost=962297.88..966692.06 rows=1757672 width=227)"
"                                                                Sort Key: cube.customer_email, ((cube.customer_first_name IS NULL)), cube.created_at_order DESC NULLS LAST"
"                                                                ->  WindowAgg  (cost=539397.31..587733.29 rows=1757672 width=227)"
"                                                                      ->  Sort  (cost=539397.31..543791.49 rows=1757672 width=195)"
"                                                                            Sort Key: cube.customer_email, (((cube.customer_first_name IS NULL) AND (cube.customer_last_name IS NULL))), cube.created_at_order DESC NULLS LAST"
"                                                                            ->  Seq Scan on cube  (cost=0.00..188863.72 rows=1757672 width=195)"
"  CTE last_order_top_value"
"    ->  Unique  (cost=705414.33..705431.08 rows=200 width=200)"
"          CTE last_order"
"            ->  WindowAgg  (cost=104658.07..129785.32 rows=670060 width=49)"
"                  ->  WindowAgg  (cost=104658.07..118059.27 rows=670060 width=41)"
"                        ->  Sort  (cost=104658.07..106333.22 rows=670060 width=37)"
"                              Sort Key: orders.customer_email, orders.created_at_order"
"                              ->  Seq Scan on orders  (cost=0.00..39816.60 rows=670060 width=37)"
"          CTE cat_url"
"            ->  Gather  (cost=1000.00..59878.16 rows=8269 width=64)"
"                  Workers Planned: 4"
"                  ->  Parallel Seq Scan on core_url_rewrite  (cost=0.00..58051.26 rows=2067 width=64)"
"                        Filter: ((id_path)::text ~~ '%category%'::text)"
"          CTE prod_url"
"            ->  Gather  (cost=1000.00..62692.26 rows=36410 width=64)"
"                  Workers Planned: 4"
"                  ->  Parallel Seq Scan on core_url_rewrite core_url_rewrite_1  (cost=0.00..58051.26 rows=9102 width=64)"
"                        Filter: ((category_id IS NULL) AND ((id_path)::text ~~ '%product%'::text))"
"          CTE base"
"            ->  WindowAgg  (cost=424384.90..437786.10 rows=670060 width=227)"
"                  ->  Sort  (cost=424384.90..426060.05 rows=670060 width=199)"
"                        Sort Key: lo.customer_email"
"                        ->  Hash Left Join  (cost=128458.52..295416.42 rows=670060 width=199)"
"                              Hash Cond: ((i.product_id = p.product_id) AND (i.store_id = p.store_id))"
"                              ->  Hash Left Join  (cost=79411.82..219567.33 rows=670060 width=58)"
"                                    Hash Cond: (lo.order_id = i.order_id)"
"                                    Join Filter: (lo.last_order = lo.order_id)"
"                                    ->  CTE Scan on last_order lo  (cost=0.00..13401.20 rows=670060 width=44)"
"                                    ->  Hash  (cost=46992.70..46992.70 rows=1765770 width=22)"
"                                          ->  Seq Scan on items i  (cost=0.00..46992.70 rows=1765770 width=22)"
"                              ->  Hash  (cost=48246.90..48246.90 rows=53320 width=157)"
"                                    ->  Hash Right Join  (cost=46408.20..48246.90 rows=53320 width=157)"
"                                          Hash Cond: ((pu.product_id = p.product_id) AND (pu.store_id = p.store_id))"
"                                          ->  CTE Scan on prod_url pu  (cost=0.00..728.20 rows=36410 width=48)"
"                                          ->  Hash  (cost=45608.40..45608.40 rows=53320 width=125)"
"                                                ->  Merge Left Join  (cost=44693.88..45608.40 rows=53320 width=125)"
"                                                      Merge Cond: ((p.store_id = cu.store_id) AND (((p.cat_level_2_id)::integer) = cu.category_id))"
"                                                      ->  Sort  (cost=43990.46..44123.76 rows=53320 width=97)"
"                                                            Sort Key: p.store_id, ((p.cat_level_2_id)::integer)"
"                                                            ->  Seq Scan on products p  (cost=0.00..39804.20 rows=53320 width=97)"
"                                                      ->  Sort  (cost=703.42..724.10 rows=8269 width=48)"
"                                                            Sort Key: cu.store_id, cu.category_id"
"                                                            ->  CTE Scan on cat_url cu  (cost=0.00..165.38 rows=8269 width=48)"
"          ->  Sort  (cost=15272.49..15280.87 rows=3350 width=200)"
"                Sort Key: base.customer_email"
"                ->  CTE Scan on base  (cost=0.00..15076.35 rows=3350 width=200)"
"                      Filter: (max_amount = amount)"
"  CTE repurchase"
"    ->  Hash Left Join  (cost=2.64..20903.71 rows=231416 width=44)"
"          Hash Cond: (lower((c.top_first_cat3)::text) = lower(art.top_first_cat3))"
"          InitPlan 7 (returns $8)"
"            ->  Seq Scan on avg_repurchase_times  (cost=0.00..1.25 rows=1 width=4)"
"                  Filter: (lower(top_first_cat3) = 'rest'::text)"
"          ->  Seq Scan on customers c  (cost=0.00..14051.16 rows=231416 width=48)"
"          ->  Hash  (cost=1.17..1.17 rows=17 width=14)"
"                ->  Seq Scan on avg_repurchase_times art  (cost=0.00..1.17 rows=17 width=14)"
"  CTE first_order_top_value_category"
"    ->  CTE Scan on cat_seg  (cost=282871.58..286770.09 rows=866 width=104)"
"          Filter: (hno = 1)"
"          CTE cat_seg"
"            ->  Sort  (cost=282438.41..282871.58 rows=173267 width=44)"
"                  Sort Key: cube_1.customer_email"
"                  ->  HashAggregate  (cost=265629.23..267361.90 rows=173267 width=44)"
"                        Group Key: cube_1.customer_email, cube_1.cat_level_1_name, cube_1.row_amount_order, row_number() OVER (?)"
"                        ->  WindowAgg  (cost=247363.16..259540.54 rows=608869 width=44)"
"                              ->  Sort  (cost=247363.16..248885.33 rows=608869 width=36)"
"                                    Sort Key: cube_1.customer_email, cube_1.row_amount_order DESC"
"                                    ->  Seq Scan on cube cube_1  (cost=0.00..188863.72 rows=608869 width=36)"
"                                          Filter: (first_order_of_customer IS TRUE)"
"  CTE final_output"
"    ->  WindowAgg  (cost=305171.04..560488690.38 rows=200 width=968)"
"          ->  GroupAggregate  (cost=305171.04..560488672.38 rows=200 width=804)"
"                Group Key: b.customer_email"
"                ->  Merge Left Join  (cost=305171.04..60394894.75 rows=4000750169 width=717)"
"                      Merge Cond: (b.customer_email = r.customer_email)"
"                      ->  Merge Left Join  (cost=279923.40..349754.67 rows=3457626 width=705)"
"                            Merge Cond: (b.customer_email = fotvc.customer_email)"
"                            ->  Merge Left Join  (cost=279863.83..295834.39 rows=798528 width=673)"
"                                  Merge Cond: (b.customer_email = lotv.customer_email)"
"                                  ->  Sort  (cost=279852.19..281848.51 rows=798528 width=513)"
"                                        Sort Key: b.customer_email"
"                                        ->  CTE Scan on base b  (cost=0.00..15970.56 rows=798528 width=513)"
"                                  ->  Sort  (cost=11.64..12.14 rows=200 width=192)"
"                                        Sort Key: lotv.customer_email"
"                                        ->  CTE Scan on last_order_top_value lotv  (cost=0.00..4.00 rows=200 width=192)"
"                            ->  Sort  (cost=59.57..61.74 rows=866 width=64)"
"                                  Sort Key: fotvc.customer_email"
"                                  ->  CTE Scan on first_order_top_value_category fotvc  (cost=0.00..17.32 rows=866 width=64)"
"                      ->  Sort  (cost=25247.63..25826.17 rows=231416 width=44)"
"                            Sort Key: r.customer_email"
"                            ->  CTE Scan on repurchase r  (cost=0.00..4628.32 rows=231416 width=44)"
"  CTE last_order_category"
"    ->  GroupAggregate  (cost=275444.02..275444.06 rows=1 width=117)"
"          Group Key: c_1.customer_email"
"          ->  Sort  (cost=275444.02..275444.03 rows=1 width=63)"
"                Sort Key: c_1.customer_email"
"                ->  Hash Join  (cost=235826.80..275444.01 rows=1 width=63)"
"                      Hash Cond: ((b_1.customer_email = c_1.customer_email) AND ((b_1.order_number)::text = (c_1.order_number)::text))"
"                      ->  CTE Scan on base b_1  (cost=0.00..17966.88 rows=3993 width=72)"
"                            Filter: (last_no = 1)"
"                      ->  Hash  (cost=188863.72..188863.72 rows=1757672 width=65)"
"                            ->  Seq Scan on cube c_1  (cost=0.00..188863.72 rows=1757672 width=65)"
"  ->  CTE Scan on final_output fo  (cost=0.00..4.00 rows=200 width=936)"
"  ->  Hash  (cost=0.02..0.02 rows=1 width=128)"
"        ->  CTE Scan on last_order_category loc  (cost=0.00..0.02 rows=1 width=128)"

下面是查询本身,它通过“CREATE TABLE segment AS”语句创建表。

-- EXPLAIN
-- Builds the `segment` table with one output row per customer_email group:
-- order-history aggregates from ol.cube, the top-value product of the last
-- order, repurchase/reactivation flags and first/last order categories.
--
-- Changes relative to the previous version (intended to leave the result set
-- unchanged while removing wasted work):
--   * dropped SELECT DISTINCT in `base` and `cat_seg`: both select
--     customer_email together with a row_number() partitioned by
--     customer_email, so no two rows can ever be equal and DISTINCT only
--     added a full sort / hash pass over every row;
--   * dropped the ORDER BY inside `cat_seg` (its consumers do not depend on
--     the row order of the CTE);
--   * dropped the unused `rn` and `created_at_order` columns of `last_order`
--     (`rn` required an extra window pass);
--   * renamed the inner CTE that shadowed the outer `base`;
--   * GROUP BY names its column, the repurchase-time CASE has an explicit
--     alias, SELECT * is spelled out and column references are qualified.
CREATE TABLE segment AS
    WITH base AS (
        -- One row per ol.cube row, enriched with per-customer "latest known"
        -- attributes. Each first_value() sorts non-NULL values first
        -- (FALSE < TRUE for the ISNULL key) and then the newest order first.
        -- `last_no` numbers a customer's rows from newest to oldest; rows with
        -- equal created_at_order are numbered in arbitrary order.
        SELECT first_value(customer_first_name || ' ' || customer_last_name)
                   OVER (
                       PARTITION BY customer_email
                       ORDER BY (customer_first_name ISNULL AND customer_last_name ISNULL),
                           created_at_order DESC NULLS LAST ) AS name,
               customer_email,
               first_value(customer_first_name)
                   OVER (
                       PARTITION BY customer_email
                       ORDER BY customer_first_name ISNULL,
                           created_at_order DESC NULLS LAST ) AS first_name,
               first_value(customer_last_name)
                   OVER (
                       PARTITION BY customer_email
                       ORDER BY customer_last_name ISNULL,
                           created_at_order DESC NULLS LAST ) AS last_name,
               store_id,
               first_value(customer_group_name)
                   OVER (
                       PARTITION BY customer_email
                       ORDER BY customer_group_name ISNULL,
                           created_at_order DESC NULLS LAST ) AS customer_group,
               order_id,
               cat_level_1_name,
               cat_level_2_name,
               cat_level_3_name,
               item_sku,
               product_name_sku,
               shipping_country_id,
               default_channel_grouping,
               first_order_of_customer,
               created_at_order,
               order_subtotal_net_after_discount,
               row_number,
               customer_city,
               order_number,
               row_number()
                   OVER (
                       PARTITION BY customer_email
                       ORDER BY created_at_order DESC ) AS last_no
        FROM ol.cube
        --WHERE order_state_1 != 'canceled' AND order_state_1 != 'pending_payment'
    ),

    -- Per customer_email: one item of the chronologically last order whose
    -- discounted row amount equals the maximum for that customer.
    last_order_top_value AS (
        WITH last_order AS (
            -- Every order, tagged with the order_id of the same customer's
            -- latest order (last_value over the whole partition).
            SELECT customer_id,
                   last_value(order_id)
                       OVER (
                           PARTITION BY customer_email
                           ORDER BY created_at_order
                           ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS last_order,
                   order_id,
                   customer_email
            FROM ol.orders
        ),
        cat_url AS (
            -- URL rewrite rows whose id_path contains 'category'.
            SELECT category_id,
                   store_id,
                   request_path
            FROM dl_magento.core_url_rewrite
            WHERE id_path LIKE '%category%'
        ),
        prod_url AS (
            -- URL rewrite rows whose id_path contains 'product' and that are
            -- not tied to a category.
            SELECT product_id,
                   store_id,
                   request_path AS request_path
            FROM dl_magento.core_url_rewrite
            WHERE id_path LIKE '%product%'
              AND category_id IS NULL
        ),
        last_order_items AS (
            -- Items are attached only to the row that IS the customer's last
            -- order (lo.last_order = lo.order_id in the join condition); all
            -- other order rows survive the LEFT JOIN with NULL item columns
            -- and are discarded later by `max_amount = amount`.
            SELECT lo.customer_id,
                   lo.order_id,
                   lo.customer_email,
                   i.item_id,
                   p.product_id,
                   i.row_amount_minus_discount_order                 AS amount,
                   max(i.row_amount_minus_discount_order)
                       OVER (
                           PARTITION BY lo.customer_email )          AS max_amount,
                   cat_level_2_name                                  AS category,
                   'http://xxxxxxx/media/catalog/product' || image AS image,
                   name,
                   cu.request_path                                   AS category_request_path,
                   pu.request_path                                   AS product_request_path
            FROM last_order AS lo
                     LEFT JOIN ol.items AS i
                               ON lo.last_order = lo.order_id AND lo.order_id = i.order_id
                     LEFT JOIN ol.products AS p
                               ON i.product_id = p.product_id AND i.store_id = p.store_id
                     LEFT JOIN cat_url AS cu
                               ON p.cat_level_2_id :: INT = cu.category_id AND p.store_id = cu.store_id
                     LEFT JOIN prod_url AS pu
                               ON p.product_id = pu.product_id AND p.store_id = pu.store_id
        )
        -- NOTE(review): DISTINCT ON has no ORDER BY, so when several rows tie
        -- on the maximum amount (or several URL rows match) the row kept per
        -- customer_email is arbitrary.
        SELECT DISTINCT ON (customer_email) customer_email,
                                            order_id,
                                            customer_id,
                                            name,
                                            image,
                                            category,
                                            category_request_path,
                                            product_request_path
        FROM last_order_items
        WHERE max_amount = amount
    ),

    -- Per ol.customers row: days since the last order, the average repurchase
    -- time of the customer's top_first_cat3 (falling back to the 'rest' entry
    -- when there is no match), and a flag for "exactly 60 days since the last
    -- order and num_orders is not 1".
    repurchase AS (
        SELECT c.customer_email,
               c.top_first_cat3,
               date_part('day', 'today' :: DATE - c.last_order_date) :: INT AS days_since_last_order,
               -- Explicit alias: the outer query reads r.avg_repurchase_time,
               -- which previously resolved only through PostgreSQL's implicit
               -- naming of CASE results after their ELSE branch.
               CASE
                   WHEN art.avg_repurchase_time IS NULL THEN (SELECT avg_repurchase_time
                                                              FROM ol.avg_repurchase_times
                                                              WHERE lower(top_first_cat3) = 'rest')
                   ELSE art.avg_repurchase_time
                   END                                                      AS avg_repurchase_time,
               CASE
                   WHEN (now()::date - c.last_order_date::date) = 60 AND COALESCE(c.num_orders, 0) != 1 THEN 1
                   ELSE 0
                   END                                                      AS reactivation_delay
        FROM ol.customers c
                 LEFT JOIN ol.avg_repurchase_times art ON lower(art.top_first_cat3) = lower(c.top_first_cat3)
    ),

    -- Per customer_email: cat_level_1_name of the first-order row with the
    -- highest row_amount_order.
    first_order_top_value_category AS (
        WITH cat_seg AS (
            -- NOTE(review): DESC sorts NULLs first by default, so a row with a
            -- NULL row_amount_order would get hno = 1 - confirm the column is
            -- never NULL or add NULLS LAST.
            SELECT customer_email,
                   cat_level_1_name AS first_order_category,
                   row_amount_order,
                   ROW_NUMBER()
                       OVER (
                           PARTITION BY customer_email
                           ORDER BY row_amount_order DESC ) AS hno
            FROM ol.cube
            WHERE first_order_of_customer IS TRUE
        )
        SELECT customer_email,
               first_order_category,
               row_amount_order,
               hno
        FROM cat_seg
        WHERE hno = 1
    ),

    -- One row per customer_email: aggregates over all of the customer's
    -- `base` rows plus the per-customer lookups joined above.
    final_output AS (
        SELECT max(b.name)                                     AS name,
               b.customer_email,
               max(b.first_name)                               AS first_name,
               max(b.last_name)                                AS last_name,
               string_agg(DISTINCT b.store_id :: TEXT, ',')    AS store_id,
               max(b.customer_group)                           AS customer_group,
               string_agg(DISTINCT b.cat_level_1_name, ',')    AS cat_level_1,
               string_agg(DISTINCT b.cat_level_2_name, ',')    AS cat_level_2,
               string_agg(DISTINCT b.cat_level_3_name, ',')    AS cat_level_3,
               string_agg(DISTINCT b.item_sku, ',')            AS sku,
               string_agg(DISTINCT b.product_name_sku, ',')    AS product_name_sku,
               string_agg(DISTINCT b.shipping_country_id, ',') AS shipping_country_id,
               max(b.default_channel_grouping)
                   FILTER (WHERE b.first_order_of_customer)    AS first_order_acquisition_channel,
               min(b.created_at_order)                         AS first_order_date,
               CASE
                   WHEN (max(r.days_since_last_order) = max(r.avg_repurchase_time)) THEN 1
                   ELSE 0 END :: NUMERIC                       AS repurchase_delay,
               COALESCE(MAX(r.reactivation_delay), 0)          AS reactivation_delay,
               -- Voucher window: from the average repurchase time up to 16
               -- days after it.
               CASE
                   WHEN (max(r.days_since_last_order) BETWEEN max(r.avg_repurchase_time) AND max(r.avg_repurchase_time) + 16)
                       THEN 1
                   ELSE 0 END :: NUMERIC                       AS has_voucher_code,
               CASE
                   WHEN (max(r.days_since_last_order) BETWEEN max(r.avg_repurchase_time) AND max(r.avg_repurchase_time) + 16)
                       THEN
                       (current_date - ((max(r.days_since_last_order) - max(r.avg_repurchase_time)) ||
                                        ' days') :: INTERVAL) :: DATE
                   ELSE NULL END                               AS voucher_date,
               count(DISTINCT b.order_id)                      AS num_orders,
               -- Orders divided by the number of started 30-day periods
               -- between the first and the last order.
               count(DISTINCT b.order_id) :: NUMERIC /
               floor(((date_part('day', max(b.created_at_order) - min(b.created_at_order)) / 30) +
                      1))                                      AS orders_per_month,
               sum(b.order_subtotal_net_after_discount)
                   FILTER (WHERE b.row_number = 1)             AS alltime_rev,
               sum(b.order_subtotal_net_after_discount)
                   FILTER (WHERE b.row_number = 1) /
               nullif(count(DISTINCT b.order_id), 0)           AS average_rev_per_order,
               min(b.order_subtotal_net_after_discount)        AS min_rev,
               max(b.order_subtotal_net_after_discount)        AS max_rev,
               max(b.created_at_order)                         AS last_order_date,
               max(lotv.name)                                  AS last_order_top_value_product_name,
               max(lotv.category)                              AS last_order_top_value_product_category_name,
               max(lotv.category_request_path)                 AS last_order_top_value_product_category_url,
               max(lotv.product_request_path)                  AS last_order_top_value_product_url,
               max(lotv.image)                                 AS last_order_top_value_product_image_url,
               max(b.customer_city)                            AS customer_city,
               CAST('' AS TEXT)                                AS "TAGS",
               -- NOTE(review): the window has no ORDER BY, so which customers
               -- land in which of the 9 buckets depends on row arrival order.
               NTILE(9) OVER ()::TEXT                          AS customer_partition,
               MAX(fotvc.first_order_category)                 AS first_order_category,
               MAX(b.order_number)
                   FILTER (WHERE b.last_no = 1)                AS last_order_number
        FROM base AS b
                 LEFT JOIN last_order_top_value AS lotv ON b.customer_email = lotv.customer_email
                 LEFT JOIN repurchase AS r ON b.customer_email = r.customer_email
                 LEFT JOIN first_order_top_value_category AS fotvc ON fotvc.customer_email = b.customer_email
        GROUP BY b.customer_email
    ),

    -- Per customer_email: distinct category names of all ol.cube rows that
    -- share the order_number of the customer's newest `base` row (last_no = 1).
    last_order_category AS (
        SELECT c.customer_email,
               -- The FILTER clauses repeat the WHERE condition below.
               string_agg(DISTINCT c.cat_level_1_name, ',') FILTER (WHERE b.last_no = 1) AS last_order_cat_level_1,
               string_agg(DISTINCT c.cat_level_2_name, ',') FILTER (WHERE b.last_no = 1) AS last_order_cat_level_2,
               string_agg(DISTINCT c.cat_level_3_name, ',') FILTER (WHERE b.last_no = 1) AS last_order_cat_level_3
        FROM ol.cube c
                 INNER JOIN base b
                            ON b.customer_email = c.customer_email AND c.order_number = b.order_number
        WHERE b.last_no = 1
        GROUP BY c.customer_email
    )
    SELECT fo.name,
           fo.customer_email,
           fo.first_name,
           fo.last_name,
           fo.store_id,
           fo.customer_group,
           fo.cat_level_1,
           fo.cat_level_2,
           fo.cat_level_3,
           fo.sku,
           fo.product_name_sku,
           fo.shipping_country_id,
           fo.first_order_acquisition_channel,
           fo.first_order_date,
           fo.repurchase_delay,
           fo.reactivation_delay,
           fo.has_voucher_code,
           fo.voucher_date,
           fo.num_orders,
           fo.orders_per_month,
           fo.alltime_rev,
           fo.average_rev_per_order,
           fo.min_rev,
           fo.max_rev,
           fo.last_order_date,
           fo.last_order_top_value_product_name,
           fo.last_order_top_value_product_category_name,
           fo.last_order_top_value_product_category_url,
           fo.last_order_top_value_product_url,
           fo.last_order_top_value_product_image_url,
           fo.customer_city,
           -- Maps the NTILE bucket (1-9, stored as text) to a one-letter tag.
           CASE fo.customer_partition
               WHEN '1' THEN 'K'
               WHEN '2' THEN 'D'
               WHEN '3' THEN 'v'
               WHEN '4' THEN 'W'
               WHEN '5' THEN 'J'
               WHEN '6' THEN 'i'
               WHEN '7' THEN 'u'
               WHEN '8' THEN 'n'
               WHEN '9' THEN 'P'
               ELSE '' END AS "TAGS",
           fo.first_order_category,
           fo.last_order_number,
           loc.last_order_cat_level_1,
           loc.last_order_cat_level_2,
           loc.last_order_cat_level_3
    FROM final_output fo
             LEFT JOIN last_order_category loc ON loc.customer_email = fo.customer_email;

下面是已创建的索引:

-- Composite index on the columns the query's window functions over ol.cube
-- partition and sort by (customer_email, then created_at_order).
-- The index is named explicitly (<table>_<columns>_idx) so IF NOT EXISTS can
-- be used: re-running an unnamed CREATE INDEX would silently build a second,
-- identical index under a new generated name.
CREATE INDEX IF NOT EXISTS cube_customer_email_created_at_order_idx
    ON ol.cube (customer_email, created_at_order);

0 个答案:

没有答案