如何优化此查询?

时间:2016-03-19 21:50:33

标签: sql postgresql query-optimization

我需要在 PostgreSQL 9.3 中编写这个查询:

  
      
  1. 列出每个国家/地区最受欢迎的电影。最受欢迎的电影是指该国家/地区所有用户给出的平均评分最高的电影。如果出现并列,请返回所有并列的电影,并按字母顺序排列。(2 列)
  2.   

需要的表格:

-- Movie catalogue; the queries in this file join ratings.mid to movie.id.
-- NOTE(review): no PRIMARY KEY is declared, so uniqueness of id is not enforced.
CREATE TABLE movie (
    id   INTEGER,
    name VARCHAR(200),
    year DATE
);

-- One row per user, including the country the ratings are grouped by.
-- The unquoted identifier is folded to lower case (userprofile) by PostgreSQL.
CREATE TABLE userProfile (
    userid     VARCHAR(200),
    gender     CHAR(1),
    age        INTEGER,
    country    VARCHAR(200),
    registered DATE
);

-- One row per rating: which user (userid) gave which movie (mid) which score.
CREATE TABLE ratings (
    mid    INTEGER,
    userid VARCHAR(200),
    rating INTEGER
);

-- Indexes on the join columns used by the queries in this file.
-- btree is PostgreSQL's default index method; it is spelled out here for clarity.
CREATE INDEX movie_id_idx
    ON movie USING btree (id);

CREATE INDEX userProfile_userid_idx
    ON userProfile USING btree (userid);

CREATE INDEX ratings_userid_idx
    ON ratings USING btree (userid);

CREATE INDEX ratings_mid_idx
    ON ratings USING btree (mid);

-- NOTE(review): this composite index leads with userid, so it can presumably
-- serve the same lookups as ratings_userid_idx; confirm before dropping either.
CREATE INDEX ratings_userid_mid_idx
    ON ratings USING btree (userid, mid);

这是我的查询:

-- Empty result table with the (country, name) column shape of the final answer.
-- LIMIT 0 takes the column types from the join without copying any rows.
CREATE TEMP TABLE tops AS
SELECT
    u.country,
    m.name
FROM ratings AS r
INNER JOIN userProfile AS u
    ON u.userid = r.userid
INNER JOIN movie AS m
    ON m.id = r.mid
LIMIT 0;

~10 min
-- Average rating of every movie (mid) within every country.
CREATE TEMP TABLE avg_country AS
SELECT
    u.country,
    r.mid,
    AVG(r.rating) AS rate
FROM ratings AS r
INNER JOIN userProfile AS u
    ON u.userid = r.userid
GROUP BY
    u.country,
    r.mid;

~8 min
-- Fill tops with the best-rated movie(s) of every country in a single set-based
-- statement, replacing the original per-country PL/pgSQL loop.
--
-- Why the loop was replaced:
--   * It took the winners' names with DELETE ... RETURNING from a scratch copy
--     of movie, so a movie that was top-rated in several countries was reported
--     only for the first country processed.
--   * It created and dropped a permanent table on every iteration.
--
-- RANK() gives every movie tied for the highest average rank 1, so all tied
-- movies are kept.
INSERT INTO tops (country, name)
SELECT
    best.country,
    m.name
FROM (
    SELECT
        ac.country,
        ac.mid,
        RANK() OVER (PARTITION BY ac.country ORDER BY ac.rate DESC) AS rate_rank
    FROM avg_country AS ac
    -- NULL countries and NULL averages never matched the loop's equality tests,
    -- and a NULL rate would sort first under DESC, so both are excluded here.
    WHERE ac.country IS NOT NULL
      AND ac.rate IS NOT NULL
) AS best
INNER JOIN movie AS m
    ON m.id = best.mid
WHERE best.rate_rank = 1
ORDER BY
    best.country,
    m.name;

-- Return the winners. Row order is only guaranteed by an explicit ORDER BY, so
-- sort here to get the movies alphabetically within each country.
SELECT
    country,
    name
FROM tops
ORDER BY
    country,
    name;

-- Remove the temporary working tables.
DROP TABLE tops, avg_country;

提前多谢!

2 个答案:

答案 0 :(得分:0)

使用简单的老式 SQL——虽然老,但很管用(old but gold)。

-- Most popular movie(s) per country: the movie(s) with the highest average
-- rating among that country's users, ties included, listed alphabetically.
WITH country_movie_avg AS (
    -- Average rating per (country, movie). Grouping by m.id as well as m.name
    -- keeps two different movies that share a title from being merged.
    SELECT
        u.country,
        m.id          AS movie_id,
        m.name        AS movie_name,
        AVG(r.rating) AS avg_rating
    FROM userProfile AS u
    INNER JOIN ratings AS r
        ON r.userid = u.userid
    INNER JOIN movie AS m
        ON m.id = r.mid
    GROUP BY
        u.country,
        m.id,
        m.name
),
ranked AS (
    -- NULLS LAST: with DESC, PostgreSQL would otherwise sort a NULL average first.
    SELECT
        country,
        movie_name,
        DENSE_RANK() OVER (
            PARTITION BY country
            ORDER BY avg_rating DESC NULLS LAST
        ) AS rating_rank
    FROM country_movie_avg
)
SELECT
    country,
    movie_name
FROM ranked
WHERE rating_rank = 1
ORDER BY
    country,
    movie_name;

答案 1 :(得分:0)

这与 kordirko 的答案类似,但少了一个子查询:

-- Most popular movie(s) per country with their average rating; ties are kept
-- and rows are listed alphabetically within each country.
select country, movie_name, avg_rating
from (select u.country, m.name as movie_name, avg(r.rating) as avg_rating,
             -- nulls last: with desc, PostgreSQL would otherwise rank a NULL average first
             rank() over (partition by u.country order by avg(r.rating) desc nulls last) as seqnum
      from userProfile u join
           ratings r
           on u.userid = r.userid join
           movie m
           on r.mid = m.id
      -- m.name must be listed too: movie has no primary key, so PostgreSQL
      -- cannot infer that name is functionally dependent on id
      group by u.country, m.id, m.name
     ) uc
where seqnum = 1
order by country, movie_name;

或者,如果您希望每个国家/地区只返回一行(将并列的电影合并为一个列表):

-- One row per country: the top-rated movie(s) joined into a single
-- '; '-separated list, alphabetically ordered inside the list.
select country, string_agg(movie_name, '; ' order by movie_name) as most_popular_movies
from (select u.country, m.name as movie_name, avg(r.rating) as avg_rating,
             -- nulls last: with desc, PostgreSQL would otherwise rank a NULL average first
             rank() over (partition by u.country order by avg(r.rating) desc nulls last) as seqnum
      from userProfile u join
           ratings r
           on u.userid = r.userid join
           movie m
           on r.mid = m.id
      -- m.name must be listed too: movie has no primary key, so PostgreSQL
      -- cannot infer that name is functionally dependent on id
      group by u.country, m.id, m.name
     ) uc
where seqnum = 1
group by country
order by country;