使用 PostgreSQL 收集评分统计信息

时间:2014-04-19 15:44:43

标签: postgresql

我目前正在尝试从 PostgreSQL 数据库中收集评分统计信息。下面是我要查询的数据库模式的简化示例。

-- Creates the database used by this example.
-- NOTE(review): CREATE DATABASE does not change the current connection;
-- connect to test_db (e.g. \c test_db in psql) before running the statements
-- below, otherwise the tables are created in the database the session is
-- already using.
CREATE DATABASE test_db;

-- Objects that can receive ratings.
CREATE TABLE rateable_object (
    -- Auto-generated surrogate key.
    id          BIGSERIAL,
    -- Filled with the insert time when the column is omitted.
    -- NOTE(review): presumably the creation timestamp -- confirm.
    cdate       TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    -- No default, so NULL until set explicitly.
    -- NOTE(review): presumably the last-modification timestamp -- confirm.
    mdate       TIMESTAMP WITHOUT TIME ZONE,
    name        CHARACTER VARYING(160) NOT NULL,
    description CHARACTER VARYING NOT NULL,
    PRIMARY KEY (id)
);

-- One row per vote.
CREATE TABLE ratings (
   id BIGSERIAL PRIMARY KEY,
   -- Filled with the insert time when the column is omitted.
   cdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
   -- No default, so NULL until set explicitly.
   mdate TIMESTAMP,
   -- NOTE(review): presumably rateable_object.id. No foreign key is declared;
   -- confirm that every stored parent_id has a matching object before adding one.
   parent_id BIGINT NOT NULL,
   -- Rating values run from -1 (the default) to 5.
   rating INTEGER NOT NULL DEFAULT -1,
   -- Reject values outside the rating scale at write time.
   CONSTRAINT ratings_rating_check CHECK (rating BETWEEN -1 AND 5)
);

-- Supports "count votes per rating for one parent" lookups: the parent_id
-- filter and the grouping column can both be read from the index.
CREATE INDEX ratings_parent_id_rating_idx ON ratings (parent_id, rating);

我现在想统计 rating 列中各个值出现的次数。查询结果应如下所示:

 +--------------+-------+
 | column_value | count |
 +--------------+-------+
 |           -1 |     2 |
 |            0 |    45 |
 |            1 |    37 |
 |            2 |    13 |
 |            3 |     5 |
 |            4 |    35 |
 |            5 |    75 |
 +--------------+-------+

我的第一个解决方案(见下文)非常简单粗暴,可能既不是最快的,也不是最简洁的。所以我的问题是:有没有更好的解决方案?

-- Number of votes per rating value (-1..5) for the object with parent_id = 1.
-- generate_series supplies every rating value, so values that received no
-- votes still appear in the result with a count of 0.
SELECT
    stars.value AS stars,
    -- COUNT over a column of the outer-joined table ignores the NULL-extended
    -- row produced for an unmatched rating value, and COUNT never returns
    -- NULL, so no COALESCE is needed.
    COUNT(votes.id) AS votes
FROM generate_series(-1, 5) AS stars (value)
LEFT JOIN ratings AS votes
    ON votes.rating = stars.value
    -- The object filter belongs in ON: in WHERE it would discard the
    -- unmatched rating values and turn the outer join into an inner join.
    AND votes.parent_id = 1
GROUP BY stars.value
ORDER BY stars.value;

由于我不想浪费你的时间,我为你准备了一些测试数据:

-- Sample objects to rate. id and cdate are omitted from the column list, so
-- they take their column defaults (next sequence value and insert time);
-- mdate is left NULL. Names are not unique: 'Elephant' appears twice.
INSERT INTO rateable_object (name, description) VALUES 
    ('Penguin', 'This is the Linux penguin.'), 
    ('Gnu', 'This is the GNU gnu.'), 
    ('Elephant', 'This is the PHP elephant.'), 
    ('Elephant', 'This is the postgres elephant.'), 
    ('Duck', 'This is the duckduckgo duck.'), 
    ('Cat', 'This is the GitHub cat.'), 
    ('Bird', 'This is the Twitter bird.'), 
    ('Lion', 'This is the Leo lion.');

-- Appends 1001 random rows to ratings and returns 0.
--
-- parent_id is drawn uniformly from 1..8, the ids the eight sample
-- rateable_object rows receive on a freshly created table.
-- rating is drawn uniformly from -1..5.
--
-- floor(random() * n) is used instead of rounding a scaled random():
-- rounding gives the two end values only half the probability of the others.
CREATE OR REPLACE FUNCTION generate_test_data() RETURNS INTEGER LANGUAGE plpgsql AS
$$
DECLARE
    object_count CONSTANT INTEGER := 8;   -- number of sample rateable_object rows
    min_rating   CONSTANT INTEGER := -1;
    max_rating   CONSTANT INTEGER := 5;
BEGIN
    -- One set-based insert; random() is volatile, so it is evaluated again
    -- for every generated row.
    INSERT INTO ratings (parent_id, rating)
    SELECT
        1 + floor(random() * object_count)::int,
        min_rating + floor(random() * (max_rating - min_rating + 1))::int
    FROM generate_series(0, 1000);   -- inclusive bounds: 1001 rows

    RETURN 0;
END;
$$;

-- Populate ratings with random test rows. Every call appends another batch,
-- so run it once per fresh database. The returned value is always 0.
SELECT generate_test_data();

0 个答案:

没有答案
相关问题