慢Postgres查询

时间:2009-10-01 14:37:52

标签: sql postgresql postgis nearest-neighbor

我是Postgres和SQL的新手。我创建了以下脚本,从一个点到最近一行的投影点绘制一条线。它适用于5到10个小数据集,行数相同;但是,使用2,000行在60点上进行,查询大约需要12个小时。它基于下面粘贴的最近邻居函数http://www.bostongis.com/downloads/pgis_nn.txt

关于pgis_fn_nn的

编辑文档可在http://www.bostongis.com/PrinterFriendly.aspx?content_name=postgis_nearest_neighbor_generic上找到

缓慢的部分是pgis_fn_nn(...)

的实现
  1. 我做错了什么?
  2. 有没有提示让它更快?
  3. 我是否有办法改进这两个脚本?
  4. 如果我想将两个查询合并为一个,你会推荐什么?
  5. my_script.sql

    -- this sql script creates a line table that connects points from a point table
    -- to the projected points from the nearest line to the point of oritin 
    
    -- delete duplicate tables if they exist
    DROP TABLE exploded_roads;
    DROP TABLE projected_points;
    DROP TABLE lines_from_centroids_to_roads;
    
    -- create temporary exploaded lines table
    CREATE TABLE exploded_roads (
     the_geom geometry,
     edge_id  serial
    );
    
    -- insert the linestring that are not multistring
    INSERT INTO exploded_roads
    SELECT the_geom
    FROM "StreetCenterLines"
    WHERE st_geometrytype(the_geom) = 'ST_LineString';
    
    -- insert the linestrings that need to be converted from multi string
    INSERT INTO exploded_roads
    SELECT the_geom
    FROM (
        SELECT ST_GeometryN(
     the_geom,
     generate_series(1, ST_NumGeometries(the_geom)))
        AS the_geom 
        FROM "StreetCenterLines"
    )
    AS foo;
    
    -- create projected points table with ids matching centroid table
    CREATE TABLE projected_points (
     the_geom  geometry,
     pid  serial,
     dauid  int
    );
    
    -- Populate Table
    -- code based on Paul Ramsey's site and Boston GIS' NN code
    INSERT INTO projected_points(the_geom, dauid)
    SELECT DISTINCT ON ("DAUID"::int)
     ( 
      ST_Line_Interpolate_Point(
       (
        SELECT the_geom
        FROM exploded_roads
        WHERE edge_id IN 
         (
          SELECT nn_gid
          FROM pgis_fn_nn(centroids.the_geom, 30000000, 1,10, 'exploded_roads', 'true', 'edge_id', 'the_geom')
         )
       ),
       ST_Line_Locate_Point(
        exploded_roads.the_geom,
        centroids.the_geom
       )
      )
     ),
     (centroids."DAUID"::int)
    
    FROM exploded_roads, fred_city_o6_da_centroids centroids;
    
    
    -- Create Line tables
    CREATE TABLE lines_from_centroids_to_roads (
     the_geom geometry,
     edge_id SERIAL
    );
    
    
    -- Populate Line Table
    INSERT INTO lines_from_centroids_to_roads(
    SELECT
     ST_MakeLine( centroids.the_geom, projected_points.the_geom )
    FROM projected_points, fred_city_o6_da_centroids centroids
    WHERE projected_points.dauid = centroids.id
    );
    
    来自http://www.bostongis.com/downloads/pgis_nn.txt

    pgis_fn_nn

    ---LAST UPDATED 8/2/2007 --
    CREATE OR REPLACE FUNCTION expandoverlap_metric(a geometry, b geometry, maxe double precision, maxslice double precision)
      RETURNS integer AS
    $BODY$
    BEGIN
        FOR i IN 0..maxslice LOOP
            IF expand(a,maxe*i/maxslice) && b THEN
                RETURN i;
            END IF;
        END LOOP; 
        RETURN 99999999;
    END;
    $BODY$
    LANGUAGE 'plpgsql' IMMUTABLE;
    
    CREATE TYPE pgis_nn AS
       (nn_gid integer, nn_dist numeric(16,5));
    
    CREATE OR REPLACE FUNCTION _pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset varchar(150), swhere varchar(5000), sgid2field varchar(100), sgeom2field varchar(100))
      RETURNS SETOF pgis_nn AS
    $BODY$
    DECLARE
        strsql text;
        rec pgis_nn;
        ncollected integer;
        it integer;
    --NOTE: it: the iteration we are currently at 
    --start at the bounding box of the object (expand 0) and move up until it has collected more objects than we need or it = maxslices whichever event happens first
    BEGIN
        ncollected := 0; it := 0;
        WHILE ncollected < numnn AND it <= maxslices LOOP
            strsql := 'SELECT currentit.' || sgid2field || ', distance(ref.geom, currentit.' || sgeom2field || ') as dist FROM ' || lookupset || '  as currentit, (SELECT geometry(''' || CAST(geom1 As text) || ''') As geom) As ref WHERE ' || swhere || ' AND distance(ref.geom, currentit.' || sgeom2field || ') <= ' || CAST(distguess As varchar(200)) || ' AND expand(ref.geom, ' || CAST(distguess*it/maxslices As varchar(100)) ||  ') && currentit.' || sgeom2field || ' AND expandoverlap_metric(ref.geom, currentit.' || sgeom2field || ', ' || CAST(distguess As varchar(200)) || ', ' || CAST(maxslices As varchar(200)) || ') = ' || CAST(it As varchar(100)) || ' ORDER BY distance(ref.geom, currentit.' || sgeom2field || ') LIMIT ' || 
            CAST((numnn - ncollected) As varchar(200));
            --RAISE NOTICE 'sql: %', strsql;
            FOR rec in EXECUTE (strsql) LOOP
                IF ncollected < numnn THEN
                    ncollected := ncollected + 1;
                    RETURN NEXT rec;
                ELSE
                    EXIT;
                END IF;
            END LOOP;
            it := it + 1;
        END LOOP;
    END
    $BODY$
    LANGUAGE 'plpgsql' STABLE;
    
    CREATE OR REPLACE FUNCTION pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset varchar(150), swhere varchar(5000), sgid2field varchar(100), sgeom2field varchar(100))
      RETURNS SETOF pgis_nn AS
    $BODY$
        SELECT * FROM _pgis_fn_nn($1,$2, $3, $4, $5, $6, $7, $8);
    $BODY$
      LANGUAGE 'sql' STABLE;
    

1 个答案:

答案 0 :(得分:4)

我使用“最近的”函数在OpenStreetMap数据上执行pgRouting。起初我偶然发现了你提到的fn_nn函数,但访问irc.freenode.net上的#postgis irc频道帮助了我。事实证明,postgis有一些非常棒的线性函数,当它们组合在一起时,它可以回答你需要的一切!

您可以在http://postgis.refractions.net/documentation/manual-1.3/ch06.html#id2578698找到有关线性函数的更多信息,但这是我实现它的方式

select 
    line_interpolate_point(ways.the_geom, 
    line_locate_point(ways.the_geom, pnt))),')','')) as anchor_point,
    -- returns the anchor point
    line_locate_point(ways.the_geom, pnt) as anchor_percentage, 
    -- returns the percentage on the line where the anchor will 
    -- touch (number between 0 and 1)
    CASE
    WHEN line_locate_point(ways.the_geom, pnt) < 0.5 THEN ways.source
    WHEN line_locate_point(ways.the_geom, pnt) > 0.5 THEN ways.target
    END as node,
    -- returns the nearest end node id
    length_spheroid( st_line_substring(ways.the_geom,0,
     line_locate_point(ways.the_geom, pnt)),
    'SPHEROID[\"WGS 84\",6378137,298.257223563]' ) as length,
    distance_spheroid(pnt, line_interpolate_point(ways.the_geom, 
    line_locate_point(ways.the_geom, pnt)),
    'SPHEROID[\"WGS 84\",6378137,298.257223563]') as dist
            from ways, planet_osm_line,
            ST_GeomFromText('POINT(1.245 51.234)', 4326) as pnt 
     where ways.gid = planet_osm_line.osm_id
            order by dist asc limit 1;";

希望这对你有用