Skip to content

Handy SQL statements

chris48s edited this page Nov 5, 2019 · 16 revisions

Handy SQL statements

The following SQL statements are useful for checking imported data and looking for possible error conditions in imported data:

Polling districts with an invalid polling station id:

SELECT * FROM pollingstations_pollingdistrict
WHERE polling_station_id NOT IN
    (SELECT internal_council_id FROM pollingstations_pollingstation
     WHERE council_id='X01000000')
AND council_id='X01000000';

Polling stations with an invalid polling district id:

SELECT * FROM pollingstations_pollingstation
WHERE polling_district_id NOT IN
    (SELECT internal_council_id FROM pollingstations_pollingdistrict
     WHERE council_id='X01000000')
AND council_id='X01000000';

Polling districts which share a station with another district:

SELECT DISTINCT(polling_station_id), COUNT(*)
FROM pollingstations_pollingdistrict
WHERE council_id='X01000000'
GROUP BY polling_station_id
HAVING COUNT(*)>1
ORDER BY COUNT(*);

Polling stations which serve more than one district:

SELECT DISTINCT(polling_district_id), COUNT(*)
FROM pollingstations_pollingstation
WHERE council_id='X01000000'
GROUP BY polling_district_id
HAVING COUNT(*)>1
ORDER BY COUNT(*);

Postcodes containing properties assigned to multiple polling stations:

SELECT DISTINCT(postcode), COUNT(DISTINCT(polling_station_id))
FROM pollingstations_residentialaddress
WHERE council_id='X01000000'
GROUP BY postcode
HAVING COUNT(DISTINCT(polling_station_id))>1;

Order addresses by ONSPD postcode centroid --> station location distance

SELECT
  round((ST_DISTANCE(ST_TRANSFORM(onspd.location, 27700), ST_TRANSFORM(ps.location, 27700))/1000)::numeric, 2) AS distance_km,
  ra.id,
  ra.address,
  ra.postcode,
  ra.polling_station_id,
  ra.council_id,
  ra.uprn,
  LEFT(ra.postcode, LENGTH(ra.postcode)-3) || ' ' || RIGHT(ra.postcode, 3) AS pcfull2,
  onspd.location,
  ps.internal_council_id,
  ps.postcode,
  ps.address,
  ps.location,
  ps.council_id
FROM pollingstations_residentialaddress ra
JOIN uk_geo_utils_onspd onspd ON onspd.pcds=LEFT(ra.postcode, LENGTH(ra.postcode)-3) || ' ' || RIGHT(ra.postcode, 3)
JOIN pollingstations_pollingstation ps ON ps.council_id=ra.council_id
AND ra.polling_station_id=ps.internal_council_id
WHERE onspd.doterm=''
  AND ps.location IS NOT NULL
  AND ra.council_id='X01000001'
ORDER BY ST_DISTANCE(onspd.location::geography, ps.location::geography) DESC;

Order addresses by doorstep gridref (if held) --> station location distance (often not that useful as the addresses with issues often won't have a doorstep gridref)

SELECT
  round(ST_DISTANCE(ra.location::geography, ps.location::geography)/1000)::numeric, 2) AS distance,
  ra.*,
  ps.internal_council_id,
  ps.postcode,
  ps.address,
  ps.location,
  ps.council_id
FROM pollingstations_residentialaddress ra
JOIN pollingstations_pollingstation ps ON ps.council_id=ra.council_id
AND ra.polling_station_id=ps.internal_council_id
WHERE ra.uprn!=''
  AND ps.location IS NOT NULL
  AND ra.council_id='X01000001'
ORDER BY ST_DISTANCE(ra.location::geography, ps.location::geography) DESC;

Materialized view showing line between every address and assigned station (using doorstep gridref if held)

CREATE MATERIALIZED VIEW pollingstations_lines_view AS
SELECT ra.id,
       ps.internal_council_id,
       ra.address,
       ra.postcode,
       st_makeline(ps.location, ra.location) AS geom
FROM pollingstations_pollingstation ps,
     pollingstations_residentialaddress ra
WHERE ps.council_id=ra.council_id
  AND ra.polling_station_id=ps.internal_council_id
  AND ra.council_id='X01000001';

Refresh:

REFRESH MATERIALIZED VIEW pollingstations_lines_view;

Residential Address records with duplicate UPRN

WITH duplicates AS
  (SELECT uprn, count(uprn)
   FROM public.pollingstations_residentialaddress
   WHERE uprn != ''
   GROUP BY uprn
   HAVING count(uprn) > 1)
SELECT ra.*, d.count
FROM pollingstations_residentialaddress ra, duplicates d
WHERE ra.uprn = d.uprn
ORDER BY ra.uprn;

Order postcodes by largest spread between points:

SELECT
   ra1.postcode,
   MAX(ROUND((ST_DISTANCE(ra1.location::geography, ra2.location::geography)/1000)::numeric, 2)) AS distance
FROM pollingstations_residentialaddress ra1
JOIN pollingstations_residentialaddress ra2
ON ra1.postcode = ra2.postcode
AND ra1.id < ra2.id
WHERE ra1.council_id='X01000001'
GROUP BY ra1.postcode
ORDER BY distance DESC;