-
Notifications
You must be signed in to change notification settings - Fork 30
Handy SQL statements
The following SQL statements are useful for checking imported data and looking for possible error conditions in imported data:
Polling districts with an invalid polling station id:
SELECT * FROM pollingstations_pollingdistrict
WHERE polling_station_id NOT IN
(SELECT internal_council_id FROM pollingstations_pollingstation
WHERE council_id='X01000000')
AND council_id='X01000000';
Polling stations with an invalid polling district id:
SELECT * FROM pollingstations_pollingstation
WHERE polling_district_id NOT IN
(SELECT internal_council_id FROM pollingstations_pollingdistrict
WHERE council_id='X01000000')
AND council_id='X01000000';
Polling districts which share a station with another district:
SELECT DISTINCT(polling_station_id), COUNT(*)
FROM pollingstations_pollingdistrict
WHERE council_id='X01000000'
GROUP BY polling_station_id
HAVING COUNT(*)>1
ORDER BY COUNT(*);
Polling stations which serve more than one district:
SELECT DISTINCT(polling_district_id), COUNT(*)
FROM pollingstations_pollingstation
WHERE council_id='X01000000'
GROUP BY polling_district_id
HAVING COUNT(*)>1
ORDER BY COUNT(*);
Postcodes containing properties assigned to multiple polling stations:
SELECT DISTINCT(postcode), COUNT(DISTINCT(polling_station_id))
FROM pollingstations_residentialaddress
WHERE council_id='X01000000'
GROUP BY postcode
HAVING COUNT(DISTINCT(polling_station_id))>1;
Order addresses by ONSPD postcode centroid --> station location distance
SELECT
round((ST_DISTANCE(ST_TRANSFORM(onspd.location, 27700), ST_TRANSFORM(ps.location, 27700))/1000)::numeric, 2) AS distance_km,
ra.id,
ra.address,
ra.postcode,
ra.polling_station_id,
ra.council_id,
ra.uprn,
LEFT(ra.postcode, LENGTH(ra.postcode)-3) || ' ' || RIGHT(ra.postcode, 3) AS pcfull2,
onspd.location,
ps.internal_council_id,
ps.postcode,
ps.address,
ps.location,
ps.council_id
FROM pollingstations_residentialaddress ra
JOIN uk_geo_utils_onspd onspd ON onspd.pcds=LEFT(ra.postcode, LENGTH(ra.postcode)-3) || ' ' || RIGHT(ra.postcode, 3)
JOIN pollingstations_pollingstation ps ON ps.council_id=ra.council_id
AND ra.polling_station_id=ps.internal_council_id
WHERE onspd.doterm=''
AND ps.location IS NOT NULL
AND ra.council_id='X01000001'
ORDER BY ST_DISTANCE(onspd.location::geography, ps.location::geography) DESC;
Order addresses by doorstep gridref (if held) --> station location distance (often not that useful as the addresses with issues often won't have a doorstep gridref)
SELECT
round(ST_DISTANCE(ra.location::geography, ps.location::geography)/1000)::numeric, 2) AS distance,
ra.*,
ps.internal_council_id,
ps.postcode,
ps.address,
ps.location,
ps.council_id
FROM pollingstations_residentialaddress ra
JOIN pollingstations_pollingstation ps ON ps.council_id=ra.council_id
AND ra.polling_station_id=ps.internal_council_id
WHERE ra.uprn!=''
AND ps.location IS NOT NULL
AND ra.council_id='X01000001'
ORDER BY ST_DISTANCE(ra.location::geography, ps.location::geography) DESC;
Materialized view showing line between every address and assigned station (using doorstep gridref if held)
CREATE MATERIALIZED VIEW pollingstations_lines_view AS
SELECT ra.id,
ps.internal_council_id,
ra.address,
ra.postcode,
st_makeline(ps.location, ra.location) AS geom
FROM pollingstations_pollingstation ps,
pollingstations_residentialaddress ra
WHERE ps.council_id=ra.council_id
AND ra.polling_station_id=ps.internal_council_id
AND ra.council_id='X01000001';
Refresh:
REFRESH MATERIALIZED VIEW pollingstations_lines_view;
Residential Address records with duplicate UPRN
WITH duplicates AS
(SELECT uprn, count(uprn)
FROM public.pollingstations_residentialaddress
WHERE uprn != ''
GROUP BY uprn
HAVING count(uprn) > 1)
SELECT ra.*, d.count
FROM pollingstations_residentialaddress ra, duplicates d
WHERE ra.uprn = d.uprn
ORDER BY ra.uprn;
Order postcodes by largest spread between points:
SELECT
ra1.postcode,
MAX(ROUND((ST_DISTANCE(ra1.location::geography, ra2.location::geography)/1000)::numeric, 2)) AS distance
FROM pollingstations_residentialaddress ra1
JOIN pollingstations_residentialaddress ra2
ON ra1.postcode = ra2.postcode
AND ra1.id < ra2.id
WHERE ra1.council_id='X01000001'
GROUP BY ra1.postcode
ORDER BY distance DESC;