-
Notifications
You must be signed in to change notification settings - Fork 12
/
toolbox.py
270 lines (227 loc) · 11.6 KB
/
toolbox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# Yuwen Chang
# 2020-08-16
import numpy as np
import pandas as pd
import geopandas as gpd
import rtree
import itertools
from shapely.geometry import MultiPoint, LineString
from shapely.ops import snap, split
pd.options.mode.chained_assignment = None
def connect_poi(pois, nodes, edges, key_col=None, path=None, threshold=200, knn=5, meter_epsg=3857):
"""Connect and integrate a set of POIs into an existing road network.
Given a road network in the form of two GeoDataFrames: nodes and edges,
link each POI to the nearest edge (road segment) based on its projection
point (PP) and generate a new integrated road network including the POIs,
the projected points, and the connection edge.
Args:
pois (GeoDataFrame): a gdf of POI (geom: Point)
nodes (GeoDataFrame): a gdf of road network nodes (geom: Point)
edges (GeoDataFrame): a gdf of road network edges (geom: LineString)
key_col (str): a unique key column of pois should be provided,
e.g., 'index', 'osmid', 'poi_number', etc.
Currently, this will be renamed into 'osmid' in the output.
[NOTE] For use in pandana, you may want to ensure this
column is numeric-only to avoid processing errors.
Preferably use unique integers (int or str) only,
and be aware not to intersect with the node key,
'osmid' if you use OSM data, in the nodes gdf.
path (str): directory path to use for saving files (nodes and edges).
Outputs will NOT be saved if this arg is not specified.
threshold (int): the max length of a POI connection edge, POIs with
connection edge beyond this length will be removed.
The unit is in meters as crs epsg is set to 3857 by
default during processing.
knn (int): k nearest neighbors to query for the nearest edge.
Consider increasing this number up to 10 if the connection
output is slightly unreasonable. But higher knn number will
slow down the process.
meter_epsg (int): preferred EPSG in meter units. Suggested 3857 or 3395.
Returns:
nodes (GeoDataFrame): the original gdf with POIs and PPs appended
edges (GeoDataFrame): the original gdf with connection edges appended
and existing edges updated (if PPs are present)
Note:
1. Make sure all three input GeoDataFrames have defined crs attribute.
Try something like `gdf.crs` or `gdf.crs = 'epsg:4326'`.
They will then be converted into epsg:3857 or specified meter_epsg for processing.
"""
## STAGE 0: initialization
# 0-1: helper functions
def find_kne(point, lines):
dists = np.array(list(map(lambda l: l.distance(point), lines)))
kne_pos = dists.argsort()[0]
kne = lines.iloc[[kne_pos]]
kne_idx = kne.index[0]
return kne_idx, kne.values[0]
def get_pp(point, line):
"""Get the projected point (pp) of 'point' on 'line'."""
# project new Point to be interpolated
pp = line.interpolate(line.project(point)) # PP as a Point
return pp
def split_line(line, pps):
"""Split 'line' by all intersecting 'pps' (as multipoint).
Returns:
new_lines (list): a list of all line segments after the split
"""
# IMPORTANT FIX for ensuring intersection between splitters and the line
# but no need for updating edges_meter manually because the old lines will be
# replaced anyway
line = snap(line, pps, 1e-8) # slow?
try:
new_lines = list(split(line, pps)) # split into segments
return new_lines
except TypeError as e:
print('Error when splitting line: {}\n{}\n{}\n'.format(e, line, pps))
return []
def update_nodes(nodes, new_points, ptype, meter_epsg=3857):
"""Update nodes with a list (pp) or a GeoDataFrame (poi) of new_points.
Args:
ptype: type of Point list to append, 'pp' or 'poi'
"""
# create gdf of new nodes (projected PAPs)
if ptype == 'pp':
new_nodes = gpd.GeoDataFrame(new_points, columns=['geometry'], crs=f'epsg:{meter_epsg}')
n = len(new_nodes)
new_nodes['highway'] = node_highway_pp
new_nodes['osmid'] = [int(osmid_prefix + i) for i in range(n)]
# create gdf of new nodes (original POIs)
elif ptype == 'poi':
new_nodes = new_points[['geometry', key_col]]
new_nodes.columns = ['geometry', 'osmid']
new_nodes['highway'] = node_highway_poi
new_nodes['osmid'] = new_nodes['osmid'].astype(int)
else:
print("Unknown ptype when updating nodes.")
# merge new nodes (it is safe to ignore the index for nodes)
gdfs = [nodes, new_nodes]
nodes = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True, sort=False),
crs=gdfs[0].crs)
return nodes, new_nodes # all nodes, newly added nodes only
def update_edges(edges, new_lines, replace):
"""
Update edge info by adding new_lines; or,
replace existing ones with new_lines (n-split segments).
Args:
replace: treat new_lines (flat list) as newly added edges if False,
else replace existing edges with new_lines (often a nested list)
Note:
kne_idx refers to 'fid in Rtree'/'label'/'loc', not positional iloc
"""
# for interpolation (split by pp): replicate old line
if replace:
# create a flattened gdf with all line segs and corresponding kne_idx
kne_idxs = list(line_pps_dict.keys())
lens = [len(item) for item in new_lines]
new_lines_gdf = gpd.GeoDataFrame(
{'kne_idx': np.repeat(kne_idxs, lens),
'geometry': list(itertools.chain.from_iterable(new_lines))})
# merge to inherit the data of the replaced line
cols = list(edges.columns)
cols.remove('geometry') # don't include the old geometry
new_edges = new_lines_gdf.merge(edges[cols], how='left', left_on='kne_idx', right_index=True)
new_edges.drop('kne_idx', axis=1, inplace=True)
new_lines = new_edges['geometry'] # now a flatten list
# for connection (to external poi): append new lines
else:
new_edges = gpd.GeoDataFrame(pois[[key_col]], geometry=new_lines, columns=[key_col, 'geometry'])
new_edges['oneway'] = False
new_edges['highway'] = edge_highway
# update features (a bit slow)
new_edges['length'] = [l.length for l in new_lines]
new_edges['from'] = new_edges['geometry'].map(
lambda x: nodes_id_dict.get(list(x.coords)[0], None))
new_edges['to'] = new_edges['geometry'].map(
lambda x: nodes_id_dict.get(list(x.coords)[-1], None))
new_edges['osmid'] = ['_'.join(list(map(str, s))) for s in zip(new_edges['from'], new_edges['to'])]
# remember to reindex to prevent duplication when concat
start = edges.index[-1] + 1
stop = start + len(new_edges)
new_edges.index = range(start, stop)
# for interpolation: remove existing edges
if replace:
edges = edges.drop(kne_idxs, axis=0)
# for connection: filter invalid links
else:
valid_pos = np.where(new_edges['length'] <= threshold)[0]
n = len(new_edges)
n_fault = n - len(valid_pos)
f_pct = n_fault / n * 100
print("Remove faulty projections: {}/{} ({:.2f}%)".format(n_fault, n, f_pct))
new_edges = new_edges.iloc[valid_pos] # use 'iloc' here
# merge new edges
dfs = [edges, new_edges]
edges = gpd.GeoDataFrame(pd.concat(dfs, ignore_index=False, sort=False), crs=dfs[0].crs)
# all edges, newly added edges only
return edges, new_edges
# 0-2: configurations
# set poi arguments
node_highway_pp = 'projected_pap' # POI Access Point
node_highway_poi = 'poi'
edge_highway = 'projected_footway'
osmid_prefix = 9990000000
# convert CRS
pois_meter = pois.to_crs(epsg=meter_epsg)
nodes_meter = nodes.to_crs(epsg=meter_epsg)
edges_meter = edges.to_crs(epsg=meter_epsg)
# build rtree
print("Building rtree...")
Rtree = rtree.index.Index()
[Rtree.insert(fid, geom.bounds) for fid, geom in edges_meter['geometry'].iteritems()]
## STAGE 1: interpolation
# 1-1: update external nodes (pois)
print("Updating external nodes...")
nodes_meter, _ = update_nodes(nodes_meter, pois_meter, ptype='poi', meter_epsg=meter_epsg)
# 1-2: update internal nodes (interpolated pps)
# locate nearest edge (kne) and projected point (pp)
print("Projecting POIs to the network...")
pois_meter['near_idx'] = [list(Rtree.nearest(point.bounds, knn))
for point in pois_meter['geometry']] # slow
pois_meter['near_lines'] = [edges_meter['geometry'][near_idx]
for near_idx in pois_meter['near_idx']] # very slow
pois_meter['kne_idx'], knes = zip(
*[find_kne(point, near_lines) for point, near_lines in
zip(pois_meter['geometry'], pois_meter['near_lines'])]) # slow
pois_meter['pp'] = [get_pp(point, kne) for point, kne in zip(pois_meter['geometry'], knes)]
# update nodes
print("Updating internal nodes...")
nodes_meter, _ = update_nodes(nodes_meter, list(pois_meter['pp']), ptype='pp', meter_epsg=meter_epsg)
nodes_coord = nodes_meter['geometry'].map(lambda x: x.coords[0])
nodes_id_dict = dict(zip(nodes_coord, nodes_meter['osmid'].astype('Int64')))
# 1-3: update internal edges (split line segments)
print("Updating internal edges...")
# split
line_pps_dict = {k: MultiPoint(list(v)) for k, v in pois_meter.groupby(['kne_idx'])['pp']}
new_lines = [split_line(edges_meter['geometry'][idx], pps) for idx, pps in line_pps_dict.items()] # bit slow
edges_meter, _ = update_edges(edges_meter, new_lines, replace=True)
## STAGE 2: connection
# 2-1: update external edges (projected footways connected to pois)
# establish new_edges
print("Updating external links...")
pps_gdf = nodes_meter[nodes_meter['highway'] == node_highway_pp]
new_lines = [LineString([p1, p2]) for p1, p2 in zip(pois_meter['geometry'], pps_gdf['geometry'])]
edges_meter, _ = update_edges(edges_meter, new_lines, replace=False)
## STAGE 3: output
# convert CRS
nodes = nodes_meter.to_crs(epsg=4326)
edges = edges_meter.to_crs(epsg=4326)
# preprocess for pandana
nodes.index = nodes['osmid'] # IMPORTANT
nodes['x'] = [p.x for p in nodes['geometry']]
nodes['y'] = [p.y for p in nodes['geometry']]
# edges.reset_index(drop=True, inplace=True)
edges['length'] = edges['length'].astype(float)
# report issues
# - examine key duplication
if len(nodes_meter) != len(nodes_id_dict):
print("NOTE: duplication in node coordinates keys")
print("Nodes count:", len(nodes_meter))
print("Node coordinates key count:", len(nodes_id_dict))
# - examine missing nodes
print("Missing 'from' nodes:", len(edges[edges['from'] == None]))
print("Missing 'to' nodes:", len(edges[edges['to'] == None]))
# save and return
if path:
nodes.to_file(path+'/nodes.shp')
edges.to_file(path+'/edges.shp')
return nodes, edges