-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_heatmap.py
82 lines (63 loc) · 3.78 KB
/
main_heatmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import geopandas as gpd
import numpy as np
import os
from shapely.geometry import Point
from load_data import get_occurrence_data
from helpers import calculate_diversity, heatmap, save_heatmap
from dotenv import load_dotenv
def _prompt_parameters():
    """Ask the user for the run parameters; empty answers select the defaults.

    Returns:
        Tuple ``(municipal_id, informal_taxon_ids, start_time, grid_size)``.
        ``grid_size`` is a positive ``int``; the other values are strings.

    Raises:
        ValueError: If the grid size is not an integer or is not positive.
    """
    municipal_id = input("Enter municipal_id (e.g., 'ML.660' for Helsinki): ") or 'ML.660'
    informal_taxon_ids = input("Enter informal_taxon_ids (e.g., 'MVL.2' for mammals). You can add many: ") or 'MVL.2'
    start_time = input("Enter time after which occurrences will be loaded (e.g., '2010-01-01'): ") or '2010-01-01'
    grid_size = int(input("Enter grid size of the output heatmap in meters (e.g. 100): ") or 100)
    if grid_size <= 0:
        # A zero or negative step would make the grid construction fail with
        # an unrelated-looking numpy error, so reject it up front.
        raise ValueError(f"Grid size must be a positive integer, got {grid_size}")
    return municipal_id, informal_taxon_ids, start_time, grid_size


def _load_occurrences(file_basename, municipal_id, informal_taxon_ids, start_time, access_token):
    """Return the occurrences as a GeoDataFrame, from a local file or the API.

    If ``<file_basename>_data.json`` exists it is read with geopandas;
    otherwise the records are requested from api.laji.fi through
    ``get_occurrence_data``.
    """
    data_file = f'{file_basename}_data.json'
    if os.path.exists(data_file):
        print(f"Using local data {data_file}")
        return gpd.read_file(data_file)

    if not access_token:
        # os.getenv returned nothing, so the URL would carry the literal "None".
        print("Warning: ACCESS_TOKEN is not set; the API request will probably be rejected")

    api_url = (
        'https://api.laji.fi/v0/warehouse/query/unit/list'
        '?selected=unit.taxonVerbatim,unit.linkings.taxon.id,unit.linkings.taxon.scientificName'
        f'&informalTaxonGroupId={informal_taxon_ids}'
        '&page=1&pageSize=10000&coordinateAccuracyMax=500'
        f'&time={start_time}/'
        '&geoJSON=true&featureType=CENTER_POINT'
        f'&finnishMunicipalityId={municipal_id}'
        f'&access_token={access_token}'
    )
    gdf = get_occurrence_data(api_url, pages="all")
    # gdf.to_file(data_file, driver='GeoJSON')  # Uncomment if you want to use same data again and again
    return gdf


def _cell_edges(lower, upper, step):
    """Return the lower edges of the grid cells covering ``[lower, upper]``.

    Always returns at least one edge: ``np.arange`` yields an empty array when
    ``lower == upper`` (all points share the coordinate), which would make
    the later edge lookup fail.
    """
    edges = np.arange(lower, upper, step)
    if edges.size == 0:
        edges = np.array([lower])
    return edges


def _diversity_points(gdf, grid_size):
    """Aggregate occurrences into grid cells and score each occupied cell.

    Adds ``grid_x`` / ``grid_y`` columns to ``gdf`` (1-based cell indices from
    ``np.digitize``), applies ``calculate_diversity`` to every occupied cell
    and places the result at the cell centroid.

    Returns:
        Tuple ``(gdf_diversity, bins)``: a GeoDataFrame of centroid points with
        a ``shannon_diversity`` column, and the ``(x, y)`` number of heatmap
        bins, each at least 1.
    """
    x_min, y_min, x_max, y_max = gdf.total_bounds
    x_edges = _cell_edges(x_min, x_max, grid_size)
    y_edges = _cell_edges(y_min, y_max, grid_size)

    # Assign each occurrence point to a grid cell
    gdf['grid_x'] = np.digitize(gdf.geometry.x, x_edges)
    gdf['grid_y'] = np.digitize(gdf.geometry.y, y_edges)

    # One diversity value per occupied cell; the unnamed result column of
    # groupby().apply() shows up as column 0 after reset_index().
    diversity_data = gdf.groupby(['grid_x', 'grid_y'], as_index=True).apply(calculate_diversity).reset_index()
    diversity_data = diversity_data.rename(columns={0: "shannon_diversity"})

    # digitize indices are 1-based, hence the -1 when looking up the cell's
    # lower edge; half a cell is added to reach the centroid.
    half_cell = grid_size / 2
    diversity_data['x_centroid'] = x_edges[diversity_data['grid_x'].to_numpy() - 1] + half_cell
    diversity_data['y_centroid'] = y_edges[diversity_data['grid_y'].to_numpy() - 1] + half_cell
    diversity_data['geometry'] = gpd.points_from_xy(
        diversity_data['x_centroid'], diversity_data['y_centroid']
    )

    gdf_diversity = gpd.GeoDataFrame(diversity_data, geometry='geometry', crs=gdf.crs)
    gdf_diversity = gdf_diversity.drop(columns=['grid_x', 'grid_y'])

    # An extent smaller than one cell would floor to 0 bins; keep at least one.
    bins = (
        max(1, int((x_max - x_min) // grid_size)),
        max(1, int((y_max - y_min) // grid_size)),
    )
    return gdf_diversity, bins


def main():
    """Interactively build a diversity heatmap for one municipality.

    Prompts for a municipality, informal taxon group(s), a start time and a
    grid size, loads the matching occurrences (local file or api.laji.fi),
    aggregates them into grid cells in EPSG:3067, and writes the smoothed
    heatmap to ``<basename>_heatmap.json`` through ``save_heatmap``.

    Raises:
        ValueError: If the entered grid size is not a positive integer.
    """
    load_dotenv()
    access_token = os.getenv('ACCESS_TOKEN')

    municipal_id, informal_taxon_ids, start_time, grid_size = _prompt_parameters()

    # Print the inputs for confirmation
    print(f"\nCreating an heatmap using the following parameters:")
    print(f"Municipal ID: {municipal_id}")
    print(f"Informal Taxon IDs: {informal_taxon_ids}")
    print(f"Time: {start_time}")
    print(f"Grid size: {grid_size}")

    file_basename = f'{municipal_id}_{informal_taxon_ids}_{start_time}'.replace('.', '_')

    gdf = _load_occurrences(file_basename, municipal_id, informal_taxon_ids, start_time, access_token)
    if gdf.empty:
        # total_bounds would be NaN and the grid could not be built.
        print("No occurrences found for the given parameters; no heatmap was created")
        return

    # Reproject to EPSG:3067 so that the grid size given in meters applies.
    gdf = gdf.to_crs(3067)

    if len(gdf) < 1000:
        print(f"Warning: the heatmap will be generated only from {len(gdf)} occurrences")

    gdf_diversity, bins = _diversity_points(gdf, grid_size)

    # Create a heatmap
    xedges, yedges, normalized_heatmap = heatmap(
        gdf_diversity, bins=bins, smoothing=1, weight_col='shannon_diversity'
    )

    # Save the heatmap
    crs = gdf_diversity.crs.to_string()
    save_heatmap(xedges, yedges, normalized_heatmap, crs, output_file=f'{file_basename}_heatmap.json')
# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()