Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Examples for filtering grib data within a provided shapefile area #3

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.DS_Store
gribs/
71 changes: 71 additions & 0 deletions examples/utils_grib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from osgeo.ogr import Geometry, wkbPoint
# Only one OGR point needs to be created,
# since each call to `OGR_POINT.AddPoint`
# in the `check_point_in_area` function
# will reset the variable
OGR_POINT = Geometry(wkbPoint)

def coarse_geo_filter(df, AREA):
    """
    Perform an initial coarse filter on the dataframe
    based on the extent (bounding box) of the specified area.

    Adds 'longitude' and 'latitude' columns (longitude mapped from the
    grib 0..360 convention into -180..180) and keeps only the rows that
    fall inside the area's bounding box.

    :param df: dataframe whose index has 'lat_0' and 'lon_0' levels
    :param AREA: OGR layer; its GetExtent() supplies the bounding box
    :return: a new filtered dataframe with the added coordinate columns
    """
    # Work on a copy so the column assignments below neither mutate the
    # caller's dataframe nor leave the result as a view of it (assigning
    # on a view later triggers pandas SettingWithCopy warnings)
    df = df.copy()
    # Get longitude values from the index and map the range
    # (0 to 360) into (-180 to 180)
    lons = df.index.get_level_values('lon_0')
    df['longitude'] = lons.map(lambda lon: lon - 360 if lon > 180 else lon)
    df['latitude'] = df.index.get_level_values('lat_0')
    # Unpack the area's bounding box (OGR extent order: minX, maxX, minY, maxY)
    minlon, maxlon, minlat, maxlat = AREA.GetExtent()
    # Limit the data to the bounding box (inclusive on both ends),
    # thereby reducing the total processing time of the precise filter
    latfilter = df['latitude'].between(minlat, maxlat)
    lonfilter = df['longitude'].between(minlon, maxlon)
    return df.loc[latfilter & lonfilter]

def precise_geo_filter(df, AREA):
    """
    Perform a precise filter on the dataframe
    to check if each point is inside of the shapefile area.

    :param df: dataframe with 'latitude' and 'longitude' columns
               (as produced by `coarse_geo_filter`)
    :param AREA: OGR layer containing the shapefile features
    :return: a new dataframe holding only the rows whose point lies
             within one of the area's features
    """
    # Work on a copy so the column assignments below do not mutate the
    # caller's dataframe (and avoid pandas SettingWithCopy warnings
    # when the input is itself a filtered slice)
    df = df.copy()
    # Create a new tuple column in the dataframe of lat/lon points
    df['point'] = list(zip(df['latitude'], df['longitude']))
    # Flag each row according to whether its lat/lon point
    # is contained in the shpfile area
    df['inArea'] = df['point'].map(lambda latlon: check_point_in_area(latlon, AREA))
    # Keep only point locations that are within the shpfile area
    # (boolean mask used directly instead of comparing to True)
    return df.loc[df['inArea']]

def check_point_in_area(latlon, AREA):
    """
    Return a boolean value indicating whether
    the specified point is inside of the shapefile area.
    """
    # Unpack the coordinates and coerce them to floats
    lat, lon = float(latlon[0]), float(latlon[1])
    # Load the coordinates into the shared OGR point; calling
    # `AddPoint` on the point geometry replaces its previous value,
    # so the single module-level instance can be reused per call
    OGR_POINT.AddPoint(lon, lat)
    # The point is in the area as soon as any feature contains it
    for idx in range(AREA.GetFeatureCount()):
        if AREA.GetFeature(idx).geometry().Contains(OGR_POINT):
            return True
    return False
9 changes: 9 additions & 0 deletions examples/working_with_grib_data/cropped_area/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Dependencies

The example programs require the following libraries in a Python 3.x environment:

- PyNIO
- gdal
- xarray
- pandas
- matplotlib
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
Extract regional values from an Aviation GRIB file at one vertical level

This program extracts clear-air turbulence data from a GRIB file
at a single vertical (flight) level. It then crops the data
to the horizontal area of a provided shapefile.
"""
import argparse
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
from osgeo.ogr import GetDriverByName
import os
import sys
dir_path = os.path.dirname(os.path.realpath(__file__))
parent = os.path.join(dir_path, os.pardir)
sys.path.append(os.path.join(parent,'..'))
from conversions import wind_speed_from_u_v, wind_direction_from_u_v
from utils_grib import coarse_geo_filter, precise_geo_filter
# Load the shapefile area used to crop the grib data
# NOTE(review): `shpfile` must stay referenced at module level —
# presumably the layer in AREA is tied to the datasource's lifetime;
# confirm before removing the intermediate variable
filename = os.path.join(dir_path, 'shpfile/italy.shp')
driver = GetDriverByName('ESRI Shapefile')
shpfile = driver.Open(filename)  # OGR datasource for the Italy shapefile
AREA = shpfile.GetLayer()  # OGR layer consumed by the geo filter helpers

# DEF_VARIABLES = (
# 'TMP_P0_L102_GLL0', # Temperature
# 'RH_P0_L102_GLL0', # Relative humidity
# 'TIPD_P0_L100_GLL0', # Total icing potential diagnostic
# 'TIPD_P0_L102_GLL0', # Total icing potential diagnostic
# 'UGRD_P0_L6_GLL0', # U-component of wind
# 'UGRD_P0_L102_GLL0', # U-component of wind (vertical levels)
# 'VGRD_P0_L6_GLL0', # V-component of wind
# 'VGRD_P0_L102_GLL0', # V-component of wind (vertical levels)
# 'HGT_P0_L6_GLL0', # Geopotential height
# 'VIS_P0_L1_GLL0', # Visibility
# 'CAT_P0_L102_GLL0', # Clear air turbulence
# 'lv_AMSL2', # Specific altitude above mean sea level
# 'lv_ISBL1', # Isobaric surface
# 'lat_0', # latitude
# 'lon_0', # longitude
# 'lv_AMSL0', # Specific altitude above mean sea level
# )

# Lookup table mapping flight levels (FL100 through FL450 in steps
# of 10, i.e. hundreds of feet) to altitudes in whole meters.
# Generated instead of hand-typed: 1 ft = 0.3048 m exactly, so
# integer arithmetic (3048 / 10000 m per ft) reproduces the
# truncated-meter values without any floating-point rounding.
flight_levels = {
    'FL{}'.format(fl): fl * 100 * 3048 // 10000
    for fl in range(100, 460, 10)
}

# Load and filter grib data to get clear-air turbulence
# within a region at the specified flight level
def parse_data(filepath):
    """Load a grib file and return a dataframe of lat/lon/turbulence
    values cropped to the shapefile area at flight level FL360."""
    # Read the grib file through the PyNIO engine
    dataset = xr.open_dataset(filepath, engine='pynio')
    # (Uncomment to inspect the available data variables)
    # print(dataset.keys())
    # Rename the clear-air turbulence variable for clarity;
    # see DEF_VARIABLES above to look up variable names
    dataset = dataset.rename({'CAT_P0_L102_GLL0': 'turbulence'})
    # Keep only the turbulence variable before converting to a
    # dataframe — this sheds most of the data volume right away,
    # otherwise the conversion would take a long time
    frame = dataset.get('turbulence').to_dataframe()
    # Restrict to a single flight level, using the
    # lookup dictionary defined above
    level_meters = frame.index.get_level_values('lv_AMSL0')
    frame = frame.loc[level_meters == flight_levels['FL360']]
    # Coarse filter first: clipping to the area's bounding box cuts
    # down the work done by the `precise_geo_filter`
    frame = coarse_geo_filter(frame, AREA)
    # Precise filter: drop points outside the shapefile area itself
    frame = precise_geo_filter(frame, AREA)
    # Keep just the columns needed for visualization
    return frame.loc[:, ['latitude', 'longitude', 'turbulence']]

# Visualize the data
def plot_data(data):
    """Render the regional turbulence values as a colored scatter plot."""
    # Scatter the points, colored by turbulence intensity
    plt.scatter(
        data['longitude'].values,
        data['latitude'].values,
        c=data['turbulence'].values,
        s=10,
        cmap='Spectral_r',
        linewidths=0.1
    )
    plt.title('Clear-Air Turbulence at FL360')
    plt.colorbar()
    plt.show()

if __name__ == '__main__':
    # Command-line entry point: parse the grib file path argument,
    # extract the regional flight-level turbulence data, and plot it
    parser = argparse.ArgumentParser(
        description='Extract regional data from a GRIB file at a single vertical level'
    )
    parser.add_argument(
        'filepath', type=str, help='The path to the Aviation bundle GRIB file to open'
    )
    args = parser.parse_args()
    data = parse_data(args.filepath)
    plot_data(data)
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""
Extract accumulated, regional values from Basic GRIB messages

This program extracts precipitation data from 2 GRIB files,
and takes the difference to get fixed-interval accumulations.
It then crops the data to the area of a provided shapefile.
"""
import argparse
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
from osgeo.ogr import GetDriverByName
import os
import sys
dir_path = os.path.dirname(os.path.realpath(__file__))
parent = os.path.join(dir_path, os.pardir)
sys.path.append(os.path.join(parent,'..'))
from conversions import wind_speed_from_u_v, wind_direction_from_u_v
from utils_grib import coarse_geo_filter, precise_geo_filter
# Load the shapefile area used to crop the grib data
# NOTE(review): `shpfile` must stay referenced at module level —
# presumably the layer in AREA is tied to the datasource's lifetime;
# confirm before removing the intermediate variable
filename = os.path.join(dir_path, 'shpfile/italy.shp')
driver = GetDriverByName('ESRI Shapefile')
shpfile = driver.Open(filename)  # OGR datasource for the Italy shapefile
AREA = shpfile.GetLayer()  # OGR layer consumed by the geo filter helpers

# DEF_VARIABLES = (
# 'TMP_P0_L103_GLL0', # Temperature
# 'DPT_P0_L103_GLL0', # Dew point temperature
# 'RH_P0_L103_GLL0', # Relative humidity
# 'UGRD_P0_L103_GLL0', # U-component of wind
# 'VGRD_P0_L103_GLL0', # V-component of wind
# 'GUST_P0_L1_GLL0', # Wind speed (gust)
# 'PRMSL_P0_L101_GLL0', # Pressure reduced to MSL
# 'TMAX_P8_L103_GLL0_max', # Maximum temperature
# 'TMIN_P8_L103_GLL0_min', # Minimum temperature
# 'APCP_P8_L1_GLL0_acc', # Total precipitation
# 'lat_0', # latitude
# 'lon_0', # longitude
# )

# Load and filter grib data to get regional precipitation
def parse_data(filepath1, filepath2):
    """Return a dataframe of lat/lon/precip values cropped to the
    shapefile area, where precip is the accumulation that occurred
    between the two grib files."""
    # Read both grib files through the PyNIO engine
    earlier = xr.open_dataset(filepath1, engine='pynio')
    later = xr.open_dataset(filepath2, engine='pynio')
    # (Uncomment to inspect the available data variables)
    # print(earlier.keys())
    # Convert the xarray datasets to dataframes
    df_earlier = earlier.to_dataframe()
    df_later = later.to_dataframe()
    # Build a fresh dataframe on the shared lat/lon index
    df = pd.DataFrame(index=df_earlier.index)
    # The grib values are forecast-total accumulated precipitation,
    # so subtracting the older file from the newer one yields the
    # fixed-interval accumulation
    df['precip'] = df_later['APCP_P8_L1_GLL0_acc'] - df_earlier['APCP_P8_L1_GLL0_acc']
    # Coarse filter first: clipping to the area's bounding box cuts
    # down the work done by the `precise_geo_filter`
    df = coarse_geo_filter(df, AREA)
    # Precise filter: drop points outside the shapefile area itself
    df = precise_geo_filter(df, AREA)
    # Keep just the columns needed for visualization
    df_viz = df.loc[:, ['latitude', 'longitude', 'precip']]
    # To convert from millimeters to inches:
    # df_viz['precip'] = df_viz['precip'] * 0.0393701
    return df_viz

# Visualize the data
def plot_data(data):
    """Render the regional precipitation values as a colored scatter plot."""
    # Scatter the points, colored by precipitation amount
    plt.scatter(
        data['longitude'].values,
        data['latitude'].values,
        c=data['precip'].values,
        s=10,
        cmap='Blues',
        edgecolors='gray',
        linewidths=0.1
    )
    plt.title('Precipitation (mm)')
    plt.colorbar()
    plt.show()

if __name__ == '__main__':
    # Command-line entry point: parse the two grib file path arguments,
    # difference them to get interval precipitation, and plot the result
    parser = argparse.ArgumentParser(
        description='Get fixed-interval precipitation values within a region by differencing 2 GRIB files'
    )
    parser.add_argument(
        'filepath1', type=str, help='The path to the earlier Basic bundle GRIB file to open'
    )
    parser.add_argument(
        'filepath2', type=str, help='The path to the later Basic bundle GRIB file to open'
    )
    args = parser.parse_args()
    data = parse_data(args.filepath1, args.filepath2)
    plot_data(data)
Loading