-
Notifications
You must be signed in to change notification settings - Fork 14
/
uwyo_sounding.py
413 lines (389 loc) · 16.6 KB
/
uwyo_sounding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 13 16:30:02 2019
@author: shlomi
"""
def check_date(date):
    """Convert a date-like value (e.g. a 'YYYY-MM-DD' string) to a pandas
    timestamp.

    Used as the ``type=`` converter of the ``--start`` and ``--end``
    command line arguments.
    """
    from pandas import to_datetime

    parsed = to_datetime(date)
    return parsed
def check_path(path):
    """Validate that *path* exists and return it as a ``pathlib.Path``.

    Used as the ``type=`` converter of the ``--path`` command line argument.

    INPUTS:
    path: anything whose ``str()`` is a filesystem path.
    RETURNS: ``pathlib.Path`` built from ``str(path)``.
    RAISES: ``argparse.ArgumentTypeError`` if the path does not exist.
    """
    # argparse is imported here because at module level it is only imported
    # inside the ``__main__`` guard; without this the error branch raises
    # NameError whenever the module is imported instead of run as a script.
    import argparse
    import os
    from pathlib import Path

    path = str(path)
    if not os.path.exists(path):
        raise argparse.ArgumentTypeError(path + ' does not exist...')
    return Path(path)
def DensHumid(tempc, pres, e):
    """Density of moist air.

    The result is the sum of the dry-air density and the water-vapour
    density, each obtained from the ideal gas law with its own specific gas
    constant. Inputs are converted to K and Pa internally.

    INPUTS:
    tempc: Temperature (C)
    pres: static pressure (hPa)
    e: water vapor partial pressure (hPa)
    OUTPUTS:
    rho_air (kg/m^3)
    SOURCE: http://en.wikipedia.org/wiki/Density_of_air
    """
    gas_const_dry = 287.05  # specific gas constant of dry air, J kg^{-1} K^{-1}
    gas_const_vapour = 461.52  # specific gas constant of water vapour, J kg^{-1} K^{-1}

    kelvin = tempc + 273.15
    total_pa = pres * 100.0
    vapour_pa = e * 100.0
    # dry-air partial pressure is what is left after removing the vapour part
    dry_pa = total_pa - vapour_pa

    dry_density = dry_pa / (gas_const_dry * kelvin)
    vapour_density = vapour_pa / (gas_const_vapour * kelvin)
    return dry_density + vapour_density
def precipitable_water(da):
    """Calculate Total Precipitable Water (TPW) for a single sounding.

    TPW is the column-integrated water vapour: the integral over height of
    (mixing ratio * moist-air density), approximated here with the
    trapezoidal rule. The mixing ratio is read from the sounding ('MIXR');
    the vapour pressure that enters the air density is computed from the dew
    point temperature ('DWPT').

    INPUTS:
    da: one sounding with a 'var' coordinate holding 'TEMP', 'PRES', 'MIXR',
        'DWPT' and 'HGHT' and a 'mpoint' (measurement level) dimension.
        Assumed units: TEMP and DWPT in C, PRES in hPa, MIXR in g/kg and
        HGHT in m.
    RETURNS: TPW in kg/m^2 (== mm), or NaN when the profiles cannot be
        integrated together (mismatching lengths).
    """
    import numpy as np

    # np.trapz was renamed np.trapezoid in NumPy 2.0 (old name deprecated).
    integrate = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz

    def _profile(name):
        # Profile of a single variable with its NaN levels removed.
        return da.sel(var=name).dropna('mpoint').reset_coords(drop=True)

    tempc = _profile('TEMP')  # C
    preshpa = _profile('PRES')  # hPa
    mixrkg = _profile('MIXR') / 1000.0  # kg/kg
    dwptc = _profile('DWPT')
    hghtm = _profile('HGHT')
    # Trim every profile to the levels 0..(shortest length - 1).
    min_size = min([tempc.size, preshpa.size, mixrkg.size, dwptc.size,
                    hghtm.size])
    levels = slice(0, min_size - 1)
    tempc = tempc.sel(mpoint=levels)
    preshpa = preshpa.sel(mpoint=levels)
    mixrkg = mixrkg.sel(mpoint=levels)
    dwptc = dwptc.sel(mpoint=levels)
    hghtm = hghtm.sel(mpoint=levels)
    # Water vapour partial pressure from the dew point (hPa by default).
    vpreshpa = VaporPressure(dwptc)
    # Density of air (accounting for moisture). DensHumid takes C and hPa and
    # does the conversion to K and Pa itself, so the sounding units are
    # passed as they are; pre-converted values would be converted twice.
    rho = DensHumid(tempc, preshpa, vpreshpa)
    # Trapezoidal rule to approximate TPW (units kg/m^2==mm)
    try:
        tpw = integrate(mixrkg * rho, hghtm)
    except ValueError:
        return np.nan
    return tpw
def VaporPressure(tempc, phase="liquid", units='hPa', method=None):
    """Water vapor pressure over liquid water or ice.

    INPUTS:
    tempc: (C) OR dwpt (C), if SATURATION vapour pressure is desired.
    phase: ['liquid'],'ice'. If 'liquid', do simple dew point. If 'ice',
        return saturation vapour pressure as follows:
        Tc>=0: es = es_liquid
        Tc <0: es = es_ice
    units: ['hPa'],'Pa'. Units of the returned pressure.
    method: [None],'Buck'. None selects the CIMO-guide formula below, 'Buck'
        selects the Buck formula.
    RETURNS: e_sat in (hPa) or (Pa), according to *units*. For phase='ice'
        the result comes from ``numpy.where`` and is a numpy array.
    RAISES:
    ValueError: if *units* or *method* is not one of the supported values.
    NotImplementedError: if *phase* is neither 'liquid' nor 'ice'.
    SOURCE: http://cires.colorado.edu/~voemel/vp.html (#2:
    CIMO guide (WMO 2008), modified to return values in Pa)
    This formulation is chosen because of its appealing simplicity,
    but it performs very well with respect to the reference forms
    at temperatures above -40 C.
    """
    import numpy as np

    # scale factor from hPa (the native unit of both formulas)
    if units == 'hPa':
        unit = 1.0
    elif units == 'Pa':
        unit = 100.0
    else:
        raise ValueError(
            "units must be 'hPa' or 'Pa', got {!r}".format(units))
    if method is None:
        over_liquid = 6.112 * np.exp(17.67 * tempc / (tempc + 243.12)) * unit
        over_ice = 6.112 * np.exp(22.46 * tempc / (tempc + 272.62)) * unit
    elif method == 'Buck':
        over_liquid = 6.1121 * \
            np.exp((18.678 - tempc / 234.5) * (tempc / (257.4 + tempc))) * unit
        over_ice = 6.1125 * \
            np.exp((23.036 - tempc / 333.7) * (tempc / (279.82 + tempc))) * unit
    else:
        raise ValueError(
            "method must be None or 'Buck', got {!r}".format(method))
    if phase == "liquid":
        return over_liquid
    elif phase == "ice":
        # below freezing use the over-ice value, otherwise the over-liquid one
        return np.where(tempc < 0, over_ice, over_liquid)
    else:
        raise NotImplementedError
def Tm(da, from_raw_sounding=True):
    """Calculate the atmospheric mean temperature.

    Tm = int(pp/T)dz / int(pp/T^2)dz, where T is the temperature in K, pp is
    the vapour pressure and the integrals run over the heights vector; both
    integrals use the trapezoidal rule.

    NOTE(review): pp is computed as VaporPressure(tempc), i.e. from the air
    temperature and not from the dew point - confirm this is intended.

    INPUTS:
    da: if from_raw_sounding is True, one sounding with 'TEMP' (C) and
        'HGHT' entries on its 'var' coordinate and a 'mpoint' dimension.
        Otherwise a temperature (C) profile whose heights are available as
        da['height'].
    from_raw_sounding: selects between the two input layouts above.
    RETURNS: Tm in K. For a raw sounding, NaN is returned when the profiles
        cannot be integrated together (ValueError); for the other layout the
        ValueError propagates.
    """
    import numpy as np

    # np.trapz was renamed np.trapezoid in NumPy 2.0 (old name deprecated).
    integrate = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz

    if from_raw_sounding:
        tempc = da.sel(var='TEMP').dropna('mpoint').reset_coords(drop=True)
        h = da.sel(var='HGHT').dropna('mpoint').reset_coords(drop=True)
    else:
        tempc = da
        h = da['height']
    vp = VaporPressure(tempc, units='hPa')
    tempk = tempc + 273.15
    try:
        return integrate(vp / tempk, h) / integrate(vp / tempk**2, h)
    except ValueError:
        # TEMP and HGHT of a raw sounding may end up with different lengths
        # after the NaN levels are dropped; only that case is reported as NaN.
        if from_raw_sounding:
            return np.nan
        raise
def process_sounding_json(json_path, st_num):
    """Process json files from sounding download and parse them to xarray.

    Every 'station_<st_num>_soundings_<year>.json' file found in *json_path*
    holds a list of soundings, each one being the list of text lines of a
    downloaded page. From each sounding the reported precipitable water
    (the '[mm]' line), the observation time, the data table and the station
    latitude/longitude/elevation are extracted. Two netCDF files are saved
    to *json_path*: 'PW_<st_num>_soundings.nc' and
    'ALL_<st_num>_soundings.nc'.

    INPUTS:
    json_path: pathlib.Path of the folder holding the json files; the
        netCDF files are written there as well.
    st_num: station number, used in the file names and the attributes.
    RETURNS: (pw, da, bad_line)
    pw: DataArray (time) of the reported precipitable water in mm, sorted
        by time and without the non-positive values.
    da: DataArray (time, mpoint, var) with the full profiles, sorted by
        time; the units line of the table is stored in its attributes.
    bad_line: list of the soundings (lists of lines) that failed to parse.
    RAISES: ValueError if no sounding at all could be parsed.
    """
    import io
    import json
    import logging

    import pandas as pd
    import xarray as xr
    from aux_gps import path_glob

    logger = logging.getLogger('uwyo')

    def _value_of(lines, key):
        # Text after the last ':' of the first line containing *key*;
        # raises IndexError when no line contains it.
        return [x for x in lines if key in x][0].split(':')[-1]

    def _first_index(lines, key):
        # Index of the first line containing *key*; IndexError when missing.
        return [i for i, line in enumerate(lines) if key in line][0]

    # loop over lines lists in each year:
    pw_years = []
    df_years = []
    bad_line = []
    logger.info('proccessing station {} that was downloaded from UWYO website'.format(st_num))
    for file in path_glob(json_path, 'station_{}_soundings_*.json'.format(st_num)):
        # The year is the last '_' separated token of the file name; the
        # stem is used so dots in parent folder names cannot interfere.
        year = file.stem.split('_')[-1]
        logger.info('Opening json file year: {}'.format(year))
        with open(file) as read_file:
            lines_list = json.load(read_file)
        # loop over the lines list:
        pw_list = []
        dt_list = []
        df_list = []
        for lines in lines_list:
            try:
                pw = float(_value_of(lines, '[mm]'))
                dt = _value_of(lines, 'Observation time').split()[0]
                # the column names are 3 lines below the 'Observations at'
                # title, followed by a units line, a separator line and
                # then the table rows up to the station information title
                header_line = _first_index(lines, 'Observations at') + 3
                end_line = _first_index(
                    lines, 'Station information and sounding indices')
                header = lines[header_line].split()
                units = lines[header_line + 1].split()
                # Parse the fixed-width table from memory instead of going
                # through a temporary file in the data folder.
                table = ''.join("%s\n" % item
                                for item in lines[header_line + 3: end_line])
                df = pd.read_fwf(io.StringIO(table), names=header)
                df = df.astype(float)
                # The %y (as opposed to %Y) is to read 2-digit year
                dt_list.append(pd.to_datetime(dt, format='%y%m%d/%H%M'))
                pw_list.append(pw)
                df_list.append(df)
                st_lat = float(_value_of(lines, 'Station latitude'))
                st_lon = float(_value_of(lines, 'Station longitude'))
                st_alt = float(_value_of(lines, 'Station elevation'))
            except IndexError:
                logger.warning('no data found in lines entry...')
                bad_line.append(lines)
                continue
            except AssertionError:
                # NOTE(review): nothing in this function asserts; kept in
                # case one of the called libraries does - confirm.
                bad_line.append(lines)
                continue
        if not pw_list or not df_list or not dt_list:
            logger.warning('no entries in {}'.format(year))
            continue
        pw_year = xr.DataArray(pw_list, dims=['time'])
        df_year = [xr.DataArray(x, dims=['mpoint', 'var']) for x in df_list]
        df_year = xr.concat(df_year, 'time')
        df_year['time'] = dt_list
        df_year['var'] = header
        pw_year['time'] = dt_list
        pw_years.append(pw_year)
        df_years.append(df_year)
    if not pw_years:
        # Nothing to concatenate or describe: fail with a clear message
        # instead of an obscure concat error further down.
        raise ValueError(
            'no valid soundings were parsed for station {} in {}'.format(
                st_num, json_path))
    pw = xr.concat(pw_years, 'time')
    da = xr.concat(df_years, 'time')
    da.attrs['description'] = 'station {} soundings full profile'.format(st_num)
    # units of each column, as read from the last parsed table header
    for k, v in zip(header, units):
        da.attrs[k] = v
    pw.attrs['description'] = 'station {} soundings of precipatable water'.format(st_num)
    pw.attrs['units'] = 'mm'  # eqv. kg/m^2
    pw.attrs['station_number'] = st_num
    pw.attrs['station_lat'] = st_lat
    pw.attrs['station_lon'] = st_lon
    pw.attrs['station_alt'] = st_alt
    pw = pw.sortby('time')
    da = da.sortby('time')
    # drop 0 pw - not physical
    pw = pw.where(pw > 0, drop=True)
    pw_file = 'PW_{}_soundings.nc'.format(st_num)
    pw.to_netcdf(json_path / pw_file, 'w')
    logger.info('{} was saved to {}'.format(pw_file, json_path))
    all_file = 'ALL_{}_soundings.nc'.format(st_num)
    da.to_netcdf(json_path / all_file, 'w')
    logger.info('{} was saved to {}'.format(all_file, json_path))
    return pw, da, bad_line
def get_sounding_data_from_uwyo(savepath, st_num='40179',
                                start_date='2003-01-01',
                                end_date='2004-12-31'):
    """Download sounding data of a station from the UWYO website.

    One request is made for every 12 hours between *start_date* and
    *end_date* (00 and 12 when the start date is at midnight). The text
    lines of each downloaded page are collected per year and saved to
    'station_<st_num>_soundings_<year>.json' in *savepath*.

    INPUTS:
    savepath: pathlib.Path of the folder to save the json files to.
    st_num: station number (default is the bet-dagan station).
    start_date, end_date: anything ``pandas.to_datetime`` understands.
    RETURNS: None
    """
    import json
    import logging

    import pandas as pd
    import requests
    from bs4 import BeautifulSoup as bs

    logger = logging.getLogger('uwyo')
    logger.info('Downloading station {} from UWYO'.format(st_num))
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    logger.info('start date : {}'.format(start_date.strftime('%Y-%m-%d')))
    logger.info('end date : {}'.format(end_date.strftime('%Y-%m-%d')))
    dates = pd.date_range(start_date, end_date, freq='12h')
    for year in range(start_date.year, end_date.year + 1):
        year_dates = dates[dates.year == year]
        if year_dates.empty:
            # the range can end before reaching its last calendar year
            continue
        lines_list = []
        for date in year_dates:
            logger.info('downloading datetime: {}'.format(date.strftime('%Y-%m-%d:%H')))
            # FROM is day+hour as DDHH: both parts are zero-padded and taken
            # straight from the timestamp, so every hour is well defined.
            # NOTE(review): TO is hard-coded to 0100 - confirm it is intended.
            url = ('http://weather.uwyo.edu/cgi-bin/sounding?region=mideast&'
                   'TYPE=TEXT%3ALIST&YEAR={}&MONTH={}&FROM={}&TO=0100&STNM={}'
                   .format(date.year, date.month, date.strftime('%d%H'),
                           st_num))
            # NOTE(review): no timeout and no HTTP status check here; an
            # error page is stored as-is.
            r = requests.get(url)
            soup = bs(r.text, "lxml")
            lines_list.append(soup.text.split('\n'))
        logger.info('Saving list of dicts to: {}'.format(savepath))
        filename = 'station_{}_soundings_{}.json'.format(st_num, year)
        with open(savepath / filename, 'w') as fout:
            json.dump(lines_list, fout)
    print('Done!')
    return
def process_data_from_uwyo_sounding(path, st_num):
    """Create tm, tpw and ts from the saved soundings of a station and store
    them next to the station reported pw.

    Reads 'ALL_<st_num>_soundings.nc' and 'PW_<st_num>_soundings.nc' from
    *path*, keeps the times returned by ``dim_intersection`` for both, and
    for every time computes the surface temperature (TEMP at mpoint 0, in
    K), the precipitable water and the mean atmospheric temperature. Season
    and hour labels are added, times with missing values are dropped and the
    dataset is saved to 'station_<st_num>_sounding_pw_Ts_Tk.nc' in *path*.

    INPUTS:
    path: pathlib.Path of the folder holding the input files; the output
        file is written there too.
    st_num: station number used in the file names.
    RETURNS: None
    """
    import logging

    import xarray as xr
    from aux_gps import dim_intersection

    log = logging.getLogger('uwyo')
    pw_file = 'PW_{}_soundings.nc'.format(st_num)
    all_file = 'ALL_{}_soundings.nc'.format(st_num)
    da = xr.open_dataarray(path / all_file)
    pw = xr.open_dataarray(path / pw_file)
    new_time = dim_intersection([da, pw], 'time', dropna=False)
    for loaded in (pw_file, all_file):
        log.info('loaded {}'.format(loaded))
    da = da.sel(time=new_time)
    pw = pw.sel(time=new_time)
    pw.load()
    da.load()
    log.info('calculating pw and tm for station {}'.format(st_num))

    # one entry per sounding time, in the order of da.time
    times = list(da.time)
    ts_list = [da.sel(var='TEMP', mpoint=0, time=t) + 273.15 for t in times]
    tpw_list = [precipitable_water(da.sel(time=t)) for t in times]
    tm_list = [Tm(da.sel(time=t)) for t in times]

    ts = xr.concat(ts_list, 'time')
    ts.attrs['description'] = 'Surface temperature from {} station soundings'.format(st_num)
    ts.attrs['units'] = 'K'
    tm = xr.DataArray(tm_list, dims='time')
    tm.attrs['description'] = 'mean atmospheric temperature calculated by water vapor pressure weights'
    tm.attrs['units'] = 'K'
    tpw = xr.DataArray(tpw_list, dims='time')

    result = pw.to_dataset(name='pw')
    result['tpw'] = tpw
    result['tm'] = tm
    result['ts'] = ts
    result['tpw'].attrs['description'] = 'station {} percipatable water calculated from sounding by me'.format(st_num)
    result['tpw'].attrs['units'] = 'mm'
    result['season'] = result['time.season']
    # hour of the sounding as text: '12' -> 'noon', '0' -> 'midnight'
    result['hour'] = result['time.hour'].astype(str)
    for hour_text, label in (('12', 'noon'), ('0', 'midnight')):
        result['hour'] = result.hour.where(result.hour != hour_text, label)
    result = result.dropna('time')

    filename = 'station_{}_sounding_pw_Ts_Tk.nc'.format(st_num)
    log.info('saving {} to {}'.format(filename, path))
    compression = dict(zlib=True, complevel=9)  # best compression
    encoding = {name: compression for name in result}
    result.to_netcdf(path / filename, 'w', encoding=encoding)
    log.info('Done!')
    return
if __name__ == '__main__':
    # Command line entry point.
    #   --mode: either download or post
    #   --path: where to save the downloaded files or where to save the post
    #           proccesed files.
    #   --station, --start, --end: optional, fall back to the defaults below.
    import argparse
    import sys
    from aux_gps import configure_logger
    import pandas as pd
    logger = configure_logger(name='uwyo')
    parser = argparse.ArgumentParser(
        description='a command line tool for downloading and proccesing upper air soundings from university of Wyoming website.')
    # Temporarily remove the default optional group so that the
    # 'required arguments' group is listed before it in the help text.
    optional = parser._action_groups.pop()
    required = parser.add_argument_group('required arguments')
    required.add_argument(
        '--path',
        help="a full path to the save location of the files",
        type=check_path)
    required.add_argument(
        '--mode',
        help='mode of the tool',
        choices=['download', 'post'])
    optional.add_argument('--station', help='WMO station number, use with mode=download',
                          type=str)
    optional.add_argument('--start', help='starting date, use with mode=download',
                          type=check_date)
    optional.add_argument(
        '--end',
        help='end date, use with mode=download', type=check_date)
    parser._action_groups.append(optional)  # added this line
    args = parser.parse_args()
    # A missing required argument is an error: exit with a non-zero status
    # so that calling scripts can detect the failure.
    if args.path is None:
        print('path is a required argument, run with -h...')
        sys.exit(1)
    if args.mode is None:
        print('mode is a required argument, run with -h...')
        sys.exit(1)
    if args.start is None:
        args.start = pd.to_datetime('2003-01-01')
    if args.end is None:
        args.end = pd.to_datetime('2019-01-01')
    if args.station is None:
        args.station = '40179'  # bet-dagan station
    if args.mode == 'download':
        get_sounding_data_from_uwyo(args.path, args.station, args.start,
                                    args.end)
    elif args.mode == 'post':
        process_sounding_json(args.path, args.station)
        process_data_from_uwyo_sounding(args.path, args.station)