
Commit

Updated join_l3 to generate source attribute for sites
Validate attribute keys in e2e test
ladsmund committed Aug 19, 2024
1 parent 6d80afb commit 0ccf858
Showing 2 changed files with 90 additions and 36 deletions.
19 changes: 18 additions & 1 deletion src/pypromice/process/join_l3.py
@@ -1,6 +1,10 @@
 #!/usr/bin/env python
+import json
 import logging, os, sys, toml
 from argparse import ArgumentParser
+
+from pypromice.utilities.git import get_commit_hash_and_check_dirty
+
 import pypromice.resources
 from pypromice.process.write import prepare_and_write
 import numpy as np
@@ -284,7 +288,7 @@ def align_surface_heights(data_series_new, data_series_old):
     # Drop NaN values and extract the last `hours_in_5_years` non-NaN data points
     data_series_old_nonan = data_series_old.dropna()
     data_series_old_last_5_years = data_series_old_nonan.iloc[
-        -min(len(data_series_old), hours_in_5_years) :
+        -min(len(data_series_old), hours_in_5_years):
     ]

     # Perform a linear fit on the last 5 years of data
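
Note on the slice in this hunk: Series.iloc[-n:] takes the last n elements, and wrapping n in min(len(...), hours_in_5_years) keeps the window bounded; pandas also clamps out-of-range negative slice starts, so short series are used in full. A minimal sketch of that behaviour (the constant and series are illustrative stand-ins, not values from the repository):

    import pandas as pd

    hours_in_5_years = 5 * 365 * 24  # illustrative stand-in for the constant used above
    data_series_old = pd.Series(range(100))  # much shorter than 5 years of hourly data

    # dropna() first, then keep at most the last hours_in_5_years points;
    # min() bounds the negative index for short series
    data_series_old_nonan = data_series_old.dropna()
    last_5_years = data_series_old_nonan.iloc[-min(len(data_series_old), hours_in_5_years):]

    assert len(last_5_years) == 100  # the whole series, since it is shorter than the window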
@@ -511,6 +515,19 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
     l3_merged.attrs["project"] = sorted_list_station_data[0][1]["project"]
     l3_merged.attrs["location_type"] = sorted_list_station_data[0][1]["location_type"]

+    site_source = dict(
+        site_config_source_hash=get_commit_hash_and_check_dirty(config_folder),
+        gcnet_source_hash=get_commit_hash_and_check_dirty(folder_gcnet),
+    )
+    for stid, station_attributes in l3_merged.attrs["stations_attributes"].items():
+        station_source = json.loads(station_attributes["source"])
+        for k, v in station_source.items():
+            if k in site_source and site_source[k] != v:
+                site_source[k] = "multiple"
+            else:
+                site_source[k] = v
+    l3_merged.attrs["source"] = json.dumps(site_source)
+
     v = pypromice.resources.load_variables(variables)
     m = pypromice.resources.load_metadata(metadata)
     if outpath is not None:
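For context, the block added above builds a site-level "source" attribute by seeding a dict with the config and GC-Net repository commit hashes and then folding in each station's JSON "source" attribute, collapsing any key whose value differs between stations to "multiple". A standalone sketch of that merge, using made-up station attributes:

    import json

    # made-up per-station attributes; in join_l3 these come from
    # l3_merged.attrs["stations_attributes"]
    stations_attributes = {
        "TEST1": {"source": json.dumps({"l0_config_hash": "abc123"})},
        "TEST2": {"source": json.dumps({"l0_config_hash": "def456"})},
    }

    site_source = {}  # join_l3 seeds this with the commit hashes instead
    for stid, station_attributes in stations_attributes.items():
        station_source = json.loads(station_attributes["source"])
        for k, v in station_source.items():
            if k in site_source and site_source[k] != v:
                site_source[k] = "multiple"  # stations disagree on this key
            else:
                site_source[k] = v

    print(json.dumps(site_source))  # {"l0_config_hash": "multiple"}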
107 changes: 72 additions & 35 deletions tests/e2e/test_process.py
@@ -25,72 +25,74 @@
 TEST_CONFIG_PATH = TEST_DATA_ROOT_PATH / "test_config1_raw.toml"
 STATION_CONFIGURATIONS_ROOT = TEST_DATA_ROOT_PATH / "station_configurations"

-class TestProcess(unittest.TestCase):

+class TestProcess(unittest.TestCase):
     def test_get_vars(self):
-        '''Test variable table lookup retrieval'''
+        """Test variable table lookup retrieval"""
         v = pypromice.resources.load_variables()
         self.assertIsInstance(v, pd.DataFrame)
-        self.assertTrue(v.columns[0] in 'standard_name')
-        self.assertTrue(v.columns[2] in 'units')
+        self.assertTrue(v.columns[0] in "standard_name")
+        self.assertTrue(v.columns[2] in "units")

     def test_get_meta(self):
-        '''Test AWS names retrieval'''
+        """Test AWS names retrieval"""
         m = pypromice.resources.load_metadata()
         self.assertIsInstance(m, dict)
-        self.assertTrue('references' in m)
+        self.assertTrue("references" in m)

     def test_add_all(self):
-        '''Test variable and metadata attributes added to Dataset'''
+        """Test variable and metadata attributes added to Dataset"""
         d = xr.Dataset()
         v = pypromice.resources.load_variables()
         att = list(v.index)
-        att1 = ['gps_lon', 'gps_lat', 'gps_alt', 'albedo', 'p']
+        att1 = ["gps_lon", "gps_lat", "gps_alt", "albedo", "p"]
         for a in att:
-            d[a]=[0,1]
+            d[a] = [0, 1]
         for a in att1:
-            d[a]=[0,1]
-        d['time'] = [datetime.datetime.now(),
-                     datetime.datetime.now()-timedelta(days=365)]
-        d.attrs['station_id']='TEST'
-        d.attrs['level']='L2_test'
+            d[a] = [0, 1]
+        d["time"] = [
+            datetime.datetime.now(),
+            datetime.datetime.now() - timedelta(days=365),
+        ]
+        d.attrs["station_id"] = "TEST"
+        d.attrs["level"] = "L2_test"
         meta = pypromice.resources.load_metadata()
         d = addVars(d, v)
         d = addMeta(d, meta)
-        self.assertTrue(d.attrs['station_id']=='TEST')
-        self.assertIsInstance(d.attrs['references'], str)
+        self.assertTrue(d.attrs["station_id"] == "TEST")
+        self.assertIsInstance(d.attrs["references"], str)

     def test_l0_to_l3(self):
-        '''Test L0 to L3 processing'''
+        """Test L0 to L3 processing"""
         pAWS = AWS(
             TEST_CONFIG_PATH.as_posix(),
             TEST_DATA_ROOT_PATH.as_posix(),
-            data_issues_repository=TEST_DATA_ROOT_PATH / 'data_issues',
+            data_issues_repository=TEST_DATA_ROOT_PATH / "data_issues",
             var_file=None,
-            meta_file=None
+            meta_file=None,
         )
         pAWS.process()
         self.assertIsInstance(pAWS.L2, xr.Dataset)
-        self.assertTrue(pAWS.L2.attrs['station_id']=='TEST1')
+        self.assertTrue(pAWS.L2.attrs["station_id"] == "TEST1")

     def get_l2_cli(self):
-        '''Test get_l2 CLI'''
-        exit_status = os.system('get_l2 -h')
+        """Test get_l2 CLI"""
+        exit_status = os.system("get_l2 -h")
         self.assertEqual(exit_status, 0)

     def test_join_l2_cli(self):
-        '''Test join_l2 CLI'''
-        exit_status = os.system('join_l2 -h')
+        """Test join_l2 CLI"""
+        exit_status = os.system("join_l2 -h")
         self.assertEqual(exit_status, 0)

     def test_l2_to_l3_cli(self):
         """Test get_l2tol3 CLI"""
-        exit_status = os.system('get_l2tol3 -h')
+        exit_status = os.system("get_l2tol3 -h")
         self.assertEqual(exit_status, 0)

     def test_join_l3_cli(self):
         """Test join_l3 CLI"""
-        exit_status = os.system('join_l3 -h')
+        exit_status = os.system("join_l3 -h")
         self.assertEqual(exit_status, 0)

     def test_full_e2e(self):

def test_full_e2e(self):
@@ -176,28 +178,63 @@ def test_full_e2e(self):
         self.assertTrue(expected_output_path.exists())

         for output_rel_path in [
-            "site_l3/SITE_01/SITE_01_day.csv",
-            "site_l3/SITE_01/SITE_01_day.nc",
-            "site_l3/SITE_01/SITE_01_hour.csv",
-            "site_l3/SITE_01/SITE_01_hour.nc",
-            "site_l3/SITE_01/SITE_01_month.csv",
-            "site_l3/SITE_01/SITE_01_month.nc",
+            "station_l2_join/TEST1/TEST1_hour.csv",
+            "station_l2_join/TEST1/TEST1_hour.nc",
             "station_l2_raw/TEST1/TEST1_10min.csv",
             "station_l2_raw/TEST1/TEST1_10min.nc",
             "station_l2_raw/TEST1/TEST1_hour.csv",
             "station_l2_raw/TEST1/TEST1_hour.nc",
             "station_l2_tx/TEST1/TEST1_hour.csv",
             "station_l2_tx/TEST1/TEST1_hour.nc",
-            "station_l2_join/TEST1/TEST1_hour.csv",
-            "station_l2_join/TEST1/TEST1_hour.nc",
             "station_l3/TEST1/TEST1_day.csv",
             "station_l3/TEST1/TEST1_day.nc",
             "station_l3/TEST1/TEST1_hour.csv",
             "station_l3/TEST1/TEST1_hour.nc",
             "station_l3/TEST1/TEST1_month.csv",
             "station_l3/TEST1/TEST1_month.nc",
+            "site_l3/SITE_01/SITE_01_day.csv",
+            "site_l3/SITE_01/SITE_01_day.nc",
+            "site_l3/SITE_01/SITE_01_hour.csv",
+            "site_l3/SITE_01/SITE_01_hour.nc",
+            "site_l3/SITE_01/SITE_01_month.csv",
+            "site_l3/SITE_01/SITE_01_month.nc",
         ]:
-            self.assertTrue((root / output_rel_path).exists())
+            output_path = root / output_rel_path
+            self.assertTrue(output_path.exists())
+
+            if output_path.name.endswith("nc"):
+                output_dataset = xr.load_dataset(output_path)
+                self.check_global_attributes(output_dataset, output_rel_path)
+
+    def check_global_attributes(self, dataset: xr.Dataset, reference: str):
+        attribute_keys = set(dataset.attrs.keys())
+        highly_recommended_global_attributes = {
+            "title",
+            "summary",
+            "keywords",
+            "conventions",
+        }
+        self.assertSetEqual(
+            set(),
+            highly_recommended_global_attributes - attribute_keys,
+            reference,
+        )
+        required_global_attributes = {
+            "id",
+            "naming_authority",
+            "date_created",
+            "institution",
+            "date_issued",
+            "date_modified",
+            "processing_level",
+            "product_version",
+            "source",
+        }
+        self.assertSetEqual(
+            set(),
+            required_global_attributes - attribute_keys,
+            reference,
+        )


 if __name__ == "__main__":
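For context, the new check_global_attributes helper asserts that every NetCDF the pipeline writes carries a set of required global attributes (the names follow the ACDD metadata convention), including the "source" attribute that join_l3 now writes as a JSON string. A minimal sketch of reading that attribute back from an output file (the path is illustrative):

    import json
    import xarray as xr

    # illustrative path; the e2e test iterates over every *.nc output instead
    ds = xr.load_dataset("site_l3/SITE_01/SITE_01_hour.nc")

    # "source" is a JSON-encoded dict; each value is a commit hash,
    # or "multiple" when the merged stations disagreed on it
    site_source = json.loads(ds.attrs["source"])
    print(site_source.get("site_config_source_hash"))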
