Skip to content

Commit

Permalink
Merge pull request #73 from openghg/Iss52-test-get_data-separately
Browse files Browse the repository at this point in the history
Iss52 test get data separately
  • Loading branch information
brendan-m-murphy authored Feb 15, 2024
2 parents 38297a4 + 4c14aa9 commit 2e7e173
Show file tree
Hide file tree
Showing 11 changed files with 260 additions and 163 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

# Version 0.2 (current devel)

- Added option to run an inversion without boundary conditions. This is specified by adding `use_bc = False` in an .ini file. This assumes that the baseline has already been factored into the observations.

- Added tests for `get_data.py`, including creating, saving, and loading merged data. Refactored inversions tests to reload merged data instead of creating it.


# Version 0.1.1

- Bug fix: typo (?) from previous merge conflicts resulted in data not being gathered if `use_merged_data` was `True`,
Expand All @@ -19,7 +24,7 @@

- Added tests that run a full inversion for a small number of iterations

- Added a fix for reading in the correct prior fluxes, when creating the posterior country fluxes and saving everything after the inversion. The prior fluxes are now read directly from the merged data object, and the correct monthly/annual flux is sliced from the full flux object. This includes taking an average flux across a range of months, if the inversion is across mulitple months.
- Added a fix for reading in the correct prior fluxes, when creating the posterior country fluxes and saving everything after the inversion. The prior fluxes are now read directly from the merged data object, and the correct monthly/annual flux is sliced from the full flux object. This includes taking an average flux across a range of months, if the inversion is across multiple months.

- Added a try/except block which drops sites from the inversion if the data merge process doesn't work for that site (which normally happens if there's no obs).

Expand Down
61 changes: 30 additions & 31 deletions openghg_inversions/get_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def data_processing_surface_notracer(
"""
Retrieve and prepare fixed-surface datasets from
specified OpenGHG object stores for forward
simulations and model-data comparisons that do not
simulations and model-data comparisons that do not
use tracers
---------------------------------------------
Args:
Expand Down Expand Up @@ -82,13 +82,13 @@ def data_processing_surface_notracer(
ICOS observations data level. For non-ICOS sites
use "None"
inlet (list/str/opt):
Specific inlet height for the site observations
Specific inlet height for the site observations
(length must match number of sites)
instrument (list/str/opt):
Specific instrument for the site
(length must match number of sites)
Specific instrument for the site
(length must match number of sites)
calibration_scale (str):
Convert measurements to defined calibration scale
Convert measurements to defined calibration scale
met_model (str/opt):
Meteorological model used in the LPDM.
fp_model (str):
Expand All @@ -112,7 +112,7 @@ def data_processing_surface_notracer(
Adds the variability in the averaging period to the measurement
error if set to True.
save_merged_data (bool/opt, default=False):
Save forward simulations data and observations
Save forward simulations data and observations
merged_data_name (str/opt):
Filename for saved forward simulations data and observations
merged_data_dir (str/opt):
Expand All @@ -124,16 +124,15 @@ def data_processing_surface_notracer(

# Convert 'None' args to list
nsites = len(sites)
if inlet == None:
if inlet is None:
inlet = [None] * nsites
if instrument == None:
if instrument is None:
instrument = [None] * nsites
if fp_height == None:
if fp_height is None:
fp_height = [None] * nsites
if obs_data_level == None:
if obs_data_level is None:
obs_data_level = [None] * nsites


fp_all = {}
fp_all[".species"] = species.upper()

Expand Down Expand Up @@ -166,7 +165,7 @@ def data_processing_surface_notracer(
inlet=inlet[i],
start_date=start_date,
end_date=end_date,
icos_data_level=obs_data_level[i], # NB. Variable name may be later updated in OpenGHG
icos_data_level=obs_data_level[i], # NB. Variable name may be later updated in OpenGHG
average=averaging_period[i],
instrument=instrument[i],
calibration_scale=calibration_scale,
Expand All @@ -193,7 +192,7 @@ def data_processing_surface_notracer(
# Get footprints data
try:
# Ensure HiTRes CO2 footprints are obtained if
# using CO2
# using CO2
if species.lower() == "co2":
get_fps = get_footprint(
site=site,
Expand All @@ -204,7 +203,7 @@ def data_processing_surface_notracer(
end_date=end_date,
store=footprint_store,
species=species.lower(),
)
)

else:
get_fps = get_footprint(
Expand All @@ -215,7 +214,7 @@ def data_processing_surface_notracer(
start_date=start_date,
end_date=end_date,
store=footprint_store,
)
)
except SearchError:
print(
f"\nNo footprint data found for {site} with inlet/height {fp_height[i]}, model {fp_model}, and domain {domain}.",
Expand Down Expand Up @@ -254,16 +253,16 @@ def data_processing_surface_notracer(
# Create ModelScenario object for all emissions_sectors
# and combine into one object
model_scenario = ModelScenario(
site=site,
species=species,
inlet=inlet[i],
start_date=start_date,
end_date=end_date,
obs=site_data,
footprint=footprint_dict[site],
flux=flux_dict,
bc=my_bc,
)
site=site,
species=species,
inlet=inlet[i],
start_date=start_date,
end_date=end_date,
obs=site_data,
footprint=footprint_dict[site],
flux=flux_dict,
bc=my_bc,
)

if len(emissions_name) == 1:
scenario_combined = model_scenario.footprints_data_merge()
Expand All @@ -273,16 +272,16 @@ def data_processing_surface_notracer(
elif len(emissions_name) > 1:
# Create model scenario object for each flux sector
model_scenario_dict = {}

for source in emissions_name:
scenario_sector = model_scenario.footprints_data_merge(sources = source, recalculate = True)
scenario_sector = model_scenario.footprints_data_merge(sources=source, recalculate=True)

if species.lower() == "co2":
model_scenario_dict["mf_mod_high_res_" + source] = scenario_sector["mf_mod_high_res"]
model_scenario_dict["mf_mod_high_res_" + source] = scenario_sector["mf_mod_high_res"]
elif species.lower() != "co2":
model_scenario_dict["mf_mod_" + source] = scenario_sector["mf_mod"]
scenario_combined = model_scenario.footprints_data_merge(recalculate = True)
model_scenario_dict["mf_mod_" + source] = scenario_sector["mf_mod"]

scenario_combined = model_scenario.footprints_data_merge(recalculate=True)

for key in model_scenario_dict.keys():
scenario_combined[key] = model_scenario_dict[key]
Expand Down
4 changes: 3 additions & 1 deletion openghg_inversions/hbmcmc/hbmcmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ def fixedbasisMCMC(
reload_merged_data=False,
save_merged_data=False,
merged_data_dir=None,
merged_data_name=None,
basis_output_path=None,
**kwargs,
):
Expand Down Expand Up @@ -431,7 +432,8 @@ def fixedbasisMCMC(
rerun_merge = True

if reload_merged_data:
merged_data_name = f"{species}_{start_date}_{outputname}_merged-data.pickle"
if merged_data_name is None:
merged_data_name = f"{species}_{start_date}_{outputname}_merged-data.pickle"
merged_data_filename = os.path.join(merged_data_dir, merged_data_name)
print(f"Attempting to read in merged data from: {merged_data_filename}...\n")

Expand Down
2 changes: 1 addition & 1 deletion openghg_inversions/hbmcmc/inversion_pymc.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import arviz as az
from scipy import stats
from pathlib import Path
from typing import cast, Optional, Union
from typing import Optional


from openghg_inversions import convert
Expand Down
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[pytest]
pythonpath = .
testpaths = tests

markers =
Expand Down
73 changes: 65 additions & 8 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,42 @@
from openghg.standardise import standardise_surface, standardise_bc, standardise_flux, standardise_footprint
from openghg.types import ObjectStoreError

raw_data_path = Path(".").resolve() / "tests/data/"
inversions_test_store_path = Path(tempfile.gettempdir(), "openghg_inversions_testing_store")
_raw_data_path = Path(".").resolve() / "tests/data/"


@pytest.fixture(scope="session")
def raw_data_path():
    """Session-scoped fixture exposing the module-level ``_raw_data_path``."""
    data_dir = _raw_data_path
    return data_dir


# set up for pickled data
@pytest.fixture(scope="session")
def pickled_data_file_name():
    """Return the file name of the pickled merged-data file used in the tests."""
    file_name = "merged_data_test_tac_combined_scenario.pickle"
    return file_name


@pytest.fixture(scope="session")
def merged_data_dir():
    """Return the path of the merged-data directory under the system temp dir.

    Only the path is built here; this fixture does not create the directory.
    """
    temp_root = Path(tempfile.gettempdir())
    return temp_root / "openghg_inversions_testing_merged_data_dir"


@pytest.fixture(scope="session", autouse=True)
def add_frozen_pickled_merged_data(merged_data_dir, pickled_data_file_name):
    """Make the frozen pickled merged data available in ``merged_data_dir``.

    Creates ``merged_data_dir`` if it does not exist yet, then copies the
    pickle from the raw test data directory unless a file with that name is
    already present there.

    Pickled data created/frozen 9 Feb 2024.
    """
    merged_data_dir.mkdir(exist_ok=True)

    # Leave an existing copy alone.
    already_present = (merged_data_dir / pickled_data_file_name).exists()
    if already_present:
        return

    source_file = _raw_data_path / pickled_data_file_name
    shutil.copy(source_file, merged_data_dir)


bc_basis_function_path = Path(".").resolve() / "bc_basis_functions"
countries_path = Path(".").resolve() / "countries"

inversions_test_store_path = Path(tempfile.gettempdir(), "openghg_inversions_testing_store")


@pytest.fixture(scope="session", autouse=True)
def session_config_mocker() -> Iterator[None]:
Expand Down Expand Up @@ -48,10 +79,10 @@ def session_config_mocker() -> Iterator[None]:
}
flux_metadata = {"species": "ch4", "source": "total-ukghg-edgar7", "domain": "europe"}

obs_data_path = raw_data_path / "obs_tac_ch4_185m_2019-01-01_2019-02-01_data.nc"
bc_data_path = raw_data_path / "bc_ch4_europe_cams_2019-01-01_2019-12-31_data.nc"
footprints_data_path = raw_data_path / "footprints_tac_europe_name_185m_2019-01-01_2019-01-07_data.nc"
flux_data_path = raw_data_path / "flux_total_ch4_europe_edgar7_2019-01-01_2019-12-31_data.nc"
obs_data_path = _raw_data_path / "obs_tac_ch4_185m_2019-01-01_2019-02-01_data.nc"
bc_data_path = _raw_data_path / "bc_ch4_europe_cams_2019-01-01_2019-12-31_data.nc"
footprints_data_path = _raw_data_path / "footprints_tac_europe_name_185m_2019-01-01_2019-01-07_data.nc"
flux_data_path = _raw_data_path / "flux_total_ch4_europe_edgar7_2019-01-01_2019-12-31_data.nc"

data_info = {
"surface": [standardise_surface, obs_metadata, obs_data_path],
Expand Down Expand Up @@ -109,12 +140,38 @@ def session_ancilliary_files() -> None:

# copy basis file into default location if there isn't a file with the same name there
if not (bc_basis_function_path / "EUROPE" / "NESW_EUROPE_2019.nc").exists():
shutil.copy((raw_data_path / "bc_basis_NESW_EUROPE_2019.nc"), (bc_basis_function_path / "EUROPE" / "NESW_EUROPE_2019.nc"))
shutil.copy(
(_raw_data_path / "bc_basis_NESW_EUROPE_2019.nc"),
(bc_basis_function_path / "EUROPE" / "NESW_EUROPE_2019.nc"),
)

# Add country file
if not countries_path.exists():
countries_path.mkdir()

# copy country file into default location if there isn't a file with the same name there
if not (countries_path / "country_EUROPE.nc").exists():
shutil.copy((raw_data_path / "country_EUROPE.nc"), (countries_path / "country_EUROPE.nc"))
shutil.copy((_raw_data_path / "country_EUROPE.nc"), (countries_path / "country_EUROPE.nc"))


@pytest.fixture(scope="module")
def tac_ch4_data_args():
    """Return keyword arguments describing a CH4 data request for site TAC.

    The request covers 2019-01-01 to 2019-01-02 on the EUROPE domain, with
    every object store set to "inversions_tests".
    NOTE(review): presumably unpacked into the data-processing function of
    ``get_data.py`` -- confirm against the tests that use this fixture.
    """
    return {
        "species": "ch4",
        "sites": ["TAC"],
        "start_date": "2019-01-01",
        "end_date": "2019-01-02",
        "bc_store": "inversions_tests",
        "obs_store": "inversions_tests",
        "footprint_store": "inversions_tests",
        "emissions_store": "inversions_tests",
        "inlet": ["185m"],
        "instrument": ["picarro"],
        "domain": "EUROPE",
        "fp_height": ["185m"],
        "fp_model": "NAME",
        "emissions_name": ["total-ukghg-edgar7"],
        "met_model": "ukv",
        "averaging_period": ["1H"],
    }
Binary file not shown.
Binary file not shown.
13 changes: 7 additions & 6 deletions tests/test_conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,29 @@ def test_default_session_fixture():
<temp dir>/openghg_inversions_testing_store
"""
from openghg.objectstore._local_store import read_local_config

conf = read_local_config()

assert conf
assert 'inversions_tests' in conf['object_store']
assert 'openghg_inversions_testing_store' in conf['object_store']['inversions_tests']['path']
assert "inversions_tests" in conf["object_store"]
assert "openghg_inversions_testing_store" in conf["object_store"]["inversions_tests"]["path"]


def test_obs_in_test_store():
    """Check that TAC CH4 surface observations are found in the test store."""
    results = search(site="tac", species="ch4", data_type="surface", store="inversions_tests")
    assert results


def test_footprints_in_test_store():
    """Check that TAC footprints are found in the test store."""
    # Restrict the search to the test store, as test_obs_in_test_store does,
    # so that a hit in any other configured store cannot satisfy the test.
    results = search(site="tac", data_type="footprints", store="inversions_tests")
    assert results


def test_bc_in_test_store():
    """Check that CH4 boundary conditions are found in the test store."""
    # Restrict the search to the test store, as test_obs_in_test_store does,
    # so that a hit in any other configured store cannot satisfy the test.
    results = search(species="ch4", data_type="boundary_conditions", store="inversions_tests")
    assert results


def test_flux_in_test_store():
    """Check that CH4 emissions (flux) data are found in the test store."""
    # Restrict the search to the test store, as test_obs_in_test_store does,
    # so that a hit in any other configured store cannot satisfy the test.
    results = search(species="ch4", data_type="emissions", store="inversions_tests")
    assert results
Loading

0 comments on commit 2e7e173

Please sign in to comment.