From c3f2e929effdc3349da0bd5ebe950ab0a2256263 Mon Sep 17 00:00:00 2001 From: ph-kev <98072684+ph-kev@users.noreply.github.com> Date: Mon, 11 Nov 2024 14:16:19 -0800 Subject: [PATCH 1/3] Refactor data sources --- docs/src/leaderboard.md | 11 +- .../ClimaEarth/leaderboard/data_sources.jl | 282 +++++++++++------- .../ClimaEarth/leaderboard/leaderboard.jl | 20 +- 3 files changed, 196 insertions(+), 117 deletions(-) diff --git a/docs/src/leaderboard.md b/docs/src/leaderboard.md index d2f826556..64be46057 100644 --- a/docs/src/leaderboard.md +++ b/docs/src/leaderboard.md @@ -9,10 +9,13 @@ preprocessing variables of interest are done in `data_sources.jl` and computing and plotting are done in `leaderboard.jl`. To add a new variable, you ideally only need to modify `data_sources.jl`. -#### Add a new variable to the bias plots -If you want to add a new variable to the bias plots, you add the variable to `sim_var_dict`, -`obs_var_dict`, `compare_vars_biases_groups`, and optionally -`compare_vars_biases_plot_extrema`. +#### Add a new 3D variable to the bias plots +If you want to add a new 3D variable defined over latitude, longitude, and time to the bias +plots, you add the variable to `sim_var_dict`, `obs_var_dict`, `compare_vars_biases_groups`, +and optionally `compare_vars_biases_plot_extrema`. The variables `sim_var_dict`, +`obs_var_dict`, `compare_vars_biases_groups`, `compare_vars_biases_plot_extrema` are in the +function `get_sim_var_dict`, `get_obs_var_dict`, `get_compare_vars_biases_groups`, and +`get_compare_vars_biases_plot_extrema` respectively. The dictionaries `sim_var_dict` and `obs_var_dict` map short names to an anonymous function that returns a [`OutputVar`](https://clima.github.io/ClimaAnalysis.jl/dev/var/). Both diff --git a/experiments/ClimaEarth/leaderboard/data_sources.jl b/experiments/ClimaEarth/leaderboard/data_sources.jl index fc8f19dc8..557ff87f2 100644 --- a/experiments/ClimaEarth/leaderboard/data_sources.jl +++ b/experiments/ClimaEarth/leaderboard/data_sources.jl @@ -1,40 +1,6 @@ import ClimaAnalysis import ClimaUtilities.ClimaArtifacts: @clima_artifact -# For plotting bias plots -compare_vars_biases_plot_extrema = Dict( - "pr" => (-5.0, 5.0), - "rsdt" => (-2.0, 2.0), - "rsut" => (-50.0, 50.0), - "rlut" => (-50.0, 50.0), - "rsutcs" => (-20.0, 20.0), - "rlutcs" => (-20.0, 20.0), - "rsds" => (-50.0, 50.0), - "rsus" => (-10.0, 10.0), - "rlds" => (-50.0, 50.0), - "rlus" => (-50.0, 50.0), - "rsdscs" => (-10.0, 10.0), - "rsuscs" => (-10.0, 10.0), - "rldscs" => (-20.0, 20.0), -) - -# To add a variable to the bias plots, add the variable to sim_var_dict, obs_var_dict, -# compare_vars_biases_groups, and compare_vars_biases_plot_extrema -# To add a variable to the leaderboard, add the variable to rmse_var_names and rmse_var_dict - -# Dict for loading in simulation data -sim_var_dict = Dict{String, Any}( - "pr" => - () -> begin - sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = "pr") - sim_var = - ClimaAnalysis.convert_units(sim_var, "mm/day", conversion_function = x -> x .* Float32(-86400)) - sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var) - return sim_var - end, -) - -# Loop to load the rest of the simulation data # Tuple of short names for loading simulation and observational data sim_obs_short_names_no_pr = [ ("rsdt", "solar_mon"), @@ -51,84 +17,180 @@ sim_obs_short_names_no_pr = [ ("rldscs", "sfc_lw_down_clr_t_mon"), ] -# Add "pr" and the necessary preprocessing -for (short_name, _) in sim_obs_short_names_no_pr - sim_var_dict[short_name] = - () -> begin - sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = short_name) - sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var) - return sim_var - end +""" + get_compare_vars_biases_groups() + +Return an array of arrays of short names. + +This determines which variables are grouped on the bias plots. +""" +function get_compare_vars_biases_groups() + compare_vars_biases_groups = [ + ["pr", "rsdt", "rsut", "rlut"], + ["rsds", "rsus", "rlds", "rlus"], + ["rsutcs", "rlutcs", "rsdscs", "rsuscs", "rldscs"], + ] + return compare_vars_biases_groups end -# Dict for loading observational data -# Add "pr" and the necessary preprocessing -obs_var_dict = Dict{String, Any}( - "pr" => - (start_date) -> begin - obs_var = ClimaAnalysis.OutputVar( - joinpath(@clima_artifact("precipitation_obs"), "precip.mon.mean.nc"), - "precip", - new_start_date = start_date, - shift_by = Dates.firstdayofmonth, - ) - return obs_var - end, -) - -# Loop to load the rest of the observational data and the necessary preprocessing -for (sim_name, obs_name) in sim_obs_short_names_no_pr - obs_var_dict[sim_name] = - (start_date) -> begin - obs_var = ClimaAnalysis.OutputVar( - joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - obs_name, - new_start_date = start_date, - shift_by = Dates.firstdayofmonth, - ) - # Convert from W m-2 to W m^-2 - ClimaAnalysis.units(obs_var) == "W m-2" ? obs_var = ClimaAnalysis.set_units(obs_var, "W m^-2") : - error("Did not expect $(ClimaAnalysis.units(obs_var)) for the units") - return obs_var - end +""" + get_compare_vars_biases_plot_extrema() + +Return a dictionary mapping short names to ranges for the bias plots. + +To add a variable to the leaderboard, add a key-value pair to the dictionary +`compare_vars_biases_plot_extrema` whose key is a short name key is the same +short name in `sim_var_dict` and the value is a tuple, where the first element +is the lower bound and the last element is the upper bound for the bias plots. +""" +function get_compare_vars_biases_plot_extrema() + compare_vars_biases_plot_extrema = Dict( + "pr" => (-5.0, 5.0), + "rsdt" => (-2.0, 2.0), + "rsut" => (-50.0, 50.0), + "rlut" => (-50.0, 50.0), + "rsutcs" => (-20.0, 20.0), + "rlutcs" => (-20.0, 20.0), + "rsds" => (-50.0, 50.0), + "rsus" => (-10.0, 10.0), + "rlds" => (-50.0, 50.0), + "rlus" => (-50.0, 50.0), + "rsdscs" => (-10.0, 10.0), + "rsuscs" => (-10.0, 10.0), + "rldscs" => (-20.0, 20.0), + ) + return compare_vars_biases_plot_extrema end -# Used to organize plots -compare_vars_biases_groups = [ - ["pr", "rsdt", "rsut", "rlut"], - ["rsds", "rsus", "rlds", "rlus"], - ["rsutcs", "rlutcs", "rsdscs", "rsuscs", "rldscs"], -] +""" + get_sim_var_dict(diagnostics_folder_path) + +Return a dictionary mapping short names to `OutputVar` containing preprocessed +simulation data. + +To add a variable for the leaderboard, add a key-value pair to the dictionary +`sim_var_dict` whose key is the short name of the variable and the value is an +anonymous function that returns a `OutputVar`. For each variable, any +preprocessing should be done in the corresponding anonymous function which +includes unit conversion and shifting the dates. + +The variable should have only three dimensions: latitude, longitude, and time. +""" +function get_sim_var_dict(diagnostics_folder_path) + # Dict for loading in simulation data + sim_var_dict = Dict{String, Any}( + "pr" => + () -> begin + sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = "pr") + sim_var = ClimaAnalysis.convert_units( + sim_var, + "mm/day", + conversion_function = x -> x .* Float32(-86400), + ) + sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + return sim_var + end, + ) + + # Add "pr" and the necessary preprocessing + for (short_name, _) in sim_obs_short_names_no_pr + sim_var_dict[short_name] = + () -> begin + sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = short_name) + sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + return sim_var + end + end + return sim_var_dict +end + +""" + get_obs_var_dict() -# For plotting box plots and leaderboard - -# Load data into RMSEVariables -rmse_var_names = ["pr", "rsut", "rlut"] -rmse_var_pr = ClimaAnalysis.read_rmses( - joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv"), - "pr", - units = "mm / day", -) -rmse_var_rsut = ClimaAnalysis.read_rmses( - joinpath(@clima_artifact("cmip_model_rmse"), "rsut_rmse_amip_rsut_amip_5yr.csv"), - "rsut", - units = "W m^-2", -) -rmse_var_rlut = ClimaAnalysis.read_rmses( - joinpath(@clima_artifact("cmip_model_rmse"), "rlut_rmse_amip_rlut_amip_5yr.csv"), - "rlut", - units = "W m^-2", -) - -# Add models and units for CliMA -rmse_var_pr = ClimaAnalysis.add_model(rmse_var_pr, "CliMA") -ClimaAnalysis.add_unit!(rmse_var_pr, "CliMA", "mm / day") - -rmse_var_rsut = ClimaAnalysis.add_model(rmse_var_rsut, "CliMA") -ClimaAnalysis.add_unit!(rmse_var_rsut, "CliMA", "W m^-2") - -rmse_var_rlut = ClimaAnalysis.add_model(rmse_var_rlut, "CliMA") -ClimaAnalysis.add_unit!(rmse_var_rlut, "CliMA", "W m^-2") - -# Store the rmse vars in a dictionary -rmse_var_dict = Dict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut) +Return a dictionary mapping short names to `OutputVar` containing preprocessed +observational data. + +To add a variable for the leaderboard, add a key-value pair to the dictionary +`obs_var_dict` whose key is the short name of the variable and the value is an +anonymous function that returns a `OutputVar`. The function must take in a +start date which is used to align the times in the observational data to match +the simulation data. The short name must be the same as in `sim_var_dict` in the +function `sim_var_dict`. For each variable, any preprocessing is done in the +corresponding anonymous function which includes unit conversion and shifting the +dates. + +The variable should have only three dimensions: latitude, longitude, and time. +""" +function get_obs_var_dict() + # Add "pr" and the necessary preprocessing + obs_var_dict = Dict{String, Any}( + "pr" => + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + joinpath(@clima_artifact("precipitation_obs"), "precip.mon.mean.nc"), + "precip", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + return obs_var + end, + ) + + # Loop to load the rest of the observational data and the necessary preprocessing + for (sim_name, obs_name) in sim_obs_short_names_no_pr + obs_var_dict[sim_name] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), + obs_name, + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + # Convert from W m-2 to W m^-2 + ClimaAnalysis.units(obs_var) == "W m-2" ? obs_var = ClimaAnalysis.set_units(obs_var, "W m^-2") : + error("Did not expect $(ClimaAnalysis.units(obs_var)) for the units") + return obs_var + end + end + return obs_var_dict +end + +""" + get_rmse_var_dict() + +Return a dictionary mapping short names to `RMSEVariable` containing information about +RMSEs of models for the short name of the variable. +""" +function get_rmse_var_dict() + # Load data into RMSEVariables + rmse_var_names = ["pr", "rsut", "rlut"] + rmse_var_pr = ClimaAnalysis.read_rmses( + joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv"), + "pr", + units = "mm / day", + ) + rmse_var_rsut = ClimaAnalysis.read_rmses( + joinpath(@clima_artifact("cmip_model_rmse"), "rsut_rmse_amip_rsut_amip_5yr.csv"), + "rsut", + units = "W m^-2", + ) + rmse_var_rlut = ClimaAnalysis.read_rmses( + joinpath(@clima_artifact("cmip_model_rmse"), "rlut_rmse_amip_rlut_amip_5yr.csv"), + "rlut", + units = "W m^-2", + ) + + # Add models and units for CliMA + rmse_var_pr = ClimaAnalysis.add_model(rmse_var_pr, "CliMA") + ClimaAnalysis.add_unit!(rmse_var_pr, "CliMA", "mm / day") + + rmse_var_rsut = ClimaAnalysis.add_model(rmse_var_rsut, "CliMA") + ClimaAnalysis.add_unit!(rmse_var_rsut, "CliMA", "W m^-2") + + rmse_var_rlut = ClimaAnalysis.add_model(rmse_var_rlut, "CliMA") + ClimaAnalysis.add_unit!(rmse_var_rlut, "CliMA", "W m^-2") + + # Store the rmse vars in a dictionary + rmse_var_dict = Dict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut) + return rmse_var_dict +end diff --git a/experiments/ClimaEarth/leaderboard/leaderboard.jl b/experiments/ClimaEarth/leaderboard/leaderboard.jl index f42887a2e..57b18ad1c 100644 --- a/experiments/ClimaEarth/leaderboard/leaderboard.jl +++ b/experiments/ClimaEarth/leaderboard/leaderboard.jl @@ -9,14 +9,28 @@ include("data_sources.jl") """ compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) -Plot the biases and a leaderboard of various variables. +Plot the biases and a leaderboard of various variables defined over longitude, latitude, and +time. -The paramter `leaderboard_base_path` is the path to save the leaderboards and bias plots and -`diagnostics_folder_path` is the path to the simulation data. +The parameter `leaderboard_base_path` is the path to save the leaderboards and bias plots, +and `diagnostics_folder_path` is the path to the simulation data. + +Loading and preprocessing simulation data is done by `get_sim_var_dict`. Loading and +preprocessing observational data is done by `get_obs_var_dict`. The ranges of the bias plots +are determined by `get_compare_vars_biases_plot_extrema`. The groups of variables plotted on +the bias plots are determined by `get_compare_vars_biases_groups()`. Loading the RMSEs from +other models is done by `get_rmse_var_dict`. See the functions defined in data_sources.jl. """ function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) @info "Error against observations" + # Get everything we need from data_sources.jl + sim_var_dict = get_sim_var_dict(diagnostics_folder_path) + obs_var_dict = get_obs_var_dict() + compare_vars_biases_plot_extrema = get_compare_vars_biases_plot_extrema() + rmse_var_dict = get_rmse_var_dict() + compare_vars_biases_groups = get_compare_vars_biases_groups() + # Set up dict for storing simulation and observational data after processing sim_obs_comparsion_dict = Dict() seasons = ["ANN", "MAM", "JJA", "SON", "DJF"] From 802190429c17d9805f5f77242c6cf714e0cd7cf1 Mon Sep 17 00:00:00 2001 From: ph-kev <98072684+ph-kev@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:15:41 -0800 Subject: [PATCH 2/3] Add ERA5 observational data on pressure levels --- experiments/ClimaEarth/Artifacts.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/experiments/ClimaEarth/Artifacts.toml b/experiments/ClimaEarth/Artifacts.toml index 22c808119..4b084d1d2 100644 --- a/experiments/ClimaEarth/Artifacts.toml +++ b/experiments/ClimaEarth/Artifacts.toml @@ -58,3 +58,6 @@ git-tree-sha1 = "cbbc6b3752d9cb9b667ec33cfbeb46819f8db418" [[landsea_mask_1deg.download]] sha256 = "3722b553c2fdf28a6574aea2e0b167d16ab050f34e5969ada45625b3a3ecb6da" url = "https://caltech.box.com/shared/static/b3u4dv0dsoswvqgp8y63bzj7awbhwztd.gz" + +[era5_monthly_averages_pressure_levels_1979_2024] +git-tree-sha1 = "dd7ee9500805f590f8fc4c00bfc373eeb7a01587" From 6bbd53a99016d8522fb0fb53f9a4f57d745dd16a Mon Sep 17 00:00:00 2001 From: Kevin Phan <98072684+ph-kev@users.noreply.github.com> Date: Fri, 15 Nov 2024 11:26:16 -0800 Subject: [PATCH 3/3] Add plots for variables in pressure coordinates The plots added are bias plots at 850 hPa, 500 hPa, and 250 hPa and lat - pressure plots. --- NEWS.md | 6 + docs/src/leaderboard.md | 11 ++ .../ClimaEarth/leaderboard/data_sources.jl | 159 ++++++++++++++++- .../ClimaEarth/leaderboard/leaderboard.jl | 166 +++++++++++++++++- experiments/ClimaEarth/run_amip.jl | 1 + 5 files changed, 336 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 129654136..f22f34dc5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,6 +21,12 @@ by identifying where elevation is greater than 0. Note, this can lead to misidentification of ocean in some areas of the globe that are inland but below sea level (Dead Sea, Death Valley, ...). +### Leaderboard for variables over longitude, latitude, time, and pressure - PR [#1094](https://github.com/CliMA/ClimaCoupler.jl/pull/1094) + +As a part of the post processing pipeline, bias plots for variables at the +pressure levels of 850.0, 500.0, 250.0 hPa and bias plots over latitude and +pressure levels are being created. + ### Code cleanup diff --git a/docs/src/leaderboard.md b/docs/src/leaderboard.md index 64be46057..cb5f5f522 100644 --- a/docs/src/leaderboard.md +++ b/docs/src/leaderboard.md @@ -51,3 +51,14 @@ must be initialized for each variable of interest. The CliMA model is added with the `RMSEVariable`. It is assumed that the `RMSEVariable` contains only the columns "DJF", "MAM", "JJA", "SON", and "ANN" in that order. The file `leaderboard.jl` will load the appropriate data into the `RMSEVariable`. + +### Add a new variable to compare against observations in pressure coordinates +To add a new variable, you only need to modify the variable `sim_var_pfull_dict` in the +function `get_sim_var_in_pfull_dict`, the variable `obs_var_dict` in the function +`get_obs_var_in_pfull_dict`, and the variable `compare_vars_biases_plot_extrema` in the +function `get_compare_vars_biases_plot_extrema_pfull`. The variables and functions are +defined exactly the same as their analogous versions in the section above. + +It is expected that the dimensions of the variables are time, latitude, longitude, and +pressure in no particular order and the units for the pressure dimension is expected to be +`hPa`. diff --git a/experiments/ClimaEarth/leaderboard/data_sources.jl b/experiments/ClimaEarth/leaderboard/data_sources.jl index 557ff87f2..ddaddb1e7 100644 --- a/experiments/ClimaEarth/leaderboard/data_sources.jl +++ b/experiments/ClimaEarth/leaderboard/data_sources.jl @@ -1,5 +1,6 @@ import ClimaAnalysis import ClimaUtilities.ClimaArtifacts: @clima_artifact +import OrderedCollections: OrderedDict # Tuple of short names for loading simulation and observational data sim_obs_short_names_no_pr = [ @@ -36,7 +37,8 @@ end """ get_compare_vars_biases_plot_extrema() -Return a dictionary mapping short names to ranges for the bias plots. +Return a dictionary mapping short names to ranges for the bias plots. This is +used by the function `compute_leaderboard`. To add a variable to the leaderboard, add a key-value pair to the dictionary `compare_vars_biases_plot_extrema` whose key is a short name key is the same @@ -66,7 +68,7 @@ end get_sim_var_dict(diagnostics_folder_path) Return a dictionary mapping short names to `OutputVar` containing preprocessed -simulation data. +simulation data. This is used by the function `compute_leaderboard`. To add a variable for the leaderboard, add a key-value pair to the dictionary `sim_var_dict` whose key is the short name of the variable and the value is an @@ -108,7 +110,7 @@ end get_obs_var_dict() Return a dictionary mapping short names to `OutputVar` containing preprocessed -observational data. +observational data. This is used by the function `compute_leaderboard`. To add a variable for the leaderboard, add a key-value pair to the dictionary `obs_var_dict` whose key is the short name of the variable and the value is an @@ -159,11 +161,11 @@ end get_rmse_var_dict() Return a dictionary mapping short names to `RMSEVariable` containing information about -RMSEs of models for the short name of the variable. +RMSEs of models for the short name of the variable. This is used by the function +`compute_leaderboard`. """ function get_rmse_var_dict() # Load data into RMSEVariables - rmse_var_names = ["pr", "rsut", "rlut"] rmse_var_pr = ClimaAnalysis.read_rmses( joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv"), "pr", @@ -191,6 +193,151 @@ function get_rmse_var_dict() ClimaAnalysis.add_unit!(rmse_var_rlut, "CliMA", "W m^-2") # Store the rmse vars in a dictionary - rmse_var_dict = Dict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut) + rmse_var_dict = OrderedDict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut) return rmse_var_dict end + +""" + get_sim_var_in_pfull_dict(diagnostics_folder_path) + +Return a dictionary mapping short names to `OutputVar` containing preprocessed +simulation data in pressure space. This is used by `compute_pfull_leaderboard`. + +To add a variable for the leaderboard, add a key-value pair to the dictionary +`sim_var_dict` whose key is the short name of the variable and the value is an +anonymous function that returns a `OutputVar`. For each variable, any +preprocessing should be done in the corresponding anonymous function which +includes unit conversion and shifting the dates. + +The variable should have only four dimensions: latitude, longitude, time, and +pressure. The units of pressure should be in hPa. +""" +function get_sim_var_in_pfull_dict(diagnostics_folder_path) + simdir = ClimaAnalysis.SimDir(diagnostics_folder_path) + sim_var_pfull_dict = Dict{String, Any}() + + short_names = ["ta", "hur", "hus"] + for short_name in short_names + sim_var_pfull_dict[short_name] = + () -> begin + sim_var = get(simdir, short_name = short_name) + pfull_var = get(simdir, short_name = "pfull") + + (ClimaAnalysis.units(sim_var) == "") && (sim_var = ClimaAnalysis.set_units(sim_var, "unitless")) + (ClimaAnalysis.units(sim_var) == "kg kg^-1") && + (sim_var = ClimaAnalysis.set_units(sim_var, "unitless")) + + sim_in_pfull_var = ClimaAnalysis.Atmos.to_pressure_coordinates(sim_var, pfull_var) + sim_in_pfull_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_in_pfull_var) + sim_in_pfull_var = ClimaAnalysis.convert_dim_units( + sim_in_pfull_var, + "pfull", + "hPa"; + conversion_function = x -> 0.01 * x, + ) + return sim_in_pfull_var + end + end + return sim_var_pfull_dict +end + +""" + get_obs_var_in_pfull_dict() + +Return a dictionary mapping short names to `OutputVar` containing preprocessed +observational data in pressure coordinates. This is used by +`compute_pfull_leaderboard`. + +To add a variable for the leaderboard, add a key-value pair to the dictionary +`obs_var_dict` whose key is the short name of the variable and the value is an anonymous +function that returns a `OutputVar`. The function must take in a start date +which is used to align the times in the observational data to match the +simulation data. The short name must be the same as in `sim_var_pfull_dict` in +the function `get_sim_var_in_pfull_dict`. Any preprocessing is done in the +function which includes unit conversion and shifting the dates. + +The variable should have only four dimensions: latitude, longitude, time, and +pressure. The units of pressure should be in hPa. +""" +function get_obs_var_in_pfull_dict() + artifact_path = joinpath( + @clima_artifact("era5_monthly_averages_pressure_levels_1979_2024"), + "era5_monthly_averages_pressure_levels_197901-202410.nc", + ) + short_names_pairs = [("ta", "t"), ("hus", "q")] + obs_var_dict = Dict{String, Any}() + for (short_name, era5_short_name) in short_names_pairs + obs_var_dict[short_name] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + artifact_path, + era5_short_name, + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + (ClimaAnalysis.units(obs_var) == "kg kg**-1") && + (obs_var = ClimaAnalysis.set_units(obs_var, "unitless")) + obs_var = ClimaAnalysis.Var.convert_dim_units( + obs_var, + "pressure_level", + "hPa"; + conversion_function = x -> 0.01 * x, + ) + return obs_var + end + end + + obs_var_dict["hur"] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + artifact_path, + "r", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + obs_var = ClimaAnalysis.Var.convert_dim_units( + obs_var, + "pressure_level", + "hPa"; + conversion_function = x -> 0.01 * x, + ) + # Convert from percentages (e.g. 120%) to decimal (1.20) + obs_var = ClimaAnalysis.Var.convert_units(obs_var, "unitless", conversion_function = x -> 0.01 * x) + return obs_var + end + return obs_var_dict +end + +""" + get_compare_vars_biases_plot_extrema_pfull() + +Return a dictionary mapping short names to ranges for the bias plots. This is +used by the function `compute_pfull_leaderboard`. + +To add a variable to the leaderboard, add a key-value pair to the dictionary +`compare_vars_biases_plot_extrema` whose key is a short name key is the same +short name in `sim_var_pfull_dict` in the function `get_sim_var_pfull_dict` and +the value is a tuple, where the first element is the lower bound and the last +element is the upper bound for the bias plots. +""" +function get_compare_vars_biases_plot_extrema_pfull() + compare_vars_biases_plot_extrema = Dict("ta" => (-5.0, 5.0), "hur" => (-0.4, 0.4), "hus" => (-0.0015, 0.0015)) + return compare_vars_biases_plot_extrema +end + +""" + get_compare_vars_biases_heatmap_extrema_pfull() + +Return a dictionary mapping short names to ranges for the heatmaps. This is +used by the function `compute_pfull_leaderboard`. + +To add a variable to the leaderboard, add a key-value pair to the dictionary +`compare_vars_biases_heatmap_extrema` whose key is a short name key is the same +short name in `sim_var_pfull_dict` in the function `get_sim_var_pfull_dict` and +the value is a tuple, where the first element is the lower bound and the last +element is the upper bound for the bias plots. +""" +function get_compare_vars_biases_heatmap_extrema_pfull() + compare_vars_biases_heatmap_extrema = Dict("ta" => (-10.0, 10.0), "hur" => (-0.4, 0.4), "hus" => (-0.001, 0.001)) + return compare_vars_biases_heatmap_extrema +end diff --git a/experiments/ClimaEarth/leaderboard/leaderboard.jl b/experiments/ClimaEarth/leaderboard/leaderboard.jl index 57b18ad1c..d3700f558 100644 --- a/experiments/ClimaEarth/leaderboard/leaderboard.jl +++ b/experiments/ClimaEarth/leaderboard/leaderboard.jl @@ -1,5 +1,4 @@ import ClimaAnalysis -import ClimaUtilities.ClimaArtifacts: @clima_artifact import GeoMakie import CairoMakie import Dates @@ -137,6 +136,7 @@ function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) end # Add RMSE for the CliMA model and for each season + rmse_var_names = keys(rmse_var_dict) for short_name in rmse_var_names for season in seasons !ClimaAnalysis.isempty(sim_obs_comparsion_dict[short_name][season][1]) && ( @@ -167,6 +167,169 @@ function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) CairoMakie.save(joinpath(leaderboard_base_path, "bias_leaderboard.png"), fig_leaderboard) end +""" + compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) + +Plot the biases and a leaderboard of various variables defined over longitude, latitude, +time, and pressure levels. + +The parameter `leaderboard_base_path` is the path to save the leaderboards and bias plots, +and `diagnostics_folder_path` is the path to the simulation data. + +The parameter `leaderboard_base_path` is the path to save the leaderboards and bias plots, +and `diagnostics_folder_path` is the path to the simulation data. + +Loading and preprocessing simulation data is done by `get_sim_var_in_pfull_dict`. Loading +and preprocessing observational data is done by `get_obs_var_in_pfull_dict`. The ranges of +the bias plots is defined by `get_compare_vars_biases_plot_extrema_pfull`. See the functions +defined in data_sources.jl for more information. +""" +function compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) + @info "Error against observations for variables in pressure coordinates" + + sim_var_pfull_dict = get_sim_var_in_pfull_dict(diagnostics_folder_path) + obs_var_pfull_dict = get_obs_var_in_pfull_dict() + + # Print dates for debugging + _, get_var = first(sim_var_pfull_dict) + var = get_var() + output_dates = Dates.DateTime(var.attributes["start_date"]) .+ Dates.Second.(ClimaAnalysis.times(var)) + @info "Working with dates:" + @info output_dates + + # Set up dict for storing simulation and observational data after processing + sim_obs_comparsion_dict = Dict() + + for short_name in keys(sim_var_pfull_dict) + sim_var = sim_var_pfull_dict[short_name]() + obs_var = obs_var_pfull_dict[short_name](sim_var.attributes["start_date"]) + + # Check units for pressure are in hPa + ClimaAnalysis.dim_units(sim_var, ClimaAnalysis.pressure_name(sim_var)) != "hPa" && + error("Units of pressure should be hPa for $short_name simulation data") + ClimaAnalysis.dim_units(obs_var, ClimaAnalysis.pressure_name(obs_var)) != "hPa" && + error("Units of pressure should be hPa for $short_name simulation data") + + # Remove first spin_up_months from simulation + spin_up_months = 6 + spinup_cutoff = spin_up_months * 31 * 86400.0 + ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff && + (sim_var = ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff)) + + # Restrain the pressure levels so we can resample + min_pfull = ClimaAnalysis.pressures(obs_var)[1] + sim_pressures = ClimaAnalysis.pressures(sim_var) + greater_than_min_pfull_idx = findfirst(x -> x > min_pfull, sim_pressures) + sim_var = ClimaAnalysis.window(sim_var, "pfull", left = sim_pressures[greater_than_min_pfull_idx]) + + # Resample over lat, lon, time, and pressures + obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var) + + # Take time average + sim_var = ClimaAnalysis.average_time(sim_var) + obs_var = ClimaAnalysis.average_time(obs_var) + + # Save observation and simulation data + sim_obs_comparsion_dict[short_name] = (; sim = sim_var, obs = obs_var) + end + + # Slicing only uses the nearest value, so we also need to keep track of the + # actual pressure levels that we get when slicing + target_p_lvls = [850.0, 500.0, 250.0] + real_p_lvls = [] + + # Get units for pressure for plotting + p_units = ClimaAnalysis.dim_units(first(sim_obs_comparsion_dict)[2].sim, "pfull") + + # Initialize ranges for colorbar and figure whose columns are pressure levels and rows are variables + compare_vars_biases_plot_extrema_pfull = get_compare_vars_biases_plot_extrema_pfull() + fig_bias_pfull_vars = CairoMakie.Figure(size = (650 * length(target_p_lvls), 450 * length(sim_obs_comparsion_dict))) + + # Plot bias at the pressure levels in p_lvls + for (row_idx, short_name) in enumerate(keys(sim_obs_comparsion_dict)) + # Plot label for variable name + CairoMakie.Label(fig_bias_pfull_vars[row_idx, 0], short_name, tellheight = false, fontsize = 30) + for (col_idx, p_lvl) in enumerate(target_p_lvls) + layout = fig_bias_pfull_vars[row_idx, col_idx] = CairoMakie.GridLayout() + sim_var = sim_obs_comparsion_dict[short_name].sim + obs_var = sim_obs_comparsion_dict[short_name].obs + + # Slice at pressure level using nearest value rather interpolating + sim_var = ClimaAnalysis.slice(sim_var, pfull = p_lvl) + obs_var = ClimaAnalysis.slice(obs_var, pressure_level = p_lvl) + + # Get the actual pressure level that we slice at + push!(real_p_lvls, parse(Float64, sim_var.attributes["slice_pfull"])) + + # Add so that it show up on in the title + sim_var.attributes["short_name"] = "mean $(ClimaAnalysis.short_name(sim_var))" + + # Plot bias + ClimaAnalysis.Visualize.plot_bias_on_globe!( + layout, + sim_var, + obs_var, + cmap_extrema = compare_vars_biases_plot_extrema_pfull[short_name], + ) + end + end + + # Plot label for the pressure levels + for (col_idx, p_lvl) in enumerate(real_p_lvls[begin:length(target_p_lvls)]) + CairoMakie.Label(fig_bias_pfull_vars[0, col_idx], "$p_lvl $p_units", tellwidth = false, fontsize = 30) + end + + # Define figure with at most four columns and the necessary number of rows for + # all the variables + num_vars = length(compare_vars_biases_plot_extrema_pfull) + num_cols = min(4, num_vars) + num_rows = ceil(Int, num_vars / 4) + fig_lat_pres = CairoMakie.Figure(size = (650 * num_cols, 450 * num_rows)) + + # Initialize ranges for colorbar + compare_vars_biases_heatmap_extrema_pfull = get_compare_vars_biases_heatmap_extrema_pfull() + + # Take zonal mean and plot lat - pressure heatmap + for (idx, short_name) in enumerate(keys(sim_obs_comparsion_dict)) + # Compute where the figure should be placed + # Goes from 1 -> (1,1), 2 -> (1,2), ..., 4 -> (1,4), 5 -> (2,1) + # for idx -> (col_idx, row_idx) + row_idx = div(idx - 1, 4) + 1 + col_idx = 1 + rem(idx - 1, 4) + layout = fig_lat_pres[row_idx, col_idx] = CairoMakie.GridLayout() + + # Load data + sim_var = sim_obs_comparsion_dict[short_name].sim + obs_var = sim_obs_comparsion_dict[short_name].obs + + # Take zonal mean (average along lon arithmetically) + sim_var = ClimaAnalysis.average_lon(sim_var) + obs_var = ClimaAnalysis.average_lon(obs_var) + + # Compute bias and set short name, long name, and units + bias_var = sim_var - obs_var + bias_var = ClimaAnalysis.set_units(bias_var, ClimaAnalysis.units(sim_var)) + bias_var.attributes["short_name"] = "sim-obs_$(ClimaAnalysis.short_name(sim_var))" + bias_var.attributes["long_name"] = "SIM-OBS_$(ClimaAnalysis.long_name(sim_var))" + ClimaAnalysis.Visualize.heatmap2D!( + layout, + bias_var, + more_kwargs = Dict( + :axis => Dict([:yreversed => true]), + :plot => Dict( + :colormap => :vik, + :colorrange => compare_vars_biases_heatmap_extrema_pfull[short_name], + :colormap => CairoMakie.cgrad(:vik, 11, categorical = true), + ), + ), + ) + end + + # Save figures + CairoMakie.save(joinpath(leaderboard_base_path, "bias_vars_in_pfull.png"), fig_bias_pfull_vars) + CairoMakie.save(joinpath(leaderboard_base_path, "bias_lat_pfull_heatmaps.png"), fig_lat_pres) +end + if abspath(PROGRAM_FILE) == @__FILE__ if length(ARGS) < 2 error("Usage: julia leaderboard.jl ") @@ -174,4 +337,5 @@ if abspath(PROGRAM_FILE) == @__FILE__ leaderboard_base_path = ARGS[begin] diagnostics_folder_path = ARGS[begin + 1] compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) end diff --git a/experiments/ClimaEarth/run_amip.jl b/experiments/ClimaEarth/run_amip.jl index 34b920939..1bf77e411 100644 --- a/experiments/ClimaEarth/run_amip.jl +++ b/experiments/ClimaEarth/run_amip.jl @@ -873,6 +873,7 @@ if ClimaComms.iamroot(comms_ctx) diagnostics_folder_path = atmos_sim.integrator.p.output_dir leaderboard_base_path = dir_paths.artifacts compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) end end ## plot extra atmosphere diagnostics if specified