From 1cd54c5411ed43f23d1911f1ec3ff882ed1a0ac1 Mon Sep 17 00:00:00 2001 From: Kevin Phan <98072684+ph-kev@users.noreply.github.com> Date: Fri, 15 Nov 2024 11:26:16 -0800 Subject: [PATCH] Add plots for variables in pressure coordinates The plots added are bias plots at 850 hPa, 500 hPa, and 250 hPa and lat - pressure plots. --- NEWS.md | 6 + docs/src/leaderboard.md | 11 ++ .../ClimaEarth/leaderboard/data_sources.jl | 159 ++++++++++++++++- .../ClimaEarth/leaderboard/leaderboard.jl | 166 +++++++++++++++++- experiments/ClimaEarth/run_amip.jl | 1 + 5 files changed, 336 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 129654136e..f22f34dc56 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,6 +21,12 @@ by identifying where elevation is greater than 0. Note, this can lead to misidentification of ocean in some areas of the globe that are inland but below sea level (Dead Sea, Death Valley, ...). +### Leaderboard for variables over longitude, latitude, time, and pressure - PR [#1094](https://github.com/CliMA/ClimaCoupler.jl/pull/1094) + +As a part of the post processing pipeline, bias plots for variables at the +pressure levels of 850.0, 500.0, 250.0 hPa and bias plots over latitude and +pressure levels are being created. + ### Code cleanup diff --git a/docs/src/leaderboard.md b/docs/src/leaderboard.md index 64be460570..cb5f5f522b 100644 --- a/docs/src/leaderboard.md +++ b/docs/src/leaderboard.md @@ -51,3 +51,14 @@ must be initialized for each variable of interest. The CliMA model is added with the `RMSEVariable`. It is assumed that the `RMSEVariable` contains only the columns "DJF", "MAM", "JJA", "SON", and "ANN" in that order. The file `leaderboard.jl` will load the appropriate data into the `RMSEVariable`. + +### Add a new variable to compare against observations in pressure coordinates +To add a new variable, you only need to modify the variable `sim_var_pfull_dict` in the +function `get_sim_var_in_pfull_dict`, the variable `obs_var_dict` in the function +`get_obs_var_in_pfull_dict`, and the variable `compare_vars_biases_plot_extrema` in the +function `get_compare_vars_biases_plot_extrema_pfull`. The variables and functions are +defined exactly the same as their analogous versions in the section above. + +It is expected that the dimensions of the variables are time, latitude, longitude, and +pressure in no particular order and the units for the pressure dimension is expected to be +`hPa`. diff --git a/experiments/ClimaEarth/leaderboard/data_sources.jl b/experiments/ClimaEarth/leaderboard/data_sources.jl index 557ff87f23..9f731e3b0d 100644 --- a/experiments/ClimaEarth/leaderboard/data_sources.jl +++ b/experiments/ClimaEarth/leaderboard/data_sources.jl @@ -1,5 +1,6 @@ import ClimaAnalysis import ClimaUtilities.ClimaArtifacts: @clima_artifact +import OrderedCollections: OrderedDict # Tuple of short names for loading simulation and observational data sim_obs_short_names_no_pr = [ @@ -36,7 +37,8 @@ end """ get_compare_vars_biases_plot_extrema() -Return a dictionary mapping short names to ranges for the bias plots. +Return a dictionary mapping short names to ranges for the bias plots. This is +used by the function `compute_leaderboard`. To add a variable to the leaderboard, add a key-value pair to the dictionary `compare_vars_biases_plot_extrema` whose key is a short name key is the same @@ -66,7 +68,7 @@ end get_sim_var_dict(diagnostics_folder_path) Return a dictionary mapping short names to `OutputVar` containing preprocessed -simulation data. +simulation data. This is used by the function `compute_leaderboard`. To add a variable for the leaderboard, add a key-value pair to the dictionary `sim_var_dict` whose key is the short name of the variable and the value is an @@ -108,7 +110,7 @@ end get_obs_var_dict() Return a dictionary mapping short names to `OutputVar` containing preprocessed -observational data. +observational data. This is used by the function `compute_leaderboard`. To add a variable for the leaderboard, add a key-value pair to the dictionary `obs_var_dict` whose key is the short name of the variable and the value is an @@ -159,11 +161,11 @@ end get_rmse_var_dict() Return a dictionary mapping short names to `RMSEVariable` containing information about -RMSEs of models for the short name of the variable. +RMSEs of models for the short name of the variable. This is used by the function +`compute_leaderboard`. """ function get_rmse_var_dict() # Load data into RMSEVariables - rmse_var_names = ["pr", "rsut", "rlut"] rmse_var_pr = ClimaAnalysis.read_rmses( joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv"), "pr", @@ -191,6 +193,151 @@ function get_rmse_var_dict() ClimaAnalysis.add_unit!(rmse_var_rlut, "CliMA", "W m^-2") # Store the rmse vars in a dictionary - rmse_var_dict = Dict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut) + rmse_var_dict = OrderedDict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut) return rmse_var_dict end + +""" + get_sim_var_in_pfull_dict(diagnostics_folder_path) + +Return a dictionary mapping short names to `OutputVar` containing preprocessed +simulation data in pressure space. This is used by `compute_pfull_leaderboard`. + +To add a variable for the leaderboard, add a key-value pair to the dictionary +`sim_var_dict` whose key is the short name of the variable and the value is an +anonymous function that returns a `OutputVar`. For each variable, any +preprocessing should be done in the corresponding anonymous function which +includes unit conversion and shifting the dates. + +The variable should have only four dimensions: latitude, longitude, time, and +pressure. The units of pressure should be in hPa. +""" +function get_sim_var_in_pfull_dict(diagnostics_folder_path) + simdir = ClimaAnalysis.SimDir(diagnostics_folder_path) + sim_var_pfull_dict = Dict{String, Any}() + + short_names = ["ta", "hur", "hus"] + for short_name in short_names + sim_var_pfull_dict[short_name] = + () -> begin + sim_var = get(simdir, short_name = short_name) + pfull_var = get(simdir, short_name = "pfull") + + (ClimaAnalysis.units(sim_var) == "") && (sim_var = ClimaAnalysis.set_units(sim_var, "unitless")) + (ClimaAnalysis.units(sim_var) == "kg kg^-1") && + (sim_var = ClimaAnalysis.set_units(sim_var, "unitless")) + + sim_in_pfull_var = ClimaAnalysis.Atmos.to_pressure_coordinates(sim_var, pfull_var) + sim_in_pfull_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_in_pfull_var) + sim_in_pfull_var = ClimaAnalysis.convert_dim_units( + sim_in_pfull_var, + "pfull", + "hPa"; + conversion_function = x -> 0.01 * x, + ) + return sim_in_pfull_var + end + end + return sim_var_pfull_dict +end + +""" + get_obs_var_in_pfull_dict() + +Return a dictionary mapping short names to `OutputVar` containing preprocessed +observational data in pressure coordinates. This is used by +`compute_pfull_leaderboard`. + +To add a variable for the leaderboard, add a key-value pair to the dictionary +`obs_var_dict` whose key is the short name of the variable and the value is an anonymous +function that returns a `OutputVar`. The function must take in a start date +which is used to align the times in the observational data to match the +simulation data. The short name must be the same as in `sim_var_pfull_dict` in +the function `get_sim_var_in_pfull_dict`. Any preprocessing is done in the +function which includes unit conversion and shifting the dates. + +The variable should have only four dimensions: latitude, longitude, time, and +pressure. The units of pressure should be in hPa. +""" +function get_obs_var_in_pfull_dict() + artifact_path = joinpath( + @clima_artifact("era5_monthly_averages_pressure_levels_1979_2024"), + "era5_monthly_averages_pressure_levels_197901-202410.nc", + ) + short_names_pairs = [("ta", "t"), ("hus", "q")] + obs_var_dict = Dict{String, Any}() + for (short_name, era5_short_name) in short_names_pairs + obs_var_dict[short_name] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + artifact_path, + era5_short_name, + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + (ClimaAnalysis.units(obs_var) == "kg kg**-1") && + (obs_var = ClimaAnalysis.set_units(obs_var, "unitless")) + obs_var = ClimaAnalysis.Var.convert_dim_units( + obs_var, + "pressure_level", + "hPa"; + conversion_function = x -> 0.01 * x, + ) + return obs_var + end + end + + obs_var_dict["hur"] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + artifact_path, + "r", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + obs_var = ClimaAnalysis.Var.convert_dim_units( + obs_var, + "pressure_level", + "hPa"; + conversion_function = x -> 0.01 * x, + ) + # Convert from percentages (e.g. 120%) to decimal (1.20) + obs_var = ClimaAnalysis.Var.convert_units(obs_var, "unitless", conversion_function = x -> 0.01 * x) + return obs_var + end + return obs_var_dict +end + +""" + get_compare_vars_biases_plot_extrema_pfull() + +Return a dictionary mapping short names to ranges for the bias plots. This is +used by the function `compute_pfull_leaderboard`. + +To add a variable to the leaderboard, add a key-value pair to the dictionary +`compare_vars_biases_plot_extrema` whose key is a short name key is the same +short name in `sim_var_pfull_dict` in the function `get_sim_var_pfull_dict` and +the value is a tuple, where the first element is the lower bound and the last +element is the upper bound for the bias plots. +""" +function get_compare_vars_biases_plot_extrema_pfull() + compare_vars_biases_plot_extrema = Dict("ta" => (-5.0, 5.0), "hur" => (-0.4, 0.4), "hus" => (-0.0015, 0.0015)) + return compare_vars_biases_plot_extrema +end + +""" + get_compare_vars_biases_heatmap_extrema_pfull() + +Return a dictionary mapping short names to ranges for the heatmaps. This is +used by the function `compute_pfull_leaderboard`. + +To add a variable to the leaderboard, add a key-value pair to the dictionary +`compare_vars_biases_heatmap_extrema` whose key is a short name key is the same +short name in `sim_var_pfull_dict` in the function `get_sim_var_pfull_dict` and +the value is a tuple, where the first element is the lower bound and the last +element is the upper bound for the bias plots. +""" +function get_compare_vars_biases_heatmap_extrema_pfull() + compare_vars_biases_heatmap_extrema = Dict("ta" => (-10.0, 10.0), "hur" => (-0.4, 0.4), "hus" => (-0.001, 0.001)) + return compare_vars_biases_heatmap_extrema +end diff --git a/experiments/ClimaEarth/leaderboard/leaderboard.jl b/experiments/ClimaEarth/leaderboard/leaderboard.jl index 57b18ad1cd..d3700f5589 100644 --- a/experiments/ClimaEarth/leaderboard/leaderboard.jl +++ b/experiments/ClimaEarth/leaderboard/leaderboard.jl @@ -1,5 +1,4 @@ import ClimaAnalysis -import ClimaUtilities.ClimaArtifacts: @clima_artifact import GeoMakie import CairoMakie import Dates @@ -137,6 +136,7 @@ function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) end # Add RMSE for the CliMA model and for each season + rmse_var_names = keys(rmse_var_dict) for short_name in rmse_var_names for season in seasons !ClimaAnalysis.isempty(sim_obs_comparsion_dict[short_name][season][1]) && ( @@ -167,6 +167,169 @@ function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) CairoMakie.save(joinpath(leaderboard_base_path, "bias_leaderboard.png"), fig_leaderboard) end +""" + compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) + +Plot the biases and a leaderboard of various variables defined over longitude, latitude, +time, and pressure levels. + +The parameter `leaderboard_base_path` is the path to save the leaderboards and bias plots, +and `diagnostics_folder_path` is the path to the simulation data. + +The parameter `leaderboard_base_path` is the path to save the leaderboards and bias plots, +and `diagnostics_folder_path` is the path to the simulation data. + +Loading and preprocessing simulation data is done by `get_sim_var_in_pfull_dict`. Loading +and preprocessing observational data is done by `get_obs_var_in_pfull_dict`. The ranges of +the bias plots is defined by `get_compare_vars_biases_plot_extrema_pfull`. See the functions +defined in data_sources.jl for more information. +""" +function compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) + @info "Error against observations for variables in pressure coordinates" + + sim_var_pfull_dict = get_sim_var_in_pfull_dict(diagnostics_folder_path) + obs_var_pfull_dict = get_obs_var_in_pfull_dict() + + # Print dates for debugging + _, get_var = first(sim_var_pfull_dict) + var = get_var() + output_dates = Dates.DateTime(var.attributes["start_date"]) .+ Dates.Second.(ClimaAnalysis.times(var)) + @info "Working with dates:" + @info output_dates + + # Set up dict for storing simulation and observational data after processing + sim_obs_comparsion_dict = Dict() + + for short_name in keys(sim_var_pfull_dict) + sim_var = sim_var_pfull_dict[short_name]() + obs_var = obs_var_pfull_dict[short_name](sim_var.attributes["start_date"]) + + # Check units for pressure are in hPa + ClimaAnalysis.dim_units(sim_var, ClimaAnalysis.pressure_name(sim_var)) != "hPa" && + error("Units of pressure should be hPa for $short_name simulation data") + ClimaAnalysis.dim_units(obs_var, ClimaAnalysis.pressure_name(obs_var)) != "hPa" && + error("Units of pressure should be hPa for $short_name simulation data") + + # Remove first spin_up_months from simulation + spin_up_months = 6 + spinup_cutoff = spin_up_months * 31 * 86400.0 + ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff && + (sim_var = ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff)) + + # Restrain the pressure levels so we can resample + min_pfull = ClimaAnalysis.pressures(obs_var)[1] + sim_pressures = ClimaAnalysis.pressures(sim_var) + greater_than_min_pfull_idx = findfirst(x -> x > min_pfull, sim_pressures) + sim_var = ClimaAnalysis.window(sim_var, "pfull", left = sim_pressures[greater_than_min_pfull_idx]) + + # Resample over lat, lon, time, and pressures + obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var) + + # Take time average + sim_var = ClimaAnalysis.average_time(sim_var) + obs_var = ClimaAnalysis.average_time(obs_var) + + # Save observation and simulation data + sim_obs_comparsion_dict[short_name] = (; sim = sim_var, obs = obs_var) + end + + # Slicing only uses the nearest value, so we also need to keep track of the + # actual pressure levels that we get when slicing + target_p_lvls = [850.0, 500.0, 250.0] + real_p_lvls = [] + + # Get units for pressure for plotting + p_units = ClimaAnalysis.dim_units(first(sim_obs_comparsion_dict)[2].sim, "pfull") + + # Initialize ranges for colorbar and figure whose columns are pressure levels and rows are variables + compare_vars_biases_plot_extrema_pfull = get_compare_vars_biases_plot_extrema_pfull() + fig_bias_pfull_vars = CairoMakie.Figure(size = (650 * length(target_p_lvls), 450 * length(sim_obs_comparsion_dict))) + + # Plot bias at the pressure levels in p_lvls + for (row_idx, short_name) in enumerate(keys(sim_obs_comparsion_dict)) + # Plot label for variable name + CairoMakie.Label(fig_bias_pfull_vars[row_idx, 0], short_name, tellheight = false, fontsize = 30) + for (col_idx, p_lvl) in enumerate(target_p_lvls) + layout = fig_bias_pfull_vars[row_idx, col_idx] = CairoMakie.GridLayout() + sim_var = sim_obs_comparsion_dict[short_name].sim + obs_var = sim_obs_comparsion_dict[short_name].obs + + # Slice at pressure level using nearest value rather interpolating + sim_var = ClimaAnalysis.slice(sim_var, pfull = p_lvl) + obs_var = ClimaAnalysis.slice(obs_var, pressure_level = p_lvl) + + # Get the actual pressure level that we slice at + push!(real_p_lvls, parse(Float64, sim_var.attributes["slice_pfull"])) + + # Add so that it show up on in the title + sim_var.attributes["short_name"] = "mean $(ClimaAnalysis.short_name(sim_var))" + + # Plot bias + ClimaAnalysis.Visualize.plot_bias_on_globe!( + layout, + sim_var, + obs_var, + cmap_extrema = compare_vars_biases_plot_extrema_pfull[short_name], + ) + end + end + + # Plot label for the pressure levels + for (col_idx, p_lvl) in enumerate(real_p_lvls[begin:length(target_p_lvls)]) + CairoMakie.Label(fig_bias_pfull_vars[0, col_idx], "$p_lvl $p_units", tellwidth = false, fontsize = 30) + end + + # Define figure with at most four columns and the necessary number of rows for + # all the variables + num_vars = length(compare_vars_biases_plot_extrema_pfull) + num_cols = min(4, num_vars) + num_rows = ceil(Int, num_vars / 4) + fig_lat_pres = CairoMakie.Figure(size = (650 * num_cols, 450 * num_rows)) + + # Initialize ranges for colorbar + compare_vars_biases_heatmap_extrema_pfull = get_compare_vars_biases_heatmap_extrema_pfull() + + # Take zonal mean and plot lat - pressure heatmap + for (idx, short_name) in enumerate(keys(sim_obs_comparsion_dict)) + # Compute where the figure should be placed + # Goes from 1 -> (1,1), 2 -> (1,2), ..., 4 -> (1,4), 5 -> (2,1) + # for idx -> (col_idx, row_idx) + row_idx = div(idx - 1, 4) + 1 + col_idx = 1 + rem(idx - 1, 4) + layout = fig_lat_pres[row_idx, col_idx] = CairoMakie.GridLayout() + + # Load data + sim_var = sim_obs_comparsion_dict[short_name].sim + obs_var = sim_obs_comparsion_dict[short_name].obs + + # Take zonal mean (average along lon arithmetically) + sim_var = ClimaAnalysis.average_lon(sim_var) + obs_var = ClimaAnalysis.average_lon(obs_var) + + # Compute bias and set short name, long name, and units + bias_var = sim_var - obs_var + bias_var = ClimaAnalysis.set_units(bias_var, ClimaAnalysis.units(sim_var)) + bias_var.attributes["short_name"] = "sim-obs_$(ClimaAnalysis.short_name(sim_var))" + bias_var.attributes["long_name"] = "SIM-OBS_$(ClimaAnalysis.long_name(sim_var))" + ClimaAnalysis.Visualize.heatmap2D!( + layout, + bias_var, + more_kwargs = Dict( + :axis => Dict([:yreversed => true]), + :plot => Dict( + :colormap => :vik, + :colorrange => compare_vars_biases_heatmap_extrema_pfull[short_name], + :colormap => CairoMakie.cgrad(:vik, 11, categorical = true), + ), + ), + ) + end + + # Save figures + CairoMakie.save(joinpath(leaderboard_base_path, "bias_vars_in_pfull.png"), fig_bias_pfull_vars) + CairoMakie.save(joinpath(leaderboard_base_path, "bias_lat_pfull_heatmaps.png"), fig_lat_pres) +end + if abspath(PROGRAM_FILE) == @__FILE__ if length(ARGS) < 2 error("Usage: julia leaderboard.jl ") @@ -174,4 +337,5 @@ if abspath(PROGRAM_FILE) == @__FILE__ leaderboard_base_path = ARGS[begin] diagnostics_folder_path = ARGS[begin + 1] compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) end diff --git a/experiments/ClimaEarth/run_amip.jl b/experiments/ClimaEarth/run_amip.jl index 0265ea0a0d..6979b7b69d 100644 --- a/experiments/ClimaEarth/run_amip.jl +++ b/experiments/ClimaEarth/run_amip.jl @@ -959,6 +959,7 @@ if ClimaComms.iamroot(comms_ctx) diagnostics_folder_path = atmos_sim.integrator.p.output_dir leaderboard_base_path = dir_paths.artifacts compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) end end ## plot extra atmosphere diagnostics if specified