From f8081fef25aa5b27bdf698aabfd352c3c7e5ded7 Mon Sep 17 00:00:00 2001 From: Trevor Bedford Date: Thu, 26 Sep 2024 17:25:24 -0700 Subject: [PATCH] Specify build defaults with "default" key This follows recommendation in issue https://github.com/nextstrain/ncov/issues/1131 to use the pattern frequencies: default: min_date: "6M" narrow_bandwidth: 0.038 This use of "default" extends just to min_date, max_date and narrow_bandwidth. This behavior is now documented in parameters.yaml as well as workflow-config-file.rst. The specification of frequencies parameters in builds.yaml now follows this pattern. --- defaults/parameters.yaml | 21 ++++++++++--- docs/src/reference/workflow-config-file.rst | 22 +++++++++++-- .../nextstrain-gisaid-21L/builds.yaml | 31 ++----------------- .../nextstrain-gisaid/builds.yaml | 30 +----------------- .../nextstrain-open/builds.yaml | 30 +----------------- workflow/snakemake_rules/common.smk | 14 ++++----- 6 files changed, 47 insertions(+), 101 deletions(-) diff --git a/defaults/parameters.yaml b/defaults/parameters.yaml index 9359b43a6..fce6baac2 100644 --- a/defaults/parameters.yaml +++ b/defaults/parameters.yaml @@ -137,8 +137,21 @@ ancestral: # Frequencies settings frequencies: - # min_date is set by default to 1 year before present - # but can be explicitly set if desired + # default settings that can be over-ridden for specific builds + default: + + # min_date is set by default to 1 year before present + min_date: "1Y" + + # max_date is set by default to present date - recent_days_to_censor + + # KDE bandwidths in proportion of a year to use per strain. + # using 1M bandwidth by default + narrow_bandwidth: 0.0833 + + + # settings that can be over-ridden across all builds, but not for specific builds + recent_days_to_censor: 0 # Number of weeks between pivots pivot_interval: 1 @@ -146,9 +159,7 @@ frequencies: # Measure pivots in weeks pivot_interval_units: "weeks" - # KDE bandwidths in proportion of a year to use per strain. 
- # using 15 day bandwidth - narrow_bandwidth: 0.041 + # Weight of KDE that uses wide bandwidth proportion_wide: 0.0 # Diffusion frequency settings diff --git a/docs/src/reference/workflow-config-file.rst b/docs/src/reference/workflow-config-file.rst index 23f9ed789..e9c08569d 100644 --- a/docs/src/reference/workflow-config-file.rst +++ b/docs/src/reference/workflow-config-file.rst @@ -983,13 +983,29 @@ columns frequencies ----------- -- Valid attributes: +- type: object +- description: Parameters for specifying tip frequency calculations via ``augur frequencies`` +- examples: + +.. code:: yaml + + frequencies: + default: + min_date: "6M" + narrow_bandwidth: 0.038 + global_1m: + min_date: "1M" + narrow_bandwidth: 0.019 + global_2020_to_2022: + min_date: "2020-01-01" + max_date: "2022-01-01" + narrow_bandwidth: 0.076 + +Each named frequencies configuration (``default`` or build-named) supports specification of ``min_date``, ``max_date`` and ``narrow_bandwidth``. Other parameters can only be specified across all builds. .. contents:: :local: -.. 
_min_date-1: - min_date ~~~~~~~~ diff --git a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml index 5c87512da..7345d6010 100644 --- a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml @@ -896,6 +896,9 @@ traits: # narrow_bandwidth = 0.019 or 7 days for "1m" and "2m" # narrow_bandwidth = 0.038 or 14 days for "6m" and "all-time" frequencies: + default: + min_date: "2020-01-01" + narrow_bandwidth: 0.038 global_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -904,10 +907,6 @@ frequencies: narrow_bandwidth: 0.019 global_6m: min_date: "6M" - narrow_bandwidth: 0.038 - global_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 africa_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -916,10 +915,6 @@ frequencies: narrow_bandwidth: 0.019 africa_6m: min_date: "6M" - narrow_bandwidth: 0.038 - africa_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 asia_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -928,10 +923,6 @@ frequencies: narrow_bandwidth: 0.019 asia_6m: min_date: "6M" - narrow_bandwidth: 0.038 - asia_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 europe_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -940,10 +931,6 @@ frequencies: narrow_bandwidth: 0.019 europe_6m: min_date: "6M" - narrow_bandwidth: 0.038 - europe_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 north-america_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -952,10 +939,6 @@ frequencies: narrow_bandwidth: 0.019 north-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - north-america_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 oceania_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -964,10 +947,6 @@ frequencies: narrow_bandwidth: 0.019 oceania_6m: min_date: "6M" - narrow_bandwidth: 0.038 - oceania_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 south-america_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -976,7 
+955,3 @@ frequencies: narrow_bandwidth: 0.019 south-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - south-america_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 diff --git a/nextstrain_profiles/nextstrain-gisaid/builds.yaml b/nextstrain_profiles/nextstrain-gisaid/builds.yaml index cdb06d629..65e4cec89 100644 --- a/nextstrain_profiles/nextstrain-gisaid/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid/builds.yaml @@ -887,7 +887,7 @@ traits: # narrow_bandwidth = 0.019 or 7 days for "1m" and "2m" # narrow_bandwidth = 0.038 or 14 days for "6m" and "all-time" frequencies: - reference: + default: min_date: "2020-01-01" narrow_bandwidth: 0.038 global_1m: @@ -898,10 +898,6 @@ frequencies: narrow_bandwidth: 0.019 global_6m: min_date: "6M" - narrow_bandwidth: 0.038 - global_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 africa_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -910,10 +906,6 @@ frequencies: narrow_bandwidth: 0.019 africa_6m: min_date: "6M" - narrow_bandwidth: 0.038 - africa_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 asia_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -922,10 +914,6 @@ frequencies: narrow_bandwidth: 0.019 asia_6m: min_date: "6M" - narrow_bandwidth: 0.038 - asia_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 europe_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -934,10 +922,6 @@ frequencies: narrow_bandwidth: 0.019 europe_6m: min_date: "6M" - narrow_bandwidth: 0.038 - europe_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 north-america_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -946,10 +930,6 @@ frequencies: narrow_bandwidth: 0.019 north-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - north-america_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 oceania_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -958,10 +938,6 @@ frequencies: narrow_bandwidth: 0.019 oceania_6m: min_date: "6M" - narrow_bandwidth: 0.038 - oceania_all-time: - min_date: 
"2020-01-01" - narrow_bandwidth: 0.038 south-america_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -970,7 +946,3 @@ frequencies: narrow_bandwidth: 0.019 south-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - south-america_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 diff --git a/nextstrain_profiles/nextstrain-open/builds.yaml b/nextstrain_profiles/nextstrain-open/builds.yaml index 5c8422c16..5e9d9755f 100644 --- a/nextstrain_profiles/nextstrain-open/builds.yaml +++ b/nextstrain_profiles/nextstrain-open/builds.yaml @@ -892,7 +892,7 @@ traits: # narrow_bandwidth = 0.019 or 7 days for "1m" and "2m" # narrow_bandwidth = 0.038 or 14 days for "6m" and "all-time" frequencies: - reference: + default: min_date: "2020-01-01" narrow_bandwidth: 0.038 global_1m: @@ -903,10 +903,6 @@ frequencies: narrow_bandwidth: 0.019 global_6m: min_date: "6M" - narrow_bandwidth: 0.038 - global_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 africa_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -915,10 +911,6 @@ frequencies: narrow_bandwidth: 0.019 africa_6m: min_date: "6M" - narrow_bandwidth: 0.038 - africa_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 asia_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -927,10 +919,6 @@ frequencies: narrow_bandwidth: 0.019 asia_6m: min_date: "6M" - narrow_bandwidth: 0.038 - asia_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 europe_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -939,10 +927,6 @@ frequencies: narrow_bandwidth: 0.019 europe_6m: min_date: "6M" - narrow_bandwidth: 0.038 - europe_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 north-america_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -951,10 +935,6 @@ frequencies: narrow_bandwidth: 0.019 north-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - north-america_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 oceania_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -963,10 +943,6 @@ frequencies: narrow_bandwidth: 
0.019 oceania_6m: min_date: "6M" - narrow_bandwidth: 0.038 - oceania_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 south-america_1m: min_date: "1M" narrow_bandwidth: 0.019 @@ -975,7 +951,3 @@ frequencies: narrow_bandwidth: 0.019 south-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - south-america_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 diff --git a/workflow/snakemake_rules/common.smk b/workflow/snakemake_rules/common.smk index 022bdd8a7..290716ec1 100644 --- a/workflow/snakemake_rules/common.smk +++ b/workflow/snakemake_rules/common.smk @@ -183,8 +183,8 @@ def _get_sampling_bias_correction_for_wildcards(wildcards): def _get_min_date_for_frequencies(wildcards): if wildcards.build_name in config["frequencies"] and "min_date" in config["frequencies"][wildcards.build_name]: return config["frequencies"][wildcards.build_name]["min_date"] - elif "frequencies" in config and "min_date" in config["frequencies"]: - return config["frequencies"]["min_date"] + elif "frequencies" in config and "min_date" in config["frequencies"]["default"]: + return config["frequencies"]["default"]["min_date"] else: # If not explicitly specified, default to 1 year back from the present min_date_cutoff = datetime.date.today() - datetime.timedelta(weeks=52) @@ -195,8 +195,8 @@ def _get_min_date_for_frequencies(wildcards): def _get_max_date_for_frequencies(wildcards): if wildcards.build_name in config["frequencies"] and "max_date" in config["frequencies"][wildcards.build_name]: return config["frequencies"][wildcards.build_name]["max_date"] - elif "frequencies" in config and "max_date" in config["frequencies"]: - return config["frequencies"]["max_date"] + elif "frequencies" in config and "max_date" in config["frequencies"]["default"]: + return config["frequencies"]["default"]["max_date"] else: # Allow users to censor the N most recent days to minimize effects of # uneven recent sampling. 
@@ -211,9 +211,9 @@ def _get_narrow_bandwidth_for_wildcards(wildcards): # check if builds.yaml contains frequencies:{build_name}:narrow_bandwidth if wildcards.build_name in config["frequencies"] and 'narrow_bandwidth' in config["frequencies"][wildcards.build_name]: return config["frequencies"][wildcards.build_name]["narrow_bandwidth"] - # check if parameters.yaml contains frequencies:narrow_bandwidth - elif "frequencies" in config and "narrow_bandwidth" in config["frequencies"]: - return config["frequencies"]["narrow_bandwidth"] + # check if parameters.yaml contains frequencies:default:narrow_bandwidth + elif "frequencies" in config and "narrow_bandwidth" in config["frequencies"]["default"]: + return config["frequencies"]["default"]["narrow_bandwidth"] # else return augur frequencies default value else: return 0.0833