diff --git a/.buildlibrary b/.buildlibrary index cdf45bd..a695f4e 100644 --- a/.buildlibrary +++ b/.buildlibrary @@ -1,4 +1,4 @@ -ValidationKey: '1332170' +ValidationKey: '1523040' AutocreateReadme: no AcceptedWarnings: - 'Warning: package ''.*'' was built under R version' diff --git a/.zenodo.json b/.zenodo.json index 1b51dde..b700034 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,6 +1,6 @@ { "title": "GDPuc: Easily Convert GDP Data", - "version": "0.7.0", + "version": "0.8.0", "description": "

<p>A function to convert GDP time series from one unit to another.\n All common GDP units are included, i.e. current and constant local currency\n units, US$ via market exchange rates and international dollars via\n purchasing power parities. Conversion factors can easily be customized. \n Conversion at a regional/aggregated level is also possible.<\/p>", "creators": [ { diff --git a/DESCRIPTION b/DESCRIPTION index 3e49416..8b4c574 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GDPuc Title: Easily Convert GDP Data -Version: 0.7.0 +Version: 0.8.0 Authors@R: c(person(given = "Johannes", family = "Koch", @@ -43,6 +43,6 @@ Suggests: zoo Config/testthat/edition: 3 Encoding: UTF-8 -Date: 2022-02-08 +Date: 2022-02-15 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md index 076f485..f00ca1a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# GDPuc 0.8.0 + +* Add option "no_conversion" for the `replace_NAs` argument. + +* The `replace_NAs` argument can now take a vector allowing for a combination of options. + # GDPuc 0.7.0 * Add argument `return_cfs` to return the conversion factors used. diff --git a/R/adapt_source.R b/R/adapt_source.R index 5d91b18..fe334fd 100644 --- a/R/adapt_source.R +++ b/R/adapt_source.R @@ -14,8 +14,8 @@ adapt_source <- function(gdp, source, with_regions, replace_NAs) { dplyr::anti_join(source, by = c("iso3c", "year"))) %>% tidyr::complete(.data$iso3c, .data$year) - if (replace_NAs %in% c("linear", "linear_regional_average")) { - # Make sure that source contains obersvations for every year between min and max years. + if (replace_NAs[1] == "linear") { + # Make sure that source contains observations for every year between min and max years. 
# This is important for the function lin_int_ext, which works with indices, to compute the # correct values source_adapted <- source_adapted %>% @@ -33,7 +33,7 @@ adapt_source <- function(gdp, source, with_regions, replace_NAs) { lin_int_ext)) } - if (replace_NAs %in% c("regional_average", "linear_regional_average")) { + if ("regional_average" %in% replace_NAs) { # Get GDP variable from source object, with its unit regex_var <- "GDP, PPP \\(constant .... international \\$\\)" weight_var <- grep(regex_var, colnames(source), value = TRUE)[1] @@ -68,7 +68,7 @@ adapt_source <- function(gdp, source, with_regions, replace_NAs) { dplyr::ungroup() } - if (replace_NAs == 1) { + if (1 %in% replace_NAs) { source_adapted <- source_adapted %>% # Mutate the 3 important columns dplyr::rowwise() %>% diff --git a/R/check_user_input.R b/R/check_user_input.R index 1aa1353..57be1d6 100644 --- a/R/check_user_input.R +++ b/R/check_user_input.R @@ -120,14 +120,24 @@ check_with_regions <- function(unit_in, unit_out, source, with_regions) { # Check input parameter 'replace_NAs' check_replace_NAs <- function(with_regions, replace_NAs) { if (!is.null(replace_NAs)) { - if (replace_NAs == 1) { - lifecycle::deprecate_warn("0.7.0", "convertGDP(replace_NAs = 'was deprecated')") + if (setequal(replace_NAs, 1)) { + lifecycle::deprecate_warn("0.7.0", "convertGDP(replace_NAs = 'should not be 1')") } - if (!replace_NAs %in% c(0, 1, "linear", "regional_average", "linear_regional_average")) { - abort("Invalid 'replace_NAs' argument. 
Has to be either NULL, 0, 1, linear, regional_average or \\ - linear_regional_average.") + if ("linear_regional_average" %in% replace_NAs) { + lifecycle::deprecate_stop( + "0.8.0", + "convertGDP(replace_NAs = '\"linear_regional_average\" has been replaced by c(\"linear\", \"regional_average\")')" + ) } - if (replace_NAs == "regional_average" && is.null(with_regions)) { + if (!all(replace_NAs %in% c(0, 1, "no_conversion", "linear", "regional_average"))) { + abort("Invalid 'replace_NAs' argument. Has to be either NULL, 0, 1, no_conversion, linear, regional_average or \\ + a combination of the above.") + } + if (length(replace_NAs) > 1 && replace_NAs[1] != "linear") { + abort("Invalid 'replace_NAs' argument. The only accepted combinations of arguments start with 'linear', e.g. \\ + c('linear', 'no_conversion').") + } + if ("regional_average" %in% replace_NAs && is.null(with_regions)) { abort("Using 'regional_average' requires a region mapping. The 'with_regions' argument can't be NULL.") } } diff --git a/R/convertGDP.R b/R/convertGDP.R index 21e402a..e7c677d 100644 --- a/R/convertGDP.R +++ b/R/convertGDP.R @@ -64,16 +64,15 @@ #' @param replace_NAs NULL by default, meaning no NA replacement. Can be set to one of the following: #' \itemize{ #' \item 0: resulting NAs are simply replaced with 0. +#' \item "no_conversion": resulting NAs are simply replaced with the values from the gdp argument. #' \item "linear": missing conversion factors in the source object are inter- and extrapolated linearly. #' For the extrapolation, the closest 5 data points are used. #' \item "regional_average": missing conversion factors in the source object are replaced with #' the regional average of the region to which the country belongs. This requires a region-mapping to #' be passed to the function, see the with_regions argument. -#' \item "linear_regional_average": missing conversion factors in the source object will be linearly -#' inter- and extrapolated, and when impossible (e.g. 
when no data at all is available for a country) set -#' to the regional GDP-weighted averages. This also requires a region-mapping to -#' be passed to the function, see the with_regions argument. #' } +#' Can also be a vector with "linear" as first element, e.g. c("linear", 0) or c("linear", "no_conversion"), +#' in which case, the operations are done in sequence. #' @param verbose TRUE or FALSE. A flag to turn verbosity on or off. Be default it is equal to the #' GDPuc.verbose option, which is FALSE if not set to TRUE by the user. #' @param return_cfs TRUE or FALSE. Set to TRUE to additionally return a tibble with the conversion factors @@ -133,9 +132,14 @@ convertGDP <- function(gdp, # Call function x <- do.call(f, a) - if (!is.null(replace_NAs) && replace_NAs == 0) x[is.na(x)] <- 0 + # Handle NAs + if (!is.null(replace_NAs) && 0 %in% replace_NAs) x[is.na(x)] <- 0 if (any(is.na(x$value) & !is.na(internal$gdp$value))) { - warn("NAs have been generated for countries lacking conversion factors!") + if (!is.null(replace_NAs) && "no_conversion" %in% replace_NAs) { + x$value[is.na(x$value)] <- internal$gdp$value[is.na(x$value)] + } else { + warn("NAs have been generated for countries lacking conversion factors!") + } } # Return with original type and names diff --git a/R/transform_user_input.R b/R/transform_user_input.R index b49dff2..4842da8 100644 --- a/R/transform_user_input.R +++ b/R/transform_user_input.R @@ -67,7 +67,7 @@ transform_user_input <- function(gdp, unit_in, unit_out, source, with_regions, r } # Use different source if required - if (!is.null(replace_NAs) && replace_NAs != 0) { + if (!is.null(replace_NAs) && !setequal(replace_NAs, 0) && !setequal(replace_NAs, "no_conversion")) { source <- adapt_source(gdp, source, with_regions, replace_NAs) source_name <- paste0(source_name, "_adapted") } diff --git a/man/convertGDP.Rd b/man/convertGDP.Rd index 22fbabf..f382391 100644 --- a/man/convertGDP.Rd +++ b/man/convertGDP.Rd @@ -63,16 +63,15 @@ level, and 
re-aggregated before being returned.} \item{replace_NAs}{NULL by default, meaning no NA replacement. Can be set to one of the following: \itemize{ \item 0: resulting NAs are simply replaced with 0. +\item "no_conversion": resulting NAs are simply replaced with the values from the gdp argument. \item "linear": missing conversion factors in the source object are inter- and extrapolated linearly. For the extrapolation, the closest 5 data points are used. \item "regional_average": missing conversion factors in the source object are replaced with the regional average of the region to which the country belongs. This requires a region-mapping to be passed to the function, see the with_regions argument. -\item "linear_regional_average": missing conversion factors in the source object will be linearly -inter- and extrapolated, and when impossible (e.g. when no data at all is available for a country) set -to the regional GDP-weighted averages. This also requires a region-mapping to -be passed to the function, see the with_regions argument. -}} +} +Can also be a vector with "linear" as first element, e.g. c("linear", 0) or c("linear", "no_conversion"), +in which case, the operations are done in sequence.} \item{verbose}{TRUE or FALSE. A flag to turn verbosity on or off. Be default it is equal to the GDPuc.verbose option, which is FALSE if not set to TRUE by the user.} diff --git a/tests/testthat/test-check_user_input.R b/tests/testthat/test-check_user_input.R index 2c2bf4c..6ad015e 100644 --- a/tests/testthat/test-check_user_input.R +++ b/tests/testthat/test-check_user_input.R @@ -33,8 +33,8 @@ test_that("source argument", { "Invalid 'source' argument. 'source' is neither a data frame nor a string.") s <- "blabla" expect_error(check_user_input(gdp, unit_in, unit_out, source = s), - glue::glue("Invalid 'source' argument. If 'source' is a string, it must be one of the internal sources. \\ - Use print_source_info\\(\\) for information on available sources. 
\\ + glue::glue("Invalid 'source' argument. If 'source' is a string, it must be one of the internal \\ + sources. Use print_source_info\\(\\) for information on available sources. \\ If you are trying to pass a custom source, pass the data frame directly, not its name.")) # Following doesn't work in covr for some reason @@ -43,8 +43,7 @@ test_that("source argument", { "Invalid 'source' argument. Required columns are:(.*)") }) -test_that("Abort with bad input", { - +test_that("with_regions argument", { gdp <- tibble::tibble("iso3c" = "EUR", "year" = 2010, "value" = 100) unit_in = "current Int$PPP" unit_out = "current US$MER" @@ -64,9 +63,34 @@ test_that("Abort with bad input", { ) s <- my_bad_source expect_error(check_user_input(gdp, unit_in, unit_out, source = s, with_regions = with_regions)) +}) + +test_that("replace_NAs argument", { + gdp <- tibble::tibble("iso3c" = "EUR", "year" = 2010, "value" = 100) + unit_in = "current Int$PPP" + unit_out = "current US$MER" + s <- wb_wdi + + expect_error(check_user_input(gdp, unit_in, unit_out, source = s, replace_NAs = 2, with_regions = NULL), + glue::glue("Invalid 'replace_NAs' argument. Has to be either NULL, 0, 1, no_conversion, linear, \\ + regional_average or a combination of the above.")) + expect_error(check_user_input(gdp, unit_in, unit_out, source = s, replace_NAs = c(0, 1), with_regions = NULL), + glue::glue("Invalid 'replace_NAs' argument. The only accepted combinations of arguments start with \\ + 'linear', e.g. c\\('linear', 'no_conversion'\\).")) + + expect_error( + check_user_input(gdp, unit_in, unit_out, source = s, replace_NAs = "regional_average", with_regions = NULL), + glue::glue("Using 'regional_average' requires a region mapping. 
The 'with_regions' argument can't be NULL.") + ) +}) + +test_that("boolean arguments", { + + gdp <- tibble::tibble("iso3c" = "EUR", "year" = 2010, "value" = 100) + unit_in = "current Int$PPP" + unit_out = "current US$MER" s <- wb_wdi - expect_error(check_user_input(gdp, unit_in, unit_out, source = s, replace_NAs = 2, with_regions = NULL)) expect_error(check_user_input(gdp, unit_in, unit_out, source = s, with_regions = NULL, replace_NAs = NULL, verbose = "blabla")) @@ -81,3 +105,4 @@ test_that("Abort with bad input", { verbose = TRUE, return_cfs = "blabla")) }) + diff --git a/tests/testthat/test-replace_NAs.R b/tests/testthat/test-replace_NAs.R index c84303f..36ce302 100644 --- a/tests/testthat/test-replace_NAs.R +++ b/tests/testthat/test-replace_NAs.R @@ -52,9 +52,26 @@ test_that("convertGDP replace missing conversion factors", { expect_equal(gdp2$iso3c, gdp_conv5$iso3c) }) +test_that("convertGDP replace_NAs = 'no_conversion'", { + # wb_wi does not have info for ABW in 2019 + gdp <- tidyr::expand_grid("iso3c" = c("ABW", "DEU", "USA"), + "year" = c(2010, 2015, 2025), + "SSP" = c("SSP1", "SSP2"), "value" = 100) + + expect_warning(convertGDP(gdp, + unit_in = "constant 2005 Int$PPP", + unit_out = "constant 2019 US$MER")) + + gdp_conv <- convertGDP(gdp, + unit_in = "constant 2005 Int$PPP", + unit_out = "constant 2019 US$MER", + replace_NAs = "no_conversion") + + expect_identical(gdp[1:6,], gdp_conv[1:6,]) +}) test_that("convertGDP replace_NAs = linear", { - # wb_wi does not have info for AIA, so AIA is used for testing here + # wb_wi does not have info for ABW in 2019 gdp <- tidyr::expand_grid("iso3c" = c("ABW", "DEU", "USA"), "year" = c(2010, 2015, 2025), "SSP" = c("SSP1", "SSP2"), "value" = 100) @@ -65,14 +82,40 @@ test_that("convertGDP replace_NAs = linear", { gdp_conv <- convertGDP(gdp, unit_in = "constant 2005 Int$PPP", - unit_out = "constant 2005 US$MER", + unit_out = "constant 2019 US$MER", replace_NAs = "linear") expect_true(!any(is.na(gdp_conv$value))) }) - 
test_that("lin_int_ext", { x <- c(NA,NA,NA,NA,NA,NA,2,3,4,5,NA,7,8,NA,NA,NA,NA,NA,NA) expect_equal(lin_int_ext(x), -4:14) }) + + +test_that("convertGDP replace_NAs = c('linear', 'no_conversion')", { + # wb_wi does not have info for ABW in 2019 + gdp <- tidyr::expand_grid("iso3c" = c("ABW", "DEU", "USA", "JJJ"), + "year" = c(2010, 2015, 2025), + "SSP" = c("SSP1", "SSP2"), "value" = 100) + + expect_warning(convertGDP(gdp, + unit_in = "constant 2005 Int$PPP", + unit_out = "constant 2019 US$MER")) + + gdp_conv <- convertGDP(gdp, + unit_in = "constant 2005 Int$PPP", + unit_out = "constant 2019 US$MER", + replace_NAs = c("linear", "no_conversion")) + + expect_true(!any(is.na(gdp_conv$value))) + expect_identical(gdp[19:24,], gdp_conv[19:24,]) + + gdp_conv <- convertGDP(gdp, + unit_in = "constant 2005 Int$PPP", + unit_out = "constant 2019 US$MER", + replace_NAs = c("linear", 0)) + expect_identical(dplyr::pull(gdp_conv[19:24, "value"]), rep(0, 6)) +}) + diff --git a/vignettes/handle_NAs.Rmd b/vignettes/handle_NAs.Rmd index 83de584..99ba3dd 100644 --- a/vignettes/handle_NAs.Rmd +++ b/vignettes/handle_NAs.Rmd @@ -65,6 +65,31 @@ x$result x$cfs ``` + +### `replace_NAs` = "no_conversion" + +If set to "no_conversion", NAs are replaced with the values in the gdp argument. + +```{r} +my_gdp <- tibble::tibble( + iso3c = "ABW", + year = 2010:2014, + value = 100:104 +) + +x <- convertGDP( + gdp = my_gdp, + unit_in = "constant 2005 Int$PPP", + unit_out = "constant 2019 Int$PPP", + replace_NAs = "no_conversion", + return_cfs = TRUE +) +x$result + +x$cfs +``` + + ### `replace_NAs` = "linear" If set to "linear", missing conversion factors are inter- and extrapolated linearly. For the extrapolation, the closest 5 data points are used. @@ -120,9 +145,11 @@ x$cfs # Obviously, being a part of the same region, does not mean the currencies are of the same strength. 
``` -### `replace_NAs` = "linear_regional_average" +### `replace_NAs` = c("linear", "...") -If set to "linear_regional_average", conversion factors will be linearly inter- and extrapolated, and when impossible (e.g. when no data at all is available for a country) set to the regional GDP-weighted averages. +If a vector is passed, with "linear" as first element, then the operations are done in sequence. +For example for c("linear", 0), missing conversion factors are first inter- and extrapolated linearly but +if any missing conversion factors still lead to NAs, these are replaced with 0. ```{r} # Create an imaginary country XXX, and add it to the Latin America region @@ -141,7 +168,7 @@ x <- convertGDP( gdp = my_gdp, unit_in = "constant 2005 Int$PPP", unit_out = "constant 2019 Int$PPP", - replace_NAs = "linear_regional_average", + replace_NAs = c("linear", 0), with_regions = my_mapping_data_frame, return_cfs = TRUE ) diff --git a/vignettes/verbose.Rmd b/vignettes/verbose.Rmd index 1b62653..3dbd42a 100644 --- a/vignettes/verbose.Rmd +++ b/vignettes/verbose.Rmd @@ -1,8 +1,8 @@ --- -title: "Geting information on the conversion process" +title: "Getting information on the conversion process" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Geting information on the conversion process} + %\VignetteIndexEntry{Getting information on the conversion process} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} ---