Skip to content

Commit

Permalink
Include replace_NAs option no_conversion and combinations
Browse files Browse the repository at this point in the history
  • Loading branch information
johanneskoch94 committed Feb 15, 2022
1 parent 317dfae commit 5e82d65
Show file tree
Hide file tree
Showing 13 changed files with 153 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .buildlibrary
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ValidationKey: '1332170'
ValidationKey: '1523040'
AutocreateReadme: no
AcceptedWarnings:
- 'Warning: package ''.*'' was built under R version'
Expand Down
2 changes: 1 addition & 1 deletion .zenodo.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "GDPuc: Easily Convert GDP Data",
"version": "0.7.0",
"version": "0.8.0",
"description": "<p>A function to convert GDP time series from one unit to another.\n All common GDP units are included, i.e. current and constant local currency\n units, US$ via market exchange rates and international dollars via\n purchasing power parities. Conversion factors can easily be customized. \n Conversion at a regional/aggregated level is also possible.<\/p>",
"creators": [
{
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GDPuc
Title: Easily Convert GDP Data
Version: 0.7.0
Version: 0.8.0
Authors@R:
c(person(given = "Johannes",
family = "Koch",
Expand Down Expand Up @@ -43,6 +43,6 @@ Suggests:
zoo
Config/testthat/edition: 3
Encoding: UTF-8
Date: 2022-02-08
Date: 2022-02-15
Roxygen: list(markdown = TRUE)
VignetteBuilder: knitr
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# GDPuc 0.8.0

* Add option "no_conversion" for the `replace_NAs` argument.

* The `replace_NAs` argument can now take a vector allowing for a combination of options.

# GDPuc 0.7.0

* Add argument `return_cfs` to return the conversion factors used.
Expand Down
8 changes: 4 additions & 4 deletions R/adapt_source.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ adapt_source <- function(gdp, source, with_regions, replace_NAs) {
dplyr::anti_join(source, by = c("iso3c", "year"))) %>%
tidyr::complete(.data$iso3c, .data$year)

if (replace_NAs %in% c("linear", "linear_regional_average")) {
# Make sure that source contains obersvations for every year between min and max years.
if (replace_NAs[1] == "linear") {
# Make sure that source contains observations for every year between min and max years.
# This is important for the function lin_int_ext, which works with indices, to compute the
# correct values
source_adapted <- source_adapted %>%
Expand All @@ -33,7 +33,7 @@ adapt_source <- function(gdp, source, with_regions, replace_NAs) {
lin_int_ext))
}

if (replace_NAs %in% c("regional_average", "linear_regional_average")) {
if ("regional_average" %in% replace_NAs) {
# Get GDP variable from source object, with its unit
regex_var <- "GDP, PPP \\(constant .... international \\$\\)"
weight_var <- grep(regex_var, colnames(source), value = TRUE)[1]
Expand Down Expand Up @@ -68,7 +68,7 @@ adapt_source <- function(gdp, source, with_regions, replace_NAs) {
dplyr::ungroup()
}

if (replace_NAs == 1) {
if (1 %in% replace_NAs) {
source_adapted <- source_adapted %>%
# Mutate the 3 important columns
dplyr::rowwise() %>%
Expand Down
22 changes: 16 additions & 6 deletions R/check_user_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,24 @@ check_with_regions <- function(unit_in, unit_out, source, with_regions) {
# Check input parameter 'replace_NAs'
check_replace_NAs <- function(with_regions, replace_NAs) {
if (!is.null(replace_NAs)) {
if (replace_NAs == 1) {
lifecycle::deprecate_warn("0.7.0", "convertGDP(replace_NAs = 'was deprecated')")
if (setequal(replace_NAs, 1)) {
lifecycle::deprecate_warn("0.7.0", "convertGDP(replace_NAs = 'should not be 1')")
}
if (!replace_NAs %in% c(0, 1, "linear", "regional_average", "linear_regional_average")) {
abort("Invalid 'replace_NAs' argument. Has to be either NULL, 0, 1, linear, regional_average or \\
linear_regional_average.")
if ("linear_regional_average" %in% replace_NAs) {
lifecycle::deprecate_stop(
"0.8.0",
"convertGDP(replace_NAs = '\"linear_regional_average\" has been replaced by c(\"linear\", \"regional_average\")')"
)
}
if (replace_NAs == "regional_average" && is.null(with_regions)) {
if (!all(replace_NAs %in% c(0, 1, "no_conversion", "linear", "regional_average"))) {
abort("Invalid 'replace_NAs' argument. Has to be either NULL, 0, 1, no_conversion, linear, regional_average or \\
a combination of the above.")
}
if (length(replace_NAs) > 1 && replace_NAs[1] != "linear") {
abort("Invalid 'replace_NAs' argument. The only accepted combinations of arguments start with 'linear', e.g. \\
c('linear', 'no_conversion').")
}
if ("regional_average" %in% replace_NAs && is.null(with_regions)) {
abort("Using 'regional_average' requires a region mapping. The 'with_regions' argument can't be NULL.")
}
}
Expand Down
16 changes: 10 additions & 6 deletions R/convertGDP.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,15 @@
#' @param replace_NAs NULL by default, meaning no NA replacement. Can be set to one of the following:
#' \itemize{
#' \item 0: resulting NAs are simply replaced with 0.
#' \item "no_conversion": resulting NAs are simply replaced with the values from the gdp argument.
#' \item "linear": missing conversion factors in the source object are inter- and extrapolated linearly.
#' For the extrapolation, the closest 5 data points are used.
#' \item "regional_average": missing conversion factors in the source object are replaced with
#' the regional average of the region to which the country belongs. This requires a region-mapping to
#' be passed to the function, see the with_regions argument.
#' \item "linear_regional_average": missing conversion factors in the source object will be linearly
#' inter- and extrapolated, and when impossible (e.g. when no data at all is available for a country) set
#' to the regional GDP-weighted averages. This also requires a region-mapping to
#' be passed to the function, see the with_regions argument.
#' }
#' Can also be a vector with "linear" as first element, e.g. c("linear", 0) or c("linear", "no_conversion"),
#' in which case, the operations are done in sequence.
#' @param verbose TRUE or FALSE. A flag to turn verbosity on or off. By default it is equal to the
#' GDPuc.verbose option, which is FALSE if not set to TRUE by the user.
#' @param return_cfs TRUE or FALSE. Set to TRUE to additionally return a tibble with the conversion factors
Expand Down Expand Up @@ -133,9 +132,14 @@ convertGDP <- function(gdp,
# Call function
x <- do.call(f, a)

if (!is.null(replace_NAs) && replace_NAs == 0) x[is.na(x)] <- 0
# Handle NAs
if (!is.null(replace_NAs) && 0 %in% replace_NAs) x[is.na(x)] <- 0
if (any(is.na(x$value) & !is.na(internal$gdp$value))) {
warn("NAs have been generated for countries lacking conversion factors!")
if (!is.null(replace_NAs) && "no_conversion" %in% replace_NAs) {
x$value[is.na(x$value)] <- internal$gdp$value[is.na(x$value)]
} else {
warn("NAs have been generated for countries lacking conversion factors!")
}
}

# Return with original type and names
Expand Down
2 changes: 1 addition & 1 deletion R/transform_user_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ transform_user_input <- function(gdp, unit_in, unit_out, source, with_regions, r
}

# Use different source if required
if (!is.null(replace_NAs) && replace_NAs != 0) {
if (!is.null(replace_NAs) && !setequal(replace_NAs, 0) && !setequal(replace_NAs, "no_conversion")) {
source <- adapt_source(gdp, source, with_regions, replace_NAs)
source_name <- paste0(source_name, "_adapted")
}
Expand Down
9 changes: 4 additions & 5 deletions man/convertGDP.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 30 additions & 5 deletions tests/testthat/test-check_user_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ test_that("source argument", {
"Invalid 'source' argument. 'source' is neither a data frame nor a string.")
s <- "blabla"
expect_error(check_user_input(gdp, unit_in, unit_out, source = s),
glue::glue("Invalid 'source' argument. If 'source' is a string, it must be one of the internal sources. \\
Use print_source_info\\(\\) for information on available sources. \\
glue::glue("Invalid 'source' argument. If 'source' is a string, it must be one of the internal \\
sources. Use print_source_info\\(\\) for information on available sources. \\
If you are trying to pass a custom source, pass the data frame directly, not its name."))

# Following doesn't work in covr for some reason
Expand All @@ -43,8 +43,7 @@ test_that("source argument", {
"Invalid 'source' argument. Required columns are:(.*)")
})

test_that("Abort with bad input", {

test_that("with_regions argument", {
gdp <- tibble::tibble("iso3c" = "EUR", "year" = 2010, "value" = 100)
unit_in = "current Int$PPP"
unit_out = "current US$MER"
Expand All @@ -64,9 +63,34 @@ test_that("Abort with bad input", {
)
s <- my_bad_source
expect_error(check_user_input(gdp, unit_in, unit_out, source = s, with_regions = with_regions))
})

test_that("replace_NAs argument", {
  # Minimal valid inputs; only 'replace_NAs' / 'with_regions' vary below.
  gdp <- tibble::tibble(iso3c = "EUR", year = 2010, value = 100)
  unit_in <- "current Int$PPP"
  unit_out <- "current US$MER"
  s <- wb_wdi

  # Expected error messages (used as regular expressions by expect_error).
  # The string literals are kept exactly as they are, including the line
  # continuation, because their content is what gets matched.
  msg_invalid_value <- glue::glue("Invalid 'replace_NAs' argument. Has to be either NULL, 0, 1, no_conversion, linear, \\
regional_average or a combination of the above.")
  msg_invalid_combination <- glue::glue("Invalid 'replace_NAs' argument. The only accepted combinations of arguments start with \\
'linear', e.g. c\\('linear', 'no_conversion'\\).")
  msg_missing_regions <- glue::glue("Using 'regional_average' requires a region mapping. The 'with_regions' argument can't be NULL.")

  # A value outside the accepted set is rejected.
  expect_error(
    check_user_input(gdp, unit_in, unit_out, source = s, replace_NAs = 2, with_regions = NULL),
    msg_invalid_value
  )

  # A vector that does not start with "linear" is rejected.
  expect_error(
    check_user_input(gdp, unit_in, unit_out, source = s, replace_NAs = c(0, 1), with_regions = NULL),
    msg_invalid_combination
  )

  # "regional_average" without a region mapping is rejected.
  expect_error(
    check_user_input(gdp, unit_in, unit_out, source = s, replace_NAs = "regional_average", with_regions = NULL),
    msg_missing_regions
  )
})

test_that("boolean arguments", {

gdp <- tibble::tibble("iso3c" = "EUR", "year" = 2010, "value" = 100)
unit_in = "current Int$PPP"
unit_out = "current US$MER"
s <- wb_wdi
expect_error(check_user_input(gdp, unit_in, unit_out, source = s, replace_NAs = 2, with_regions = NULL))

expect_error(check_user_input(gdp, unit_in, unit_out, source = s,
with_regions = NULL, replace_NAs = NULL, verbose = "blabla"))
Expand All @@ -81,3 +105,4 @@ test_that("Abort with bad input", {
verbose = TRUE,
return_cfs = "blabla"))
})

49 changes: 46 additions & 3 deletions tests/testthat/test-replace_NAs.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,26 @@ test_that("convertGDP replace missing conversion factors", {
expect_equal(gdp2$iso3c, gdp_conv5$iso3c)
})

test_that("convertGDP replace_NAs = 'no_conversion'", {
  # wb_wdi does not have info for ABW in 2019
  gdp <- tidyr::expand_grid(
    iso3c = c("ABW", "DEU", "USA"),
    year = c(2010, 2015, 2025),
    SSP = c("SSP1", "SSP2"),
    value = 100
  )
  from_unit <- "constant 2005 Int$PPP"
  to_unit <- "constant 2019 US$MER"

  # Without any NA handling the conversion warns.
  expect_warning(convertGDP(gdp, unit_in = from_unit, unit_out = to_unit))

  gdp_conv <- convertGDP(
    gdp,
    unit_in = from_unit,
    unit_out = to_unit,
    replace_NAs = "no_conversion"
  )

  # The first six rows (the first country in the grid, ABW) must come back
  # unchanged, i.e. with the input values.
  expect_identical(gdp[1:6, ], gdp_conv[1:6, ])
})

test_that("convertGDP replace_NAs = linear", {
# wb_wi does not have info for AIA, so AIA is used for testing here
# wb_wdi does not have info for ABW in 2019
gdp <- tidyr::expand_grid("iso3c" = c("ABW", "DEU", "USA"),
"year" = c(2010, 2015, 2025),
"SSP" = c("SSP1", "SSP2"), "value" = 100)
Expand All @@ -65,14 +82,40 @@ test_that("convertGDP replace_NAs = linear", {

gdp_conv <- convertGDP(gdp,
unit_in = "constant 2005 Int$PPP",
unit_out = "constant 2005 US$MER",
unit_out = "constant 2019 US$MER",
replace_NAs = "linear")

expect_true(!any(is.na(gdp_conv$value)))
})


test_that("lin_int_ext", {
  # Known values 2..5 and 7, 8 lie on a straight line; the NAs before,
  # between and after them are expected to be filled so that the whole
  # vector becomes -4:14.
  leading_gap <- rep(NA, 6)
  trailing_gap <- rep(NA, 6)
  series <- c(leading_gap, 2, 3, 4, 5, NA, 7, 8, trailing_gap)

  expect_equal(lin_int_ext(series), -4:14)
})


test_that("convertGDP replace_NAs = c('linear', 'no_conversion')", {
  # wb_wdi does not have info for ABW in 2019
  gdp <- tidyr::expand_grid(
    iso3c = c("ABW", "DEU", "USA", "JJJ"),
    year = c(2010, 2015, 2025),
    SSP = c("SSP1", "SSP2"),
    value = 100
  )
  from_unit <- "constant 2005 Int$PPP"
  to_unit <- "constant 2019 US$MER"
  # Rows 19-24 are the last country in the grid ("JJJ").
  last_rows <- 19:24

  # Without any NA handling the conversion warns.
  expect_warning(convertGDP(gdp, unit_in = from_unit, unit_out = to_unit))

  # "linear" followed by "no_conversion": no NAs remain, and the last six
  # rows keep their input values.
  conv_keep <- convertGDP(
    gdp,
    unit_in = from_unit,
    unit_out = to_unit,
    replace_NAs = c("linear", "no_conversion")
  )
  expect_true(!any(is.na(conv_keep$value)))
  expect_identical(gdp[last_rows, ], conv_keep[last_rows, ])

  # "linear" followed by 0: the last six rows are set to 0.
  conv_zero <- convertGDP(
    gdp,
    unit_in = from_unit,
    unit_out = to_unit,
    replace_NAs = c("linear", 0)
  )
  expect_identical(dplyr::pull(conv_zero[last_rows, "value"]), rep(0, 6))
})

33 changes: 30 additions & 3 deletions vignettes/handle_NAs.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,31 @@ x$result
x$cfs
```


### `replace_NAs` = "no_conversion"

If set to "no_conversion", NAs are replaced with the values in the gdp argument.

```{r}
my_gdp <- tibble::tibble(
iso3c = "ABW",
year = 2010:2014,
value = 100:104
)
x <- convertGDP(
gdp = my_gdp,
unit_in = "constant 2005 Int$PPP",
unit_out = "constant 2019 Int$PPP",
replace_NAs = "no_conversion",
return_cfs = TRUE
)
x$result
x$cfs
```


### `replace_NAs` = "linear"

If set to "linear", missing conversion factors are inter- and extrapolated linearly. For the extrapolation, the closest 5 data points are used.
Expand Down Expand Up @@ -120,9 +145,11 @@ x$cfs
# Obviously, being a part of the same region, does not mean the currencies are of the same strength.
```

### `replace_NAs` = "linear_regional_average"
### `replace_NAs` = c("linear", "...")

If set to "linear_regional_average", conversion factors will be linearly inter- and extrapolated, and when impossible (e.g. when no data at all is available for a country) set to the regional GDP-weighted averages.
If a vector is passed, with "linear" as first element, then the operations are done in sequence.
For example, with c("linear", 0), missing conversion factors are first inter- and extrapolated linearly;
any NAs that still remain in the result are then replaced with 0.

```{r}
# Create an imaginary country XXX, and add it to the Latin America region
Expand All @@ -141,7 +168,7 @@ x <- convertGDP(
gdp = my_gdp,
unit_in = "constant 2005 Int$PPP",
unit_out = "constant 2019 Int$PPP",
replace_NAs = "linear_regional_average",
replace_NAs = c("linear", 0),
with_regions = my_mapping_data_frame,
return_cfs = TRUE
)
Expand Down
4 changes: 2 additions & 2 deletions vignettes/verbose.Rmd
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
title: "Geting information on the conversion process"
title: "Getting information on the conversion process"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Geting information on the conversion process}
%\VignetteIndexEntry{Getting information on the conversion process}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
Expand Down

0 comments on commit 5e82d65

Please sign in to comment.