From 1bc249c314258ec1cb891e455b7b7ff076814a1d Mon Sep 17 00:00:00 2001 From: mrkaye97 Date: Mon, 9 May 2022 13:42:04 -0400 Subject: [PATCH 1/6] use `prettyunits::pretty_bytes` to prettify logging --- R/put_object.R | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/R/put_object.R b/R/put_object.R index e506bdf..46c56a1 100644 --- a/R/put_object.R +++ b/R/put_object.R @@ -14,48 +14,48 @@ #' @param partsize numeric, size of each part when using multipart upload. AWS imposes a minimum size (currently 5MB) so setting a too low value may fail. Note that it can be set to \code{Inf} in conjunction with \code{multipart=FALSE} to silence the warning suggesting multipart uploads for large content. #' @template dots #' @details This provides a generic interface for storing objects to S3. Some convenience wrappers are provided for common tasks: e.g., \code{\link{s3save}} and \code{\link{s3saveRDS}}. -#' +#' #' Note that S3 is a flat file store. So there is no folder hierarchy as in a traditional hard drive. However, S3 allows users to create pseudo-folders by prepending object keys with \code{foldername/}. The \code{put_folder} function is provided as a high-level convenience function for creating folders. This is not actually necessary as objects with slashes in their key will be displayed in the S3 web console as if they were in folders, but it may be useful for creating an empty directory (which is possible in the web console). #' #' \strong{IMPORTANT}: In aws.s3 versions before 0.3.22 the first positional argument was \code{file} and \code{put_object} changed behavior depending on whether the file could be found or not. This is inherently very dangerous since \code{put_object} would only store the filename in cases there was any problem with the input. Therefore the first argument was changed to \code{what} which is always the content to store and now also supports connection. If not used, \code{file} is still a named argument and can be set instead - it will be always interpreted as a filename, failing with an error if it doesn't exist. #' #' When using connections in \code{what} it is preferrable that they are either unopened or open in binary mode. This condition is mandatory for multipart uploads. Text connections are inherently much slower and may not deliver identical results since they mangle line endings. \code{put_object} will automatically open unopened connections and always closes the connection before returning. -#' +#' #' @return If successful, \code{TRUE}. #' @examples #' \dontrun{ #' library("datasets") -#' +#' #' # write file to S3 #' tmp <- tempfile() #' on.exit(unlink(tmp)) #' utils::write.csv(mtcars, file = tmp) #' # put object with an upload progress bar #' put_object(file = tmp, object = "mtcars.csv", bucket = "myexamplebucket", show_progress = TRUE) -#' +#' #' # create a "folder" in a bucket (NOT required! Folders are really just 0-length files) #' put_folder("example", bucket = "myexamplebucket") #' ## write object to the "folder" #' put_object(file = tmp, object = "example/mtcars.csv", bucket = "myexamplebucket") -#' +#' #' # write serialized, in-memory object to S3 #' x <- rawConnection(raw(), "w") #' utils::write.csv(mtcars, x) #' put_object(rawConnectionValue(x), object = "mtcars.csv", bucket = "myexamplebucketname") -#' +#' #' # use `headers` for server-side encryption #' ## require appropriate bucket policy #' ## encryption can also be set at the bucket-level using \code{\link{put_encryption}} #' put_object(file = tmp, object = "mtcars.csv", bucket = "myexamplebucket", #' headers = c('x-amz-server-side-encryption' = 'AES256')) -#' +#' #' # alternative "S3 URI" syntax: #' put_object(rawConnectionValue(x), object = "s3://myexamplebucketname/mtcars.csv") #' close(x) -#' +#' #' # read the object back from S3 #' read.csv(text = rawToChar(get_object(object = "s3://myexamplebucketname/mtcars.csv"))) -#' +#' #' # multi-part uploads for objects over 5MB #' \donttest{ #' x <- rnorm(3e6) @@ -68,6 +68,7 @@ #' @references \href{http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html}{API Documentation} #' @seealso \code{\link{put_bucket}}, \code{\link{get_object}}, \code{\link{delete_object}}, \code{\link{put_encryption}} #' @importFrom utils head +#' @importFrom prettyunits pretty_bytes #' @export put_object <- function( @@ -104,7 +105,7 @@ function( ## we cache connection info what.info <- if (inherits(what, "connection")) summary(what) else NULL - + ## auto-detect file name if object is not provided if (missing(object) && inherits(what, "connection") && what.info$class == "file") { if (missing(bucket)) @@ -190,7 +191,7 @@ function( headers = headers, ...) id <- initialize[["UploadId"]] - + # function to call abort if any part fails (otherwise the user pays for incomplete payload!) abort.upload <- function(id) delete_object(object = object, bucket = bucket, query = list(uploadId = id), ...) @@ -214,7 +215,7 @@ function( if (length(data) == 0) ## end of payload break - r <- s3HTTP(verb = "PUT", + r <- s3HTTP(verb = "PUT", bucket = bucket, path = paste0('/', object), query = list(partNumber = i, uploadId = id), @@ -247,7 +248,7 @@ function( } if (!is.na(size) && size > partsize) - message("File size is ", size, ", consider setting using multipart=TRUE") + message("File size is ", pretty_bytes(size), ", consider setting using multipart=TRUE") ## httr doesn't support connections so we have to read it all into memory first if (inherits(what, "connection")) { @@ -271,10 +272,10 @@ function( } } - r <- s3HTTP(verb = "PUT", + r <- s3HTTP(verb = "PUT", bucket = bucket, path = paste0('/', object), - headers = headers, + headers = headers, request_body = what, verbose = verbose, show_progress = show_progress, @@ -303,10 +304,10 @@ post_object <- function(file, object, bucket, headers = list(), ...) { if (!"Content-Length" %in% names(headers)) { headers <- c(headers, list(`Content-Length` = formatSize(calculate_data_size(file)))) } - r <- s3HTTP(verb = "POST", + r <- s3HTTP(verb = "POST", bucket = bucket, path = paste0("/", object), - headers = headers, + headers = headers, request_body = file, ...) structure(r, class = "s3_object") @@ -334,7 +335,7 @@ complete_parts <- function(object, bucket, id, parts, ...) { bucket <- get_bucketname(object) } object <- get_objectkey(object) - + tmp <- tempfile() xml2::write_xml(xml2::as_xml_document(list(CompleteMultipartUpload = parts)), tmp, options = "no_declaration") post_object(file = tmp, object = object, bucket = bucket, query = list(uploadId = id), ...) From fe4b43da161f437d472c57efe2e833ac0775cae2 Mon Sep 17 00:00:00 2001 From: mrkaye97 Date: Mon, 9 May 2022 13:42:08 -0400 Subject: [PATCH 2/6] version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9c11671..4df0975 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: aws.s3 Type: Package Title: 'AWS S3' Client Package -Version: 0.3.22 +Version: 0.3.23 Authors@R: c(person("Thomas J.", "Leeper", role = "aut", email = "thosjleeper@gmail.com", comment = c(ORCID = "0000-0003-4097-6326")), From 3bb7c751c9e3d5c071221c28eb4591abab4c01fd Mon Sep 17 00:00:00 2001 From: mrkaye97 Date: Mon, 9 May 2022 13:46:01 -0400 Subject: [PATCH 3/6] docs --- NAMESPACE | 1 + man/put_object.Rd | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index eaa3a39..3b7cb9e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -93,6 +93,7 @@ importFrom(curl,curl) importFrom(curl,handle_setheaders) importFrom(curl,new_handle) importFrom(digest,digest) +importFrom(prettyunits,pretty_bytes) importFrom(tools,file_ext) importFrom(tools,md5sum) importFrom(utils,URLencode) diff --git a/man/put_object.Rd b/man/put_object.Rd index fad456b..eb9419c 100644 --- a/man/put_object.Rd +++ b/man/put_object.Rd @@ -64,7 +64,7 @@ When using connections in \code{what} it is preferrable that they are either uno \examples{ \dontrun{ library("datasets") - + # write file to S3 tmp <- tempfile() on.exit(unlink(tmp)) From 2ce202fcd856e2e6f5e15c93e1f371bb1e54fd92 Mon Sep 17 00:00:00 2001 From: mrkaye97 Date: Mon, 9 May 2022 13:46:49 -0400 Subject: [PATCH 4/6] news --- NEWS.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/NEWS.md b/NEWS.md index 40d2d8f..c5ea303 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# aws.s3 0.3.23 + +## Features + +* `put_object` now uses `prettyunits::pretty_bytes` to prettify log messages printed when suggesting +using `multipart = TRUE`. + # aws.s3 0.3.22 ## API changes From d46a5792c92b4e2771a7a1648f7690589d82b3b7 Mon Sep 17 00:00:00 2001 From: mrkaye97 Date: Mon, 9 May 2022 13:47:34 -0400 Subject: [PATCH 5/6] add me to description --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 4df0975..648a9dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,6 +15,7 @@ Authors@R: c(person("Thomas J.", "Leeper", role = "aut", person("Andrii", "Degtiarov", role = "ctb"), person("Dhruv", "Aggarwal", role = "ctb"), person("Alyssa", "Columbus", role = "ctb"), + person("Matt", "Kaye", role = "ctb"), person("Simon", "Urbanek", role = c("cre", "ctb"), email = "simon.urbanek@R-project.org") ) From 51621ad2f6944ef2b09542e539904ff904f1e90b Mon Sep 17 00:00:00 2001 From: mrkaye97 Date: Mon, 9 May 2022 13:48:15 -0400 Subject: [PATCH 6/6] add prettyunits dep --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 648a9dc..3ef52d3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,8 @@ Imports: xml2 (> 1.0.0), base64enc, digest, - aws.signature (>= 0.3.7) + aws.signature (>= 0.3.7), + prettyunits Suggests: testthat, datasets