Skip to content

Commit

Permalink
Model chunking support (#404)
Browse files Browse the repository at this point in the history
Add support for model chunking
[ committed by @billschereriii ]
[ reviewed by @ashao  ]
  • Loading branch information
billschereriii authored Sep 28, 2023
1 parent faf58b3 commit 53def75
Show file tree
Hide file tree
Showing 19 changed files with 363 additions and 25 deletions.
5 changes: 4 additions & 1 deletion doc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,22 @@ To be released at some future point in time

Description

- Added support for model chunking
- Updated the third-party RedisAI component
- Updated the third-party lcov component
- Add link to contributing guidelines

Detailed Notes

- Models will now be automatically chunked when sent to/received from the backend database. This allows use of models greater than 511MB in size. (PR404_)
- Updated from RedisAI v1.2.3 (test target)/v1.2.4 and v1.2.5 (CI/CD pipeline) to v1.2.7 (PR402_)
- Updated lcov from version 1.15 to 2.0 (PR396_)
- Create CONTRIBUTIONS.md file that points to the contribution guideline for both SmartSim and SmartRedis (PR395_)

.. _PR404: https://github.com/CrayLabs/SmartRedis/pull/404
.. _PR402: https://github.com/CrayLabs/SmartRedis/pull/402
.. _PR396: https://github.com/CrayLabs/SmartRedis/pull/396
.. _PR394: https://github.com/CrayLabs/SmartRedis/pull/394
.. _PR395: https://github.com/CrayLabs/SmartRedis/pull/395

0.4.2
-----
Expand Down
15 changes: 15 additions & 0 deletions include/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,21 @@ class Client : public SRObject
const int start_index,
const int end_index);

/*!
* \brief Reconfigure the chunking size that Redis uses for model
* serialization, replication, and the model_get command.
* \details This method triggers the AI.CONFIG method in the Redis
* database to change the model chunking size.
*
* NOTE: The default size of 511MB should be fine for most
* applications, so it is expected to be very rare that a
* client calls this method. It is not necessary to call
 * this method for a model to be chunked.
* \param chunk_size The new chunk size in bytes
* \throw SmartRedis::Exception if the command fails.
*/
void set_model_chunk_size(int chunk_size);

/*!
* \brief Create a string representation of the client
* \returns A string containing client details
Expand Down
15 changes: 15 additions & 0 deletions include/command.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,21 @@ class Command
return *this;
}

/*!
 * \brief Add a vector of string_views to the command.
 * \details Each entry is appended as a field via add_field_ptr.
 *          NOTE(review): add_field_ptr appears to add the data by
 *          pointer rather than copying it, so the caller must keep
 *          the underlying buffers alive for the lifetime of the
 *          command — confirm against add_field_ptr's contract.
 *          To add a vector of keys, use the add_keys() method.
 * \param fields The strings to add to the command
 * \returns The command object, for chaining.
 */
virtual Command& operator<<(const std::vector<std::string_view>& fields) {
    // Range-for avoids manual indexing; order of fields is preserved
    for (const std::string_view& field : fields) {
        add_field_ptr(field);
    }
    return *this;
}

/*!
* \brief Add a vector of strings to the command.
* \details The string values are copied to the command.
Expand Down
8 changes: 8 additions & 0 deletions include/commandreply.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,14 @@ class CommandReply {
*/
std::string redis_reply_type();

/*!
* \brief Determine whether the response is an array
* \returns true iff the response is of type REDIS_REPLY_ARRAY
*/
bool is_array() {
return _reply->type == REDIS_REPLY_ARRAY;
}

/*!
* \brief Print the reply structure of the CommandReply
*/
Expand Down
15 changes: 15 additions & 0 deletions include/pyclient.h
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,21 @@ class PyClient : public PySRObject
const int start_index,
const int end_index);

/*!
* \brief Reconfigure the chunking size that Redis uses for model
* serialization, replication, and the model_get command.
* \details This method triggers the AI.CONFIG method in the Redis
* database to change the model chunking size.
*
* NOTE: The default size of 511MB should be fine for most
* applications, so it is expected to be very rare that a
* client calls this method. It is not necessary to call
 * this method for a model to be chunked.
* \param chunk_size The new chunk size in bytes
* \throw SmartRedis::Exception if the command fails.
*/
void set_model_chunk_size(int chunk_size);

/*!
* \brief Create a string representation of the Client
* \returns A string representation of the Client
Expand Down
29 changes: 25 additions & 4 deletions include/redis.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ class Redis : public RedisServer
* \brief Set a model from std::string_view buffer in the
* database for future execution
* \param key The key to associate with the model
* \param model The model as a continuous buffer string_view
* \param model The model as a sequence of buffer string_view chunks
* \param backend The name of the backend
* (TF, TFLITE, TORCH, ONNX)
* \param device The name of the device for execution
Expand All @@ -292,7 +292,7 @@ class Redis : public RedisServer
* \throw RuntimeException for all client errors
*/
virtual CommandReply set_model(const std::string& key,
std::string_view model,
const std::vector<std::string_view>& model,
const std::string& backend,
const std::string& device,
int batch_size = 0,
Expand All @@ -307,7 +307,7 @@ class Redis : public RedisServer
* \brief Set a model from std::string_view buffer in the
* database for future execution in a multi-GPU system
* \param name The name to associate with the model
* \param model The model as a continuous buffer string_view
* \param model The model as a sequence of buffer string_view chunks
* \param backend The name of the backend
* (TF, TFLITE, TORCH, ONNX)
* \param first_gpu The first GPU to use with this model
Expand All @@ -322,7 +322,7 @@ class Redis : public RedisServer
* \throw RuntimeException for all client errors
*/
virtual void set_model_multigpu(const std::string& name,
const std::string_view& model,
const std::vector<std::string_view>& model,
const std::string& backend,
int first_gpu,
int num_gpus,
Expand Down Expand Up @@ -505,6 +505,27 @@ class Redis : public RedisServer
const std::string& key,
const bool reset_stat);

/*!
* \brief Retrieve the current model chunk size
* \returns The size in bytes for model chunking
*/
virtual int get_model_chunk_size();

/*!
* \brief Reconfigure the chunking size that Redis uses for model
* serialization, replication, and the model_get command.
* \details This method triggers the AI.CONFIG method in the Redis
* database to change the model chunking size.
*
* NOTE: The default size of 511MB should be fine for most
* applications, so it is expected to be very rare that a
* client calls this method. It is not necessary to call
 * this method for a model to be chunked.
* \param chunk_size The new chunk size in bytes
* \throw SmartRedis::Exception if the command fails.
*/
virtual void set_model_chunk_size(int chunk_size);

/*!
* \brief Run a CommandList via a Pipeline
* \param cmdlist The list of commands to run
Expand Down
28 changes: 24 additions & 4 deletions include/rediscluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ class RedisCluster : public RedisServer
* \brief Set a model from std::string_view buffer in the
* database for future execution
* \param key The key to associate with the model
* \param model The model as a continuous buffer string_view
* \param model The model as a sequence of buffer string_view chunks
* \param backend The name of the backend
* (TF, TFLITE, TORCH, ONNX)
* \param device The name of the device for execution
Expand All @@ -312,7 +312,7 @@ class RedisCluster : public RedisServer
* \throw RuntimeException for all client errors
*/
virtual CommandReply set_model(const std::string& key,
std::string_view model,
const std::vector<std::string_view>& model,
const std::string& backend,
const std::string& device,
int batch_size = 0,
Expand All @@ -327,7 +327,7 @@ class RedisCluster : public RedisServer
* \brief Set a model from std::string_view buffer in the
* database for future execution in a multi-GPU system
* \param name The name to associate with the model
* \param model The model as a continuous buffer string_view
* \param model The model as a sequence of buffer string_view chunks
* \param backend The name of the backend
* (TF, TFLITE, TORCH, ONNX)
* \param first_gpu The first GPU to use with this model
Expand All @@ -344,7 +344,7 @@ class RedisCluster : public RedisServer
* \throw RuntimeException for all client errors
*/
virtual void set_model_multigpu(const std::string& name,
const std::string_view& model,
const std::vector<std::string_view>& model,
const std::string& backend,
int first_gpu,
int num_gpus,
Expand Down Expand Up @@ -527,6 +527,11 @@ class RedisCluster : public RedisServer
get_model_script_ai_info(const std::string& address,
const std::string& key,
const bool reset_stat);
/*!
* \brief Retrieve the current model chunk size
* \returns The size in bytes for model chunking
*/
virtual int get_model_chunk_size();

/*!
* \brief Run a CommandList via a Pipeline.
Expand Down Expand Up @@ -741,6 +746,21 @@ class RedisCluster : public RedisServer
std::vector<std::string>& inputs,
std::vector<std::string>& outputs);

/*!
* \brief Reconfigure the chunking size that Redis uses for model
* serialization, replication, and the model_get command.
* \details This method triggers the AI.CONFIG method in the Redis
* database to change the model chunking size.
*
* NOTE: The default size of 511MB should be fine for most
* applications, so it is expected to be very rare that a
* client calls this method. It is not necessary to call
 * this method for a model to be chunked.
* \param chunk_size The new chunk size in bytes
* \throw SmartRedis::Exception if the command fails.
*/
virtual void set_model_chunk_size(int chunk_size);

/*!
* \brief Execute a pipeline for the provided commands.
* The provided commands MUST be executable on a single
Expand Down
48 changes: 44 additions & 4 deletions include/redisserver.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ class RedisServer {
* \brief Set a model from std::string_view buffer in the
* database for future execution
* \param key The key to associate with the model
* \param model The model as a continuous buffer string_view
* \param model The model as a sequence of buffer string_view chunks
* \param backend The name of the backend
* (TF, TFLITE, TORCH, ONNX)
* \param device The name of the device for execution
Expand All @@ -295,7 +295,7 @@ class RedisServer {
* \throw RuntimeException for all client errors
*/
virtual CommandReply set_model(const std::string& key,
std::string_view model,
const std::vector<std::string_view>& model,
const std::string& backend,
const std::string& device,
int batch_size = 0,
Expand All @@ -311,7 +311,7 @@ class RedisServer {
* \brief Set a model from std::string_view buffer in the
* database for future execution in a multi-GPU system
* \param name The name to associate with the model
* \param model The model as a continuous buffer string_view
* \param model The model as a sequence of buffer string_view chunks
* \param backend The name of the backend
* (TF, TFLITE, TORCH, ONNX)
* \param first_gpu The first GPU to use with this model
Expand All @@ -328,7 +328,7 @@ class RedisServer {
* \throw RuntimeException for all client errors
*/
virtual void set_model_multigpu(const std::string& name,
const std::string_view& model,
const std::vector<std::string_view>& model,
const std::string& backend,
int first_gpu,
int num_gpus,
Expand Down Expand Up @@ -520,6 +520,35 @@ class RedisServer {
const std::string& key,
const bool reset_stat) = 0;

/*!
* \brief Retrieve the current model chunk size
* \returns The size in bytes for model chunking
*/
virtual int get_model_chunk_size() = 0;

/*!
* \brief Reconfigure the chunking size that Redis uses for model
* serialization, replication, and the model_get command.
* \details This method triggers the AI.CONFIG method in the Redis
* database to change the model chunking size.
*
* NOTE: The default size of 511MB should be fine for most
* applications, so it is expected to be very rare that a
* client calls this method. It is not necessary to call
 * this method for a model to be chunked.
* \param chunk_size The new chunk size in bytes
* \throw SmartRedis::Exception if the command fails.
*/
virtual void set_model_chunk_size(int chunk_size) = 0;

/*!
 * \brief Store the current model chunk size
 * \details Caches the value in the _model_chunk_size member so
 *          later operations can reuse it without querying the
 *          database again. This is a local cache update only;
 *          no database command is issued (contrast with
 *          set_model_chunk_size, which reconfigures the server).
 * \param chunk_size The updated model chunk size, in bytes
 */
virtual void store_model_chunk_size(int chunk_size) {
    _model_chunk_size = chunk_size;
}

/*!
* \brief Run a CommandList via a Pipeline. For clustered databases
* all commands must go to the same shard
Expand Down Expand Up @@ -567,6 +596,12 @@ class RedisServer {
*/
int _command_attempts;

/*!
* \brief The chunk size into which models need to be broken for
* transfer to Redis
*/
int _model_chunk_size;

/*!
* \brief Default value of connection timeout (seconds)
*/
Expand Down Expand Up @@ -630,6 +665,11 @@ class RedisServer {
*/
bool _is_domain_socket;

/*!
* \brief Default model chunk size
*/
static constexpr int _UNKNOWN_MODEL_CHUNK_SIZE = -1;

/*!
* \brief Environment variable for connection timeout
*/
Expand Down
Loading

0 comments on commit 53def75

Please sign in to comment.