From a21dfae93e1149f8b379d9d2bfac60feac01052d Mon Sep 17 00:00:00 2001 From: Matt Dawson Date: Tue, 31 Dec 2024 15:40:16 -0800 Subject: [PATCH] Filter zeros in solver results (#700) * add min and max functions for matrix classes * add CUDA min and max functions for matrix classes * add filter for negative solver results * add cuda matrix tests * fix test logic --- include/micm/cuda/util/cuda_dense_matrix.hpp | 16 +++++ include/micm/cuda/util/cuda_matrix.cuh | 12 ++++ include/micm/solver/solver.hpp | 4 +- include/micm/solver/solver_result.hpp | 14 +++++ include/micm/util/matrix.hpp | 16 +++++ include/micm/util/vector_matrix.hpp | 21 +++++++ src/util/cuda_matrix.cu | 48 +++++++++++++++ test/tutorial/test_multiple_grid_cells.cpp | 2 +- ...rate_constants_no_user_defined_by_hand.cpp | 2 +- ..._constants_no_user_defined_with_config.cpp | 2 +- ...st_rate_constants_user_defined_by_hand.cpp | 2 +- ...ate_constants_user_defined_with_config.cpp | 2 +- test/tutorial/test_solver_configuration.cpp | 2 +- .../test_vectorized_matrix_solver.cpp | 2 +- .../unit/cuda/util/test_cuda_dense_matrix.cpp | 58 +++++++++++++++++++ test/unit/util/test_matrix.cpp | 10 ++++ test/unit/util/test_matrix_policy.hpp | 52 +++++++++++++++++ test/unit/util/test_vector_matrix.cpp | 16 +++++ 18 files changed, 273 insertions(+), 8 deletions(-) diff --git a/include/micm/cuda/util/cuda_dense_matrix.hpp b/include/micm/cuda/util/cuda_dense_matrix.hpp index 77dffb3fc..d0994c3c9 100644 --- a/include/micm/cuda/util/cuda_dense_matrix.hpp +++ b/include/micm/cuda/util/cuda_dense_matrix.hpp @@ -185,6 +185,22 @@ namespace micm "CUBLAS Daxpy operation failed..."); } + /// @brief For each element of the VectorMatrix, perform y = max(y, x), where x is a scalar constant + /// @param x The scalar constant to compare against + void Max(const T x) + { + static_assert(std::is_same_v); + CHECK_CUDA_ERROR(micm::cuda::MatrixMax(this->param_, x), "CudaMatrixMax"); + } + + /// @brief For each element of the VectorMatrix, perform y = min(y, x), where x is a scalar constant + /// @param x The scalar constant to compare against + void Min(const T x) + { + static_assert(std::is_same_v); + CHECK_CUDA_ERROR(micm::cuda::MatrixMin(this->param_, x), "CudaMatrixMin"); + } + // Copy the device data from the other Cuda dense matrix into this one void Copy(const CudaDenseMatrix& other) { diff --git a/include/micm/cuda/util/cuda_matrix.cuh b/include/micm/cuda/util/cuda_matrix.cuh index 61bfff6e5..713f5844c 100644 --- a/include/micm/cuda/util/cuda_matrix.cuh +++ b/include/micm/cuda/util/cuda_matrix.cuh @@ -24,6 +24,18 @@ namespace micm /// @returns Error code from free-ing data on device, if any cudaError_t FreeVector(CudaMatrixParam& vectorMatrix); + /// @brief Sets all elements in a CUDA matrix to their current value or a specified value, whichever is greater + /// @param vectorMatrix Struct containing allocated device memory + /// @param val Value to compare with each element in the matrix + template + cudaError_t MatrixMax(CudaMatrixParam& vectorMatrix, T val); + + /// @brief Sets all elements in a CUDA matrix to their current value or a specified value, whichever is lesser + /// @param vectorMatrix Struct containing allocated device memory + /// @param val Value to compare with each element in the matrix + template + cudaError_t MatrixMin(CudaMatrixParam& vectorMatrix, T val); + /// @brief Copies data from the host to the device /// @param vectorMatrix Struct containing allocated device memory /// @param h_data Host data to copy from diff --git a/include/micm/solver/solver.hpp b/include/micm/solver/solver.hpp index f8fc816d0..187d6e46d 100644 --- a/include/micm/solver/solver.hpp +++ b/include/micm/solver/solver.hpp @@ -60,7 +60,9 @@ namespace micm SolverResult Solve(double time_step, StatePolicy& state) { - return solver_.Solve(time_step, state); + auto result = solver_.Solve(time_step, state); + state.variables_.Max(0.0); + return result; } /// @brief Returns the number of grid cells diff --git a/include/micm/solver/solver_result.hpp b/include/micm/solver/solver_result.hpp index fb9797df5..13ea33dc9 100644 --- a/include/micm/solver/solver_result.hpp +++ b/include/micm/solver/solver_result.hpp @@ -48,6 +48,9 @@ namespace micm /// @brief Set all member variables to zero void Reset(); + + /// @brief Print the solver stats to the console + void Print() const; }; inline void SolverStats::Reset() @@ -61,6 +64,17 @@ namespace micm solves_ = 0; } + inline void SolverStats::Print() const + { + std::cout << "Function calls: " << function_calls_ << std::endl; + std::cout << "Jacobian updates: " << jacobian_updates_ << std::endl; + std::cout << "Number of steps: " << number_of_steps_ << std::endl; + std::cout << "Accepted steps: " << accepted_ << std::endl; + std::cout << "Rejected steps: " << rejected_ << std::endl; + std::cout << "Decompositions: " << decompositions_ << std::endl; + std::cout << "Solves: " << solves_ << std::endl; + } + inline std::string SolverStateToString(const SolverState& state) { switch (state) diff --git a/include/micm/util/matrix.hpp b/include/micm/util/matrix.hpp index b5484dfd2..bd63456ac 100644 --- a/include/micm/util/matrix.hpp +++ b/include/micm/util/matrix.hpp @@ -230,6 +230,22 @@ namespace micm y += alpha * (*(x_iter++)); } + /// @brief For each element of the matrix, perform y = max(y, x), where x is a scalar constant + /// @param x The scalar constant to compare against + void Max(const T &x) + { + for (auto &y : data_) + y = std::max(y, x); + } + + /// @brief For each element of the matrix, perform y = min(y, x), where x is a scalar constant + /// @param x The scalar constant to compare against + void Min(const T &x) + { + for (auto &y : data_) + y = std::min(y, x); + } + void ForEach(const std::function f, const Matrix &a) { auto a_iter = a.AsVector().begin(); diff --git a/include/micm/util/vector_matrix.hpp b/include/micm/util/vector_matrix.hpp index 0ef022f30..3ee89315b 100644 --- a/include/micm/util/vector_matrix.hpp +++ b/include/micm/util/vector_matrix.hpp @@ -288,6 +288,27 @@ namespace micm } } + /// @brief For each element of the VectorMatrix, perform y = max(y, x), where x is a scalar constant + /// @param x The scalar constant to compare against + void Max(const T &x) + { + MICM_PROFILE_FUNCTION(); + + for (auto &y : data_) + y = std::max(y, x); + + } + + /// @brief For each element of the VectorMatrix, perform y = min(y, x), where x is a scalar constant + /// @param x The scalar constant to compare against + void Min(const T &x) + { + MICM_PROFILE_FUNCTION(); + + for (auto &y : data_) + y = std::min(y, x); + } + void ForEach(const std::function f, const VectorMatrix &a) { auto this_iter = data_.begin(); diff --git a/src/util/cuda_matrix.cu b/src/util/cuda_matrix.cu index 08a3946bf..20987ab4a 100644 --- a/src/util/cuda_matrix.cu +++ b/src/util/cuda_matrix.cu @@ -35,6 +35,52 @@ namespace micm return err; } + template + __global__ void MatrixMaxKernel(T* d_data, std::size_t number_of_elements, T val) + { + std::size_t tid = blockIdx.x * BLOCK_SIZE + threadIdx.x; + if (tid < number_of_elements) + { + d_data[tid] = d_data[tid] > val ? d_data[tid] : val; + } + } + + template + cudaError_t MatrixMax(CudaMatrixParam& param, T val) + { + std::size_t number_of_blocks = (param.number_of_elements_ + BLOCK_SIZE - 1) / BLOCK_SIZE; + MatrixMaxKernel<<< + number_of_blocks, + BLOCK_SIZE, + 0, + micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)>>>(param.d_data_, param.number_of_elements_, val); + cudaError_t err = cudaGetLastError(); + return err; + } + + template + __global__ void MatrixMinKernel(T* d_data, std::size_t number_of_elements, T val) + { + std::size_t tid = blockIdx.x * BLOCK_SIZE + threadIdx.x; + if (tid < number_of_elements) + { + d_data[tid] = d_data[tid] < val ? d_data[tid] : val; + } + } + + template + cudaError_t MatrixMin(CudaMatrixParam& param, T val) + { + std::size_t number_of_blocks = (param.number_of_elements_ + BLOCK_SIZE - 1) / BLOCK_SIZE; + MatrixMinKernel<<< + number_of_blocks, + BLOCK_SIZE, + 0, + micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)>>>(param.d_data_, param.number_of_elements_, val); + cudaError_t err = cudaGetLastError(); + return err; + } + template cudaError_t CopyToDevice(CudaMatrixParam& param, std::vector& h_data) { @@ -98,6 +144,8 @@ namespace micm // source code needs the instantiation of the template template cudaError_t MallocVector(CudaMatrixParam& param, std::size_t number_of_elements); template cudaError_t MallocVector(CudaMatrixParam& param, std::size_t number_of_elements); + template cudaError_t MatrixMax(CudaMatrixParam& param, double val); + template cudaError_t MatrixMin(CudaMatrixParam& param, double val); template cudaError_t CopyToDevice(CudaMatrixParam& param, std::vector& h_data); template cudaError_t CopyToHost(CudaMatrixParam& param, std::vector& h_data); template cudaError_t CopyToDeviceFromDevice( diff --git a/test/tutorial/test_multiple_grid_cells.cpp b/test/tutorial/test_multiple_grid_cells.cpp index 173a62440..d011919ba 100644 --- a/test/tutorial/test_multiple_grid_cells.cpp +++ b/test/tutorial/test_multiple_grid_cells.cpp @@ -86,7 +86,7 @@ int main() { solver.CalculateRateConstants(state); auto result = solver.Solve(time_step - elapsed_solve_time, state); - elapsed_solve_time = result.final_time_; + elapsed_solve_time += result.final_time_; } state.PrintState(time_step * (i + 1)); } diff --git a/test/tutorial/test_rate_constants_no_user_defined_by_hand.cpp b/test/tutorial/test_rate_constants_no_user_defined_by_hand.cpp index f105da91b..7172e13f1 100644 --- a/test/tutorial/test_rate_constants_no_user_defined_by_hand.cpp +++ b/test/tutorial/test_rate_constants_no_user_defined_by_hand.cpp @@ -138,7 +138,7 @@ int main(const int argc, const char* argv[]) while (elapsed_solve_time < time_step) { auto result = solver.Solve(time_step - elapsed_solve_time, state); - elapsed_solve_time = result.final_time_; + elapsed_solve_time += result.final_time_; } state.PrintState(time_step * (i + 1)); diff --git a/test/tutorial/test_rate_constants_no_user_defined_with_config.cpp b/test/tutorial/test_rate_constants_no_user_defined_with_config.cpp index 873c7f28b..1b65cfd3a 100644 --- a/test/tutorial/test_rate_constants_no_user_defined_with_config.cpp +++ b/test/tutorial/test_rate_constants_no_user_defined_with_config.cpp @@ -80,7 +80,7 @@ int main(const int argc, const char* argv[]) while (elapsed_solve_time < time_step) { auto result = solver.Solve(time_step - elapsed_solve_time, state); - elapsed_solve_time = result.final_time_; + elapsed_solve_time += result.final_time_; } state.PrintState(time_step * (i + 1)); diff --git a/test/tutorial/test_rate_constants_user_defined_by_hand.cpp b/test/tutorial/test_rate_constants_user_defined_by_hand.cpp index 7ded8f81a..8475edecc 100644 --- a/test/tutorial/test_rate_constants_user_defined_by_hand.cpp +++ b/test/tutorial/test_rate_constants_user_defined_by_hand.cpp @@ -164,7 +164,7 @@ int main(const int argc, const char* argv[]) while (elapsed_solve_time < time_step) { auto result = solver.Solve(time_step - elapsed_solve_time, state); - elapsed_solve_time = result.final_time_; + elapsed_solve_time += result.final_time_; } state.PrintState(time_step * (i + 1)); diff --git a/test/tutorial/test_rate_constants_user_defined_with_config.cpp b/test/tutorial/test_rate_constants_user_defined_with_config.cpp index 7421d645c..5603a699a 100644 --- a/test/tutorial/test_rate_constants_user_defined_with_config.cpp +++ b/test/tutorial/test_rate_constants_user_defined_with_config.cpp @@ -89,7 +89,7 @@ int main(const int argc, const char* argv[]) while (elapsed_solve_time < time_step) { auto result = solver.Solve(time_step - elapsed_solve_time, state); - elapsed_solve_time = result.final_time_; + elapsed_solve_time += result.final_time_; } state.PrintState(time_step * (i + 1)); diff --git a/test/tutorial/test_solver_configuration.cpp b/test/tutorial/test_solver_configuration.cpp index 522908d23..06ad83129 100644 --- a/test/tutorial/test_solver_configuration.cpp +++ b/test/tutorial/test_solver_configuration.cpp @@ -65,7 +65,7 @@ void test_solver_type(auto& solver) total_stats.decompositions_ += result.stats_.decompositions_; total_stats.solves_ += result.stats_.solves_; - elapsed_solve_time = result.final_time_; + elapsed_solve_time += result.final_time_; } state.PrintState(time_step * (i + 1)); diff --git a/test/tutorial/test_vectorized_matrix_solver.cpp b/test/tutorial/test_vectorized_matrix_solver.cpp index 29538e7d5..62ce9a5d6 100644 --- a/test/tutorial/test_vectorized_matrix_solver.cpp +++ b/test/tutorial/test_vectorized_matrix_solver.cpp @@ -43,7 +43,7 @@ void solve(auto& solver, auto& state, std::size_t number_of_grid_cells) while (elapsed_solve_time < time_step && solver_state != SolverState::Converged) { auto result = solver.Solve(time_step - elapsed_solve_time, state); - elapsed_solve_time = result.final_time_; + elapsed_solve_time += result.final_time_; solver_state = result.state_; } } diff --git a/test/unit/cuda/util/test_cuda_dense_matrix.cpp b/test/unit/cuda/util/test_cuda_dense_matrix.cpp index 8bac62627..14222178b 100644 --- a/test/unit/cuda/util/test_cuda_dense_matrix.cpp +++ b/test/unit/cuda/util/test_cuda_dense_matrix.cpp @@ -571,4 +571,62 @@ TEST(CudaDenseMatrix, CopyFunction) EXPECT_EQ(matrix2[i][j], matrix[i][j]); } } +} + +TEST(CudaDenseMatrix, TestMax) +{ + micm::CudaDenseMatrix matrix{ 2, 3, 0.0 }; + matrix.CopyToDevice(); + matrix.Max(2.0); + matrix.CopyToHost(); + + for (auto& elem : matrix.AsVector()) + { + EXPECT_EQ(elem, 2.0); + } + + for (auto &elem : matrix.AsVector()) + { + elem = 1.0; + } + matrix[1][1] = 3.0; + matrix.CopyToDevice(); + matrix.Max(2.0); + matrix.CopyToHost(); + + EXPECT_EQ(matrix[0][0], 2.0); + EXPECT_EQ(matrix[0][1], 2.0); + EXPECT_EQ(matrix[0][2], 2.0); + EXPECT_EQ(matrix[1][0], 2.0); + EXPECT_EQ(matrix[1][1], 3.0); + EXPECT_EQ(matrix[1][2], 2.0); +} + +TEST(CudaDenseMatrix, TestMin) +{ + micm::CudaDenseMatrix matrix{ 2, 3, 0.0 }; + matrix.CopyToDevice(); + matrix.Min(2.0); + matrix.CopyToHost(); + + for (auto& elem : matrix.AsVector()) + { + EXPECT_EQ(elem, 0.0); + } + + for (auto &elem : matrix.AsVector()) + { + elem = 1.0; + } + matrix[1][1] = 3.0; + matrix.CopyToDevice(); + matrix.Min(2.0); + matrix.CopyToHost(); + + EXPECT_EQ(matrix[0][0], 1.0); + EXPECT_EQ(matrix[0][1], 1.0); + EXPECT_EQ(matrix[0][2], 1.0); + EXPECT_EQ(matrix[1][0], 1.0); + EXPECT_EQ(matrix[1][1], 2.0); + EXPECT_EQ(matrix[1][2], 1.0); } \ No newline at end of file diff --git a/test/unit/util/test_matrix.cpp b/test/unit/util/test_matrix.cpp index 096af54a9..090fff0d1 100644 --- a/test/unit/util/test_matrix.cpp +++ b/test/unit/util/test_matrix.cpp @@ -106,4 +106,14 @@ TEST(Matrix, ForEach) TEST(Matrix, SetScaler) { testSetScalar(); +} + +TEST(Matrix, Max) +{ + testMax(); +} + +TEST(Matrix, Min) +{ + testMin(); } \ No newline at end of file diff --git a/test/unit/util/test_matrix_policy.hpp b/test/unit/util/test_matrix_policy.hpp index 29bb84aea..812e61689 100644 --- a/test/unit/util/test_matrix_policy.hpp +++ b/test/unit/util/test_matrix_policy.hpp @@ -277,5 +277,57 @@ MatrixPolicy testSetScalar() EXPECT_EQ(elem, 2.0); } + return matrix; +} + +template class MatrixPolicy> +MatrixPolicy testMax() +{ + MatrixPolicy matrix{ 2, 3, 0.0 }; + + matrix.Max(2.0); + + for (auto &elem : matrix.AsVector()) + { + EXPECT_EQ(elem, 2.0); + } + + matrix = 1.0; + matrix[1][1] = 3.0; + matrix.Max(2.0); + + EXPECT_EQ(matrix[0][0], 2.0); + EXPECT_EQ(matrix[0][1], 2.0); + EXPECT_EQ(matrix[0][2], 2.0); + EXPECT_EQ(matrix[1][0], 2.0); + EXPECT_EQ(matrix[1][1], 3.0); + EXPECT_EQ(matrix[1][2], 2.0); + + return matrix; +} + +template class MatrixPolicy> +MatrixPolicy testMin() +{ + MatrixPolicy matrix{ 2, 3, 0.0 }; + + matrix.Min(2.0); + + for (auto &elem : matrix.AsVector()) + { + EXPECT_EQ(elem, 0.0); + } + + matrix = 1.0; + matrix[1][1] = 3.0; + matrix.Min(2.0); + + EXPECT_EQ(matrix[0][0], 1.0); + EXPECT_EQ(matrix[0][1], 1.0); + EXPECT_EQ(matrix[0][2], 1.0); + EXPECT_EQ(matrix[1][0], 1.0); + EXPECT_EQ(matrix[1][1], 2.0); + EXPECT_EQ(matrix[1][2], 1.0); + return matrix; } \ No newline at end of file diff --git a/test/unit/util/test_vector_matrix.cpp b/test/unit/util/test_vector_matrix.cpp index 799eff40c..061689f09 100644 --- a/test/unit/util/test_vector_matrix.cpp +++ b/test/unit/util/test_vector_matrix.cpp @@ -105,4 +105,20 @@ TEST(VectorMatrix, SetScaler) testSetScalar(); testSetScalar(); testSetScalar(); +} + +TEST(VectorMatrix, Max) +{ + testMax(); + testMax(); + testMax(); + testMax(); +} + +TEST(VectorMatrix, Min) +{ + testMin(); + testMin(); + testMin(); + testMin(); } \ No newline at end of file