Filter zeros in solver results (#700)

* add min and max functions for matrix classes * add CUDA min and max functions for matrix classes * add filter for negative solver results * add cuda matrix tests * fix test logic
NCAR · Dec 31, 2024 · a21dfae · a21dfae
1 parent 1dba473
commit a21dfae
Show file tree

Hide file tree

Showing 18 changed files with 273 additions and 8 deletions.
diff --git a/include/micm/cuda/util/cuda_dense_matrix.hpp b/include/micm/cuda/util/cuda_dense_matrix.hpp
@@ -185,6 +185,22 @@ namespace micm
           "CUBLAS Daxpy operation failed...");
     }
 
+    /// @brief For each element of the VectorMatrix, perform y = max(y, x), where x is a scalar constant
+    /// @param x The scalar constant to compare against
+    void Max(const T x)
+    {
+      static_assert(std::is_same_v<T, double>);
+      CHECK_CUDA_ERROR(micm::cuda::MatrixMax(this->param_, x), "CudaMatrixMax");
+    }
+
+    /// @brief For each element of the VectorMatrix, perform y = min(y, x), where x is a scalar constant
+    /// @param x The scalar constant to compare against
+    void Min(const T x)
+    {
+      static_assert(std::is_same_v<T, double>);
+      CHECK_CUDA_ERROR(micm::cuda::MatrixMin(this->param_, x), "CudaMatrixMin");
+    }
+
     // Copy the device data from the other Cuda dense matrix into this one
     void Copy(const CudaDenseMatrix& other)
     {

diff --git a/include/micm/cuda/util/cuda_matrix.cuh b/include/micm/cuda/util/cuda_matrix.cuh
@@ -24,6 +24,18 @@ namespace micm
     /// @returns Error code from free-ing data on device, if any
     cudaError_t FreeVector(CudaMatrixParam& vectorMatrix);
 
+    /// @brief Sets all elements in a CUDA matrix to their current value or a specified value, whichever is greater
+    /// @param vectorMatrix Struct containing allocated device memory
+    /// @param val Value to compare with each element in the matrix
+    template<typename T>
+    cudaError_t MatrixMax(CudaMatrixParam& vectorMatrix, T val);
+
+    /// @brief Sets all elements in a CUDA matrix to their current value or a specified value, whichever is lesser
+    /// @param vectorMatrix Struct containing allocated device memory
+    /// @param val Value to compare with each element in the matrix
+    template<typename T>
+    cudaError_t MatrixMin(CudaMatrixParam& vectorMatrix, T val);
+
     /// @brief Copies data from the host to the device
     /// @param vectorMatrix Struct containing allocated device memory
     /// @param h_data Host data to copy from

diff --git a/include/micm/solver/solver.hpp b/include/micm/solver/solver.hpp
@@ -60,7 +60,9 @@ namespace micm
 
     SolverResult Solve(double time_step, StatePolicy& state)
     {
-      return solver_.Solve(time_step, state);
+      auto result = solver_.Solve(time_step, state);
+      state.variables_.Max(0.0);
+      return result;
     }
 
     /// @brief Returns the number of grid cells

diff --git a/include/micm/solver/solver_result.hpp b/include/micm/solver/solver_result.hpp
@@ -48,6 +48,9 @@ namespace micm
 
     /// @brief Set all member variables to zero
     void Reset();
+
+    /// @brief Print the solver stats to the console
+    void Print() const;
   };
 
   inline void SolverStats::Reset()
@@ -61,6 +64,17 @@ namespace micm
     solves_ = 0;
   }
 
+  inline void SolverStats::Print() const
+  {
+    std::cout << "Function calls: " << function_calls_ << std::endl;
+    std::cout << "Jacobian updates: " << jacobian_updates_ << std::endl;
+    std::cout << "Number of steps: " << number_of_steps_ << std::endl;
+    std::cout << "Accepted steps: " << accepted_ << std::endl;
+    std::cout << "Rejected steps: " << rejected_ << std::endl;
+    std::cout << "Decompositions: " << decompositions_ << std::endl;
+    std::cout << "Solves: " << solves_ << std::endl;
+  }
+
   inline std::string SolverStateToString(const SolverState& state)
   {
     switch (state)

diff --git a/include/micm/util/matrix.hpp b/include/micm/util/matrix.hpp
@@ -230,6 +230,22 @@ namespace micm
         y += alpha * (*(x_iter++));
     }
 
+    /// @brief For each element of the matrix, perform y = max(y, x), where x is a scalar constant
+    /// @param x The scalar constant to compare against
+    void Max(const T &x)
+    {
+      for (auto &y : data_)
+        y = std::max(y, x);
+    }
+
+    /// @brief For each element of the matrix, perform y = min(y, x), where x is a scalar constant
+    /// @param x The scalar constant to compare against
+    void Min(const T &x)
+    {
+      for (auto &y : data_)
+        y = std::min(y, x);
+    }
+
     void ForEach(const std::function<void(T &, const T &)> f, const Matrix &a)
     {
       auto a_iter = a.AsVector().begin();

diff --git a/include/micm/util/vector_matrix.hpp b/include/micm/util/vector_matrix.hpp
@@ -288,6 +288,27 @@ namespace micm
       }
     }
 
+    /// @brief For each element of the VectorMatrix, perform y = max(y, x), where x is a scalar constant
+    /// @param x The scalar constant to compare against
+    void Max(const T &x)
+    {
+      MICM_PROFILE_FUNCTION();
+
+      for (auto &y : data_)
+        y = std::max(y, x);
+
+    }
+
+    /// @brief For each element of the VectorMatrix, perform y = min(y, x), where x is a scalar constant
+    /// @param x The scalar constant to compare against
+    void Min(const T &x)
+    {
+      MICM_PROFILE_FUNCTION();
+
+      for (auto &y : data_)
+        y = std::min(y, x);
+    }
+
     void ForEach(const std::function<void(T &, const T &)> f, const VectorMatrix &a)
     {
       auto this_iter = data_.begin();

diff --git a/src/util/cuda_matrix.cu b/src/util/cuda_matrix.cu
@@ -35,6 +35,52 @@ namespace micm
       return err;
     }
 
+    template<typename T>
+    __global__ void MatrixMaxKernel(T* d_data, std::size_t number_of_elements, T val)
+    {
+      std::size_t tid = blockIdx.x * BLOCK_SIZE + threadIdx.x;
+      if (tid < number_of_elements)
+      {
+        d_data[tid] = d_data[tid] > val ? d_data[tid] : val;
+      }
+    }
+
+    template<typename T>
+    cudaError_t MatrixMax(CudaMatrixParam& param, T val)
+    {
+      std::size_t number_of_blocks = (param.number_of_elements_ + BLOCK_SIZE - 1) / BLOCK_SIZE;
+      MatrixMaxKernel<<<
+          number_of_blocks,
+          BLOCK_SIZE,
+          0,
+          micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)>>>(param.d_data_, param.number_of_elements_, val);
+      cudaError_t err = cudaGetLastError();
+      return err;
+    }
+
+    template<typename T>
+    __global__ void MatrixMinKernel(T* d_data, std::size_t number_of_elements, T val)
+    {
+      std::size_t tid = blockIdx.x * BLOCK_SIZE + threadIdx.x;
+      if (tid < number_of_elements)
+      {
+        d_data[tid] = d_data[tid] < val ? d_data[tid] : val;
+      }
+    }
+
+    template<typename T>
+    cudaError_t MatrixMin(CudaMatrixParam& param, T val)
+    {
+      std::size_t number_of_blocks = (param.number_of_elements_ + BLOCK_SIZE - 1) / BLOCK_SIZE;
+      MatrixMinKernel<<<
+          number_of_blocks,
+          BLOCK_SIZE,
+          0,
+          micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)>>>(param.d_data_, param.number_of_elements_, val);
+      cudaError_t err = cudaGetLastError();
+      return err;
+    }
+
     template<typename T>
     cudaError_t CopyToDevice(CudaMatrixParam& param, std::vector<T>& h_data)
     {
@@ -98,6 +144,8 @@ namespace micm
     // source code needs the instantiation of the template
     template cudaError_t MallocVector<double>(CudaMatrixParam& param, std::size_t number_of_elements);
     template cudaError_t MallocVector<int>(CudaMatrixParam& param, std::size_t number_of_elements);
+    template cudaError_t MatrixMax<double>(CudaMatrixParam& param, double val);
+    template cudaError_t MatrixMin<double>(CudaMatrixParam& param, double val);
     template cudaError_t CopyToDevice<double>(CudaMatrixParam& param, std::vector<double>& h_data);
     template cudaError_t CopyToHost<double>(CudaMatrixParam& param, std::vector<double>& h_data);
     template cudaError_t CopyToDeviceFromDevice<double>(

diff --git a/test/tutorial/test_multiple_grid_cells.cpp b/test/tutorial/test_multiple_grid_cells.cpp
@@ -86,7 +86,7 @@ int main()
     {
       solver.CalculateRateConstants(state);
       auto result = solver.Solve(time_step - elapsed_solve_time, state);
-      elapsed_solve_time = result.final_time_;
+      elapsed_solve_time += result.final_time_;
     }
     state.PrintState(time_step * (i + 1));
   }

diff --git a/test/tutorial/test_rate_constants_no_user_defined_by_hand.cpp b/test/tutorial/test_rate_constants_no_user_defined_by_hand.cpp
@@ -138,7 +138,7 @@ int main(const int argc, const char* argv[])
     while (elapsed_solve_time < time_step)
     {
       auto result = solver.Solve(time_step - elapsed_solve_time, state);
-      elapsed_solve_time = result.final_time_;
+      elapsed_solve_time += result.final_time_;
     }
 
     state.PrintState(time_step * (i + 1));

diff --git a/test/tutorial/test_rate_constants_no_user_defined_with_config.cpp b/test/tutorial/test_rate_constants_no_user_defined_with_config.cpp
@@ -80,7 +80,7 @@ int main(const int argc, const char* argv[])
     while (elapsed_solve_time < time_step)
     {
       auto result = solver.Solve(time_step - elapsed_solve_time, state);
-      elapsed_solve_time = result.final_time_;
+      elapsed_solve_time += result.final_time_;
     }
 
     state.PrintState(time_step * (i + 1));

diff --git a/test/tutorial/test_rate_constants_user_defined_by_hand.cpp b/test/tutorial/test_rate_constants_user_defined_by_hand.cpp
@@ -164,7 +164,7 @@ int main(const int argc, const char* argv[])
     while (elapsed_solve_time < time_step)
     {
       auto result = solver.Solve(time_step - elapsed_solve_time, state);
-      elapsed_solve_time = result.final_time_;
+      elapsed_solve_time += result.final_time_;
     }
 
     state.PrintState(time_step * (i + 1));

diff --git a/test/tutorial/test_rate_constants_user_defined_with_config.cpp b/test/tutorial/test_rate_constants_user_defined_with_config.cpp
@@ -89,7 +89,7 @@ int main(const int argc, const char* argv[])
     while (elapsed_solve_time < time_step)
     {
       auto result = solver.Solve(time_step - elapsed_solve_time, state);
-      elapsed_solve_time = result.final_time_;
+      elapsed_solve_time += result.final_time_;
     }
 
     state.PrintState(time_step * (i + 1));

diff --git a/test/tutorial/test_solver_configuration.cpp b/test/tutorial/test_solver_configuration.cpp
@@ -65,7 +65,7 @@ void test_solver_type(auto& solver)
       total_stats.decompositions_ += result.stats_.decompositions_;
       total_stats.solves_ += result.stats_.solves_;
 
-      elapsed_solve_time = result.final_time_;
+      elapsed_solve_time += result.final_time_;
     }
 
     state.PrintState(time_step * (i + 1));

diff --git a/test/tutorial/test_vectorized_matrix_solver.cpp b/test/tutorial/test_vectorized_matrix_solver.cpp
@@ -43,7 +43,7 @@ void solve(auto& solver, auto& state, std::size_t number_of_grid_cells)
     while (elapsed_solve_time < time_step && solver_state != SolverState::Converged)
     {
       auto result = solver.Solve(time_step - elapsed_solve_time, state);
-      elapsed_solve_time = result.final_time_;
+      elapsed_solve_time += result.final_time_;
       solver_state = result.state_;
     }
   }

diff --git a/test/unit/cuda/util/test_cuda_dense_matrix.cpp b/test/unit/cuda/util/test_cuda_dense_matrix.cpp
@@ -571,4 +571,62 @@ TEST(CudaDenseMatrix, CopyFunction)
       EXPECT_EQ(matrix2[i][j], matrix[i][j]);
     }
   }
+}
+
+TEST(CudaDenseMatrix, TestMax)
+{
+  micm::CudaDenseMatrix<double,4> matrix{ 2, 3, 0.0 };
+  matrix.CopyToDevice();
+  matrix.Max(2.0);
+  matrix.CopyToHost();
+
+  for (auto& elem : matrix.AsVector())
+  {
+    EXPECT_EQ(elem, 2.0);
+  }
+
+  for (auto &elem : matrix.AsVector())
+  {
+    elem = 1.0;
+  }
+  matrix[1][1] = 3.0;
+  matrix.CopyToDevice();
+  matrix.Max(2.0);
+  matrix.CopyToHost();
+
+  EXPECT_EQ(matrix[0][0], 2.0);
+  EXPECT_EQ(matrix[0][1], 2.0);
+  EXPECT_EQ(matrix[0][2], 2.0);
+  EXPECT_EQ(matrix[1][0], 2.0);
+  EXPECT_EQ(matrix[1][1], 3.0);
+  EXPECT_EQ(matrix[1][2], 2.0);
+}
+
+TEST(CudaDenseMatrix, TestMin)
+{
+  micm::CudaDenseMatrix<double,4> matrix{ 2, 3, 0.0 };
+  matrix.CopyToDevice();
+  matrix.Min(2.0);
+  matrix.CopyToHost();
+
+  for (auto& elem : matrix.AsVector())
+  {
+    EXPECT_EQ(elem, 0.0);
+  }
+
+  for (auto &elem : matrix.AsVector())
+  {
+    elem = 1.0;
+  }
+  matrix[1][1] = 3.0;
+  matrix.CopyToDevice();
+  matrix.Min(2.0);
+  matrix.CopyToHost();
+
+  EXPECT_EQ(matrix[0][0], 1.0);
+  EXPECT_EQ(matrix[0][1], 1.0);
+  EXPECT_EQ(matrix[0][2], 1.0);
+  EXPECT_EQ(matrix[1][0], 1.0);
+  EXPECT_EQ(matrix[1][1], 2.0);
+  EXPECT_EQ(matrix[1][2], 1.0);
 }
diff --git a/test/unit/util/test_matrix.cpp b/test/unit/util/test_matrix.cpp
@@ -106,4 +106,14 @@ TEST(Matrix, ForEach)
 TEST(Matrix, SetScaler)
 {
   testSetScalar<micm::Matrix>();
+}
+
+TEST(Matrix, Max)
+{
+  testMax<micm::Matrix>();
+}
+
+TEST(Matrix, Min)
+{
+  testMin<micm::Matrix>();
 }
diff --git a/test/unit/util/test_matrix_policy.hpp b/test/unit/util/test_matrix_policy.hpp
@@ -277,5 +277,57 @@ MatrixPolicy<double> testSetScalar()
     EXPECT_EQ(elem, 2.0);
   }
 
+  return matrix;
+}
+
+template<template<class> class MatrixPolicy>
+MatrixPolicy<double> testMax()
+{
+  MatrixPolicy<double> matrix{ 2, 3, 0.0 };
+
+  matrix.Max(2.0);
+
+  for (auto &elem : matrix.AsVector())
+  {
+    EXPECT_EQ(elem, 2.0);
+  }
+
+  matrix = 1.0;
+  matrix[1][1] = 3.0;
+  matrix.Max(2.0);
+
+  EXPECT_EQ(matrix[0][0], 2.0);
+  EXPECT_EQ(matrix[0][1], 2.0);
+  EXPECT_EQ(matrix[0][2], 2.0);
+  EXPECT_EQ(matrix[1][0], 2.0);
+  EXPECT_EQ(matrix[1][1], 3.0);
+  EXPECT_EQ(matrix[1][2], 2.0);
+
+  return matrix;
+}
+
+template<template<class> class MatrixPolicy>
+MatrixPolicy<double> testMin()
+{
+  MatrixPolicy<double> matrix{ 2, 3, 0.0 };
+
+  matrix.Min(2.0);
+
+  for (auto &elem : matrix.AsVector())
+  {
+    EXPECT_EQ(elem, 0.0);
+  }
+
+  matrix = 1.0;
+  matrix[1][1] = 3.0;
+  matrix.Min(2.0);
+
+  EXPECT_EQ(matrix[0][0], 1.0);
+  EXPECT_EQ(matrix[0][1], 1.0);
+  EXPECT_EQ(matrix[0][2], 1.0);
+  EXPECT_EQ(matrix[1][0], 1.0);
+  EXPECT_EQ(matrix[1][1], 2.0);
+  EXPECT_EQ(matrix[1][2], 1.0);
+
   return matrix;
 }