Skip to content

Commit

Permalink
[femutils+poisson] Do the initialization of the CSR matrix on accelerator.
Browse files Browse the repository at this point in the history
  • Loading branch information
grospelliergilles committed Nov 29, 2024
1 parent 62a7e5b commit b0856e4
Show file tree
Hide file tree
Showing 8 changed files with 49 additions and 29 deletions.
37 changes: 23 additions & 14 deletions femutils/CsrFormatMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
#include <arcane/utils/FatalErrorException.h>
#include <arcane/utils/NumArray.h>

#include <arcane/VariableTypes.h>
#include <arcane/IItemFamily.h>
#include <arcane/core/VariableTypes.h>
#include <arcane/core/IItemFamily.h>

#include <arcane/accelerator/core/RunQueue.h>

#include "CsrFormatMatrix.h"

Expand All @@ -25,18 +27,24 @@ namespace Arcane::FemUtils
/*---------------------------------------------------------------------------*/

void CsrFormat::
initialize(IItemFamily* dof_family, Int32 nnz, Int32 nbRow)
initialize(IItemFamily* dof_family, Int32 nnz, Int32 nbRow, RunQueue& queue)
{
info() << "Initialize CsrFormat: nb_non_zero=" << nnz << " nb_row=" << nbRow;

eMemoryRessource mem_ressource = queue.memoryRessource();
m_matrix_row = NumArray<Int32, MDDim1>(mem_ressource);
m_matrix_column = NumArray<Int32, MDDim1>(mem_ressource);
m_matrix_value = NumArray<Real, MDDim1>(mem_ressource);
m_matrix_rows_nb_column = NumArray<Int32, MDDim1>(mem_ressource);

m_matrix_row.resize(nbRow);
m_matrix_column.resize(nnz);
m_matrix_value.resize(nnz);
m_matrix_row.fill(-1);
m_matrix_column.fill(-1);
m_matrix_value.fill(0);
m_matrix_row.fill(-1, &queue);
m_matrix_column.fill(-1, &queue);
m_matrix_value.fill(0, &queue);
m_matrix_rows_nb_column.resize(nbRow);
m_matrix_rows_nb_column.fill(0);
m_matrix_rows_nb_column.fill(0, &queue);
m_dof_family = dof_family;
m_last_value = 0;
m_nnz = nnz;
Expand All @@ -47,7 +55,7 @@ initialize(IItemFamily* dof_family, Int32 nnz, Int32 nbRow)
/*---------------------------------------------------------------------------*/

void CsrFormat::
translateToLinearSystem(DoFLinearSystem& linear_system)
translateToLinearSystem(DoFLinearSystem& linear_system, const RunQueue& queue)
{
info() << "TranslateToLinearSystem this=" << this;
bool do_set_csr = linear_system.hasSetCSRValues();
Expand All @@ -56,16 +64,17 @@ translateToLinearSystem(DoFLinearSystem& linear_system)
// NOTE: it should be possible to compute this in setCoordinates().
// This value is constant as long as the structure of the matrix does not change,
// so we can store these values instead of recomputing them.
if (do_set_csr){
if (do_set_csr) {
m_matrix_rows_nb_column.resize(m_matrix_row.extent0());
m_matrix_rows_nb_column.fill(0);
//m_matrix_rows_nb_column.fill(0);
}
Int32 nb_row = m_matrix_row.dim1Size();
for (Int32 i = 0; i < nb_row; i++) {
m_matrix_rows_nb_column[i] = 0;
if (((i + 1) < nb_row) && (m_matrix_row(i) == m_matrix_row(i + 1)))
continue;
for (Int32 j = m_matrix_row(i); ((i + 1) < nb_row && j < m_matrix_row(i + 1)) || ((i + 1) == nb_row && j < m_matrix_column.dim1Size()); j++) {
if (do_set_csr){
if (do_set_csr) {
++m_matrix_rows_nb_column[i];
continue;
}
Expand All @@ -76,9 +85,9 @@ translateToLinearSystem(DoFLinearSystem& linear_system)
}
}

if (do_set_csr){
CSRFormatView csr_view(m_matrix_row.to1DSpan(),m_matrix_rows_nb_column.to1DSpan(),
m_matrix_column.to1DSpan(),m_matrix_value.to1DSpan());
if (do_set_csr) {
CSRFormatView csr_view(m_matrix_row.to1DSpan(), m_matrix_rows_nb_column.to1DSpan(),
m_matrix_column.to1DSpan(), m_matrix_value.to1DSpan());
linear_system.setCSRValues(csr_view);
}
}
Expand Down
10 changes: 6 additions & 4 deletions femutils/CsrFormatMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,14 @@ class CsrFormat
{
public:

CsrFormat(ISubDomain* sd)
: TraceAccessor(sd->traceMng())
explicit CsrFormat(ITraceMng* tm)
: TraceAccessor(tm)
{
}

void initialize(IItemFamily* dof_family, Int32 nnz, Int32 nbRow);
public:

void initialize(IItemFamily* dof_family, Int32 nnz, Int32 nbRow, RunQueue& queue);

/**
* @brief
Expand Down Expand Up @@ -85,7 +87,7 @@ class CsrFormat
*
* @param linear_system
*/
void translateToLinearSystem(DoFLinearSystem& linear_system);
void translateToLinearSystem(DoFLinearSystem& linear_system, const RunQueue& queue);

/**
* @brief function to print the current content of the csr matrix
Expand Down
4 changes: 2 additions & 2 deletions poisson/BlCsrBiliAssembly.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ _buildMatrixBuildLessCsr()
ARCANE_THROW(NotImplementedException, "");

Int32 nnz = nedge * 2 + nbnde;
m_csr_matrix.initialize(m_dof_family, nnz, nbnde);
m_csr_matrix.initialize(m_dof_family, nnz, nbnde, m_queue);

Check warning on line 36 in poisson/BlCsrBiliAssembly.cc

View check run for this annotation

Codecov / codecov/patch

poisson/BlCsrBiliAssembly.cc#L36

Added line #L36 was not covered by tests

Integer index = 1;
m_csr_matrix.m_matrix_row(0) = 0;
Expand Down Expand Up @@ -77,7 +77,7 @@ void FemModule::_buildMatrixGpuBuildLessCsr()

NumArray<Int32, MDDim1> tmp_row;
tmp_row.resize(nbnde);
m_csr_matrix.initialize(m_dof_family, nnz, nbnde);
m_csr_matrix.initialize(m_dof_family, nnz, nbnde, m_queue);

RunQueue* queue = acceleratorMng()->defaultQueue();
auto command = makeCommand(queue);
Expand Down
2 changes: 1 addition & 1 deletion poisson/CsrBiliAssembly.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ _buildMatrixCsr()

Int32 nnz = nedge * 2 + nbnde;

m_csr_matrix.initialize(m_dof_family, nnz, nbNode());
m_csr_matrix.initialize(m_dof_family, nnz, nbNode(), m_queue);
auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView());
// We iterate through the nodes, and we no longer sort: we assume the node IDs are sorted, and we iterate through the columns to avoid making < and > comparisons
if (mesh_dim == 2) {
Expand Down
7 changes: 3 additions & 4 deletions poisson/CsrGpuBiliAssembly.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ _buildMatrixCsrGPU()

Int32 nb_node = nbNode();
Int32 nb_non_zero = nb_node + 2 * (mesh_dim == 2 ? nbFace() : m_nb_edge);
m_csr_matrix.initialize(m_dof_family, nb_non_zero, nb_node);
m_csr_matrix.initialize(m_dof_family, nb_non_zero, nb_node, m_queue);

NumArray<uint, MDDim1> offsets_numarray(nb_node + 1);
SmallSpan<uint> offsets_smallspan = offsets_numarray.to1DSmallSpan();
Expand Down Expand Up @@ -265,8 +265,7 @@ _assembleCsrGPUBilinearOperatorTETRA4()
}

{
RunQueue* queue = acceleratorMng()->defaultQueue();

This comment has been minimized.

Copy link
@mohd-afeef-badri

mohd-afeef-badri Dec 2, 2024

Collaborator

@grospelliergilles I am not sure I understand why this is done only here, and not for the other functions in this file or in the NW-CSR and BL-CSR functions.

auto command = makeCommand(queue);
auto command = makeCommand(m_queue);

Int32 row_csr_size = m_csr_matrix.m_matrix_row.extent0();
Int32 col_csr_size = m_csr_matrix.m_matrix_column.extent0();
Expand All @@ -283,7 +282,7 @@ _assembleCsrGPUBilinearOperatorTETRA4()
ItemGenericInfoListView nodes_infos(mesh()->nodeFamily());

Timer::Action timer_add_compute(m_time_stats, "AddAndCompute");

ax::ProfileRegion ps_region(m_queue,"AddAndComputeBilinearTetra4",0x00FF7F);
command << RUNCOMMAND_ENUMERATE(Cell, icell, allCells())
{

Expand Down
10 changes: 8 additions & 2 deletions poisson/FemModule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ startInit()
{
info() << "Module Fem INIT";

m_queue = *(acceleratorMng()->defaultQueue());
// Once everything is available on the GPU, we will be able to
// use device memory.
//if (m_queue.isAcceleratorPolicy())
//m_queue.setMemoryRessource(eMemoryRessource::Device);

{
IMesh* mesh = defaultMesh();
// If we do not create edges, we need to create custom connectivity
Expand Down Expand Up @@ -231,7 +237,7 @@ void FemModule::
_doStationarySolve()
{
Timer::Action timer_action(m_time_stats, "StationarySolve");

Accelerator::ProfileRegion ps1(m_queue, "StationarySolve", 0xFF00FF);
_getMaterialParameters();

auto dim = mesh()->dimension();
Expand Down Expand Up @@ -351,7 +357,7 @@ _doStationarySolve()
_assembleCsrGpuLinearOperator();
{
Timer::Action timer_action(m_time_stats, "TranslateToLinearSystem");
m_csr_matrix.translateToLinearSystem(m_linear_system);
m_csr_matrix.translateToLinearSystem(m_linear_system, m_queue);
}
_translateRhs();
}
Expand Down
6 changes: 5 additions & 1 deletion poisson/FemModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

//include for GPU use
#include "arcane/accelerator/core/IAcceleratorMng.h"
#include "arcane/accelerator/core/ProfileRegion.h"
#include "arcane/accelerator/Accelerator.h"
#include "arcane/accelerator/core/RunQueue.h"
#include "arcane/accelerator/Atomic.h"
Expand Down Expand Up @@ -119,7 +120,7 @@ class FemModule
: ArcaneFemObject(mbi)
, m_dofs_on_nodes(mbi.subDomain()->traceMng())
, m_coo_matrix(mbi.subDomain())
, m_csr_matrix(mbi.subDomain())
, m_csr_matrix(mbi.subDomain()->traceMng())
, m_time_stats(mbi.subDomain()->timeStats())
{
ICaseMng* cm = mbi.subDomain()->caseMng();
Expand Down Expand Up @@ -192,6 +193,9 @@ class FemModule
//! Number of edges (only for 3D meshes)
Int64 m_nb_edge = -1;

//! Default queue used for computation.
RunQueue m_queue;

private:

void _handleFlags();
Expand Down
2 changes: 1 addition & 1 deletion poisson/NodeWiseCsrBiliAssembly.cc
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ _buildMatrixNodeWiseCsr()

Int32 nb_node = nbNode();
Int32 nb_non_zero = nb_node + 2 * (mesh_dim == 2 ? nbFace() : m_nb_edge);
m_csr_matrix.initialize(m_dof_family, nb_non_zero, nb_node);
m_csr_matrix.initialize(m_dof_family, nb_non_zero, nb_node, m_queue);

NumArray<uint, MDDim1> offsets_numarray(nb_node + 1);
SmallSpan<uint> offsets_smallspan = offsets_numarray.to1DSmallSpan();
Expand Down

0 comments on commit b0856e4

Please sign in to comment.