Skip to content

Commit

Permalink
preparing for generating other precisions for 3D CPU SpTRSV
Browse files: browse the repository at this point in the history
  • Loading branch information
liuyangzhuan committed Dec 6, 2023
1 parent 9df855f commit 0c06aab
Show file tree
Hide file tree
Showing 15 changed files with 1,848 additions and 1,935 deletions.
266 changes: 133 additions & 133 deletions SRC/cuda/pdgstrs_lsum_cuda.cu

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions SRC/double/pdgssvx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1390,7 +1390,7 @@ pdgssvx(superlu_dist_options_t *options, SuperMatrix *A,
} /* end printing stats */


/* nvshmem related. The nvshmem_malloc has to be called before trs_compute_communication_structure, otherwise solve is much slower*/
/* NVSHMEM related: nvshmem_malloc has to be called before dtrs_compute_communication_structure; otherwise the solve is much slower. */
#ifdef HAVE_NVSHMEM
nsupers = Glu_persist->supno[n-1] + 1;
int nc = CEILING( nsupers, grid->npcol);
Expand All @@ -1413,7 +1413,7 @@ pdgssvx(superlu_dist_options_t *options, SuperMatrix *A,
int* supernodeMask = int32Malloc_dist(nsupers);
for(int ii=0; ii<nsupers; ii++)
supernodeMask[ii]=1;
trs_compute_communication_structure(options, n, LUstruct,
dtrs_compute_communication_structure(options, n, LUstruct,
ScalePermstruct, supernodeMask, grid, stat);
SUPERLU_FREE(supernodeMask);
}
Expand Down
6 changes: 3 additions & 3 deletions SRC/double/pdgssvx3d.c
Original file line number Diff line number Diff line change
Expand Up @@ -1187,7 +1187,7 @@ void pdgssvx3d(superlu_dist_options_t *options, SuperMatrix *A,

/* Perform numerical factorization in parallel on all process layers.*/

/* nvshmem related. The nvshmem_malloc has to be called before trs_compute_communication_structure, otherwise solve is much slower*/
/* NVSHMEM related: nvshmem_malloc has to be called before dtrs_compute_communication_structure; otherwise the solve is much slower. */
#ifdef HAVE_NVSHMEM
int nc = CEILING( nsupers, grid->npcol);
int nr = CEILING( nsupers, grid->nprow);
Expand Down Expand Up @@ -1277,13 +1277,13 @@ void pdgssvx3d(superlu_dist_options_t *options, SuperMatrix *A,

if ( options->Fact != SamePattern_SameRowPerm) {
if (get_new3dsolve() && Solve3D==true){
trs_compute_communication_structure(options, n, LUstruct,
dtrs_compute_communication_structure(options, n, LUstruct,
ScalePermstruct, trf3Dpartition->supernodeMask, grid, stat);
}else{
int* supernodeMask = int32Malloc_dist(nsupers);
for(int ii=0; ii<nsupers; ii++)
supernodeMask[ii]=1;
trs_compute_communication_structure(options, n, LUstruct,
dtrs_compute_communication_structure(options, n, LUstruct,
ScalePermstruct, supernodeMask, grid, stat);
SUPERLU_FREE(supernodeMask);
}
Expand Down
6 changes: 3 additions & 3 deletions SRC/double/pdgssvx3d_1pass_Yang.c
Original file line number Diff line number Diff line change
Expand Up @@ -1205,7 +1205,7 @@ void pdgssvx3d(superlu_dist_options_t *options, SuperMatrix *A,

/* Perform numerical factorization in parallel on all process layers.*/

/* nvshmem related. The nvshmem_malloc has to be called before trs_compute_communication_structure, otherwise solve is much slower*/
/* NVSHMEM related: nvshmem_malloc has to be called before dtrs_compute_communication_structure; otherwise the solve is much slower. */
#ifdef HAVE_NVSHMEM
int nc = CEILING( nsupers, grid->npcol);
int nr = CEILING( nsupers, grid->nprow);
Expand Down Expand Up @@ -1294,13 +1294,13 @@ void pdgssvx3d(superlu_dist_options_t *options, SuperMatrix *A,

if ( options->Fact != SamePattern_SameRowPerm) {
if (get_new3dsolve() && Solve3D==true){
trs_compute_communication_structure(options, n, LUstruct,
dtrs_compute_communication_structure(options, n, LUstruct,
ScalePermstruct, trf3Dpartition->supernodeMask, grid, stat);
}else{
int* supernodeMask = int32Malloc_dist(nsupers);
for(int ii=0; ii<nsupers; ii++)
supernodeMask[ii]=1;
trs_compute_communication_structure(options, n, LUstruct,
dtrs_compute_communication_structure(options, n, LUstruct,
ScalePermstruct, supernodeMask, grid, stat);
SUPERLU_FREE(supernodeMask);
}
Expand Down
6 changes: 3 additions & 3 deletions SRC/double/pdgssvx3d_2pass_Yang.c
Original file line number Diff line number Diff line change
Expand Up @@ -1239,7 +1239,7 @@ void pdgssvx3d(superlu_dist_options_t *options, SuperMatrix *A,

/* Perform numerical factorization in parallel on all process layers.*/

/* nvshmem related. The nvshmem_malloc has to be called before trs_compute_communication_structure, otherwise solve is much slower*/
/* NVSHMEM related: nvshmem_malloc has to be called before dtrs_compute_communication_structure; otherwise the solve is much slower. */
#ifdef HAVE_NVSHMEM
int nc = CEILING( nsupers, grid->npcol);
int nr = CEILING( nsupers, grid->nprow);
Expand Down Expand Up @@ -1328,13 +1328,13 @@ void pdgssvx3d(superlu_dist_options_t *options, SuperMatrix *A,

if ( options->Fact != SamePattern_SameRowPerm) {
if (get_new3dsolve() && Solve3D==true){
trs_compute_communication_structure(options, n, LUstruct,
dtrs_compute_communication_structure(options, n, LUstruct,
ScalePermstruct, trf3Dpartition->supernodeMask, grid, stat);
}else{
int* supernodeMask = int32Malloc_dist(nsupers);
for(int ii=0; ii<nsupers; ii++)
supernodeMask[ii]=1;
trs_compute_communication_structure(options, n, LUstruct,
dtrs_compute_communication_structure(options, n, LUstruct,
ScalePermstruct, supernodeMask, grid, stat);
SUPERLU_FREE(supernodeMask);
}
Expand Down
6 changes: 3 additions & 3 deletions SRC/double/pdgssvx3d_piyush.c
Original file line number Diff line number Diff line change
Expand Up @@ -1185,7 +1185,7 @@ void pdgssvx3d(superlu_dist_options_t *options, SuperMatrix *A,

/* Perform numerical factorization in parallel on all process layers.*/

/* nvshmem related. The nvshmem_malloc has to be called before trs_compute_communication_structure, otherwise solve is much slower*/
/* NVSHMEM related: nvshmem_malloc has to be called before dtrs_compute_communication_structure; otherwise the solve is much slower. */
#ifdef HAVE_NVSHMEM
int nc = CEILING( nsupers, grid->npcol);
int nr = CEILING( nsupers, grid->nprow);
Expand Down Expand Up @@ -1274,13 +1274,13 @@ void pdgssvx3d(superlu_dist_options_t *options, SuperMatrix *A,

if ( options->Fact != SamePattern_SameRowPerm) {
if (get_new3dsolve() && Solve3D==true){
trs_compute_communication_structure(options, n, LUstruct,
dtrs_compute_communication_structure(options, n, LUstruct,
ScalePermstruct, trf3Dpartition->supernodeMask, grid, stat);
}else{
int* supernodeMask = int32Malloc_dist(nsupers);
for(int ii=0; ii<nsupers; ii++)
supernodeMask[ii]=1;
trs_compute_communication_structure(options, n, LUstruct,
dtrs_compute_communication_structure(options, n, LUstruct,
ScalePermstruct, supernodeMask, grid, stat);
SUPERLU_FREE(supernodeMask);
}
Expand Down
4 changes: 2 additions & 2 deletions SRC/double/pdgssvx_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@ pdgssvx_ABglobal(superlu_dist_options_t *options, SuperMatrix *A,
stat->utime[FACT] = SuperLU_timer_() - t;


/* nvshmem related. The nvshmem_malloc has to be called before trs_compute_communication_structure, otherwise solve is much slower*/
/* NVSHMEM related: nvshmem_malloc has to be called before dtrs_compute_communication_structure; otherwise the solve is much slower. */
int nsupers = Glu_persist->supno[n-1] + 1;
#ifdef HAVE_NVSHMEM
int nc = CEILING( nsupers, grid->npcol);
Expand All @@ -960,7 +960,7 @@ pdgssvx_ABglobal(superlu_dist_options_t *options, SuperMatrix *A,
int* supernodeMask = int32Malloc_dist(nsupers);
for(int ii=0; ii<nsupers; ii++)
supernodeMask[ii]=1;
trs_compute_communication_structure(options, n, LUstruct,
dtrs_compute_communication_structure(options, n, LUstruct,
ScalePermstruct, supernodeMask, grid, stat);
SUPERLU_FREE(supernodeMask);
}
Expand Down
Loading

0 comments on commit 0c06aab

Please sign in to comment.