From ce4f9d4bc1c12e306487e7d6dbfb1e384d4363cc Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Tue, 13 Feb 2024 12:18:00 -0800 Subject: [PATCH] Fix compile errors for 64-bit integers; a few still remain in scatterGPU_batch_flat --- SRC/TRF3dV100/batch_factorize.cu | 2 +- SRC/cuda/pdgstrs_lsum_cuda.cu | 2 +- SRC/cuda/psgstrs_lsum_cuda.cu | 2 +- SRC/cuda/pzgstrs_lsum_cuda.cu | 2 +- SRC/include/superlu_ddefs.h | 6 ++---- SRC/include/superlu_sdefs.h | 5 +---- SRC/include/superlu_zdefs.h | 5 +---- .../run_cmake_build_perlmutter_nvidia_nvshmem.sh | 1 + 8 files changed, 9 insertions(+), 16 deletions(-) diff --git a/SRC/TRF3dV100/batch_factorize.cu b/SRC/TRF3dV100/batch_factorize.cu index 8c08a424..68ba8f6b 100644 --- a/SRC/TRF3dV100/batch_factorize.cu +++ b/SRC/TRF3dV100/batch_factorize.cu @@ -771,7 +771,7 @@ void copyGPULUDataToHost( gpuErrchk( cudaMemcpy(host_Llu->Unzval_br_new_dat, d_localLU.Unzval_br_new_dat, d_localLU.Unzval_br_new_cnt * sizeof(double), cudaMemcpyDeviceToHost) ); // Convert the host data from block row to skyline - int* xsup = LUstruct->Glu_persist->xsup; + int_t* xsup = LUstruct->Glu_persist->xsup; int n = xsup[ws->nsupers]; gridinfo_t *grid = &(grid3d->grid2d); pdconvertUROWDATA2skyline(options, grid, LUstruct, stat, n); diff --git a/SRC/cuda/pdgstrs_lsum_cuda.cu b/SRC/cuda/pdgstrs_lsum_cuda.cu index c996e8ff..fb6f7f37 100755 --- a/SRC/cuda/pdgstrs_lsum_cuda.cu +++ b/SRC/cuda/pdgstrs_lsum_cuda.cu @@ -1303,7 +1303,7 @@ __global__ void dwait_bcrd_u int* d_msgnum, int* d_flag_mod_u, double *lsum, /* Sum of local modifications. */ - int_t *bmod, /* Modification count for L-solve. */ + int *bmod, /* Modification count for L-solve. 
*/ gridinfo_t *grid, int_t *xsup, int_t *ilsum, diff --git a/SRC/cuda/psgstrs_lsum_cuda.cu b/SRC/cuda/psgstrs_lsum_cuda.cu index c8b0ec3e..321f34a6 100644 --- a/SRC/cuda/psgstrs_lsum_cuda.cu +++ b/SRC/cuda/psgstrs_lsum_cuda.cu @@ -1303,7 +1303,7 @@ __global__ void swait_bcrd_u int* d_msgnum, int* d_flag_mod_u, float *lsum, /* Sum of local modifications. */ - int_t *bmod, /* Modification count for L-solve. */ + int *bmod, /* Modification count for L-solve. */ gridinfo_t *grid, int_t *xsup, int_t *ilsum, diff --git a/SRC/cuda/pzgstrs_lsum_cuda.cu b/SRC/cuda/pzgstrs_lsum_cuda.cu index 721a35ea..c8eb6307 100644 --- a/SRC/cuda/pzgstrs_lsum_cuda.cu +++ b/SRC/cuda/pzgstrs_lsum_cuda.cu @@ -1201,7 +1201,7 @@ __global__ void zwait_bcrd_u int* d_msgnum, int* d_flag_mod_u, doublecomplex *lsum, /* Sum of local modifications. */ - int_t *bmod, /* Modification count for L-solve. */ + int *bmod, /* Modification count for L-solve. */ gridinfo_t *grid, int_t *xsup, int_t *ilsum, diff --git a/SRC/include/superlu_ddefs.h b/SRC/include/superlu_ddefs.h index 6f691425..0016578e 100755 --- a/SRC/include/superlu_ddefs.h +++ b/SRC/include/superlu_ddefs.h @@ -771,9 +771,6 @@ extern int_t dleafForestForwardSolve3d(superlu_dist_options_t *options, int_t tr dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer); -extern int_t dtrs_compute_communication_structure(superlu_dist_options_t *options, int_t n, dLUstruct_t * LUstruct, - dScalePermstruct_t * ScalePermstruct, - int* supernodeMask, gridinfo_t *grid, SuperLUStat_t * stat); extern int_t dreduceSolvedX_newsolve(int_t treeId, int_t sender, int_t receiver, double* x, int nrhs, dtrf3Dpartition_t* trf3Dpartition, dLUstruct_t* LUstruct, gridinfo3d_t* grid3d, double* recvbuf, xtrsTimer_t *xtrsTimer); @@ -988,7 +985,8 @@ extern void dscaleMatrixDiagonally(fact_t Fact, dScalePermstruct_t *, SuperMatri extern void dperform_row_permutation(superlu_dist_options_t *, fact_t Fact, dScalePermstruct_t *, dLUstruct_t *LUstruct, int_t m, 
int_t n, gridinfo_t *, SuperMatrix *A, SuperMatrix *GA, SuperLUStat_t *, - int job, int_t Equil, int *rowequ, int *colequ, int_t *iinfo); + int job, int Equil, int_t *rowequ, int *colequ, int *iinfo); + extern double dcomputeA_Norm(int notran, SuperMatrix *, gridinfo_t *); extern int dtrs_compute_communication_structure(superlu_dist_options_t *options, int_t n, dLUstruct_t *, dScalePermstruct_t * ScalePermstruct, diff --git a/SRC/include/superlu_sdefs.h b/SRC/include/superlu_sdefs.h index 551e419f..903fbd88 100755 --- a/SRC/include/superlu_sdefs.h +++ b/SRC/include/superlu_sdefs.h @@ -771,9 +771,6 @@ extern int_t sleafForestForwardSolve3d(superlu_dist_options_t *options, int_t tr sSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer); -extern int_t strs_compute_communication_structure(superlu_dist_options_t *options, int_t n, sLUstruct_t * LUstruct, - sScalePermstruct_t * ScalePermstruct, - int* supernodeMask, gridinfo_t *grid, SuperLUStat_t * stat); extern int_t sreduceSolvedX_newsolve(int_t treeId, int_t sender, int_t receiver, float* x, int nrhs, strf3Dpartition_t* trf3Dpartition, sLUstruct_t* LUstruct, gridinfo3d_t* grid3d, float* recvbuf, xtrsTimer_t *xtrsTimer); @@ -988,7 +985,7 @@ extern void sscaleMatrixDiagonally(fact_t Fact, sScalePermstruct_t *, SuperMatri extern void sperform_row_permutation(superlu_dist_options_t *, fact_t Fact, sScalePermstruct_t *, sLUstruct_t *LUstruct, int_t m, int_t n, gridinfo_t *, SuperMatrix *A, SuperMatrix *GA, SuperLUStat_t *, - int job, int_t Equil, int *rowequ, int *colequ, int_t *iinfo); + int job, int Equil, int_t *rowequ, int *colequ, int *iinfo); extern float scomputeA_Norm(int notran, SuperMatrix *, gridinfo_t *); extern int strs_compute_communication_structure(superlu_dist_options_t *options, int_t n, sLUstruct_t *, sScalePermstruct_t * ScalePermstruct, diff --git a/SRC/include/superlu_zdefs.h b/SRC/include/superlu_zdefs.h index 06de001d..708063f1 100755 --- a/SRC/include/superlu_zdefs.h +++ 
b/SRC/include/superlu_zdefs.h @@ -771,9 +771,6 @@ extern int_t zleafForestForwardSolve3d(superlu_dist_options_t *options, int_t tr zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer); -extern int_t ztrs_compute_communication_structure(superlu_dist_options_t *options, int_t n, zLUstruct_t * LUstruct, - zScalePermstruct_t * ScalePermstruct, - int* supernodeMask, gridinfo_t *grid, SuperLUStat_t * stat); extern int_t zreduceSolvedX_newsolve(int_t treeId, int_t sender, int_t receiver, doublecomplex* x, int nrhs, ztrf3Dpartition_t* trf3Dpartition, zLUstruct_t* LUstruct, gridinfo3d_t* grid3d, doublecomplex* recvbuf, xtrsTimer_t *xtrsTimer); @@ -990,7 +987,7 @@ extern void zscaleMatrixDiagonally(fact_t Fact, zScalePermstruct_t *, SuperMatri extern void zperform_row_permutation(superlu_dist_options_t *, fact_t Fact, zScalePermstruct_t *, zLUstruct_t *LUstruct, int_t m, int_t n, gridinfo_t *, SuperMatrix *A, SuperMatrix *GA, SuperLUStat_t *, - int job, int_t Equil, int *rowequ, int *colequ, int_t *iinfo); + int job, int Equil, int_t *rowequ, int *colequ, int *iinfo); extern double zcomputeA_Norm(int notran, SuperMatrix *, gridinfo_t *); extern int ztrs_compute_communication_structure(superlu_dist_options_t *options, int_t n, zLUstruct_t *, zScalePermstruct_t * ScalePermstruct, diff --git a/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem.sh b/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem.sh index d022d862..fa22d182 100644 --- a/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem.sh +++ b/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem.sh @@ -64,6 +64,7 @@ cmake .. \ -DMPIEXEC_EXECUTABLE=/usr/bin/srun \ -DMPIEXEC_MAX_NUMPROCS=16 \ -Denable_complex16=ON \ + -DXSDK_INDEX_SIZE=64 \ -Denable_single=ON make pddrive -j16