-
Notifications
You must be signed in to change notification settings - Fork 66
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
243 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
115 changes: 115 additions & 0 deletions
115
example_scripts/batch_script_mpi_runit_sunspot_intel_nogpu.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
#!/bin/bash | ||
# Preamble of the run script: loads modules, lifts the stack-size limit and | ||
# exports the SUPERLU_* settings. Exported variables are inherited by child | ||
# processes, i.e. by the mpirun command launched further down. | ||
module load spack cmake | ||
ulimit -s unlimited | ||
|
||
|
||
#SUPERLU settings: | ||
# NOTE(review): the script name says "nogpu" and SUPERLU_ACC_OFFLOAD is 0, yet several | ||
# GPU-related knobs are still set below; presumably they are ignored in a CPU-only run - confirm. | ||
export SUPERLU_LBS=GD | ||
export SUPERLU_ACC_OFFLOAD=0 # this can be 0 to do CPU tests on GPU nodes | ||
export GPU3DVERSION=1 | ||
export ANC25D=0 | ||
export NEW3DSOLVE=1 | ||
export NEW3DSOLVETREECOMM=1 | ||
export SUPERLU_BIND_MPI_GPU=1 # assign GPU based on the MPI rank, assuming one MPI per GPU | ||
|
||
export SUPERLU_MAXSUP=256 # max supernode size | ||
export SUPERLU_RELAX=64 # upper bound for relaxed supernode size | ||
export SUPERLU_MAX_BUFFER_SIZE=10000000 ## 500000000 # buffer size in words on GPU | ||
export SUPERLU_NUM_LOOKAHEADS=2 ##4, must be at least 2, see 'lookahead winSize' | ||
export SUPERLU_NUM_GPU_STREAMS=1 | ||
export SUPERLU_MPI_PROCESS_PER_GPU=1 # 2: this can better saturate GPU | ||
export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU | ||
|
||
|
||
|
||
|
||
# Driver section: for every process grid, right-hand-side count, matrix and repetition, | ||
# run ./EXAMPLE/pddrive under mpirun and tee its output into a per-matrix directory. | ||
# | ||
# nprows / npcols / npz are read in lockstep by index i (one grid per index). The outer | ||
# loop bound uses the length of npcols only, so all three arrays must have the same length. | ||
# NTH is used both as OMP_NUM_THREADS and as the mpirun --depth value; NREP is the number | ||
# of repetitions per matrix. THREADS_PER_NODE is not referenced anywhere in this script. | ||
CPUS_PER_NODE=104 | ||
THREADS_PER_NODE=208 | ||
nprows=(1) | ||
npcols=(1 ) | ||
npz=(1) | ||
nrhs=(1) | ||
NTH=2 | ||
NREP=1 | ||
# NODE_VAL_TOT=1 | ||
|
||
for ((i = 0; i < ${#npcols[@]}; i++)); do | ||
NROW=${nprows[i]} | ||
NCOL=${npcols[i]} | ||
NPZ=${npz[i]} | ||
for ((s = 0; s < ${#nrhs[@]}; s++)); do | ||
NRHS=${nrhs[s]} | ||
# Ceiling division: nodes needed for the 2D grid (NROW*NCOL ranks) at CPUS_PER_NODE ranks per node. | ||
# NODE_VAL2D is computed here but not used by the active mpirun command below. | ||
CORE_VAL2D=`expr $NCOL \* $NROW` | ||
NODE_VAL2D=`expr $CORE_VAL2D / $CPUS_PER_NODE` | ||
MOD_VAL=`expr $CORE_VAL2D % $CPUS_PER_NODE` | ||
if [[ $MOD_VAL -ne 0 ]] | ||
then | ||
NODE_VAL2D=`expr $NODE_VAL2D + 1` | ||
fi | ||
|
||
# Same ceiling division for the 3D grid (NROW*NCOL*NPZ ranks); NODE_VAL is likewise unused below. | ||
CORE_VAL=`expr $NCOL \* $NROW \* $NPZ` | ||
NODE_VAL=`expr $CORE_VAL / $CPUS_PER_NODE` | ||
MOD_VAL=`expr $CORE_VAL % $CPUS_PER_NODE` | ||
if [[ $MOD_VAL -ne 0 ]] | ||
then | ||
NODE_VAL=`expr $NODE_VAL + 1` | ||
fi | ||
|
||
# NODE_VAL=2 | ||
# NCORE_VAL_TOT=`expr $NODE_VAL_TOT \* $CORES_PER_NODE / $NTH` | ||
batch=0 # whether to do batched test | ||
# Rank counts: NCORE_VAL_TOT (3D) is only used by the commented-out pddrive3d command; | ||
# NCORE_VAL_TOT2D is the -n value of the active pddrive command. | ||
NCORE_VAL_TOT=`expr $NROW \* $NCOL \* $NPZ ` | ||
NCORE_VAL_TOT2D=`expr $NROW \* $NCOL ` | ||
|
||
# This plain assignment is redundant with the export a few lines below. | ||
OMP_NUM_THREADS=$NTH | ||
|
||
export OMP_NUM_THREADS=$NTH | ||
export OMP_PLACES=threads | ||
export OMP_PROC_BIND=spread | ||
export MPICH_MAX_THREAD_SAFETY=multiple | ||
#export OMP_MAX_ACTIVE_LEVELS=1 | ||
#export OMP_DYNAMIC=TRUE | ||
|
||
# srun -n 1 ./EXAMPLE/pddrive -r 1 -c 1 ../EXAMPLE/g20.rua | ||
|
||
# export NSUP=256 | ||
# export NREL=256 | ||
# Matrix selection: exactly one "for MAT in ..." line is active; the commented lines are alternatives. | ||
# for MAT in big.rua | ||
# for MAT in g20.rua | ||
# for MAT in s1_mat_0_253872.bin s2D9pt2048.rua | ||
# for MAT in dielFilterV3real.bin | ||
for MAT in rma10.mtx | ||
# for MAT in s2D9pt2048.rua raefsky3.mtx rma10.mtx | ||
# for MAT in s1_mat_0_126936.bin # for MAT in s1_mat_0_126936.bin | ||
# for MAT in s2D9pt2048.rua | ||
# for MAT in s1_mat_0_126936.bin | ||
# for MAT in s2D9pt1536.rua | ||
# for MAT in s1_mat_0_126936.bin s1_mat_0_253872.bin s1_mat_0_507744.bin | ||
# for MAT in matrix_ACTIVSg70k_AC_00.mtx matrix_ACTIVSg10k_AC_00.mtx | ||
# for MAT in temp_13k.mtx temp_25k.mtx temp_75k.mtx | ||
# for MAT in temp_13k.mtx | ||
do | ||
# Output directory in the current working directory, named after the matrix file. | ||
mkdir -p $MAT | ||
for ii in `seq 1 $NREP` | ||
do | ||
|
||
# Not exported: only a shell variable, referenced solely in the output file name of the | ||
# commented-out pddrive3d command below. | ||
SUPERLU_ACC_SOLVE=0 | ||
|
||
# 2D run: NROW*NCOL ranks, matrix read from ~/my_research/matrix/. NRHS is not passed and the | ||
# log name hard-codes "1rhs", and tee is used without -a, so every repetition (ii) and every | ||
# nrhs entry (s) rewrites the same log file. | ||
# NOTE(review): --depth $NTH presumably reserves NTH cores per rank - confirm against the launcher docs. | ||
mpirun -n $NCORE_VAL_TOT2D --depth $NTH --cpu-bind depth ./EXAMPLE/pddrive -c $NCOL -r $NROW -b $batch ~/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}_${NTH}_1rhs_2d | ||
|
||
#mpirun -n $NCORE_VAL_TOT --depth $NTH --cpu-bind depth ./EXAMPLE/pddrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS ~/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS} | ||
|
||
|
||
done | ||
|
||
done | ||
done | ||
done | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/bin/bash | ||
# Build script: configures the project with CMake from a build directory (source tree is ".."), | ||
# then builds only the pddrive and pddrive3d targets. | ||
|
||
module load spack cmake | ||
|
||
|
||
# Configuration notes: | ||
# - Compilers are the MPI wrappers mpicc / mpicxx / mpif90; OpenMP is enabled via -fopenmp. | ||
# - BLAS and LAPACK both point at the sequential MKL libraries under ${MKLROOT}, so MKLROOT | ||
# must be set in the environment before running this script. | ||
# - CUDA is disabled (TPL_ENABLE_CUDALIB=OFF). NOTE(review): -DGPU_SOLVE is still passed in | ||
# CMAKE_C_FLAGS - confirm this is intended for a no-GPU build. | ||
# - The ParMETIS/METIS include and library paths are hard-coded to one user's home directory | ||
# and must be edited for any other installation. | ||
# - CMAKE_BUILD_TYPE=Debug. NOTE(review): presumably a Release build is wanted for timing runs - confirm. | ||
# - The install prefix is the build directory itself ("."). | ||
cmake .. \ | ||
-DCMAKE_C_FLAGS="-DGPU_SOLVE -std=c11 -D_XOPEN_SOURCE -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_ -I${MKLROOT}/include -fopenmp" \ | ||
-DCMAKE_CXX_FLAGS="-I${MKLROOT}/include -fopenmp" \ | ||
-DCMAKE_CXX_COMPILER=mpicxx \ | ||
-DCMAKE_C_COMPILER=mpicc \ | ||
-DCMAKE_Fortran_COMPILER=mpif90 \ | ||
-DXSDK_ENABLE_Fortran=OFF \ | ||
-DTPL_ENABLE_INTERNAL_BLASLIB=OFF \ | ||
-DTPL_ENABLE_LAPACKLIB=ON \ | ||
-DBUILD_SHARED_LIBS=ON \ | ||
-DTPL_ENABLE_CUDALIB=OFF \ | ||
-DCMAKE_INSTALL_PREFIX=. \ | ||
-DCMAKE_INSTALL_LIBDIR=./lib \ | ||
-DCMAKE_BUILD_TYPE=Debug \ | ||
-DTPL_BLAS_LIBRARIES="-L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core" \ | ||
-DTPL_LAPACK_LIBRARIES="-L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core" \ | ||
-DTPL_PARMETIS_INCLUDE_DIRS="/home/liuyangz/my_software/parmetis-4.0.3/include;/home/liuyangz/my_software/parmetis-4.0.3/metis/include" \ | ||
-DTPL_PARMETIS_LIBRARIES="/home/liuyangz/my_software/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.so;/home/liuyangz/my_software/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.so" \ | ||
-DTPL_ENABLE_COMBBLASLIB=OFF \ | ||
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON | ||
|
||
# Build the two example drivers with 16 parallel jobs each; the Fortran driver stays disabled, | ||
# matching XSDK_ENABLE_Fortran=OFF above. | ||
make pddrive -j16 | ||
make pddrive3d -j16 | ||
#make f_pddrive | ||
|
||
## -DTPL_BLAS_LIBRARIES=/global/cfs/cdirs/m3894/ptlin/tpl/amd_blis/install/amd_blis-20211021-n9-gcc9.3.0/lib/libblis.a \ |
Oops, something went wrong.