
Commit 28d78c4

resolve merge conflicts

liuyangzhuan committed Oct 24, 2023
2 parents a1d2cc1 + e371977
Showing 12 changed files with 243 additions and 28 deletions.
16 changes: 13 additions & 3 deletions SRC/TRF3dV100/schurCompUpdate.cu
@@ -940,7 +940,7 @@ int_t LUstruct_v100::dSchurCompUpdatePartGPU(
double alpha = 1.0;
double beta = 0.0;
#ifndef NDEBUG
printf("m=%d, n=%d, k=%d\n", gemm_m, gemm_n, gemm_k);
// printf("m=%d, n=%d, k=%d\n", gemm_m, gemm_n, gemm_k);
#endif
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
gemm_m, gemm_n, gemm_k, &alpha,
@@ -1088,6 +1088,8 @@ int_t LUstruct_v100::setLUstruct_GPU()

size_t totalNzvalSize = 0; /* too big for gemmBufferSize */
size_t max_gemmCsize = 0; /* Sherry added 2/20/2023 */
size_t max_nzrow = 0; /* Yang added 10/20/2023 */
size_t max_nzcol = 0;

/*Memory for lpapenl and upanel Data*/
for (i = 0; i < CEILING(nsupers, Pc); ++i)
@@ -1096,6 +1098,8 @@ int_t LUstruct_v100::setLUstruct_GPU()
{
memReqData += lPanelVec[i].totalSize();
totalNzvalSize += lPanelVec[i].nzvalSize();
if(lPanelVec[i].nzvalSize()>0)
max_nzrow = SUPERLU_MAX(lPanelVec[i].nzrows(),max_nzrow);
//max_gemmCsize = SUPERLU_MAX(max_gemmCsize, ???);
}
}
@@ -1105,8 +1109,11 @@ int_t LUstruct_v100::setLUstruct_GPU()
{
memReqData += uPanelVec[i].totalSize();
totalNzvalSize += uPanelVec[i].nzvalSize();
if(uPanelVec[i].nzvalSize()>0)
max_nzcol = SUPERLU_MAX(uPanelVec[i].nzcols(),max_nzcol);
}
}
max_gemmCsize = max_nzcol*max_nzrow;

memReqData += CEILING(nsupers, Pc) * sizeof(lpanelGPU_t);
memReqData += CEILING(nsupers, Pr) * sizeof(upanelGPU_t);
@@ -1118,8 +1125,11 @@ int_t LUstruct_v100::setLUstruct_GPU()
int_t maxBuffSize = sp_ienv_dist (8, options);
int maxsup = sp_ienv_dist(3, options); // max. supernode size
maxBuffSize = SUPERLU_MAX(maxsup * maxsup, maxBuffSize); // Sherry added 7/10/23
A_gpu.gemmBufferSize = SUPERLU_MIN(maxBuffSize, totalNzvalSize);

#if 0
A_gpu.gemmBufferSize = SUPERLU_MIN(maxBuffSize, totalNzvalSize);
#else
A_gpu.gemmBufferSize = SUPERLU_MIN(maxBuffSize, SUPERLU_MAX(max_gemmCsize,totalNzvalSize)); /* Yang added 10/20/2023 */
#endif
size_t dataPerStream = 3 * sizeof(double) * maxLvalCount + 3 * sizeof(double) * maxUvalCount + 2 * sizeof(int_t) * maxLidxCount + 2 * sizeof(int_t) * maxUidxCount + A_gpu.gemmBufferSize * sizeof(double) + ldt * ldt * sizeof(double);
if (memReqData + 2 * dataPerStream > useableGPUMem)
{
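Taken together, the hunks above make the GPU GEMM buffer at least as large as the biggest Schur-complement block C, whose dimensions are bounded by the largest panel row and column counts. The following is a minimal sketch of that sizing rule in plain C, not the SuperLU_DIST code itself; the function name and the standalone formulation are illustrative only.

#include <stddef.h>

/* Sketch: the largest GEMM output block has max_nzrow * max_nzcol entries,
 * so the buffer must cover the larger of that product and totalNzvalSize,
 * capped at the user-tunable maxBuffSize. */
static size_t size_gemm_buffer(size_t max_nzrow, size_t max_nzcol,
                               size_t totalNzvalSize, size_t maxBuffSize)
{
    size_t max_gemmCsize = max_nzrow * max_nzcol;
    size_t needed = (max_gemmCsize > totalNzvalSize) ? max_gemmCsize
                                                     : totalNzvalSize;
    return (needed < maxBuffSize) ? needed : maxBuffSize;
}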
2 changes: 1 addition & 1 deletion SRC/double/dSchCompUdt-gpu.c
@@ -216,7 +216,7 @@ if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */
size_t C_stream_size = nbrow * num_col_stream * sizeof(double);

// Sherry: Check dC buffer of *buffer_size* is large enough
assert(nbrow*(st_col+num_col_stream) < buffer_size);
assert(nbrow*(st_col+num_col_stream) <= buffer_size);

gpuMemcpyAsync(dB+b_offset, tempu+b_offset, B_stream_size,
gpuMemcpyHostToDevice, streams[stream_id]);
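The relaxed assertion admits the boundary case in which the streamed columns fill the dC buffer exactly: with, say, nbrow = 100, st_col = 0, num_col_stream = 50 and buffer_size = 5000 (numbers chosen only for illustration), nbrow*(st_col+num_col_stream) equals buffer_size and the copy is still in bounds; only a strictly larger product would overflow.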
5 changes: 4 additions & 1 deletion SRC/double/dlook_ahead_update.c
@@ -121,9 +121,12 @@ while (j < nub && perm_u[2 * j] <= k0 + num_look_aheads)
/* Sherry -- examine all the shared variables ??
'firstprivate' ensures that the private variables are initialized
to the values before entering the loop. */
#if !defined __INTEL_LLVM_COMPILER
/* Yang: this parallel for is causing a segfault with oneAPI compilers */
#pragma omp parallel for \
firstprivate(lptr,luptr,ib,current_b) private(lb) \
firstprivate(lptr,luptr,current_b) private(ib,lb) \
default(shared) schedule(dynamic)
#endif
#endif
for (lb = 0; lb < nlb; lb++) { /* Loop through each block in L(:,k) */
int temp_nbrow; /* automatic variable is private */
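The guard above compiles the parallel-for only when __INTEL_LLVM_COMPILER is not defined (that macro is set by the Intel oneAPI icx/icpx compilers), and it also moves ib from firstprivate to private. Below is a minimal, self-contained sketch of the same guard pattern in plain C; the loop body is illustrative, not the SuperLU_DIST update.

#include <stdio.h>

int main(void)
{
    int nlb = 8;   /* number of blocks, illustrative value */
#if defined(_OPENMP) && !defined(__INTEL_LLVM_COMPILER)
    /* With the oneAPI compilers the pragma is skipped and the loop runs
     * sequentially; other OpenMP compilers parallelize it as before. */
    #pragma omp parallel for default(shared) schedule(dynamic)
#endif
    for (int lb = 0; lb < nlb; lb++) {
        printf("processing block %d\n", lb);
    }
    return 0;
}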
1 change: 0 additions & 1 deletion SRC/double/pdgssvx.c
@@ -2362,7 +2362,6 @@ void dpacked2skyline(int_t k, int_t *usubpack, double *valpack, int_t *usub, dou

usubPtr += UB_DESCRIPTOR + gsupc;
}
return 0;
}


11 changes: 10 additions & 1 deletion SRC/include/superlu_ddefs.h
@@ -1575,7 +1575,16 @@ extern void pdconvert_flatten_skyline2UROWDATA(superlu_dist_options_t *options,
dLUstruct_t *LUstruct, SuperLUStat_t *stat, int n);
extern void pdconvertUROWDATA2skyline(superlu_dist_options_t *options, gridinfo_t *grid,
dLUstruct_t *LUstruct, SuperLUStat_t *stat, int n);

extern int_t
dReDistribute_A(SuperMatrix *A, dScalePermstruct_t *ScalePermstruct,
Glu_freeable_t *Glu_freeable, int_t *xsup, int_t *supno,
gridinfo_t *grid, int_t *colptr[], int_t *rowind[],
double *a[]);
extern float
pddistribute3d_Yang(superlu_dist_options_t *options, int_t n, SuperMatrix *A,
dScalePermstruct_t *ScalePermstruct,
Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct,
gridinfo3d_t *grid3d);
#if 0 // NOT CALLED
/* from ancFactorization.h (not called) */
extern int_t ancestorFactor(
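The declarations added above export dReDistribute_A and pddistribute3d_Yang through the public header. A hypothetical call sketch that only mirrors the declared signatures; the variable names are illustrative, and reading the float return value as a memory-usage figure (as with other pddistribute variants) is an assumption, not something stated in this diff.

int_t *colptr, *rowind;
double *a;
int_t ret = dReDistribute_A(A, ScalePermstruct, Glu_freeable, xsup, supno,
                            grid, &colptr, &rowind, &a);
float mem_use = pddistribute3d_Yang(options, n, A, ScalePermstruct,
                                    Glu_freeable, LUstruct, grid3d);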
7 changes: 7 additions & 0 deletions SRC/include/superlu_defs.h
@@ -1461,6 +1461,13 @@ extern int_t Wait_LDiagBlock_Recv(MPI_Request *, SCT_t *);
extern int_t Test_LDiagBlock_Recv(MPI_Request *, SCT_t *);
extern int_t LDiagBlockRecvWait( int_t k, int* factored_U, MPI_Request *, gridinfo_t *);


extern int_t num_full_cols_U_mod(
int_t kk, int_t *usub, int_t *xsup,
gridinfo_t *grid, int_t *perm_u,
int_t *ldu /* max. segment size of nonzero columns in U(kk,:) */
);

/*=====================*/

#ifdef __cplusplus
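The new prototype documents ldu as an output holding the maximum segment size of the nonzero columns in U(kk,:). A hypothetical call mirroring only the declared signature; interpreting the return value as the number of full columns is an assumption based on the function name.

int_t ldu = 0;
int_t nfullcols = num_full_cols_U_mod(kk, usub, xsup, grid, perm_u, &ldu);
/* ldu now holds the max. segment size of nonzero columns in U(kk,:). */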
(changes to another example script follow; the file-name header was not captured in this diff view)
@@ -25,7 +25,7 @@ export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:$LD_LIBRARY_PATH

export SUPERLU_LBS=GD
export SUPERLU_ACC_OFFLOAD=1 # this can be 0 to do CPU tests on GPU nodes
export GPU3DVERSION=1
export GPU3DVERSION=0
export ANC25D=0
export NEW3DSOLVE=1
export NEW3DSOLVETREECOMM=1
@@ -74,8 +74,8 @@ else
# Host unknown; exiting
exit $EXIT_HOST
fi
nprows=(2)
npcols=(2 )
nprows=(1)
npcols=(1 )
npz=(1)
nrhs=(1)
NTH=1
@@ -123,14 +123,16 @@ export MPICH_MAX_THREAD_SAFETY=multiple

# export NSUP=256
# export NREL=256
for MAT in big.rua
# for MAT in big.rua
# for MAT in Geo_1438.bin
# for MAT in g20.rua
# for MAT in s1_mat_0_253872.bin s2D9pt2048.rua
# for MAT in dielFilterV3real.bin
# for MAT in rma10.mtx
# for MAT in rma10.mtx
# for MAT in raefsky3.mtx
# for MAT in s2D9pt2048.rua raefsky3.mtx rma10.mtx
# for MAT in s1_mat_0_126936.bin # for MAT in s1_mat_0_126936.bin
# for MAT in s2D9pt2048.rua
for MAT in s2D9pt2048.rua
# for MAT in s2D9pt1536.rua
# for MAT in s1_mat_0_126936.bin s1_mat_0_253872.bin s1_mat_0_507744.bin
# for MAT in matrix_ACTIVSg70k_AC_00.mtx matrix_ACTIVSg10k_AC_00.mtx
@@ -140,16 +142,23 @@ do
mkdir -p $MAT
for ii in `seq 1 $NREP`
do
# export SUPERLU_ACC_SOLVE=1

# export SUPERLU_ACC_OFFLOAD=1
# SUPERLU_ACC_OFFLOAD=0
# srun -n $NCORE_VAL_TOT2D -N $NODE_VAL2D -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive -c $NCOL -r $NROW -b $batch $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}_${NTH}_1rhs_2d_gpu_${SUPERLU_ACC_OFFLOAD}
# export SUPERLU_ACC_OFFLOAD=0

# SUPERLU_ACC_OFFLOAD=1
# srun -n $NCORE_VAL_TOT2D -N $NODE_VAL2D -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive -c $NCOL -r $NROW -b $batch $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}_${NTH}_1rhs_2d_gpu_${SUPERLU_ACC_OFFLOAD}

unset SUPERLU_ACC_SOLVE
echo "srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}"
srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}
# SUPERLU_ACC_OFFLOAD=1
# export GPU3DVERSION=0
# echo "srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}"
# srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}

SUPERLU_ACC_OFFLOAD=1
export GPU3DVERSION=1
echo "srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}"
srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}


# export SUPERLU_ACC_SOLVE=1
# srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores valgrind --leak-check=yes ./EXAMPLE/pddrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}
(changes to a second example script follow; the file-name header was not captured in this diff view)
@@ -27,7 +27,7 @@ export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:$LD_LIBRARY_PATH

export SUPERLU_LBS=GD
export SUPERLU_ACC_OFFLOAD=1 # this can be 0 to do CPU tests on GPU nodes
export GPU3DVERSION=1
export GPU3DVERSION=0
export ANC25D=0
export NEW3DSOLVE=1
export NEW3DSOLVETREECOMM=1
@@ -81,8 +81,8 @@ fi
# npz=(64 32 16)
# nrhs=(1 50)

nprows=(2 )
npcols=(2 )
nprows=(1 )
npcols=(1 )
npz=(1 )
nrhs=(1)

@@ -138,8 +138,8 @@ export MPICH_MAX_THREAD_SAFETY=multiple
# for MAT in s1_mat_0_253872.bin s2D9pt2048.rua
# for MAT in dielFilterV3real.bin
# for MAT in Geo_1438.bin s2D9pt2048.rua raefsky3.mtx rma10.mtx
# for MAT in Geo_1438.bin
for MAT in s1_mat_0_126936.bin
for MAT in Geo_1438.bin
# for MAT in s1_mat_0_126936.bin
# for MAT in s2D9pt2048.rua
# for MAT in s2D9pt1536.rua
# for MAT in s1_mat_0_126936.bin s1_mat_0_253872.bin s1_mat_0_507744.bin
@@ -153,7 +153,7 @@ do
# export SUPERLU_ACC_SOLVE=1


# # srun -n $NCORE_VAL_TOT2D -N $NODE_VAL2D -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive -c $NCOL -r $NROW -b $batch $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}_${NTH}_1rhs_2d_gpu_${SUPERLU_ACC_OFFLOAD}
# # # srun -n $NCORE_VAL_TOT2D -N $NODE_VAL2D -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive -c $NCOL -r $NROW -b $batch $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}_${NTH}_1rhs_2d_gpu_${SUPERLU_ACC_OFFLOAD}
# export SUPERLU_ACC_OFFLOAD=0
# srun -n $NCORE_VAL_TOT2D -N $NODE_VAL2D -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pddrive -c $NCOL -r $NROW -b $batch $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}_${NTH}_1rhs_2d_gpu_${SUPERLU_ACC_OFFLOAD}

115 changes: 115 additions & 0 deletions example_scripts/batch_script_mpi_runit_sunspot_intel_nogpu.sh
@@ -0,0 +1,115 @@
#!/bin/bash
module load spack cmake
ulimit -s unlimited


#SUPERLU settings:
export SUPERLU_LBS=GD
export SUPERLU_ACC_OFFLOAD=0 # this can be 0 to do CPU tests on GPU nodes
export GPU3DVERSION=1
export ANC25D=0
export NEW3DSOLVE=1
export NEW3DSOLVETREECOMM=1
export SUPERLU_BIND_MPI_GPU=1 # assign GPU based on the MPI rank, assuming one MPI per GPU

export SUPERLU_MAXSUP=256 # max supernode size
export SUPERLU_RELAX=64 # upper bound for relaxed supernode size
export SUPERLU_MAX_BUFFER_SIZE=10000000 ## 500000000 # buffer size in words on GPU
export SUPERLU_NUM_LOOKAHEADS=2 ##4, must be at least 2, see 'lookahead winSize'
export SUPERLU_NUM_GPU_STREAMS=1
export SUPERLU_MPI_PROCESS_PER_GPU=1 # 2: this can better saturate GPU
export SUPERLU_N_GEMM=6000 # FLOPS threshold to divide the workload between CPU and GPU




CPUS_PER_NODE=104
THREADS_PER_NODE=208
nprows=(1)
npcols=(1 )
npz=(1)
nrhs=(1)
NTH=2
NREP=1
# NODE_VAL_TOT=1

for ((i = 0; i < ${#npcols[@]}; i++)); do
NROW=${nprows[i]}
NCOL=${npcols[i]}
NPZ=${npz[i]}
for ((s = 0; s < ${#nrhs[@]}; s++)); do
NRHS=${nrhs[s]}
CORE_VAL2D=`expr $NCOL \* $NROW`
NODE_VAL2D=`expr $CORE_VAL2D / $CPUS_PER_NODE`
MOD_VAL=`expr $CORE_VAL2D % $CPUS_PER_NODE`
if [[ $MOD_VAL -ne 0 ]]
then
NODE_VAL2D=`expr $NODE_VAL2D + 1`
fi

CORE_VAL=`expr $NCOL \* $NROW \* $NPZ`
NODE_VAL=`expr $CORE_VAL / $CPUS_PER_NODE`
MOD_VAL=`expr $CORE_VAL % $CPUS_PER_NODE`
if [[ $MOD_VAL -ne 0 ]]
then
NODE_VAL=`expr $NODE_VAL + 1`
fi

# NODE_VAL=2
# NCORE_VAL_TOT=`expr $NODE_VAL_TOT \* $CORES_PER_NODE / $NTH`
batch=0 # whether to do batched test
NCORE_VAL_TOT=`expr $NROW \* $NCOL \* $NPZ `
NCORE_VAL_TOT2D=`expr $NROW \* $NCOL `

OMP_NUM_THREADS=$NTH

export OMP_NUM_THREADS=$NTH
export OMP_PLACES=threads
export OMP_PROC_BIND=spread
export MPICH_MAX_THREAD_SAFETY=multiple
#export OMP_MAX_ACTIVE_LEVELS=1
#export OMP_DYNAMIC=TRUE

# srun -n 1 ./EXAMPLE/pddrive -r 1 -c 1 ../EXAMPLE/g20.rua

# export NSUP=256
# export NREL=256
# for MAT in big.rua
# for MAT in g20.rua
# for MAT in s1_mat_0_253872.bin s2D9pt2048.rua
# for MAT in dielFilterV3real.bin
for MAT in rma10.mtx
# for MAT in s2D9pt2048.rua raefsky3.mtx rma10.mtx
# for MAT in s1_mat_0_126936.bin # for MAT in s1_mat_0_126936.bin
# for MAT in s2D9pt2048.rua
# for MAT in s2D9pt1536.rua
# for MAT in s1_mat_0_126936.bin s1_mat_0_253872.bin s1_mat_0_507744.bin
# for MAT in matrix_ACTIVSg70k_AC_00.mtx matrix_ACTIVSg10k_AC_00.mtx
# for MAT in temp_13k.mtx temp_25k.mtx temp_75k.mtx
# for MAT in temp_13k.mtx
do
mkdir -p $MAT
for ii in `seq 1 $NREP`
do

SUPERLU_ACC_SOLVE=0

mpirun -n $NCORE_VAL_TOT2D --depth $NTH --cpu-bind depth ./EXAMPLE/pddrive -c $NCOL -r $NROW -b $batch ~/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}_${NTH}_1rhs_2d

#mpirun -n $NCORE_VAL_TOT --depth $NTH --cpu-bind depth ./EXAMPLE/pddrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS ~/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}


done

done
done
done
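For reference, the node-count arithmetic in this new script is a ceiling division spelled out with expr: NODE_VAL is CORE_VAL / CPUS_PER_NODE, incremented by one when the remainder is nonzero. With CPUS_PER_NODE=104 (values for illustration), the 1x1x1 grid used here needs NODE_VAL=1, while 105 or more ranks would round up to 2 nodes.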









6 changes: 3 additions & 3 deletions example_scripts/run_cmake_build_perlmutter_gcc_nvshmem.sh
@@ -42,9 +42,9 @@ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/}
NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
#NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/
cmake .. \
-DCMAKE_C_FLAGS="-DGPU_SOLVE -std=c11 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \
-DCMAKE_CXX_FLAGS="" \
-DCMAKE_Fortran_FLAGS="" \
-DCMAKE_C_FLAGS="-O2 -DGPU_SOLVE -std=c11 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \
-DCMAKE_CXX_FLAGS="-O2" \
-DCMAKE_Fortran_FLAGS="-O2" \
-DCMAKE_CXX_COMPILER=CC \
-DCMAKE_C_COMPILER=cc \
-DCMAKE_Fortran_COMPILER=ftn \
31 changes: 31 additions & 0 deletions example_scripts/run_cmake_build_sunspot_oneAPI_mkl_cpu.sh
@@ -0,0 +1,31 @@
#!/bin/bash

module load spack cmake


cmake .. \
-DCMAKE_C_FLAGS="-DGPU_SOLVE -std=c11 -D_XOPEN_SOURCE -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_ -I${MKLROOT}/include -fopenmp" \
-DCMAKE_CXX_FLAGS="-I${MKLROOT}/include -fopenmp" \
-DCMAKE_CXX_COMPILER=mpicxx \
-DCMAKE_C_COMPILER=mpicc \
-DCMAKE_Fortran_COMPILER=mpif90 \
-DXSDK_ENABLE_Fortran=OFF \
-DTPL_ENABLE_INTERNAL_BLASLIB=OFF \
-DTPL_ENABLE_LAPACKLIB=ON \
-DBUILD_SHARED_LIBS=ON \
-DTPL_ENABLE_CUDALIB=OFF \
-DCMAKE_INSTALL_PREFIX=. \
-DCMAKE_INSTALL_LIBDIR=./lib \
-DCMAKE_BUILD_TYPE=Debug \
-DTPL_BLAS_LIBRARIES="-L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core" \
-DTPL_LAPACK_LIBRARIES="-L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core" \
-DTPL_PARMETIS_INCLUDE_DIRS="/home/liuyangz/my_software/parmetis-4.0.3/include;/home/liuyangz/my_software/parmetis-4.0.3/metis/include" \
-DTPL_PARMETIS_LIBRARIES="/home/liuyangz/my_software/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.so;/home/liuyangz/my_software/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.so" \
-DTPL_ENABLE_COMBBLASLIB=OFF \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON

make pddrive -j16
make pddrive3d -j16
#make f_pddrive

## -DTPL_BLAS_LIBRARIES=/global/cfs/cdirs/m3894/ptlin/tpl/amd_blis/install/amd_blis-20211021-n9-gcc9.3.0/lib/libblis.a \
(the diff for the twelfth changed file did not load)
