From d3cebc5fdb08aff0153f936953415cb2cf6f4cc0 Mon Sep 17 00:00:00 2001 From: Nicolas Bock Date: Mon, 31 Jul 2017 18:28:17 +0200 Subject: [PATCH 1/4] Fix permissions on update_tags.sh --- update_tags.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 update_tags.sh diff --git a/update_tags.sh b/update_tags.sh old mode 100644 new mode 100755 From 9c21eb2b311979af8be351513389703db07e263a Mon Sep 17 00:00:00 2001 From: Nicolas Bock Date: Tue, 1 Aug 2017 06:07:55 +0200 Subject: [PATCH 2/4] Fix description of `BLAS_VENDOR` The empty string is also allowed. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a6f02002c..15026c44b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,7 +160,7 @@ if(BML_INTERNAL_GEMM) endif() set(BLAS_VENDOR "" - CACHE STRING "If set, the preferred BLAS/LAPACK vendor. Possible choices: {Intel,MKL,ACML,GNU}") + CACHE STRING "If set, the preferred BLAS/LAPACK vendor. Possible choices: {,Intel,MKL,ACML,GNU}") if(BLAS_VENDOR STREQUAL "Intel" OR BLAS_VENDOR STREQUAL "MKL") message(STATUS "Attempting to use Intel's BLAS/LAPACK (MKL)") From 39c526110bff2f98b9f4a498521eff3a6274d827 Mon Sep 17 00:00:00 2001 From: Nicolas Bock Date: Tue, 1 Aug 2017 06:25:56 +0200 Subject: [PATCH 3/4] Change parallel distribution of random_matrix_dense Since we don't care where the random matrix elements are stored, we can change the double nested loops over the matrix dimensions into one loop over all elements. This change increases the amount of work that can be split across the OpenMP thread pool and potentially aids parallel performance. 
--- src/C-interface/dense/bml_allocate_dense_typed.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/C-interface/dense/bml_allocate_dense_typed.c b/src/C-interface/dense/bml_allocate_dense_typed.c index 75f199d47..d2feed6b4 100644 --- a/src/C-interface/dense/bml_allocate_dense_typed.c +++ b/src/C-interface/dense/bml_allocate_dense_typed.c @@ -112,12 +112,9 @@ bml_matrix_dense_t *TYPED_FUNC( TYPED_FUNC(bml_zero_matrix_dense) (N, distrib_mode); REAL_T *A_dense = A->matrix; #pragma omp parallel for default(none) shared(A_dense) - for (int i = 0; i < N; i++) + for (int i = 0; i < N * N; i++) { - for (int j = 0; j < N; j++) - { - A_dense[ROWMAJOR(i, j, N, N)] = rand() / (double) RAND_MAX; - } + A_dense[i] = rand() / (double) RAND_MAX; } return A; } From 4e2fe616147bf3fd3ec9e660cb7d15ab4ee8281e Mon Sep 17 00:00:00 2001 From: Nicolas Bock Date: Mon, 31 Jul 2017 08:09:43 +0200 Subject: [PATCH 4/4] Cleanup testing of bml_add We should test all APIs of `bml_add`. This change adds the missing API tests. 
--- src/C-interface/dense/bml_add_dense_typed.c | 1 - .../ellpack/bml_add_ellpack_typed.c | 10 - tests/CMakeLists.txt | 4 +- tests/add_matrix_typed.c | 73 ------- tests/bml_test.c | 2 +- tests/bml_test.h | 2 +- tests/{add_matrix.c => test_bml_add.c} | 16 +- tests/{add_matrix.h => test_bml_add.h} | 14 +- tests/test_bml_add_typed.c | 198 ++++++++++++++++++ 9 files changed, 218 insertions(+), 102 deletions(-) delete mode 100644 tests/add_matrix_typed.c rename tests/{add_matrix.c => test_bml_add.c} (50%) rename tests/{add_matrix.h => test_bml_add.h} (77%) create mode 100644 tests/test_bml_add_typed.c diff --git a/src/C-interface/dense/bml_add_dense_typed.c b/src/C-interface/dense/bml_add_dense_typed.c index 2f1f9abe1..bb0a6287e 100644 --- a/src/C-interface/dense/bml_add_dense_typed.c +++ b/src/C-interface/dense/bml_add_dense_typed.c @@ -85,7 +85,6 @@ double TYPED_FUNC( shared(B_matrix, A_localRowMin, A_localRowMax) \ shared(N, myRank) \ reduction(+:trnorm) - //for (int i = 0; i < N * N; i++) for (int i = A_localRowMin[myRank] * N; i < A_localRowMax[myRank] * N; i++) { diff --git a/src/C-interface/ellpack/bml_add_ellpack_typed.c b/src/C-interface/ellpack/bml_add_ellpack_typed.c index 2c1a0e138..1cf9cd9bd 100644 --- a/src/C-interface/ellpack/bml_add_ellpack_typed.c +++ b/src/C-interface/ellpack/bml_add_ellpack_typed.c @@ -40,7 +40,6 @@ void TYPED_FUNC( int N = A->N; int A_M = A->M; int B_M = B->M; - int ix[N], jx[N]; int *A_nnz = A->nnz; @@ -67,7 +66,6 @@ void TYPED_FUNC( shared(A_index, A_value, A_nnz) \ shared(A_localRowMin, A_localRowMax) \ shared(B_index, B_value, B_nnz) - //for (int i = 0; i < N; i++) for (int i = A_localRowMin[myRank]; i < A_localRowMax[myRank]; i++) { int l = 0; @@ -79,7 +77,6 @@ void TYPED_FUNC( { x[k] = 0.0; ix[k] = i + 1; - //A_index[ROWMAJOR(i, l, N, A_M)] = k; jx[l] = k; l++; } @@ -94,7 +91,6 @@ void TYPED_FUNC( { x[k] = 0.0; ix[k] = i + 1; - //A_index[ROWMAJOR(i, l, N, A_M)] = k; jx[l] = k; l++; } @@ -106,7 +102,6 @@ void TYPED_FUNC( for 
(int jp = 0; jp < l; jp++) { int jind = jx[jp]; - //REAL_T xTmp = x[A_index[ROWMAJOR(i, jp, N, A_M)]]; REAL_T xTmp = x[jind]; if (is_above_threshold(xTmp, threshold)) { @@ -154,8 +149,6 @@ double TYPED_FUNC( int *B_nnz = B->nnz; int *B_index = B->index; - int ind, ind2; - REAL_T x[N]; REAL_T y[N]; REAL_T *A_value = (REAL_T *) A->value; @@ -177,7 +170,6 @@ double TYPED_FUNC( shared(A_localRowMin, A_localRowMax) \ shared(B_index, B_value, B_nnz) \ reduction(+:trnorm) - //for (int i = 0; i < N; i++) for (int i = A_localRowMin[myRank]; i < A_localRowMax[myRank]; i++) { int l = 0; @@ -190,7 +182,6 @@ double TYPED_FUNC( x[k] = 0.0; ix[k] = i + 1; y[k] = 0.0; - //A_index[ROWMAJOR(i, l, N, A_M)] = k; jx[l] = k; l++; } @@ -207,7 +198,6 @@ double TYPED_FUNC( x[k] = 0.0; ix[k] = i + 1; y[k] = 0.0; - //A_index[ROWMAJOR(i, l, N, A_M)] = k; jx[l] = k; l++; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 71a5974b8..9d724f379 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,7 +1,7 @@ include_directories(${CMAKE_SOURCE_DIR}/src/C-interface) set(SOURCES_TYPED - add_matrix_typed.c + test_bml_add_typed.c adjacency_matrix_typed.c adjungate_triangle_matrix_typed.c allocate_matrix_typed.c @@ -39,7 +39,7 @@ set_target_properties(bmltests POSITION_INDEPENDENT_CODE yes) add_executable(bml-test - add_matrix.c + test_bml_add.c adjacency_matrix.c adjungate_triangle_matrix.c allocate_matrix.c diff --git a/tests/add_matrix_typed.c b/tests/add_matrix_typed.c deleted file mode 100644 index c5dd31ff8..000000000 --- a/tests/add_matrix_typed.c +++ /dev/null @@ -1,73 +0,0 @@ -#include "bml.h" -#include "../typed.h" - -#include -#include -#include -#include -#if defined(SINGLE_REAL) || defined(SINGLE_COMPLEX) -#define REL_TOL 1e-6 -#else -#define REL_TOL 1e-12 -#endif - -int TYPED_FUNC( - test_add) ( - const int N, - const bml_matrix_type_t matrix_type, - const bml_matrix_precision_t matrix_precision, - const int M) -{ - bml_matrix_t *A = NULL; - bml_matrix_t *B = 
NULL; - bml_matrix_t *C = NULL; - - REAL_T *A_dense = NULL; - REAL_T *B_dense = NULL; - REAL_T *C_dense = NULL; - - double alpha = 1.2; - double beta = 0.8; - double threshold = 0.0; - - LOG_DEBUG("rel. tolerance = %e\n", REL_TOL); - - A = bml_random_matrix(matrix_type, matrix_precision, N, M, sequential); - B = bml_copy_new(A); - C = bml_random_matrix(matrix_type, matrix_precision, N, M, sequential); - - bml_add(B, C, alpha, beta, threshold); - - A_dense = bml_convert_to_dense(A, dense_row_major); - B_dense = bml_convert_to_dense(B, dense_row_major); - C_dense = bml_convert_to_dense(C, dense_row_major); - bml_print_dense_matrix(N, matrix_precision, dense_row_major, A_dense, 0, - N, 0, N); - bml_print_dense_matrix(N, matrix_precision, dense_row_major, B_dense, 0, - N, 0, N); - bml_print_dense_matrix(N, matrix_precision, dense_row_major, C_dense, 0, - N, 0, N); - for (int i = 0; i < N * N; i++) - { - double expected = alpha * A_dense[i] + beta * C_dense[i]; - double rel_diff = ABS((expected - B_dense[i]) / expected); - if (rel_diff > REL_TOL) - { - LOG_ERROR - ("matrices are not identical; expected[%d] = %e, B[%d] = %e\n", - i, expected, i, B_dense[i]); - return -1; - } - } - bml_free_memory(A_dense); - bml_free_memory(B_dense); - bml_free_memory(C_dense); - - bml_deallocate(&A); - bml_deallocate(&B); - bml_deallocate(&C); - - LOG_INFO("add matrix test passed\n"); - - return 0; -} diff --git a/tests/bml_test.c b/tests/bml_test.c index 1fd6866d0..965237b86 100644 --- a/tests/bml_test.c +++ b/tests/bml_test.c @@ -66,7 +66,7 @@ const char *test_description[] = { }; const test_function_t testers[] = { - test_add, + test_bml_add, test_adjacency, test_adjungate_triangle, test_allocate, diff --git a/tests/bml_test.h b/tests/bml_test.h index 05979f778..2d3bef81e 100644 --- a/tests/bml_test.h +++ b/tests/bml_test.h @@ -10,7 +10,6 @@ typedef int ( const bml_matrix_precision_t matrix_precision, const int M); -#include "add_matrix.h" #include "adjacency_matrix.h" #include 
"adjungate_triangle_matrix.h" #include "allocate_matrix.h" @@ -27,6 +26,7 @@ typedef int ( #include "scale_matrix.h" #include "set_row.h" #include "submatrix_matrix.h" +#include "test_bml_add.h" #include "test_bml_gemm.h" #include "threshold_matrix.h" #include "trace_matrix.h" diff --git a/tests/add_matrix.c b/tests/test_bml_add.c similarity index 50% rename from tests/add_matrix.c rename to tests/test_bml_add.c index 977035c09..2384d9258 100644 --- a/tests/add_matrix.c +++ b/tests/test_bml_add.c @@ -4,7 +4,7 @@ #include int -test_add( +test_bml_add( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, @@ -13,18 +13,20 @@ test_add( switch (matrix_precision) { case single_real: - return test_add_single_real(N, matrix_type, matrix_precision, M); + return test_bml_add_single_real(N, matrix_type, matrix_precision, + M); break; case double_real: - return test_add_double_real(N, matrix_type, matrix_precision, M); + return test_bml_add_double_real(N, matrix_type, matrix_precision, + M); break; case single_complex: - return test_add_single_complex(N, matrix_type, matrix_precision, - M); + return test_bml_add_single_complex(N, matrix_type, + matrix_precision, M); break; case double_complex: - return test_add_double_complex(N, matrix_type, matrix_precision, - M); + return test_bml_add_double_complex(N, matrix_type, + matrix_precision, M); break; default: fprintf(stderr, "unknown matrix precision\n"); diff --git a/tests/add_matrix.h b/tests/test_bml_add.h similarity index 77% rename from tests/add_matrix.h rename to tests/test_bml_add.h index d6bafd4a3..234070ef9 100644 --- a/tests/add_matrix.h +++ b/tests/test_bml_add.h @@ -1,33 +1,33 @@ -#ifndef __ADD_MATRIX_H -#define __ADD_MATRIX_H +#ifndef __TEST_BML_ADD_H +#define __TEST_BML_ADD_H #include -int test_add( +int test_bml_add( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, const int M); -int test_add_single_real( +int 
test_bml_add_single_real( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, const int M); -int test_add_double_real( +int test_bml_add_double_real( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, const int M); -int test_add_single_complex( +int test_bml_add_single_complex( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, const int M); -int test_add_double_complex( +int test_bml_add_double_complex( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, diff --git a/tests/test_bml_add_typed.c b/tests/test_bml_add_typed.c new file mode 100644 index 000000000..0b472e554 --- /dev/null +++ b/tests/test_bml_add_typed.c @@ -0,0 +1,198 @@ +#include "bml.h" +#include "../typed.h" +#include "../macros.h" + +#include +#include +#include +#include +#if defined(SINGLE_REAL) || defined(SINGLE_COMPLEX) +#define REL_TOL 1e-6 +#else +#define REL_TOL 1e-12 +#endif + +int TYPED_FUNC( + test_bml_add) ( + const int N, + const bml_matrix_type_t matrix_type, + const bml_matrix_precision_t matrix_precision, + const int M) +{ + bml_matrix_t *A = NULL; + bml_matrix_t *B = NULL; + bml_matrix_t *C = NULL; + + REAL_T *A_dense = NULL; + REAL_T *B_dense = NULL; + REAL_T *C_dense = NULL; + + int result = 0; + + double norm; + double expected_norm; + + double alpha = 1.2; + double beta = 0.8; + double threshold = 0.0; + + LOG_DEBUG("rel. 
tolerance = %e\n", REL_TOL); + + A = bml_random_matrix(matrix_type, matrix_precision, N, M, sequential); + A_dense = bml_export_to_dense(A, dense_row_major); + + LOG_INFO("A_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, A_dense, 0, + N, 0, N); + C = bml_random_matrix(matrix_type, matrix_precision, N, M, sequential); + C_dense = bml_export_to_dense(C, dense_row_major); + + LOG_INFO("C_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, C_dense, 0, + N, 0, N); + LOG_INFO("Testing bml_add()\n"); + + B = bml_copy_new(A); + bml_add(B, C, alpha, beta, threshold); + B_dense = bml_export_to_dense(B, dense_row_major); + + LOG_INFO("B_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, B_dense, 0, + N, 0, N); + for (int i = 0; i < N * N; i++) + { + double expected = alpha * A_dense[i] + beta * C_dense[i]; + double rel_diff = ABS((expected - B_dense[i]) / expected); + if (rel_diff > REL_TOL) + { + LOG_INFO + ("matrices are not identical; expected[%d] = %e, B[%d] = %e\n", + i, expected, i, B_dense[i]); + result = -1; + } + } + bml_free_memory(B_dense); + bml_deallocate(&B); + + LOG_INFO("Testing bml_add_norm()\n"); + + B = bml_copy_new(A); + norm = bml_add_norm(B, C, alpha, beta, threshold); + B_dense = bml_export_to_dense(B, dense_row_major); + + LOG_INFO("B_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, B_dense, 0, + N, 0, N); + expected_norm = 0; + for (int i = 0; i < N * N; i++) + { + expected_norm += C_dense[i] * C_dense[i]; + double expected = alpha * A_dense[i] + beta * C_dense[i]; + double rel_diff = ABS((expected - B_dense[i]) / expected); + if (rel_diff > REL_TOL) + { + LOG_INFO + ("matrices are not identical; expected[%d] = %e, B[%d] = %e\n", + i, expected, i, B_dense[i]); + result = -1; + } + } + if (ABS(expected_norm - norm) / expected_norm > REL_TOL) + { + LOG_INFO("norm mismatch: expected = %e, norm = %e\n", expected_norm, norm); + result = -1; + } + 
bml_free_memory(B_dense); + bml_deallocate(&B); + + LOG_INFO("Testing bml_add_identity()\n"); + + B = bml_copy_new(A); + bml_add_identity(B, beta, threshold); + B_dense = bml_export_to_dense(B, dense_row_major); + + LOG_INFO("B_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, B_dense, 0, + N, 0, N); + for (int i = 0; i < N; i++) + { + for (int j = 0; j < N; j++) + { + double expected = 0; + if (i == j) + { + expected = A_dense[ROWMAJOR(i, i, N, N)] + beta; + } + else + { + expected = A_dense[ROWMAJOR(i, j, N, N)]; + } + + double rel_diff = + ABS((expected - B_dense[ROWMAJOR(i, j, N, N)]) / expected); + if (rel_diff > REL_TOL) + { + LOG_INFO + ("matrices are not identical; expected[%d] = %e, B[%d] = %e\n", + i, expected, i, B_dense[ROWMAJOR(i, j, N, N)]); + result = -1; + } + } + } + bml_free_memory(B_dense); + bml_deallocate(&B); + + LOG_INFO("Testing bml_scale_add_identity()\n"); + + B = bml_copy_new(A); + bml_scale_add_identity(B, alpha, beta, threshold); + B_dense = bml_export_to_dense(B, dense_row_major); + + LOG_INFO("B_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, B_dense, 0, + N, 0, N); + for (int i = 0; i < N; i++) + { + for (int j = 0; j < N; j++) + { + double expected = 0; + if (i == j) + { + expected = alpha * A_dense[ROWMAJOR(i, i, N, N)] + beta; + } + else + { + expected = alpha * A_dense[ROWMAJOR(i, j, N, N)]; + } + + double rel_diff = + ABS((expected - B_dense[ROWMAJOR(i, j, N, N)]) / expected); + if (rel_diff > REL_TOL) + { + LOG_INFO + ("matrices are not identical; expected[%d] = %e, B[%d] = %e\n", + i, expected, i, B_dense[ROWMAJOR(i, j, N, N)]); + result = -1; + } + } + } + bml_free_memory(B_dense); + bml_deallocate(&B); + + bml_free_memory(A_dense); + bml_free_memory(C_dense); + + bml_deallocate(&A); + bml_deallocate(&C); + + if (result == 0) + { + LOG_INFO("add matrix test passed\n"); + } + else + { + LOG_INFO("add matrix test failed\n"); + } + + return result; +}