Skip to content

Commit

Permalink
Fix lbann half issues (elemental#93)
Browse files Browse the repository at this point in the history
* tick up version to 1.3.3

* fix issues with CPU half

* rework the copy interface

* work on Copy

* add a warmup run to the gemm test

* Various updates to copy dispatch; no more ETI for Copy

* fix an issue with the cuda half type's assignment operators

* make gpu_half_type assignment operators into templates

* Update include/El/blas_like/level1/CopyLocal.hpp

Co-Authored-By: Tim Moon <[email protected]>

* remove some debugging output

* add decls for BaseDistMatrix copy and copyasync

* make gpu_half_type streamable

* be a little more clever about casting to __half

* fix some things

* add overloads of sqrt and pow for half types

* add unary minus for gpu half type

* fix an issue where NVCC decided that rvalue references have value semantics

* add an overload of Log for gpu_half_type

* add exception-throwing bitwise operators for half. This is to appease Aluminum at compile-time and should never be encountered IRL.

* add a bunch of missing library symbols

* add a bunch of transcendental functions, etc, for half types

* fix some missing symbols when compiling without half support

* Fixes to instantiate Read/Write with gpu half type

* Add overload for instantiate with half types

* add a few missing symbols

* add a write impl for gpu half matrices

* temp: dispatch gemv through Gemm for __half

* Fix the GEMV as GEMM call

* temporary error-throw for unhandled case

* remove the incy != 1 gemv case

* patch around an issue in the Half library

* expose AbstractMatrix interface to Print()

Co-authored-by: Tim Moon <[email protected]>
  • Loading branch information
benson31 and Tim Moon authored Jan 17, 2020
1 parent de6b509 commit 7d581e9
Show file tree
Hide file tree
Showing 44 changed files with 1,734 additions and 898 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ endif (__GIT_EXECUTABLE)
# This must be set because version tags
set(HYDROGEN_VERSION_MAJOR 1)
set(HYDROGEN_VERSION_MINOR 3)
set(HYDROGEN_VERSION_PATCH 2)
set(HYDROGEN_VERSION_PATCH 3)
set(HYDROGEN_VERSION_MAJOR_MINOR
"${HYDROGEN_VERSION_MAJOR}.${HYDROGEN_VERSION_MINOR}")
set(HYDROGEN_VERSION
Expand Down Expand Up @@ -539,6 +539,7 @@ if (HYDROGEN_HAVE_CUDA)
target_link_libraries(Hydrogen_CUDA PUBLIC ${NVTX_LIBRARIES})
target_link_libraries(Hydrogen_CUDA PUBLIC cuda::toolkit)

target_link_libraries(Hydrogen_CXX PUBLIC Hydrogen_CUDA)
list(APPEND HYDROGEN_LIBRARIES Hydrogen_CUDA)
endif ()

Expand Down
31 changes: 31 additions & 0 deletions cmake/modules/HydrogenETIGeneration.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Recursive worker that expands EXPRESSION_TEMPLATE once for every
# combination of values taken from the given parameter lists.
#
# Arguments:
#   OUTVAR              Name of the list variable that accumulates the
#                       configured expressions. The accumulated list is
#                       written back to the caller's scope on return.
#   EXPRESSION_TEMPLATE String with @_ARG0@, @_ARG1@, ... placeholders.
#   THIS_EPARAM_ID      Index <N> of the placeholder @_ARG<N>@ that this
#                       level of the recursion fills in.
#   THIS_EPARAM         NAME of the list variable whose values are
#                       substituted for @_ARG<THIS_EPARAM_ID>@.
#   ARGN                NAMES of the list variables for the remaining
#                       placeholders, in placeholder order.
#
# Each level sets _ARG<THIS_EPARAM_ID> and either recurs into the next
# parameter list or, at the innermost level, configures the template
# and appends the result to OUTVAR.
function (h_gtpe_recur OUTVAR EXPRESSION_TEMPLATE THIS_EPARAM_ID THIS_EPARAM)
  # Count the remaining parameter lists explicitly. A bare `if (ARGN)`
  # evaluates the VALUE of ARGN as a boolean, so a single remaining
  # list named like a false constant (e.g. `N`, `NO`, `OFF`) would be
  # mistaken for "no more parameters".
  list(LENGTH ARGN _NUM_REMAINING)

  foreach (_VAL IN LISTS ${THIS_EPARAM})
    # Visible to nested calls because function scopes inherit the
    # variables of the calling scope.
    set(_ARG${THIS_EPARAM_ID} "${_VAL}")
    if (_NUM_REMAINING GREATER 0)
      math(EXPR _NEXT_ID "${THIS_EPARAM_ID} + 1")
      h_gtpe_recur(${OUTVAR} "${EXPRESSION_TEMPLATE}" ${_NEXT_ID} ${ARGN})
    else ()
      # @ONLY: only @VAR@ references are replaced; ${VAR} text in the
      # template is left untouched.
      string(CONFIGURE "${EXPRESSION_TEMPLATE}" _THIS_EXPRESSION @ONLY)
      list(APPEND ${OUTVAR} "${_THIS_EXPRESSION}")
    endif ()
  endforeach ()

  # Hand the accumulated list back to the caller.
  set(${OUTVAR} "${${OUTVAR}}" PARENT_SCOPE)
endfunction ()

# Generates one expression for every combination of values drawn from
# the given parameter lists by delegating to h_gtpe_recur.
#
# Arguments:
#   OUTVAR              Name of the variable that receives the list of
#                       generated expressions in the caller's scope.
#   EXPRESSION_TEMPLATE String with @_ARG0@, @_ARG1@, ... placeholders;
#                       @_ARG<N>@ is replaced by values from the N-th
#                       parameter list.
#   ARGN                NAMES of the list variables, one per
#                       placeholder, in placeholder order.
#
# The first list varies slowest in the generated sequence.
#
# DO NOT ADD A SEMICOLON TO THE EXPRESSION_TEMPLATE PARAMETER!!!!!!!
# (The result is a CMake list, which uses semicolons as separators.)
function (h_generate_tensor_product_expression OUTVAR EXPRESSION_TEMPLATE)
  # Start from an empty accumulator. Function scopes inherit the
  # caller's variables, so without this reset a pre-existing
  # _ALL_EXPRESSIONS in the calling scope would leak stale entries
  # into the result.
  set(_ALL_EXPRESSIONS "")
  h_gtpe_recur(_ALL_EXPRESSIONS "${EXPRESSION_TEMPLATE}" 0 ${ARGN})
  set(${OUTVAR} ${_ALL_EXPRESSIONS} PARENT_SCOPE)
endfunction ()

# Renders a list of function instances as a single string in which
# every instance is terminated by a semicolon and consecutive
# instances are separated by a newline.
#
# Arguments:
#   OUTVAR  Name of the variable that receives the string in the
#           caller's scope.
#   INLIST  NAME of the list variable holding the function instances.
function (h_func_list_to_string OUTVAR INLIST)
  # Glue the entries together with ";\n" between them ...
  list(JOIN ${INLIST} ";\n" _JOINED)
  # ... and terminate the final entry as well.
  string(APPEND _JOINED ";")
  set(${OUTVAR} "${_JOINED}" PARENT_SCOPE)
endfunction ()
Loading

0 comments on commit 7d581e9

Please sign in to comment.