-
Notifications
You must be signed in to change notification settings - Fork 55
/
CMakeLists.txt
214 lines (194 loc) · 9.23 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# Author: Johannes de Fine Licht ([email protected])
# Copyright: This software is copyrighted under the BSD 3-Clause License.
# CMake 3.0 remains the minimum required version. The "...3.10" upper bound
# opts in to policies up to 3.10 when running a newer CMake, which avoids the
# "compatibility with CMake < 3.5" deprecation warning/error of recent
# releases. CMake versions older than 3.12 ignore the "...<max>" suffix.
cmake_minimum_required(VERSION 3.0...3.10)
project(sdaccel_mm)
# User-configurable cache options. All of them can be overridden on the
# command line with -D<NAME>=<value> or edited in the CMake cache.

# Target options
set(MM_PLATFORM "xilinx_u250_gen3x16_xdma_3_1_202020_1" CACHE STRING "Platform string for Vitis.")
set(MM_TARGET_CLOCK "" CACHE STRING "Target clock for kernel (uses default if left empty).")
set(MM_ENABLE_PROFILING OFF CACHE BOOL "Collect profiling information.")
set(MM_ENABLE_DEBUGGING OFF CACHE BOOL "Inject debugging cores to design.")
set(MM_VITIS_FLAGS "" CACHE STRING "Extra flags for Vitis.")
set(MM_POWER_METER OFF CACHE BOOL "Enable Corsair power meter for measuring power consumption with Corsair RMi power supplies.")
set(MM_ENABLE_BLAS ON CACHE BOOL "Use BLAS library for validation if available.")

# Domain options
set(MM_DATA_TYPE "float" CACHE STRING "Matrix data type.")
set(MM_MEMORY_BUS_WIDTH_N 64 CACHE STRING "Width of memory bus in bytes in N.")
set(MM_MEMORY_BUS_WIDTH_K 64 CACHE STRING "Width of memory bus in bytes in K.")
set(MM_MEMORY_BUS_WIDTH_M 64 CACHE STRING "Width of memory bus in bytes in M.")
set(MM_DYNAMIC_SIZES ON CACHE BOOL "Use dynamic matrix dimension sizes.")
set(MM_SIZE_N 512 CACHE STRING "Size of matrix dimension N.")
set(MM_SIZE_K 512 CACHE STRING "Size of matrix dimension K.")
set(MM_SIZE_M 512 CACHE STRING "Size of matrix dimension M.")
set(MM_MEMORY_TILE_SIZE_N 256 CACHE STRING "Tile size of outer memory tile in N.")
set(MM_MEMORY_TILE_SIZE_M 256 CACHE STRING "Tile size of outer memory tile in M.")
set(MM_PARALLELISM_N 32 CACHE STRING "Number of parallel compute in N.")
set(MM_PARALLELISM_M 8 CACHE STRING "Number of parallel compute in M.")
# set(MM_GRANULARITY_N 1 CACHE STRING "Granularity of processing elements in N.")
set(MM_TRANSPOSED_A OFF CACHE BOOL "Assume the input matrix A is transposed.")
set(MM_TRANSPOSE_WIDTH 64 CACHE STRING "Burst width when transposing reads from A.")
set(MM_TWO_DIMMS OFF CACHE BOOL "Use two DDR DIMMs instead of one.")
set(MM_MAP_OP "Multiply" CACHE STRING "Map operation to perform between A and B matrices.")
set(MM_REDUCE_OP "Add" CACHE STRING "Reduction operation to write back to C.")
# The two resource options are forwarded to the synthesis flags only when they
# evaluate to true, so the default OFF adds no definition.
set(MM_ADD_RESOURCE OFF CACHE STRING "Value passed to synthesis as -DMM_ADD_RESOURCE=<value> (not passed if OFF).")
set(MM_MULT_RESOURCE OFF CACHE STRING "Value passed to synthesis as -DMM_MULT_RESOURCE=<value> (not passed if OFF).")
# Internal
set(MM_KERNEL_NAME MatrixMultiplicationKernel)

# Determine the size in bytes of the configured element type. The result is
# stored in a variable whose name embeds the type (MM_DATA_WIDTH_<type>).
include(CheckTypeSize)
set(mm_width_var MM_DATA_WIDTH_${MM_DATA_TYPE})
check_type_size(${MM_DATA_TYPE} ${mm_width_var})
if(NOT ${mm_width_var})
  # check_type_size() produced no usable size: fall back to the hard-coded
  # widths of the types known to this project, and give up otherwise.
  if(MM_DATA_TYPE STREQUAL "half")
    set(${mm_width_var} 2)
  elseif(MM_DATA_TYPE STREQUAL "uint8_t")
    set(${mm_width_var} 1)
  else()
    message(FATAL_ERROR "Could not get size of data type ${MM_DATA_TYPE}.")
  endif()
endif()

# Kernel width in bytes along N and M: element size times parallelism.
math(EXPR MM_KERNEL_WIDTH_N "${${mm_width_var}} * ${MM_PARALLELISM_N}")
math(EXPR MM_KERNEL_WIDTH_M "${${mm_width_var}} * ${MM_PARALLELISM_M}")
# Validation checks

# Number of inner tiles: (outer tile size / parallelism) in N multiplied by the
# same quotient in M. Both quotients use integer division.
math(EXPR mm_inner_tiles_n "${MM_MEMORY_TILE_SIZE_N} / ${MM_PARALLELISM_N}")
math(EXPR mm_inner_tiles_m "${MM_MEMORY_TILE_SIZE_M} / ${MM_PARALLELISM_M}")
math(EXPR MM_INNER_TILES "${mm_inner_tiles_n} * ${mm_inner_tiles_m}")
if(NOT MM_TRANSPOSED_A)
  # Only a warning: configuration continues even if the condition is violated.
  if(MM_MEMORY_TILE_SIZE_N GREATER MM_INNER_TILES)
    message(WARNING "In-memory transposition for A cannot keep up with the instantiated number of compute units. The number of inner tiles (currently ${MM_INNER_TILES}) must be greater than or equal to the outer tile size in N (currently ${MM_MEMORY_TILE_SIZE_N}).")
  endif()
endif()

# Memory bus width in M expressed in elements instead of bytes.
math(EXPR MM_MEMORY_WIDTH_M "${MM_MEMORY_BUS_WIDTH_M} / ${MM_DATA_WIDTH_${MM_DATA_TYPE}}")

# The element width of the bus must be a whole multiple of the parallelism in M.
math(EXPR MM_REM "${MM_MEMORY_WIDTH_M} % ${MM_PARALLELISM_M}")
if(MM_REM GREATER 0)
  message(FATAL_ERROR "Bus width in M (${MM_MEMORY_WIDTH_M}) must be a multiple of the parallelism in M (${MM_PARALLELISM_M}).")
endif()

# The outer memory tile in M must consist of whole bus-wide accesses.
math(EXPR MM_REM "${MM_MEMORY_TILE_SIZE_M} % ${MM_MEMORY_WIDTH_M}")
if(MM_REM GREATER 0)
  message(FATAL_ERROR "Outer memory tile size must be divisible by element width of memory bus.")
endif()
# Make the find modules shipped with this project and with hlslib available.
# Paths are anchored at this file's directory (not the top of the whole build)
# so they also resolve when this project is added via add_subdirectory().
# The project's own modules take precedence over any pre-existing module path;
# the hlslib modules are searched last.
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake
                      ${CMAKE_MODULE_PATH}
                      ${CMAKE_CURRENT_SOURCE_DIR}/hlslib/cmake)
find_package(Vitis REQUIRED)
find_package(Threads REQUIRED)
# Check if BLAS is available
# BLAS is used only if it was both requested (MM_ENABLE_BLAS) and found.
set(mm_use_blas OFF)
if(MM_ENABLE_BLAS)
  find_package(BLAS)
  if(BLAS_FOUND)
    set(mm_use_blas ON)
  endif()
endif()

if(mm_use_blas)
  # Let the C++ sources know that BLAS can be called.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMM_HAS_BLAS")
else()
  # Clear the variable so later link lines expand it to nothing.
  set(BLAS_LIBRARIES)
endif()
# Include power meter
if(MM_POWER_METER)
  add_subdirectory(powermeter)
  include_directories(SYSTEM powermeter/include powermeter/OpenCorsairLink/include ${LIBUSB_INCLUDE_DIR})
  add_definitions("-DMM_POWER_METER")
endif()

# Header search paths. The generated Config.h is written by configure_file()
# into the current binary directory, so that directory (rather than the top of
# the whole build tree) is put on the include path; the two only differ when
# this project is built as a subdirectory of another one.
include_directories(include ${CMAKE_CURRENT_BINARY_DIR} SYSTEM hlslib/include ${Vitis_INCLUDE_DIRS})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")

# Options that must be visible both to the host compiler (add_definitions) and
# to synthesis (MM_SYNTHESIS_FLAGS).
if(MM_DYNAMIC_SIZES)
  add_definitions("-DMM_DYNAMIC_SIZES")
  set(MM_SYNTHESIS_FLAGS "${MM_SYNTHESIS_FLAGS} -DMM_DYNAMIC_SIZES")
endif()
if(MM_TRANSPOSED_A)
  add_definitions("-DMM_TRANSPOSED_A")
  set(MM_SYNTHESIS_FLAGS "${MM_SYNTHESIS_FLAGS} -DMM_TRANSPOSED_A")
endif()
add_definitions("-DHLSLIB_STREAM_TIMEOUT=16")

# HLSLIB_LEGACY_SDX is 1 for tool versions with major < 2017, or with
# major < 2018 and minor < 3; it is 0 for everything else.
if(((${Vitis_MAJOR_VERSION} LESS 2018) AND (${Vitis_MINOR_VERSION} LESS 3)) OR ${Vitis_MAJOR_VERSION} LESS 2017)
  add_definitions(-DHLSLIB_LEGACY_SDX=1)
else()
  add_definitions(-DHLSLIB_LEGACY_SDX=0)
endif()
if(MM_DATA_TYPE STREQUAL "half")
  add_definitions("-DMM_HALF_PRECISION")
endif()
# Query default clock of platform
#
# Runs only when no explicit MM_TARGET_CLOCK is given, and only when the
# platform differs from the one recorded in MM_PLATFORM_INTERNAL by the last
# successful query. The result is cached in MM_CLOCK_INTERNAL.
#
# The platforminfo output is searched for the entry marked
# "default": "true", and the first "frequency": "<number>" after that marker
# is taken as the default clock. If the tool fails, the marker is missing, or
# no frequency can be extracted, 300 MHz is assumed and the platform is not
# recorded, so the query is retried on the next configure.
if(NOT "${MM_TARGET_CLOCK}")
  if(NOT "${MM_PLATFORM_INTERNAL}" STREQUAL "${MM_PLATFORM}")
    message(STATUS "Querying default clock for ${MM_PLATFORM}.")
    execute_process(COMMAND ${Vitis_PLATFORMINFO} --platform ${MM_PLATFORM} -jhardwarePlatform.systemClocks
                    OUTPUT_VARIABLE SYSTEM_CLOCKS
                    RESULT_VARIABLE RET)
    set(CLOCK "")
    if(RET EQUAL 0)
      string(FIND "${SYSTEM_CLOCKS}" "\"default\": \"true\"" LOC)
      # string(FIND) yields -1 when the marker is absent; passing that to
      # string(SUBSTRING) as the begin index would abort the configure step.
      if(NOT LOC EQUAL -1)
        string(SUBSTRING "${SYSTEM_CLOCKS}" ${LOC} -1 SYSTEM_CLOCKS)
        string(REGEX MATCH "\"frequency\": \"[0-9\\.]+\"" SYSTEM_CLOCKS "${SYSTEM_CLOCKS}")
        string(REGEX REPLACE "\"frequency\": \"([0-9\\.]+)\"" "\\1" CLOCK "${SYSTEM_CLOCKS}")
      endif()
    endif()
    if("${CLOCK}" STREQUAL "")
      message(WARNING "Failed to query default frequency for platform ${MM_PLATFORM}. Assuming 300 MHz.")
      set(MM_CLOCK_INTERNAL "300" CACHE INTERNAL "")
    else()
      set(MM_CLOCK_INTERNAL "${CLOCK}" CACHE INTERNAL "")
      set(MM_PLATFORM_INTERNAL "${MM_PLATFORM}" CACHE INTERNAL "")
    endif()
  endif()
endif()
# Hardware configuration header for HLS
# Relative paths: the input is resolved against the current source directory,
# the output is written to the current binary directory.
configure_file(include/Config.h.in Config.h)

# C++ source code
# Absolute paths anchored at this file's directory, so the list stays valid
# when this project is added to another build via add_subdirectory().
set(MM_KERNEL_SRC
    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/Compute.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/Memory.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/Top.cpp)

# Library built from the kernel sources for the host-side executables.
add_library(mmkernel ${MM_KERNEL_SRC})
target_link_libraries(mmkernel ${CMAKE_THREAD_LIBS_INIT})
# Executables
add_executable(PrintSpecifications src/PrintSpecifications.cpp)

# Software test
# Testing is enabled before any test is registered.
enable_testing()
add_executable(TestSimulation test/TestSimulation.cpp)
# Vitis_LIBRARIES is intentionally listed twice, exactly as before, so the
# resulting link order is unchanged.
target_link_libraries(TestSimulation PRIVATE ${Vitis_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${BLAS_LIBRARIES} ${Vitis_LIBRARIES} mmkernel)
if(MM_DATA_TYPE STREQUAL "half")
  target_link_libraries(TestSimulation PRIVATE ${Vitis_FLOATING_POINT_LIBRARY})
endif()

# The NAME/COMMAND signature lets CMake replace the target name with the
# location of the built executable; the legacy positional signature does not
# resolve target names.
if(MM_DYNAMIC_SIZES)
  # With dynamic sizes the matrix dimensions are passed on the command line:
  #   N = two outer tiles in N plus one element,
  #   K = 2 * parallelism_N * parallelism_M plus one bus width (in elements),
  #   M = two outer tiles in M plus one bus width in M (in elements).
  math(EXPR TEST_SIZE_N "2 * ${MM_MEMORY_TILE_SIZE_N} + 1")
  math(EXPR TEST_SIZE_K "2 * ${MM_PARALLELISM_N} * ${MM_PARALLELISM_M} + ${MM_MEMORY_BUS_WIDTH_K} / ${MM_DATA_WIDTH_${MM_DATA_TYPE}}")
  math(EXPR TEST_SIZE_M "2 * ${MM_MEMORY_TILE_SIZE_M} + ${MM_MEMORY_WIDTH_M}")
  add_test(NAME TestSimulation
           COMMAND TestSimulation ${TEST_SIZE_N} ${TEST_SIZE_K} ${TEST_SIZE_M})
else()
  add_test(NAME TestSimulation COMMAND TestSimulation)
endif()
# Synthesis flags
# MM_SYNTHESIS_FLAGS is a single space-separated string that is extended here
# with the flags that apply to synthesis only.
set(MM_SYNTHESIS_FLAGS "${MM_SYNTHESIS_FLAGS} -O3 -DMM_SYNTHESIS")
if(MM_DATA_TYPE STREQUAL "half")
  set(MM_SYNTHESIS_FLAGS "${MM_SYNTHESIS_FLAGS} -DMM_HALF_PRECISION")
endif()

# Forward MM_ADD_RESOURCE / MM_MULT_RESOURCE as definitions, but only when the
# option holds a value that evaluates to true.
foreach(mm_op ADD MULT)
  if(MM_${mm_op}_RESOURCE)
    set(MM_SYNTHESIS_FLAGS "${MM_SYNTHESIS_FLAGS} -DMM_${mm_op}_RESOURCE=${MM_${mm_op}_RESOURCE}")
  endif()
endforeach()

# Port mapping: gmem1 and gmem2 always use DDR[1]; gmem0 uses DDR[0] when two
# DIMMs are enabled and DDR[1] otherwise.
if(MM_TWO_DIMMS)
  set(mm_gmem0_bank 0)
else()
  set(mm_gmem0_bank 1)
endif()
set(MM_PORT_MAPPING
    m_axi_gmem0:DDR[${mm_gmem0_bank}]
    m_axi_gmem1:DDR[1]
    m_axi_gmem2:DDR[1])
# Hardware kernel
# NOTE(review): add_vitis_kernel() and add_vitis_program() are not defined in
# this file; presumably they come from the Vitis find module located through
# CMAKE_MODULE_PATH - confirm against that module.
#
# The kernel name is taken from MM_KERNEL_NAME so it is defined in one place
# only, and the directory holding the generated Config.h is referenced as the
# current binary directory, which is where configure_file() writes it.
add_vitis_kernel(MatrixMultiplication
                 KERNEL "${MM_KERNEL_NAME}"
                 FILES ${MM_KERNEL_SRC}
                 HLS_FLAGS ${MM_SYNTHESIS_FLAGS}
                 INCLUDE_DIRS include
                              hlslib/include
                              ${CMAKE_CURRENT_BINARY_DIR}
                 PORT_MAPPING ${MM_PORT_MAPPING}
                 DEPENDS include/Compute.h
                         include/MatrixMultiplication.h
                         include/Memory.h
                         ${CMAKE_CURRENT_BINARY_DIR}/Config.h)

# MM_TARGET_CLOCK may be empty, in which case CLOCK receives no value.
add_vitis_program(MatrixMultiplication ${MM_PLATFORM}
                  CLOCK ${MM_TARGET_CLOCK}
                  BUILD_FLAGS ${MM_VITIS_FLAGS}
                  PROFILING ${MM_ENABLE_PROFILING}
                  DEBUGGING ${MM_ENABLE_DEBUGGING})
# Host code to launch kernel
add_executable(RunHardware.exe host/RunHardware.cpp)
if(MM_TWO_DIMMS)
  target_compile_definitions(RunHardware.exe PRIVATE MM_TWO_DIMMS)
endif()

# Gather every library the host executable needs, in link order, and hand
# them to the linker in one call.
set(mm_host_libs ${Vitis_LIBRARIES} ${BLAS_LIBRARIES} mmkernel)
if(MM_DATA_TYPE STREQUAL "half")
  list(APPEND mm_host_libs ${Vitis_FLOATING_POINT_LIBRARY})
endif()
if(MM_POWER_METER)
  list(APPEND mm_host_libs powermeter)
endif()
target_link_libraries(RunHardware.exe ${mm_host_libs})