-
-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* feat(AMD GPU): initial commit to support AMD GPU (working for ASM but not object code) * feat(AMD GPU): 🔥 🔥 end to end JIT compilation to AMD GPU is working
- Loading branch information
Showing
13 changed files
with
2,093 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
# Constantine | ||
# Copyright (c) 2018-2019 Status Research & Development GmbH | ||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy | ||
# Licensed and distributed under either of | ||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). | ||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). | ||
# at your option. This file may not be copied, modified, or distributed except according to those terms. | ||
|
||
import | ||
constantine/platforms/abis/amdgpu_abi {.all.}, | ||
constantine/platforms/abis/amdcomgr_abi, | ||
constantine/platforms/abis/c_abi, | ||
constantine/platforms/llvm/llvm, | ||
constantine/platforms/primitives, | ||
./ir | ||
|
||
export | ||
amdgpu_abi, | ||
Flag, flag, wrapOpenArrayLenType | ||
|
||
# ############################################################ | ||
# | ||
# AMD GPUs API | ||
# | ||
# ############################################################ | ||
|
||
# Hip Runtime API | ||
# ------------------------------------------------------------ | ||
|
||
template check*(status: HipError) = | ||
## Check the status code of a Hip operation | ||
## Exit program with error if failure | ||
|
||
let code = status # ensure that the input expression is evaluated once only | ||
if code != hipSuccess: | ||
writeStackTrace() | ||
stderr.write(astToStr(status) & " " & $instantiationInfo() & " exited with error: " & $code & '\n') | ||
quit 1 | ||
|
||
func hipModuleGetFunction*(kernel: var HipFunction, module: HipModule, fnName: openArray[char]): HipError {.inline.}= | ||
hipModuleGetFunction(kernel, module, fnName[0].unsafeAddr) | ||
|
||
proc getGcnArchName*(deviceID: int32): string = | ||
var prop: HipDeviceProp | ||
check hipGetDeviceProperties(prop, deviceID) | ||
|
||
for c in prop.gcnArchName: | ||
if c != '\0': | ||
result.add c | ||
|
||
proc hipDeviceInit*(deviceID = 0'i32): HipDevice = | ||
|
||
check hipInit(deviceID.uint32) | ||
|
||
var devCount: int32 | ||
check hipGetDeviceCount(devCount) | ||
if devCount == 0: | ||
echo "hipDeviceInit error: no devices supporting AMD ROCm/HIP" | ||
quit 1 | ||
|
||
var hipDevice: HipDevice | ||
check hipDeviceGet(hipDevice, deviceID) | ||
var name = newString(128) | ||
check hipDeviceGetName(name[0].addr, name.len.int32, hipDevice) | ||
echo "Using HIP Device [", deviceID, "]: ", cstring(name) | ||
echo "AMD GCN ARCH: ", deviceID.getGcnArchName() | ||
|
||
return hipDevice | ||
|
||
# ############################################################ | ||
# | ||
# LLVM IR for AMD GPUs | ||
# | ||
# ############################################################ | ||
# | ||
# Note: | ||
# __device__ functions for field and elliptic curve arithmetic | ||
# might be compiled by default with scalar codegen | ||
# | ||
# We will need to either: | ||
# - Derive explicitly a vectorized version of the warp/wave size (32) | ||
# - Derive implicitly a vectorized version, probably with __forceinline__ | ||
|
||
proc wrapInCallableHipKernel*(module: ModuleRef, fn: FnDef) = | ||
## Create a public wrapper of a Hip device function | ||
## | ||
## A function named `addmod` can be found by appending _public | ||
## check hipModuleGetFunction(fnPointer, cuModule, "addmod_public") | ||
|
||
let pubName = fn.fnImpl.getName() & "_public" | ||
let pubFn = module.addFunction(cstring(pubName), fn.fnTy) | ||
|
||
let ctx = module.getContext() | ||
let builder = ctx.createBuilder() | ||
defer: builder.dispose() | ||
|
||
let blck = ctx.appendBasicBlock(pubFn, "publicKernelBody") | ||
builder.positionAtEnd(blck) | ||
|
||
var args = newSeq[ValueRef](fn.fnTy.countParamTypes()) | ||
for i, arg in mpairs(args): | ||
arg = pubFn.getParam(i.uint32) | ||
discard builder.call2(fn.fnTy, fn.fnImpl, args) | ||
|
||
# A public kernel must return void | ||
builder.retVoid() | ||
pubFn.setCallingConvention(AMDGPU_KERNEL) | ||
|
||
# ############################################################ | ||
# | ||
# Code generation | ||
# | ||
# ############################################################ | ||
|
||
template check*(status: ComgrStatus) = | ||
## Check the status code of a Comgr operation | ||
## Exit program with error if failure | ||
|
||
let code = status # ensure that the input expression is evaluated once only | ||
if code != AMD_COMGR_STATUS_SUCCESS: | ||
writeStackTrace() | ||
stderr.write(astToStr(status) & " " & $instantiationInfo() & " exited with error: " & $code & '\n') | ||
quit 1 | ||
|
||
|
||
proc linkAmdGpu*(reloc_obj: seq[byte], gcnArchName: string): seq[byte] {.noInline.} = | ||
## Link a relocatable object code | ||
## into an executable that can be used through hipModuleLoadData | ||
var roc: ComgrData | ||
check amd_comgr_create_data(AMD_COMGR_DATA_KIND_RELOCATABLE, roc) | ||
defer: check amd_comgr_release_data(roc) | ||
|
||
var ai: ComgrActionInfo | ||
check amd_comgr_create_action_info(ai) | ||
defer: check amd_comgr_destroy_action_info(ai) | ||
|
||
var ds: ComgrDataset | ||
check amd_comgr_create_data_set(ds) | ||
defer: check amd_comgr_destroy_data_set(ds) | ||
|
||
var dsOut: ComgrDataset | ||
check amd_comgr_create_data_set(dsOut) | ||
defer: check amd_comgr_destroy_data_set(dsOut) | ||
|
||
check roc.amd_comgr_set_data(reloc_obj.len.csize_t(), reloc_obj[0].addr) | ||
check roc.amd_comgr_set_data_name("linkAmdGpu-input.o") | ||
check ds.amd_comgr_data_set_add(roc) | ||
|
||
check ai.amd_comgr_action_info_set_isa_name( | ||
cstring("amdgcn-amd-amdhsa--" & gcnArchName) | ||
) | ||
|
||
check amd_comgr_do_action( | ||
AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE, | ||
info = ai, | ||
input = ds, | ||
output = dsOut) | ||
|
||
# Extract the executable | ||
# ------------------------------------------------ | ||
|
||
var exe: ComgrData | ||
check amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, exe) | ||
defer: check amd_comgr_release_data(exe) | ||
|
||
check amd_comgr_action_data_get_data( | ||
dsOut, AMD_COMGR_DATA_KIND_EXECUTABLE, | ||
index = 0, exe) | ||
|
||
# Query the required buffer size | ||
var size: csize_t | ||
check amd_comgr_get_data( | ||
exe, size, nil) | ||
|
||
# Size includes nul char | ||
# But we use seq[byte] not a string, so Nim doesn't auto-inster a \0 | ||
# Hence allocation size is exact. | ||
result.setLen(int size) | ||
|
||
check amd_comgr_get_data( | ||
exe, size, result[0].addr) | ||
|
||
|
||
# ############################################################ | ||
# | ||
# Code execution | ||
# | ||
# ############################################################ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.