Skip to content

Commit

Permalink
Nvidia remastered (#464)
Browse files Browse the repository at this point in the history
* nvidia: update hello world following changes in #456

* update Nvidia backend to use the new LLVM infra

* update Nvidia multiplication
  • Loading branch information
mratsim authored Aug 27, 2024
1 parent 0b24651 commit 65147ed
Show file tree
Hide file tree
Showing 11 changed files with 437 additions and 524 deletions.
39 changes: 0 additions & 39 deletions constantine/math_compiler/codegen_amdgpu.nim
Original file line number Diff line number Diff line change
Expand Up @@ -67,45 +67,6 @@ proc hipDeviceInit*(deviceID = 0'i32): HipDevice =

return hipDevice

# ############################################################
#
# LLVM IR for AMD GPUs
#
# ############################################################
#
# Note:
# __device__ functions for field and elliptic curve arithmetic
# might be compiled by default with scalar codegen
#
# We will need to either:
# - Derive explicitly a vectorized version of the warp/wave size (32)
# - Derive implicitly a vectorized version, probably with __forceinline__

proc wrapInCallableHipKernel*(module: ModuleRef, fn: FnDef) =
## Create a public wrapper of a Hip device function
##
## A function named `addmod` can be found by appending _public
## check hipModuleGetFunction(fnPointer, cuModule, "addmod_public")

let pubName = fn.fnImpl.getName() & "_public"
let pubFn = module.addFunction(cstring(pubName), fn.fnTy)

let ctx = module.getContext()
let builder = ctx.createBuilder()
defer: builder.dispose()

let blck = ctx.appendBasicBlock(pubFn, "publicKernelBody")
builder.positionAtEnd(blck)

var args = newSeq[ValueRef](fn.fnTy.countParamTypes())
for i, arg in mpairs(args):
arg = pubFn.getParam(i.uint32)
discard builder.call2(fn.fnTy, fn.fnImpl, args)

# A public kernel must return void
builder.retVoid()
pubFn.setCallingConvention(AMDGPU_KERNEL)

# ############################################################
#
# Code generation
Expand Down
35 changes: 1 addition & 34 deletions constantine/math_compiler/codegen_nvidia.nim
Original file line number Diff line number Diff line change
Expand Up @@ -109,37 +109,6 @@ proc cudaDeviceInit*(deviceID = 0'i32): CUdevice =

return cuDevice

# ############################################################
#
# LLVM IR for Nvidia GPUs
#
# ############################################################

proc wrapInCallableCudaKernel*(module: ModuleRef, fn: FnDef) =
## Create a public wrapper of a cuda device function
##
## A function named `addmod` can be found by appending _public
## check cuModuleGetFunction(fnPointer, cuModule, "addmod_public")

let pubName = fn.fnImpl.getName() & "_public"
let pubFn = module.addFunction(cstring(pubName), fn.fnTy)

let ctx = module.getContext()
let builder = ctx.createBuilder()
defer: builder.dispose()

let blck = ctx.appendBasicBlock(pubFn, "publicKernelBody")
builder.positionAtEnd(blck)

var args = newSeq[ValueRef](fn.fnTy.countParamTypes())
for i, arg in mpairs(args):
arg = pubFn.getParam(i.uint32)
discard builder.call2(fn.fnTy, fn.fnImpl, args)

# A public kernel must return void
builder.retVoid()
module.tagCudaKernel((fn.fnTy, pubFn))

# ############################################################
#
# Code generation
Expand Down Expand Up @@ -194,9 +163,7 @@ proc codegenNvidiaPTX*(asy: Assembler_LLVM, sm: tuple[major, minor: int32]): str
#
# ############################################################

proc getCudaKernel*(cuMod: CUmodule, cm: CurveMetadata, opcode: Opcode): CUfunction =
# Public kernels are appended _public
let fnName = cm.genSymbol(opcode) & "_public"
proc getCudaKernel*(cuMod: CUmodule, fnName: string): CUfunction =
check cuModuleGetFunction(result, cuMod, fnName)

proc exec*[T](jitFn: CUfunction, r: var T, a, b: T) =
Expand Down
34 changes: 34 additions & 0 deletions constantine/math_compiler/impl_fields_dispatch.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
constantine/platforms/llvm/llvm,
./ir,
./impl_fields_sat {.all.},
./impl_fields_nvidia {.all.}

proc modadd*(asy: Assembler_LLVM, fd: FieldDescriptor, r, a, b, M: ValueRef) =
case asy.backend
of {bkX86_64_Linux, bkAmdGpu}:
asy.modadd_sat(fd, r, a, b, M)
of bkNvidiaPTX:
asy.modadd_nvidia(fd, r, a, b, M)

proc modsub*(asy: Assembler_LLVM, fd: FieldDescriptor, r, a, b, M: ValueRef) =
case asy.backend
of bkNvidiaPTX:
asy.modsub_nvidia(fd, r, a, b, M)
else:
doAssert false, "Unimplemented"

proc mtymul*(asy: Assembler_LLVM, fd: FieldDescriptor, r, a, b, M: ValueRef) =
case asy.backend
of bkNvidiaPTX:
asy.mtymul_nvidia(fd, r, a, b, M)
else:
doAssert false, "Unimplemented"
3 changes: 1 addition & 2 deletions constantine/math_compiler/impl_fields_globals.nim
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,7 @@ proc getM0ninv*(asy: Assembler_LLVM, fd: FieldDescriptor): ValueRef =
fd.wordTy
)


return m0ninv
return asy.load2(fd.wordTy, m0ninv, "m0ninv")

when isMainModule:
let asy = Assembler_LLVM.new("test_module", bkX86_64_Linux)
Expand Down
Loading

0 comments on commit 65147ed

Please sign in to comment.