Skip to content

Commit

Permalink
[LIBCLC][PTX] Add group_ballot intrinsic (#5020)
Browse files Browse the repository at this point in the history
This patch implements the `group_ballot` intrinsic for NVIDIA; it is
currently only implemented for subgroups.
  • Loading branch information
npmiller authored Nov 29, 2021
1 parent d3ab145 commit 0680e5c
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 0 deletions.
1 change: 1 addition & 0 deletions libclc/ptx-nvidiacl/libspirv/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ images/image_helpers.ll
images/image.cl
group/collectives_helpers.ll
group/collectives.cl
group/group_ballot.cl
atomic/atomic_add.cl
atomic/atomic_and.cl
atomic/atomic_cmpxchg.cl
Expand Down
2 changes: 2 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/group/collectives.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//

#include "membermask.h"

#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

Expand Down
36 changes: 36 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/group/group_ballot.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "membermask.h"

#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

// Ballot across a group: returns a 4 x 32-bit vector whose first element holds
// the result of the vote; the remaining three elements are always zero.
//
//   flag      - execution scope; only Subgroup is handled, any other value
//               traps.
//   predicate - the calling thread's contribution to the ballot.
_CLC_DEF _CLC_CONVERGENT __clc_vec4_uint32_t
_Z29__spirv_GroupNonUniformBallotjb(unsigned flag, bool predicate) {
  // Subgroup is the only scope implemented so far; bail out hard otherwise.
  if (flag != Subgroup) {
    __builtin_trap();
    __builtin_unreachable();
  }

  // Thread mask handed to the vote instruction.
  const unsigned member_mask = __clc__membermask();

  // Perform the ballot over the threads named by the mask.
  const unsigned ballot = __nvvm_vote_ballot_sync(member_mask, predicate);

  // The ballot covers at most 32 threads, so the whole result fits in the
  // first element; the upper three elements are cleared.
  __clc_vec4_uint32_t mask;
  mask[0] = ballot;
  mask[1] = 0;
  mask[2] = 0;
  mask[3] = 0;

  return mask;
}
17 changes: 17 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/group/membermask.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef PTX_NVIDIACL_MEMBERMASK_H
#define PTX_NVIDIACL_MEMBERMASK_H

#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

// Returns a uint thread mask. group_ballot.cl passes the result straight to
// __nvvm_vote_ballot_sync as its mask argument. The definition lives outside
// this header.
// NOTE(review): presumably the mask identifies the threads of the caller's
// subgroup -- confirm against the definition.
_CLC_DEF _CLC_CONVERGENT uint __clc__membermask();

#endif

0 comments on commit 0680e5c

Please sign in to comment.