-
Notifications
You must be signed in to change notification settings - Fork 137
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* port layernorm * change warp_welford.hpp * Update warpshuffle * 1. Add save mean and save std back 2. Move construction of tensor_view and tile_window to operator() * refine welford max count calculation * unify layernorm api * Rename file * Remove save mean and inv std * Revert "refine welford max count calculation" This reverts commit 0223658. * Fix order of parameter * refine welford max count calculation again * Remove fp32 instances * Fix bug of padding * refactor api * Support bf16 * Extract common function * Refine arg of operator() * Add kMThreadPerBlock to template parameter * clang format * Refine variable name * Refine file name * remove redundant line * refactor layernorm2d pipeline and add block-per-block utility * fix name * rename more * add more block-per-tile instance * remove duplicated define * update instance for 2048, 1024 case * support up to 2048 now * opt loading * add n1536 * Add two pass pipeline * format * Fix incorrect type * parallel compilation * Use smaller N * fix 2p pass * Support Repeat_M in distribution * Refine nameing * Add reduce example --------- Co-authored-by: letaoqin <[email protected]> Co-authored-by: aska-0096 <[email protected]> Co-authored-by: rocking <[email protected]> Co-authored-by: carlushuang <[email protected]>
- Loading branch information
1 parent
3f71093
commit 0394f8a
Showing
59 changed files
with
2,916 additions
and
1,041 deletions.
There are no files selected for viewing
223 changes: 110 additions & 113 deletions
223
example/66_complex_contraction_bilinear/run_complex_contraction_bilinear_example.inc
100755 → 100644
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,21 @@ | ||
# Build script for the layernorm2d forward example; the target name is kept in one variable. | ||
set(EXAMPLE_LAYERNORM2D_FWD "tile_example_layernorm2d_fwd") | ||
# This target is deliberately not added through add_example_executable(), because it | ||
# should not become part of "make all/install/check" (hence EXCLUDE_FROM_ALL below). | ||
# NOTE(review): the next two commands look like the removed side of this diff hunk | ||
# (hunk header is -1,4 +1,21); confirm against the real CMakeLists.txt before reusing them. | ||
add_executable(tile_example_layernorm2d_fwd EXCLUDE_FROM_ALL layernorm2d_fwd.cpp) | ||
target_compile_options(tile_example_layernorm2d_fwd PRIVATE -DSAVE_MEAN_INV_STD) | ||
message("adding example ${EXAMPLE_LAYERNORM2D_FWD}") | ||
# Every .cpp file under instances/ is globbed and attached to the target as an extra source. | ||
file(GLOB INSTANCE_SRCS instances/*.cpp) | ||
add_executable(${EXAMPLE_LAYERNORM2D_FWD} EXCLUDE_FROM_ALL layernorm2d_fwd.cpp) | ||
target_include_directories(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) | ||
target_sources(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${INSTANCE_SRCS}) | ||
|
||
# Start from an empty option list; flags are appended below. | ||
set(EXAMPLE_LAYERNORM2D_FWD_COMPILE_OPTIONS) | ||
|
||
# NOTE: -Wno-undefined-func-template is passed so that the sources compile without | ||
# explicitly declaring the function template specializations. | ||
list(APPEND EXAMPLE_LAYERNORM2D_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal) | ||
|
||
target_compile_options(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${EXAMPLE_LAYERNORM2D_FWD_COMPILE_OPTIONS}) | ||
|
||
# TODO: RULE_MESSAGES has to be switched off, otherwise the progress output generated | ||
# by CMake prints too many files and fails with "execvp: /bin/sh: Argument list too long". | ||
# Caveat: this is a GLOBAL property, so it may affect more than this example. | ||
# TODO: consider generating the makefile ourselves instead. | ||
set_property(GLOBAL PROPERTY RULE_MESSAGES OFF) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
155 changes: 155 additions & 0 deletions
155
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_api.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include <ck_tile/core.hpp> | ||
#include "layernorm2d_fwd.hpp" | ||
|
||
// Shorthand alias: forwards its nine parameters, unchanged and in the same order, to | ||
// layernorm2d_fwd_traits_ (not declared in this file; presumably in layernorm2d_fwd.hpp). | ||
// The dispatcher in this file labels the eight non-type arguments "rm rn tm tn vn pd mv 2p", | ||
// which by position appear to mean Repeat_M, Repeat_N, ThreadPerBlock_M, ThreadPerBlock_N, | ||
// Vector_N, kPadN, kSaveMeanInvStd and kTwoPass. | ||
template <typename DataType_, | ||
ck_tile::index_t Repeat_M_, // each thread repeat along M | ||
ck_tile::index_t Repeat_N_, // each thread repeat along N | ||
ck_tile::index_t ThreadPerBlock_M_, // num threads along M | ||
ck_tile::index_t ThreadPerBlock_N_, // num threads along N | ||
ck_tile::index_t Vector_N_, // vector size along N | ||
bool kPadN_, | ||
bool kSaveMeanInvStd_, | ||
bool kTwoPass_> | ||
using trait_ = layernorm2d_fwd_traits_<DataType_, | ||
Repeat_M_, | ||
Repeat_N_, | ||
ThreadPerBlock_M_, | ||
ThreadPerBlock_N_, | ||
Vector_N_, | ||
kPadN_, | ||
kSaveMeanInvStd_, | ||
kTwoPass_>; | ||
|
||
// Selects one layernorm2d_fwd_ instantiation for element type data_type based on a.n and | ||
// returns whatever that call returns. Called by layernorm2d_fwd for ck_tile::fp16_t and | ||
// ck_tile::bf16_t. a.n is presumably the length of the normalized dimension - confirm in | ||
// layernorm2d_fwd.hpp. | ||
// - The traits argument is accepted but unused (its parameter name is commented out). | ||
// - Buckets are tested in ascending order of their upper bound: 64, 128, 256, 512, 768, | ||
// 1024, 1536, 2048, 3072, 4096, and finally a.n > 4096. | ||
// - Inside a bucket the a.n % 8 / % 4 / % 2 tests pick the largest listed vn that divides | ||
// a.n, falling back to the vn = 1 row. | ||
// - In every row rn * tn * vn equals the bucket's upper bound (4096 for the last bucket). | ||
// - Only the a.n > 4096 bucket passes true as the last (kTwoPass_) argument. | ||
// - The chain ends with "a.n <= 4096" followed by "a.n > 4096", so one branch always | ||
// assigns r and the initial -1 is not returned. | ||
// - The #else branch is compiled out by "#if 1". | ||
template <typename data_type> | ||
float layernorm2d_fwd_b16_(layernorm2d_fwd_traits /*t*/, | ||
layernorm2d_fwd_args a, | ||
const ck_tile::stream_config& s) | ||
{ | ||
#if 1 | ||
float r = -1; | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
if(a.n <= 64) { | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 1, 4, 64, 1, true, false, false>>(s, a); | ||
} | ||
else if(a.n <= 128) { | ||
if (a.n % 2 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 1, 4, 64, 2, true, false, false>>(s, a); | ||
else | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 2, 4, 64, 1, true, false, false>>(s, a); | ||
} | ||
else if(a.n <= 256) { | ||
if (a.n % 4 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 1, 4, 64, 4, true, false, false>>(s, a); | ||
else if (a.n % 2 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 2, 4, 64, 2, true, false, false>>(s, a); | ||
else | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 4, 4, 64, 1, true, false, false>>(s, a); | ||
} | ||
else if(a.n <= 512) { | ||
if (a.n % 8 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 1, 4, 64, 8, true, false, false>>(s, a); | ||
else if (a.n % 4 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 2, 4, 64, 4, true, false, false>>(s, a); | ||
else if (a.n % 2 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 4, 4, 64, 2, true, false, false>>(s, a); | ||
else | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 8, 4, 64, 1, true, false, false>>(s, a); | ||
} | ||
else if(a.n <= 768) { | ||
if (a.n % 4 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 3, 4, 64, 4, true, false, false>>(s, a); | ||
else if (a.n % 2 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 6, 4, 64, 2, true, false, false>>(s, a); | ||
else | ||
r = layernorm2d_fwd_<trait_<data_type, 1,12, 4, 64, 1, true, false, false>>(s, a); | ||
} | ||
else if(a.n <= 1024) { | ||
if (a.n % 8 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 1, 2, 128, 8, true, false, false>>(s, a); | ||
else if (a.n % 4 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 2, 2, 128, 4, true, false, false>>(s, a); | ||
else if (a.n % 2 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 4, 2, 128, 2, true, false, false>>(s, a); | ||
else | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 4, 1, 256, 1, true, false, false>>(s, a); | ||
} | ||
else if(a.n <= 1536) { | ||
if (a.n % 8 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 3, 4, 64, 8, true, false, false>>(s, a); | ||
else if (a.n % 4 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 3, 2, 128, 4, true, false, false>>(s, a); | ||
else if (a.n % 2 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 3, 1, 256, 2, true, false, false>>(s, a); | ||
else | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 6, 1, 256, 1, true, false, false>>(s, a); | ||
} | ||
else if(a.n <= 2048) { | ||
if (a.n % 8 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 1, 1, 256, 8, true, false, false>>(s, a); | ||
else if (a.n % 4 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 2, 1, 256, 4, true, false, false>>(s, a); | ||
else if (a.n % 2 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 4, 1, 256, 2, true, false, false>>(s, a); | ||
else | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 8, 1, 256, 1, true, false, false>>(s, a); | ||
} | ||
else if(a.n <= 3072) { | ||
if (a.n % 8 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 3, 1, 128, 8, true, false, false>>(s, a); | ||
else if (a.n % 4 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 3, 1, 256, 4, true, false, false>>(s, a); | ||
else if (a.n % 2 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 6, 1, 256, 2, true, false, false>>(s, a); | ||
else | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 3, 1, 1024, 1, true, false, false>>(s, a); | ||
} | ||
else if(a.n <= 4096) { | ||
if (a.n % 8 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 2, 1, 256, 8, true, false, false>>(s, a); | ||
else if (a.n % 4 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 4, 1, 256, 4, true, false, false>>(s, a); | ||
else if (a.n % 2 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 2, 1, 1024, 2, true, false, false>>(s, a); | ||
else | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 4, 1, 1024, 1, true, false, false>>(s, a); | ||
} | ||
else if(a.n > 4096) { | ||
if (a.n % 8 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 2, 1, 256, 8, true, false, true>>(s, a); | ||
else if (a.n % 4 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 4, 1, 256, 4, true, false, true>>(s, a); | ||
else if (a.n % 2 == 0) | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 2, 1, 1024, 2, true, false, true>>(s, a); | ||
else | ||
r = layernorm2d_fwd_<trait_<data_type, 1, 4, 1, 1024, 1, true, false, true>>(s, a); | ||
} | ||
return r; | ||
#else | ||
return layernorm2d_fwd_<trait_<data_type, 1, 1, 1, 256, 4, true, false, false>>(s, a); | ||
#endif | ||
// clang-format on | ||
} | ||
|
||
// Entry point of the example API: dispatches on t.data_type and returns the result of the | ||
// matching layernorm2d_fwd_b16_ instantiation ("fp16" -> ck_tile::fp16_t, | ||
// "bf16" -> ck_tile::bf16_t). Any other data_type string ends in | ||
// std::runtime_error("Without supported instances!"). | ||
// NOTE(review): r is never reassigned after its initialization to -1, so the r < 0 check | ||
// is always true when it is reached and the final "return r" cannot execute. | ||
float layernorm2d_fwd(layernorm2d_fwd_traits t, | ||
layernorm2d_fwd_args a, | ||
const ck_tile::stream_config& s) | ||
{ | ||
|
||
float r = -1; | ||
if(t.data_type.compare("fp16") == 0) | ||
{ | ||
return layernorm2d_fwd_b16_<ck_tile::fp16_t>(t, a, s); | ||
} | ||
else if(t.data_type.compare("bf16") == 0) | ||
{ | ||
return layernorm2d_fwd_b16_<ck_tile::bf16_t>(t, a, s); | ||
} | ||
// Only reached for an unsupported data_type; see the note above about r. | ||
if(r < 0) | ||
throw std::runtime_error("Without supported instances!"); | ||
|
||
return r; | ||
} |
22 changes: 22 additions & 0 deletions
22
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_bf16_n1024_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::bf16_t. The four active tuples | ||
// are the ones the "a.n <= 1024" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) | ||
// dispatches to. The "#if 0" group is compiled out; none of its tuples is used by the | ||
// active dispatcher branches. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
#if 0 | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 2, 4, 64, 8, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 4, 4, 64, 4, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 8, 4, 64, 2, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 16, 4, 64, 1, true , false, false>>(const S&, A); | ||
|
||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 1, 1, 256, 4, true , false, false>>(const S&, A); | ||
#endif | ||
|
||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 1, 2, 128, 8, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 2, 2, 128, 4, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 4, 2, 128, 2, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 4, 1, 256, 1, true, false, false>>(const S&, A); | ||
// clang-format on |
13 changes: 13 additions & 0 deletions
13
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_bf16_n1536_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::bf16_t; these tuples are the | ||
// ones the "a.n <= 1536" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) uses. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 3, 4, 64, 8, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 3, 2, 128, 4, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 3, 1, 256, 2, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 6, 1, 256, 1, true, false, false>>(const S&, A); | ||
// clang-format on |
14 changes: 14 additions & 0 deletions
14
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_bf16_n2048_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::bf16_t; these tuples are the | ||
// ones the "a.n <= 2048" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) uses. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 1, 1, 256, 8, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 2, 1, 256, 4, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 4, 1, 256, 2, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 8, 1, 256, 1, true, false, false>>(const S&, A); | ||
|
||
// clang-format on |
12 changes: 12 additions & 0 deletions
12
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_bf16_n256_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::bf16_t; these tuples are the | ||
// ones the "a.n <= 256" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) uses. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 1, 4, 64, 4, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 2, 4, 64, 2, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 4, 4, 64, 1, true , false, false>>(const S&, A); | ||
// clang-format on |
14 changes: 14 additions & 0 deletions
14
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_bf16_n3072_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::bf16_t; these tuples are the | ||
// ones the "a.n <= 3072" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) uses. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 3, 1, 128, 8, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 3, 1, 256, 4, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 6, 1, 256, 2, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 3, 1, 1024, 1, true, false, false>>(const S&, A); | ||
|
||
// clang-format on |
14 changes: 14 additions & 0 deletions
14
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_bf16_n4096_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::bf16_t; these tuples are the | ||
// ones the "a.n <= 4096" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) uses. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 2, 1, 256, 8, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 4, 1, 256, 4, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 2, 1, 1024, 2, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 4, 1, 1024, 1, true, false, false>>(const S&, A); | ||
|
||
// clang-format on |
14 changes: 14 additions & 0 deletions
14
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_bf16_n4096_tp_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::bf16_t with the last (2p) flag | ||
// set to true; these tuples are the ones the "a.n > 4096" branch of layernorm2d_fwd_b16_ | ||
// (layernorm2d_fwd_api.cpp) uses. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 2, 1, 256, 8, true, false, true>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 4, 1, 256, 4, true, false, true>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 2, 1, 1024, 2, true, false, true>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 4, 1, 1024, 1, true, false, true>>(const S&, A); | ||
|
||
// clang-format on |
13 changes: 13 additions & 0 deletions
13
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_bf16_n512_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::bf16_t; these tuples are the | ||
// ones the "a.n <= 512" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) uses. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 1, 4, 64, 8, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 2, 4, 64, 4, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 4, 4, 64, 2, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 8, 4, 64, 1, true , false, false>>(const S&, A); | ||
// clang-format on |
12 changes: 12 additions & 0 deletions
12
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_bf16_n64_n128_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::bf16_t; these tuples are the | ||
// ones the "a.n <= 64" and "a.n <= 128" branches of layernorm2d_fwd_b16_ | ||
// (layernorm2d_fwd_api.cpp) use. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 1, 4, 64, 1, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 1, 4, 64, 2, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 2, 4, 64, 1, true , false, false>>(const S&, A); | ||
// clang-format on |
12 changes: 12 additions & 0 deletions
12
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_bf16_n768_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::bf16_t; these tuples are the | ||
// ones the "a.n <= 768" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) uses. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 3, 4, 64, 4, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 6, 4, 64, 2, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::bf16_t, 1, 12, 4, 64, 1, true , false, false>>(const S&, A); | ||
// clang-format on |
22 changes: 22 additions & 0 deletions
22
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_fp16_n1024_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::fp16_t. The four active tuples | ||
// are the ones the "a.n <= 1024" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) | ||
// dispatches to. The "#if 0" group is compiled out; none of its tuples is used by the | ||
// active dispatcher branches. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
#if 0 | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 2, 4, 64, 8, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 4, 4, 64, 4, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 8, 4, 64, 2, true , false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 16, 4, 64, 1, true , false, false>>(const S&, A); | ||
|
||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 1, 1, 256, 4, true , false, false>>(const S&, A); | ||
#endif | ||
|
||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 1, 2, 128, 8, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 2, 2, 128, 4, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 4, 2, 128, 2, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 4, 1, 256, 1, true, false, false>>(const S&, A); | ||
// clang-format on |
13 changes: 13 additions & 0 deletions
13
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_fp16_n1536_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::fp16_t; these tuples are the | ||
// ones the "a.n <= 1536" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) uses. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 3, 4, 64, 8, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 3, 2, 128, 4, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 3, 1, 256, 2, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 6, 1, 256, 1, true, false, false>>(const S&, A); | ||
// clang-format on |
14 changes: 14 additions & 0 deletions
14
example/ck_tile/02_layernorm2d/instances/layernorm2d_fwd_fp16_n2048_instance.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "layernorm2d_fwd_instance_common.hpp" | ||
|
||
// Explicit instantiations of layernorm2d_fwd_ for ck_tile::fp16_t; these tuples are the | ||
// ones the "a.n <= 2048" branch of layernorm2d_fwd_b16_ (layernorm2d_fwd_api.cpp) uses. | ||
// S, A and trait_ are not declared here; presumably they come from | ||
// layernorm2d_fwd_instance_common.hpp. | ||
// Legend by position (to be confirmed against trait_): rm/rn = Repeat_M/N, | ||
// tm/tn = ThreadPerBlock_M/N, vn = Vector_N, pd = kPadN, mv = kSaveMeanInvStd, 2p = kTwoPass. | ||
// clang-format off | ||
// rm rn tm tn vn pd mv 2p | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 1, 1, 256, 8, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 2, 1, 256, 4, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 4, 1, 256, 2, true, false, false>>(const S&, A); | ||
template float layernorm2d_fwd_<trait_<ck_tile::fp16_t, 1, 8, 1, 256, 1, true, false, false>>(const S&, A); | ||
|
||
// clang-format on |
Oops, something went wrong.