-
Notifications
You must be signed in to change notification settings - Fork 82
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add kernel fusing using RAJA #167
base: develop
Are you sure you want to change the base?
Changes from 7 commits
c9ac694
5ff40e2
5cc28c3
beccc9f
cdf1111
049bfd0
6d600c8
c0a037d
91f0636
31b3798
c249b86
e91952a
3ab15d8
eaf6fd2
d43092e
f426913
f8a5518
4fe3d2c
c57914d
b6fee9f
d9c4ce3
899a9fa
9b08ece
7c24184
78c618d
2b1a9e1
1eb8cf5
5a62605
879e75e
67ef461
8a8a98e
f2230e9
8df7260
6f6c6bd
6397677
41a65a6
cb6fa43
4837d89
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,15 @@ | |
#include "SAMRAI/tbox/Utilities.h" | ||
|
||
namespace SAMRAI { | ||
|
||
/* | ||
* Forward declaration of KernelFuser class - required here because including | ||
* its header would pull in RAJA and require CUDA. | ||
*/ | ||
namespace tbox { | ||
class KernelFuser; | ||
} | ||
|
||
namespace hier { | ||
|
||
/** | ||
|
@@ -160,6 +169,12 @@ class PatchData | |
const PatchData& src, | ||
const BoxOverlap& overlap) = 0; | ||
|
||
virtual void | ||
copy( | ||
const PatchData& src, | ||
const BoxOverlap& overlap, | ||
tbox::KernelFuser& fuser); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As it stands, I'll have to implement both if I want to use fusion. I guess I can have one implementation for both that takes the fuser pointer and either uses it or not under an abstraction layer, to keep things single-source. I'll need to use some macros to maintain support for older versions of SAMRAI, but that was pretty much inevitable. |
||
|
||
/** | ||
* Copy data from the source into the destination using the designated | ||
* overlap descriptor. The overlap description will have been computed | ||
|
@@ -206,6 +221,19 @@ class PatchData | |
tbox::MessageStream& stream, | ||
const BoxOverlap& overlap) const = 0; | ||
|
||
/** | ||
* Pack data lying on the specified index set into the output stream using | ||
* the given KernelFuser. The default implementation of this method will | ||
* call packStream without the fuser argument. See the abstract stream | ||
* virtual base class for more information about the packing operators | ||
* defined for streams. | ||
*/ | ||
virtual void | ||
packStream( | ||
tbox::MessageStream& stream, | ||
const BoxOverlap& overlap, | ||
tbox::KernelFuser& fuser); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not const? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, looks like this should be const. |
||
|
||
/** | ||
* Unpack data from the message stream into the specified index set. | ||
* See the abstract stream virtual base class for more information about | ||
|
@@ -216,6 +244,19 @@ class PatchData | |
tbox::MessageStream& stream, | ||
const BoxOverlap& overlap) = 0; | ||
|
||
/** | ||
* Unpack data from the message stream into the specified index set using | ||
* the given KernelFuser. The default implementation of this method will | ||
* call unpackStream without the fuser argument. See the abstract stream | ||
* virtual base class for more information about the packing operators | ||
* defined for streams. | ||
*/ | ||
virtual void | ||
unpackStream( | ||
tbox::MessageStream& stream, | ||
const BoxOverlap& overlap, | ||
tbox::KernelFuser& fuser); | ||
|
||
/** | ||
* Checks that class version and restart file version are equal. If so, | ||
* reads in the data members common to all patch data types from restart | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -112,6 +112,11 @@ struct policy_traits<policy::parallel> { | |
>; | ||
|
||
using ReductionPolicy = RAJA::cuda_reduce; | ||
|
||
using WorkGroupPolicy = RAJA::WorkGroupPolicy< | ||
RAJA::cuda_work_async<1024>, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you provide a way to set the workgroup block size in case people run into CUDA linking issues? |
||
RAJA::unordered_cuda_loop_y_block_iter_x_threadblock_average, | ||
RAJA::constant_stride_array_of_objects>; | ||
}; | ||
|
||
#else | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#ifndef included_tbox_KernelFuser | ||
#define included_tbox_KernelFuser | ||
|
||
#include "SAMRAI/tbox/ExecutionPolicy.h" | ||
#include "SAMRAI/tbox/AllocatorDatabase.h" | ||
|
||
// #include "RAJA/RAJA.hpp" | ||
|
||
namespace SAMRAI { | ||
namespace tbox { | ||
|
||
/*
 * KernelFuser - interface for queuing kernels over index ranges and then
 * launching them together.
 *
 * In this revision every RAJA-backed statement is commented out, so the
 * class holds no data members and both enqueue() and launch() are no-ops.
 * The commented-out code shows the intended design: a RAJA WorkPool that
 * collects kernels, a WorkGroup instantiated from the pool, and a WorkSite
 * produced by running the group.
 */
class KernelFuser | ||
{ | ||
public: | ||
// Disabled constructor: would initialize the work pool with the stream
// allocator obtained from AllocatorDatabase.
// KernelFuser() : | ||
// d_workpool(AllocatorDatabase::getDatabase()->getStreamAllocator()) | ||
// {} | ||
|
||
/*
 * Queue a kernel for later execution.
 *
 * begin, end - bounds of the index range; the disabled code passes them
 *              to RAJA::RangeSegment(begin, end).
 * kernel     - callable taken by forwarding reference.
 *
 * Currently a no-op: all three arguments are ignored because the
 * forwarding call into the work pool is commented out.
 */
template<typename Kernel> | ||
void enqueue(int begin, int end, Kernel&& kernel) { | ||
//d_workpool.enqueue(RAJA::RangeSegment(begin, end), std::forward<Kernel>(kernel)); | ||
} | ||
|
||
/*
 * Run the queued kernels.
 *
 * Currently a no-op: the disabled code would instantiate a work group from
 * the pool and run it, storing the resulting work site.
 */
void launch() | ||
{ | ||
// d_workgroup = d_workpool.instantiate(); | ||
// d_worksite = d_workgroup.run(); | ||
} | ||
|
||
private: | ||
// Allocator type selected at compile time: Umpire's typed allocator when
// HAVE_UMPIRE is defined, otherwise ResourceAllocator. In this revision it
// is referenced only by the commented-out aliases below.
#ifdef HAVE_UMPIRE | ||
using Allocator = umpire::TypedAllocator<char>; | ||
#else | ||
using Allocator = ResourceAllocator; | ||
#endif | ||
|
||
// Disabled RAJA type aliases: the WorkGroupPolicy comes from the parallel
// policy_traits, with int indices and no extra kernel arguments.
// using Policy = typename tbox::detail::policy_traits< tbox::policy::parallel >::WorkGroupPolicy; | ||
// using WorkPool = RAJA::WorkPool <Policy, int, RAJA::xargs<>, Allocator>; | ||
// using WorkGroup = RAJA::WorkGroup<Policy, int, RAJA::xargs<>, Allocator>; | ||
// using WorkSite = RAJA::WorkSite <Policy, int, RAJA::xargs<>, Allocator>; | ||
|
||
// Disabled data members backing enqueue() and launch().
// WorkPool d_workpool; | ||
// WorkGroup d_workgroup; | ||
// WorkSite d_worksite; | ||
}; | ||
|
||
} | ||
} | ||
|
||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SAMRAI_HAVE_KERNEL_FUSER?