RFC: accelerator module #887

Open · wants to merge 1 commit into stable
9 changes: 9 additions & 0 deletions CMakeLists.txt
@@ -233,6 +233,15 @@ if(Legion_USE_CUDA)
endif()
endif()

#------------------------------------------------------------------------------#
# Accelerator configuration
#------------------------------------------------------------------------------#
option(Legion_USE_ACCELERATOR "Enable support for accelerators" OFF)
if (Legion_USE_ACCELERATOR)
message("Including Accelerator in Realm...")
set(REALM_USE_ACCELERATOR ON)
endif()

#------------------------------------------------------------------------------#
# Kokkos configuration
#------------------------------------------------------------------------------#
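For readers trying this out: a minimal sketch (not part of this diff) of what enabling `Legion_USE_ACCELERATOR` ultimately exposes, assuming the Realm accelerator module registers `ACCEL_PROC` processors. The helper name `count_local_accels` is hypothetical; the query calls are the same ones the default mapper changes below rely on.

```cpp
// Hypothetical helper: once the accelerator module is built in, ACCEL_PROC
// processors become visible through the standard machine query interface.
#include "legion.h"

using namespace Legion;

size_t count_local_accels(void)
{
  Machine machine = Machine::get_machine();
  Machine::ProcessorQuery accels(machine);
  // restrict to accelerator processors on the local node
  accels.only_kind(Processor::ACCEL_PROC).local_address_space();
  return accels.count();
}
```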
12 changes: 12 additions & 0 deletions cmake/accelerator-deps.cmake
@@ -0,0 +1,12 @@
set(hls_dep $ENV{HLS_CONFIG} CACHE STRING "Path to the HLS config .cmake module for the accelerator build")

get_filename_component(hls_dir ${hls_dep} DIRECTORY)
get_filename_component(hls_module ${hls_dep} NAME_WE)

list(APPEND CMAKE_MODULE_PATH ${hls_dir})

include(${hls_module})
link_directories(${XRT_LIB_DIR})
target_link_libraries(RealmRuntime PRIVATE ${hls_module})
install(TARGETS ${hls_module} EXPORT LegionTargets)
install(TARGETS miniglog EXPORT LegionTargets)
15 changes: 15 additions & 0 deletions runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -139,6 +139,13 @@ if(REALM_USE_MPI)
)
endif()

if (REALM_USE_ACCELERATOR)
list(APPEND REALM_SRC
realm/accelerator/accelerator_module.h
realm/accelerator/accelerator_module.cc
)
endif()

list(APPEND REALM_SRC
realm.h
realm/activemsg.h realm/activemsg.cc
@@ -206,6 +213,14 @@ endforeach()

find_package(Threads REQUIRED)
add_library(RealmRuntime ${REALM_SRC})

if(Legion_USE_ACCELERATOR)
if (REALM_USE_ACCELERATOR)
include(accelerator-deps)
add_definitions(-DREALM_USE_ACCELERATOR)
endif()
endif()

target_compile_options(RealmRuntime PRIVATE ${CXX_BUILD_WARNING_FLAGS})
if(COMPILER_SUPPORTS_DEFCHECK)
# use the cxx_defcheck wrapper to make sure realm_defines.h is included
6 changes: 6 additions & 0 deletions runtime/legion/runtime.cc
@@ -11769,6 +11769,11 @@ namespace Legion {
LegionSpy::log_processor_kind(kind, "Python");
break;
}
case Processor::ACCEL_PROC:
{
LegionSpy::log_processor_kind(kind, "Accelerator");
break;
}
default:
assert(false); // unknown processor kind
}
@@ -22843,6 +22848,7 @@ namespace Legion {
((local_util_procs.empty() || config.replay_on_cpus) &&
((it->first.kind() == Processor::LOC_PROC) ||
(it->first.kind() == Processor::TOC_PROC) ||
(it->first.kind() == Processor::ACCEL_PROC) ||
(it->first.kind() == Processor::IO_PROC))))
{
registered_events.insert(RtEvent(
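With `ACCEL_PROC` wired into the runtime's processor-kind handling above, application code would presumably target it the same way it targets existing kinds. A hedged sketch, assuming the usual variant-registration path applies unchanged; `ACCEL_TASK_ID` and `accel_task` are made-up names for illustration.

```cpp
#include "legion.h"

using namespace Legion;

enum TaskIDs { ACCEL_TASK_ID = 1 };  // hypothetical task ID

void accel_task(const Task *task,
                const std::vector<PhysicalRegion> &regions,
                Context ctx, Runtime *runtime)
{
  // work that the accelerator variant would perform
}

void register_accel_variant(void)
{
  TaskVariantRegistrar registrar(ACCEL_TASK_ID, "accel_task");
  // constrain this variant to the new processor kind
  registrar.add_constraint(ProcessorConstraint(Processor::ACCEL_PROC));
  Runtime::preregister_task_variant<accel_task>(registrar, "accel_task");
}
```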
120 changes: 115 additions & 5 deletions runtime/mappers/default_mapper.cc
@@ -55,14 +55,14 @@ namespace Legion {
node_id(local.address_space()), machine(m),
mapper_name((name == NULL) ? create_default_name(local) :
own ? name : strdup(name)),
next_local_gpu(0), next_local_cpu(0), next_local_io(0),
next_local_gpu(0), next_local_cpu(0), next_local_io(0), next_local_accel(0),
next_local_procset(0), next_local_omp(0), next_local_py(0),
next_global_gpu(Processor::NO_PROC),
next_global_gpu(Processor::NO_PROC), next_global_accel(Processor::NO_PROC),
next_global_cpu(Processor::NO_PROC), next_global_io(Processor::NO_PROC),
next_global_procset(Processor::NO_PROC),
next_global_omp(Processor::NO_PROC), next_global_py(Processor::NO_PROC),
global_gpu_query(NULL), global_cpu_query(NULL), global_io_query(NULL),
global_procset_query(NULL), global_omp_query(NULL),
global_procset_query(NULL), global_omp_query(NULL), global_accel_query(NULL),
global_py_query(NULL),
max_steals_per_theft(STATIC_MAX_PERMITTED_STEALS),
max_steal_count(STATIC_MAX_STEAL_COUNT),
@@ -151,6 +151,11 @@
local_omps.push_back(*it);
break;
}
case Processor::ACCEL_PROC:
{
local_accels.push_back(*it);
break;
}
default: // ignore anything else
break;
}
@@ -211,6 +216,14 @@
remote_omps[node] = *it;
break;
}
case Processor::ACCEL_PROC:
{
if (node >= remote_accels.size())
remote_accels.resize(node+1, Processor::NO_PROC);
if (!remote_accels[node].exists())
remote_accels[node] = *it;
break;
}
default: // ignore anything else
break;
}
@@ -392,6 +405,8 @@
return default_get_next_local_omp();
case Processor::PY_PROC:
return default_get_next_local_py();
case Processor::ACCEL_PROC:
return default_get_next_local_accel();
default: // make warnings go away
break;
}
@@ -421,6 +436,8 @@
return default_get_next_local_omp();
case Processor::PY_PROC:
return default_get_next_local_py();
case Processor::ACCEL_PROC:
return default_get_next_local_accel();
default: // make warnings go away
break;
}
@@ -446,6 +463,8 @@
return default_get_next_global_omp();
case Processor::PY_PROC:
return default_get_next_global_py();
case Processor::ACCEL_PROC:
return default_get_next_global_accel();
default: // make warnings go away
break;
}
@@ -468,6 +487,8 @@
return default_get_next_local_omp();
case Processor::PY_PROC:
return default_get_next_local_py();
case Processor::ACCEL_PROC:
return default_get_next_local_accel();
default: // make warnings go away
break;
}
@@ -552,6 +573,37 @@
return result;
}

//--------------------------------------------------------------------------
Processor DefaultMapper::default_get_next_local_accel(void)
//--------------------------------------------------------------------------
{
Processor result = local_accels[next_local_accel++];
if (next_local_accel == local_accels.size())
next_local_accel = 0;
return result;
}

//--------------------------------------------------------------------------
Processor DefaultMapper::default_get_next_global_accel(void)
//--------------------------------------------------------------------------
{
if (total_nodes == 1)
return default_get_next_local_accel();
if (!next_global_accel.exists())
{
global_accel_query = new Machine::ProcessorQuery(machine);
global_accel_query->only_kind(Processor::ACCEL_PROC);
next_global_accel = global_accel_query->first();
}
Processor result = next_global_accel;
next_global_accel = global_accel_query->next(result);
if (!next_global_accel.exists())
{
delete global_accel_query;
global_accel_query = NULL;
}
return result;
}

//--------------------------------------------------------------------------
Processor DefaultMapper::default_get_next_local_io(void)
//--------------------------------------------------------------------------
@@ -753,6 +805,13 @@
continue;
break;
}
case Processor::ACCEL_PROC:
{
kindString += "ACCEL_PROC ";
if (local_accels.empty())
continue;
break;
}
case Processor::LOC_PROC:
{
kindString += "LOC_PROC ";
@@ -914,10 +973,11 @@
//--------------------------------------------------------------------------
{
// Default mapper is ignorant about task IDs so just do whatever:
// 1) GPU > OMP > procset > cpu > IO > Python (default)
// 2) OMP > procset > cpu > IO > Python > GPU (with PREFER_CPU_VARIANT)
// 1) GPU > OMP > procset > cpu > IO > Python > Accel (default)
// 2) OMP > procset > cpu > IO > Python > GPU > Accel (with PREFER_CPU_VARIANT)
// It is up to the caller to filter out processor kinds that aren't
// suitable for a given task

bool prefer_cpu = ((task.tag & PREFER_CPU_VARIANT) != 0);
if ((local_gpus.size() > 0) && !prefer_cpu)
ranking.push_back(Processor::TOC_PROC);
@@ -928,6 +988,10 @@
if (local_pys.size() > 0) ranking.push_back(Processor::PY_PROC);
if ((local_gpus.size() > 0) && prefer_cpu)
ranking.push_back(Processor::TOC_PROC);

if (local_accels.size() > 0) ranking.push_back(Processor::ACCEL_PROC);
}

//--------------------------------------------------------------------------
@@ -1024,6 +1088,23 @@
}
break;
}
case Processor::ACCEL_PROC:
{
if (task.index_domain.get_volume() > local_accels.size())
{
if (!global_memory.exists())
{
log_mapper.error("Default mapper failure. No memory found "
"for accelerator task %s (ID %lld) which is visible "
"for all points in the index space.",
task.get_task_name(), task.get_unique_id());
assert(false);
}
else
target_memory = global_memory;
}
break;
}
case Processor::LOC_PROC:
{
if (task.index_domain.get_volume() > local_cpus.size())
@@ -1125,6 +1206,7 @@
switch (task.target_proc.kind())
{
case Processor::LOC_PROC:
case Processor::ACCEL_PROC: // use cpu memory
case Processor::IO_PROC:
case Processor::PROC_SET:
case Processor::OMP_PROC:
@@ -1252,6 +1334,11 @@
input, output, gpu_slices_cache);
break;
}
case Processor::ACCEL_PROC:
{
// accelerators reuse the CPU slicing cache
default_slice_task(task, local_accels, remote_accels,
input, output, cpu_slices_cache);
break;
}
case Processor::IO_PROC:
{
default_slice_task(task, local_ios, remote_ios,
@@ -1701,6 +1788,15 @@
target_procs.push_back(task.target_proc);
break;
}
case Processor::ACCEL_PROC:
{
if (!task.must_epoch_task)
target_procs.insert(target_procs.end(),
local_accels.begin(), local_accels.end());
else
target_procs.push_back(task.target_proc);
break;
}
case Processor::LOC_PROC:
{
// Put any of our local cpus on here
@@ -3124,6 +3220,11 @@
*result = local_gpus.size();
break;
}
case DEFAULT_TUNABLE_LOCAL_ACCELS:
{
*result = local_accels.size();
break;
}
case DEFAULT_TUNABLE_LOCAL_CPUS:
{
*result = local_cpus.size();
@@ -3418,6 +3519,15 @@
}
break;
}
case Processor::ACCEL_PROC:
{
if (local_accels.empty())
{
++it;
continue;
}
break;
}
case Processor::OMP_PROC:
{
if (local_omps.empty())
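The default mapper places ACCEL_PROC at the bottom of its processor-kind ranking, so accelerator variants are only chosen when nothing higher-ranked has a variant. A rough sketch of how a derived mapper could bias work toward accelerators instead; `AccelFirstMapper` is hypothetical, and the override assumes the `default_policy_rank_processor_kinds` hook keeps its current signature.

```cpp
#include "default_mapper.h"

using namespace Legion;
using namespace Legion::Mapping;

class AccelFirstMapper : public DefaultMapper {
public:
  AccelFirstMapper(MapperRuntime *rt, Machine machine, Processor local)
    : DefaultMapper(rt, machine, local, "accel_first_mapper") { }

  virtual void default_policy_rank_processor_kinds(MapperContext ctx,
                               const Task &task,
                               std::vector<Processor::Kind> &ranking)
  {
    // consider accelerators first when any are present ...
    if (!local_accels.empty())
      ranking.push_back(Processor::ACCEL_PROC);
    // ... then fall back to the default ordering (the duplicate ACCEL_PROC
    // entry at the end is harmless; the first kind with a variant wins)
    DefaultMapper::default_policy_rank_processor_kinds(ctx, task, ranking);
  }
};
```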
13 changes: 9 additions & 4 deletions runtime/mappers/default_mapper.h
@@ -51,7 +51,8 @@ namespace Legion {
DEFAULT_TUNABLE_GLOBAL_IOS = 8,
DEFAULT_TUNABLE_GLOBAL_OMPS = 9,
DEFAULT_TUNABLE_GLOBAL_PYS = 10,
DEFAULT_TUNABLE_LAST = 11, // this one must always be last and unused
DEFAULT_TUNABLE_LOCAL_ACCELS = 11,
DEFAULT_TUNABLE_LAST = 12 // this one must always be last and unused
};
enum MappingKind {
TASK_MAPPING,
@@ -375,6 +376,8 @@ namespace Legion {
Processor default_get_next_global_cpu(void);
Processor default_get_next_local_gpu(void);
Processor default_get_next_global_gpu(void);
Processor default_get_next_local_accel(void);
Processor default_get_next_global_accel(void);
Processor default_get_next_local_io(void);
Processor default_get_next_global_io(void);
Processor default_get_next_local_py(void);
@@ -464,24 +467,26 @@ namespace Legion {
// There are a couple of parameters from the machine description that
// the default mapper uses to determine how to perform mapping.
std::vector<Processor> local_gpus;
std::vector<Processor> local_accels;
std::vector<Processor> local_cpus;
std::vector<Processor> local_ios;
std::vector<Processor> local_procsets;
std::vector<Processor> local_omps;
std::vector<Processor> local_pys;
std::vector<Processor> remote_gpus;
std::vector<Processor> remote_accels;
std::vector<Processor> remote_cpus;
std::vector<Processor> remote_ios;
std::vector<Processor> remote_procsets;
std::vector<Processor> remote_omps;
std::vector<Processor> remote_pys;
protected:
// For doing round-robining of tasks onto processors
unsigned next_local_gpu, next_local_cpu, next_local_io,
unsigned next_local_gpu, next_local_cpu, next_local_io, next_local_accel,
next_local_procset, next_local_omp, next_local_py;
Processor next_global_gpu, next_global_cpu, next_global_io,
Processor next_global_gpu, next_global_cpu, next_global_io, next_global_accel,
next_global_procset, next_global_omp, next_global_py;
Machine::ProcessorQuery *global_gpu_query, *global_cpu_query,
Machine::ProcessorQuery *global_gpu_query, *global_cpu_query, *global_accel_query,
*global_io_query, *global_procset_query,
*global_omp_query, *global_py_query;
protected:
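The header also adds `DEFAULT_TUNABLE_LOCAL_ACCELS` alongside the existing tunables. A small sketch of how an application might query it, assuming the default mapper returns it as a `size_t` like the other `DEFAULT_TUNABLE_LOCAL_*` values; `query_local_accels` is a hypothetical helper.

```cpp
#include "legion.h"
#include "default_mapper.h"

using namespace Legion;
using namespace Legion::Mapping;

size_t query_local_accels(Context ctx, Runtime *runtime)
{
  // ask the (default) mapper how many accelerator processors this node has
  Future f = runtime->select_tunable_value(ctx,
      DefaultMapper::DEFAULT_TUNABLE_LOCAL_ACCELS);
  return f.get_result<size_t>();
}
```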
1 change: 1 addition & 0 deletions runtime/mappers/mapping_utilities.cc
@@ -1059,6 +1059,7 @@ namespace Legion {
case Processor::PROC_SET: return "PROC_SET";
case Processor::OMP_PROC: return "OMP_PROC";
case Processor::PY_PROC: return "PY_PROC";
case Processor::ACCEL_PROC: return "ACCEL_PROC";
default: assert(false); return "";
}
}