RFC: accelerator module #887

Open · wants to merge 1 commit into stable
9 changes: 9 additions & 0 deletions CMakeLists.txt
@@ -233,6 +233,15 @@ if(Legion_USE_CUDA)
endif()
endif()

#------------------------------------------------------------------------------#
# Accelerator configuration
#------------------------------------------------------------------------------#
option(Legion_USE_ACCELERATOR "Enable support for accelerators" OFF)
if (Legion_USE_ACCELERATOR)
message("Including Accelerator in Realm...")
set(REALM_USE_ACCELERATOR ON)
endif()

#------------------------------------------------------------------------------#
# Kokkos configuration
#------------------------------------------------------------------------------#
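For readers trying this out: a minimal sketch (not part of this diff) of what enabling `Legion_USE_ACCELERATOR` ultimately exposes, assuming the Realm accelerator module registers `ACCEL_PROC` processors. The helper name `count_local_accels` is hypothetical; the query calls are the same ones the default mapper changes below rely on.

```cpp
// Hypothetical helper: once the accelerator module is built in, ACCEL_PROC
// processors become visible through the standard machine query interface.
#include "legion.h"

using namespace Legion;

size_t count_local_accels(void)
{
  Machine machine = Machine::get_machine();
  Machine::ProcessorQuery accels(machine);
  // restrict to accelerator processors on the local node
  accels.only_kind(Processor::ACCEL_PROC).local_address_space();
  return accels.count();
}
```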
12 changes: 12 additions & 0 deletions cmake/accelerator-deps.cmake
@@ -0,0 +1,12 @@
set(hls_dep $ENV{HLS_CONFIG} CACHE STRING "Path to the HLS config .cmake module for the accelerator build")

get_filename_component(hls_dir ${hls_dep} DIRECTORY)
get_filename_component(hls_module ${hls_dep} NAME_WE)

list(APPEND CMAKE_MODULE_PATH ${hls_dir})

include(${hls_module})
link_directories(${XRT_LIB_DIR})
target_link_libraries(RealmRuntime PRIVATE ${hls_module})
install(TARGETS ${hls_module} EXPORT LegionTargets)
install(TARGETS miniglog EXPORT LegionTargets)
15 changes: 15 additions & 0 deletions runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -139,6 +139,13 @@ if(REALM_USE_MPI)
)
endif()

if (REALM_USE_ACCELERATOR)
list(APPEND REALM_SRC
realm/accelerator/accelerator_module.h
realm/accelerator/accelerator_module.cc
)
endif()

list(APPEND REALM_SRC
realm.h
realm/activemsg.h realm/activemsg.cc
@@ -206,6 +213,14 @@ endforeach()

find_package(Threads REQUIRED)
add_library(RealmRuntime ${REALM_SRC})

if(Legion_USE_ACCELERATOR)
if (REALM_USE_ACCELERATOR)
include(accelerator-deps)
add_definitions(-DREALM_USE_ACCELERATOR)
endif()
endif()

target_compile_options(RealmRuntime PRIVATE ${CXX_BUILD_WARNING_FLAGS})
if(COMPILER_SUPPORTS_DEFCHECK)
# use the cxx_defcheck wrapper to make sure realm_defines.h is included
6 changes: 6 additions & 0 deletions runtime/legion/runtime.cc
@@ -11769,6 +11769,11 @@ namespace Legion {
LegionSpy::log_processor_kind(kind, "Python");
break;
}
case Processor::ACCEL_PROC:
{
LegionSpy::log_processor_kind(kind, "Accelerator");
break;
}
default:
assert(false); // unknown processor kind
}
@@ -22843,6 +22848,7 @@ namespace Legion {
((local_util_procs.empty() || config.replay_on_cpus) &&
((it->first.kind() == Processor::LOC_PROC) ||
(it->first.kind() == Processor::TOC_PROC) ||
(it->first.kind() == Processor::ACCEL_PROC) ||
(it->first.kind() == Processor::IO_PROC))))
{
registered_events.insert(RtEvent(
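With `ACCEL_PROC` wired into the runtime's processor-kind handling above, application code would presumably target it the same way it targets existing kinds. A hedged sketch, assuming the usual variant-registration path applies unchanged; `ACCEL_TASK_ID` and `accel_task` are made-up names for illustration.

```cpp
#include "legion.h"

using namespace Legion;

enum TaskIDs { ACCEL_TASK_ID = 1 };  // hypothetical task ID

void accel_task(const Task *task,
                const std::vector<PhysicalRegion> &regions,
                Context ctx, Runtime *runtime)
{
  // work that the accelerator variant would perform
}

void register_accel_variant(void)
{
  TaskVariantRegistrar registrar(ACCEL_TASK_ID, "accel_task");
  // constrain this variant to the new processor kind
  registrar.add_constraint(ProcessorConstraint(Processor::ACCEL_PROC));
  Runtime::preregister_task_variant<accel_task>(registrar, "accel_task");
}
```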
120 changes: 115 additions & 5 deletions runtime/mappers/default_mapper.cc
@@ -55,14 +55,14 @@ namespace Legion {
node_id(local.address_space()), machine(m),
mapper_name((name == NULL) ? create_default_name(local) :
own ? name : strdup(name)),
next_local_gpu(0), next_local_cpu(0), next_local_io(0),
next_local_gpu(0), next_local_cpu(0), next_local_io(0), next_local_accel(0),
next_local_procset(0), next_local_omp(0), next_local_py(0),
next_global_gpu(Processor::NO_PROC),
next_global_gpu(Processor::NO_PROC), next_global_accel(Processor::NO_PROC),
next_global_cpu(Processor::NO_PROC), next_global_io(Processor::NO_PROC),
next_global_procset(Processor::NO_PROC),
next_global_omp(Processor::NO_PROC), next_global_py(Processor::NO_PROC),
global_gpu_query(NULL), global_cpu_query(NULL), global_io_query(NULL),
global_procset_query(NULL), global_omp_query(NULL),
global_procset_query(NULL), global_omp_query(NULL), global_accel_query(NULL),
global_py_query(NULL),
max_steals_per_theft(STATIC_MAX_PERMITTED_STEALS),
max_steal_count(STATIC_MAX_STEAL_COUNT),
@@ -151,6 +151,11 @@
local_omps.push_back(*it);
break;
}
case Processor::ACCEL_PROC:
{
local_accels.push_back(*it);
break;
}
default: // ignore anything else
break;
}
@@ -211,6 +216,14 @@
remote_omps[node] = *it;
break;
}
case Processor::ACCEL_PROC:
{
if (node >= remote_accels.size())
remote_accels.resize(node+1, Processor::NO_PROC);
if (!remote_accels[node].exists())
remote_accels[node] = *it;
break;
}
default: // ignore anything else
break;
}
@@ -392,6 +405,8 @@
return default_get_next_local_omp();
case Processor::PY_PROC:
return default_get_next_local_py();
case Processor::ACCEL_PROC:
return default_get_next_local_accel();
default: // make warnings go away
break;
}
@@ -421,6 +436,8 @@
return default_get_next_local_omp();
case Processor::PY_PROC:
return default_get_next_local_py();
case Processor::ACCEL_PROC:
return default_get_next_local_accel();
default: // make warnings go away
break;
}
@@ -446,6 +463,8 @@
return default_get_next_global_omp();
case Processor::PY_PROC:
return default_get_next_global_py();
case Processor::ACCEL_PROC:
return default_get_next_global_accel();
default: // make warnings go away
break;
}
@@ -468,6 +487,8 @@
return default_get_next_local_omp();
case Processor::PY_PROC:
return default_get_next_local_py();
case Processor::ACCEL_PROC:
return default_get_next_local_accel();
default: // make warnings go away
break;
}
@@ -552,6 +573,37 @@
return result;
}

//--------------------------------------------------------------------------
Processor DefaultMapper::default_get_next_local_accel(void)
//--------------------------------------------------------------------------
{
Processor result = local_accels[next_local_accel++];
if (next_local_accel == local_accels.size())
next_local_accel = 0;
return result;
}

//--------------------------------------------------------------------------
Processor DefaultMapper::default_get_next_global_accel(void)
//--------------------------------------------------------------------------
{
if (total_nodes == 1)
return default_get_next_local_accel();
if (!next_global_accel.exists())
{
global_accel_query = new Machine::ProcessorQuery(machine);
global_accel_query->only_kind(Processor::ACCEL_PROC);
next_global_accel = global_accel_query->first();
}
Processor result = next_global_accel;
next_global_accel = global_accel_query->next(result);
if (!next_global_accel.exists())
{
delete global_accel_query;
global_accel_query = NULL;
}
return result;
}

//--------------------------------------------------------------------------
Processor DefaultMapper::default_get_next_local_io(void)
//--------------------------------------------------------------------------
@@ -753,6 +805,13 @@
continue;
break;
}
case Processor::ACCEL_PROC:
{
kindString += "ACCEL_PROC ";
if (local_accels.empty())
continue;
break;
}
case Processor::LOC_PROC:
{
kindString += "LOC_PROC ";
@@ -914,10 +973,11 @@
//--------------------------------------------------------------------------
{
// Default mapper is ignorant about task IDs so just do whatever:
// 1) GPU > OMP > procset > cpu > IO > Python (default)
// 2) OMP > procset > cpu > IO > Python > GPU (with PREFER_CPU_VARIANT)
// 1) GPU > OMP > procset > cpu > IO > Python > Accel (default)
// 2) OMP > procset > cpu > IO > Python > GPU > Accel (with PREFER_CPU_VARIANT)
// It is up to the caller to filter out processor kinds that aren't
// suitable for a given task

bool prefer_cpu = ((task.tag & PREFER_CPU_VARIANT) != 0);
if ((local_gpus.size() > 0) && !prefer_cpu)
ranking.push_back(Processor::TOC_PROC);
@@ -928,6 +988,10 @@
if (local_pys.size() > 0) ranking.push_back(Processor::PY_PROC);
if ((local_gpus.size() > 0) && prefer_cpu)
ranking.push_back(Processor::TOC_PROC);

if (local_accels.size() > 0) ranking.push_back(Processor::ACCEL_PROC);
}

//--------------------------------------------------------------------------
@@ -1024,6 +1088,23 @@
}
break;
}
case Processor::ACCEL_PROC:
{
if (task.index_domain.get_volume() > local_accels.size())
{
if (!global_memory.exists())
{
log_mapper.error("Default mapper failure. No memory found "
"for accelerator task %s (ID %lld) which is visible "
"for all points in the index space.",
task.get_task_name(), task.get_unique_id());
assert(false);
}
else
target_memory = global_memory;
}
break;
}
case Processor::LOC_PROC:
{
if (task.index_domain.get_volume() > local_cpus.size())
@@ -1125,6 +1206,7 @@
switch (task.target_proc.kind())
{
case Processor::LOC_PROC:
case Processor::ACCEL_PROC: // use cpu memory
case Processor::IO_PROC:
case Processor::PROC_SET:
case Processor::OMP_PROC:
@@ -1252,6 +1334,11 @@
input, output, gpu_slices_cache);
break;
}
case Processor::ACCEL_PROC:
{
// accelerators reuse the CPU slicing cache
default_slice_task(task, local_accels, remote_accels,
input, output, cpu_slices_cache);
break;
}
case Processor::IO_PROC:
{
default_slice_task(task, local_ios, remote_ios,
@@ -1701,6 +1788,15 @@
target_procs.push_back(task.target_proc);
break;
}
case Processor::ACCEL_PROC:
{
if (!task.must_epoch_task)
target_procs.insert(target_procs.end(),
local_accels.begin(), local_accels.end());
else
target_procs.push_back(task.target_proc);
break;
}
case Processor::LOC_PROC:
{
// Put any of our local cpus on here
@@ -3124,6 +3220,11 @@
*result = local_gpus.size();
break;
}
case DEFAULT_TUNABLE_LOCAL_ACCELS:
{
*result = local_accels.size();
break;
}
case DEFAULT_TUNABLE_LOCAL_CPUS:
{
*result = local_cpus.size();
@@ -3418,6 +3519,15 @@
}
break;
}
case Processor::ACCEL_PROC:
{
if (local_accels.empty())
{
++it;
continue;
}
break;
}
case Processor::OMP_PROC:
{
if (local_omps.empty())
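The default mapper places ACCEL_PROC at the bottom of its processor-kind ranking, so accelerator variants are only chosen when nothing higher-ranked has a variant. A rough sketch of how a derived mapper could bias work toward accelerators instead; `AccelFirstMapper` is hypothetical, and the override assumes the `default_policy_rank_processor_kinds` hook keeps its current signature.

```cpp
#include "default_mapper.h"

using namespace Legion;
using namespace Legion::Mapping;

class AccelFirstMapper : public DefaultMapper {
public:
  AccelFirstMapper(MapperRuntime *rt, Machine machine, Processor local)
    : DefaultMapper(rt, machine, local, "accel_first_mapper") { }

  virtual void default_policy_rank_processor_kinds(MapperContext ctx,
                               const Task &task,
                               std::vector<Processor::Kind> &ranking)
  {
    // consider accelerators first when any are present ...
    if (!local_accels.empty())
      ranking.push_back(Processor::ACCEL_PROC);
    // ... then fall back to the default ordering (the duplicate ACCEL_PROC
    // entry at the end is harmless; the first kind with a variant wins)
    DefaultMapper::default_policy_rank_processor_kinds(ctx, task, ranking);
  }
};
```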
13 changes: 9 additions & 4 deletions runtime/mappers/default_mapper.h
@@ -51,7 +51,8 @@ namespace Legion {
DEFAULT_TUNABLE_GLOBAL_IOS = 8,
DEFAULT_TUNABLE_GLOBAL_OMPS = 9,
DEFAULT_TUNABLE_GLOBAL_PYS = 10,
DEFAULT_TUNABLE_LAST = 11, // this one must always be last and unused
DEFAULT_TUNABLE_LOCAL_ACCELS = 11,
DEFAULT_TUNABLE_LAST = 12 // this one must always be last and unused
};
enum MappingKind {
TASK_MAPPING,
@@ -375,6 +376,8 @@ namespace Legion {
Processor default_get_next_global_cpu(void);
Processor default_get_next_local_gpu(void);
Processor default_get_next_global_gpu(void);
Processor default_get_next_local_accel(void);
Processor default_get_next_global_accel(void);
Processor default_get_next_local_io(void);
Processor default_get_next_global_io(void);
Processor default_get_next_local_py(void);
@@ -464,24 +467,26 @@ namespace Legion {
// There are a couple of parameters from the machine description that
// the default mapper uses to determine how to perform mapping.
std::vector<Processor> local_gpus;
std::vector<Processor> local_accels;
std::vector<Processor> local_cpus;
std::vector<Processor> local_ios;
std::vector<Processor> local_procsets;
std::vector<Processor> local_omps;
std::vector<Processor> local_pys;
std::vector<Processor> remote_gpus;
std::vector<Processor> remote_accels;
std::vector<Processor> remote_cpus;
std::vector<Processor> remote_ios;
std::vector<Processor> remote_procsets;
std::vector<Processor> remote_omps;
std::vector<Processor> remote_pys;
protected:
// For doing round-robining of tasks onto processors
unsigned next_local_gpu, next_local_cpu, next_local_io,
unsigned next_local_gpu, next_local_cpu, next_local_io, next_local_accel,
next_local_procset, next_local_omp, next_local_py;
Processor next_global_gpu, next_global_cpu, next_global_io,
Processor next_global_gpu, next_global_cpu, next_global_io, next_global_accel,
next_global_procset, next_global_omp, next_global_py;
Machine::ProcessorQuery *global_gpu_query, *global_cpu_query,
Machine::ProcessorQuery *global_gpu_query, *global_cpu_query, *global_accel_query,
*global_io_query, *global_procset_query,
*global_omp_query, *global_py_query;
protected:
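The header also adds `DEFAULT_TUNABLE_LOCAL_ACCELS` alongside the existing tunables. A small sketch of how an application might query it, assuming the default mapper returns it as a `size_t` like the other `DEFAULT_TUNABLE_LOCAL_*` values; `query_local_accels` is a hypothetical helper.

```cpp
#include "legion.h"
#include "default_mapper.h"

using namespace Legion;
using namespace Legion::Mapping;

size_t query_local_accels(Context ctx, Runtime *runtime)
{
  // ask the (default) mapper how many accelerator processors this node has
  Future f = runtime->select_tunable_value(ctx,
      DefaultMapper::DEFAULT_TUNABLE_LOCAL_ACCELS);
  return f.get_result<size_t>();
}
```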
1 change: 1 addition & 0 deletions runtime/mappers/mapping_utilities.cc
@@ -1059,6 +1059,7 @@ namespace Legion {
case Processor::PROC_SET: return "PROC_SET";
case Processor::OMP_PROC: return "OMP_PROC";
case Processor::PY_PROC: return "PY_PROC";
case Processor::ACCEL_PROC: return "ACCEL_PROC";
default: assert(false); return "";
}
}