Skip to content

Commit

Permalink
a few fixes & improvements
Browse files (browse the repository at this point in the history)
  • Loading branch information
chhwang committed May 18, 2024
1 parent 8596dbf commit 2daa05c
Show file tree
Hide file tree
Showing 7 changed files with 2,054 additions and 34 deletions.
3 changes: 1 addition & 2 deletions ark/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,7 @@ CodeGenerator::Impl::Impl(const Json &plan,
}
}
for (auto &rg : pg["ResourceGroups"]) {
- body_ss << this->resource_group(rg, plan.at("TaskInfos"),
- proc_range);
+ body_ss << resource_group(rg, plan.at("TaskInfos"), proc_range);
}
unsynced.push_back(proc_range);
pg_idx++;
Expand Down
4 changes: 3 additions & 1 deletion ark/include/kernels/kernel_template.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@ __forceinline__ __device__ void task_seq(char *_buf) {
constexpr size_t SramBytesPerWarp = SlotSramBytes / SlotNumWarps;
size_t p = ((blockIdx.x + gridDim.x - ProcCurrent) % gridDim.x) / ProcStep;
size_t k = threadIdx.x / SlotNumThreads;
if constexpr (ARK_WARPS_PER_BLOCK > SlotNumWarps) {
if (k >= NumSlots) return;
}
size_t task_id_base = TaskBegin + p * TaskStep * TaskGranularity;
for (size_t t = k; ; t += NumSlots) {
size_t task_id = task_id_base + TaskStep *
(t % TaskGranularity + t / TaskGranularity * TaskGranularity * NumProcs);
if (task_id >= TaskEnd) break;
task(_buf, task_id, SramBytesPerWarp);
}
__syncthreads();
}
}

Expand Down
Loading

0 comments on commit 2daa05c

Please sign in to comment.