Skip to content
This repository has been archived by the owner on Jun 18, 2024. It is now read-only.

Commit

Permalink
scx: Close a small race window in the enable path which can lead to u…
Browse files Browse the repository at this point in the history
…se-after-free

scx_ops_enable() was iterating all tasks without locking its rq and then
calling get_task_struct() on them. However, this can race against task free
path. The task struct itself is accessible but its refcnt may already have
reached zero by the time get_task_struct() is called. This triggers the
following refcnt warning.

  WARNING: CPU: 11 PID: 2834521 at lib/refcount.c:25 refcount_warn_saturate+0x7d/0xe0
  ...
  RIP: 0010:refcount_warn_saturate+0x7d/0xe0
  ...
  Call Trace:
   bpf_scx_reg+0x851/0x890
   bpf_struct_ops_link_create+0xbd/0xe0
   __sys_bpf+0x2862/0x2d20
   __x64_sys_bpf+0x18/0x20
   do_syscall_64+0x3d/0x90
   entry_SYSCALL_64_after_hwframe+0x46/0xb0

This can also affect other loops as it can call sched functions on tasks
which are already destroyed. Let's fix it by making
scx_task_iter_filtered_locked() also filter out TASK_DEAD tasks and
switching the scx_task_iter_filtered() usage in scx_ops_enable() to
scx_task_iter_filtered_locked(). This makes the next function always test
TASK_DEAD while holding the task's rq lock and the rq lock guarantees that
the task won't be freed closing the race window.
  • Loading branch information
htejun committed May 17, 2024
1 parent 9cfa965 commit f266831
Showing 1 changed file with 44 additions and 37 deletions.
81 changes: 44 additions & 37 deletions kernel/sched/ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -1268,6 +1268,28 @@ static void scx_task_iter_init(struct scx_task_iter *iter)
iter->locked = NULL;
}

/**
* scx_task_iter_rq_unlock - Unlock rq locked by a task iterator
* @iter: iterator to unlock rq for
*
* If @iter is in the middle of a locked iteration, it may be locking the rq of
* the task currently being visited. Unlock the rq if so. This function can be
* safely called anytime during an iteration.
*
* Returns %true if the rq @iter was locking is unlocked. %false if @iter was
* not locking an rq.
*/
static bool scx_task_iter_rq_unlock(struct scx_task_iter *iter)
{
if (iter->locked) {
task_rq_unlock(iter->rq, iter->locked, &iter->rf);
iter->locked = NULL;
return true;
} else {
return false;
}
}

/**
* scx_task_iter_exit - Exit a task iterator
* @iter: iterator to exit
Expand All @@ -1278,19 +1300,10 @@ static void scx_task_iter_init(struct scx_task_iter *iter)
*/
static void scx_task_iter_exit(struct scx_task_iter *iter)
{
struct list_head *cursor = &iter->cursor.tasks_node;

lockdep_assert_held(&scx_tasks_lock);

if (iter->locked) {
task_rq_unlock(iter->rq, iter->locked, &iter->rf);
iter->locked = NULL;
}

if (list_empty(cursor))
return;

list_del_init(cursor);
scx_task_iter_rq_unlock(iter);
list_del_init(&iter->cursor.tasks_node);
}

/**
Expand Down Expand Up @@ -1320,50 +1333,43 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
}

/**
* scx_task_iter_next_filtered - Next non-idle task
* scx_task_iter_next_filtered_locked - Next alive non-idle task with its rq locked
* @iter: iterator to walk
*
* Visit the next non-idle task. See scx_task_iter_init() for details.
* Visit the next alive non-idle task with its rq lock held. See
* scx_task_iter_init() for details.
*/
static struct task_struct *
scx_task_iter_next_filtered(struct scx_task_iter *iter)
scx_task_iter_next_filtered_locked(struct scx_task_iter *iter)
{
struct task_struct *p;
retry:
scx_task_iter_rq_unlock(iter);

while ((p = scx_task_iter_next(iter))) {
/*
* is_idle_task() tests %PF_IDLE which may not be set for CPUs
* which haven't yet been onlined. Test sched_class directly.
*/
if (p->sched_class != &idle_sched_class)
return p;
}
return NULL;
}

/**
* scx_task_iter_next_filtered_locked - Next non-idle task with its rq locked
* @iter: iterator to walk
*
* Visit the next non-idle task with its rq lock held. See scx_task_iter_init()
* for details.
*/
static struct task_struct *
scx_task_iter_next_filtered_locked(struct scx_task_iter *iter)
{
struct task_struct *p;

if (iter->locked) {
task_rq_unlock(iter->rq, iter->locked, &iter->rf);
iter->locked = NULL;
break;
}

p = scx_task_iter_next_filtered(iter);
if (!p)
return NULL;

iter->rq = task_rq_lock(p, &iter->rf);
iter->locked = p;

/*
* If we see %TASK_DEAD, @p already disabled preemption, is about to do
* the final __schedule(), won't ever need to be scheduled again and can
* thus be safely ignored. If we don't see %TASK_DEAD, @p can't enter
* the final __schedle() while we're locking its rq and thus will stay
* alive until the rq is unlocked.
*/
if (READ_ONCE(p->__state) == TASK_DEAD)
goto retry;

return p;
}

Expand Down Expand Up @@ -5028,8 +5034,9 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
spin_lock_irq(&scx_tasks_lock);

scx_task_iter_init(&sti);
while ((p = scx_task_iter_next_filtered(&sti))) {
while ((p = scx_task_iter_next_filtered_locked(&sti))) {
get_task_struct(p);
scx_task_iter_rq_unlock(&sti);
spin_unlock_irq(&scx_tasks_lock);

ret = scx_ops_init_task(p, task_group(p), false);
Expand Down

0 comments on commit f266831

Please sign in to comment.