Refactor gc-stack functions; Cleanup
udesou committed Dec 9, 2024
1 parent c725c6f commit 4ea1782
Showing 7 changed files with 132 additions and 122 deletions.
18 changes: 18 additions & 0 deletions src/gc-common.c
@@ -709,6 +709,24 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
jl_throw(jl_memory_exception);
}

// Sweeping mtarraylist_buffers:
// These buffers are made unreachable via `mtarraylist_resizeto` from mtarraylist.c
// and are freed at the end of GC via jl_gc_sweep_stack_pools_and_mtarraylist_buffers
void sweep_mtarraylist_buffers(void) JL_NOTSAFEPOINT
{
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls = gc_all_tls_states[i];
if (ptls == NULL) {
continue;
}
small_arraylist_t *buffers = &ptls->lazily_freed_mtarraylist_buffers;
void *buf;
while ((buf = small_arraylist_pop(buffers)) != NULL) {
free(buf);
}
}
}

#ifdef __cplusplus
}
#endif
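For context, these buffers are retired on the producer side when a multi-threaded arraylist grows: the old backing array is pushed onto the thread's `lazily_freed_mtarraylist_buffers` list instead of being freed on the spot, because a concurrent reader may still be scanning it, and only the end-of-GC sweep above can free it safely. A minimal standalone sketch of that hand-off (simplified stand-in types and names, not the runtime's actual `mtarraylist_resizeto` implementation):

/* Illustrative model only: buflist_t and tls_t are stand-ins for the runtime's
 * small_arraylist_t and jl_ptls_t. */
#include <stdlib.h>
#include <string.h>

typedef struct { void **items; size_t len, max; } buflist_t;
typedef struct { buflist_t lazily_freed; } tls_t;

/* Producer: grow a buffer, but defer freeing the old one because a concurrent
 * reader may still hold a pointer into it. */
static void *grow_deferred(tls_t *tls, void *old, size_t oldsz, size_t newsz)
{
    void *fresh = malloc(newsz);
    memcpy(fresh, old, oldsz);
    if (tls->lazily_freed.len == tls->lazily_freed.max) {
        tls->lazily_freed.max = tls->lazily_freed.max ? 2 * tls->lazily_freed.max : 8;
        tls->lazily_freed.items =
            realloc(tls->lazily_freed.items, tls->lazily_freed.max * sizeof(void*));
    }
    tls->lazily_freed.items[tls->lazily_freed.len++] = old;  /* push instead of free(old) */
    return fresh;
}

/* Consumer: at the end of GC no mutator can still hold a stale pointer, so the
 * retired buffers can finally be freed (cf. sweep_mtarraylist_buffers above). */
static void sweep_lazily_freed(tls_t *tls)
{
    while (tls->lazily_freed.len > 0)
        free(tls->lazily_freed.items[--tls->lazily_freed.len]);
}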
18 changes: 10 additions & 8 deletions src/gc-common.h
@@ -68,14 +68,6 @@ extern jl_gc_callback_list_t *gc_cblist_notify_external_alloc
extern jl_gc_callback_list_t *gc_cblist_notify_external_free;
extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;


-// FIXME: These are specific to the Stock GC but being declared here
-// for now, instead of gc-stock.h. We might want to refactor the
-// code in gc-stacks.c that uses these
-extern _Atomic(int) gc_ptls_sweep_idx;
-extern _Atomic(int) gc_stack_free_idx;
-extern _Atomic(int) gc_n_threads_sweeping_stacks;

#define gc_invoke_callbacks(ty, list, args) \
do { \
for (jl_gc_callback_list_t *cb = list; \
@@ -226,4 +218,14 @@ extern jl_ptls_t* gc_all_tls_states;

extern int gc_logging_enabled;

// =========================================================================== //
// MISC
// =========================================================================== //

// number of stacks to always keep available per pool
#define MIN_STACK_MAPPINGS_PER_POOL 5

extern void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT;
extern void sweep_mtarraylist_buffers(void) JL_NOTSAFEPOINT;

#endif // JL_GC_COMMON_H
4 changes: 4 additions & 0 deletions src/gc-interface.h
@@ -101,6 +101,10 @@ JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT;
// Returns which GC implementation is being used and possibly its version according to the list of supported GCs
// NB: it should clearly identify the GC by including e.g. ‘stock’ or ‘mmtk’ as a substring.
JL_DLLEXPORT const char* jl_gc_active_impl(void);
// Sweep Julia's stack pools and mtarraylist buffers. This function is part of the interface because
// each GC should implement it, even though it will most likely not be used by other code in the runtime.
// It still needs to be annotated with JL_DLLEXPORT since it is called from Rust by MMTk.
JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT;

// ========================================================================= //
// Metrics
14 changes: 8 additions & 6 deletions src/gc-mmtk.c
@@ -15,7 +15,6 @@ extern "C" {

extern jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED;
extern const unsigned pool_sizes[];
-extern void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz);
extern jl_mutex_t finalizers_lock;

// FIXME: Does it make sense for MMTk to implement something similar
@@ -155,6 +154,8 @@ void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT {
heap->mallocarrays = NULL;
heap->mafreelist = NULL;
arraylist_new(&ptls->finalizers, 0);
// Initialize `lazily_freed_mtarraylist_buffers`
small_arraylist_new(&ptls->lazily_freed_mtarraylist_buffers, 0);
// Clear the malloc sz count
jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0);
// Create mutator
@@ -608,11 +609,6 @@ JL_DLLEXPORT void jl_gc_mmtk_sweep_malloced_memory(void) JL_NOTSAFEPOINT
mmtk_close_mutator_iterator(iter);
}



-// number of stacks to always keep available per pool - from gc-stacks.c
-#define MIN_STACK_MAPPINGS_PER_POOL 5

#define jl_genericmemory_elsize(a) (((jl_datatype_t*)jl_typetagof(a))->layout->size)

// if data is inlined inside the genericmemory object --- to->ptr needs to be updated when copying the array
@@ -727,6 +723,12 @@ JL_DLLEXPORT void jl_gc_mmtk_sweep_stack_pools(void)
}
}

JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
jl_gc_mmtk_sweep_stack_pools();
sweep_mtarraylist_buffers();
}

JL_DLLEXPORT void* jl_gc_get_stackbase(int16_t tid) {
assert(tid >= 0);
jl_ptls_t ptls2 = jl_all_tls_states[tid];
96 changes: 0 additions & 96 deletions src/gc-stacks.c
@@ -20,9 +20,6 @@
# endif
#endif

// number of stacks to always keep available per pool
#define MIN_STACK_MAPPINGS_PER_POOL 5

const size_t jl_guard_size = (4096 * 8);
static _Atomic(uint32_t) num_stack_mappings = 0;

@@ -202,99 +199,6 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPOINT
return stk;
}

void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT
{
// Stack sweeping algorithm:
// // deallocate stacks if we have too many sitting around unused
// for (stk in halfof(free_stacks))
// free_stack(stk, pool_sz);
// // then sweep the task stacks
// for (t in live_tasks)
// if (!gc-marked(t))
// stkbuf = t->stkbuf
// bufsz = t->bufsz
// if (stkbuf)
// push(free_stacks[sz], stkbuf)
jl_atomic_fetch_add(&gc_n_threads_sweeping_stacks, 1);
while (1) {
int i = jl_atomic_fetch_add_relaxed(&gc_ptls_sweep_idx, -1);
if (i < 0)
break;
jl_ptls_t ptls2 = gc_all_tls_states[i];
if (ptls2 == NULL)
continue;
assert(gc_n_threads);
// free half of stacks that remain unused since last sweep
if (i == jl_atomic_load_relaxed(&gc_stack_free_idx)) {
for (int p = 0; p < JL_N_STACK_POOLS; p++) {
small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p];
size_t n_to_free;
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
n_to_free = al->len; // not alive yet or dead, so it does not need these anymore
}
else if (al->len > MIN_STACK_MAPPINGS_PER_POOL) {
n_to_free = al->len / 2;
if (n_to_free > (al->len - MIN_STACK_MAPPINGS_PER_POOL))
n_to_free = al->len - MIN_STACK_MAPPINGS_PER_POOL;
}
else {
n_to_free = 0;
}
for (int n = 0; n < n_to_free; n++) {
void *stk = small_arraylist_pop(al);
free_stack(stk, pool_sizes[p]);
}
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
small_arraylist_free(al);
}
}
}
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks);
}

small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
size_t n = 0;
size_t ndel = 0;
size_t l = live_tasks->len;
void **lst = live_tasks->items;
if (l == 0)
continue;
while (1) {
jl_task_t *t = (jl_task_t*)lst[n];
assert(jl_is_task(t));
if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
if (t->ctx.stkbuf == NULL)
ndel++; // jl_release_task_stack called
else
n++;
}
else {
ndel++;
void *stkbuf = t->ctx.stkbuf;
size_t bufsz = t->ctx.bufsz;
if (stkbuf) {
t->ctx.stkbuf = NULL;
_jl_free_stack(ptls2, stkbuf, bufsz);
}
#ifdef _COMPILER_TSAN_ENABLED_
if (t->ctx.tsan_state) {
__tsan_destroy_fiber(t->ctx.tsan_state);
t->ctx.tsan_state = NULL;
}
#endif
}
if (n >= l - ndel)
break;
void *tmp = lst[n];
lst[n] = lst[n + ndel];
lst[n + ndel] = tmp;
}
live_tasks->len -= ndel;
}
jl_atomic_fetch_add(&gc_n_threads_sweeping_stacks, -1);
}

// Builds a list of the live tasks. Racy: `live_tasks` can expand at any time.
arraylist_t *jl_get_all_tasks_arraylist(void) JL_NOTSAFEPOINT
{
102 changes: 91 additions & 11 deletions src/gc-stock.c
@@ -1019,22 +1019,102 @@ void gc_sweep_wait_for_all_stacks(void) JL_NOTSAFEPOINT
}
}

-void sweep_mtarraylist_buffers(void) JL_NOTSAFEPOINT
-{
-for (int i = 0; i < gc_n_threads; i++) {
-jl_ptls_t ptls = gc_all_tls_states[i];
-if (ptls == NULL) {
extern const unsigned pool_sizes[];

void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT
{
// Stack sweeping algorithm:
// // deallocate stacks if we have too many sitting around unused
// for (stk in halfof(free_stacks))
// free_stack(stk, pool_sz);
// // then sweep the task stacks
// for (t in live_tasks)
// if (!gc-marked(t))
// stkbuf = t->stkbuf
// bufsz = t->bufsz
// if (stkbuf)
// push(free_stacks[sz], stkbuf)
jl_atomic_fetch_add(&gc_n_threads_sweeping_stacks, 1);
while (1) {
int i = jl_atomic_fetch_add_relaxed(&gc_ptls_sweep_idx, -1);
if (i < 0)
break;
jl_ptls_t ptls2 = gc_all_tls_states[i];
if (ptls2 == NULL)
continue;
assert(gc_n_threads);
// free half of stacks that remain unused since last sweep
if (i == jl_atomic_load_relaxed(&gc_stack_free_idx)) {
for (int p = 0; p < JL_N_STACK_POOLS; p++) {
small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p];
size_t n_to_free;
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
n_to_free = al->len; // not alive yet or dead, so it does not need these anymore
}
else if (al->len > MIN_STACK_MAPPINGS_PER_POOL) {
n_to_free = al->len / 2;
if (n_to_free > (al->len - MIN_STACK_MAPPINGS_PER_POOL))
n_to_free = al->len - MIN_STACK_MAPPINGS_PER_POOL;
}
else {
n_to_free = 0;
}
for (int n = 0; n < n_to_free; n++) {
void *stk = small_arraylist_pop(al);
free_stack(stk, pool_sizes[p]);
}
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
small_arraylist_free(al);
}
}
}
-small_arraylist_t *buffers = &ptls->lazily_freed_mtarraylist_buffers;
-void *buf;
-while ((buf = small_arraylist_pop(buffers)) != NULL) {
-free(buf);
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks);
}

small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
size_t n = 0;
size_t ndel = 0;
size_t l = live_tasks->len;
void **lst = live_tasks->items;
if (l == 0)
continue;
while (1) {
jl_task_t *t = (jl_task_t*)lst[n];
assert(jl_is_task(t));
if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
if (t->ctx.stkbuf == NULL)
ndel++; // jl_release_task_stack called
else
n++;
}
else {
ndel++;
void *stkbuf = t->ctx.stkbuf;
size_t bufsz = t->ctx.bufsz;
if (stkbuf) {
t->ctx.stkbuf = NULL;
_jl_free_stack(ptls2, stkbuf, bufsz);
}
#ifdef _COMPILER_TSAN_ENABLED_
if (t->ctx.tsan_state) {
__tsan_destroy_fiber(t->ctx.tsan_state);
t->ctx.tsan_state = NULL;
}
#endif
}
if (n >= l - ndel)
break;
void *tmp = lst[n];
lst[n] = lst[n + ndel];
lst[n + ndel] = tmp;
}
live_tasks->len -= ndel;
}
jl_atomic_fetch_add(&gc_n_threads_sweeping_stacks, -1);
}

-void sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
// initialize ptls index for parallel sweeping of stack pools
assert(gc_n_threads);
@@ -3096,7 +3176,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
current_sweep_full = sweep_full;
sweep_weak_refs();
uint64_t stack_pool_time = jl_hrtime();
-sweep_stack_pools_and_mtarraylist_buffers(ptls);
+jl_gc_sweep_stack_pools_and_mtarraylist_buffers(ptls);
stack_pool_time = jl_hrtime() - stack_pool_time;
gc_num.total_stack_pool_sweep_time += stack_pool_time;
gc_num.stack_pool_sweep_time = stack_pool_time;
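The stack-release heuristic in `sweep_stack_pool_loop` above is easy to misread, so here is a small self-contained check of the same clamp (identical arithmetic, MIN_STACK_MAPPINGS_PER_POOL = 5) showing how many free stacks are released for a few pool lengths:

#include <stdio.h>

#define MIN_STACK_MAPPINGS_PER_POOL 5

/* Same clamp as in sweep_stack_pool_loop: release half of the unused stacks,
 * but never shrink the pool below MIN_STACK_MAPPINGS_PER_POOL; a dead (or not
 * yet started) thread releases everything. */
static size_t n_to_free(size_t len, int thread_is_dead)
{
    if (thread_is_dead)
        return len;
    if (len <= MIN_STACK_MAPPINGS_PER_POOL)
        return 0;
    size_t n = len / 2;
    if (n > len - MIN_STACK_MAPPINGS_PER_POOL)
        n = len - MIN_STACK_MAPPINGS_PER_POOL;
    return n;
}

int main(void)
{
    /* len=4 -> 0 (at or below the floor), len=8 -> 3 (half would be 4, clamped
     * to keep 5), len=20 -> 10 (half, floor not binding), dead thread len=6 -> 6 */
    printf("%zu %zu %zu %zu\n",
           n_to_free(4, 0), n_to_free(8, 0), n_to_free(20, 0), n_to_free(6, 1));
    return 0;
}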
2 changes: 1 addition & 1 deletion src/julia_internal.h
@@ -226,7 +226,7 @@ extern volatile int profile_all_tasks;
// Ensures that we can safely read the `live_tasks` field of every TLS when profiling.
// We want to avoid the case that a GC gets interleaved with `jl_profile_task` and shrinks
// the `live_tasks` array while we are reading it or frees tasks that are being profiled.
-// Because of that, this lock must be held in `jl_profile_task` and `sweep_stack_pools_and_mtarraylist_buffers`.
+// Because of that, this lock must be held in `jl_profile_task` and `jl_gc_sweep_stack_pools_and_mtarraylist_buffers`.
extern uv_mutex_t live_tasks_lock;
// Ensures that we can safely write to `profile_bt_data_prof` and `profile_bt_size_cur`.
// We want to avoid the case that:
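The updated comment above describes a simple mutual-exclusion discipline between task profiling and stack sweeping. A schematic of the pattern it implies, assuming the usual libuv mutex calls and leaving the real function bodies out (this is not the runtime's actual code):

#include <uv.h>

uv_mutex_t live_tasks_lock;  /* initialized elsewhere with uv_mutex_init() */

/* Profiler side: hold the lock while walking each thread's live_tasks list so
 * the GC cannot shrink the list or free a sampled task's stack mid-walk. */
void jl_profile_task_sketch(void)
{
    uv_mutex_lock(&live_tasks_lock);
    /* ... walk live_tasks and record backtraces ... */
    uv_mutex_unlock(&live_tasks_lock);
}

/* GC side: hold the same lock across the stack-pool sweep, which is what may
 * drop dead tasks from live_tasks and free their stack buffers. */
void jl_gc_sweep_stack_pools_and_mtarraylist_buffers_sketch(void)
{
    uv_mutex_lock(&live_tasks_lock);
    /* ... sweep stack pools and mtarraylist buffers ... */
    uv_mutex_unlock(&live_tasks_lock);
}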
