Skip to content

Commit

Permalink
Clean up generator frame computations
Browse files Browse the repository at this point in the history
Summary:
First, there were two mysterious +1s in `translateInitialYield()`:

The first was was never necessary: the value comes from
`LinearScanAllocator::getSpillSize()`, which is already exactly the number of
bytes of spill space needed.

The second one was making up for an off-by-one error a few lines earlier: `rsi`
was being initialized to `rbp - frame_size`, which points at the bottom word of
the `FrameHeader`, not the top word of the spill space like we wanted.

I then realized that we shouldn't even be allocating space for the `FrameHeader`
in generators, so I removed that from a bunch of relevant computations.

I also fixed a few out-of-date comments and renamed a few functions/variables
to clarify their meanings along the way.

Reviewed By: mpage

Differential Revision: D49653467

fbshipit-source-id: 19236352b3041b5012ae1e819442ac93a1239d1d
  • Loading branch information
swtaarrs authored and facebook-github-bot committed Sep 29, 2023
1 parent 4fcd583 commit 2872a2f
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 30 deletions.
17 changes: 10 additions & 7 deletions Jit/codegen/autogen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,8 +411,10 @@ void translateYieldInitial(Environ* env, const Instruction* instr) {

// Make a generator object to be returned by the epilogue.
as->lea(x86::rdi, x86::ptr(env->gen_resume_entry_label));
JIT_CHECK(env->spill_size % kPointerSize == 0, "Bad spill alignment");
as->mov(x86::rdx, (env->spill_size / kPointerSize) + 1);
JIT_CHECK(
env->shadow_frames_and_spill_size % kPointerSize == 0,
"Bad spill alignment");
as->mov(x86::rdx, env->shadow_frames_and_spill_size / kPointerSize);
as->mov(x86::rcx, reinterpret_cast<uint64_t>(env->code_rt));
JIT_CHECK(instr->origin()->IsInitialYield(), "expected InitialYield");
PyCodeObject* code = static_cast<const hir::InitialYield*>(instr->origin())
Expand Down Expand Up @@ -445,12 +447,13 @@ void translateYieldInitial(Environ* env, const Instruction* instr) {
emitStoreGenYieldPoint(as, env, instr, resume_label, x86::rdi, scratch_r);

// Store variables spilled by this point to generator.
int frame_size = sizeof(FrameHeader);
as->lea(x86::rsi, x86::ptr(x86::rbp, -frame_size));
as->sub(x86::rdi, frame_size);
int current_spill_bytes = env->initial_yield_spill_size_ - frame_size;
// Point rsi at the top word of the current spill space.
as->lea(x86::rsi, x86::ptr(x86::rbp, -kPointerSize));
// Point rdi at the top word of the generator's spill space.
as->sub(x86::rdi, kPointerSize);
int current_spill_bytes = env->initial_yield_spill_size_;
JIT_CHECK(current_spill_bytes % kPointerSize == 0, "Bad spill alignment");
as->mov(x86::rcx, (current_spill_bytes / kPointerSize) + 1);
as->mov(x86::rcx, current_spill_bytes / kPointerSize);
as->std();
as->rep().movsq();
as->cld();
Expand Down
12 changes: 7 additions & 5 deletions Jit/codegen/environ.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,18 @@ struct Environ {
// and generateEpilogue().
PhyRegisterSet changed_regs{0};

// Size of the stack frame.
int frame_size{-1};
// The size of all data stored on the C stack: shadow frames, spilled values,
// saved callee-saved registers, and space for stack arguments to called
// functions.
int stack_frame_size{-1};

// A subset of stack_frame_size: only the shadow frames and spilled values.
int shadow_frames_and_spill_size{0};

// Offset from the base of the frame to the last callee-saved register stored
// on the stack.
int last_callee_saved_reg_off{-1};

// Space used to spill values by VariableManager.
int spill_size{0};

// Various Labels that span major sections of the function.
asmjit::Label static_arg_typecheck_failed_label;
asmjit::Label hard_exit_label;
Expand Down
15 changes: 9 additions & 6 deletions Jit/codegen/gen_asm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ void* NativeGenerator::getVectorcallEntry() {
lir_printer.print(*lir_func, "Register-allocated LIR"));
}

env_.spill_size = lsalloc.getSpillSize();
env_.shadow_frames_and_spill_size = lsalloc.getFrameSize();
env_.changed_regs = lsalloc.getChangedRegs();
env_.exit_label = as_->newLabel();
env_.exit_for_yield_label = as_->newLabel();
Expand Down Expand Up @@ -401,7 +401,7 @@ void* NativeGenerator::getVectorcallEntry() {

JIT_DCHECK(code.codeSize() < INT_MAX, "Code size is larger than INT_MAX");
compiled_size_ = static_cast<int>(code.codeSize());
env_.code_rt->set_frame_size(env_.frame_size);
env_.code_rt->set_frame_size(env_.stack_frame_size);
return vectorcall_entry_;
}

Expand All @@ -421,7 +421,7 @@ int NativeGenerator::GetCompiledFunctionSize() const {
}

int NativeGenerator::GetCompiledFunctionStackSize() const {
return env_.frame_size;
return env_.stack_frame_size;
}

int NativeGenerator::GetCompiledFunctionSpillStackSize() const {
Expand Down Expand Up @@ -517,7 +517,7 @@ void NativeGenerator::setupFrameAndSaveCallerRegisters(x86::Gp tstate_reg) {
auto saved_regs = env_.changed_regs & CALLEE_SAVE_REGS;
int saved_regs_size = saved_regs.count() * 8;
// Make sure we have at least one word for scratch in the epilogue.
spill_stack_size_ = env_.spill_size;
spill_stack_size_ = env_.shadow_frames_and_spill_size;
// The frame header size and inlined shadow frames are already included in
// env_.spill_size.
int spill_stack = std::max(spill_stack_size_, 8);
Expand Down Expand Up @@ -547,7 +547,7 @@ void NativeGenerator::setupFrameAndSaveCallerRegisters(x86::Gp tstate_reg) {
as_->sub(x86::rsp, arg_buffer_size);
}

env_.frame_size = spill_stack + saved_regs_size + arg_buffer_size;
env_.stack_frame_size = spill_stack + saved_regs_size + arg_buffer_size;
}

x86::Gp get_arg_location(int arg) {
Expand Down Expand Up @@ -1905,7 +1905,10 @@ void NativeGenerator::generateAssemblyBody(const asmjit::CodeHolder& code) {
}

int NativeGenerator::calcFrameHeaderSize(const hir::Function* func) {
return func == nullptr ? 0 : sizeof(FrameHeader);
if (func == nullptr || func->code->co_flags & kCoFlagsAnyGenerator) {
return 0;
}
return sizeof(FrameHeader);
}

// calcMaxInlineDepth must work with nullptr HIR functions because it's valid
Expand Down
2 changes: 1 addition & 1 deletion Jit/lir/regalloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,7 @@ int LinearScanAllocator::getStackSlot(const Operand* operand) {
}

if (free_stack_slots_.empty()) {
max_stack_slot_ -= 8;
max_stack_slot_ -= kPointerSize;
slot = max_stack_slot_;
} else {
slot = free_stack_slots_.back();
Expand Down
6 changes: 3 additions & 3 deletions Jit/lir/regalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ class LinearScanAllocator {
return changed_regs_;
}

int getSpillSize() const {
// Return the number of bytes that should be allocated below the base
// pointer, including zero or more shadow frames and any needed spill space.
int getFrameSize() const {
return -max_stack_slot_;
}

Expand Down Expand Up @@ -184,8 +186,6 @@ class LinearScanAllocator {
UnorderedMap<const lir::Operand*, LIRLocation> vreg_global_last_use_;

int initial_max_stack_slot_;
// stack slot number always starts from -8, and it's up to the code generator
// to translate stack slot number into the form of (RBP - offset).
int max_stack_slot_;
std::vector<int> free_stack_slots_;

Expand Down
10 changes: 5 additions & 5 deletions Jit/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ class TypeDeoptPatcher;
// While the content of spill data is arbitrary depending on the function, we
// also have a few items of data about the current generator we want to access
// quickly. We can do this via positive offsets from RBP into the
// GenSuspendDataFooter struct defined below.
// GenDataFooter struct defined below.
//
// Together the spill data and GenDataFooter make up the complete JIT-specific
// data needed for a generator. PyGenObject::gi_jit_data points to the _top_ of
// the spill data (i.e. at the start of the footer). This allows us to easily
// set RBP to the pointer value on generator resume.
// data needed for a generator. PyGenObject::gi_jit_data points above the _top_
// of the spill data (i.e. at the start of the footer). This allows us to
// easily set RBP to the pointer value on generator resume.
//
// The base address of the complete heap allocated suspend data is:
// PyGenObject::gi_jit_data - GenDataFooter::spill_words
// PyGenObject::gi_jit_data - GenDataFooter::spillWords
typedef struct _GenDataFooter {
// Tools which examine/walk the stack expect the following two values to be
// ahead of RBP.
Expand Down
4 changes: 2 additions & 2 deletions RuntimeTests/backend_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class BackendTest : public RuntimeTest {
LinearScanAllocator lsalloc(lir_func);
lsalloc.run();

environ.spill_size = lsalloc.getSpillSize();
environ.shadow_frames_and_spill_size = lsalloc.getFrameSize();
environ.changed_regs = lsalloc.getChangedRegs();

PostRegAllocRewrite post_rewrite(lir_func, &environ);
Expand All @@ -61,7 +61,7 @@ class BackendTest : public RuntimeTest {
auto saved_regs = environ.changed_regs & CALLEE_SAVE_REGS;
int saved_regs_size = saved_regs.count() * 8;

int allocate_stack = std::max(environ.spill_size, 8);
int allocate_stack = std::max(environ.shadow_frames_and_spill_size, 8);
if ((allocate_stack + saved_regs_size + arg_buffer_size) % 16 != 0) {
allocate_stack += 8;
}
Expand Down
2 changes: 1 addition & 1 deletion RuntimeTests/regalloc_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ BB %28
lsallocator.linearScan();
ASSERT_FALSE(lsallocator.allocated_.empty());

ASSERT_GT(lsallocator.getSpillSize(), 0)
ASSERT_GT(lsallocator.getFrameSize(), 0)
<< "Incorrect results - no registers have been spilled.";

std::vector<LiveInterval*> intervals;
Expand Down

0 comments on commit 2872a2f

Please sign in to comment.