From 2872a2f71e06b5e1722bde81c89a50f2f6f88c67 Mon Sep 17 00:00:00 2001 From: Brett Simmers Date: Fri, 29 Sep 2023 15:50:43 -0700 Subject: [PATCH] Clean up generator frame computations Summary: First, there were two mysterious +1s in `translateInitialYield()`: The first was was never necessary: the value comes from `LinearScanAllocator::getSpillSize()`, which is already exactly the number of bytes of spill space needed. The second one was making up for an off-by-one error a few lines earlier: `rsi` was being initialized to `rbp - frame_size`, which points at the bottom word of the `FrameHeader`, not the top word of the spill space like we wanted. I then realized that we shouldn't even be allocating space for the `FrameHeader` in generators, so I removed that from a bunch of relevant computations. I also fixed a few out-of-date comments and renamed a few functions/variables to clarify their meanings along the way. Reviewed By: mpage Differential Revision: D49653467 fbshipit-source-id: 19236352b3041b5012ae1e819442ac93a1239d1d --- Jit/codegen/autogen.cpp | 17 ++++++++++------- Jit/codegen/environ.h | 12 +++++++----- Jit/codegen/gen_asm.cpp | 15 +++++++++------ Jit/lir/regalloc.cpp | 2 +- Jit/lir/regalloc.h | 6 +++--- Jit/runtime.h | 10 +++++----- RuntimeTests/backend_test.cpp | 4 ++-- RuntimeTests/regalloc_test.cpp | 2 +- 8 files changed, 38 insertions(+), 30 deletions(-) diff --git a/Jit/codegen/autogen.cpp b/Jit/codegen/autogen.cpp index 87e44d83c25..e69499bce29 100644 --- a/Jit/codegen/autogen.cpp +++ b/Jit/codegen/autogen.cpp @@ -411,8 +411,10 @@ void translateYieldInitial(Environ* env, const Instruction* instr) { // Make a generator object to be returned by the epilogue. as->lea(x86::rdi, x86::ptr(env->gen_resume_entry_label)); - JIT_CHECK(env->spill_size % kPointerSize == 0, "Bad spill alignment"); - as->mov(x86::rdx, (env->spill_size / kPointerSize) + 1); + JIT_CHECK( + env->shadow_frames_and_spill_size % kPointerSize == 0, + "Bad spill alignment"); + as->mov(x86::rdx, env->shadow_frames_and_spill_size / kPointerSize); as->mov(x86::rcx, reinterpret_cast(env->code_rt)); JIT_CHECK(instr->origin()->IsInitialYield(), "expected InitialYield"); PyCodeObject* code = static_cast(instr->origin()) @@ -445,12 +447,13 @@ void translateYieldInitial(Environ* env, const Instruction* instr) { emitStoreGenYieldPoint(as, env, instr, resume_label, x86::rdi, scratch_r); // Store variables spilled by this point to generator. - int frame_size = sizeof(FrameHeader); - as->lea(x86::rsi, x86::ptr(x86::rbp, -frame_size)); - as->sub(x86::rdi, frame_size); - int current_spill_bytes = env->initial_yield_spill_size_ - frame_size; + // Point rsi at the top word of the current spill space. + as->lea(x86::rsi, x86::ptr(x86::rbp, -kPointerSize)); + // Point rdi at the top word of the generator's spill space. + as->sub(x86::rdi, kPointerSize); + int current_spill_bytes = env->initial_yield_spill_size_; JIT_CHECK(current_spill_bytes % kPointerSize == 0, "Bad spill alignment"); - as->mov(x86::rcx, (current_spill_bytes / kPointerSize) + 1); + as->mov(x86::rcx, current_spill_bytes / kPointerSize); as->std(); as->rep().movsq(); as->cld(); diff --git a/Jit/codegen/environ.h b/Jit/codegen/environ.h index 9936b324628..f4e3539dc2b 100644 --- a/Jit/codegen/environ.h +++ b/Jit/codegen/environ.h @@ -28,16 +28,18 @@ struct Environ { // and generateEpilogue(). PhyRegisterSet changed_regs{0}; - // Size of the stack frame. - int frame_size{-1}; + // The size of all data stored on the C stack: shadow frames, spilled values, + // saved callee-saved registers, and space for stack arguments to called + // functions. + int stack_frame_size{-1}; + + // A subset of stack_frame_size: only the shadow frames and spilled values. + int shadow_frames_and_spill_size{0}; // Offset from the base of the frame to the last callee-saved register stored // on the stack. int last_callee_saved_reg_off{-1}; - // Space used to spill values by VariableManager. - int spill_size{0}; - // Various Labels that span major sections of the function. asmjit::Label static_arg_typecheck_failed_label; asmjit::Label hard_exit_label; diff --git a/Jit/codegen/gen_asm.cpp b/Jit/codegen/gen_asm.cpp index eb328e339bc..0eca052681a 100644 --- a/Jit/codegen/gen_asm.cpp +++ b/Jit/codegen/gen_asm.cpp @@ -337,7 +337,7 @@ void* NativeGenerator::getVectorcallEntry() { lir_printer.print(*lir_func, "Register-allocated LIR")); } - env_.spill_size = lsalloc.getSpillSize(); + env_.shadow_frames_and_spill_size = lsalloc.getFrameSize(); env_.changed_regs = lsalloc.getChangedRegs(); env_.exit_label = as_->newLabel(); env_.exit_for_yield_label = as_->newLabel(); @@ -401,7 +401,7 @@ void* NativeGenerator::getVectorcallEntry() { JIT_DCHECK(code.codeSize() < INT_MAX, "Code size is larger than INT_MAX"); compiled_size_ = static_cast(code.codeSize()); - env_.code_rt->set_frame_size(env_.frame_size); + env_.code_rt->set_frame_size(env_.stack_frame_size); return vectorcall_entry_; } @@ -421,7 +421,7 @@ int NativeGenerator::GetCompiledFunctionSize() const { } int NativeGenerator::GetCompiledFunctionStackSize() const { - return env_.frame_size; + return env_.stack_frame_size; } int NativeGenerator::GetCompiledFunctionSpillStackSize() const { @@ -517,7 +517,7 @@ void NativeGenerator::setupFrameAndSaveCallerRegisters(x86::Gp tstate_reg) { auto saved_regs = env_.changed_regs & CALLEE_SAVE_REGS; int saved_regs_size = saved_regs.count() * 8; // Make sure we have at least one word for scratch in the epilogue. - spill_stack_size_ = env_.spill_size; + spill_stack_size_ = env_.shadow_frames_and_spill_size; // The frame header size and inlined shadow frames are already included in // env_.spill_size. int spill_stack = std::max(spill_stack_size_, 8); @@ -547,7 +547,7 @@ void NativeGenerator::setupFrameAndSaveCallerRegisters(x86::Gp tstate_reg) { as_->sub(x86::rsp, arg_buffer_size); } - env_.frame_size = spill_stack + saved_regs_size + arg_buffer_size; + env_.stack_frame_size = spill_stack + saved_regs_size + arg_buffer_size; } x86::Gp get_arg_location(int arg) { @@ -1905,7 +1905,10 @@ void NativeGenerator::generateAssemblyBody(const asmjit::CodeHolder& code) { } int NativeGenerator::calcFrameHeaderSize(const hir::Function* func) { - return func == nullptr ? 0 : sizeof(FrameHeader); + if (func == nullptr || func->code->co_flags & kCoFlagsAnyGenerator) { + return 0; + } + return sizeof(FrameHeader); } // calcMaxInlineDepth must work with nullptr HIR functions because it's valid diff --git a/Jit/lir/regalloc.cpp b/Jit/lir/regalloc.cpp index 8c8861ac145..73db09b6b87 100644 --- a/Jit/lir/regalloc.cpp +++ b/Jit/lir/regalloc.cpp @@ -891,7 +891,7 @@ int LinearScanAllocator::getStackSlot(const Operand* operand) { } if (free_stack_slots_.empty()) { - max_stack_slot_ -= 8; + max_stack_slot_ -= kPointerSize; slot = max_stack_slot_; } else { slot = free_stack_slots_.back(); diff --git a/Jit/lir/regalloc.h b/Jit/lir/regalloc.h index 76a14ef841b..136af78df3e 100644 --- a/Jit/lir/regalloc.h +++ b/Jit/lir/regalloc.h @@ -156,7 +156,9 @@ class LinearScanAllocator { return changed_regs_; } - int getSpillSize() const { + // Return the number of bytes that should be allocated below the base + // pointer, including zero or more shadow frames and any needed spill space. + int getFrameSize() const { return -max_stack_slot_; } @@ -184,8 +186,6 @@ class LinearScanAllocator { UnorderedMap vreg_global_last_use_; int initial_max_stack_slot_; - // stack slot number always starts from -8, and it's up to the code generator - // to translate stack slot number into the form of (RBP - offset). int max_stack_slot_; std::vector free_stack_slots_; diff --git a/Jit/runtime.h b/Jit/runtime.h index 3fa18e0131a..2fa9a761a2c 100644 --- a/Jit/runtime.h +++ b/Jit/runtime.h @@ -37,15 +37,15 @@ class TypeDeoptPatcher; // While the content of spill data is arbitrary depending on the function, we // also have a few items of data about the current generator we want to access // quickly. We can do this via positive offsets from RBP into the -// GenSuspendDataFooter struct defined below. +// GenDataFooter struct defined below. // // Together the spill data and GenDataFooter make up the complete JIT-specific -// data needed for a generator. PyGenObject::gi_jit_data points to the _top_ of -// the spill data (i.e. at the start of the footer). This allows us to easily -// set RBP to the pointer value on generator resume. +// data needed for a generator. PyGenObject::gi_jit_data points above the _top_ +// of the spill data (i.e. at the start of the footer). This allows us to +// easily set RBP to the pointer value on generator resume. // // The base address of the complete heap allocated suspend data is: -// PyGenObject::gi_jit_data - GenDataFooter::spill_words +// PyGenObject::gi_jit_data - GenDataFooter::spillWords typedef struct _GenDataFooter { // Tools which examine/walk the stack expect the following two values to be // ahead of RBP. diff --git a/RuntimeTests/backend_test.cpp b/RuntimeTests/backend_test.cpp index deaaa6071ba..8c7a683f279 100644 --- a/RuntimeTests/backend_test.cpp +++ b/RuntimeTests/backend_test.cpp @@ -42,7 +42,7 @@ class BackendTest : public RuntimeTest { LinearScanAllocator lsalloc(lir_func); lsalloc.run(); - environ.spill_size = lsalloc.getSpillSize(); + environ.shadow_frames_and_spill_size = lsalloc.getFrameSize(); environ.changed_regs = lsalloc.getChangedRegs(); PostRegAllocRewrite post_rewrite(lir_func, &environ); @@ -61,7 +61,7 @@ class BackendTest : public RuntimeTest { auto saved_regs = environ.changed_regs & CALLEE_SAVE_REGS; int saved_regs_size = saved_regs.count() * 8; - int allocate_stack = std::max(environ.spill_size, 8); + int allocate_stack = std::max(environ.shadow_frames_and_spill_size, 8); if ((allocate_stack + saved_regs_size + arg_buffer_size) % 16 != 0) { allocate_stack += 8; } diff --git a/RuntimeTests/regalloc_test.cpp b/RuntimeTests/regalloc_test.cpp index dc9cb1cc735..c4099e3e822 100644 --- a/RuntimeTests/regalloc_test.cpp +++ b/RuntimeTests/regalloc_test.cpp @@ -242,7 +242,7 @@ BB %28 lsallocator.linearScan(); ASSERT_FALSE(lsallocator.allocated_.empty()); - ASSERT_GT(lsallocator.getSpillSize(), 0) + ASSERT_GT(lsallocator.getFrameSize(), 0) << "Incorrect results - no registers have been spilled."; std::vector intervals;