From a840c5d00bb7f6199a1300b783ee7d353bb63104 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 15 Apr 2026 20:20:33 +0800 Subject: [PATCH 1/5] Unbind methods at _LOAD_ATTR time eagerly for better specialization --- Lib/test/test_opcache.py | 21 +++++++++++++++++++++ Objects/object.c | 9 +++++++++ 2 files changed, 30 insertions(+) diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 5f40ad1d7a0f24..bfdd49d0a8b1f8 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -2008,6 +2008,27 @@ class MyList(list): pass self.assert_no_opcode(my_list_append, "CALL_LIST_APPEND") self.assert_no_opcode(my_list_append, "CALL") + @cpython_only + @requires_specialization + def test_load_and_call_classmethod(self): + + r = range(_testinternalcapi.SPECIALIZATION_THRESHOLD) + + class C: + @classmethod + def val(self): + return 1 + + def class_method_call(): + for _ in r: + C.val() + + class_method_call() + # gh-148608: To improve specialization, the classmethod should've become unbound. + self.assert_specialized(class_method_call, "CALL_PY_EXACT_ARGS") + self.assert_no_opcode(class_method_call, "CALL_BOUND_METHOD_EXACT_ARGS") + self.assert_no_opcode(class_method_call, "CALL") + @cpython_only @requires_specialization def test_load_attr_module_with_getattr(self): diff --git a/Objects/object.c b/Objects/object.c index 3166254f6f640b..209d67b29b2871 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1773,6 +1773,15 @@ _PyObject_GetMethodStackRef(PyThreadState *ts, _PyStackRef *self, PyObject *res = PyObject_GetAttr(obj, name); PyStackRef_CLEAR(*self); if (res != NULL) { + // gh-148608: If it's a method, unbind it right now to improve + // specialization later on. + if (Py_IS_TYPE(res, &PyMethod_Type)) { + PyMethodObject *as_meth = ((PyMethodObject *)res); + *method = PyStackRef_FromPyObjectNew(as_meth->im_func); + *self = PyStackRef_FromPyObjectNew(as_meth->im_self); + Py_DECREF(res); + return 1; + } *method = PyStackRef_FromPyObjectSteal(res); return 0; } From 77bab2e61874fcdb2e63f2097ebdff8b6d12059d Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 15 Apr 2026 23:10:53 +0800 Subject: [PATCH 2/5] Specialize for `classmethod` in LOAD_ATTR_CLASS --- Include/internal/pycore_opcode_metadata.h | 8 +-- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 8 +-- Lib/test/test_opcache.py | 4 +- Modules/_testinternalcapi/test_cases.c.h | 86 +++++++++++++++-------- Python/bytecodes.c | 25 +++++-- Python/executor_cases.c.h | 49 +++++++++---- Python/generated_cases.c.h | 86 +++++++++++++++-------- Python/optimizer_bytecodes.c | 3 +- Python/optimizer_cases.c.h | 8 ++- Python/specialize.c | 14 ++++ 11 files changed, 204 insertions(+), 89 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 0752f191d9434e..8b9fedfcf75bb1 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -796,9 +796,9 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { case LOAD_ATTR: return 1 + (oparg&1); case LOAD_ATTR_CLASS: - return 1 + (oparg & 1); + return 1 + (oparg&1); case LOAD_ATTR_CLASS_WITH_METACLASS_CHECK: - return 1 + (oparg & 1); + return 1 + (oparg&1); case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: return 0; case LOAD_ATTR_INSTANCE_VALUE: @@ -1442,8 +1442,8 @@ _PyOpcode_macro_expansion[256] = { [LIST_APPEND] = { .nuops = 1, .uops = { { _LIST_APPEND, OPARG_SIMPLE, 0 } } }, [LIST_EXTEND] = { .nuops = 2, .uops = { { _LIST_EXTEND, OPARG_SIMPLE, 0 }, { _POP_TOP, OPARG_SIMPLE, 0 } } }, [LOAD_ATTR] = { .nuops = 1, .uops = { { _LOAD_ATTR, OPARG_SIMPLE, 8 } } }, - [LOAD_ATTR_CLASS] = { .nuops = 3, .uops = { { _CHECK_ATTR_CLASS, 2, 1 }, { _LOAD_ATTR_CLASS, 4, 5 }, { _PUSH_NULL_CONDITIONAL, OPARG_SIMPLE, 9 } } }, - [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = { .nuops = 5, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_ATTR_CLASS, 2, 3 }, { _LOAD_ATTR_CLASS, 4, 5 }, { _PUSH_NULL_CONDITIONAL, OPARG_SIMPLE, 9 } } }, + [LOAD_ATTR_CLASS] = { .nuops = 2, .uops = { { _CHECK_ATTR_CLASS, 2, 1 }, { _LOAD_ATTR_CLASS, 4, 5 } } }, + [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = { .nuops = 4, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_ATTR_CLASS, 2, 3 }, { _LOAD_ATTR_CLASS, 4, 5 } } }, [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { .nuops = 7, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_PEP_523, OPARG_SIMPLE, 3 }, { _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME, 2, 3 }, { _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME, OPERAND1_4, 5 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 9 }, { _PUSH_FRAME, OPARG_SIMPLE, 9 } } }, [LOAD_ATTR_INSTANCE_VALUE] = { .nuops = 6, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_MANAGED_OBJECT_HAS_VALUES, OPARG_SIMPLE, 3 }, { _LOAD_ATTR_INSTANCE_VALUE, 1, 3 }, { _POP_TOP, OPARG_SIMPLE, 4 }, { _PUSH_NULL_CONDITIONAL, OPARG_SIMPLE, 9 } } }, [LOAD_ATTR_METHOD_LAZY_DICT] = { .nuops = 4, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_ATTR_METHOD_LAZY_DICT, 1, 3 }, { _LOAD_ATTR_METHOD_LAZY_DICT, 4, 5 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index c8c06060b00203..7449af8b2bd00d 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -1019,7 +1019,7 @@ extern "C" { #define _LIST_APPEND_r10 1230 #define _LIST_EXTEND_r11 1231 #define _LOAD_ATTR_r10 1232 -#define _LOAD_ATTR_CLASS_r11 1233 +#define _LOAD_ATTR_CLASS_r10 1233 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME_r11 1234 #define _LOAD_ATTR_INSTANCE_VALUE_r02 1235 #define _LOAD_ATTR_INSTANCE_VALUE_r12 1236 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 2642861eb8f0b1..2a3c08efed5ba0 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -224,7 +224,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_EXIT_FLAG, [_LOAD_ATTR_SLOT] = HAS_EXIT_FLAG, [_CHECK_ATTR_CLASS] = HAS_EXIT_FLAG, - [_LOAD_ATTR_CLASS] = HAS_ESCAPES_FLAG, + [_LOAD_ATTR_CLASS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_PROPERTY_FRAME] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_EXIT_FLAG, [_GUARD_DORV_NO_DICT] = HAS_EXIT_FLAG, @@ -2138,7 +2138,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { .best = { 1, 1, 1, 1 }, .entries = { { -1, -1, -1 }, - { 1, 1, _LOAD_ATTR_CLASS_r11 }, + { 0, 1, _LOAD_ATTR_CLASS_r10 }, { -1, -1, -1 }, { -1, -1, -1 }, }, @@ -4227,7 +4227,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_CHECK_ATTR_CLASS_r11] = _CHECK_ATTR_CLASS, [_CHECK_ATTR_CLASS_r22] = _CHECK_ATTR_CLASS, [_CHECK_ATTR_CLASS_r33] = _CHECK_ATTR_CLASS, - [_LOAD_ATTR_CLASS_r11] = _LOAD_ATTR_CLASS, + [_LOAD_ATTR_CLASS_r10] = _LOAD_ATTR_CLASS, [_LOAD_ATTR_PROPERTY_FRAME_r01] = _LOAD_ATTR_PROPERTY_FRAME, [_LOAD_ATTR_PROPERTY_FRAME_r11] = _LOAD_ATTR_PROPERTY_FRAME, [_LOAD_ATTR_PROPERTY_FRAME_r22] = _LOAD_ATTR_PROPERTY_FRAME, @@ -5463,7 +5463,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_LOAD_ATTR] = "_LOAD_ATTR", [_LOAD_ATTR_r10] = "_LOAD_ATTR_r10", [_LOAD_ATTR_CLASS] = "_LOAD_ATTR_CLASS", - [_LOAD_ATTR_CLASS_r11] = "_LOAD_ATTR_CLASS_r11", + [_LOAD_ATTR_CLASS_r10] = "_LOAD_ATTR_CLASS_r10", [_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME] = "_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME", [_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME_r11] = "_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME_r11", [_LOAD_ATTR_INSTANCE_VALUE] = "_LOAD_ATTR_INSTANCE_VALUE", diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index bfdd49d0a8b1f8..ddd468e4b03040 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -2024,8 +2024,8 @@ def class_method_call(): C.val() class_method_call() - # gh-148608: To improve specialization, the classmethod should've become unbound. - self.assert_specialized(class_method_call, "CALL_PY_EXACT_ARGS") + # gh-148608: To improve specialization, the classmethod should be unbound. + self.assert_specialized(class_method_call, "LOAD_ATTR_CLASS") self.assert_no_opcode(class_method_call, "CALL_BOUND_METHOD_EXACT_ARGS") self.assert_no_opcode(class_method_call, "CALL") diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index 778a60d98e7278..12db45ce52ee62 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -8184,7 +8184,7 @@ static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); _PyStackRef owner; _PyStackRef attr; - _PyStackRef *null; + _PyStackRef *self_or_null; /* Skip 1 cache entry */ // _CHECK_ATTR_CLASS { @@ -8206,25 +8206,40 @@ /* Skip 2 cache entries */ // _LOAD_ATTR_CLASS { - PyObject *descr = read_obj(&this_instr[6].cache); + self_or_null = &stack_pointer[0]; + PyObject *descr_tagged = read_obj(&this_instr[6].cache); STAT_INC(LOAD_ATTR, hit); + PyObject *descr = (PyObject *)((uintptr_t)descr_tagged & (~1)); + int use_self = ((uintptr_t)descr_tagged & 1); assert(descr != NULL); attr = PyStackRef_FromPyObjectNew(descr); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = owner; - owner = attr; - stack_pointer[-1] = owner; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - // _PUSH_NULL_CONDITIONAL - { - null = &stack_pointer[0]; if (oparg & 1) { - null[0] = PyStackRef_NULL; + if (use_self) { + self_or_null[0] = owner; + } + else { + self_or_null[0] = PyStackRef_NULL; + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(owner); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -(oparg&1); + } + } + else { + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(owner); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -(oparg&1); } } - stack_pointer += (oparg & 1); + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8242,7 +8257,7 @@ static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); _PyStackRef owner; _PyStackRef attr; - _PyStackRef *null; + _PyStackRef *self_or_null; /* Skip 1 cache entry */ // _GUARD_TYPE_VERSION { @@ -8274,25 +8289,40 @@ } // _LOAD_ATTR_CLASS { - PyObject *descr = read_obj(&this_instr[6].cache); + self_or_null = &stack_pointer[0]; + PyObject *descr_tagged = read_obj(&this_instr[6].cache); STAT_INC(LOAD_ATTR, hit); + PyObject *descr = (PyObject *)((uintptr_t)descr_tagged & (~1)); + int use_self = ((uintptr_t)descr_tagged & 1); assert(descr != NULL); attr = PyStackRef_FromPyObjectNew(descr); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = owner; - owner = attr; - stack_pointer[-1] = owner; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - // _PUSH_NULL_CONDITIONAL - { - null = &stack_pointer[0]; if (oparg & 1) { - null[0] = PyStackRef_NULL; + if (use_self) { + self_or_null[0] = owner; + } + else { + self_or_null[0] = PyStackRef_NULL; + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(owner); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -(oparg&1); + } + } + else { + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(owner); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -(oparg&1); } } - stack_pointer += (oparg & 1); + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6dfa61b5f9caf8..aa3c75a3f662ae 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2949,27 +2949,40 @@ dummy_func( EXIT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(((PyTypeObject *)owner_o)->tp_version_tag) != type_version); } - op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr)) { + // This can't use _PUSH_NULL_CONDITIONAL as it requires swapping self in certain cases. + op(_LOAD_ATTR_CLASS, (descr_tagged/4, owner -- attr, self_or_null[oparg&1])) { STAT_INC(LOAD_ATTR, hit); + PyObject *descr = (PyObject *)((uintptr_t)descr_tagged & (~1)); + int use_self = ((uintptr_t)descr_tagged & 1); assert(descr != NULL); attr = PyStackRef_FromPyObjectNew(descr); - DECREF_INPUTS(); + if (oparg & 1) { + if (use_self) { + self_or_null[0] = owner; + DEAD(owner); + } + else { + self_or_null[0] = PyStackRef_NULL; + PyStackRef_CLOSE(owner); + } + } + else { + PyStackRef_CLOSE(owner); + } } macro(LOAD_ATTR_CLASS) = unused/1 + _CHECK_ATTR_CLASS + unused/2 + - _LOAD_ATTR_CLASS + - _PUSH_NULL_CONDITIONAL; + _LOAD_ATTR_CLASS; macro(LOAD_ATTR_CLASS_WITH_METACLASS_CHECK) = unused/1 + _RECORD_TOS_TYPE + _GUARD_TYPE_VERSION + _CHECK_ATTR_CLASS + - _LOAD_ATTR_CLASS + - _PUSH_NULL_CONDITIONAL; + _LOAD_ATTR_CLASS; op(_LOAD_ATTR_PROPERTY_FRAME, (func_version/2, fget/4, owner -- new_frame)) { assert((oparg & 1) == 0); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 35872aa99d4fc2..73898b5a8845a7 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -11656,31 +11656,52 @@ break; } - case _LOAD_ATTR_CLASS_r11: { + case _LOAD_ATTR_CLASS_r10: { CHECK_CURRENT_CACHED_VALUES(1); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef owner; _PyStackRef attr; + _PyStackRef *self_or_null; _PyStackRef _stack_item_0 = _tos_cache0; + oparg = CURRENT_OPARG(); owner = _stack_item_0; - PyObject *descr = (PyObject *)CURRENT_OPERAND0_64(); + self_or_null = &stack_pointer[1]; + PyObject *descr_tagged = (PyObject *)CURRENT_OPERAND0_64(); STAT_INC(LOAD_ATTR, hit); + PyObject *descr = (PyObject *)((uintptr_t)descr_tagged & (~1)); + int use_self = ((uintptr_t)descr_tagged & 1); assert(descr != NULL); attr = PyStackRef_FromPyObjectNew(descr); - stack_pointer[0] = owner; - stack_pointer += 1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = owner; - owner = attr; - stack_pointer[-1] = owner; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - _tos_cache0 = attr; + if (oparg & 1) { + if (use_self) { + self_or_null[0] = owner; + } + else { + self_or_null[0] = PyStackRef_NULL; + stack_pointer[0] = attr; + stack_pointer += 1 + (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(owner); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1 - (oparg&1); + } + } + else { + stack_pointer[0] = attr; + stack_pointer += 1 + (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(owner); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1 - (oparg&1); + } + _tos_cache0 = PyStackRef_ZERO_BITS; _tos_cache1 = PyStackRef_ZERO_BITS; _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); - stack_pointer += -1; + SET_CURRENT_CACHED_VALUES(0); + stack_pointer[0] = attr; + stack_pointer += 1 + (oparg&1); ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f8e7f7476fc184..0c6d3557094f59 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8183,7 +8183,7 @@ static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); _PyStackRef owner; _PyStackRef attr; - _PyStackRef *null; + _PyStackRef *self_or_null; /* Skip 1 cache entry */ // _CHECK_ATTR_CLASS { @@ -8205,25 +8205,40 @@ /* Skip 2 cache entries */ // _LOAD_ATTR_CLASS { - PyObject *descr = read_obj(&this_instr[6].cache); + self_or_null = &stack_pointer[0]; + PyObject *descr_tagged = read_obj(&this_instr[6].cache); STAT_INC(LOAD_ATTR, hit); + PyObject *descr = (PyObject *)((uintptr_t)descr_tagged & (~1)); + int use_self = ((uintptr_t)descr_tagged & 1); assert(descr != NULL); attr = PyStackRef_FromPyObjectNew(descr); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = owner; - owner = attr; - stack_pointer[-1] = owner; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - // _PUSH_NULL_CONDITIONAL - { - null = &stack_pointer[0]; if (oparg & 1) { - null[0] = PyStackRef_NULL; + if (use_self) { + self_or_null[0] = owner; + } + else { + self_or_null[0] = PyStackRef_NULL; + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(owner); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -(oparg&1); + } + } + else { + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(owner); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -(oparg&1); } } - stack_pointer += (oparg & 1); + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8241,7 +8256,7 @@ static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); _PyStackRef owner; _PyStackRef attr; - _PyStackRef *null; + _PyStackRef *self_or_null; /* Skip 1 cache entry */ // _GUARD_TYPE_VERSION { @@ -8273,25 +8288,40 @@ } // _LOAD_ATTR_CLASS { - PyObject *descr = read_obj(&this_instr[6].cache); + self_or_null = &stack_pointer[0]; + PyObject *descr_tagged = read_obj(&this_instr[6].cache); STAT_INC(LOAD_ATTR, hit); + PyObject *descr = (PyObject *)((uintptr_t)descr_tagged & (~1)); + int use_self = ((uintptr_t)descr_tagged & 1); assert(descr != NULL); attr = PyStackRef_FromPyObjectNew(descr); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = owner; - owner = attr; - stack_pointer[-1] = owner; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - // _PUSH_NULL_CONDITIONAL - { - null = &stack_pointer[0]; if (oparg & 1) { - null[0] = PyStackRef_NULL; + if (use_self) { + self_or_null[0] = owner; + } + else { + self_or_null[0] = PyStackRef_NULL; + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(owner); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -(oparg&1); + } + } + else { + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(owner); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -(oparg&1); } } - stack_pointer += (oparg & 1); + stack_pointer[-1] = attr; + stack_pointer += (oparg&1); ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index d7d8b90ebabd6d..694f3d13d0a40c 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -964,12 +964,13 @@ dummy_func(void) { o = owner; } - op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr)) { + op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr, self_or_null[oparg&1])) { (void)descr; PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); PyObject *name = get_co_name(ctx, oparg >> 1); attr = lookup_attr(ctx, dependencies, this_instr, type, name, _POP_TOP, _NOP); + self_or_null = sym_new_unknown(ctx); } op(_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, (descr/4, owner -- attr)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index bc6391d85d76cd..e4ca691726669f 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2647,14 +2647,20 @@ case _LOAD_ATTR_CLASS: { JitOptRef owner; JitOptRef attr; + JitOptRef *self_or_null; owner = stack_pointer[-1]; - PyObject *descr = (PyObject *)this_instr->operand0; + self_or_null = &stack_pointer[0]; + PyObject *descr_tagged = (PyObject *)this_instr->operand0; (void)descr; PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); PyObject *name = get_co_name(ctx, oparg >> 1); attr = lookup_attr(ctx, dependencies, this_instr, type, name, _POP_TOP, _NOP); + self_or_null = sym_new_unknown(ctx); + CHECK_STACK_BOUNDS((oparg&1)); stack_pointer[-1] = attr; + stack_pointer += (oparg&1); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } diff --git a/Python/specialize.c b/Python/specialize.c index e42e1a8faa8a23..ad0ad7614a73f8 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1206,6 +1206,20 @@ specialize_class_load_attr(PyObject *owner, _Py_CODEUNIT *instr, } } switch (kind) { + case PYTHON_CLASSMETHOD: { + int is_meth = FT_ATOMIC_LOAD_UINT8_RELAXED(instr->op.arg) & 1; + if (!is_meth) { + SPECIALIZATION_FAIL(LOAD_ATTR, load_attr_fail_kind(kind)); + return -1; + } + Py_DECREF(descr); + descr = Py_NewRef(_PyClassMethod_GetFunc(descr)); + write_ptr(cache->descr, (void *)((uintptr_t)descr | 1)); + write_u32(cache->type_version, tp_version); + specialize(instr, LOAD_ATTR_CLASS); + Py_XDECREF(descr); + return 0; + } case MUTABLE: // special case for enums which has Py_TYPE(descr) == cls // so guarding on type version is sufficient From 3b1880f13ace37f394b2f68eb623917186061d10 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 15 Apr 2026 23:12:42 +0800 Subject: [PATCH 3/5] Remove unused part --- Objects/object.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Objects/object.c b/Objects/object.c index 209d67b29b2871..3166254f6f640b 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1773,15 +1773,6 @@ _PyObject_GetMethodStackRef(PyThreadState *ts, _PyStackRef *self, PyObject *res = PyObject_GetAttr(obj, name); PyStackRef_CLEAR(*self); if (res != NULL) { - // gh-148608: If it's a method, unbind it right now to improve - // specialization later on. - if (Py_IS_TYPE(res, &PyMethod_Type)) { - PyMethodObject *as_meth = ((PyMethodObject *)res); - *method = PyStackRef_FromPyObjectNew(as_meth->im_func); - *self = PyStackRef_FromPyObjectNew(as_meth->im_self); - Py_DECREF(res); - return 1; - } *method = PyStackRef_FromPyObjectSteal(res); return 0; } From 4d973bf930bfd14c002f9a0596a7b0e50d935c55 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 15 Apr 2026 23:13:49 +0800 Subject: [PATCH 4/5] align values --- Python/optimizer_bytecodes.c | 4 ++-- Python/optimizer_cases.c.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 694f3d13d0a40c..a4605408f952bb 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -964,8 +964,8 @@ dummy_func(void) { o = owner; } - op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr, self_or_null[oparg&1])) { - (void)descr; + op(_LOAD_ATTR_CLASS, (descr_tagged/4, owner -- attr, self_or_null[oparg&1])) { + (void)descr_tagged; PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); PyObject *name = get_co_name(ctx, oparg >> 1); attr = lookup_attr(ctx, dependencies, this_instr, type, name, diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index e4ca691726669f..16bd0d0c80d0cd 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2651,7 +2651,7 @@ owner = stack_pointer[-1]; self_or_null = &stack_pointer[0]; PyObject *descr_tagged = (PyObject *)this_instr->operand0; - (void)descr; + (void)descr_tagged; PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); PyObject *name = get_co_name(ctx, oparg >> 1); attr = lookup_attr(ctx, dependencies, this_instr, type, name, From bd666c5e306b575d41ecf8e0e531b645bfa64ae1 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 15 Apr 2026 23:20:14 +0800 Subject: [PATCH 5/5] fix compilation --- Python/optimizer_bytecodes.c | 2 +- Python/optimizer_cases.c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index a4605408f952bb..efd10f52adf8b2 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -970,7 +970,7 @@ dummy_func(void) { PyObject *name = get_co_name(ctx, oparg >> 1); attr = lookup_attr(ctx, dependencies, this_instr, type, name, _POP_TOP, _NOP); - self_or_null = sym_new_unknown(ctx); + self_or_null[0] = sym_new_unknown(ctx); } op(_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, (descr/4, owner -- attr)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 16bd0d0c80d0cd..a906914291c522 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2656,7 +2656,7 @@ PyObject *name = get_co_name(ctx, oparg >> 1); attr = lookup_attr(ctx, dependencies, this_instr, type, name, _POP_TOP, _NOP); - self_or_null = sym_new_unknown(ctx); + self_or_null[0] = sym_new_unknown(ctx); CHECK_STACK_BOUNDS((oparg&1)); stack_pointer[-1] = attr; stack_pointer += (oparg&1);