diff --git a/crates/vm/src/builtins/dict.rs b/crates/vm/src/builtins/dict.rs index 0fc615e442e..43891d3b7f7 100644 --- a/crates/vm/src/builtins/dict.rs +++ b/crates/vm/src/builtins/dict.rs @@ -79,6 +79,11 @@ impl PyDict { &self.entries } + /// Monotonically increasing version for mutation tracking. + pub(crate) fn version(&self) -> u64 { + self.entries.version() + } + /// Returns all keys as a Vec, atomically under a single read lock. /// Thread-safe: prevents "dictionary changed size during iteration" errors. pub fn keys_vec(&self) -> Vec { diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index e0b7116553a..02198785815 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -80,6 +80,14 @@ pub struct PyFunction { static FUNC_VERSION_COUNTER: AtomicU32 = AtomicU32::new(1); +/// Atomically allocate the next function version, returning 0 if exhausted. +/// Once the counter wraps to 0, it stays at 0 permanently. +fn next_func_version() -> u32 { + FUNC_VERSION_COUNTER + .fetch_update(Relaxed, Relaxed, |v| (v != 0).then(|| v.wrapping_add(1))) + .unwrap_or(0) +} + unsafe impl Traverse for PyFunction { fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { self.globals.traverse(tracer_fn); @@ -204,7 +212,7 @@ impl PyFunction { annotate: PyMutex::new(None), module: PyMutex::new(module), doc: PyMutex::new(doc), - func_version: AtomicU32::new(FUNC_VERSION_COUNTER.fetch_add(1, Relaxed)), + func_version: AtomicU32::new(next_func_version()), #[cfg(feature = "jit")] jitted_code: OnceCell::new(), }; @@ -603,6 +611,22 @@ impl Py { self.func_version.load(Relaxed) } + /// Returns the current version, assigning a fresh one if previously invalidated. + /// Returns 0 if the version counter has overflowed. 
+ /// `_PyFunction_GetVersionForCurrentState` + pub fn get_version_for_current_state(&self) -> u32 { + let v = self.func_version.load(Relaxed); + if v != 0 { + return v; + } + let new_v = next_func_version(); + if new_v == 0 { + return 0; + } + self.func_version.store(new_v, Relaxed); + new_v + } + /// Check if this function is eligible for exact-args call specialization. /// Returns true if: no VARARGS, no VARKEYWORDS, no kwonly args, not generator/coroutine, /// and effective_nargs matches co_argcount. @@ -627,6 +651,16 @@ impl Py { pub fn invoke_exact_args(&self, args: &[PyObjectRef], vm: &VirtualMachine) -> PyResult { let code: PyRef = (*self.code).to_owned(); + debug_assert_eq!(args.len(), code.arg_count as usize); + debug_assert!(code.flags.contains(bytecode::CodeFlags::NEWLOCALS)); + debug_assert!(!code.flags.intersects( + bytecode::CodeFlags::VARARGS + | bytecode::CodeFlags::VARKEYWORDS + | bytecode::CodeFlags::GENERATOR + | bytecode::CodeFlags::COROUTINE + )); + debug_assert_eq!(code.kwonlyarg_count, 0); + let frame = Frame::new( code.clone(), Scope::new(None, self.globals.clone()), diff --git a/crates/vm/src/builtins/list.rs b/crates/vm/src/builtins/list.rs index aa25af58e9b..84d7a4e309c 100644 --- a/crates/vm/src/builtins/list.rs +++ b/crates/vm/src/builtins/list.rs @@ -637,6 +637,23 @@ impl PyListIterator { } } +impl PyListIterator { + /// Fast path for FOR_ITER specialization. 
+ pub(crate) fn fast_next(&self) -> Option { + self.internal + .lock() + .next(|list, pos| { + let vec = list.borrow_vec(); + Ok(PyIterReturn::from_result(vec.get(pos).cloned().ok_or(None))) + }) + .ok() + .and_then(|r| match r { + PyIterReturn::Return(v) => Some(v), + PyIterReturn::StopIteration(_) => None, + }) + } +} + impl SelfIter for PyListIterator {} impl IterNext for PyListIterator { fn next(zelf: &Py, _vm: &VirtualMachine) -> PyResult { diff --git a/crates/vm/src/builtins/object.rs b/crates/vm/src/builtins/object.rs index b6c7b263d77..8fed43cd5d7 100644 --- a/crates/vm/src/builtins/object.rs +++ b/crates/vm/src/builtins/object.rs @@ -365,7 +365,7 @@ impl PyBaseObject { } #[pyslot] - fn slot_setattro( + pub(crate) fn slot_setattro( obj: &PyObject, attr_name: &Py, value: PySetterValue, diff --git a/crates/vm/src/builtins/property.rs b/crates/vm/src/builtins/property.rs index 6c53c4b4d98..509307c7b00 100644 --- a/crates/vm/src/builtins/property.rs +++ b/crates/vm/src/builtins/property.rs @@ -133,6 +133,10 @@ impl PyProperty { self.getter.read().clone() } + pub(crate) fn get_fget(&self) -> Option { + self.getter.read().clone() + } + #[pygetset] fn fset(&self) -> Option { self.setter.read().clone() diff --git a/crates/vm/src/builtins/range.rs b/crates/vm/src/builtins/range.rs index 0d0b5ccdd5d..ec1a662ddad 100644 --- a/crates/vm/src/builtins/range.rs +++ b/crates/vm/src/builtins/range.rs @@ -613,19 +613,6 @@ pub struct PyRangeIterator { length: usize, } -impl PyRangeIterator { - /// Advance and return next value without going through the iterator protocol. - #[inline] - pub(crate) fn next_fast(&self) -> Option { - let index = self.index.fetch_add(1); - if index < self.length { - Some(self.start + (index as isize) * self.step) - } else { - None - } - } -} - impl PyPayload for PyRangeIterator { #[inline] fn class(ctx: &Context) -> &'static Py { @@ -660,18 +647,25 @@ impl PyRangeIterator { } } +impl PyRangeIterator { + /// Fast path for FOR_ITER specialization. 
Returns the next isize value + /// without allocating PyInt or PyIterReturn. + pub(crate) fn fast_next(&self) -> Option { + let index = self.index.fetch_add(1); + if index < self.length { + Some(self.start + (index as isize) * self.step) + } else { + None + } + } +} + impl SelfIter for PyRangeIterator {} impl IterNext for PyRangeIterator { fn next(zelf: &Py, vm: &VirtualMachine) -> PyResult { - // TODO: In pathological case (index == usize::MAX) this can wrap around - // (since fetch_add wraps). This would result in the iterator spinning again - // from the beginning. - let index = zelf.index.fetch_add(1); - let r = if index < zelf.length { - let value = zelf.start + (index as isize) * zelf.step; - PyIterReturn::Return(vm.ctx.new_int(value).into()) - } else { - PyIterReturn::StopIteration(None) + let r = match zelf.fast_next() { + Some(value) => PyIterReturn::Return(vm.ctx.new_int(value).into()), + None => PyIterReturn::StopIteration(None), }; Ok(r) } diff --git a/crates/vm/src/builtins/tuple.rs b/crates/vm/src/builtins/tuple.rs index 046506f6f4d..8ca2f74a3bf 100644 --- a/crates/vm/src/builtins/tuple.rs +++ b/crates/vm/src/builtins/tuple.rs @@ -572,6 +572,24 @@ impl PyTupleIterator { } } +impl PyTupleIterator { + /// Fast path for FOR_ITER specialization. 
+ pub(crate) fn fast_next(&self) -> Option { + self.internal + .lock() + .next(|tuple, pos| { + Ok(PyIterReturn::from_result( + tuple.get(pos).cloned().ok_or(None), + )) + }) + .ok() + .and_then(|r| match r { + PyIterReturn::Return(v) => Some(v), + PyIterReturn::StopIteration(_) => None, + }) + } +} + impl SelfIter for PyTupleIterator {} impl IterNext for PyTupleIterator { fn next(zelf: &Py, _vm: &VirtualMachine) -> PyResult { diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index d43bbd8fc3b..f69163bd8ca 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -55,6 +55,10 @@ pub struct PyType { pub tp_version_tag: AtomicU32, } +/// Monotonic counter for type version tags. Once it reaches `u32::MAX`, +/// `assign_version_tag()` returns 0 permanently, disabling new inline-cache +/// entries but not invalidating correctness (cache misses fall back to the +/// generic path). static NEXT_TYPE_VERSION: AtomicU32 = AtomicU32::new(1); unsafe impl crate::object::Traverse for PyType { @@ -199,7 +203,8 @@ fn is_subtype_with_mro(a_mro: &[PyTypeRef], a: &Py, b: &Py) -> b } impl PyType { - /// Assign a fresh version tag. Returns 0 on overflow (all caches invalidated). + /// Assign a fresh version tag. Returns 0 if the version counter has been + /// exhausted, in which case no new cache entries can be created. 
pub fn assign_version_tag(&self) -> u32 { loop { let current = NEXT_TYPE_VERSION.load(Ordering::Relaxed); diff --git a/crates/vm/src/dict_inner.rs b/crates/vm/src/dict_inner.rs index 34c98ad9c75..e4d8174abbd 100644 --- a/crates/vm/src/dict_inner.rs +++ b/crates/vm/src/dict_inner.rs @@ -17,7 +17,9 @@ use crate::{ object::{Traverse, TraverseFn}, }; use alloc::fmt; -use core::{mem::size_of, ops::ControlFlow}; +use core::mem::size_of; +use core::ops::ControlFlow; +use core::sync::atomic::{AtomicU64, Ordering::Relaxed}; use num_traits::ToPrimitive; // HashIndex is intended to be same size with hash::PyHash @@ -34,6 +36,7 @@ type EntryIndex = usize; pub struct Dict { inner: PyRwLock>, + version: AtomicU64, } unsafe impl Traverse for Dict { @@ -98,6 +101,7 @@ impl Clone for Dict { fn clone(&self) -> Self { Self { inner: PyRwLock::new(self.inner.read().clone()), + version: AtomicU64::new(0), } } } @@ -111,6 +115,7 @@ impl Default for Dict { indices: vec![IndexEntry::FREE; 8], entries: Vec::new(), }), + version: AtomicU64::new(0), } } } @@ -254,6 +259,16 @@ impl DictInner { type PopInnerResult = ControlFlow>>; impl Dict { + /// Monotonically increasing version counter for mutation tracking. + pub fn version(&self) -> u64 { + self.version.load(Relaxed) + } + + /// Bump the version counter after any mutation. 
+ fn bump_version(&self) { + self.version.fetch_add(1, Relaxed); + } + fn read(&self) -> PyRwLockReadGuard<'_, DictInner> { self.inner.read() } @@ -283,6 +298,7 @@ impl Dict { }; if entry.index == index_index { let removed = core::mem::replace(&mut entry.value, value); + self.bump_version(); // defer dec RC break Some(removed); } else { @@ -298,6 +314,7 @@ impl Dict { continue; } inner.unchecked_push(index_index, hash, key.to_pyobject(vm), value, entry_index); + self.bump_version(); break None; } }; @@ -361,6 +378,7 @@ impl Dict { inner.indices.resize(8, IndexEntry::FREE); inner.used = 0; inner.filled = 0; + self.bump_version(); // defer dec rc core::mem::take(&mut inner.entries) }; @@ -439,6 +457,7 @@ impl Dict { continue; } inner.unchecked_push(index_index, hash, key.to_owned(), value, entry); + self.bump_version(); break None; } }; @@ -475,6 +494,7 @@ impl Dict { value.clone(), index_entry, ); + self.bump_version(); return Ok(value); } } @@ -511,6 +531,7 @@ impl Dict { let key_obj = key.to_pyobject(vm); let ret = (key_obj.clone(), value.clone()); inner.unchecked_push(index_index, hash, key_obj, value, index_entry); + self.bump_version(); return Ok(ret); } } @@ -698,6 +719,7 @@ impl Dict { } = IndexEntry::DUMMY; inner.used -= 1; let removed = slot.take(); + self.bump_version(); Ok(ControlFlow::Break(removed)) } @@ -727,6 +749,7 @@ impl Dict { // entry.index always refers valid index inner.indices.get_unchecked_mut(entry.index) } = IndexEntry::DUMMY; + self.bump_version(); Some((entry.key, entry.value)) } diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index e35efff6f19..ad077132ef1 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1,3 +1,4 @@ +use crate::anystr::AnyStr; #[cfg(feature = "flame")] use crate::bytecode::InstructionMetadata; use crate::{ @@ -5,18 +6,21 @@ use crate::{ TryFromObject, VirtualMachine, builtins::{ PyBaseException, PyBaseExceptionRef, PyBaseObject, PyCode, PyCoroutine, PyDict, PyDictRef, - PyFloat, 
PyGenerator, PyInt, PyInterpolation, PyList, PySet, PySlice, PyStr, PyStrInterned, - PyTemplate, PyTraceback, PyType, PyUtf8Str, + PyFloat, PyGenerator, PyInt, PyInterpolation, PyList, PyModule, PyProperty, PySet, PySlice, + PyStr, PyStrInterned, PyTemplate, PyTraceback, PyType, PyUtf8Str, asyncgenerator::PyAsyncGenWrappedValue, + builtin_func::PyNativeFunction, + descriptor::{MemberGetter, PyMemberDescriptor, PyMethodDescriptor}, frame::stack_analysis, function::{PyCell, PyCellRef, PyFunction}, + list::PyListIterator, range::PyRangeIterator, - tuple::{PyTuple, PyTupleRef}, + tuple::{PyTuple, PyTupleIterator, PyTupleRef}, }, bytecode::{ self, ADAPTIVE_BACKOFF_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, }, - convert::ToPyResult, + convert::{ToPyObject, ToPyResult}, coroutine::Coro, exceptions::ExceptionCtor, function::{ArgMapping, Either, FuncArgs}, @@ -24,6 +28,7 @@ use crate::{ object::{Traverse, TraverseFn}, protocol::{PyIter, PyIterReturn}, scope::Scope, + sliceable::SliceableSequenceOp, stdlib::{builtins, sys::monitoring, typing}, types::{PyComparisonOp, PyTypeFlags}, vm::{Context, PyMethod}, @@ -38,6 +43,7 @@ use core::sync::atomic::Ordering::{Acquire, Relaxed}; use indexmap::IndexMap; use itertools::Itertools; use malachite_bigint::BigInt; +use num_traits::Zero; use rustpython_common::atomic::{PyAtomic, Radium}; use rustpython_common::{ boxvec::BoxVec, @@ -766,8 +772,8 @@ impl ExecutingFrame<'_> { } // Fire 'opcode' trace event for sys.settrace when f_trace_opcodes - // is set. Skip RESUME and ExtendedArg (matching CPython's exclusion - // of these in _Py_call_instrumentation_instruction). + // is set. Skip RESUME and ExtendedArg + // (_Py_call_instrumentation_instruction). 
if !vm.is_none(&self.object.trace.lock()) && *self.object.trace_opcodes.lock() && !matches!( @@ -781,6 +787,33 @@ impl ExecutingFrame<'_> { } } + if let Err(exception) = vm.check_signals() { + #[cold] + fn handle_signal_exception( + frame: &mut ExecutingFrame<'_>, + exception: PyBaseExceptionRef, + idx: usize, + vm: &VirtualMachine, + ) -> FrameResult { + let (loc, _end_loc) = frame.code.locations[idx]; + let next = exception.__traceback__(); + let new_traceback = + PyTraceback::new(next, frame.object.to_owned(), idx as u32 * 2, loc.line); + exception.set_traceback_typed(Some(new_traceback.into_ref(&vm.ctx))); + vm.contextualize_exception(&exception); + frame.unwind_blocks(vm, UnwindReason::Raising { exception }) + } + match handle_signal_exception(self, exception, idx, vm) { + Ok(None) => {} + Ok(Some(value)) => { + break Ok(value); + } + Err(exception) => { + break Err(exception); + } + } + continue; + } let lasti_before = self.lasti(); let result = self.execute_instruction(op, arg, &mut do_extend_arg, vm); // Skip inline cache entries if instruction fell through (no jump). @@ -1190,8 +1223,6 @@ impl ExecutingFrame<'_> { extend_arg: &mut bool, vm: &VirtualMachine, ) -> FrameResult { - vm.check_signals()?; - flame_guard!(format!( "Frame::execute_instruction({})", instruction.display(arg, &self.code.code).to_string() @@ -1240,7 +1271,21 @@ impl ExecutingFrame<'_> { // TODO: In CPython, this does in-place unicode concatenation when // refcount is 1. Falls back to regular iadd for now. 
Instruction::BinaryOpInplaceAddUnicode => { - self.execute_bin_op(vm, bytecode::BinaryOperator::InplaceAdd) + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_str), Some(b_str)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let result = a_str.as_wtf8().py_add(b_str.as_wtf8()); + self.pop_value(); + self.pop_value(); + self.push_value(result.to_pyobject(vm)); + Ok(None) + } else { + self.deoptimize_binary_op(bytecode::BinaryOperator::InplaceAdd); + self.execute_bin_op(vm, bytecode::BinaryOperator::InplaceAdd) + } } Instruction::BinarySlice => { // Stack: [container, start, stop] -> [result] @@ -1368,8 +1413,21 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallKw { nargs } => { + let nargs = nargs.get(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_call_kw(vm, nargs, instr_idx, cache_base); + } // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] - let args = self.collect_keyword_args(nargs.get(arg)); + let args = self.collect_keyword_args(nargs); self.execute_call(args, vm) } Instruction::CallFunctionEx => { @@ -1408,8 +1466,36 @@ impl ExecutingFrame<'_> { self.push_value(matched); Ok(None) } - Instruction::CompareOp { op } => self.execute_compare(vm, op.get(arg)), + Instruction::CompareOp { op } => { + let op_val = op.get(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_compare_op(vm, op_val, instr_idx, cache_base); + } + self.execute_compare(vm, op_val) + } Instruction::ContainsOp(invert) => { + let 
instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_contains_op(vm, instr_idx, cache_base); + } + let b = self.pop_value(); let a = self.pop_value(); @@ -1593,6 +1679,18 @@ impl ExecutingFrame<'_> { Instruction::ForIter { .. } => { // Relative forward jump: target = lasti + caches + delta let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_for_iter(vm, instr_idx, cache_base); + } self.execute_for_iter(vm, target)?; Ok(None) } @@ -1815,7 +1913,21 @@ impl ExecutingFrame<'_> { Ok(None) } Instruction::LoadAttr { idx } => self.load_attr(vm, idx.get(arg)), - Instruction::LoadSuperAttr { arg: idx } => self.load_super_attr(vm, idx.get(arg)), + Instruction::LoadSuperAttr { arg: idx } => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_load_super_attr(vm, idx.get(arg), instr_idx, cache_base); + } + self.load_super_attr(vm, idx.get(arg)) + } Instruction::LoadBuildClass => { let build_class = if let Some(builtins_dict) = self.builtins_dict { builtins_dict @@ -2062,6 +2174,18 @@ impl ExecutingFrame<'_> { } Instruction::LoadGlobal(idx) => { let oparg = idx.get(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { 
+ self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_load_global(vm, oparg, instr_idx, cache_base); + } let name = &self.code.names[(oparg >> 1) as usize]; let x = self.load_global_or_builtin(name, vm)?; self.push_value(x); @@ -2535,7 +2659,21 @@ impl ExecutingFrame<'_> { self.execute_set_function_attribute(vm, attr.get(arg)) } Instruction::SetupAnnotations => self.setup_annotations(vm), - Instruction::StoreAttr { idx } => self.store_attr(vm, idx.get(arg)), + Instruction::StoreAttr { idx } => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_store_attr(vm, idx.get(arg), instr_idx, cache_base); + } + self.store_attr(vm, idx.get(arg)) + } Instruction::StoreDeref(i) => { let value = self.pop_value(); self.state.cells_frees[i.get(arg) as usize].set(Some(value)); @@ -2599,7 +2737,21 @@ impl ExecutingFrame<'_> { container.set_item(&*slice, value, vm)?; Ok(None) } - Instruction::StoreSubscr => self.execute_store_subscript(vm), + Instruction::StoreSubscr => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_store_subscr(vm, instr_idx, cache_base); + } + self.execute_store_subscript(vm) + } Instruction::Swap { index } => { let len = self.state.stack.len(); debug_assert!(len > 0, "stack underflow in SWAP"); @@ -2618,6 +2770,18 @@ impl ExecutingFrame<'_> { Ok(None) } Instruction::ToBool => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + 
self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_to_bool(vm, instr_idx, cache_base); + } let obj = self.pop_value(); let bool_val = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(bool_val).into()); @@ -2627,7 +2791,21 @@ impl ExecutingFrame<'_> { let args = args.get(arg); self.execute_unpack_ex(vm, args.before, args.after) } - Instruction::UnpackSequence { size } => self.unpack_sequence(size.get(arg), vm), + Instruction::UnpackSequence { size } => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_unpack_sequence(vm, instr_idx, cache_base); + } + self.unpack_sequence(size.get(arg), vm) + } Instruction::WithExceptStart => { // Stack: [..., __exit__, lasti, prev_exc, exc] // Call __exit__(type, value, tb) and push result @@ -2673,22 +2851,86 @@ impl ExecutingFrame<'_> { } Instruction::Send { .. } => { // (receiver, v -- receiver, retval) - // Pops v, sends it to receiver. On yield, pushes retval - // (so stack = [..., receiver, retval]). On return/StopIteration, - // also pushes retval and jumps to END_SEND which will pop receiver. - // Relative forward: target = lasti + caches(1) + delta + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let counter = self.code.instructions.read_cache_u16(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_cache_u16(cache_base, counter - 1); + } + } else { + self.specialize_send(instr_idx, cache_base); + } + let exit_label = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let val = self.pop_value(); + let receiver = self.top_value(); + + match self._send(receiver, val, vm)? 
{ + PyIterReturn::Return(value) => { + self.push_value(value); + Ok(None) + } + PyIterReturn::StopIteration(value) => { + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let stop_exc = vm.new_stop_iteration(value.clone()); + self.fire_exception_trace(&stop_exc, vm)?; + } + let value = vm.unwrap_or_none(value); + self.push_value(value); + self.jump(exit_label); + Ok(None) + } + } + } + Instruction::SendGen => { let exit_label = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + // Stack: [receiver, val] — peek receiver before popping + let receiver = self.nth_value(1); + let is_coro = self.builtin_coro(receiver).is_some(); let val = self.pop_value(); let receiver = self.top_value(); + if is_coro { + let coro = self.builtin_coro(receiver).unwrap(); + match coro.send(receiver, val, vm)? { + PyIterReturn::Return(value) => { + self.push_value(value); + return Ok(None); + } + PyIterReturn::StopIteration(value) => { + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let stop_exc = vm.new_stop_iteration(value.clone()); + self.fire_exception_trace(&stop_exc, vm)?; + } + let value = vm.unwrap_or_none(value); + self.push_value(value); + self.jump(exit_label); + return Ok(None); + } + } + } + // Deoptimize + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::Send { + target: Arg::marker(), + }, + ); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } match self._send(receiver, val, vm)? { PyIterReturn::Return(value) => { self.push_value(value); Ok(None) } PyIterReturn::StopIteration(value) => { - // Fire 'exception' trace event for StopIteration, - // matching SEND's exception handling. 
if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { let stop_exc = vm.new_stop_iteration(value.clone()); self.fire_exception_trace(&stop_exc, vm)?; @@ -2891,70 +3133,371 @@ impl ExecutingFrame<'_> { } self.load_attr_slow(vm, oparg) } - // Specialized BINARY_OP opcodes - Instruction::BinaryOpAddInt => { - let b = self.top_value(); - let a = self.nth_value(1); - if let (Some(a_int), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - let result = a_int.as_bigint() + b_int.as_bigint(); - self.pop_value(); + Instruction::LoadAttrModule => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let attr_name = self.code.names[oparg.name_idx() as usize]; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && let Some(module) = owner.downcast_ref_if_exact::(vm) + && let Ok(value) = module.get_attr(attr_name, vm) + { self.pop_value(); - self.push_value(vm.ctx.new_bigint(&result).into()); - Ok(None) - } else { - self.deoptimize_binary_op(bytecode::BinaryOperator::Add); - self.execute_bin_op(vm, bytecode::BinaryOperator::Add) + if oparg.is_method() { + self.push_value(value); + self.push_value_opt(None); + } else { + self.push_value(value); + } + return Ok(None); } - } - Instruction::BinaryOpSubtractInt => { - let b = self.top_value(); - let a = self.nth_value(1); - if let (Some(a_int), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - let result = a_int.as_bigint() - b_int.as_bigint(); - self.pop_value(); - self.pop_value(); - self.push_value(vm.ctx.new_bigint(&result).into()); - Ok(None) - } else { - self.deoptimize_binary_op(bytecode::BinaryOperator::Subtract); - self.execute_bin_op(vm, bytecode::BinaryOperator::Subtract) + // Deoptimize + unsafe { + self.code 
+ .instructions + .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } + self.load_attr_slow(vm, oparg) } - Instruction::BinaryOpMultiplyInt => { - let b = self.top_value(); - let a = self.nth_value(1); - if let (Some(a_int), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - let result = a_int.as_bigint() * b_int.as_bigint(); - self.pop_value(); + Instruction::LoadAttrNondescriptorNoDict => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version { + // Load cached class attribute directly (no dict, no data descriptor) + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + let attr = unsafe { &*(descr_ptr as *const PyObject) }.to_owned(); self.pop_value(); - self.push_value(vm.ctx.new_bigint(&result).into()); - Ok(None) - } else { - self.deoptimize_binary_op(bytecode::BinaryOperator::Multiply); - self.execute_bin_op(vm, bytecode::BinaryOperator::Multiply) + if oparg.is_method() { + self.push_value(attr); + self.push_value_opt(None); + } else { + self.push_value(attr); + } + return Ok(None); + } + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } + self.load_attr_slow(vm, oparg) } - Instruction::BinaryOpAddFloat => { - let b = self.top_value(); - let a = self.nth_value(1); - if let (Some(a_f), Some(b_f)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - let result = a_f.to_f64() + b_f.to_f64(); - self.pop_value(); - self.pop_value(); - 
self.push_value(vm.ctx.new_float(result).into()); - Ok(None) + Instruction::LoadAttrNondescriptorWithValues => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let attr_name = self.code.names[oparg.name_idx() as usize]; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version { + // Instance dict has priority — check if attr is shadowed + if let Some(dict) = owner.dict() + && let Some(value) = dict.get_item_opt(attr_name, vm)? + { + self.pop_value(); + if oparg.is_method() { + self.push_value(value); + self.push_value_opt(None); + } else { + self.push_value(value); + } + return Ok(None); + } + // Not in instance dict — use cached class attr + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + let attr = unsafe { &*(descr_ptr as *const PyObject) }.to_owned(); + self.pop_value(); + if oparg.is_method() { + self.push_value(attr); + self.push_value_opt(None); + } else { + self.push_value(attr); + } + return Ok(None); + } + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + self.load_attr_slow(vm, oparg) + } + Instruction::LoadAttrClass => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && let Some(owner_type) = owner.downcast_ref::() + && owner_type.tp_version_tag.load(Acquire) == type_version + { + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + let attr = unsafe { &*(descr_ptr as *const PyObject) }.to_owned(); + self.pop_value(); + if 
oparg.is_method() {
+                        self.push_value(attr);
+                        self.push_value_opt(None);
+                    } else {
+                        self.push_value(attr);
+                    }
+                    return Ok(None);
+                }
+                unsafe {
+                    self.code
+                        .instructions
+                        .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() });
+                    self.code
+                        .instructions
+                        .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE);
+                }
+                self.load_attr_slow(vm, oparg)
+            }
+            Instruction::LoadAttrSlot => {
+                let oparg = LoadAttr::new(u32::from(arg));
+                let instr_idx = self.lasti() as usize - 1;
+                let cache_base = instr_idx + 1;
+
+                let owner = self.top_value();
+                let type_version = self.code.instructions.read_cache_u32(cache_base + 1);
+
+                if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version {
+                    let slot_offset =
+                        self.code.instructions.read_cache_u32(cache_base + 3) as usize;
+                    if let Some(value) = owner.get_slot(slot_offset) {
+                        self.pop_value();
+                        if oparg.is_method() {
+                            self.push_value(value);
+                            self.push_value_opt(None);
+                        } else {
+                            self.push_value(value);
+                        }
+                        return Ok(None);
+                    }
+                    // Slot is None → AttributeError (fall through to slow path)
+                }
+                unsafe {
+                    self.code
+                        .instructions
+                        .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() });
+                    self.code
+                        .instructions
+                        .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE);
+                }
+                self.load_attr_slow(vm, oparg)
+            }
+            Instruction::LoadAttrProperty => {
+                let oparg = LoadAttr::new(u32::from(arg));
+                let instr_idx = self.lasti() as usize - 1;
+                let cache_base = instr_idx + 1;
+
+                let owner = self.top_value();
+                let type_version = self.code.instructions.read_cache_u32(cache_base + 1);
+
+                if type_version != 0 && owner.class().tp_version_tag.load(Acquire) == type_version {
+                    let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5);
+                    if descr_ptr != 0 {
+                        let descr = unsafe { &*(descr_ptr as *const PyObject) };
+                        if let Some(prop) = descr.downcast_ref::<PyProperty>()
+                            && let Some(getter) = prop.get_fget()
+                        {
+                            let owner = self.pop_value(); // pop last: the slow path needs owner
+                            let result = getter.call((owner,), vm)?;
+                            self.push_value(result);
+                            return Ok(None);
+                        }
+                    }
+                }
+                unsafe {
+                    self.code
+                        .instructions
+                        .replace_op(instr_idx, Instruction::LoadAttr { idx: Arg::marker() });
+                    self.code
+                        .instructions
+                        .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE);
+                }
+                self.load_attr_slow(vm, oparg)
+            }
+            Instruction::StoreAttrInstanceValue => {
+                let attr_idx = u32::from(arg);
+                let instr_idx = self.lasti() as usize - 1;
+                let cache_base = instr_idx + 1;
+                let attr_name = self.code.names[attr_idx as usize];
+                let owner = self.top_value();
+                let type_version = self.code.instructions.read_cache_u32(cache_base + 1);
+
+                if type_version != 0
+                    && owner.class().tp_version_tag.load(Acquire) == type_version
+                    && let Some(dict) = owner.dict()
+                {
+                    self.pop_value(); // owner
+                    let value = self.pop_value();
+                    dict.set_item(attr_name, value, vm)?;
+                    return Ok(None);
+                }
+                // Deoptimize
+                unsafe {
+                    self.code
+                        .instructions
+                        .replace_op(instr_idx, Instruction::StoreAttr { idx: Arg::marker() });
+                    self.code
+                        .instructions
+                        .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE);
+                }
+                self.store_attr(vm, attr_idx)
+            }
+            Instruction::StoreAttrSlot => {
+                let instr_idx = self.lasti() as usize - 1;
+                let cache_base = instr_idx + 1;
+                let type_version = self.code.instructions.read_cache_u32(cache_base + 1);
+                let version_match = type_version != 0 && {
+                    let owner = self.top_value();
+                    owner.class().tp_version_tag.load(Acquire) == type_version
+                };
+
+                if version_match {
+                    let slot_offset =
+                        self.code.instructions.read_cache_u32(cache_base + 3) as usize;
+                    let owner = self.pop_value();
+                    let value = self.pop_value();
+                    owner.set_slot(slot_offset, Some(value));
+                    return Ok(None);
+                }
+                // Deoptimize
+                let attr_idx = u32::from(arg);
+                unsafe {
+                    self.code
+                        .instructions
+                        .replace_op(instr_idx, Instruction::StoreAttr { idx: Arg::marker() });
+                    self.code
+                        .instructions
+                        .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE);
+                }
+
self.store_attr(vm, attr_idx) + } + Instruction::StoreSubscrListInt => { + // Stack: [value, obj, idx] (TOS=idx, TOS1=obj, TOS2=value) + let idx = self.pop_value(); + let obj = self.pop_value(); + let value = self.pop_value(); + if let Some(list) = obj.downcast_ref_if_exact::(vm) + && let Some(int_idx) = idx.downcast_ref_if_exact::(vm) + && let Ok(i) = int_idx.try_to_primitive::(vm) + { + let mut vec = list.borrow_vec_mut(); + if let Some(pos) = vec.wrap_index(i) { + vec[pos] = value; + return Ok(None); + } + drop(vec); + self.deoptimize_store_subscr(); + return Err(vm.new_index_error("list assignment index out of range")); + } + self.deoptimize_store_subscr(); + obj.set_item(&*idx, value, vm)?; + Ok(None) + } + Instruction::StoreSubscrDict => { + // Stack: [value, obj, idx] (TOS=idx, TOS1=obj, TOS2=value) + let idx = self.pop_value(); + let obj = self.pop_value(); + let value = self.pop_value(); + if let Some(dict) = obj.downcast_ref_if_exact::(vm) { + dict.set_item(&*idx, value, vm)?; + Ok(None) + } else { + self.deoptimize_store_subscr(); + obj.set_item(&*idx, value, vm)?; + Ok(None) + } + } + // Specialized BINARY_OP opcodes + Instruction::BinaryOpAddInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let result = a_int.as_bigint() + b_int.as_bigint(); + self.pop_value(); + self.pop_value(); + self.push_value(vm.ctx.new_bigint(&result).into()); + Ok(None) + } else { + self.deoptimize_binary_op(bytecode::BinaryOperator::Add); + self.execute_bin_op(vm, bytecode::BinaryOperator::Add) + } + } + Instruction::BinaryOpSubtractInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let result = a_int.as_bigint() - b_int.as_bigint(); + self.pop_value(); + self.pop_value(); + 
self.push_value(vm.ctx.new_bigint(&result).into()); + Ok(None) + } else { + self.deoptimize_binary_op(bytecode::BinaryOperator::Subtract); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subtract) + } + } + Instruction::BinaryOpMultiplyInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let result = a_int.as_bigint() * b_int.as_bigint(); + self.pop_value(); + self.pop_value(); + self.push_value(vm.ctx.new_bigint(&result).into()); + Ok(None) + } else { + self.deoptimize_binary_op(bytecode::BinaryOperator::Multiply); + self.execute_bin_op(vm, bytecode::BinaryOperator::Multiply) + } + } + Instruction::BinaryOpAddFloat => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_f), Some(b_f)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let result = a_f.to_f64() + b_f.to_f64(); + self.pop_value(); + self.pop_value(); + self.push_value(vm.ctx.new_float(result).into()); + Ok(None) } else { self.deoptimize_binary_op(bytecode::BinaryOperator::Add); self.execute_bin_op(vm, bytecode::BinaryOperator::Add) @@ -2994,43 +3537,171 @@ impl ExecutingFrame<'_> { self.execute_bin_op(vm, bytecode::BinaryOperator::Multiply) } } - Instruction::CallPyExactArgs => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); - let nargs: u32 = arg.into(); - // Stack: [callable, self_or_null, arg1, ..., argN] - let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() - && func.func_version() == cached_version - && cached_version != 0 - { - let args: Vec = self.pop_multiple(nargs as usize).collect(); - let _null = self.pop_value_opt(); // self_or_null (NULL) - let callable = self.pop_value(); - let func = callable.downcast_ref::().unwrap(); - let result = 
func.invoke_exact_args(&args, vm)?; - self.push_value(result); + Instruction::BinaryOpAddUnicode => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_str), Some(b_str)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let result = a_str.as_wtf8().py_add(b_str.as_wtf8()); + self.pop_value(); + self.pop_value(); + self.push_value(result.to_pyobject(vm)); Ok(None) } else { - // Deoptimize - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::Call { - nargs: Arg::marker(), - }, - ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); - } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.deoptimize_binary_op(bytecode::BinaryOperator::Add); + self.execute_bin_op(vm, bytecode::BinaryOperator::Add) } } - Instruction::CallBoundMethodExactArgs => { - let instr_idx = self.lasti() as usize - 1; + Instruction::BinaryOpSubscrListInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(list), Some(idx)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Ok(i) = idx.try_to_primitive::(vm) + { + let vec = list.borrow_vec(); + if let Some(pos) = vec.wrap_index(i) { + let value = vec.do_get(pos); + drop(vec); + self.pop_value(); + self.pop_value(); + self.push_value(value); + return Ok(None); + } + drop(vec); + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + return Err(vm.new_index_error("list index out of range")); + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } + Instruction::BinaryOpSubscrTupleInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(tuple), Some(idx)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Ok(i) = idx.try_to_primitive::(vm) + { + let elements = tuple.as_slice(); + if let Some(pos) = 
elements.wrap_index(i) { + let value = elements[pos].clone(); + self.pop_value(); + self.pop_value(); + self.push_value(value); + return Ok(None); + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + return Err(vm.new_index_error("tuple index out of range")); + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } + Instruction::BinaryOpSubscrDict => { + let b = self.top_value(); + let a = self.nth_value(1); + if let Some(dict) = a.downcast_ref_if_exact::(vm) { + match dict.get_item_opt(b, vm) { + Ok(Some(value)) => { + self.pop_value(); + self.pop_value(); + self.push_value(value); + return Ok(None); + } + Ok(None) => { + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + let key = self.pop_value(); + self.pop_value(); + return Err(vm.new_key_error(key)); + } + Err(e) => { + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + return Err(e); + } + } + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } + Instruction::BinaryOpSubscrStrInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_str), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Ok(i) = b_int.try_to_primitive::(vm) + { + match a_str.getitem_by_index(vm, i) { + Ok(ch) => { + self.pop_value(); + self.pop_value(); + self.push_value(PyStr::from(ch).into_pyobject(vm)); + return Ok(None); + } + Err(e) => { + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + return Err(e); + } + } + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } + Instruction::BinaryOpSubscrListSlice => { + let b = self.top_value(); + let a = self.nth_value(1); + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref::().is_some() + { + let b_owned = self.pop_value(); + let a_owned 
= self.pop_value(); + let result = a_owned.get_item(b_owned.as_object(), vm)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_binary_op(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } + Instruction::CallPyExactArgs => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + // Stack: [callable, self_or_null, arg1, ..., argN] + let callable = self.nth_value(nargs + 1); + if let Some(func) = callable.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let args: Vec = self.pop_multiple(nargs as usize).collect(); + let _null = self.pop_value_opt(); // self_or_null (NULL) + let callable = self.pop_value(); + let func = callable.downcast_ref::().unwrap(); + let result = func.invoke_exact_args(&args, vm)?; + self.push_value(result); + Ok(None) + } else { + // Deoptimize + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::Call { + nargs: Arg::marker(), + }, + ); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + } + Instruction::CallBoundMethodExactArgs => { + let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); @@ -3067,1491 +3738,3650 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } } - // All INSTRUMENTED_* opcodes delegate to a cold function to keep - // the hot instruction loop free of monitoring overhead. - _ => self.execute_instrumented(instruction, arg, vm), - } - } - - /// Handle all INSTRUMENTED_* opcodes. This function is cold — it only - /// runs when sys.monitoring has rewritten the bytecode. 
- #[cold] - fn execute_instrumented( - &mut self, - instruction: Instruction, - arg: bytecode::OpArg, - vm: &VirtualMachine, - ) -> FrameResult { - debug_assert!( - instruction.is_instrumented(), - "execute_instrumented called with non-instrumented opcode {instruction:?}" - ); - self.monitoring_mask = vm.state.monitoring_events.load(); - match instruction { - Instruction::InstrumentedResume => { - // Version check: re-instrument if stale - let global_ver = vm - .state - .instrumentation_version - .load(atomic::Ordering::Acquire); - let code_ver = self - .code - .instrumentation_version - .load(atomic::Ordering::Acquire); - if code_ver != global_ver { - let events = { - let state = vm.state.monitoring.lock(); - state.events_for_code(self.code.get_id()) - }; - monitoring::instrument_code(self.code, events); - self.code - .instrumentation_version - .store(global_ver, atomic::Ordering::Release); - // Re-execute (may have been de-instrumented to base Resume) - self.update_lasti(|i| *i -= 1); - return Ok(None); - } - let resume_type = u32::from(arg); - let offset = (self.lasti() - 1) * 2; - if resume_type == 0 { - if self.monitoring_mask & monitoring::EVENT_PY_START != 0 { - monitoring::fire_py_start(vm, self.code, offset)?; + Instruction::CallLen => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + // Stack: [callable, null, arg] + let obj = self.pop_value(); // arg + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag { + let len = obj.length(vm)?; + self.push_value(vm.ctx.new_int(len).into()); + return Ok(None); } - } else if self.monitoring_mask & monitoring::EVENT_PY_RESUME != 0 { - monitoring::fire_py_resume(vm, self.code, offset)?; + // Guard failed — re-push and fallback + self.push_value(callable); + 
self.push_value_opt(_null); + self.push_value(obj); } - Ok(None) + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedReturnValue => { - let value = self.pop_value(); - if self.monitoring_mask & monitoring::EVENT_PY_RETURN != 0 { - let offset = (self.lasti() - 1) * 2; - monitoring::fire_py_return(vm, self.code, offset, &value)?; + Instruction::CallIsinstance => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 2 { + // Stack: [callable, null, obj, class_info] + let class_info = self.pop_value(); + let obj = self.pop_value(); + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag { + let result = obj.is_instance(&class_info, vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + return Ok(None); + } + // Guard failed — re-push and fallback + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); + self.push_value(class_info); } - self.unwind_blocks(vm, UnwindReason::Returning { value }) + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedYieldValue => { - debug_assert!( - self.state - .stack - .iter() - .flatten() - .all(|sr| !sr.is_borrowed()), - "borrowed refs on stack at yield point" - ); - let value = self.pop_value(); - if self.monitoring_mask & monitoring::EVENT_PY_YIELD != 0 { - let offset = (self.lasti() - 1) * 2; - monitoring::fire_py_yield(vm, self.code, offset, &value)?; + Instruction::CallType1 => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + // Stack: 
[callable, null, arg] + let obj = self.pop_value(); + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag { + let tp = obj.class().to_owned().into(); + self.push_value(tp); + return Ok(None); + } + // Guard failed — re-push and fallback + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); } - let oparg = u32::from(arg); - let wrap = oparg == 0; - let value = if wrap && self.code.flags.contains(bytecode::CodeFlags::COROUTINE) { - PyAsyncGenWrappedValue(value).into_pyobject(vm) - } else { - value - }; - Ok(Some(ExecutionResult::Yield(value))) - } - Instruction::InstrumentedCall => { - let args = self.collect_positional_args(u32::from(arg)); - self.execute_call_instrumented(args, vm) - } - Instruction::InstrumentedCallKw => { - let args = self.collect_keyword_args(u32::from(arg)); - self.execute_call_instrumented(args, vm) + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedCallFunctionEx => { - let args = self.collect_ex_args(vm)?; - self.execute_call_instrumented(args, vm) + Instruction::CallStr1 => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + let obj = self.pop_value(); + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag { + let result = obj.str(vm)?; + self.push_value(result.into()); + return Ok(None); + } + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedLoadSuperAttr => { - let oparg = 
bytecode::LoadSuperAttr::from(u32::from(arg)); - let offset = (self.lasti() - 1) * 2; - // Fire CALL event before super() call - let call_args = if self.monitoring_mask & monitoring::EVENT_CALL != 0 { - let global_super: PyObjectRef = self.nth_value(2).to_owned(); - let arg0 = if oparg.has_class() { - self.nth_value(1).to_owned() - } else { - monitoring::get_missing(vm) - }; - monitoring::fire_call(vm, self.code, offset, &global_super, arg0.clone())?; - Some((global_super, arg0)) - } else { - None - }; - match self.load_super_attr(vm, oparg) { - Ok(result) => { - // Fire C_RETURN on success - if let Some((global_super, arg0)) = call_args { - monitoring::fire_c_return(vm, self.code, offset, &global_super, arg0)?; + Instruction::CallTuple1 => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + let obj = self.pop_value(); + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag { + // tuple(x) returns x as-is when x is already an exact tuple + if let Ok(tuple) = obj.clone().downcast_exact::(vm) { + self.push_value(tuple.into_pyref().into()); + } else { + let elements: Vec = vm.extract_elements_with(&obj, Ok)?; + self.push_value(vm.ctx.new_tuple(elements).into()); } - Ok(result) + return Ok(None); } - Err(exc) => { - // Fire C_RAISE on failure - let exc = if let Some((global_super, arg0)) = call_args { - match monitoring::fire_c_raise( - vm, - self.code, - offset, - &global_super, - arg0, - ) { - Ok(()) => exc, - Err(monitor_exc) => monitor_exc, - } - } else { - exc + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallBuiltinO => { + let instr_idx = 
self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + let obj = self.pop_value(); + let _null = self.pop_value_opt(); + let callable = self.pop_value(); + let callable_tag = &*callable as *const PyObject as u32; + if cached_tag == callable_tag + && let Some(native) = callable.downcast_ref::() + { + let args = FuncArgs { + args: vec![obj], + kwargs: Default::default(), }; - Err(exc) + let result = (native.value.func)(vm, args)?; + self.push_value(result); + return Ok(None); } + self.push_value(callable); + self.push_value_opt(_null); + self.push_value(obj); } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedJumpForward => { - let src_offset = (self.lasti() - 1) * 2; - let target_idx = self.lasti() + u32::from(arg); - let target = bytecode::Label(target_idx); - self.jump(target); - if self.monitoring_mask & monitoring::EVENT_JUMP != 0 { - monitoring::fire_jump(vm, self.code, src_offset, target.0 * 2)?; - } - Ok(None) - } - Instruction::InstrumentedJumpBackward => { - let src_offset = (self.lasti() - 1) * 2; - let target_idx = self.lasti() + 1 - u32::from(arg); - let target = bytecode::Label(target_idx); - self.jump(target); - if self.monitoring_mask & monitoring::EVENT_JUMP != 0 { - monitoring::fire_jump(vm, self.code, src_offset, target.0 * 2)?; - } - Ok(None) - } - Instruction::InstrumentedForIter => { - let src_offset = (self.lasti() - 1) * 2; - let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); - let continued = self.execute_for_iter(vm, target)?; - if continued { - if self.monitoring_mask & monitoring::EVENT_BRANCH_LEFT != 0 { - let dest_offset = (self.lasti() + 1) * 2; // after caches - monitoring::fire_branch_left(vm, self.code, src_offset, dest_offset)?; - } - } else if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { - 
let dest_offset = self.lasti() * 2; - monitoring::fire_branch_right(vm, self.code, src_offset, dest_offset)?; + Instruction::CallBuiltinFast => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + let func = if cached_tag == callable_tag { + callable + .downcast_ref::() + .map(|n| n.value.func) + } else { + None + }; + if let Some(func) = func { + let positional_args: Vec = + self.pop_multiple(nargs as usize).collect(); + self.pop_value_opt(); // null (self_or_null) + self.pop_value(); // callable + let args = FuncArgs { + args: positional_args, + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); } - Ok(None) + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedEndFor => { - // Stack: [value, receiver(iter), ...] 
- // PyGen_Check: only fire STOP_ITERATION for generators - let is_gen = self - .nth_value(1) - .downcast_ref::() - .is_some(); - let value = self.pop_value(); - if is_gen && self.monitoring_mask & monitoring::EVENT_STOP_ITERATION != 0 { - let offset = (self.lasti() - 1) * 2; - monitoring::fire_stop_iteration(vm, self.code, offset, &value)?; + Instruction::CallPyGeneral => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + if let Some(func) = callable.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let args = self.collect_positional_args(nargs); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let func = callable.downcast_ref::().unwrap(); + let final_args = if let Some(self_val) = self_or_null { + let mut args = args; + args.prepend_arg(self_val); + args + } else { + args + }; + let result = func.invoke(final_args, vm)?; + self.push_value(result); + Ok(None) + } else { + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Ok(None) } - Instruction::InstrumentedEndSend => { - let value = self.pop_value(); - let receiver = self.pop_value(); - // PyGen_Check || PyCoro_CheckExact - let is_gen_or_coro = receiver - .downcast_ref::() - .is_some() - || receiver - .downcast_ref::() - .is_some(); - if is_gen_or_coro && self.monitoring_mask & monitoring::EVENT_STOP_ITERATION != 0 { - let offset = (self.lasti() - 1) * 2; - monitoring::fire_stop_iteration(vm, self.code, offset, &value)?; + Instruction::CallBoundMethodGeneral => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + if let 
Some(func) = callable.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let args = self.collect_positional_args(nargs); + let self_val = self.pop_value(); + let callable = self.pop_value(); + let func = callable.downcast_ref::().unwrap(); + let mut final_args = args; + final_args.prepend_arg(self_val); + let result = func.invoke(final_args, vm)?; + self.push_value(result); + Ok(None) + } else { + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - self.push_value(value); - Ok(None) } - Instruction::InstrumentedPopJumpIfTrue => { - let src_offset = (self.lasti() - 1) * 2; - let target_idx = self.lasti() + 1 + u32::from(arg); - let obj = self.pop_value(); - let value = obj.try_to_bool(vm)?; - if value { - self.jump(bytecode::Label(target_idx)); - if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { - monitoring::fire_branch_right(vm, self.code, src_offset, target_idx * 2)?; + Instruction::CallListAppend => { + let nargs: u32 = arg.into(); + if nargs == 1 { + // Stack: [list.append (bound method), self_or_null (list), item] + let item = self.pop_value(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + if let Some(list_obj) = self_or_null.as_ref() + && let Some(list) = list_obj.downcast_ref_if_exact::(vm) + { + list.append(item); + self.push_value(vm.ctx.none()); + return Ok(None); } + self.push_value(callable); + self.push_value_opt(self_or_null); + self.push_value(item); } - Ok(None) + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedPopJumpIfFalse => { - let src_offset = (self.lasti() - 1) * 2; - let target_idx = self.lasti() + 1 + u32::from(arg); - let obj = self.pop_value(); - let value = obj.try_to_bool(vm)?; - if !value { - self.jump(bytecode::Label(target_idx)); - if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { - 
monitoring::fire_branch_right(vm, self.code, src_offset, target_idx * 2)?; + Instruction::CallMethodDescriptorNoargs => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 0 { + // Stack: [callable, self_or_null] — peek to get func ptr + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - 1].is_some(); + let callable = self.nth_value(1); + let callable_tag = callable as *const PyObject as u32; + let func = if cached_tag == callable_tag && self_or_null_is_some { + callable + .downcast_ref::() + .map(|d| d.method.func) + } else { + None + }; + if let Some(func) = func { + let self_val = self.pop_value_opt().unwrap(); + self.pop_value(); // callable + let args = FuncArgs { + args: vec![self_val], + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); } } - Ok(None) + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedPopJumpIfNone => { - let src_offset = (self.lasti() - 1) * 2; - let target_idx = self.lasti() + 1 + u32::from(arg); - let value = self.pop_value(); - if vm.is_none(&value) { - self.jump(bytecode::Label(target_idx)); - if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { - monitoring::fire_branch_right(vm, self.code, src_offset, target_idx * 2)?; + Instruction::CallMethodDescriptorO => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + if nargs == 1 { + // Stack: [callable, self_or_null, arg1] + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - 2].is_some(); + let callable = self.nth_value(2); + let callable_tag = 
callable as *const PyObject as u32; + let func = if cached_tag == callable_tag && self_or_null_is_some { + callable + .downcast_ref::() + .map(|d| d.method.func) + } else { + None + }; + if let Some(func) = func { + let obj = self.pop_value(); + let self_val = self.pop_value_opt().unwrap(); + self.pop_value(); // callable + let args = FuncArgs { + args: vec![self_val, obj], + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); } } - Ok(None) + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedPopJumpIfNotNone => { - let src_offset = (self.lasti() - 1) * 2; - let target_idx = self.lasti() + 1 + u32::from(arg); - let value = self.pop_value(); - if !vm.is_none(&value) { - self.jump(bytecode::Label(target_idx)); - if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { - monitoring::fire_branch_right(vm, self.code, src_offset, target_idx * 2)?; - } + Instruction::CallMethodDescriptorFast => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let func = if cached_tag == callable_tag && self_or_null_is_some { + callable + .downcast_ref::() + .map(|d| d.method.func) + } else { + None + }; + if let Some(func) = func { + let positional_args: Vec = + self.pop_multiple(nargs as usize).collect(); + let self_val = self.pop_value_opt().unwrap(); + self.pop_value(); // callable + let mut all_args = Vec::with_capacity(nargs as usize + 1); + all_args.push(self_val); + all_args.extend(positional_args); + let args = FuncArgs { + args: all_args, + kwargs: 
Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); } - Ok(None) + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedNotTaken => { - if self.monitoring_mask & monitoring::EVENT_BRANCH_LEFT != 0 { - let not_taken_idx = self.lasti() as usize - 1; - // Scan backwards past CACHE entries to find the branch instruction - let mut branch_idx = not_taken_idx.saturating_sub(1); - while branch_idx > 0 - && matches!(self.code.instructions[branch_idx].op, Instruction::Cache) - { - branch_idx -= 1; - } - let src_offset = (branch_idx as u32) * 2; - let dest_offset = self.lasti() * 2; - monitoring::fire_branch_left(vm, self.code, src_offset, dest_offset)?; + Instruction::CallBuiltinClass => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + if cached_tag == callable_tag && callable.downcast_ref::().is_some() { + let args = self.collect_positional_args(nargs); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let final_args = if let Some(self_val) = self_or_null { + let mut args = args; + args.prepend_arg(self_val); + args + } else { + args + }; + let result = callable.call(final_args, vm)?; + self.push_value(result); + return Ok(None); } - Ok(None) - } - Instruction::InstrumentedPopIter => { - // BRANCH_RIGHT is fired by InstrumentedForIter, not here. 
- self.pop_value(); - Ok(None) + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedEndAsyncFor => { - if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { - let oparg_val = u32::from(arg); - // src = next_instr - oparg (END_SEND position) - let src_offset = (self.lasti() - oparg_val) * 2; - // dest = this_instr + 1 - let dest_offset = self.lasti() * 2; - monitoring::fire_branch_right(vm, self.code, src_offset, dest_offset)?; + Instruction::CallMethodDescriptorFastWithKeywords => { + // Native function interface is uniform regardless of keyword support + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let func = if cached_tag == callable_tag && self_or_null_is_some { + callable + .downcast_ref::() + .map(|d| d.method.func) + } else { + None + }; + if let Some(func) = func { + let positional_args: Vec = + self.pop_multiple(nargs as usize).collect(); + let self_val = self.pop_value_opt().unwrap(); + self.pop_value(); // callable + let mut all_args = Vec::with_capacity(nargs as usize + 1); + all_args.push(self_val); + all_args.extend(positional_args); + let args = FuncArgs { + args: all_args, + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); } - let exc = self.pop_value(); - let _awaitable = self.pop_value(); - let exc = exc - .downcast::() - .expect("EndAsyncFor expects exception on stack"); - if exc.fast_isinstance(vm.ctx.exceptions.stop_async_iteration) { - vm.set_exception(None); - Ok(None) + self.deoptimize_call(); + let args = 
self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallBuiltinFastWithKeywords => { + // Native function interface is uniform regardless of keyword support + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + let func = if cached_tag == callable_tag { + callable + .downcast_ref::() + .map(|n| n.value.func) } else { - Err(exc) + None + }; + if let Some(func) = func { + let positional_args: Vec = + self.pop_multiple(nargs as usize).collect(); + self.pop_value_opt(); // null (self_or_null) + self.pop_value(); // callable + let args = FuncArgs { + args: positional_args, + kwargs: Default::default(), + }; + let result = func(vm, args)?; + self.push_value(result); + return Ok(None); } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } - Instruction::InstrumentedLine => { - let idx = self.lasti() as usize - 1; - let offset = idx as u32 * 2; - - // Read the full side-table chain before firing any events, - // because a callback may de-instrument and clear the tables. 
- let (real_op_byte, also_instruction) = { - let data = self.code.monitoring_data.lock(); - let line_op = data.as_ref().map(|d| d.line_opcodes[idx]).unwrap_or(0); - if line_op == u8::from(Instruction::InstrumentedInstruction) { - // LINE wraps INSTRUCTION: resolve the INSTRUCTION side-table too - let inst_op = data - .as_ref() - .map(|d| d.per_instruction_opcodes[idx]) - .unwrap_or(0); - (inst_op, true) + Instruction::CallNonPyGeneral => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + if cached_tag == callable_tag { + let args = self.collect_positional_args(nargs); + return self.execute_call(args, vm); + } + self.deoptimize_call(); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallKwPy => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] + // callable is at position nargs + 2 from top (nargs args + kwarg_names + self_or_null) + let callable = self.nth_value(nargs + 2); + if let Some(func) = callable.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let args = self.collect_keyword_args(nargs); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let func = callable.downcast_ref::().unwrap(); + let final_args = if let Some(self_val) = self_or_null { + let mut args = args; + args.prepend_arg(self_val); + args } else { - (line_op, false) + args + }; + let result = func.invoke(final_args, vm)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_call_kw(); + let args = 
self.collect_keyword_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallKwBoundMethod => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + // Stack: [callable, self_or_null(=self), arg1, ..., argN, kwarg_names] + let callable = self.nth_value(nargs + 2); + if let Some(func) = callable.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let args = self.collect_keyword_args(nargs); + let self_val = self.pop_value(); // self_or_null is always Some here + let callable = self.pop_value(); + let func = callable.downcast_ref::().unwrap(); + let mut final_args = args; + final_args.prepend_arg(self_val); + let result = func.invoke(final_args, vm)?; + self.push_value(result); + return Ok(None); + } + self.deoptimize_call_kw(); + let args = self.collect_keyword_args(nargs); + self.execute_call(args, vm) + } + Instruction::CallKwNonPy => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let nargs: u32 = arg.into(); + let callable = self.nth_value(nargs + 2); + let callable_tag = callable as *const PyObject as u32; + if cached_tag == callable_tag { + let args = self.collect_keyword_args(nargs); + return self.execute_call(args, vm); + } + self.deoptimize_call_kw(); + let args = self.collect_keyword_args(nargs); + self.execute_call(args, vm) + } + Instruction::LoadSuperAttrAttr => { + let oparg = u32::from(arg); + let attr_name = self.code.names[(oparg >> 2) as usize]; + // Stack: [global_super, class, self] + let self_obj = self.top_value(); + let class_obj = self.nth_value(1); + let global_super = self.nth_value(2); + // Guard: global_super is builtin super and class is a type + if global_super.is(&vm.ctx.types.super_type.as_object()) + && class_obj.downcast_ref::().is_some() + { 
+ let class = class_obj.downcast_ref::().unwrap(); + let start_type = self_obj.class(); + // MRO lookup: skip classes up to and including `class`, then search + let mro: Vec> = start_type.mro_map_collect(|x| x.to_owned()); + let mut found = None; + let mut past_class = false; + for cls in &mro { + if !past_class { + if cls.is(class) { + past_class = true; + } + continue; + } + if let Some(descr) = cls.get_direct_attr(attr_name) { + // Call descriptor __get__ if available + // Pass None for obj when self IS its own type (classmethod) + let obj_arg = if self_obj.is(start_type.as_object()) { + None + } else { + Some(self_obj.to_owned()) + }; + let result = vm + .call_get_descriptor_specific( + &descr, + obj_arg, + Some(start_type.as_object().to_owned()), + ) + .unwrap_or(Ok(descr))?; + found = Some(result); + break; + } } - }; - debug_assert!( - real_op_byte != 0, - "INSTRUMENTED_LINE at {idx} without stored opcode" - ); - - // Fire LINE event only if line changed - if let Some((loc, _)) = self.code.locations.get(idx) { - let line = loc.line.get() as u32; - if line != self.state.prev_line && line > 0 { - self.state.prev_line = line; - monitoring::fire_line(vm, self.code, offset, line)?; + if let Some(attr) = found { + self.pop_value(); // self + self.pop_value(); // class + self.pop_value(); // super + self.push_value(attr); + return Ok(None); } } - - // If the LINE position also had INSTRUCTION, fire that event too - if also_instruction { - monitoring::fire_instruction(vm, self.code, offset)?; + // Deoptimize + unsafe { + self.code.instructions.replace_op( + self.lasti() as usize - 1, + Instruction::LoadSuperAttr { arg: Arg::marker() }, + ); + let cache_base = self.lasti() as usize; + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } - - // Re-dispatch to the real original opcode - let original_op = Instruction::try_from(real_op_byte) - .expect("invalid opcode in side-table chain"); - let lasti_before_dispatch = self.lasti(); 
- let result = if original_op.to_base().is_some() { - self.execute_instrumented(original_op, arg, vm) + let oparg = LoadSuperAttr::new(oparg); + self.load_super_attr(vm, oparg) + } + Instruction::LoadSuperAttrMethod => { + let oparg = u32::from(arg); + let attr_name = self.code.names[(oparg >> 2) as usize]; + // Stack: [global_super, class, self] + let self_obj = self.top_value(); + let class_obj = self.nth_value(1); + let global_super = self.nth_value(2); + // Guard: global_super is builtin super and class is a type + if global_super.is(&vm.ctx.types.super_type.as_object()) + && class_obj.downcast_ref::().is_some() + { + let class = class_obj.downcast_ref::().unwrap(); + let self_val = self_obj.to_owned(); + let start_type = self_obj.class(); + // MRO lookup + let mro: Vec> = start_type.mro_map_collect(|x| x.to_owned()); + let mut found = None; + let mut past_class = false; + for cls in &mro { + if !past_class { + if cls.is(class) { + past_class = true; + } + continue; + } + if let Some(descr) = cls.get_direct_attr(attr_name) { + let descr_cls = descr.class(); + if descr_cls + .slots + .flags + .has_feature(PyTypeFlags::METHOD_DESCRIPTOR) + { + // Method descriptor: push unbound func + self + // CALL will prepend self as first positional arg + found = Some((descr, true)); + } else if let Some(descr_get) = descr_cls.slots.descr_get.load() { + // Has __get__ but not METHOD_DESCRIPTOR: bind it + let bound = descr_get( + descr, + Some(self_val.clone()), + Some(start_type.as_object().to_owned()), + vm, + )?; + found = Some((bound, false)); + } else { + // Plain attribute + found = Some((descr, false)); + } + break; + } + } + if let Some((attr, is_method)) = found { + self.pop_value(); // self + self.pop_value(); // class + self.pop_value(); // super + self.push_value(attr); + if is_method { + self.push_value(self_val); + } else { + self.push_null(); + } + return Ok(None); + } + } + // Deoptimize + unsafe { + self.code.instructions.replace_op( + self.lasti() as usize - 
1, + Instruction::LoadSuperAttr { arg: Arg::marker() }, + ); + let cache_base = self.lasti() as usize; + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + let oparg = LoadSuperAttr::new(oparg); + self.load_super_attr(vm, oparg) + } + Instruction::CompareOpInt => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let op = self.compare_op_from_arg(arg); + let result = op.eval_ord(a_int.as_bigint().cmp(b_int.as_bigint())); + self.pop_value(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) } else { - let mut do_extend_arg = false; - self.execute_instruction(original_op, arg, &mut do_extend_arg, vm) - }; - let orig_caches = original_op.to_base().unwrap_or(original_op).cache_entries(); - if orig_caches > 0 && self.lasti() == lasti_before_dispatch { - self.update_lasti(|i| *i += orig_caches as u32); + self.deoptimize_compare_op(); + let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::ComparisonOperator::Equal); + self.execute_compare(vm, op) } - result } - Instruction::InstrumentedInstruction => { - let idx = self.lasti() as usize - 1; - let offset = idx as u32 * 2; - - // Get original opcode from side-table - let original_op_byte = { - let data = self.code.monitoring_data.lock(); - data.as_ref() - .map(|d| d.per_instruction_opcodes[idx]) - .unwrap_or(0) - }; - debug_assert!( - original_op_byte != 0, - "INSTRUMENTED_INSTRUCTION at {idx} without stored opcode" - ); - - // Fire INSTRUCTION event - monitoring::fire_instruction(vm, self.code, offset)?; - - // Re-dispatch to original opcode - let original_op = Instruction::try_from(original_op_byte) - .expect("invalid opcode in instruction side-table"); - let lasti_before_dispatch = self.lasti(); - let result = if original_op.to_base().is_some() { - self.execute_instrumented(original_op, arg, 
vm) + Instruction::CompareOpFloat => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_f), Some(b_f)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let op = self.compare_op_from_arg(arg); + let result = a_f + .to_f64() + .partial_cmp(&b_f.to_f64()) + .is_some_and(|ord| op.eval_ord(ord)); + self.pop_value(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) } else { - let mut do_extend_arg = false; - self.execute_instruction(original_op, arg, &mut do_extend_arg, vm) - }; - let orig_caches = original_op.to_base().unwrap_or(original_op).cache_entries(); - if orig_caches > 0 && self.lasti() == lasti_before_dispatch { - self.update_lasti(|i| *i += orig_caches as u32); + self.deoptimize_compare_op(); + let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::ComparisonOperator::Equal); + self.execute_compare(vm, op) } - result } - _ => { - unreachable!("{instruction:?} instruction should not be executed") + Instruction::CompareOpStr => { + let b = self.top_value(); + let a = self.nth_value(1); + if let (Some(a_str), Some(b_str)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let op = self.compare_op_from_arg(arg); + let result = op.eval_ord(a_str.as_wtf8().cmp(b_str.as_wtf8())); + self.pop_value(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } else { + self.deoptimize_compare_op(); + let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::ComparisonOperator::Equal); + self.execute_compare(vm, op) + } + } + Instruction::ToBoolBool => { + let obj = self.top_value(); + if obj.class().is(vm.ctx.types.bool_type) { + // Already a bool, no-op + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ToBoolInt => { + 
let obj = self.top_value(); + if let Some(int_val) = obj.downcast_ref_if_exact::(vm) { + let result = !int_val.as_bigint().is_zero(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ToBoolNone => { + let obj = self.top_value(); + if obj.class().is(vm.ctx.types.none_type) { + self.pop_value(); + self.push_value(vm.ctx.new_bool(false).into()); + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ToBoolList => { + let obj = self.top_value(); + if let Some(list) = obj.downcast_ref_if_exact::(vm) { + let result = !list.borrow_vec().is_empty(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ToBoolStr => { + let obj = self.top_value(); + if let Some(s) = obj.downcast_ref_if_exact::(vm) { + let result = !s.is_empty(); + self.pop_value(); + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ToBoolAlwaysTrue => { + // Objects without __bool__ or __len__ are always True. + // Guard: check the type hasn't gained these slots. 
+ let obj = self.top_value(); + let slots = &obj.class().slots; + if slots.as_number.boolean.load().is_none() + && slots.as_mapping.length.load().is_none() + && slots.as_sequence.length.load().is_none() + { + self.pop_value(); + self.push_value(vm.ctx.new_bool(true).into()); + Ok(None) + } else { + self.deoptimize_to_bool(); + let obj = self.pop_value(); + let result = obj.try_to_bool(vm)?; + self.push_value(vm.ctx.new_bool(result).into()); + Ok(None) + } + } + Instruction::ContainsOpDict => { + let b = self.top_value(); // haystack + if let Some(dict) = b.downcast_ref_if_exact::(vm) { + let a = self.nth_value(1); // needle + let found = dict.get_item_opt(a, vm)?.is_some(); + self.pop_value(); + self.pop_value(); + let invert = bytecode::Invert::try_from(u32::from(arg) as u8) + .unwrap_or(bytecode::Invert::No); + let value = match invert { + bytecode::Invert::No => found, + bytecode::Invert::Yes => !found, + }; + self.push_value(vm.ctx.new_bool(value).into()); + Ok(None) + } else { + self.deoptimize_contains_op(); + let b = self.pop_value(); + let a = self.pop_value(); + let invert = bytecode::Invert::try_from(u32::from(arg) as u8) + .unwrap_or(bytecode::Invert::No); + let value = match invert { + bytecode::Invert::No => self._in(vm, &a, &b)?, + bytecode::Invert::Yes => self._not_in(vm, &a, &b)?, + }; + self.push_value(vm.ctx.new_bool(value).into()); + Ok(None) + } + } + Instruction::ContainsOpSet => { + let b = self.top_value(); // haystack + if b.downcast_ref_if_exact::(vm).is_some() { + let a = self.nth_value(1); // needle + let found = vm._contains(b, a)?; + self.pop_value(); + self.pop_value(); + let invert = bytecode::Invert::try_from(u32::from(arg) as u8) + .unwrap_or(bytecode::Invert::No); + let value = match invert { + bytecode::Invert::No => found, + bytecode::Invert::Yes => !found, + }; + self.push_value(vm.ctx.new_bool(value).into()); + Ok(None) + } else { + self.deoptimize_contains_op(); + let b = self.pop_value(); + let a = self.pop_value(); + let 
invert = bytecode::Invert::try_from(u32::from(arg) as u8) + .unwrap_or(bytecode::Invert::No); + let value = match invert { + bytecode::Invert::No => self._in(vm, &a, &b)?, + bytecode::Invert::Yes => self._not_in(vm, &a, &b)?, + }; + self.push_value(vm.ctx.new_bool(value).into()); + Ok(None) + } + } + Instruction::UnpackSequenceTwoTuple => { + let obj = self.top_value(); + if let Some(tuple) = obj.downcast_ref_if_exact::(vm) { + let elements = tuple.as_slice(); + if elements.len() == 2 { + let e0 = elements[0].clone(); + let e1 = elements[1].clone(); + self.pop_value(); + self.push_value(e1); + self.push_value(e0); + return Ok(None); + } + } + self.deoptimize_unpack_sequence(); + let size = u32::from(arg); + self.unpack_sequence(size, vm) + } + Instruction::UnpackSequenceTuple => { + let size = u32::from(arg) as usize; + let obj = self.top_value(); + if let Some(tuple) = obj.downcast_ref_if_exact::(vm) { + let elements = tuple.as_slice(); + if elements.len() == size { + let elems: Vec<_> = elements.to_vec(); + self.pop_value(); + for elem in elems.into_iter().rev() { + self.push_value(elem); + } + return Ok(None); + } + } + self.deoptimize_unpack_sequence(); + self.unpack_sequence(size as u32, vm) + } + Instruction::UnpackSequenceList => { + let size = u32::from(arg) as usize; + let obj = self.top_value(); + if let Some(list) = obj.downcast_ref_if_exact::(vm) { + let vec = list.borrow_vec(); + if vec.len() == size { + let elems: Vec<_> = vec.to_vec(); + drop(vec); + self.pop_value(); + for elem in elems.into_iter().rev() { + self.push_value(elem); + } + return Ok(None); + } + } + self.deoptimize_unpack_sequence(); + self.unpack_sequence(size as u32, vm) + } + Instruction::ForIterRange => { + let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let iter = self.top_value(); + if let Some(range_iter) = iter.downcast_ref_if_exact::(vm) { + if let Some(value) = range_iter.fast_next() { + self.push_value(vm.ctx.new_int(value).into()); + } else { + 
self.for_iter_jump_on_exhausted(target); + } + Ok(None) + } else { + self.deoptimize_for_iter(); + self.execute_for_iter(vm, target)?; + Ok(None) + } + } + Instruction::ForIterList => { + let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let iter = self.top_value(); + if let Some(list_iter) = iter.downcast_ref_if_exact::(vm) { + if let Some(value) = list_iter.fast_next() { + self.push_value(value); + } else { + self.for_iter_jump_on_exhausted(target); + } + Ok(None) + } else { + self.deoptimize_for_iter(); + self.execute_for_iter(vm, target)?; + Ok(None) + } + } + Instruction::ForIterTuple => { + let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let iter = self.top_value(); + if let Some(tuple_iter) = iter.downcast_ref_if_exact::(vm) { + if let Some(value) = tuple_iter.fast_next() { + self.push_value(value); + } else { + self.for_iter_jump_on_exhausted(target); + } + Ok(None) + } else { + self.deoptimize_for_iter(); + self.execute_for_iter(vm, target)?; + Ok(None) + } + } + Instruction::ForIterGen => { + let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let iter = self.top_value(); + if let Some(generator) = iter.downcast_ref_if_exact::(vm) { + match generator.as_coro().send(iter, vm.ctx.none(), vm) { + Ok(PyIterReturn::Return(value)) => { + self.push_value(value); + } + Ok(PyIterReturn::StopIteration(_)) => { + self.for_iter_jump_on_exhausted(target); + } + Err(e) => return Err(e), + } + Ok(None) + } else { + self.deoptimize_for_iter(); + self.execute_for_iter(vm, target)?; + Ok(None) + } + } + Instruction::LoadGlobalModule => { + let oparg = u32::from(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let current_version = self.globals.version() as u32; + if cached_version == current_version { + // globals unchanged — name is in globals, look up only there + let name = self.code.names[(oparg >> 1) as 
usize]; + if let Some(x) = self.globals.get_item_opt(name, vm)? { + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) + } else { + // Name was removed from globals + self.deoptimize_load_global(); + let x = self.load_global_or_builtin(name, vm)?; + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) + } + } else { + self.deoptimize_load_global(); + let name = self.code.names[(oparg >> 1) as usize]; + let x = self.load_global_or_builtin(name, vm)?; + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) + } + } + Instruction::LoadGlobalBuiltin => { + let oparg = u32::from(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); + let current_version = self.globals.version() as u32; + if cached_version == current_version { + // globals unchanged — name is NOT in globals, look up in builtins + let name = self.code.names[(oparg >> 1) as usize]; + if let Some(builtins_dict) = self.builtins.downcast_ref::() + && let Some(x) = builtins_dict.get_item_opt(name, vm)? + { + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + return Ok(None); + } + // Fallback: name not found or builtins not a dict + self.deoptimize_load_global(); + let x = self.load_global_or_builtin(name, vm)?; + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) + } else { + self.deoptimize_load_global(); + let name = self.code.names[(oparg >> 1) as usize]; + let x = self.load_global_or_builtin(name, vm)?; + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) + } + } + // All INSTRUMENTED_* opcodes delegate to a cold function to keep + // the hot instruction loop free of monitoring overhead. + _ => self.execute_instrumented(instruction, arg, vm), + } + } + + /// Handle all INSTRUMENTED_* opcodes. 
This function is cold — it only + /// runs when sys.monitoring has rewritten the bytecode. + #[cold] + fn execute_instrumented( + &mut self, + instruction: Instruction, + arg: bytecode::OpArg, + vm: &VirtualMachine, + ) -> FrameResult { + debug_assert!( + instruction.is_instrumented(), + "execute_instrumented called with non-instrumented opcode {instruction:?}" + ); + self.monitoring_mask = vm.state.monitoring_events.load(); + match instruction { + Instruction::InstrumentedResume => { + // Version check: re-instrument if stale + let global_ver = vm + .state + .instrumentation_version + .load(atomic::Ordering::Acquire); + let code_ver = self + .code + .instrumentation_version + .load(atomic::Ordering::Acquire); + if code_ver != global_ver { + let events = { + let state = vm.state.monitoring.lock(); + state.events_for_code(self.code.get_id()) + }; + monitoring::instrument_code(self.code, events); + self.code + .instrumentation_version + .store(global_ver, atomic::Ordering::Release); + // Re-execute (may have been de-instrumented to base Resume) + self.update_lasti(|i| *i -= 1); + return Ok(None); + } + let resume_type = u32::from(arg); + let offset = (self.lasti() - 1) * 2; + if resume_type == 0 { + if self.monitoring_mask & monitoring::EVENT_PY_START != 0 { + monitoring::fire_py_start(vm, self.code, offset)?; + } + } else if self.monitoring_mask & monitoring::EVENT_PY_RESUME != 0 { + monitoring::fire_py_resume(vm, self.code, offset)?; + } + Ok(None) + } + Instruction::InstrumentedReturnValue => { + let value = self.pop_value(); + if self.monitoring_mask & monitoring::EVENT_PY_RETURN != 0 { + let offset = (self.lasti() - 1) * 2; + monitoring::fire_py_return(vm, self.code, offset, &value)?; + } + self.unwind_blocks(vm, UnwindReason::Returning { value }) + } + Instruction::InstrumentedYieldValue => { + debug_assert!( + self.state + .stack + .iter() + .flatten() + .all(|sr| !sr.is_borrowed()), + "borrowed refs on stack at yield point" + ); + let value = 
self.pop_value(); + if self.monitoring_mask & monitoring::EVENT_PY_YIELD != 0 { + let offset = (self.lasti() - 1) * 2; + monitoring::fire_py_yield(vm, self.code, offset, &value)?; + } + let oparg = u32::from(arg); + let wrap = oparg == 0; + let value = if wrap && self.code.flags.contains(bytecode::CodeFlags::COROUTINE) { + PyAsyncGenWrappedValue(value).into_pyobject(vm) + } else { + value + }; + Ok(Some(ExecutionResult::Yield(value))) + } + Instruction::InstrumentedCall => { + let args = self.collect_positional_args(u32::from(arg)); + self.execute_call_instrumented(args, vm) + } + Instruction::InstrumentedCallKw => { + let args = self.collect_keyword_args(u32::from(arg)); + self.execute_call_instrumented(args, vm) + } + Instruction::InstrumentedCallFunctionEx => { + let args = self.collect_ex_args(vm)?; + self.execute_call_instrumented(args, vm) + } + Instruction::InstrumentedLoadSuperAttr => { + let oparg = bytecode::LoadSuperAttr::from(u32::from(arg)); + let offset = (self.lasti() - 1) * 2; + // Fire CALL event before super() call + let call_args = if self.monitoring_mask & monitoring::EVENT_CALL != 0 { + let global_super: PyObjectRef = self.nth_value(2).to_owned(); + let arg0 = if oparg.has_class() { + self.nth_value(1).to_owned() + } else { + monitoring::get_missing(vm) + }; + monitoring::fire_call(vm, self.code, offset, &global_super, arg0.clone())?; + Some((global_super, arg0)) + } else { + None + }; + match self.load_super_attr(vm, oparg) { + Ok(result) => { + // Fire C_RETURN on success + if let Some((global_super, arg0)) = call_args { + monitoring::fire_c_return(vm, self.code, offset, &global_super, arg0)?; + } + Ok(result) + } + Err(exc) => { + // Fire C_RAISE on failure + let exc = if let Some((global_super, arg0)) = call_args { + match monitoring::fire_c_raise( + vm, + self.code, + offset, + &global_super, + arg0, + ) { + Ok(()) => exc, + Err(monitor_exc) => monitor_exc, + } + } else { + exc + }; + Err(exc) + } + } + } + 
Instruction::InstrumentedJumpForward => { + let src_offset = (self.lasti() - 1) * 2; + let target_idx = self.lasti() + u32::from(arg); + let target = bytecode::Label(target_idx); + self.jump(target); + if self.monitoring_mask & monitoring::EVENT_JUMP != 0 { + monitoring::fire_jump(vm, self.code, src_offset, target.0 * 2)?; + } + Ok(None) + } + Instruction::InstrumentedJumpBackward => { + let src_offset = (self.lasti() - 1) * 2; + let target_idx = self.lasti() + 1 - u32::from(arg); + let target = bytecode::Label(target_idx); + self.jump(target); + if self.monitoring_mask & monitoring::EVENT_JUMP != 0 { + monitoring::fire_jump(vm, self.code, src_offset, target.0 * 2)?; + } + Ok(None) + } + Instruction::InstrumentedForIter => { + let src_offset = (self.lasti() - 1) * 2; + let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let continued = self.execute_for_iter(vm, target)?; + if continued { + if self.monitoring_mask & monitoring::EVENT_BRANCH_LEFT != 0 { + let dest_offset = (self.lasti() + 1) * 2; // after caches + monitoring::fire_branch_left(vm, self.code, src_offset, dest_offset)?; + } + } else if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { + let dest_offset = self.lasti() * 2; + monitoring::fire_branch_right(vm, self.code, src_offset, dest_offset)?; + } + Ok(None) + } + Instruction::InstrumentedEndFor => { + // Stack: [value, receiver(iter), ...] 
+ // PyGen_Check: only fire STOP_ITERATION for generators + let is_gen = self + .nth_value(1) + .downcast_ref::() + .is_some(); + let value = self.pop_value(); + if is_gen && self.monitoring_mask & monitoring::EVENT_STOP_ITERATION != 0 { + let offset = (self.lasti() - 1) * 2; + monitoring::fire_stop_iteration(vm, self.code, offset, &value)?; + } + Ok(None) + } + Instruction::InstrumentedEndSend => { + let value = self.pop_value(); + let receiver = self.pop_value(); + // PyGen_Check || PyCoro_CheckExact + let is_gen_or_coro = receiver + .downcast_ref::() + .is_some() + || receiver + .downcast_ref::() + .is_some(); + if is_gen_or_coro && self.monitoring_mask & monitoring::EVENT_STOP_ITERATION != 0 { + let offset = (self.lasti() - 1) * 2; + monitoring::fire_stop_iteration(vm, self.code, offset, &value)?; + } + self.push_value(value); + Ok(None) + } + Instruction::InstrumentedPopJumpIfTrue => { + let src_offset = (self.lasti() - 1) * 2; + let target_idx = self.lasti() + 1 + u32::from(arg); + let obj = self.pop_value(); + let value = obj.try_to_bool(vm)?; + if value { + self.jump(bytecode::Label(target_idx)); + if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { + monitoring::fire_branch_right(vm, self.code, src_offset, target_idx * 2)?; + } + } + Ok(None) + } + Instruction::InstrumentedPopJumpIfFalse => { + let src_offset = (self.lasti() - 1) * 2; + let target_idx = self.lasti() + 1 + u32::from(arg); + let obj = self.pop_value(); + let value = obj.try_to_bool(vm)?; + if !value { + self.jump(bytecode::Label(target_idx)); + if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { + monitoring::fire_branch_right(vm, self.code, src_offset, target_idx * 2)?; + } + } + Ok(None) + } + Instruction::InstrumentedPopJumpIfNone => { + let src_offset = (self.lasti() - 1) * 2; + let target_idx = self.lasti() + 1 + u32::from(arg); + let value = self.pop_value(); + if vm.is_none(&value) { + self.jump(bytecode::Label(target_idx)); + if self.monitoring_mask & 
monitoring::EVENT_BRANCH_RIGHT != 0 { + monitoring::fire_branch_right(vm, self.code, src_offset, target_idx * 2)?; + } + } + Ok(None) + } + Instruction::InstrumentedPopJumpIfNotNone => { + let src_offset = (self.lasti() - 1) * 2; + let target_idx = self.lasti() + 1 + u32::from(arg); + let value = self.pop_value(); + if !vm.is_none(&value) { + self.jump(bytecode::Label(target_idx)); + if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { + monitoring::fire_branch_right(vm, self.code, src_offset, target_idx * 2)?; + } + } + Ok(None) + } + Instruction::InstrumentedNotTaken => { + if self.monitoring_mask & monitoring::EVENT_BRANCH_LEFT != 0 { + let not_taken_idx = self.lasti() as usize - 1; + // Scan backwards past CACHE entries to find the branch instruction + let mut branch_idx = not_taken_idx.saturating_sub(1); + while branch_idx > 0 + && matches!(self.code.instructions[branch_idx].op, Instruction::Cache) + { + branch_idx -= 1; + } + let src_offset = (branch_idx as u32) * 2; + let dest_offset = self.lasti() * 2; + monitoring::fire_branch_left(vm, self.code, src_offset, dest_offset)?; + } + Ok(None) + } + Instruction::InstrumentedPopIter => { + // BRANCH_RIGHT is fired by InstrumentedForIter, not here. 
+ self.pop_value(); + Ok(None) + } + Instruction::InstrumentedEndAsyncFor => { + if self.monitoring_mask & monitoring::EVENT_BRANCH_RIGHT != 0 { + let oparg_val = u32::from(arg); + // src = next_instr - oparg (END_SEND position) + let src_offset = (self.lasti() - oparg_val) * 2; + // dest = this_instr + 1 + let dest_offset = self.lasti() * 2; + monitoring::fire_branch_right(vm, self.code, src_offset, dest_offset)?; + } + let exc = self.pop_value(); + let _awaitable = self.pop_value(); + let exc = exc + .downcast::() + .expect("EndAsyncFor expects exception on stack"); + if exc.fast_isinstance(vm.ctx.exceptions.stop_async_iteration) { + vm.set_exception(None); + Ok(None) + } else { + Err(exc) + } + } + Instruction::InstrumentedLine => { + let idx = self.lasti() as usize - 1; + let offset = idx as u32 * 2; + + // Read the full side-table chain before firing any events, + // because a callback may de-instrument and clear the tables. + let (real_op_byte, also_instruction) = { + let data = self.code.monitoring_data.lock(); + let line_op = data.as_ref().map(|d| d.line_opcodes[idx]).unwrap_or(0); + if line_op == u8::from(Instruction::InstrumentedInstruction) { + // LINE wraps INSTRUCTION: resolve the INSTRUCTION side-table too + let inst_op = data + .as_ref() + .map(|d| d.per_instruction_opcodes[idx]) + .unwrap_or(0); + (inst_op, true) + } else { + (line_op, false) + } + }; + debug_assert!( + real_op_byte != 0, + "INSTRUMENTED_LINE at {idx} without stored opcode" + ); + + // Fire LINE event only if line changed + if let Some((loc, _)) = self.code.locations.get(idx) { + let line = loc.line.get() as u32; + if line != self.state.prev_line && line > 0 { + self.state.prev_line = line; + monitoring::fire_line(vm, self.code, offset, line)?; + } + } + + // If the LINE position also had INSTRUCTION, fire that event too + if also_instruction { + monitoring::fire_instruction(vm, self.code, offset)?; + } + + // Re-dispatch to the real original opcode + let original_op = 
Instruction::try_from(real_op_byte) + .expect("invalid opcode in side-table chain"); + let lasti_before_dispatch = self.lasti(); + let result = if original_op.to_base().is_some() { + self.execute_instrumented(original_op, arg, vm) + } else { + let mut do_extend_arg = false; + self.execute_instruction(original_op, arg, &mut do_extend_arg, vm) + }; + let orig_caches = original_op.to_base().unwrap_or(original_op).cache_entries(); + if orig_caches > 0 && self.lasti() == lasti_before_dispatch { + self.update_lasti(|i| *i += orig_caches as u32); + } + result + } + Instruction::InstrumentedInstruction => { + let idx = self.lasti() as usize - 1; + let offset = idx as u32 * 2; + + // Get original opcode from side-table + let original_op_byte = { + let data = self.code.monitoring_data.lock(); + data.as_ref() + .map(|d| d.per_instruction_opcodes[idx]) + .unwrap_or(0) + }; + debug_assert!( + original_op_byte != 0, + "INSTRUMENTED_INSTRUCTION at {idx} without stored opcode" + ); + + // Fire INSTRUCTION event + monitoring::fire_instruction(vm, self.code, offset)?; + + // Re-dispatch to original opcode + let original_op = Instruction::try_from(original_op_byte) + .expect("invalid opcode in instruction side-table"); + let lasti_before_dispatch = self.lasti(); + let result = if original_op.to_base().is_some() { + self.execute_instrumented(original_op, arg, vm) + } else { + let mut do_extend_arg = false; + self.execute_instruction(original_op, arg, &mut do_extend_arg, vm) + }; + let orig_caches = original_op.to_base().unwrap_or(original_op).cache_entries(); + if orig_caches > 0 && self.lasti() == lasti_before_dispatch { + self.update_lasti(|i| *i += orig_caches as u32); + } + result + } + _ => { + unreachable!("{instruction:?} instruction should not be executed") + } + } + } + + #[inline] + fn load_global_or_builtin(&self, name: &Py, vm: &VirtualMachine) -> PyResult { + if let Some(builtins_dict) = self.builtins_dict { + // Fast path: both globals and builtins are exact dicts + // 
SAFETY: builtins_dict is only set when globals is also exact dict + let globals_exact = unsafe { PyExact::ref_unchecked(self.globals.as_ref()) }; + globals_exact + .get_chain_exact(builtins_dict, name, vm)? + .ok_or_else(|| { + vm.new_name_error(format!("name '{name}' is not defined"), name.to_owned()) + }) + } else { + // Slow path: builtins is not a dict, use generic __getitem__ + if let Some(value) = self.globals.get_item_opt(name, vm)? { + return Ok(value); + } + self.builtins.get_item(name, vm).map_err(|e| { + if e.fast_isinstance(vm.ctx.exceptions.key_error) { + vm.new_name_error(format!("name '{name}' is not defined"), name.to_owned()) + } else { + e + } + }) + } + } + + #[cfg_attr(feature = "flame-it", flame("Frame"))] + fn import(&mut self, vm: &VirtualMachine, module_name: Option<&Py>) -> PyResult<()> { + let module_name = module_name.unwrap_or(vm.ctx.empty_str); + let top = self.pop_value(); + let from_list = match >::try_from_object(vm, top)? { + Some(from_list) => from_list.try_into_typed::(vm)?, + None => vm.ctx.empty_tuple_typed().to_owned(), + }; + let level = usize::try_from_object(vm, self.pop_value())?; + + let module = vm.import_from(module_name, &from_list, level)?; + + self.push_value(module); + Ok(()) + } + + #[cfg_attr(feature = "flame-it", flame("Frame"))] + fn import_from(&mut self, vm: &VirtualMachine, idx: bytecode::NameIdx) -> PyResult { + let module = self.top_value(); + let name = self.code.names[idx as usize]; + + // Load attribute, and transform any error into import error. + if let Some(obj) = vm.get_attribute_opt(module.to_owned(), name)? 
{ + return Ok(obj); + } + // fallback to importing '{module.__name__}.{name}' from sys.modules + let fallback_module = (|| { + let mod_name = module.get_attr(identifier!(vm, __name__), vm).ok()?; + let mod_name = mod_name.downcast_ref::()?; + let full_mod_name = format!("{mod_name}.{name}"); + let sys_modules = vm.sys_module.get_attr("modules", vm).ok()?; + sys_modules.get_item(&full_mod_name, vm).ok() + })(); + + if let Some(sub_module) = fallback_module { + return Ok(sub_module); + } + + use crate::import::{ + get_spec_file_origin, is_possibly_shadowing_path, is_stdlib_module_name, + }; + + // Get module name for the error message + let mod_name_obj = module.get_attr(identifier!(vm, __name__), vm).ok(); + let mod_name_str = mod_name_obj + .as_ref() + .and_then(|n| n.downcast_ref::().map(|s| s.as_str().to_owned())); + let module_name = mod_name_str.as_deref().unwrap_or(""); + + let spec = module + .get_attr("__spec__", vm) + .ok() + .filter(|s| !vm.is_none(s)); + + let origin = get_spec_file_origin(&spec, vm); + + let is_possibly_shadowing = origin + .as_ref() + .map(|o| is_possibly_shadowing_path(o, vm)) + .unwrap_or(false); + let is_possibly_shadowing_stdlib = if is_possibly_shadowing { + if let Some(ref mod_name) = mod_name_obj { + is_stdlib_module_name(mod_name, vm)? 
+ } else { + false + } + } else { + false + }; + + let msg = if is_possibly_shadowing_stdlib { + let origin = origin.as_ref().unwrap(); + format!( + "cannot import name '{name}' from '{module_name}' \ + (consider renaming '{origin}' since it has the same \ + name as the standard library module named '{module_name}' \ + and prevents importing that standard library module)" + ) + } else { + let is_init = is_module_initializing(module, vm); + if is_init { + if is_possibly_shadowing { + let origin = origin.as_ref().unwrap(); + format!( + "cannot import name '{name}' from '{module_name}' \ + (consider renaming '{origin}' if it has the same name \ + as a library you intended to import)" + ) + } else if let Some(ref path) = origin { + format!( + "cannot import name '{name}' from partially initialized module \ + '{module_name}' (most likely due to a circular import) ({path})" + ) + } else { + format!( + "cannot import name '{name}' from partially initialized module \ + '{module_name}' (most likely due to a circular import)" + ) + } + } else if let Some(ref path) = origin { + format!("cannot import name '{name}' from '{module_name}' ({path})") + } else { + format!("cannot import name '{name}' from '{module_name}' (unknown location)") + } + }; + let err = vm.new_import_error(msg, vm.ctx.new_utf8_str(module_name)); + + if let Some(ref path) = origin { + let _ignore = err + .as_object() + .set_attr("path", vm.ctx.new_str(path.as_str()), vm); + } + + // name_from = the attribute name that failed to import (best-effort metadata) + let _ignore = err.as_object().set_attr("name_from", name.to_owned(), vm); + + Err(err) + } + + #[cfg_attr(feature = "flame-it", flame("Frame"))] + fn import_star(&mut self, vm: &VirtualMachine) -> PyResult<()> { + let module = self.pop_value(); + + let Some(dict) = module.dict() else { + return Ok(()); + }; + + let mod_name = module + .get_attr(identifier!(vm, __name__), vm) + .ok() + .and_then(|n| n.downcast::().ok()); + + let require_str = |obj: 
PyObjectRef, attr: &str| -> PyResult> { + obj.downcast().map_err(|obj: PyObjectRef| { + let source = if let Some(ref mod_name) = mod_name { + format!("{}.{attr}", mod_name.as_wtf8()) + } else { + attr.to_owned() + }; + let repr = obj.repr(vm).unwrap_or_else(|_| vm.ctx.new_str("?")); + vm.new_type_error(format!( + "{} in {} must be str, not {}", + repr.as_wtf8(), + source, + obj.class().name() + )) + }) + }; + + let locals_map = self.locals.mapping(vm); + if let Ok(all) = dict.get_item(identifier!(vm, __all__), vm) { + let items: Vec = all.try_to_value(vm)?; + for item in items { + let name = require_str(item, "__all__")?; + let value = module.get_attr(&*name, vm)?; + locals_map.ass_subscript(&name, Some(value), vm)?; + } + } else { + for (k, v) in dict { + let k = require_str(k, "__dict__")?; + if !k.as_bytes().starts_with(b"_") { + locals_map.ass_subscript(&k, Some(v), vm)?; + } + } + } + Ok(()) + } + + /// Unwind blocks. + /// The reason for unwinding gives a hint on what to do when + /// unwinding a block. + /// Optionally returns an exception. + #[cfg_attr(feature = "flame-it", flame("Frame"))] + fn unwind_blocks(&mut self, vm: &VirtualMachine, reason: UnwindReason) -> FrameResult { + // use exception table for exception handling + match reason { + UnwindReason::Raising { exception } => { + // Look up handler in exception table + // lasti points to NEXT instruction (already incremented in run loop) + // The exception occurred at the previous instruction + // Python uses signed int where INSTR_OFFSET() - 1 = -1 before first instruction. + // We use u32, so check for 0 explicitly. + if self.lasti() == 0 { + // No instruction executed yet, no handler can match + return Err(exception); + } + let offset = self.lasti() - 1; + if let Some(entry) = + bytecode::find_exception_handler(&self.code.exceptiontable, offset) + { + // Fire EXCEPTION_HANDLED before setting up handler. 
+ // If the callback raises, the handler is NOT set up and the + // new exception propagates instead. + if vm.state.monitoring_events.load() & monitoring::EVENT_EXCEPTION_HANDLED != 0 + { + let byte_offset = offset * 2; + let exc_obj: PyObjectRef = exception.clone().into(); + monitoring::fire_exception_handled(vm, self.code, byte_offset, &exc_obj)?; + } + + // 1. Pop stack to entry.depth + while self.state.stack.len() > entry.depth as usize { + self.state.stack.pop(); + } + + // 2. If push_lasti=true (SETUP_CLEANUP), push lasti before exception + // pushes lasti as PyLong + if entry.push_lasti { + self.push_value(vm.ctx.new_int(offset as i32).into()); + } + + // 3. Push exception onto stack + // always push exception, PUSH_EXC_INFO transforms [exc] -> [prev_exc, exc] + // Do NOT call vm.set_exception here! PUSH_EXC_INFO will do it. + // PUSH_EXC_INFO needs to get prev_exc from vm.current_exception() BEFORE setting the new one. + self.push_value(exception.into()); + + // 4. Jump to handler + self.jump(bytecode::Label(entry.target)); + + Ok(None) + } else { + // No handler found, propagate exception + Err(exception) + } + } + UnwindReason::Returning { value } => Ok(Some(ExecutionResult::Return(value))), + } + } + + fn execute_store_subscript(&mut self, vm: &VirtualMachine) -> FrameResult { + let idx = self.pop_value(); + let obj = self.pop_value(); + let value = self.pop_value(); + obj.set_item(&*idx, value, vm)?; + Ok(None) + } + + fn execute_delete_subscript(&mut self, vm: &VirtualMachine) -> FrameResult { + let idx = self.pop_value(); + let obj = self.pop_value(); + obj.del_item(&*idx, vm)?; + Ok(None) + } + + fn execute_build_map(&mut self, vm: &VirtualMachine, size: u32) -> FrameResult { + let size = size as usize; + let map_obj = vm.ctx.new_dict(); + for (key, value) in self.pop_multiple(2 * size).tuples() { + map_obj.set_item(&*key, value, vm)?; + } + + self.push_value(map_obj.into()); + Ok(None) + } + + fn execute_build_slice( + &mut self, + vm: 
&VirtualMachine, + argc: bytecode::BuildSliceArgCount, + ) -> FrameResult { + let step = match argc { + bytecode::BuildSliceArgCount::Two => None, + bytecode::BuildSliceArgCount::Three => Some(self.pop_value()), + }; + let stop = self.pop_value(); + let start = self.pop_value(); + + let obj = PySlice { + start: Some(start), + stop, + step, + } + .into_ref(&vm.ctx); + self.push_value(obj.into()); + Ok(None) + } + + fn collect_positional_args(&mut self, nargs: u32) -> FuncArgs { + FuncArgs { + args: self.pop_multiple(nargs as usize).collect(), + kwargs: IndexMap::new(), + } + } + + fn collect_keyword_args(&mut self, nargs: u32) -> FuncArgs { + let kwarg_names = self + .pop_value() + .downcast::() + .expect("kwarg names should be tuple of strings"); + let args = self.pop_multiple(nargs as usize); + + let kwarg_names = kwarg_names.as_slice().iter().map(|pyobj| { + pyobj + .downcast_ref::() + .unwrap() + .as_str() + .to_owned() + }); + FuncArgs::with_kwargs_names(args, kwarg_names) + } + + fn collect_ex_args(&mut self, vm: &VirtualMachine) -> PyResult { + let kwargs_or_null = self.pop_value_opt(); + let kwargs = if let Some(kw_obj) = kwargs_or_null { + let mut kwargs = IndexMap::new(); + + // Stack: [callable, self_or_null, args_tuple] + let callable = self.nth_value(2); + let func_str = Self::object_function_str(callable, vm); + + Self::iterate_mapping_keys(vm, &kw_obj, &func_str, |key| { + let key_str = key + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("keywords must be strings"))?; + let value = kw_obj.get_item(&*key, vm)?; + kwargs.insert(key_str.as_str().to_owned(), value); + Ok(()) + })?; + kwargs + } else { + IndexMap::new() + }; + let args_obj = self.pop_value(); + let args = if let Some(tuple) = args_obj.downcast_ref::() { + tuple.as_slice().to_vec() + } else { + // Single *arg passed directly; convert to sequence at runtime. 
+ // Stack: [callable, self_or_null] + let callable = self.nth_value(1); + let func_str = Self::object_function_str(callable, vm); + let not_iterable = args_obj.class().slots.iter.load().is_none() + && args_obj + .get_class_attr(vm.ctx.intern_str("__getitem__")) + .is_none(); + args_obj.try_to_value::>(vm).map_err(|e| { + if not_iterable && e.class().is(vm.ctx.exceptions.type_error) { + vm.new_type_error(format!( + "{} argument after * must be an iterable, not {}", + func_str, + args_obj.class().name() + )) + } else { + e + } + })? + }; + Ok(FuncArgs { args, kwargs }) + } + + /// Returns a display string for a callable object for use in error messages. + /// For objects with `__qualname__`, returns "module.qualname()" or "qualname()". + /// For other objects, returns repr(obj). + fn object_function_str(obj: &PyObject, vm: &VirtualMachine) -> Wtf8Buf { + let repr_fallback = || { + obj.repr(vm) + .as_ref() + .map_or("?".as_ref(), |s| s.as_wtf8()) + .to_owned() + }; + let Ok(qualname) = obj.get_attr(vm.ctx.intern_str("__qualname__"), vm) else { + return repr_fallback(); + }; + let Some(qualname_str) = qualname.downcast_ref::() else { + return repr_fallback(); + }; + if let Ok(module) = obj.get_attr(vm.ctx.intern_str("__module__"), vm) + && let Some(module_str) = module.downcast_ref::() + && module_str.as_bytes() != b"builtins" + { + return wtf8_concat!(module_str.as_wtf8(), ".", qualname_str.as_wtf8(), "()"); + } + wtf8_concat!(qualname_str.as_wtf8(), "()") + } + + /// Helper function to iterate over mapping keys using the keys() method. + /// This ensures proper order preservation for OrderedDict and other custom mappings. 
+ fn iterate_mapping_keys( + vm: &VirtualMachine, + mapping: &PyObject, + func_str: &Wtf8, + mut key_handler: F, + ) -> PyResult<()> + where + F: FnMut(PyObjectRef) -> PyResult<()>, + { + let Some(keys_method) = vm.get_method(mapping.to_owned(), vm.ctx.intern_str("keys")) else { + return Err(vm.new_type_error(format!( + "{} argument after ** must be a mapping, not {}", + func_str, + mapping.class().name() + ))); + }; + + let keys = keys_method?.call((), vm)?.get_iter(vm)?; + while let PyIterReturn::Return(key) = keys.next(vm)? { + key_handler(key)?; + } + Ok(()) + } + + #[inline] + fn execute_call(&mut self, args: FuncArgs, vm: &VirtualMachine) -> FrameResult { + // Stack: [callable, self_or_null, ...] + let self_or_null = self.pop_value_opt(); // Option + let callable = self.pop_value(); + + let final_args = if let Some(self_val) = self_or_null { + let mut args = args; + args.prepend_arg(self_val); + args + } else { + args + }; + + let value = callable.call(final_args, vm)?; + self.push_value(value); + Ok(None) + } + + /// Instrumented version of execute_call: fires CALL, C_RETURN, and C_RAISE events. 
+ fn execute_call_instrumented(&mut self, args: FuncArgs, vm: &VirtualMachine) -> FrameResult { + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + + let final_args = if let Some(self_val) = self_or_null { + let mut args = args; + args.prepend_arg(self_val); + args + } else { + args + }; + + let is_python_call = callable.downcast_ref::().is_some(); + + // Fire CALL event + let call_arg0 = if self.monitoring_mask & monitoring::EVENT_CALL != 0 { + let arg0 = final_args + .args + .first() + .cloned() + .unwrap_or_else(|| monitoring::get_missing(vm)); + let offset = (self.lasti() - 1) * 2; + monitoring::fire_call(vm, self.code, offset, &callable, arg0.clone())?; + Some(arg0) + } else { + None + }; + + match callable.call(final_args, vm) { + Ok(value) => { + if let Some(arg0) = call_arg0 + && !is_python_call + { + let offset = (self.lasti() - 1) * 2; + monitoring::fire_c_return(vm, self.code, offset, &callable, arg0)?; + } + self.push_value(value); + Ok(None) + } + Err(exc) => { + let exc = if let Some(arg0) = call_arg0 + && !is_python_call + { + let offset = (self.lasti() - 1) * 2; + match monitoring::fire_c_raise(vm, self.code, offset, &callable, arg0) { + Ok(()) => exc, + Err(monitor_exc) => monitor_exc, + } + } else { + exc + }; + Err(exc) + } + } + } + + fn execute_raise(&mut self, vm: &VirtualMachine, kind: bytecode::RaiseKind) -> FrameResult { + let cause = match kind { + bytecode::RaiseKind::RaiseCause => { + let val = self.pop_value(); + Some(if vm.is_none(&val) { + // if the cause arg is none, we clear the cause + None + } else { + // if the cause arg is an exception, we overwrite it + let ctor = ExceptionCtor::try_from_object(vm, val).map_err(|_| { + vm.new_type_error("exception causes must derive from BaseException") + })?; + Some(ctor.instantiate(vm)?) 
+ }) + } + // if there's no cause arg, we keep the cause as is + _ => None, + }; + let exception = match kind { + bytecode::RaiseKind::RaiseCause | bytecode::RaiseKind::Raise => { + ExceptionCtor::try_from_object(vm, self.pop_value())?.instantiate(vm)? + } + bytecode::RaiseKind::BareRaise => { + // RAISE_VARARGS 0: bare `raise` gets exception from VM state + // This is the current exception set by PUSH_EXC_INFO + vm.topmost_exception().ok_or_else(|| { + vm.new_runtime_error("No active exception to reraise".to_owned()) + })? + } + bytecode::RaiseKind::ReraiseFromStack => { + // RERAISE: gets exception from stack top + // Used in cleanup blocks where exception is on stack after COPY 3 + let exc = self.pop_value(); + exc.downcast::().map_err(|obj| { + vm.new_type_error(format!( + "exceptions must derive from BaseException, not {}", + obj.class().name() + )) + })? + } + }; + #[cfg(debug_assertions)] + debug!("Exception raised: {exception:?} with cause: {cause:?}"); + if let Some(cause) = cause { + exception.set___cause__(cause); + } + Err(exception) + } + + fn builtin_coro<'a>(&self, coro: &'a PyObject) -> Option<&'a Coro> { + match_class!(match coro { + ref g @ PyGenerator => Some(g.as_coro()), + ref c @ PyCoroutine => Some(c.as_coro()), + _ => None, + }) + } + + fn _send( + &self, + jen: &PyObject, + val: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult { + match self.builtin_coro(jen) { + Some(coro) => coro.send(jen, val, vm), + // TODO: turn return type to PyResult then ExecutionResult will be simplified + None if vm.is_none(&val) => PyIter::new(jen).next(vm), + None => { + let meth = jen.get_attr("send", vm)?; + PyIterReturn::from_pyresult(meth.call((val,), vm), vm) + } + } + } + + fn execute_unpack_ex(&mut self, vm: &VirtualMachine, before: u8, after: u8) -> FrameResult { + let (before, after) = (before as usize, after as usize); + let value = self.pop_value(); + let not_iterable = value.class().slots.iter.load().is_none() + && value + 
.get_class_attr(vm.ctx.intern_str("__getitem__")) + .is_none(); + let elements: Vec<_> = value.try_to_value(vm).map_err(|e| { + if not_iterable && e.class().is(vm.ctx.exceptions.type_error) { + vm.new_type_error(format!( + "cannot unpack non-iterable {} object", + value.class().name() + )) + } else { + e + } + })?; + let min_expected = before + after; + + let middle = elements.len().checked_sub(min_expected).ok_or_else(|| { + vm.new_value_error(format!( + "not enough values to unpack (expected at least {}, got {})", + min_expected, + elements.len() + )) + })?; + + let mut elements = elements; + // Elements on stack from right-to-left: + self.state.stack.extend( + elements + .drain(before + middle..) + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); + + let middle_elements = elements.drain(before..).collect(); + let t = vm.ctx.new_list(middle_elements); + self.push_value(t.into()); + + // Lastly the first reversed values: + self.state.stack.extend( + elements + .into_iter() + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); + + Ok(None) + } + + #[inline] + fn jump(&mut self, label: bytecode::Label) { + let target_pc = label.0; + vm_trace!("jump from {:?} to {:?}", self.lasti(), target_pc); + self.update_lasti(|i| *i = target_pc); + } + + /// Jump forward by `delta` code units from after instruction + caches. + /// lasti is already at instruction_index + 1, so after = lasti + caches. + /// + /// Unchecked arithmetic is intentional: the compiler guarantees valid + /// targets, and debug builds will catch overflow via Rust's default checks. + #[inline] + fn jump_relative_forward(&mut self, delta: u32, caches: u32) { + let target = self.lasti() + caches + delta; + self.update_lasti(|i| *i = target); + } + + /// Jump backward by `delta` code units from after instruction + caches. + /// + /// Unchecked arithmetic is intentional: the compiler guarantees valid + /// targets, and debug builds will catch underflow via Rust's default checks. 
+ #[inline] + fn jump_relative_backward(&mut self, delta: u32, caches: u32) { + let target = self.lasti() + caches - delta; + self.update_lasti(|i| *i = target); + } + + #[inline] + fn pop_jump_if_relative( + &mut self, + vm: &VirtualMachine, + arg: bytecode::OpArg, + caches: u32, + flag: bool, + ) -> FrameResult { + let obj = self.pop_value(); + let value = obj.try_to_bool(vm)?; + if value == flag { + self.jump_relative_forward(u32::from(arg), caches); + } + Ok(None) + } + + /// Advance the iterator on top of stack. + /// Returns `true` if iteration continued (item pushed), `false` if exhausted (jumped). + fn execute_for_iter( + &mut self, + vm: &VirtualMachine, + target: bytecode::Label, + ) -> Result { + let top = self.top_value(); + + // FOR_ITER_RANGE: bypass generic iterator protocol for range iterators + if let Some(range_iter) = top.downcast_ref_if_exact::(vm) { + if let Some(value) = range_iter.fast_next() { + self.push_value(vm.ctx.new_int(value).into()); + return Ok(true); + } + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let stop_exc = vm.new_stop_iteration(None); + self.fire_exception_trace(&stop_exc, vm)?; } + self.jump(self.for_iter_jump_target(target)); + return Ok(false); } - } - #[inline] - fn load_global_or_builtin(&self, name: &Py, vm: &VirtualMachine) -> PyResult { - if let Some(builtins_dict) = self.builtins_dict { - // Fast path: both globals and builtins are exact dicts - // SAFETY: builtins_dict is only set when globals is also exact dict - let globals_exact = unsafe { PyExact::ref_unchecked(self.globals.as_ref()) }; - globals_exact - .get_chain_exact(builtins_dict, name, vm)? - .ok_or_else(|| { - vm.new_name_error(format!("name '{name}' is not defined"), name.to_owned()) - }) - } else { - // Slow path: builtins is not a dict, use generic __getitem__ - if let Some(value) = self.globals.get_item_opt(name, vm)? 
{ - return Ok(value); + let top_of_stack = PyIter::new(top); + let next_obj = top_of_stack.next(vm); + + match next_obj { + Ok(PyIterReturn::Return(value)) => { + self.push_value(value); + Ok(true) } - self.builtins.get_item(name, vm).map_err(|e| { - if e.fast_isinstance(vm.ctx.exceptions.key_error) { - vm.new_name_error(format!("name '{name}' is not defined"), name.to_owned()) - } else { - e + Ok(PyIterReturn::StopIteration(value)) => { + // Fire 'exception' trace event for StopIteration, matching + // FOR_ITER's inline call to _PyEval_MonitorRaise. + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let stop_exc = vm.new_stop_iteration(value); + self.fire_exception_trace(&stop_exc, vm)?; } - }) + self.jump(self.for_iter_jump_target(target)); + Ok(false) + } + Err(next_error) => { + self.pop_value(); + Err(next_error) + } } } - #[cfg_attr(feature = "flame-it", flame("Frame"))] - fn import(&mut self, vm: &VirtualMachine, module_name: Option<&Py>) -> PyResult<()> { - let module_name = module_name.unwrap_or(vm.ctx.empty_str); - let top = self.pop_value(); - let from_list = match >::try_from_object(vm, top)? { - Some(from_list) => from_list.try_into_typed::(vm)?, - None => vm.ctx.empty_tuple_typed().to_owned(), - }; - let level = usize::try_from_object(vm, self.pop_value())?; + /// Compute the jump target for FOR_ITER exhaustion: skip END_FOR and jump to POP_ITER. 
+ fn for_iter_jump_target(&self, target: bytecode::Label) -> bytecode::Label { + let target_idx = target.0 as usize; + if let Some(unit) = self.code.instructions.get(target_idx) + && matches!( + unit.op, + bytecode::Instruction::EndFor | bytecode::Instruction::InstrumentedEndFor + ) + { + return bytecode::Label(target.0 + 1); + } + target + } + fn execute_make_function(&mut self, vm: &VirtualMachine) -> FrameResult { + // MakeFunction only takes code object, no flags + let code_obj: PyRef = self + .pop_value() + .downcast() + .expect("Stack value should be code object"); - let module = vm.import_from(module_name, &from_list, level)?; + // Create function with minimal attributes + let func_obj = PyFunction::new(code_obj, self.globals.clone(), vm)?.into_pyobject(vm); - self.push_value(module); - Ok(()) + self.push_value(func_obj); + Ok(None) } - #[cfg_attr(feature = "flame-it", flame("Frame"))] - fn import_from(&mut self, vm: &VirtualMachine, idx: bytecode::NameIdx) -> PyResult { - let module = self.top_value(); - let name = self.code.names[idx as usize]; + fn execute_set_function_attribute( + &mut self, + vm: &VirtualMachine, + attr: bytecode::MakeFunctionFlags, + ) -> FrameResult { + // SET_FUNCTION_ATTRIBUTE sets attributes on a function + // Stack: [..., attr_value, func] -> [..., func] + // Stack order: func is at -1, attr_value is at -2 - // Load attribute, and transform any error into import error. - if let Some(obj) = vm.get_attribute_opt(module.to_owned(), name)? 
{ - return Ok(obj); - } - // fallback to importing '{module.__name__}.{name}' from sys.modules - let fallback_module = (|| { - let mod_name = module.get_attr(identifier!(vm, __name__), vm).ok()?; - let mod_name = mod_name.downcast_ref::()?; - let full_mod_name = format!("{mod_name}.{name}"); - let sys_modules = vm.sys_module.get_attr("modules", vm).ok()?; - sys_modules.get_item(&full_mod_name, vm).ok() - })(); + let func = self.pop_value_opt(); + let attr_value = expect_unchecked(self.replace_top(func), "attr_value must not be null"); - if let Some(sub_module) = fallback_module { - return Ok(sub_module); - } + let func = self.top_value(); + // Get the function reference and call the new method + let func_ref = func + .downcast_ref::() + .expect("SET_FUNCTION_ATTRIBUTE expects function on stack"); - use crate::import::{ - get_spec_file_origin, is_possibly_shadowing_path, is_stdlib_module_name, + let payload: &PyFunction = func_ref.payload(); + // SetFunctionAttribute always follows MakeFunction, so at this point + // there are no other references to func. It is therefore safe to treat it as mutable. + unsafe { + let payload_ptr = payload as *const PyFunction as *mut PyFunction; + (*payload_ptr).set_function_attribute(attr, attr_value, vm)?; }; - // Get module name for the error message - let mod_name_obj = module.get_attr(identifier!(vm, __name__), vm).ok(); - let mod_name_str = mod_name_obj - .as_ref() - .and_then(|n| n.downcast_ref::().map(|s| s.as_str().to_owned())); - let module_name = mod_name_str.as_deref().unwrap_or(""); - - let spec = module - .get_attr("__spec__", vm) - .ok() - .filter(|s| !vm.is_none(s)); - - let origin = get_spec_file_origin(&spec, vm); + Ok(None) + } - let is_possibly_shadowing = origin - .as_ref() - .map(|o| is_possibly_shadowing_path(o, vm)) - .unwrap_or(false); - let is_possibly_shadowing_stdlib = if is_possibly_shadowing { - if let Some(ref mod_name) = mod_name_obj { - is_stdlib_module_name(mod_name, vm)? 
- } else { - false + #[cfg_attr(feature = "flame-it", flame("Frame"))] + fn execute_bin_op(&mut self, vm: &VirtualMachine, op: bytecode::BinaryOperator) -> FrameResult { + let b_ref = &self.pop_value(); + let a_ref = &self.pop_value(); + let value = match op { + // BINARY_OP_ADD_INT / BINARY_OP_SUBTRACT_INT fast paths: + // bypass binary_op1 dispatch for exact int types, use i64 arithmetic + // when possible to avoid BigInt heap allocation. + bytecode::BinaryOperator::Add | bytecode::BinaryOperator::InplaceAdd => { + if let (Some(a), Some(b)) = ( + a_ref.downcast_ref_if_exact::(vm), + b_ref.downcast_ref_if_exact::(vm), + ) { + Ok(self.int_add(a.as_bigint(), b.as_bigint(), vm)) + } else if matches!(op, bytecode::BinaryOperator::Add) { + vm._add(a_ref, b_ref) + } else { + vm._iadd(a_ref, b_ref) + } } - } else { - false - }; - - let msg = if is_possibly_shadowing_stdlib { - let origin = origin.as_ref().unwrap(); - format!( - "cannot import name '{name}' from '{module_name}' \ - (consider renaming '{origin}' since it has the same \ - name as the standard library module named '{module_name}' \ - and prevents importing that standard library module)" - ) - } else { - let is_init = is_module_initializing(module, vm); - if is_init { - if is_possibly_shadowing { - let origin = origin.as_ref().unwrap(); - format!( - "cannot import name '{name}' from '{module_name}' \ - (consider renaming '{origin}' if it has the same name \ - as a library you intended to import)" - ) - } else if let Some(ref path) = origin { - format!( - "cannot import name '{name}' from partially initialized module \ - '{module_name}' (most likely due to a circular import) ({path})" - ) + bytecode::BinaryOperator::Subtract | bytecode::BinaryOperator::InplaceSubtract => { + if let (Some(a), Some(b)) = ( + a_ref.downcast_ref_if_exact::(vm), + b_ref.downcast_ref_if_exact::(vm), + ) { + Ok(self.int_sub(a.as_bigint(), b.as_bigint(), vm)) + } else if matches!(op, bytecode::BinaryOperator::Subtract) { + 
vm._sub(a_ref, b_ref) } else { - format!( - "cannot import name '{name}' from partially initialized module \ - '{module_name}' (most likely due to a circular import)" - ) + vm._isub(a_ref, b_ref) } - } else if let Some(ref path) = origin { - format!("cannot import name '{name}' from '{module_name}' ({path})") - } else { - format!("cannot import name '{name}' from '{module_name}' (unknown location)") } - }; - let err = vm.new_import_error(msg, vm.ctx.new_utf8_str(module_name)); - - if let Some(ref path) = origin { - let _ignore = err - .as_object() - .set_attr("path", vm.ctx.new_str(path.as_str()), vm); - } + bytecode::BinaryOperator::Multiply => vm._mul(a_ref, b_ref), + bytecode::BinaryOperator::MatrixMultiply => vm._matmul(a_ref, b_ref), + bytecode::BinaryOperator::Power => vm._pow(a_ref, b_ref, vm.ctx.none.as_object()), + bytecode::BinaryOperator::TrueDivide => vm._truediv(a_ref, b_ref), + bytecode::BinaryOperator::FloorDivide => vm._floordiv(a_ref, b_ref), + bytecode::BinaryOperator::Remainder => vm._mod(a_ref, b_ref), + bytecode::BinaryOperator::Lshift => vm._lshift(a_ref, b_ref), + bytecode::BinaryOperator::Rshift => vm._rshift(a_ref, b_ref), + bytecode::BinaryOperator::Xor => vm._xor(a_ref, b_ref), + bytecode::BinaryOperator::Or => vm._or(a_ref, b_ref), + bytecode::BinaryOperator::And => vm._and(a_ref, b_ref), + bytecode::BinaryOperator::InplaceMultiply => vm._imul(a_ref, b_ref), + bytecode::BinaryOperator::InplaceMatrixMultiply => vm._imatmul(a_ref, b_ref), + bytecode::BinaryOperator::InplacePower => { + vm._ipow(a_ref, b_ref, vm.ctx.none.as_object()) + } + bytecode::BinaryOperator::InplaceTrueDivide => vm._itruediv(a_ref, b_ref), + bytecode::BinaryOperator::InplaceFloorDivide => vm._ifloordiv(a_ref, b_ref), + bytecode::BinaryOperator::InplaceRemainder => vm._imod(a_ref, b_ref), + bytecode::BinaryOperator::InplaceLshift => vm._ilshift(a_ref, b_ref), + bytecode::BinaryOperator::InplaceRshift => vm._irshift(a_ref, b_ref), + bytecode::BinaryOperator::InplaceXor 
=> vm._ixor(a_ref, b_ref), + bytecode::BinaryOperator::InplaceOr => vm._ior(a_ref, b_ref), + bytecode::BinaryOperator::InplaceAnd => vm._iand(a_ref, b_ref), + bytecode::BinaryOperator::Subscr => a_ref.get_item(b_ref.as_object(), vm), + }?; - // name_from = the attribute name that failed to import (best-effort metadata) - let _ignore = err.as_object().set_attr("name_from", name.to_owned(), vm); + self.push_value(value); + Ok(None) + } - Err(err) + /// Int addition with i64 fast path to avoid BigInt heap allocation. + #[inline] + fn int_add(&self, a: &BigInt, b: &BigInt, vm: &VirtualMachine) -> PyObjectRef { + use num_traits::ToPrimitive; + if let (Some(av), Some(bv)) = (a.to_i64(), b.to_i64()) + && let Some(result) = av.checked_add(bv) + { + return vm.ctx.new_int(result).into(); + } + vm.ctx.new_int(a + b).into() } - #[cfg_attr(feature = "flame-it", flame("Frame"))] - fn import_star(&mut self, vm: &VirtualMachine) -> PyResult<()> { - let module = self.pop_value(); + /// Int subtraction with i64 fast path to avoid BigInt heap allocation. + #[inline] + fn int_sub(&self, a: &BigInt, b: &BigInt, vm: &VirtualMachine) -> PyObjectRef { + use num_traits::ToPrimitive; + if let (Some(av), Some(bv)) = (a.to_i64(), b.to_i64()) + && let Some(result) = av.checked_sub(bv) + { + return vm.ctx.new_int(result).into(); + } + vm.ctx.new_int(a - b).into() + } - let Some(dict) = module.dict() else { - return Ok(()); + #[cold] + fn setup_annotations(&mut self, vm: &VirtualMachine) -> FrameResult { + let __annotations__ = identifier!(vm, __annotations__); + let locals_obj = self.locals.as_object(vm); + // Try using locals as dict first, if not, fallback to generic method. + let has_annotations = if let Some(d) = locals_obj.downcast_ref_if_exact::(vm) { + d.contains_key(__annotations__, vm) + } else { + self._in(vm, __annotations__.as_object(), locals_obj)? 
}; + if !has_annotations { + locals_obj.set_item(__annotations__, vm.ctx.new_dict().into(), vm)?; + } + Ok(None) + } - let mod_name = module - .get_attr(identifier!(vm, __name__), vm) - .ok() - .and_then(|n| n.downcast::().ok()); + /// _PyEval_UnpackIterableStackRef + fn unpack_sequence(&mut self, size: u32, vm: &VirtualMachine) -> FrameResult { + let value = self.pop_value(); + let size = size as usize; - let require_str = |obj: PyObjectRef, attr: &str| -> PyResult> { - obj.downcast().map_err(|obj: PyObjectRef| { - let source = if let Some(ref mod_name) = mod_name { - format!("{}.{attr}", mod_name.as_wtf8()) - } else { - attr.to_owned() - }; - let repr = obj.repr(vm).unwrap_or_else(|_| vm.ctx.new_str("?")); + // Fast path for exact tuple/list types (not subclasses) — push + // elements directly from the slice without intermediate Vec allocation, + // matching UNPACK_SEQUENCE_TUPLE / UNPACK_SEQUENCE_LIST specializations. + let cls = value.class(); + if cls.is(vm.ctx.types.tuple_type) { + let tuple = value.downcast_ref::().unwrap(); + return self.unpack_fast(tuple.as_slice(), size, vm); + } + if cls.is(vm.ctx.types.list_type) { + let list = value.downcast_ref::().unwrap(); + let borrowed = list.borrow_vec(); + return self.unpack_fast(&borrowed, size, vm); + } + + // General path — iterate up to `size + 1` elements to avoid + // consuming the entire iterator (fixes hang on infinite sequences). 
+ let not_iterable = value.class().slots.iter.load().is_none() + && value + .get_class_attr(vm.ctx.intern_str("__getitem__")) + .is_none(); + let iter = PyIter::try_from_object(vm, value.clone()).map_err(|e| { + if not_iterable && e.class().is(vm.ctx.exceptions.type_error) { vm.new_type_error(format!( - "{} in {} must be str, not {}", - repr.as_wtf8(), - source, - obj.class().name() + "cannot unpack non-iterable {} object", + value.class().name() )) - }) - }; - - let locals_map = self.locals.mapping(vm); - if let Ok(all) = dict.get_item(identifier!(vm, __all__), vm) { - let items: Vec = all.try_to_value(vm)?; - for item in items { - let name = require_str(item, "__all__")?; - let value = module.get_attr(&*name, vm)?; - locals_map.ass_subscript(&name, Some(value), vm)?; + } else { + e } - } else { - for (k, v) in dict { - let k = require_str(k, "__dict__")?; - if !k.as_bytes().starts_with(b"_") { - locals_map.ass_subscript(&k, Some(v), vm)?; + })?; + + let mut elements = Vec::with_capacity(size); + for _ in 0..size { + match iter.next(vm)? { + PyIterReturn::Return(item) => elements.push(item), + PyIterReturn::StopIteration(_) => { + return Err(vm.new_value_error(format!( + "not enough values to unpack (expected {size}, got {})", + elements.len() + ))); } } } - Ok(()) + + // Check that the iterator is exhausted. + match iter.next(vm)? { + PyIterReturn::Return(_) => { + // For exact dict types, show "got N" using the container's + // size (PyDict_Size). Exact tuple/list are handled by the + // fast path above and never reach here. 
+ let msg = if value.class().is(vm.ctx.types.dict_type) { + if let Ok(got) = value.length(vm) { + if got > size { + format!("too many values to unpack (expected {size}, got {got})") + } else { + format!("too many values to unpack (expected {size})") + } + } else { + format!("too many values to unpack (expected {size})") + } + } else { + format!("too many values to unpack (expected {size})") + }; + Err(vm.new_value_error(msg)) + } + PyIterReturn::StopIteration(_) => { + self.state.stack.extend( + elements + .into_iter() + .rev() + .map(|e| Some(PyStackRef::new_owned(e))), + ); + Ok(None) + } + } } - /// Unwind blocks. - /// The reason for unwinding gives a hint on what to do when - /// unwinding a block. - /// Optionally returns an exception. - #[cfg_attr(feature = "flame-it", flame("Frame"))] - fn unwind_blocks(&mut self, vm: &VirtualMachine, reason: UnwindReason) -> FrameResult { - // use exception table for exception handling - match reason { - UnwindReason::Raising { exception } => { - // Look up handler in exception table - // lasti points to NEXT instruction (already incremented in run loop) - // The exception occurred at the previous instruction - // Python uses signed int where INSTR_OFFSET() - 1 = -1 before first instruction - // We use u32, so check for 0 explicitly (equivalent to CPython's -1) - if self.lasti() == 0 { - // No instruction executed yet, no handler can match - return Err(exception); + fn unpack_fast( + &mut self, + elements: &[PyObjectRef], + size: usize, + vm: &VirtualMachine, + ) -> FrameResult { + match elements.len().cmp(&size) { + core::cmp::Ordering::Equal => { + for elem in elements.iter().rev() { + self.push_value(elem.clone()); } - let offset = self.lasti() - 1; - if let Some(entry) = - bytecode::find_exception_handler(&self.code.exceptiontable, offset) - { - // Fire EXCEPTION_HANDLED before setting up handler. - // If the callback raises, the handler is NOT set up and the - // new exception propagates instead. 
- if vm.state.monitoring_events.load() & monitoring::EVENT_EXCEPTION_HANDLED != 0 - { - let byte_offset = offset * 2; - let exc_obj: PyObjectRef = exception.clone().into(); - monitoring::fire_exception_handled(vm, self.code, byte_offset, &exc_obj)?; - } + Ok(None) + } + core::cmp::Ordering::Greater => Err(vm.new_value_error(format!( + "too many values to unpack (expected {size}, got {})", + elements.len() + ))), + core::cmp::Ordering::Less => Err(vm.new_value_error(format!( + "not enough values to unpack (expected {size}, got {})", + elements.len() + ))), + } + } - // 1. Pop stack to entry.depth - while self.state.stack.len() > entry.depth as usize { - self.state.stack.pop(); - } + fn convert_value( + &mut self, + conversion: bytecode::ConvertValueOparg, + vm: &VirtualMachine, + ) -> FrameResult { + use bytecode::ConvertValueOparg; + let value = self.pop_value(); + let value = match conversion { + ConvertValueOparg::Str => value.str(vm)?.into(), + ConvertValueOparg::Repr => value.repr(vm)?.into(), + ConvertValueOparg::Ascii => vm.ctx.new_str(builtins::ascii(value, vm)?).into(), + ConvertValueOparg::None => value, + }; - // 2. If push_lasti=true (SETUP_CLEANUP), push lasti before exception - // pushes lasti as PyLong - if entry.push_lasti { - self.push_value(vm.ctx.new_int(offset as i32).into()); - } + self.push_value(value); + Ok(None) + } - // 3. Push exception onto stack - // always push exception, PUSH_EXC_INFO transforms [exc] -> [prev_exc, exc] - // Note: Do NOT call vm.set_exception here! PUSH_EXC_INFO will do it. - // PUSH_EXC_INFO needs to get prev_exc from vm.current_exception() BEFORE setting the new one. - self.push_value(exception.into()); + fn _in(&self, vm: &VirtualMachine, needle: &PyObject, haystack: &PyObject) -> PyResult { + let found = vm._contains(haystack, needle)?; + Ok(found) + } - // 4. 
Jump to handler - self.jump(bytecode::Label(entry.target)); + #[inline(always)] + fn _not_in( + &self, + vm: &VirtualMachine, + needle: &PyObject, + haystack: &PyObject, + ) -> PyResult { + Ok(!self._in(vm, needle, haystack)?) + } - Ok(None) - } else { - // No handler found, propagate exception - Err(exception) - } - } - UnwindReason::Returning { value } => Ok(Some(ExecutionResult::Return(value))), + #[cfg_attr(feature = "flame-it", flame("Frame"))] + fn execute_compare( + &mut self, + vm: &VirtualMachine, + op: bytecode::ComparisonOperator, + ) -> FrameResult { + let b = self.pop_value(); + let a = self.pop_value(); + let cmp_op: PyComparisonOp = op.into(); + + // COMPARE_OP_INT: leaf type, cannot recurse — skip rich_compare dispatch + if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + let result = cmp_op.eval_ord(a_int.as_bigint().cmp(b_int.as_bigint())); + self.push_value(vm.ctx.new_bool(result).into()); + return Ok(None); + } + // COMPARE_OP_FLOAT: leaf type, cannot recurse — skip rich_compare dispatch. + // Falls through on NaN (partial_cmp returns None) for correct != semantics. 
+ if let (Some(a_f), Some(b_f)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Some(ord) = a_f.to_f64().partial_cmp(&b_f.to_f64()) + { + let result = cmp_op.eval_ord(ord); + self.push_value(vm.ctx.new_bool(result).into()); + return Ok(None); } - } - fn execute_store_subscript(&mut self, vm: &VirtualMachine) -> FrameResult { - let idx = self.pop_value(); - let obj = self.pop_value(); - let value = self.pop_value(); - obj.set_item(&*idx, value, vm)?; + let value = a.rich_compare(b, cmp_op, vm)?; + self.push_value(value); Ok(None) } - fn execute_delete_subscript(&mut self, vm: &VirtualMachine) -> FrameResult { - let idx = self.pop_value(); - let obj = self.pop_value(); - obj.del_item(&*idx, vm)?; - Ok(None) - } + fn load_attr(&mut self, vm: &VirtualMachine, oparg: LoadAttr) -> FrameResult { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; - fn execute_build_map(&mut self, vm: &VirtualMachine, size: u32) -> FrameResult { - let size = size as usize; - let map_obj = vm.ctx.new_dict(); - for (key, value) in self.pop_multiple(2 * size).tuples() { - map_obj.set_item(&*key, value, vm)?; + let counter = self.code.instructions.read_adaptive_counter(cache_base); + if counter > 0 { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, counter - 1); + } + } else { + self.specialize_load_attr(vm, oparg, instr_idx, cache_base); } - self.push_value(map_obj.into()); - Ok(None) + self.load_attr_slow(vm, oparg) } - fn execute_build_slice( + fn specialize_load_attr( &mut self, - vm: &VirtualMachine, - argc: bytecode::BuildSliceArgCount, - ) -> FrameResult { - let step = match argc { - bytecode::BuildSliceArgCount::Two => None, - bytecode::BuildSliceArgCount::Three => Some(self.pop_value()), - }; - let stop = self.pop_value(); - let start = self.pop_value(); + _vm: &VirtualMachine, + oparg: LoadAttr, + instr_idx: usize, + cache_base: usize, + ) { + let obj = self.top_value(); + let cls = obj.class(); - 
let obj = PySlice { - start: Some(start), - stop, - step, + // Check if this is a type object (class attribute access) + if obj.downcast_ref::().is_some() { + self.specialize_class_load_attr(_vm, oparg, instr_idx, cache_base); + return; } - .into_ref(&vm.ctx); - self.push_value(obj.into()); - Ok(None) - } - fn collect_positional_args(&mut self, nargs: u32) -> FuncArgs { - FuncArgs { - args: self.pop_multiple(nargs as usize).collect(), - kwargs: IndexMap::new(), + // Only specialize if getattro is the default (PyBaseObject::getattro) + let is_default_getattro = cls + .slots + .getattro + .load() + .is_some_and(|f| f as usize == PyBaseObject::getattro as *const () as usize); + if !is_default_getattro { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; } - } - fn collect_keyword_args(&mut self, nargs: u32) -> FuncArgs { - let kwarg_names = self - .pop_value() - .downcast::() - .expect("kwarg names should be tuple of strings"); - let args = self.pop_multiple(nargs as usize); + // Get or assign type version + let mut type_version = cls.tp_version_tag.load(Acquire); + if type_version == 0 { + type_version = cls.assign_version_tag(); + } + if type_version == 0 { + // Version counter overflow — backoff to avoid re-attempting every execution + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } - let kwarg_names = kwarg_names.as_slice().iter().map(|pyobj| { - pyobj - .downcast_ref::() - .unwrap() - .as_str() - .to_owned() - }); - FuncArgs::with_kwargs_names(args, kwarg_names) - } + // Module attribute access: use LoadAttrModule + if obj.downcast_ref_if_exact::(_vm).is_some() { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrModule); + } + return; + } - fn collect_ex_args(&mut self, vm: &VirtualMachine) -> PyResult { - let 
kwargs_or_null = self.pop_value_opt(); - let kwargs = if let Some(kw_obj) = kwargs_or_null { - let mut kwargs = IndexMap::new(); + let attr_name = self.code.names[oparg.name_idx() as usize]; - // Stack: [callable, self_or_null, args_tuple] - let callable = self.nth_value(2); - let func_str = Self::object_function_str(callable, vm); + // Look up attr in class via MRO + let cls_attr = cls.get_attr(attr_name); + let class_has_dict = cls.slots.flags.has_feature(PyTypeFlags::HAS_DICT); - Self::iterate_mapping_keys(vm, &kw_obj, &func_str, |key| { - let key_str = key - .downcast_ref::() - .ok_or_else(|| vm.new_type_error("keywords must be strings"))?; - let value = kw_obj.get_item(&*key, vm)?; - kwargs.insert(key_str.as_str().to_owned(), value); - Ok(()) - })?; - kwargs - } else { - IndexMap::new() - }; - let args_obj = self.pop_value(); - let args = if let Some(tuple) = args_obj.downcast_ref::() { - tuple.as_slice().to_vec() + if oparg.is_method() { + // Method specialization + if let Some(ref descr) = cls_attr + && descr + .class() + .slots + .flags + .has_feature(PyTypeFlags::METHOD_DESCRIPTOR) + { + let descr_ptr = &**descr as *const PyObject as u64; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u64(cache_base + 5, descr_ptr); + } + + let new_op = if !class_has_dict { + Instruction::LoadAttrMethodNoDict + } else { + Instruction::LoadAttrMethodWithValues + }; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + return; + } + // Can't specialize this method call + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } } else { - // Single *arg passed directly; convert to sequence at runtime. 
- // Stack: [callable, self_or_null] - let callable = self.nth_value(1); - let func_str = Self::object_function_str(callable, vm); - let not_iterable = args_obj.class().slots.iter.load().is_none() - && args_obj - .get_class_attr(vm.ctx.intern_str("__getitem__")) - .is_none(); - args_obj.try_to_value::>(vm).map_err(|e| { - if not_iterable && e.class().is(vm.ctx.exceptions.type_error) { - vm.new_type_error(format!( - "{} argument after * must be an iterable, not {}", - func_str, - args_obj.class().name() - )) + // Regular attribute access + let has_data_descr = cls_attr.as_ref().is_some_and(|descr| { + let descr_cls = descr.class(); + descr_cls.slots.descr_get.load().is_some() + && descr_cls.slots.descr_set.load().is_some() + }); + let has_descr_get = cls_attr + .as_ref() + .is_some_and(|descr| descr.class().slots.descr_get.load().is_some()); + + if has_data_descr { + // Check for member descriptor (slot access) + if let Some(ref descr) = cls_attr + && let Some(member_descr) = descr.downcast_ref::() + && let MemberGetter::Offset(offset) = member_descr.member.getter + { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u32(cache_base + 3, offset as u32); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrSlot); + } + } else if let Some(ref descr) = cls_attr + && descr.downcast_ref::().is_some() + { + // Property descriptor — cache the property object pointer + let descr_ptr = &**descr as *const PyObject as u64; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u64(cache_base + 5, descr_ptr); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrProperty); + } } else { - e + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } else if has_descr_get { + // Non-data descriptor with __get__ — can't specialize + 
unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } else if class_has_dict { + if let Some(ref descr) = cls_attr { + // Plain class attr + class supports dict — check dict first, fallback + let descr_ptr = &**descr as *const PyObject as u64; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u64(cache_base + 5, descr_ptr); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrNondescriptorWithValues); + } + } else { + // No class attr, must be in instance dict + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrInstanceValue); + } } - })? - }; - Ok(FuncArgs { args, kwargs }) - } - - /// Returns a display string for a callable object for use in error messages. - /// For objects with `__qualname__`, returns "module.qualname()" or "qualname()". - /// For other objects, returns repr(obj). 
- fn object_function_str(obj: &PyObject, vm: &VirtualMachine) -> Wtf8Buf { - let repr_fallback = || { - obj.repr(vm) - .as_ref() - .map_or("?".as_ref(), |s| s.as_wtf8()) - .to_owned() - }; - let Ok(qualname) = obj.get_attr(vm.ctx.intern_str("__qualname__"), vm) else { - return repr_fallback(); - }; - let Some(qualname_str) = qualname.downcast_ref::() else { - return repr_fallback(); - }; - if let Ok(module) = obj.get_attr(vm.ctx.intern_str("__module__"), vm) - && let Some(module_str) = module.downcast_ref::() - && module_str.as_bytes() != b"builtins" - { - return wtf8_concat!(module_str.as_wtf8(), ".", qualname_str.as_wtf8(), "()"); + } else if let Some(ref descr) = cls_attr { + // No dict support, plain class attr — cache directly + let descr_ptr = &**descr as *const PyObject as u64; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u64(cache_base + 5, descr_ptr); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrNondescriptorNoDict); + } + } else { + // No dict, no class attr — can't specialize + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } } - wtf8_concat!(qualname_str.as_wtf8(), "()") } - /// Helper function to iterate over mapping keys using the keys() method. - /// This ensures proper order preservation for OrderedDict and other custom mappings. 
- fn iterate_mapping_keys( - vm: &VirtualMachine, - mapping: &PyObject, - func_str: &Wtf8, - mut key_handler: F, - ) -> PyResult<()> - where - F: FnMut(PyObjectRef) -> PyResult<()>, - { - let Some(keys_method) = vm.get_method(mapping.to_owned(), vm.ctx.intern_str("keys")) else { - return Err(vm.new_type_error(format!( - "{} argument after ** must be a mapping, not {}", - func_str, - mapping.class().name() - ))); - }; + fn specialize_class_load_attr( + &mut self, + _vm: &VirtualMachine, + oparg: LoadAttr, + instr_idx: usize, + cache_base: usize, + ) { + let obj = self.top_value(); + let owner_type = obj.downcast_ref::().unwrap(); - let keys = keys_method?.call((), vm)?.get_iter(vm)?; - while let PyIterReturn::Return(key) = keys.next(vm)? { - key_handler(key)?; + // Get or assign type version for the type object itself + let mut type_version = owner_type.tp_version_tag.load(Acquire); + if type_version == 0 { + type_version = owner_type.assign_version_tag(); + } + if type_version == 0 { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; } - Ok(()) - } - #[inline] - fn execute_call(&mut self, args: FuncArgs, vm: &VirtualMachine) -> FrameResult { - // Stack: [callable, self_or_null, ...] 
- let self_or_null = self.pop_value_opt(); // Option - let callable = self.pop_value(); + let attr_name = self.code.names[oparg.name_idx() as usize]; - let final_args = if let Some(self_val) = self_or_null { - let mut args = args; - args.prepend_arg(self_val); - args - } else { - args - }; + // Check metaclass: ensure no data descriptor on metaclass for this name + let mcl = obj.class(); + let mcl_attr = mcl.get_attr(attr_name); + if let Some(ref attr) = mcl_attr { + let attr_class = attr.class(); + if attr_class.slots.descr_set.load().is_some() { + // Data descriptor on metaclass — can't specialize + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; + } + } - let value = callable.call(final_args, vm)?; - self.push_value(value); - Ok(None) + // Look up attr in the type's own MRO + let cls_attr = owner_type.get_attr(attr_name); + if let Some(ref descr) = cls_attr { + let descr_class = descr.class(); + let has_descr_get = descr_class.slots.descr_get.load().is_some(); + if !has_descr_get { + // METHOD or NON_DESCRIPTOR — can cache directly + let descr_ptr = &**descr as *const PyObject as u64; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .write_cache_u64(cache_base + 5, descr_ptr); + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadAttrClass); + } + return; + } + } + + // Can't specialize + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } } - /// Instrumented version of execute_call: fires CALL, C_RETURN, and C_RAISE events. 
- fn execute_call_instrumented(&mut self, args: FuncArgs, vm: &VirtualMachine) -> FrameResult { - let self_or_null = self.pop_value_opt(); - let callable = self.pop_value(); + fn load_attr_slow(&mut self, vm: &VirtualMachine, oparg: LoadAttr) -> FrameResult { + let attr_name = self.code.names[oparg.name_idx() as usize]; + let parent = self.pop_value(); - let final_args = if let Some(self_val) = self_or_null { - let mut args = args; - args.prepend_arg(self_val); - args + if oparg.is_method() { + // Method call: push [method, self_or_null] + let method = PyMethod::get(parent.clone(), attr_name, vm)?; + match method { + PyMethod::Function { target: _, func } => { + self.push_value(func); + self.push_value(parent); + } + PyMethod::Attribute(val) => { + self.push_value(val); + self.push_null(); + } + } } else { - args - }; - - let is_python_call = callable.downcast_ref::().is_some(); + // Regular attribute access + let obj = parent.get_attr(attr_name, vm)?; + self.push_value(obj); + } + Ok(None) + } - // Fire CALL event - let call_arg0 = if self.monitoring_mask & monitoring::EVENT_CALL != 0 { - let arg0 = final_args - .args - .first() - .cloned() - .unwrap_or_else(|| monitoring::get_missing(vm)); - let offset = (self.lasti() - 1) * 2; - monitoring::fire_call(vm, self.code, offset, &callable, arg0.clone())?; - Some(arg0) - } else { - None - }; + fn specialize_binary_op( + &mut self, + vm: &VirtualMachine, + op: bytecode::BinaryOperator, + instr_idx: usize, + cache_base: usize, + ) { + let b = self.top_value(); + let a = self.nth_value(1); - match callable.call(final_args, vm) { - Ok(value) => { - if let Some(arg0) = call_arg0 - && !is_python_call + let new_op = match op { + bytecode::BinaryOperator::Add => { + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() { - let offset = (self.lasti() - 1) * 2; - monitoring::fire_c_return(vm, self.code, offset, &callable, arg0)?; + Some(Instruction::BinaryOpAddInt) + } else if 
a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpAddFloat) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpAddUnicode) + } else { + None } - self.push_value(value); - Ok(None) } - Err(exc) => { - let exc = if let Some(arg0) = call_arg0 - && !is_python_call + bytecode::BinaryOperator::Subtract => { + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() { - let offset = (self.lasti() - 1) * 2; - match monitoring::fire_c_raise(vm, self.code, offset, &callable, arg0) { - Ok(()) => exc, - Err(monitor_exc) => monitor_exc, - } + Some(Instruction::BinaryOpSubtractInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpSubtractFloat) } else { - exc - }; - Err(exc) + None + } } - } - } - - fn execute_raise(&mut self, vm: &VirtualMachine, kind: bytecode::RaiseKind) -> FrameResult { - let cause = match kind { - bytecode::RaiseKind::RaiseCause => { - let val = self.pop_value(); - Some(if vm.is_none(&val) { - // if the cause arg is none, we clear the cause + bytecode::BinaryOperator::Multiply => { + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpMultiplyInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpMultiplyFloat) + } else { + None + } + } + bytecode::BinaryOperator::Subscr => { + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpSubscrListInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpSubscrTupleInt) + } else if a.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::BinaryOpSubscrDict) + } else if 
a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpSubscrStrInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref::().is_some() + { + Some(Instruction::BinaryOpSubscrListSlice) + } else { None + } + } + bytecode::BinaryOperator::InplaceAdd => { + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpInplaceAddUnicode) } else { - // if the cause arg is an exception, we overwrite it - let ctor = ExceptionCtor::try_from_object(vm, val).map_err(|_| { - vm.new_type_error("exception causes must derive from BaseException") - })?; - Some(ctor.instantiate(vm)?) - }) + None + } } - // if there's no cause arg, we keep the cause as is _ => None, }; - let exception = match kind { - bytecode::RaiseKind::RaiseCause | bytecode::RaiseKind::Raise => { - ExceptionCtor::try_from_object(vm, self.pop_value())?.instantiate(vm)? - } - bytecode::RaiseKind::BareRaise => { - // RAISE_VARARGS 0: bare `raise` gets exception from VM state - // This is the current exception set by PUSH_EXC_INFO - vm.topmost_exception().ok_or_else(|| { - vm.new_runtime_error("No active exception to reraise".to_owned()) - })? + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); } - bytecode::RaiseKind::ReraiseFromStack => { - // RERAISE: gets exception from stack top - // Used in cleanup blocks where exception is on stack after COPY 3 - let exc = self.pop_value(); - exc.downcast::().map_err(|obj| { - vm.new_type_error(format!( - "exceptions must derive from BaseException, not {}", - obj.class().name() - )) - })? 
+ } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } - }; - #[cfg(debug_assertions)] - debug!("Exception raised: {exception:?} with cause: {cause:?}"); - if let Some(cause) = cause { - exception.set___cause__(cause); } - Err(exception) } - fn builtin_coro<'a>(&self, coro: &'a PyObject) -> Option<&'a Coro> { - match_class!(match coro { - ref g @ PyGenerator => Some(g.as_coro()), - ref c @ PyCoroutine => Some(c.as_coro()), - _ => None, - }) + fn deoptimize_binary_op(&mut self, _op: bytecode::BinaryOperator) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::BinaryOp { op: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } } - fn _send( - &self, - jen: &PyObject, - val: PyObjectRef, + fn specialize_call( + &mut self, vm: &VirtualMachine, - ) -> PyResult { - match self.builtin_coro(jen) { - Some(coro) => coro.send(jen, val, vm), - // FIXME: turn return type to PyResult then ExecutionResult will be simplified - None if vm.is_none(&val) => PyIter::new(jen).next(vm), - None => { - let meth = jen.get_attr("send", vm)?; - PyIterReturn::from_pyresult(meth.call((val,), vm), vm) - } - } - } + nargs: u32, + instr_idx: usize, + cache_base: usize, + ) { + // Stack: [callable, self_or_null, arg1, ..., argN] + // callable is at position nargs + 1 from top + // self_or_null is at position nargs from top + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); + let callable = self.nth_value(nargs + 1); - fn execute_unpack_ex(&mut self, vm: &VirtualMachine, before: u8, after: u8) -> FrameResult { - let (before, after) = (before as usize, after as usize); - let value = self.pop_value(); - let not_iterable = value.class().slots.iter.load().is_none() - && value - 
.get_class_attr(vm.ctx.intern_str("__getitem__")) - .is_none(); - let elements: Vec<_> = value.try_to_value(vm).map_err(|e| { - if not_iterable && e.class().is(vm.ctx.exceptions.type_error) { - vm.new_type_error(format!( - "cannot unpack non-iterable {} object", - value.class().name() - )) - } else { - e + if let Some(func) = callable.downcast_ref::() { + let version = func.get_version_for_current_state(); + if version == 0 { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; } - })?; - let min_expected = before + after; - - let middle = elements.len().checked_sub(min_expected).ok_or_else(|| { - vm.new_value_error(format!( - "not enough values to unpack (expected at least {}, got {})", - min_expected, - elements.len() - )) - })?; - - let mut elements = elements; - // Elements on stack from right-to-left: - self.state.stack.extend( - elements - .drain(before + middle..) - .rev() - .map(|e| Some(PyStackRef::new_owned(e))), - ); - - let middle_elements = elements.drain(before..).collect(); - let t = vm.ctx.new_list(middle_elements); - self.push_value(t.into()); - - // Lastly the first reversed values: - self.state.stack.extend( - elements - .into_iter() - .rev() - .map(|e| Some(PyStackRef::new_owned(e))), - ); - Ok(None) - } + let effective_nargs = if self_or_null_is_some { + nargs + 1 + } else { + nargs + }; - #[inline] - fn jump(&mut self, label: bytecode::Label) { - let target_pc = label.0; - vm_trace!("jump from {:?} to {:?}", self.lasti(), target_pc); - self.update_lasti(|i| *i = target_pc); - } + let new_op = if func.can_specialize_call(effective_nargs) { + if self_or_null_is_some { + Instruction::CallBoundMethodExactArgs + } else { + Instruction::CallPyExactArgs + } + } else if self_or_null_is_some { + Instruction::CallBoundMethodGeneral + } else { + Instruction::CallPyGeneral + }; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + 
.write_cache_u32(cache_base + 1, version); + } + return; + } - /// Jump forward by `delta` code units from after instruction + caches. - /// lasti is already at instruction_index + 1, so after = lasti + caches. - /// - /// Unchecked arithmetic is intentional: the compiler guarantees valid - /// targets, and debug builds will catch overflow via Rust's default checks. - #[inline] - fn jump_relative_forward(&mut self, delta: u32, caches: u32) { - let target = self.lasti() + caches + delta; - self.update_lasti(|i| *i = target); - } + // Try to specialize method descriptor calls + if self_or_null_is_some && callable.downcast_ref::().is_some() { + let callable_tag = callable as *const PyObject as u32; + let new_op = match nargs { + 0 => Instruction::CallMethodDescriptorNoargs, + 1 => Instruction::CallMethodDescriptorO, + _ => Instruction::CallMethodDescriptorFast, + }; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + return; + } - /// Jump backward by `delta` code units from after instruction + caches. - /// - /// Unchecked arithmetic is intentional: the compiler guarantees valid - /// targets, and debug builds will catch underflow via Rust's default checks. 
- #[inline] - fn jump_relative_backward(&mut self, delta: u32, caches: u32) { - let target = self.lasti() + caches - delta; - self.update_lasti(|i| *i = target); - } + // Try to specialize builtin calls + if !self_or_null_is_some { + if let Some(native) = callable.downcast_ref::() + && native.zelf.is_none() + { + let callable_tag = callable as *const PyObject as u32; + let new_op = match (native.value.name, nargs) { + ("len", 1) => Instruction::CallLen, + ("isinstance", 2) => Instruction::CallIsinstance, + (_, 1) => Instruction::CallBuiltinO, + _ => Instruction::CallBuiltinFast, + }; + let new_op = Some(new_op); + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + return; + } + } + // type/str/tuple(x) specialization + if callable.class().is(vm.ctx.types.type_type) { + if nargs == 1 { + let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { + Some(Instruction::CallType1) + } else if callable.is(&vm.ctx.types.str_type.as_object()) { + Some(Instruction::CallStr1) + } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { + Some(Instruction::CallTuple1) + } else { + None + }; + if let Some(new_op) = new_op { + let callable_tag = callable as *const PyObject as u32; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + return; + } + } + // General builtin class call (any type with Callable) + let callable_tag = callable as *const PyObject as u32; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::CallBuiltinClass); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + return; + } + } - #[inline] - fn pop_jump_if_relative( - &mut self, - vm: &VirtualMachine, - arg: bytecode::OpArg, - caches: u32, - flag: bool, - ) -> FrameResult { - let obj = self.pop_value(); - let value = 
obj.try_to_bool(vm)?; - if value == flag { - self.jump_relative_forward(u32::from(arg), caches); + // General fallback: cache callable identity to skip re-specialization + let callable_tag = callable as *const PyObject as u32; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::CallNonPyGeneral); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); } - Ok(None) } - /// Advance the iterator on top of stack. - /// Returns `true` if iteration continued (item pushed), `false` if exhausted (jumped). - fn execute_for_iter( + fn specialize_call_kw( &mut self, - vm: &VirtualMachine, - target: bytecode::Label, - ) -> Result { - let top = self.top_value(); + _vm: &VirtualMachine, + nargs: u32, + instr_idx: usize, + cache_base: usize, + ) { + // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] + // callable is at position nargs + 2 from top + let stack = &self.state.stack; + let stack_len = stack.len(); + let self_or_null_is_some = stack[stack_len - nargs as usize - 2].is_some(); + let callable = self.nth_value(nargs + 2); - // FOR_ITER_RANGE: bypass generic iterator protocol for range iterators - if let Some(range_iter) = top.downcast_ref_if_exact::(vm) { - if let Some(value) = range_iter.next_fast() { - self.push_value(vm.ctx.new_int(value).into()); - return Ok(true); + if let Some(func) = callable.downcast_ref::() { + let version = func.func_version(); + if version == 0 { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; } - if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { - let stop_exc = vm.new_stop_iteration(None); - self.fire_exception_trace(&stop_exc, vm)?; + + let new_op = if self_or_null_is_some { + Instruction::CallKwBoundMethod + } else { + Instruction::CallKwPy + }; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, version); } - 
self.jump(self.for_iter_jump_target(target)); - return Ok(false); + return; } - let top_of_stack = PyIter::new(top); - let next_obj = top_of_stack.next(vm); + // General fallback + let callable_tag = callable as *const PyObject as u32; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::CallKwNonPy); + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + } - match next_obj { - Ok(PyIterReturn::Return(value)) => { - self.push_value(value); - Ok(true) - } - Ok(PyIterReturn::StopIteration(value)) => { - // Fire 'exception' trace event for StopIteration, matching - // FOR_ITER's inline call to _PyEval_MonitorRaise. - if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { - let stop_exc = vm.new_stop_iteration(value); - self.fire_exception_trace(&stop_exc, vm)?; - } - self.jump(self.for_iter_jump_target(target)); - Ok(false) + fn specialize_send(&mut self, instr_idx: usize, cache_base: usize) { + // Stack: [receiver, val] — receiver is at position 1 + let receiver = self.nth_value(1); + if self.builtin_coro(receiver).is_some() { + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::SendGen); } - Err(next_error) => { - self.pop_value(); - Err(next_error) + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } } } - /// Compute the jump target for FOR_ITER exhaustion: skip END_FOR and jump to POP_ITER. 
- fn for_iter_jump_target(&self, target: bytecode::Label) -> bytecode::Label { - let target_idx = target.0 as usize; - if let Some(unit) = self.code.instructions.get(target_idx) - && matches!( - unit.op, - bytecode::Instruction::EndFor | bytecode::Instruction::InstrumentedEndFor - ) + fn specialize_load_super_attr( + &mut self, + vm: &VirtualMachine, + oparg: LoadSuperAttr, + instr_idx: usize, + cache_base: usize, + ) { + // Stack: [global_super, class, self] + let global_super = self.nth_value(2); + let class = self.nth_value(1); + + if !global_super.is(&vm.ctx.types.super_type.as_object()) + || class.downcast_ref::().is_none() { - return bytecode::Label(target.0 + 1); + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + return; } - target - } - fn execute_make_function(&mut self, vm: &VirtualMachine) -> FrameResult { - // MakeFunction only takes code object, no flags - let code_obj: PyRef = self - .pop_value() - .downcast() - .expect("Stack value should be code object"); - - // Create function with minimal attributes - let func_obj = PyFunction::new(code_obj, self.globals.clone(), vm)?.into_pyobject(vm); - self.push_value(func_obj); - Ok(None) + let new_op = if oparg.is_load_method() { + Instruction::LoadSuperAttrMethod + } else { + Instruction::LoadSuperAttrAttr + }; + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } } - fn execute_set_function_attribute( + fn specialize_compare_op( &mut self, vm: &VirtualMachine, - attr: bytecode::MakeFunctionFlags, - ) -> FrameResult { - // SET_FUNCTION_ATTRIBUTE sets attributes on a function - // Stack: [..., attr_value, func] -> [..., func] - // Stack order: func is at -1, attr_value is at -2 + _op: bytecode::ComparisonOperator, + instr_idx: usize, + cache_base: usize, + ) { + let b = self.top_value(); + let a = self.nth_value(1); - let func = self.pop_value_opt(); - let attr_value = expect_unchecked(self.replace_top(func), "attr_value must not be 
null"); + let new_op = if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::CompareOpInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::CompareOpFloat) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::CompareOpStr) + } else { + None + }; - let func = self.top_value(); - // Get the function reference and call the new method - let func_ref = func - .downcast_ref::() - .expect("SET_FUNCTION_ATTRIBUTE expects function on stack"); + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } + } - let payload: &PyFunction = func_ref.payload(); - // SetFunctionAttribute always follows MakeFunction, so at this point - // there are no other references to func. It is therefore safe to treat it as mutable. - unsafe { - let payload_ptr = payload as *const PyFunction as *mut PyFunction; - (*payload_ptr).set_function_attribute(attr, attr_value, vm)?; - }; + /// Recover the ComparisonOperator from the instruction arg byte. + /// `replace_op` preserves the arg byte, so the original op remains accessible. 
+ fn compare_op_from_arg(&self, arg: bytecode::OpArg) -> PyComparisonOp { + bytecode::ComparisonOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::ComparisonOperator::Equal) + .into() + } - Ok(None) + fn deoptimize_compare_op(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::CompareOp { op: Arg::marker() }); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } } - #[cfg_attr(feature = "flame-it", flame("Frame"))] - fn execute_bin_op(&mut self, vm: &VirtualMachine, op: bytecode::BinaryOperator) -> FrameResult { - let b_ref = &self.pop_value(); - let a_ref = &self.pop_value(); - let value = match op { - // BINARY_OP_ADD_INT / BINARY_OP_SUBTRACT_INT fast paths: - // bypass binary_op1 dispatch for exact int types, use i64 arithmetic - // when possible to avoid BigInt heap allocation. - bytecode::BinaryOperator::Add | bytecode::BinaryOperator::InplaceAdd => { - if let (Some(a), Some(b)) = ( - a_ref.downcast_ref_if_exact::(vm), - b_ref.downcast_ref_if_exact::(vm), - ) { - Ok(self.int_add(a.as_bigint(), b.as_bigint(), vm)) - } else if matches!(op, bytecode::BinaryOperator::Add) { - vm._add(a_ref, b_ref) - } else { - vm._iadd(a_ref, b_ref) - } - } - bytecode::BinaryOperator::Subtract | bytecode::BinaryOperator::InplaceSubtract => { - if let (Some(a), Some(b)) = ( - a_ref.downcast_ref_if_exact::(vm), - b_ref.downcast_ref_if_exact::(vm), - ) { - Ok(self.int_sub(a.as_bigint(), b.as_bigint(), vm)) - } else if matches!(op, bytecode::BinaryOperator::Subtract) { - vm._sub(a_ref, b_ref) - } else { - vm._isub(a_ref, b_ref) - } + fn specialize_to_bool(&mut self, vm: &VirtualMachine, instr_idx: usize, _cache_base: usize) { + let obj = self.top_value(); + let cls = obj.class(); + + let new_op = if cls.is(vm.ctx.types.bool_type) { + Some(Instruction::ToBoolBool) + } else if cls.is(PyInt::class(&vm.ctx)) { + 
Some(Instruction::ToBoolInt) + } else if cls.is(vm.ctx.types.none_type) { + Some(Instruction::ToBoolNone) + } else if cls.is(PyList::class(&vm.ctx)) { + Some(Instruction::ToBoolList) + } else if cls.is(PyStr::class(&vm.ctx)) { + Some(Instruction::ToBoolStr) + } else if cls.slots.as_number.boolean.load().is_none() + && cls.slots.as_mapping.length.load().is_none() + && cls.slots.as_sequence.length.load().is_none() + { + Some(Instruction::ToBoolAlwaysTrue) + } else { + None + }; + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); } - bytecode::BinaryOperator::Multiply => vm._mul(a_ref, b_ref), - bytecode::BinaryOperator::MatrixMultiply => vm._matmul(a_ref, b_ref), - bytecode::BinaryOperator::Power => vm._pow(a_ref, b_ref, vm.ctx.none.as_object()), - bytecode::BinaryOperator::TrueDivide => vm._truediv(a_ref, b_ref), - bytecode::BinaryOperator::FloorDivide => vm._floordiv(a_ref, b_ref), - bytecode::BinaryOperator::Remainder => vm._mod(a_ref, b_ref), - bytecode::BinaryOperator::Lshift => vm._lshift(a_ref, b_ref), - bytecode::BinaryOperator::Rshift => vm._rshift(a_ref, b_ref), - bytecode::BinaryOperator::Xor => vm._xor(a_ref, b_ref), - bytecode::BinaryOperator::Or => vm._or(a_ref, b_ref), - bytecode::BinaryOperator::And => vm._and(a_ref, b_ref), - bytecode::BinaryOperator::InplaceMultiply => vm._imul(a_ref, b_ref), - bytecode::BinaryOperator::InplaceMatrixMultiply => vm._imatmul(a_ref, b_ref), - bytecode::BinaryOperator::InplacePower => { - vm._ipow(a_ref, b_ref, vm.ctx.none.as_object()) + } else { + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } - bytecode::BinaryOperator::InplaceTrueDivide => vm._itruediv(a_ref, b_ref), - bytecode::BinaryOperator::InplaceFloorDivide => vm._ifloordiv(a_ref, b_ref), - bytecode::BinaryOperator::InplaceRemainder => vm._imod(a_ref, b_ref), - bytecode::BinaryOperator::InplaceLshift => vm._ilshift(a_ref, 
b_ref), - bytecode::BinaryOperator::InplaceRshift => vm._irshift(a_ref, b_ref), - bytecode::BinaryOperator::InplaceXor => vm._ixor(a_ref, b_ref), - bytecode::BinaryOperator::InplaceOr => vm._ior(a_ref, b_ref), - bytecode::BinaryOperator::InplaceAnd => vm._iand(a_ref, b_ref), - bytecode::BinaryOperator::Subscr => a_ref.get_item(b_ref.as_object(), vm), - }?; + } + } - self.push_value(value); - Ok(None) + fn deoptimize_to_bool(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::ToBool); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } } - /// Int addition with i64 fast path to avoid BigInt heap allocation. - #[inline] - fn int_add(&self, a: &BigInt, b: &BigInt, vm: &VirtualMachine) -> PyObjectRef { - use num_traits::ToPrimitive; - if let (Some(av), Some(bv)) = (a.to_i64(), b.to_i64()) - && let Some(result) = av.checked_add(bv) - { - return vm.ctx.new_int(result).into(); + fn specialize_for_iter(&mut self, vm: &VirtualMachine, instr_idx: usize, cache_base: usize) { + let iter = self.top_value(); + + let new_op = if iter.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ForIterRange) + } else if iter.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ForIterList) + } else if iter.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ForIterTuple) + } else if iter.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ForIterGen) + } else { + None + }; + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } } - vm.ctx.new_int(a + b).into() } - /// Int subtraction with i64 fast path to avoid BigInt heap allocation. 
- #[inline] - fn int_sub(&self, a: &BigInt, b: &BigInt, vm: &VirtualMachine) -> PyObjectRef { - use num_traits::ToPrimitive; - if let (Some(av), Some(bv)) = (a.to_i64(), b.to_i64()) - && let Some(result) = av.checked_sub(bv) - { - return vm.ctx.new_int(result).into(); + fn deoptimize_call(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::Call { + nargs: Arg::marker(), + }, + ); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } - vm.ctx.new_int(a - b).into() } - #[cold] - fn setup_annotations(&mut self, vm: &VirtualMachine) -> FrameResult { - let __annotations__ = identifier!(vm, __annotations__); - let locals_obj = self.locals.as_object(vm); - // Try using locals as dict first, if not, fallback to generic method. - let has_annotations = if let Some(d) = locals_obj.downcast_ref_if_exact::(vm) { - d.contains_key(__annotations__, vm) - } else { - self._in(vm, __annotations__.as_object(), locals_obj)? - }; - if !has_annotations { - locals_obj.set_item(__annotations__, vm.ctx.new_dict().into(), vm)?; + fn deoptimize_call_kw(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::CallKw { + nargs: Arg::marker(), + }, + ); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } - Ok(None) } - /// _PyEval_UnpackIterableStackRef - fn unpack_sequence(&mut self, size: u32, vm: &VirtualMachine) -> FrameResult { - let value = self.pop_value(); - let size = size as usize; - - // Fast path for exact tuple/list types (not subclasses) — push - // elements directly from the slice without intermediate Vec allocation, - // matching UNPACK_SEQUENCE_TUPLE / UNPACK_SEQUENCE_LIST specializations. 
- let cls = value.class(); - if cls.is(vm.ctx.types.tuple_type) { - let tuple = value.downcast_ref::().unwrap(); - return self.unpack_fast(tuple.as_slice(), size, vm); - } - if cls.is(vm.ctx.types.list_type) { - let list = value.downcast_ref::().unwrap(); - let borrowed = list.borrow_vec(); - return self.unpack_fast(&borrowed, size, vm); + fn deoptimize_for_iter(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::ForIter { + target: Arg::marker(), + }, + ); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } + } - // General path — iterate up to `size + 1` elements to avoid - // consuming the entire iterator (fixes hang on infinite sequences). - let not_iterable = value.class().slots.iter.load().is_none() - && value - .get_class_attr(vm.ctx.intern_str("__getitem__")) - .is_none(); - let iter = PyIter::try_from_object(vm, value.clone()).map_err(|e| { - if not_iterable && e.class().is(vm.ctx.exceptions.type_error) { - vm.new_type_error(format!( - "cannot unpack non-iterable {} object", - value.class().name() - )) + /// Handle iterator exhaustion in specialized FOR_ITER handlers. + /// Skips END_FOR if present at target and jumps. + fn for_iter_jump_on_exhausted(&mut self, target: bytecode::Label) { + let target_idx = target.0 as usize; + let jump_target = if let Some(unit) = self.code.instructions.get(target_idx) { + if matches!( + unit.op, + bytecode::Instruction::EndFor | bytecode::Instruction::InstrumentedEndFor + ) { + bytecode::Label(target.0 + 1) } else { - e + target } - })?; + } else { + target + }; + self.jump(jump_target); + } - let mut elements = Vec::with_capacity(size); - for _ in 0..size { - match iter.next(vm)? 
{ - PyIterReturn::Return(item) => elements.push(item), - PyIterReturn::StopIteration(_) => { - return Err(vm.new_value_error(format!( - "not enough values to unpack (expected {size}, got {})", - elements.len() - ))); - } - } - } + fn specialize_load_global( + &mut self, + vm: &VirtualMachine, + oparg: u32, + instr_idx: usize, + cache_base: usize, + ) { + let name = self.code.names[(oparg >> 1) as usize]; + // Check if name exists in globals + let in_globals = self.globals.get_item_opt(name, vm).ok().flatten().is_some(); + + let globals_version = self.globals.version() as u32; + + let new_op = if in_globals { + Some(Instruction::LoadGlobalModule) + } else if self + .builtins + .downcast_ref::() + .and_then(|b| b.get_item_opt(name, vm).ok().flatten()) + .is_some() + { + Some(Instruction::LoadGlobalBuiltin) + } else { + None + }; - // Check that the iterator is exhausted. - match iter.next(vm)? { - PyIterReturn::Return(_) => { - // For exact dict types, show "got N" using the container's - // size (PyDict_Size). Exact tuple/list are handled by the - // fast path above and never reach here. 
- let msg = if value.class().is(vm.ctx.types.dict_type) { - if let Ok(got) = value.length(vm) { - if got > size { - format!("too many values to unpack (expected {size}, got {got})") - } else { - format!("too many values to unpack (expected {size})") - } - } else { - format!("too many values to unpack (expected {size})") - } - } else { - format!("too many values to unpack (expected {size})") - }; - Err(vm.new_value_error(msg)) + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, globals_version); } - PyIterReturn::StopIteration(_) => { - self.state.stack.extend( - elements - .into_iter() - .rev() - .map(|e| Some(PyStackRef::new_owned(e))), - ); - Ok(None) + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } } } - fn unpack_fast( - &mut self, - elements: &[PyObjectRef], - size: usize, - vm: &VirtualMachine, - ) -> FrameResult { - match elements.len().cmp(&size) { - core::cmp::Ordering::Equal => { - for elem in elements.iter().rev() { - self.push_value(elem.clone()); - } - Ok(None) - } - core::cmp::Ordering::Greater => Err(vm.new_value_error(format!( - "too many values to unpack (expected {size}, got {})", - elements.len() - ))), - core::cmp::Ordering::Less => Err(vm.new_value_error(format!( - "not enough values to unpack (expected {size}, got {})", - elements.len() - ))), + fn deoptimize_load_global(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadGlobal(Arg::marker())); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } } - fn convert_value( + fn specialize_store_subscr( &mut self, - conversion: bytecode::ConvertValueOparg, vm: &VirtualMachine, - ) -> FrameResult { - use bytecode::ConvertValueOparg; - let value = self.pop_value(); - 
let value = match conversion { - ConvertValueOparg::Str => value.str(vm)?.into(), - ConvertValueOparg::Repr => value.repr(vm)?.into(), - ConvertValueOparg::Ascii => vm.ctx.new_str(builtins::ascii(value, vm)?).into(), - ConvertValueOparg::None => value, - }; + instr_idx: usize, + cache_base: usize, + ) { + // Stack: [value, obj, idx] — obj is TOS-1 + let obj = self.nth_value(1); + let idx = self.top_value(); - self.push_value(value); - Ok(None) - } + let new_op = if obj.downcast_ref_if_exact::(vm).is_some() + && idx.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::StoreSubscrListInt) + } else if obj.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::StoreSubscrDict) + } else { + None + }; - fn _in(&self, vm: &VirtualMachine, needle: &PyObject, haystack: &PyObject) -> PyResult { - let found = vm._contains(haystack, needle)?; - Ok(found) + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } } - #[inline(always)] - fn _not_in( - &self, - vm: &VirtualMachine, - needle: &PyObject, - haystack: &PyObject, - ) -> PyResult { - Ok(!self._in(vm, needle, haystack)?) 
+ fn deoptimize_store_subscr(&mut self) { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::StoreSubscr); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } } - #[cfg_attr(feature = "flame-it", flame("Frame"))] - fn execute_compare( - &mut self, - vm: &VirtualMachine, - op: bytecode::ComparisonOperator, - ) -> FrameResult { - let b = self.pop_value(); - let a = self.pop_value(); - let cmp_op: PyComparisonOp = op.into(); + fn specialize_contains_op(&mut self, vm: &VirtualMachine, instr_idx: usize, cache_base: usize) { + let haystack = self.top_value(); // b = TOS = haystack + let new_op = if haystack.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ContainsOpDict) + } else if haystack.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::ContainsOpSet) + } else { + None + }; - // COMPARE_OP_INT: leaf type, cannot recurse — skip rich_compare dispatch - if let (Some(a_int), Some(b_int)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) { - let result = cmp_op.eval_ord(a_int.as_bigint().cmp(b_int.as_bigint())); - self.push_value(vm.ctx.new_bool(result).into()); - return Ok(None); - } - // COMPARE_OP_FLOAT: leaf type, cannot recurse — skip rich_compare dispatch. - // Falls through on NaN (partial_cmp returns None) for correct != semantics. 
- if let (Some(a_f), Some(b_f)) = ( - a.downcast_ref_if_exact::(vm), - b.downcast_ref_if_exact::(vm), - ) && let Some(ord) = a_f.to_f64().partial_cmp(&b_f.to_f64()) - { - let result = cmp_op.eval_ord(ord); - self.push_value(vm.ctx.new_bool(result).into()); - return Ok(None); + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } } - - let value = a.rich_compare(b, cmp_op, vm)?; - self.push_value(value); - Ok(None) } - fn load_attr(&mut self, vm: &VirtualMachine, oparg: LoadAttr) -> FrameResult { + fn deoptimize_contains_op(&mut self) { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::ContainsOp(Arg::marker())); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } + } - let counter = self.code.instructions.read_adaptive_counter(cache_base); - if counter > 0 { + fn specialize_unpack_sequence( + &mut self, + vm: &VirtualMachine, + instr_idx: usize, + cache_base: usize, + ) { + let obj = self.top_value(); + let new_op = if let Some(tuple) = obj.downcast_ref_if_exact::(vm) { + if tuple.len() == 2 { + Some(Instruction::UnpackSequenceTwoTuple) + } else { + Some(Instruction::UnpackSequenceTuple) + } + } else if obj.downcast_ref_if_exact::(vm).is_some() { + Some(Instruction::UnpackSequenceList) + } else { + None + }; + + if let Some(new_op) = new_op { + unsafe { + self.code.instructions.replace_op(instr_idx, new_op); + } + } else { unsafe { self.code .instructions - .write_adaptive_counter(cache_base, counter - 1); + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } - } else { - self.specialize_load_attr(vm, oparg, instr_idx, cache_base); } + } - self.load_attr_slow(vm, oparg) + fn deoptimize_unpack_sequence(&mut self) { + let instr_idx = self.lasti() as 
usize - 1; + let cache_base = instr_idx + 1; + unsafe { + self.code.instructions.replace_op( + instr_idx, + Instruction::UnpackSequence { + size: Arg::marker(), + }, + ); + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + } } - fn specialize_load_attr( + fn specialize_store_attr( &mut self, _vm: &VirtualMachine, - oparg: LoadAttr, + attr_idx: bytecode::NameIdx, instr_idx: usize, cache_base: usize, ) { - let obj = self.top_value(); - let cls = obj.class(); + // TOS = owner (the object being assigned to) + let owner = self.top_value(); + let cls = owner.class(); - // Only specialize if getattro is the default (PyBaseObject::getattro) - let is_default_getattro = cls + // Only specialize if setattr is the default (generic_setattr) + let is_default_setattr = cls .slots - .getattro + .setattro .load() - .is_some_and(|f| f as usize == PyBaseObject::getattro as *const () as usize); - if !is_default_getattro { + .is_some_and(|f| f as usize == PyBaseObject::slot_setattro as *const () as usize); + if !is_default_setattr { unsafe { self.code .instructions @@ -4566,7 +7396,6 @@ impl ExecutingFrame<'_> { type_version = cls.assign_version_tag(); } if type_version == 0 { - // Version counter overflow — backoff to avoid re-attempting every execution unsafe { self.code .instructions @@ -4575,157 +7404,46 @@ impl ExecutingFrame<'_> { return; } - let attr_name = self.code.names[oparg.name_idx() as usize]; - - // Look up attr in class via MRO + // Check for data descriptor + let attr_name = self.code.names[attr_idx as usize]; let cls_attr = cls.get_attr(attr_name); - let has_dict = obj.dict().is_some(); + let has_data_descr = cls_attr.as_ref().is_some_and(|descr| { + let descr_cls = descr.class(); + descr_cls.slots.descr_get.load().is_some() && descr_cls.slots.descr_set.load().is_some() + }); - if oparg.is_method() { - // Method specialization + if has_data_descr { + // Check for member descriptor (slot access) if let Some(ref descr) = cls_attr 
- && descr - .class() - .slots - .flags - .has_feature(PyTypeFlags::METHOD_DESCRIPTOR) + && let Some(member_descr) = descr.downcast_ref::() + && let MemberGetter::Offset(offset) = member_descr.member.getter { - let descr_ptr = &**descr as *const PyObject as u64; unsafe { self.code .instructions .write_cache_u32(cache_base + 1, type_version); self.code .instructions - .write_cache_u64(cache_base + 5, descr_ptr); - } - - let new_op = if !has_dict { - Instruction::LoadAttrMethodNoDict - } else { - Instruction::LoadAttrMethodWithValues - }; - unsafe { - self.code.instructions.replace_op(instr_idx, new_op); - } - return; - } - // Can't specialize this method call - unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); - } - } else { - // Regular attribute access - let has_data_descr = cls_attr.as_ref().is_some_and(|descr| { - let descr_cls = descr.class(); - descr_cls.slots.descr_get.load().is_some() - && descr_cls.slots.descr_set.load().is_some() - }); - - if !has_data_descr && has_dict { - // Instance attribute access — skip class descriptor check - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, type_version); + .write_cache_u32(cache_base + 3, offset as u32); self.code .instructions - .replace_op(instr_idx, Instruction::LoadAttrInstanceValue); + .replace_op(instr_idx, Instruction::StoreAttrSlot); } } else { - // Data descriptor or no dict — can't easily specialize unsafe { self.code .instructions .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); } } - } - } - - fn load_attr_slow(&mut self, vm: &VirtualMachine, oparg: LoadAttr) -> FrameResult { - let attr_name = self.code.names[oparg.name_idx() as usize]; - let parent = self.pop_value(); - - if oparg.is_method() { - // Method call: push [method, self_or_null] - let method = PyMethod::get(parent.clone(), attr_name, vm)?; - match method { - PyMethod::Function { target: _, func } => { - self.push_value(func); - self.push_value(parent); - } 
- PyMethod::Attribute(val) => { - self.push_value(val); - self.push_null(); - } - } - } else { - // Regular attribute access - let obj = parent.get_attr(attr_name, vm)?; - self.push_value(obj); - } - Ok(None) - } - - fn specialize_binary_op( - &mut self, - vm: &VirtualMachine, - op: bytecode::BinaryOperator, - instr_idx: usize, - cache_base: usize, - ) { - let b = self.top_value(); - let a = self.nth_value(1); - - let new_op = match op { - bytecode::BinaryOperator::Add => { - if a.downcast_ref_if_exact::(vm).is_some() - && b.downcast_ref_if_exact::(vm).is_some() - { - Some(Instruction::BinaryOpAddInt) - } else if a.downcast_ref_if_exact::(vm).is_some() - && b.downcast_ref_if_exact::(vm).is_some() - { - Some(Instruction::BinaryOpAddFloat) - } else { - None - } - } - bytecode::BinaryOperator::Subtract => { - if a.downcast_ref_if_exact::(vm).is_some() - && b.downcast_ref_if_exact::(vm).is_some() - { - Some(Instruction::BinaryOpSubtractInt) - } else if a.downcast_ref_if_exact::(vm).is_some() - && b.downcast_ref_if_exact::(vm).is_some() - { - Some(Instruction::BinaryOpSubtractFloat) - } else { - None - } - } - bytecode::BinaryOperator::Multiply => { - if a.downcast_ref_if_exact::(vm).is_some() - && b.downcast_ref_if_exact::(vm).is_some() - { - Some(Instruction::BinaryOpMultiplyInt) - } else if a.downcast_ref_if_exact::(vm).is_some() - && b.downcast_ref_if_exact::(vm).is_some() - { - Some(Instruction::BinaryOpMultiplyFloat) - } else { - None - } - } - _ => None, - }; - - if let Some(new_op) = new_op { + } else if owner.dict().is_some() { unsafe { - self.code.instructions.replace_op(instr_idx, new_op); + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + self.code + .instructions + .replace_op(instr_idx, Instruction::StoreAttrInstanceValue); } } else { unsafe { @@ -4736,75 +7454,6 @@ impl ExecutingFrame<'_> { } } - fn deoptimize_binary_op(&mut self, _op: bytecode::BinaryOperator) { - let instr_idx = self.lasti() as usize - 1; - let cache_base = 
instr_idx + 1; - unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::BinaryOp { op: Arg::marker() }); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); - } - } - - fn specialize_call( - &mut self, - _vm: &VirtualMachine, - nargs: u32, - instr_idx: usize, - cache_base: usize, - ) { - // Stack: [callable, self_or_null, arg1, ..., argN] - // callable is at position nargs + 1 from top - // self_or_null is at position nargs from top - let stack = &self.state.stack; - let stack_len = stack.len(); - let self_or_null_is_some = stack[stack_len - nargs as usize - 1].is_some(); - let callable = self.nth_value(nargs + 1); - - if let Some(func) = callable.downcast_ref::() { - let version = func.func_version(); - if version == 0 { - unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); - } - return; - } - - let effective_nargs = if self_or_null_is_some { - nargs + 1 - } else { - nargs - }; - - if func.can_specialize_call(effective_nargs) { - let new_op = if self_or_null_is_some { - Instruction::CallBoundMethodExactArgs - } else { - Instruction::CallPyExactArgs - }; - unsafe { - self.code.instructions.replace_op(instr_idx, new_op); - // Store func_version in cache (after counter) - self.code - .instructions - .write_cache_u32(cache_base + 1, version); - } - return; - } - } - - unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); - } - } - fn load_super_attr(&mut self, vm: &VirtualMachine, oparg: LoadSuperAttr) -> FrameResult { let attr_name = self.code.names[oparg.name_idx() as usize];