diff --git a/.cspell.dict/python-more.txt b/.cspell.dict/python-more.txt index 58a0e816087..e8534e9744a 100644 --- a/.cspell.dict/python-more.txt +++ b/.cspell.dict/python-more.txt @@ -148,6 +148,7 @@ nbytes ncallbacks ndigits ndim +needsfree nldecoder nlocals NOARGS @@ -168,6 +169,7 @@ pycache pycodecs pycs pyexpat +PYTHONAPI PYTHONBREAKPOINT PYTHONDEBUG PYTHONDONTWRITEBYTECODE diff --git a/.cspell.json b/.cspell.json index 9f88a74f96d..3bd06fc2032 100644 --- a/.cspell.json +++ b/.cspell.json @@ -75,9 +75,9 @@ "makeunicodedata", "miri", "notrace", + "oparg", "openat", "pyarg", - "pyarg", "pyargs", "pyast", "PyAttr", @@ -107,6 +107,7 @@ "pystruct", "pystructseq", "pytrace", + "pytype", "reducelib", "richcompare", "RustPython", @@ -116,7 +117,6 @@ "sysmodule", "tracebacks", "typealiases", - "unconstructible", "unhashable", "uninit", "unraisable", @@ -131,6 +131,7 @@ "getrusage", "nanosleep", "sigaction", + "sighandler", "WRLCK", // win32 "birthtime", diff --git a/crates/stdlib/src/pystruct.rs b/crates/stdlib/src/pystruct.rs index 0a006f5a0f2..34a4905ed9f 100644 --- a/crates/stdlib/src/pystruct.rs +++ b/crates/stdlib/src/pystruct.rs @@ -28,7 +28,7 @@ pub(crate) mod _struct { // CPython turns str to bytes but we do reversed way here // The only performance difference is this transition cost let fmt = match_class!(match obj { - s @ PyStr => s.is_ascii().then_some(s), + s @ PyStr => s.isascii().then_some(s), b @ PyBytes => ascii::AsciiStr::from_ascii(&b) .ok() .map(|s| vm.ctx.new_str(s)), diff --git a/crates/vm/src/buffer.rs b/crates/vm/src/buffer.rs index cf49d6815c0..eeb6a676542 100644 --- a/crates/vm/src/buffer.rs +++ b/crates/vm/src/buffer.rs @@ -261,6 +261,56 @@ impl FormatCode { return Err("embedded null character".to_owned()); } + // PEP3118: Handle extended format specifiers + // T{...} - struct, X{} - function pointer, (...) - array shape, :name: - field name + if c == b'T' || c == b'X' { + // Skip struct/function pointer: consume until matching '}' + if chars.peek() == Some(&b'{') { + chars.next(); // consume '{' + let mut depth = 1; + while depth > 0 { + match chars.next() { + Some(b'{') => depth += 1, + Some(b'}') => depth -= 1, + None => return Err("unmatched '{' in format".to_owned()), + _ => {} + } + } + continue; + } + } + + if c == b'(' { + // Skip array shape: consume until matching ')' + let mut depth = 1; + while depth > 0 { + match chars.next() { + Some(b'(') => depth += 1, + Some(b')') => depth -= 1, + None => return Err("unmatched '(' in format".to_owned()), + _ => {} + } + } + continue; + } + + if c == b':' { + // Skip field name: consume until next ':' + loop { + match chars.next() { + Some(b':') => break, + None => return Err("unmatched ':' in format".to_owned()), + _ => {} + } + } + continue; + } + + if c == b'{' || c == b'}' { + // Skip standalone braces (pointer targets, etc.) + continue; + } + let code = FormatType::try_from(c) .ok() .filter(|c| match c { diff --git a/crates/vm/src/builtins/builtin_func.rs b/crates/vm/src/builtins/builtin_func.rs index da5fd5e8075..2b569375b28 100644 --- a/crates/vm/src/builtins/builtin_func.rs +++ b/crates/vm/src/builtins/builtin_func.rs @@ -114,7 +114,7 @@ impl PyNativeFunction { zelf.0.value.doc } - #[pygetset(name = "__self__")] + #[pygetset] fn __self__(_zelf: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef { vm.ctx.none() } @@ -181,7 +181,7 @@ impl PyNativeMethod { Ok((getattr, (target, name))) } - #[pygetset(name = "__self__")] + #[pygetset] fn __self__(zelf: PyRef, _vm: &VirtualMachine) -> Option { zelf.func.zelf.clone() } diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index 0459cecbdd2..c29e45ddcf6 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -629,12 +629,11 @@ impl GetDescriptor for PyFunction { vm: &VirtualMachine, ) -> PyResult { let (_zelf, obj) = Self::_unwrap(&zelf, obj, vm)?; - let obj = if vm.is_none(&obj) && !Self::_cls_is(&cls, obj.class()) { + Ok(if vm.is_none(&obj) && !Self::_cls_is(&cls, obj.class()) { zelf } else { PyBoundMethod::new(obj, zelf).into_ref(&vm.ctx).into() - }; - Ok(obj) + }) } } diff --git a/crates/vm/src/builtins/object.rs b/crates/vm/src/builtins/object.rs index 6f917cd853c..cb95652f937 100644 --- a/crates/vm/src/builtins/object.rs +++ b/crates/vm/src/builtins/object.rs @@ -450,8 +450,8 @@ impl PyBaseObject { Ok(()) } - #[pygetset(name = "__class__")] - fn get_class(obj: PyObjectRef) -> PyTypeRef { + #[pygetset] + fn __class__(obj: PyObjectRef) -> PyTypeRef { obj.class().to_owned() } diff --git a/crates/vm/src/builtins/str.rs b/crates/vm/src/builtins/str.rs index 279b84362a6..8084c4d053e 100644 --- a/crates/vm/src/builtins/str.rs +++ b/crates/vm/src/builtins/str.rs @@ -625,9 +625,9 @@ impl PyStr { self.data.char_len() } - #[pymethod(name = "isascii")] + #[pymethod] #[inline(always)] - pub const fn is_ascii(&self) -> bool { + pub const fn isascii(&self) -> bool { matches!(self.kind(), StrKind::Ascii) } @@ -960,7 +960,7 @@ impl PyStr { format_map(&format_string, &mapping, vm) } - #[pymethod(name = "__format__")] + #[pymethod] fn __format__( zelf: PyRef, spec: PyStrRef, diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index 15743350397..68de17f60b6 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -1445,8 +1445,8 @@ impl GetAttr for PyType { #[pyclass] impl Py { - #[pygetset(name = "__mro__")] - fn get_mro(&self) -> PyTuple { + #[pygetset] + fn __mro__(&self) -> PyTuple { let elements: Vec = self.mro_map_collect(|x| x.as_object().to_owned()); PyTuple::new_unchecked(elements.into_boxed_slice()) } diff --git a/crates/vm/src/exceptions.rs b/crates/vm/src/exceptions.rs index 036d914810d..bb10ca02c2c 100644 --- a/crates/vm/src/exceptions.rs +++ b/crates/vm/src/exceptions.rs @@ -624,8 +624,8 @@ impl PyBaseException { *self.context.write() = context; } - #[pygetset(name = "__suppress_context__")] - pub(super) fn get_suppress_context(&self) -> bool { + #[pygetset] + pub(super) fn __suppress_context__(&self) -> bool { self.suppress_context.load() } @@ -1112,7 +1112,7 @@ impl serde::Serialize for SerializeException<'_, '_> { .__context__() .map(|exc| SerializeExceptionOwned { vm: self.vm, exc }), )?; - struc.serialize_field("suppress_context", &self.exc.get_suppress_context())?; + struc.serialize_field("suppress_context", &self.exc.__suppress_context__())?; let args = { struct Args<'vm>(&'vm VirtualMachine, PyTupleRef); @@ -1550,6 +1550,7 @@ pub(super) mod types { pub struct PyUnboundLocalError(PyNameError); #[pyexception(name, base = PyException, ctx = "os_error")] + #[repr(C)] pub struct PyOSError { base: PyException, errno: PyAtomicRef>, @@ -1857,8 +1858,8 @@ pub(super) mod types { self.errno.swap_to_temporary_refs(value, vm); } - #[pygetset(name = "strerror")] - fn get_strerror(&self) -> Option { + #[pygetset] + fn strerror(&self) -> Option { self.strerror.to_owned() } diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index e04b87de594..60b623ef3ed 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -1102,6 +1102,28 @@ where } } +impl Py { + /// Converts `&Py` to `&Py`. + #[inline] + pub fn to_base(&self) -> &Py { + debug_assert!(self.as_object().downcast_ref::().is_some()); + // SAFETY: T is #[repr(transparent)] over T::Base, + // so Py and Py have the same layout. + unsafe { &*(self as *const Py as *const Py) } + } + + /// Converts `&Py` to `&Py` where U is an ancestor type. + #[inline] + pub fn upcast_ref(&self) -> &Py + where + T: StaticType, + { + debug_assert!(T::static_type().is_subtype(U::static_type())); + // SAFETY: T is a subtype of U, so Py can be viewed as Py. + unsafe { &*(self as *const Py as *const Py) } + } +} + impl Borrow for PyRef where T: PyPayload, diff --git a/crates/vm/src/protocol/buffer.rs b/crates/vm/src/protocol/buffer.rs index 1dafda203d9..948ec763dc6 100644 --- a/crates/vm/src/protocol/buffer.rs +++ b/crates/vm/src/protocol/buffer.rs @@ -202,14 +202,18 @@ impl BufferDescriptor { #[cfg(debug_assertions)] pub fn validate(self) -> Self { assert!(self.itemsize != 0); - assert!(self.ndim() != 0); - let mut shape_product = 1; - for (shape, stride, suboffset) in self.dim_desc.iter().cloned() { - shape_product *= shape; - assert!(suboffset >= 0); - assert!(stride != 0); + // ndim=0 is valid for scalar types (e.g., ctypes Structure) + if self.ndim() == 0 { + assert!(self.itemsize == self.len); + } else { + let mut shape_product = 1; + for (shape, stride, suboffset) in self.dim_desc.iter().cloned() { + shape_product *= shape; + assert!(suboffset >= 0); + assert!(stride != 0); + } + assert!(shape_product * self.itemsize == self.len); } - assert!(shape_product * self.itemsize == self.len); self } diff --git a/crates/vm/src/stdlib/ast/python.rs b/crates/vm/src/stdlib/ast/python.rs index 042db4aa74e..aa21d8b034a 100644 --- a/crates/vm/src/stdlib/ast/python.rs +++ b/crates/vm/src/stdlib/ast/python.rs @@ -47,8 +47,8 @@ pub(crate) mod _ast { Ok(()) } - #[pyattr(name = "_fields")] - fn fields(ctx: &Context) -> PyTupleRef { + #[pyattr] + fn _fields(ctx: &Context) -> PyTupleRef { ctx.empty_tuple.clone() } } diff --git a/crates/vm/src/stdlib/codecs.rs b/crates/vm/src/stdlib/codecs.rs index 5f1b721dfb4..821b313090c 100644 --- a/crates/vm/src/stdlib/codecs.rs +++ b/crates/vm/src/stdlib/codecs.rs @@ -176,7 +176,7 @@ mod _codecs { #[pyfunction] fn latin_1_encode(args: EncodeArgs, vm: &VirtualMachine) -> EncodeResult { - if args.s.is_ascii() { + if args.s.isascii() { return Ok((args.s.as_bytes().to_vec(), args.s.byte_len())); } do_codec!(latin_1::encode, args, vm) @@ -189,7 +189,7 @@ mod _codecs { #[pyfunction] fn ascii_encode(args: EncodeArgs, vm: &VirtualMachine) -> EncodeResult { - if args.s.is_ascii() { + if args.s.isascii() { return Ok((args.s.as_bytes().to_vec(), args.s.byte_len())); } do_codec!(ascii::encode, args, vm) diff --git a/crates/vm/src/stdlib/ctypes.rs b/crates/vm/src/stdlib/ctypes.rs index ebe2d16ffb2..3fdb2df6104 100644 --- a/crates/vm/src/stdlib/ctypes.rs +++ b/crates/vm/src/stdlib/ctypes.rs @@ -1,77 +1,372 @@ // spell-checker:disable -pub(crate) mod array; -pub(crate) mod base; -pub(crate) mod field; -pub(crate) mod function; -pub(crate) mod library; -pub(crate) mod pointer; -pub(crate) mod structure; -pub(crate) mod thunk; -pub(crate) mod union; -pub(crate) mod util; - -use crate::builtins::PyModule; -use crate::class::PyClassImpl; -use crate::{Py, PyRef, VirtualMachine}; - -pub use crate::stdlib::ctypes::base::{CDataObject, PyCData, PyCSimple, PyCSimpleType}; - -pub fn extend_module_nodes(vm: &VirtualMachine, module: &Py) { +mod array; +mod base; +mod function; +mod library; +mod pointer; +mod simple; +mod structure; +mod union; + +use crate::{ + AsObject, Py, PyObjectRef, PyRef, PyResult, VirtualMachine, + builtins::{PyModule, PyStr, PyType}, + class::PyClassImpl, + types::TypeDataRef, +}; +use std::ffi::{ + c_double, c_float, c_int, c_long, c_longlong, c_schar, c_short, c_uchar, c_uint, c_ulong, + c_ulonglong, c_ushort, +}; +use std::mem; +use widestring::WideChar; + +pub use array::PyCArray; +pub use base::{FfiArgValue, PyCData, PyCField, StgInfo, StgInfoFlags}; +pub use pointer::PyCPointer; +pub use simple::{PyCSimple, PyCSimpleType}; +pub use structure::PyCStructure; +pub use union::PyCUnion; + +/// Extension for PyType to get StgInfo +/// PyStgInfo_FromType +impl Py { + /// Get StgInfo from a ctypes type object + /// + /// Returns a TypeDataRef to StgInfo if the type has one and is initialized, error otherwise. + /// Abstract classes (whose metaclass __init__ was not called) will have uninitialized StgInfo. + fn stg_info<'a>(&'a self, vm: &VirtualMachine) -> PyResult> { + self.stg_info_opt() + .ok_or_else(|| vm.new_type_error("abstract class")) + } + + /// Get StgInfo if initialized, None otherwise. + fn stg_info_opt(&self) -> Option> { + self.get_type_data::() + .filter(|info| info.initialized) + } + + /// Get _type_ attribute as String (type code like "i", "d", etc.) + fn type_code(&self, vm: &VirtualMachine) -> Option { + self.as_object() + .get_attr("_type_", vm) + .ok() + .and_then(|t: PyObjectRef| t.downcast_ref::().map(|s| s.to_string())) + } + + /// Mark all base classes as finalized + fn mark_bases_final(&self) { + for base in self.bases.read().iter() { + if let Some(mut stg) = base.get_type_data_mut::() { + stg.flags |= StgInfoFlags::DICTFLAG_FINAL; + } else { + let mut stg = StgInfo::default(); + stg.flags |= StgInfoFlags::DICTFLAG_FINAL; + let _ = base.init_type_data(stg); + } + } + } +} + +impl PyType { + /// Check if StgInfo is already initialized - prevent double initialization + pub(crate) fn check_not_initialized(&self, vm: &VirtualMachine) -> PyResult<()> { + if let Some(stg_info) = self.get_type_data::() + && stg_info.initialized + { + return Err(vm.new_exception_msg( + vm.ctx.exceptions.system_error.to_owned(), + format!("StgInfo of '{}' is already initialized.", self.name()), + )); + } + Ok(()) + } +} + +// Dynamic type check helpers for PyCData +// These check if an object's type's metaclass is a subclass of a specific metaclass + +pub(crate) fn make_module(vm: &VirtualMachine) -> PyRef { + let module = _ctypes::make_module(vm); let ctx = &vm.ctx; PyCSimpleType::make_class(ctx); array::PyCArrayType::make_class(ctx); - field::PyCFieldType::make_class(ctx); pointer::PyCPointerType::make_class(ctx); structure::PyCStructType::make_class(ctx); union::PyCUnionType::make_class(ctx); - extend_module!(vm, module, { + extend_module!(vm, &module, { "_CData" => PyCData::make_class(ctx), "_SimpleCData" => PyCSimple::make_class(ctx), - "Array" => array::PyCArray::make_class(ctx), - "CField" => field::PyCField::make_class(ctx), + "Array" => PyCArray::make_class(ctx), + "CField" => PyCField::make_class(ctx), "CFuncPtr" => function::PyCFuncPtr::make_class(ctx), - "_Pointer" => pointer::PyCPointer::make_class(ctx), + "_Pointer" => PyCPointer::make_class(ctx), "_pointer_type_cache" => ctx.new_dict(), - "Structure" => structure::PyCStructure::make_class(ctx), - "CThunkObject" => thunk::PyCThunk::make_class(ctx), - "Union" => union::PyCUnion::make_class(ctx), - }) + "_array_type_cache" => ctx.new_dict(), + "Structure" => PyCStructure::make_class(ctx), + "CThunkObject" => function::PyCThunk::make_class(ctx), + "Union" => PyCUnion::make_class(ctx), + }); + module } -pub(crate) fn make_module(vm: &VirtualMachine) -> PyRef { - let module = _ctypes::make_module(vm); - extend_module_nodes(vm, &module); - module +/// Size of long double - platform dependent +/// x86_64 macOS/Linux: 16 bytes (80-bit extended + padding) +/// ARM64: 16 bytes (128-bit) +/// Windows: 8 bytes (same as double) +#[cfg(all( + any(target_arch = "x86_64", target_arch = "aarch64"), + not(target_os = "windows") +))] +const LONG_DOUBLE_SIZE: usize = 16; + +#[cfg(target_os = "windows")] +const LONG_DOUBLE_SIZE: usize = mem::size_of::(); + +#[cfg(not(any( + all( + any(target_arch = "x86_64", target_arch = "aarch64"), + not(target_os = "windows") + ), + target_os = "windows" +)))] +const LONG_DOUBLE_SIZE: usize = mem::size_of::(); + +/// Type information for ctypes simple types +struct TypeInfo { + pub size: usize, + pub ffi_type_fn: fn() -> libffi::middle::Type, +} + +/// Get type information (size and ffi_type) for a ctypes type code +fn type_info(ty: &str) -> Option { + use libffi::middle::Type; + match ty { + "c" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::u8, + }), + "u" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: if mem::size_of::() == 2 { + Type::u16 + } else { + Type::u32 + }, + }), + "b" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::i8, + }), + "B" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::u8, + }), + "h" | "v" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::i16, + }), + "H" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::u16, + }), + "i" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::i32, + }), + "I" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::u32, + }), + "l" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: if mem::size_of::() == 8 { + Type::i64 + } else { + Type::i32 + }, + }), + "L" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: if mem::size_of::() == 8 { + Type::u64 + } else { + Type::u32 + }, + }), + "q" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::i64, + }), + "Q" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::u64, + }), + "f" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::f32, + }), + "d" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::f64, + }), + "g" => Some(TypeInfo { + // long double - platform dependent size + // x86_64 macOS/Linux: 16 bytes (80-bit extended + padding) + // ARM64: 16 bytes (128-bit) + // Windows: 8 bytes (same as double) + // Note: Use f64 as FFI type since Rust doesn't support long double natively + size: LONG_DOUBLE_SIZE, + ffi_type_fn: Type::f64, + }), + "?" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::u8, + }), + "z" | "Z" | "P" | "X" | "O" => Some(TypeInfo { + size: mem::size_of::(), + ffi_type_fn: Type::pointer, + }), + "void" => Some(TypeInfo { + size: 0, + ffi_type_fn: Type::void, + }), + _ => None, + } +} + +/// Get size for a ctypes type code +fn get_size(ty: &str) -> usize { + type_info(ty).map(|t| t.size).expect("invalid type code") +} + +/// Get alignment for simple type codes from type_info(). +/// For primitive C types (c_int, c_long, etc.), alignment equals size. +fn get_align(ty: &str) -> usize { + get_size(ty) } #[pymodule] pub(crate) mod _ctypes { - use super::base::{CDataObject, PyCData, PyCSimple}; - use crate::builtins::PyTypeRef; + use super::library; + use super::{PyCArray, PyCData, PyCPointer, PyCSimple, PyCStructure, PyCUnion}; + use crate::builtins::{PyType, PyTypeRef}; use crate::class::StaticType; use crate::convert::ToPyObject; - use crate::function::{Either, FuncArgs, OptionalArg}; - use crate::stdlib::ctypes::library; - use crate::{AsObject, PyObject, PyObjectRef, PyPayload, PyResult, VirtualMachine}; - use crossbeam_utils::atomic::AtomicCell; - use std::ffi::{ - c_double, c_float, c_int, c_long, c_longlong, c_schar, c_short, c_uchar, c_uint, c_ulong, - c_ulonglong, c_ushort, - }; - use std::mem; - use widestring::WideChar; - - /// CArgObject - returned by byref() + use crate::function::{Either, OptionalArg}; + use crate::types::Representable; + use crate::{AsObject, Py, PyObjectRef, PyPayload, PyResult, VirtualMachine}; + use num_traits::ToPrimitive; + + /// CArgObject - returned by byref() and paramfunc + /// tagPyCArgObject #[pyclass(name = "CArgObject", module = "_ctypes", no_attr)] #[derive(Debug, PyPayload)] pub struct CArgObject { + /// Type tag ('P', 'V', 'i', 'd', etc.) + pub tag: u8, + /// The actual FFI value (mirrors union value) + pub value: super::FfiArgValue, + /// Reference to original object (for memory safety) pub obj: PyObjectRef, + /// Size for struct/union ('V' tag) #[allow(dead_code)] + pub size: usize, + /// Offset for byref() pub offset: isize, } - #[pyclass] + /// is_literal_char - check if character is printable literal (not \\ or ') + fn is_literal_char(c: u8) -> bool { + c < 128 && c.is_ascii_graphic() && c != b'\\' && c != b'\'' + } + + impl Representable for CArgObject { + // PyCArg_repr - use tag and value fields directly + fn repr_str(zelf: &Py, _vm: &VirtualMachine) -> PyResult { + use super::base::FfiArgValue; + + let tag_char = zelf.tag as char; + + // Format value based on tag + match zelf.tag { + b'b' | b'h' | b'i' | b'l' | b'q' => { + // Signed integers + let n = match zelf.value { + FfiArgValue::I8(v) => v as i64, + FfiArgValue::I16(v) => v as i64, + FfiArgValue::I32(v) => v as i64, + FfiArgValue::I64(v) => v, + _ => 0, + }; + Ok(format!("", tag_char, n)) + } + b'B' | b'H' | b'I' | b'L' | b'Q' => { + // Unsigned integers + let n = match zelf.value { + FfiArgValue::U8(v) => v as u64, + FfiArgValue::U16(v) => v as u64, + FfiArgValue::U32(v) => v as u64, + FfiArgValue::U64(v) => v, + _ => 0, + }; + Ok(format!("", tag_char, n)) + } + b'f' => { + let v = match zelf.value { + FfiArgValue::F32(v) => v as f64, + _ => 0.0, + }; + Ok(format!("", tag_char, v)) + } + b'd' | b'g' => { + let v = match zelf.value { + FfiArgValue::F64(v) => v, + FfiArgValue::F32(v) => v as f64, + _ => 0.0, + }; + Ok(format!("", tag_char, v)) + } + b'c' => { + // c_char - single byte + let byte = match zelf.value { + FfiArgValue::I8(v) => v as u8, + FfiArgValue::U8(v) => v, + _ => 0, + }; + if is_literal_char(byte) { + Ok(format!("", tag_char, byte as char)) + } else { + Ok(format!("", tag_char, byte)) + } + } + b'z' | b'Z' | b'P' | b'V' => { + // Pointer types + let ptr = match zelf.value { + FfiArgValue::Pointer(v) => v, + _ => 0, + }; + if ptr == 0 { + Ok(format!("", tag_char)) + } else { + Ok(format!("", tag_char, ptr)) + } + } + _ => { + // Default fallback + let addr = zelf.get_id(); + if is_literal_char(zelf.tag) { + Ok(format!("", tag_char, addr)) + } else { + Ok(format!("", zelf.tag, addr)) + } + } + } + } + } + + #[pyclass(with(Representable))] impl CArgObject { #[pygetset] fn _obj(&self) -> PyObjectRef { @@ -83,43 +378,43 @@ pub(crate) mod _ctypes { const __VERSION__: &str = "1.1.0"; // TODO: get properly - #[pyattr(name = "RTLD_LOCAL")] + #[pyattr] const RTLD_LOCAL: i32 = 0; // TODO: get properly - #[pyattr(name = "RTLD_GLOBAL")] + #[pyattr] const RTLD_GLOBAL: i32 = 0; #[cfg(target_os = "windows")] - #[pyattr(name = "SIZEOF_TIME_T")] - pub const SIZEOF_TIME_T: usize = 8; + #[pyattr] + const SIZEOF_TIME_T: usize = 8; #[cfg(not(target_os = "windows"))] - #[pyattr(name = "SIZEOF_TIME_T")] - pub const SIZEOF_TIME_T: usize = 4; + #[pyattr] + const SIZEOF_TIME_T: usize = 4; - #[pyattr(name = "CTYPES_MAX_ARGCOUNT")] - pub const CTYPES_MAX_ARGCOUNT: usize = 1024; + #[pyattr] + const CTYPES_MAX_ARGCOUNT: usize = 1024; #[pyattr] - pub const FUNCFLAG_STDCALL: u32 = 0x0; + const FUNCFLAG_STDCALL: u32 = 0x0; #[pyattr] - pub const FUNCFLAG_CDECL: u32 = 0x1; + const FUNCFLAG_CDECL: u32 = 0x1; #[pyattr] - pub const FUNCFLAG_HRESULT: u32 = 0x2; + const FUNCFLAG_HRESULT: u32 = 0x2; #[pyattr] - pub const FUNCFLAG_PYTHONAPI: u32 = 0x4; + const FUNCFLAG_PYTHONAPI: u32 = 0x4; #[pyattr] - pub const FUNCFLAG_USE_ERRNO: u32 = 0x8; + const FUNCFLAG_USE_ERRNO: u32 = 0x8; #[pyattr] - pub const FUNCFLAG_USE_LASTERROR: u32 = 0x10; + const FUNCFLAG_USE_LASTERROR: u32 = 0x10; #[pyattr] - pub const TYPEFLAG_ISPOINTER: u32 = 0x100; + const TYPEFLAG_ISPOINTER: u32 = 0x100; #[pyattr] - pub const TYPEFLAG_HASPOINTER: u32 = 0x200; + const TYPEFLAG_HASPOINTER: u32 = 0x200; #[pyattr] - pub const DICTFLAG_FINAL: u32 = 0x1000; + const DICTFLAG_FINAL: u32 = 0x1000; #[pyattr(name = "ArgumentError", once)] fn argument_error(vm: &VirtualMachine) -> PyTypeRef { @@ -130,369 +425,138 @@ pub(crate) mod _ctypes { ) } - #[pyattr(name = "FormatError", once)] - fn format_error(vm: &VirtualMachine) -> PyTypeRef { - vm.ctx.new_exception_type( - "_ctypes", - "FormatError", - Some(vec![vm.ctx.exceptions.exception_type.to_owned()]), - ) - } - - pub fn get_size(ty: &str) -> usize { - match ty { - "u" => mem::size_of::(), - "c" | "b" => mem::size_of::(), - "h" => mem::size_of::(), - "H" => mem::size_of::(), - "i" => mem::size_of::(), - "I" => mem::size_of::(), - "l" => mem::size_of::(), - "q" => mem::size_of::(), - "L" => mem::size_of::(), - "Q" => mem::size_of::(), - "f" => mem::size_of::(), - "d" | "g" => mem::size_of::(), - "?" | "B" => mem::size_of::(), - "P" | "z" | "Z" => mem::size_of::(), - "O" => mem::size_of::(), - _ => unreachable!(), - } - } - - /// Get alignment for a simple type - for C types, alignment equals size - pub fn get_align(ty: &str) -> usize { - get_size(ty) - } - - /// Get the size of a ctypes type from its type object - #[allow(dead_code)] - pub fn get_size_from_type(cls: &PyTypeRef, vm: &VirtualMachine) -> PyResult { - // Try to get _type_ attribute for simple types - if let Ok(type_attr) = cls.as_object().get_attr("_type_", vm) - && let Ok(s) = type_attr.str(vm) - { - let s = s.to_string(); - if s.len() == 1 && SIMPLE_TYPE_CHARS.contains(s.as_str()) { - return Ok(get_size(&s)); - } + #[cfg(target_os = "windows")] + #[pyattr(name = "COMError", once)] + fn com_error(vm: &VirtualMachine) -> PyTypeRef { + use crate::builtins::type_::PyAttributes; + use crate::function::FuncArgs; + use crate::types::{PyTypeFlags, PyTypeSlots}; + + // Sets hresult, text, details as instance attributes in __init__ + // This function has InitFunc signature for direct slots.init use + fn comerror_init(zelf: PyObjectRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult<()> { + let (hresult, text, details): ( + Option, + Option, + Option, + ) = args.bind(vm)?; + let hresult = hresult.unwrap_or_else(|| vm.ctx.none()); + let text = text.unwrap_or_else(|| vm.ctx.none()); + let details = details.unwrap_or_else(|| vm.ctx.none()); + + // Set instance attributes + zelf.set_attr("hresult", hresult.clone(), vm)?; + zelf.set_attr("text", text.clone(), vm)?; + zelf.set_attr("details", details.clone(), vm)?; + + // self.args = args[1:] = (text, details) + // via: PyObject_SetAttrString(self, "args", PySequence_GetSlice(args, 1, size)) + let args_tuple: PyObjectRef = vm.ctx.new_tuple(vec![text, details]).into(); + zelf.set_attr("args", args_tuple, vm)?; + + Ok(()) } - // Fall back to sizeof - size_of(cls.clone().into(), vm) - } - /// Convert bytes to appropriate Python object based on ctypes type - pub fn bytes_to_pyobject( - cls: &PyTypeRef, - bytes: &[u8], - vm: &VirtualMachine, - ) -> PyResult { - // Try to get _type_ attribute - if let Ok(type_attr) = cls.as_object().get_attr("_type_", vm) - && let Ok(s) = type_attr.str(vm) - { - let ty = s.to_string(); - return match ty.as_str() { - "c" => { - // c_char - single byte - Ok(vm.ctx.new_bytes(bytes.to_vec()).into()) - } - "b" => { - // c_byte - signed char - let val = if !bytes.is_empty() { bytes[0] as i8 } else { 0 }; - Ok(vm.ctx.new_int(val).into()) - } - "B" => { - // c_ubyte - unsigned char - let val = if !bytes.is_empty() { bytes[0] } else { 0 }; - Ok(vm.ctx.new_int(val).into()) - } - "h" => { - // c_short - const SIZE: usize = mem::size_of::(); - let val = if bytes.len() >= SIZE { - c_short::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) - } else { - 0 - }; - Ok(vm.ctx.new_int(val).into()) - } - "H" => { - // c_ushort - const SIZE: usize = mem::size_of::(); - let val = if bytes.len() >= SIZE { - c_ushort::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) - } else { - 0 - }; - Ok(vm.ctx.new_int(val).into()) - } - "i" => { - // c_int - const SIZE: usize = mem::size_of::(); - let val = if bytes.len() >= SIZE { - c_int::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) - } else { - 0 - }; - Ok(vm.ctx.new_int(val).into()) - } - "I" => { - // c_uint - const SIZE: usize = mem::size_of::(); - let val = if bytes.len() >= SIZE { - c_uint::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) - } else { - 0 - }; - Ok(vm.ctx.new_int(val).into()) - } - "l" => { - // c_long - const SIZE: usize = mem::size_of::(); - let val = if bytes.len() >= SIZE { - c_long::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) - } else { - 0 - }; - Ok(vm.ctx.new_int(val).into()) - } - "L" => { - // c_ulong - const SIZE: usize = mem::size_of::(); - let val = if bytes.len() >= SIZE { - c_ulong::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) - } else { - 0 - }; - Ok(vm.ctx.new_int(val).into()) - } - "q" => { - // c_longlong - const SIZE: usize = mem::size_of::(); - let val = if bytes.len() >= SIZE { - c_longlong::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) - } else { - 0 - }; - Ok(vm.ctx.new_int(val).into()) - } - "Q" => { - // c_ulonglong - const SIZE: usize = mem::size_of::(); - let val = if bytes.len() >= SIZE { - c_ulonglong::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) - } else { - 0 - }; - Ok(vm.ctx.new_int(val).into()) - } - "f" => { - // c_float - const SIZE: usize = mem::size_of::(); - let val = if bytes.len() >= SIZE { - c_float::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) - } else { - 0.0 - }; - Ok(vm.ctx.new_float(val as f64).into()) - } - "d" | "g" => { - // c_double - const SIZE: usize = mem::size_of::(); - let val = if bytes.len() >= SIZE { - c_double::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) - } else { - 0.0 - }; - Ok(vm.ctx.new_float(val).into()) - } - "?" => { - // c_bool - let val = !bytes.is_empty() && bytes[0] != 0; - Ok(vm.ctx.new_bool(val).into()) - } - "P" | "z" | "Z" => { - // Pointer types - return as integer address - let val = if bytes.len() >= mem::size_of::() { - const UINTPTR_LEN: usize = mem::size_of::(); - let mut arr = [0u8; UINTPTR_LEN]; - arr[..bytes.len().min(UINTPTR_LEN)] - .copy_from_slice(&bytes[..bytes.len().min(UINTPTR_LEN)]); - usize::from_ne_bytes(arr) - } else { - 0 - }; - Ok(vm.ctx.new_int(val).into()) - } - "u" => { - // c_wchar - wide character - let val = if bytes.len() >= mem::size_of::() { - let wc = if mem::size_of::() == 2 { - u16::from_ne_bytes([bytes[0], bytes[1]]) as u32 - } else { - u32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) - }; - char::from_u32(wc).unwrap_or('\0') - } else { - '\0' - }; - Ok(vm.ctx.new_str(val.to_string()).into()) - } - _ => Ok(vm.ctx.none()), - }; - } - // Default: return bytes as-is - Ok(vm.ctx.new_bytes(bytes.to_vec()).into()) - } + // Create exception type with IMMUTABLETYPE flag + let mut attrs = PyAttributes::default(); + attrs.insert( + vm.ctx.intern_str("__module__"), + vm.ctx.new_str("_ctypes").into(), + ); + attrs.insert( + vm.ctx.intern_str("__doc__"), + vm.ctx + .new_str("Raised when a COM method call failed.") + .into(), + ); + + // Create slots with IMMUTABLETYPE flag + let slots = PyTypeSlots { + name: "COMError", + flags: PyTypeFlags::heap_type_flags() + | PyTypeFlags::HAS_DICT + | PyTypeFlags::IMMUTABLETYPE, + ..PyTypeSlots::default() + }; - const SIMPLE_TYPE_CHARS: &str = "cbBhHiIlLdfguzZPqQ?O"; + let exc_type = PyType::new_heap( + "COMError", + vec![vm.ctx.exceptions.exception_type.to_owned()], + attrs, + slots, + vm.ctx.types.type_type.to_owned(), + &vm.ctx, + ) + .unwrap(); - pub fn new_simple_type( - cls: Either<&PyObject, &PyTypeRef>, - vm: &VirtualMachine, - ) -> PyResult { - let cls = match cls { - Either::A(obj) => obj, - Either::B(typ) => typ.as_object(), - }; + // Set our custom init after new_heap, which runs init_slots that would + // otherwise overwrite slots.init with init_wrapper (due to __init__ in MRO). + exc_type.slots.init.store(Some(comerror_init)); - if let Ok(_type_) = cls.get_attr("_type_", vm) { - if _type_.is_instance((&vm.ctx.types.str_type).as_ref(), vm)? { - let tp_str = _type_.str(vm)?.to_string(); - - if tp_str.len() != 1 { - Err(vm.new_value_error( - format!("class must define a '_type_' attribute which must be a string of length 1, str: {tp_str}"), - )) - } else if !SIMPLE_TYPE_CHARS.contains(tp_str.as_str()) { - Err(vm.new_attribute_error(format!("class must define a '_type_' attribute which must be\n a single character string containing one of {SIMPLE_TYPE_CHARS}, currently it is {tp_str}."))) - } else { - let size = get_size(&tp_str); - let cdata = CDataObject::from_bytes(vec![0u8; size], None); - Ok(PyCSimple { - _base: PyCData::new(cdata.clone()), - _type_: tp_str, - value: AtomicCell::new(vm.ctx.none()), - cdata: rustpython_common::lock::PyRwLock::new(cdata), - }) - } - } else { - Err(vm.new_type_error("class must define a '_type_' string attribute")) - } - } else { - Err(vm.new_attribute_error("class must define a '_type_' attribute")) - } + exc_type } /// Get the size of a ctypes type or instance - #[pyfunction(name = "sizeof")] - pub fn size_of(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { - use super::pointer::PyCPointer; - use super::structure::{PyCStructType, PyCStructure}; + #[pyfunction] + pub fn sizeof(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { + use super::structure::PyCStructType; use super::union::PyCUnionType; - use super::util::StgInfo; - use crate::builtins::PyType; - // 1. Check TypeDataSlot on class (for instances) - if let Some(stg_info) = obj.class().get_type_data::() { - return Ok(stg_info.size); - } - - // 2. Check TypeDataSlot on type itself (for type objects) - if let Some(type_obj) = obj.downcast_ref::() - && let Some(stg_info) = type_obj.get_type_data::() - { - return Ok(stg_info.size); - } - - // 3. Instances with cdata buffer - if let Some(structure) = obj.downcast_ref::() { - return Ok(structure.cdata.read().size()); - } - if let Some(simple) = obj.downcast_ref::() { - return Ok(simple.cdata.read().size()); - } - if obj.fast_isinstance(PyCPointer::static_type()) { - return Ok(std::mem::size_of::()); - } - - // 3. Type objects - if let Ok(type_ref) = obj.clone().downcast::() { - // Structure types - check if metaclass is or inherits from PyCStructType - if type_ref + // 1. Check if obj is a TYPE object (not instance) - PyStgInfo_FromType + if let Some(type_obj) = obj.downcast_ref::() { + // Type object - return StgInfo.size + if let Some(stg_info) = type_obj.stg_info_opt() { + return Ok(stg_info.size); + } + // Fallback for type objects without StgInfo + // Array types + if type_obj + .class() + .fast_issubclass(super::array::PyCArrayType::static_type()) + && let Ok(stg) = type_obj.stg_info(vm) + { + return Ok(stg.size); + } + // Structure types + if type_obj .class() .fast_issubclass(PyCStructType::static_type()) { - return calculate_struct_size(&type_ref, vm); + return super::structure::calculate_struct_size(type_obj, vm); } - // Union types - check if metaclass is or inherits from PyCUnionType - if type_ref + // Union types + if type_obj .class() .fast_issubclass(PyCUnionType::static_type()) { - return calculate_union_size(&type_ref, vm); + return super::union::calculate_union_size(type_obj, vm); } - // Simple types (c_int, c_char, etc.) - if type_ref.fast_issubclass(PyCSimple::static_type()) { - let instance = new_simple_type(Either::B(&type_ref), vm)?; - return Ok(get_size(&instance._type_)); + // Simple types + if type_obj.fast_issubclass(PyCSimple::static_type()) { + if let Ok(type_attr) = type_obj.as_object().get_attr("_type_", vm) + && let Ok(type_str) = type_attr.str(vm) + { + return Ok(super::get_size(type_str.as_ref())); + } + return Ok(std::mem::size_of::()); } // Pointer types - if type_ref.fast_issubclass(PyCPointer::static_type()) { + if type_obj.fast_issubclass(PyCPointer::static_type()) { return Ok(std::mem::size_of::()); } + return Err(vm.new_type_error("this type has no size")); } - Err(vm.new_type_error("this type has no size")) - } - - /// Calculate Structure type size from _fields_ (sum of field sizes) - fn calculate_struct_size( - cls: &crate::builtins::PyTypeRef, - vm: &VirtualMachine, - ) -> PyResult { - use crate::AsObject; - - if let Ok(fields_attr) = cls.as_object().get_attr("_fields_", vm) { - let fields: Vec = fields_attr.try_to_value(vm).unwrap_or_default(); - let mut total_size = 0usize; - - for field in fields.iter() { - if let Some(tuple) = field.downcast_ref::() - && let Some(field_type) = tuple.get(1) - { - // Recursively calculate field type size - total_size += size_of(field_type.clone(), vm)?; - } - } - return Ok(total_size); + // 2. Instance object - return actual buffer size (b_size) + // CDataObject_Check + return obj->b_size + if let Some(cdata) = obj.downcast_ref::() { + return Ok(cdata.size()); } - Ok(0) - } - - /// Calculate Union type size from _fields_ (max field size) - fn calculate_union_size( - cls: &crate::builtins::PyTypeRef, - vm: &VirtualMachine, - ) -> PyResult { - use crate::AsObject; - - if let Ok(fields_attr) = cls.as_object().get_attr("_fields_", vm) { - let fields: Vec = fields_attr.try_to_value(vm).unwrap_or_default(); - let mut max_size = 0usize; - - for field in fields.iter() { - if let Some(tuple) = field.downcast_ref::() - && let Some(field_type) = tuple.get(1) - { - let field_size = size_of(field_type.clone(), vm)?; - max_size = max_size.max(field_size); - } - } - return Ok(max_size); + if obj.fast_isinstance(PyCPointer::static_type()) { + return Ok(std::mem::size_of::()); } - Ok(0) + + Err(vm.new_type_error("this type has no size")) } #[cfg(windows)] @@ -513,7 +577,7 @@ pub(crate) mod _ctypes { #[cfg(not(windows))] #[pyfunction(name = "dlopen")] fn load_library_unix( - name: Option, + name: Option, _load_flags: OptionalArg, vm: &VirtualMachine, ) -> PyResult { @@ -523,9 +587,12 @@ pub(crate) mod _ctypes { Some(name) => { let cache = library::libcache(); let mut cache_write = cache.write(); - let (id, _) = cache_write - .get_or_insert_lib(&name, vm) - .map_err(|e| vm.new_os_error(e.to_string()))?; + let os_str = name.as_os_str(vm)?; + let (id, _) = cache_write.get_or_insert_lib(&*os_str, vm).map_err(|e| { + // Include filename in error message for better diagnostics + let name_str = os_str.to_string_lossy(); + vm.new_os_error(format!("{}: {}", name_str, e)) + })?; Ok(id) } None => { @@ -548,7 +615,9 @@ pub(crate) mod _ctypes { } #[pyfunction(name = "POINTER")] - pub fn create_pointer_type(cls: PyObjectRef, vm: &VirtualMachine) -> PyResult { + fn create_pointer_type(cls: PyObjectRef, vm: &VirtualMachine) -> PyResult { + use crate::builtins::PyStr; + // Get the _pointer_type_cache let ctypes_module = vm.import("_ctypes", 0)?; let cache = ctypes_module.get_attr("_pointer_type_cache", vm)?; @@ -563,33 +632,60 @@ pub(crate) mod _ctypes { // Get the _Pointer base class let pointer_base = ctypes_module.get_attr("_Pointer", vm)?; + // Create a new type that inherits from _Pointer + let pointer_base_type = pointer_base + .clone() + .downcast::() + .map_err(|_| vm.new_type_error("_Pointer must be a type"))?; + let metaclass = pointer_base_type.class().to_owned(); + + let bases = vm.ctx.new_tuple(vec![pointer_base]); + let dict = vm.ctx.new_dict(); + + // PyUnicode_CheckExact(cls) - string creates incomplete pointer type + if let Some(s) = cls.downcast_ref::() { + // Incomplete pointer type: _type_ not set, cache key is id(result) + let name = format!("LP_{}", s.as_str()); + + let new_type = metaclass + .as_object() + .call((vm.ctx.new_str(name), bases, dict), vm)?; + + // Store with id(result) as key for incomplete pointer types + let id_key: PyObjectRef = vm.ctx.new_int(new_type.get_id() as i64).into(); + vm.call_method(&cache, "__setitem__", (id_key, new_type.clone()))?; + + return Ok(new_type); + } + + // PyType_Check(cls) - type creates complete pointer type + if !cls.class().fast_issubclass(vm.ctx.types.type_type.as_ref()) { + return Err(vm.new_type_error("must be a ctypes type")); + } + // Create the name for the pointer type let name = if let Ok(type_obj) = cls.get_attr("__name__", vm) { format!("LP_{}", type_obj.str(vm)?) - } else if let Ok(s) = cls.str(vm) { - format!("LP_{}", s) } else { "LP_unknown".to_string() }; - // Create a new type that inherits from _Pointer - let type_type = &vm.ctx.types.type_type; - let bases = vm.ctx.new_tuple(vec![pointer_base]); - let dict = vm.ctx.new_dict(); + // Complete pointer type: set _type_ attribute dict.set_item("_type_", cls.clone(), vm)?; - let new_type = type_type + // Call the metaclass (PyCPointerType) to create the new type + let new_type = metaclass .as_object() .call((vm.ctx.new_str(name), bases, dict), vm)?; - // Store in cache using __setitem__ + // Store in cache with cls as key vm.call_method(&cache, "__setitem__", (cls, new_type.clone()))?; Ok(new_type) } - #[pyfunction(name = "pointer")] - pub fn create_pointer_inst(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { + #[pyfunction] + fn pointer(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { // Get the type of the object let obj_type = obj.class().to_owned(); @@ -607,7 +703,7 @@ pub(crate) mod _ctypes { #[cfg(target_os = "windows")] #[pyfunction(name = "_check_HRESULT")] - pub fn check_hresult(_self: PyObjectRef, hr: i32, _vm: &VirtualMachine) -> PyResult { + fn check_hresult(_self: PyObjectRef, hr: i32, _vm: &VirtualMachine) -> PyResult { // TODO: fixme if hr < 0 { // vm.ctx.new_windows_error(hr) @@ -619,18 +715,17 @@ pub(crate) mod _ctypes { #[pyfunction] fn addressof(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { - if obj.is_instance(PyCSimple::static_type().as_ref(), vm)? { - let simple = obj.downcast_ref::().unwrap(); - Ok(simple.value.as_ptr() as usize) + // All ctypes objects should return cdata buffer pointer + if let Some(cdata) = obj.downcast_ref::() { + Ok(cdata.buffer.read().as_ptr() as usize) } else { Err(vm.new_type_error("expected a ctypes instance")) } } #[pyfunction] - fn byref(obj: PyObjectRef, offset: OptionalArg, vm: &VirtualMachine) -> PyResult { - use super::base::PyCData; - use crate::class::StaticType; + pub fn byref(obj: PyObjectRef, offset: OptionalArg, vm: &VirtualMachine) -> PyResult { + use super::FfiArgValue; // Check if obj is a ctypes instance if !obj.fast_isinstance(PyCData::static_type()) @@ -644,9 +739,23 @@ pub(crate) mod _ctypes { let offset_val = offset.unwrap_or(0); + // Get buffer address: (char *)((CDataObject *)obj)->b_ptr + offset + let ptr_val = if let Some(simple) = obj.downcast_ref::() { + let buffer = simple.0.buffer.read(); + (buffer.as_ptr() as isize + offset_val) as usize + } else if let Some(cdata) = obj.downcast_ref::() { + let buffer = cdata.buffer.read(); + (buffer.as_ptr() as isize + offset_val) as usize + } else { + 0 + }; + // Create CArgObject to hold the reference Ok(CArgObject { + tag: b'P', + value: FfiArgValue::Pointer(ptr_val), obj, + size: 0, offset: offset_val, } .to_pyobject(vm)) @@ -654,11 +763,6 @@ pub(crate) mod _ctypes { #[pyfunction] fn alignment(tp: Either, vm: &VirtualMachine) -> PyResult { - use super::base::PyCSimpleType; - use super::pointer::PyCPointer; - use super::structure::PyCStructure; - use super::union::PyCUnion; - use super::util::StgInfo; use crate::builtins::PyType; let obj = match &tp { @@ -667,23 +771,27 @@ pub(crate) mod _ctypes { }; // 1. Check TypeDataSlot on class (for instances) - if let Some(stg_info) = obj.class().get_type_data::() { + if let Some(stg_info) = obj.class().stg_info_opt() { return Ok(stg_info.align); } // 2. Check TypeDataSlot on type itself (for type objects) if let Some(type_obj) = obj.downcast_ref::() - && let Some(stg_info) = type_obj.get_type_data::() + && let Some(stg_info) = type_obj.stg_info_opt() { return Ok(stg_info.align); } - // 3. Fallback for simple types without TypeDataSlot - if obj.fast_isinstance(PyCSimple::static_type()) { - // Get stg_info from the type by reading _type_ attribute - let cls = obj.class().to_owned(); - let stg_info = PyCSimpleType::get_stg_info(&cls, vm); - return Ok(stg_info.align); + // 3. Fallback for simple types + if obj.fast_isinstance(PyCSimple::static_type()) + && let Ok(stg) = obj.class().stg_info(vm) + { + return Ok(stg.align); + } + if obj.fast_isinstance(PyCArray::static_type()) + && let Ok(stg) = obj.class().stg_info(vm) + { + return Ok(stg.align); } if obj.fast_isinstance(PyCStructure::static_type()) { // Calculate alignment from _fields_ @@ -715,8 +823,8 @@ pub(crate) mod _ctypes { // Simple type: _type_ is a single character string if let Ok(s) = type_attr.str(vm) { let ty = s.to_string(); - if ty.len() == 1 && SIMPLE_TYPE_CHARS.contains(ty.as_str()) { - return Ok(get_align(&ty)); + if ty.len() == 1 && super::simple::SIMPLE_TYPE_CHARS.contains(ty.as_str()) { + return Ok(super::get_align(&ty)); } } } @@ -754,9 +862,45 @@ pub(crate) mod _ctypes { } #[pyfunction] - fn resize(_args: FuncArgs, vm: &VirtualMachine) -> PyResult<()> { - // TODO: RUSTPYTHON - Err(vm.new_value_error("not implemented")) + fn resize(obj: PyObjectRef, size: isize, vm: &VirtualMachine) -> PyResult<()> { + use std::borrow::Cow; + + // 1. Get StgInfo from object's class (validates ctypes instance) + let stg_info = obj + .class() + .stg_info_opt() + .ok_or_else(|| vm.new_type_error("expected ctypes instance"))?; + + // 2. Validate size + if size < 0 || (size as usize) < stg_info.size { + return Err(vm.new_value_error(format!("minimum size is {}", stg_info.size))); + } + + // 3. Get PyCData via upcast (works for all ctypes types due to repr(transparent)) + let cdata = obj + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("expected ctypes instance"))?; + + // 4. Check if buffer is owned (not borrowed from external memory) + { + let buffer = cdata.buffer.read(); + if matches!(&*buffer, Cow::Borrowed(_)) { + return Err(vm.new_value_error( + "Memory cannot be resized because this object doesn't own it".to_owned(), + )); + } + } + + // 5. Resize the buffer + let new_size = size as usize; + let mut buffer = cdata.buffer.write(); + let old_data = buffer.to_vec(); + let mut new_data = vec![0u8; new_size]; + let copy_len = old_data.len().min(new_size); + new_data[..copy_len].copy_from_slice(&old_data[..copy_len]); + *buffer = Cow::Owned(new_data); + + Ok(()) } #[pyfunction] @@ -796,77 +940,306 @@ pub(crate) mod _ctypes { #[pyattr] fn _string_at_addr(_vm: &VirtualMachine) -> usize { - let f = libc::strnlen; - f as usize + super::function::INTERNAL_STRING_AT_ADDR } #[pyattr] fn _wstring_at_addr(_vm: &VirtualMachine) -> usize { - // Return address of wcsnlen or similar wide string function - #[cfg(not(target_os = "windows"))] - { - let f = libc::wcslen; - f as usize - } - #[cfg(target_os = "windows")] - { - // FIXME: On Windows, use wcslen from ucrt - 0 - } + super::function::INTERNAL_WSTRING_AT_ADDR } #[pyattr] fn _cast_addr(_vm: &VirtualMachine) -> usize { - // todo!("Implement _cast_addr") - 0 + super::function::INTERNAL_CAST_ADDR } - #[pyfunction(name = "_cast")] - pub fn pycfunction_cast( + #[pyfunction] + fn _cast( obj: PyObjectRef, - _obj2: PyObjectRef, + src: PyObjectRef, ctype: PyObjectRef, vm: &VirtualMachine, ) -> PyResult { - use super::array::PyCArray; - use super::base::PyCData; - use super::pointer::PyCPointer; - use crate::class::StaticType; + super::function::cast_impl(obj, src, ctype, vm) + } + + /// Python-level cast function (PYFUNCTYPE wrapper) + #[pyfunction] + fn cast(obj: PyObjectRef, typ: PyObjectRef, vm: &VirtualMachine) -> PyResult { + super::function::cast_impl(obj.clone(), obj, typ, vm) + } + + /// Return buffer interface information for a ctypes type or object. + /// Returns a tuple (format, ndim, shape) where: + /// - format: PEP 3118 format string + /// - ndim: number of dimensions + /// - shape: tuple of dimension sizes + #[pyfunction] + fn buffer_info(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // Determine if obj is a type or an instance + let is_type = obj.class().fast_issubclass(vm.ctx.types.type_type.as_ref()); + let cls = if is_type { + obj.clone() + } else { + obj.class().to_owned().into() + }; + + // Get format from type - try _type_ first (for simple types), then _stg_info_format_ + let format = if let Ok(type_attr) = cls.get_attr("_type_", vm) { + type_attr.str(vm)?.to_string() + } else if let Ok(format_attr) = cls.get_attr("_stg_info_format_", vm) { + format_attr.str(vm)?.to_string() + } else { + return Err(vm.new_type_error("not a ctypes type or object")); + }; + + // Non-array types have ndim=0 and empty shape + // TODO: Implement ndim/shape for arrays when StgInfo supports it + let ndim = 0; + let shape: Vec = vec![]; + + let shape_tuple = vm.ctx.new_tuple(shape); + Ok(vm + .ctx + .new_tuple(vec![ + vm.ctx.new_str(format).into(), + vm.ctx.new_int(ndim).into(), + shape_tuple.into(), + ]) + .into()) + } + + /// Unpickle a ctypes object. + #[pyfunction] + fn _unpickle(typ: PyObjectRef, state: PyObjectRef, vm: &VirtualMachine) -> PyResult { + if !state.class().is(vm.ctx.types.tuple_type.as_ref()) { + return Err(vm.new_type_error("state must be a tuple")); + } + let obj = vm.call_method(&typ, "__new__", (typ.clone(),))?; + vm.call_method(&obj, "__setstate__", (state,))?; + Ok(obj) + } + + /// Call a function at the given address with the given arguments. + #[pyfunction] + fn call_function( + func_addr: usize, + args: crate::builtins::PyTupleRef, + vm: &VirtualMachine, + ) -> PyResult { + call_function_internal(func_addr, args, 0, vm) + } + + /// Call a cdecl function at the given address with the given arguments. + #[pyfunction] + fn call_cdeclfunction( + func_addr: usize, + args: crate::builtins::PyTupleRef, + vm: &VirtualMachine, + ) -> PyResult { + call_function_internal(func_addr, args, FUNCFLAG_CDECL, vm) + } - // Python signature: _cast(obj, obj, ctype) - // Python passes the same object twice (obj and _obj2 are the same) - // We ignore _obj2 as it's redundant + fn call_function_internal( + func_addr: usize, + args: crate::builtins::PyTupleRef, + _flags: u32, + vm: &VirtualMachine, + ) -> PyResult { + use libffi::middle::{Arg, Cif, CodePtr, Type}; - // Check if this is a pointer type (has _type_ attribute) - if ctype.get_attr("_type_", vm).is_err() { - return Err(vm.new_type_error("cast() argument 2 must be a pointer type".to_string())); + if func_addr == 0 { + return Err(vm.new_value_error("NULL function pointer")); } - // Create an instance of the target pointer type with no arguments - let result = ctype.call((), vm)?; + let mut ffi_args: Vec = Vec::with_capacity(args.len()); + let mut arg_values: Vec = Vec::with_capacity(args.len()); + let mut arg_types: Vec = Vec::with_capacity(args.len()); + + for arg in args.iter() { + if vm.is_none(arg) { + arg_values.push(0); + arg_types.push(Type::pointer()); + } else if let Ok(int_val) = arg.try_int(vm) { + let val = int_val.as_bigint().to_i64().unwrap_or(0) as isize; + arg_values.push(val); + arg_types.push(Type::isize()); + } else if let Some(bytes) = arg.downcast_ref::() { + let ptr = bytes.as_bytes().as_ptr() as isize; + arg_values.push(ptr); + arg_types.push(Type::pointer()); + } else if let Some(s) = arg.downcast_ref::() { + let ptr = s.as_str().as_ptr() as isize; + arg_values.push(ptr); + arg_types.push(Type::pointer()); + } else { + return Err(vm.new_type_error(format!( + "Don't know how to convert parameter of type '{}'", + arg.class().name() + ))); + } + } - // Get the pointer value from the source object - // If obj is a CData instance (including arrays), use the object itself - // If obj is an integer, use it directly as the pointer value - let ptr_value: PyObjectRef = if obj.fast_isinstance(PyCData::static_type()) - || obj.fast_isinstance(PyCArray::static_type()) - || obj.fast_isinstance(PyCPointer::static_type()) - { - // For CData objects (including arrays and pointers), store the object itself - obj.clone() - } else if let Ok(int_val) = obj.try_int(vm) { - // For integers, treat as pointer address - vm.ctx.new_int(int_val.as_bigint().clone()).into() - } else { - return Err(vm.new_type_error(format!( - "cast() argument 1 must be a ctypes instance or an integer, not {}", - obj.class().name() - ))); + for val in &arg_values { + ffi_args.push(Arg::new(val)); + } + + let cif = Cif::new(arg_types, Type::isize()); + let code_ptr = CodePtr::from_ptr(func_addr as *const _); + let result: isize = unsafe { cif.call(code_ptr, &ffi_args) }; + Ok(vm.ctx.new_int(result).into()) + } + + /// Convert a pointer (as integer) to a Python object. + #[pyfunction(name = "PyObj_FromPtr")] + fn py_obj_from_ptr(ptr: usize, vm: &VirtualMachine) -> PyResult { + if ptr == 0 { + return Err(vm.new_value_error("NULL pointer access")); + } + let raw_ptr = ptr as *mut crate::object::PyObject; + unsafe { + let obj = crate::PyObjectRef::from_raw(std::ptr::NonNull::new_unchecked(raw_ptr)); + let obj = std::mem::ManuallyDrop::new(obj); + Ok((*obj).clone()) + } + } + + #[pyfunction(name = "Py_INCREF")] + fn py_incref(obj: PyObjectRef, _vm: &VirtualMachine) -> PyObjectRef { + // TODO: + obj + } + + #[pyfunction(name = "Py_DECREF")] + fn py_decref(obj: PyObjectRef, _vm: &VirtualMachine) -> PyObjectRef { + // TODO: + obj + } + + #[cfg(target_os = "macos")] + #[pyfunction] + fn _dyld_shared_cache_contains_path( + path: Option, + vm: &VirtualMachine, + ) -> PyResult { + use std::ffi::CString; + + let path = match path { + Some(p) if !vm.is_none(&p) => p, + _ => return Ok(false), }; - // Set the contents of the pointer by setting the attribute - result.set_attr("contents", ptr_value, vm)?; + let path_str = path.str(vm)?.to_string(); + let c_path = + CString::new(path_str).map_err(|_| vm.new_value_error("path contains null byte"))?; + unsafe extern "C" { + fn _dyld_shared_cache_contains_path(path: *const libc::c_char) -> bool; + } + + let result = unsafe { _dyld_shared_cache_contains_path(c_path.as_ptr()) }; Ok(result) } + + #[cfg(windows)] + #[pyfunction(name = "FormatError")] + fn format_error_func(code: OptionalArg, _vm: &VirtualMachine) -> PyResult { + use windows_sys::Win32::Foundation::{GetLastError, LocalFree}; + use windows_sys::Win32::System::Diagnostics::Debug::{ + FORMAT_MESSAGE_ALLOCATE_BUFFER, FORMAT_MESSAGE_FROM_SYSTEM, + FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW, + }; + + let error_code = code.unwrap_or_else(|| unsafe { GetLastError() }); + + let mut buffer: *mut u16 = std::ptr::null_mut(); + let len = unsafe { + FormatMessageW( + FORMAT_MESSAGE_ALLOCATE_BUFFER + | FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_IGNORE_INSERTS, + std::ptr::null(), + error_code, + 0, + &mut buffer as *mut *mut u16 as *mut u16, + 0, + std::ptr::null(), + ) + }; + + if len == 0 || buffer.is_null() { + return Ok("".to_string()); + } + + let message = unsafe { + let slice = std::slice::from_raw_parts(buffer, len as usize); + let msg = String::from_utf16_lossy(slice).trim_end().to_string(); + LocalFree(buffer as *mut _); + msg + }; + + Ok(message) + } + + #[cfg(windows)] + #[pyfunction(name = "CopyComPointer")] + fn copy_com_pointer(src: PyObjectRef, dst: PyObjectRef, vm: &VirtualMachine) -> PyResult { + use windows_sys::Win32::Foundation::{E_POINTER, S_OK}; + + // 1. Extract pointer-to-pointer address from dst (byref() result) + let pdst: usize = if let Some(carg) = dst.downcast_ref::() { + // byref() result: object buffer address + offset + let base = if let Some(cdata) = carg.obj.downcast_ref::() { + cdata.buffer.read().as_ptr() as usize + } else { + return Ok(E_POINTER); + }; + (base as isize + carg.offset) as usize + } else { + return Ok(E_POINTER); + }; + + if pdst == 0 { + return Ok(E_POINTER); + } + + // 2. Extract COM pointer value from src + let src_ptr: usize = if vm.is_none(&src) { + 0 + } else if let Some(cdata) = src.downcast_ref::() { + // c_void_p etc: read pointer value from buffer + let buffer = cdata.buffer.read(); + if buffer.len() >= std::mem::size_of::() { + usize::from_ne_bytes( + buffer[..std::mem::size_of::()] + .try_into() + .unwrap_or([0; std::mem::size_of::()]), + ) + } else { + 0 + } + } else { + return Ok(E_POINTER); + }; + + // 3. Call IUnknown::AddRef if src is non-NULL + if src_ptr != 0 { + unsafe { + // IUnknown vtable: [QueryInterface, AddRef, Release, ...] + let iunknown = src_ptr as *mut *const usize; + let vtable = *iunknown; + debug_assert!(!vtable.is_null(), "IUnknown vtable is null"); + let addref_fn: extern "system" fn(*mut std::ffi::c_void) -> u32 = + std::mem::transmute(*vtable.add(1)); // AddRef is index 1 + addref_fn(src_ptr as *mut std::ffi::c_void); + } + } + + // 4. Copy pointer: *pdst = src + unsafe { + *(pdst as *mut usize) = src_ptr; + } + + Ok(S_OK) + } } diff --git a/crates/vm/src/stdlib/ctypes/array.rs b/crates/vm/src/stdlib/ctypes/array.rs index fe12a781d9f..60e6516bfe0 100644 --- a/crates/vm/src/stdlib/ctypes/array.rs +++ b/crates/vm/src/stdlib/ctypes/array.rs @@ -1,41 +1,109 @@ -use crate::atomic_func; -use crate::builtins::{PyBytes, PyInt}; -use crate::class::StaticType; -use crate::function::FuncArgs; -use crate::protocol::{ - BufferDescriptor, BufferMethods, PyBuffer, PyNumberMethods, PySequenceMethods, -}; -use crate::stdlib::ctypes::base::CDataObject; -use crate::stdlib::ctypes::util::StgInfo; -use crate::types::{AsBuffer, AsNumber, AsSequence}; -use crate::{AsObject, Py, PyObjectRef, PyPayload}; +use super::StgInfo; +use super::base::{CDATA_BUFFER_METHODS, PyCData}; use crate::{ - PyResult, VirtualMachine, - builtins::{PyType, PyTypeRef}, - types::Constructor, + AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine, + atomic_func, + builtins::{PyBytes, PyInt, PyList, PySlice, PyStr, PyType, PyTypeRef}, + class::StaticType, + function::{ArgBytesLike, FuncArgs, PySetterValue}, + protocol::{BufferDescriptor, PyBuffer, PyNumberMethods, PySequenceMethods}, + types::{AsBuffer, AsNumber, AsSequence, Constructor, Initializer}, }; -use crossbeam_utils::atomic::AtomicCell; -use num_traits::ToPrimitive; -use rustpython_common::lock::PyRwLock; -use rustpython_vm::stdlib::ctypes::_ctypes::get_size; -use rustpython_vm::stdlib::ctypes::base::PyCData; +use num_traits::{Signed, ToPrimitive}; + +/// Creates array type for (element_type, length) +/// Uses _array_type_cache to ensure identical calls return the same type object +pub(super) fn array_type_from_ctype( + itemtype: PyObjectRef, + length: usize, + vm: &VirtualMachine, +) -> PyResult { + // PyCArrayType_from_ctype + + // Get the _array_type_cache from _ctypes module + let ctypes_module = vm.import("_ctypes", 0)?; + let cache = ctypes_module.get_attr("_array_type_cache", vm)?; + + // Create cache key: (itemtype, length) tuple + let length_obj: PyObjectRef = vm.ctx.new_int(length).into(); + let cache_key = vm.ctx.new_tuple(vec![itemtype.clone(), length_obj]); + + // Check if already in cache + if let Ok(cached) = vm.call_method(&cache, "__getitem__", (cache_key.clone(),)) + && !vm.is_none(&cached) + { + return Ok(cached); + } -/// PyCArrayType - metatype for Array types -/// CPython stores array info (type, length) in StgInfo via type_data -#[pyclass(name = "PyCArrayType", base = PyType, module = "_ctypes")] -#[derive(Debug)] -#[repr(transparent)] -pub struct PyCArrayType(PyType); + // Cache miss - create new array type + let itemtype_ref = itemtype + .clone() + .downcast::() + .map_err(|_| vm.new_type_error("Expected a type object"))?; + + let item_stg = itemtype_ref + .stg_info_opt() + .ok_or_else(|| vm.new_type_error("_type_ must have storage info"))?; + + let element_size = item_stg.size; + let element_align = item_stg.align; + let item_format = item_stg.format.clone(); + let item_shape = item_stg.shape.clone(); + let item_flags = item_stg.flags; + + // Check overflow before multiplication + let total_size = element_size + .checked_mul(length) + .ok_or_else(|| vm.new_overflow_error("array too large"))?; + + // format name: "c_int_Array_5" + let type_name = format!("{}_Array_{}", itemtype_ref.name(), length); + + // Get item type code before moving itemtype + let item_type_code = itemtype_ref + .as_object() + .get_attr("_type_", vm) + .ok() + .and_then(|t| t.downcast_ref::().map(|s| s.to_string())); + + let stg_info = StgInfo::new_array( + total_size, + element_align, + length, + itemtype_ref.clone(), + element_size, + item_format.as_deref(), + &item_shape, + item_flags, + ); -/// Create a new Array type with StgInfo stored in type_data (CPython style) -pub fn create_array_type_with_stg_info(stg_info: StgInfo, vm: &VirtualMachine) -> PyResult { - // Get PyCArrayType as metaclass - let metaclass = PyCArrayType::static_type().to_owned(); + let new_type = create_array_type_with_name(stg_info, &type_name, vm)?; + + // Special case for character arrays - add value/raw attributes + let new_type_ref: PyTypeRef = new_type + .clone() + .downcast() + .map_err(|_| vm.new_type_error("expected type"))?; + + match item_type_code.as_deref() { + Some("c") => add_char_array_getsets(&new_type_ref, vm), + Some("u") => add_wchar_array_getsets(&new_type_ref, vm), + _ => {} + } + + // Store in cache + vm.call_method(&cache, "__setitem__", (cache_key, new_type.clone()))?; - // Create a unique name for the array type - let type_name = format!("Array_{}", stg_info.length); + Ok(new_type) +} - // Create args for type(): (name, bases, dict) +/// create_array_type_with_name - create array type with specified name +fn create_array_type_with_name( + stg_info: StgInfo, + type_name: &str, + vm: &VirtualMachine, +) -> PyResult { + let metaclass = PyCArrayType::static_type().to_owned(); let name = vm.ctx.new_str(type_name); let bases = vm .ctx @@ -47,170 +115,205 @@ pub fn create_array_type_with_stg_info(stg_info: StgInfo, vm: &VirtualMachine) - crate::function::KwArgs::default(), ); - // Create the new type using PyType::slot_new with PyCArrayType as metaclass let new_type = crate::builtins::type_::PyType::slot_new(metaclass, args, vm)?; - // Set StgInfo in type_data let type_ref: PyTypeRef = new_type .clone() .downcast() - .map_err(|_| vm.new_type_error("Failed to create array type".to_owned()))?; + .map_err(|_| vm.new_type_error("Failed to create array type"))?; - if type_ref.init_type_data(stg_info.clone()).is_err() { - // Type data already initialized - update it - if let Some(mut existing) = type_ref.get_type_data_mut::() { - *existing = stg_info; - } + // Set class attributes for _type_ and _length_ + if let Some(element_type) = stg_info.element_type.clone() { + new_type.set_attr("_type_", element_type, vm)?; } + new_type.set_attr("_length_", vm.ctx.new_int(stg_info.length), vm)?; + + super::base::set_or_init_stginfo(&type_ref, stg_info); Ok(new_type) } -impl Constructor for PyCArrayType { - type Args = PyObjectRef; +/// PyCArrayType - metatype for Array types +#[pyclass(name = "PyCArrayType", base = PyType, module = "_ctypes")] +#[derive(Debug)] +#[repr(transparent)] +pub(super) struct PyCArrayType(PyType); - fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { - unimplemented!("use slot_new") - } -} +// PyCArrayType implements Initializer for slots.init (PyCArrayType_init) +impl Initializer for PyCArrayType { + type Args = FuncArgs; -#[pyclass(flags(IMMUTABLETYPE), with(Constructor, AsNumber))] -impl PyCArrayType { - #[pygetset(name = "_type_")] - fn typ(zelf: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef { - zelf.downcast_ref::() - .and_then(|t| t.get_type_data::()) - .and_then(|stg| stg.element_type.clone()) - .unwrap_or_else(|| vm.ctx.none()) - } + fn init(zelf: PyRef, _args: Self::Args, vm: &VirtualMachine) -> PyResult<()> { + // zelf is the newly created array type (e.g., T in "class T(Array)") + let new_type: &PyType = &zelf.0; + + new_type.check_not_initialized(vm)?; + + // 1. Get _length_ from class dict first + let direct_length = new_type + .attributes + .read() + .get(vm.ctx.intern_str("_length_")) + .cloned(); + + // 2. Get _type_ from class dict first + let direct_type = new_type + .attributes + .read() + .get(vm.ctx.intern_str("_type_")) + .cloned(); + + // 3. Find parent StgInfo from MRO (for inheritance) + // Note: PyType.mro does NOT include self, so no skip needed + let parent_stg_info = new_type + .mro + .read() + .iter() + .find_map(|base| base.stg_info_opt().map(|s| s.clone())); + + // 4. Resolve _length_ (direct or inherited) + let length = if let Some(length_attr) = direct_length { + // Direct _length_ defined - validate it (PyLong_Check) + let length_int = length_attr + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("The '_length_' attribute must be an integer"))?; + let bigint = length_int.as_bigint(); + // Check sign first - negative values are ValueError + if bigint.is_negative() { + return Err(vm.new_value_error("The '_length_' attribute must not be negative")); + } + // Positive values that don't fit in usize are OverflowError + bigint + .to_usize() + .ok_or_else(|| vm.new_overflow_error("The '_length_' attribute is too large"))? + } else if let Some(ref parent_info) = parent_stg_info { + // Inherit from parent + parent_info.length + } else { + return Err(vm.new_attribute_error("class must define a '_length_' attribute")); + }; - #[pygetset(name = "_length_")] - fn length(zelf: PyObjectRef) -> usize { - zelf.downcast_ref::() - .and_then(|t| t.get_type_data::()) - .map(|stg| stg.length) - .unwrap_or(0) - } + // 5. Resolve _type_ and get item_info (direct or inherited) + let (element_type, item_size, item_align, item_format, item_shape, item_flags) = + if let Some(type_attr) = direct_type { + // Direct _type_ defined - validate it (PyStgInfo_FromType) + let type_ref = type_attr + .clone() + .downcast::() + .map_err(|_| vm.new_type_error("_type_ must be a type"))?; + let (size, align, format, shape, flags) = { + let item_info = type_ref + .stg_info_opt() + .ok_or_else(|| vm.new_type_error("_type_ must have storage info"))?; + ( + item_info.size, + item_info.align, + item_info.format.clone(), + item_info.shape.clone(), + item_info.flags, + ) + }; + (type_ref, size, align, format, shape, flags) + } else if let Some(ref parent_info) = parent_stg_info { + // Inherit from parent + let parent_type = parent_info + .element_type + .clone() + .ok_or_else(|| vm.new_type_error("_type_ must have storage info"))?; + ( + parent_type, + parent_info.element_size, + parent_info.align, + parent_info.format.clone(), + parent_info.shape.clone(), + parent_info.flags, + ) + } else { + return Err(vm.new_attribute_error("class must define a '_type_' attribute")); + }; - #[pymethod] - fn __mul__(zelf: PyObjectRef, n: isize, vm: &VirtualMachine) -> PyResult { - if n < 0 { - return Err(vm.new_value_error(format!("Array length must be >= 0, not {n}"))); + // 6. Check overflow (item_size != 0 && length > MAX / item_size) + if item_size != 0 && length > usize::MAX / item_size { + return Err(vm.new_overflow_error("array too large")); } - // Get inner array info from TypeDataSlot - let type_ref = zelf.downcast_ref::().unwrap(); - let (_inner_length, inner_size) = type_ref - .get_type_data::() - .map(|stg| (stg.length, stg.size)) - .unwrap_or((0, 0)); - - // The element type of the new array is the current array type itself - let current_array_type: PyObjectRef = zelf.clone(); - - // Element size is the total size of the inner array - let new_element_size = inner_size; - let total_size = new_element_size * (n as usize); - + // 7. Initialize StgInfo (PyStgInfo_Init + field assignment) let stg_info = StgInfo::new_array( - total_size, - new_element_size, - n as usize, - current_array_type, - new_element_size, + item_size * length, // size = item_size * length + item_align, // align = item_info->align + length, // length + element_type.clone(), + item_size, // element_size + item_format.as_deref(), + &item_shape, + item_flags, ); - create_array_type_with_stg_info(stg_info, vm) - } + // 8. Store StgInfo in type_data + super::base::set_or_init_stginfo(new_type, stg_info); - #[pyclassmethod] - fn in_dll( - zelf: PyObjectRef, - dll: PyObjectRef, - name: crate::builtins::PyStrRef, - vm: &VirtualMachine, - ) -> PyResult { - use libloading::Symbol; + // 9. Get type code before moving element_type + let item_type_code = element_type + .as_object() + .get_attr("_type_", vm) + .ok() + .and_then(|t| t.downcast_ref::().map(|s| s.to_string())); + + // 10. Set class attributes for _type_ and _length_ + zelf.as_object().set_attr("_type_", element_type, vm)?; + zelf.as_object() + .set_attr("_length_", vm.ctx.new_int(length), vm)?; + + // 11. Special case for character arrays - add value/raw attributes + // if (iteminfo->getfunc == _ctypes_get_fielddesc("c")->getfunc) + // add_getset((PyTypeObject*)self, CharArray_getsets); + // else if (iteminfo->getfunc == _ctypes_get_fielddesc("u")->getfunc) + // add_getset((PyTypeObject*)self, WCharArray_getsets); + + // Get type ref for add_getset + let type_ref: PyTypeRef = zelf.as_object().to_owned().downcast().unwrap(); + match item_type_code.as_deref() { + Some("c") => add_char_array_getsets(&type_ref, vm), + Some("u") => add_wchar_array_getsets(&type_ref, vm), + _ => {} + } - // Get the library handle from dll object - let handle = if let Ok(int_handle) = dll.try_int(vm) { - // dll is an integer handle - int_handle - .as_bigint() - .to_usize() - .ok_or_else(|| vm.new_value_error("Invalid library handle".to_owned()))? - } else { - // dll is a CDLL/PyDLL/WinDLL object with _handle attribute - dll.get_attr("_handle", vm)? - .try_int(vm)? - .as_bigint() - .to_usize() - .ok_or_else(|| vm.new_value_error("Invalid library handle".to_owned()))? - }; + Ok(()) + } +} - // Get the library from cache - let library_cache = crate::stdlib::ctypes::library::libcache().read(); - let library = library_cache - .get_lib(handle) - .ok_or_else(|| vm.new_attribute_error("Library not found".to_owned()))?; - - // Get symbol address from library - let symbol_name = format!("{}\0", name.as_str()); - let inner_lib = library.lib.lock(); - - let symbol_address = if let Some(lib) = &*inner_lib { - unsafe { - // Try to get the symbol from the library - let symbol: Symbol<'_, *mut u8> = lib.get(symbol_name.as_bytes()).map_err(|e| { - vm.new_attribute_error(format!("{}: symbol '{}' not found", e, name.as_str())) - })?; - *symbol as usize - } - } else { - return Err(vm.new_attribute_error("Library is closed".to_owned())); - }; +#[pyclass(flags(IMMUTABLETYPE), with(Initializer, AsNumber))] +impl PyCArrayType { + #[pymethod] + fn from_param(zelf: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // zelf is the array type class that from_param was called on + let cls = zelf + .downcast::() + .map_err(|_| vm.new_type_error("from_param: expected a type"))?; + + // 1. If already an instance of the requested type, return it + if value.is_instance(cls.as_object(), vm)? { + return Ok(value); + } - // Get size from the array type via TypeDataSlot - let type_ref = zelf.downcast_ref::().unwrap(); - let (element_type, length, element_size) = type_ref - .get_type_data::() - .map(|stg| { - ( - stg.element_type.clone().unwrap_or_else(|| vm.ctx.none()), - stg.length, - stg.element_size, - ) - }) - .unwrap_or_else(|| (vm.ctx.none(), 0, 0)); - let total_size = element_size * length; - - // Read data from symbol address - let data = if symbol_address != 0 && total_size > 0 { - unsafe { - let ptr = symbol_address as *const u8; - std::slice::from_raw_parts(ptr, total_size).to_vec() + // 2. Check for CArgObject (PyCArg_CheckExact) + if let Some(carg) = value.downcast_ref::() { + // Check if the wrapped object is an instance of the requested type + if carg.obj.is_instance(cls.as_object(), vm)? { + return Ok(value); // Return the CArgObject as-is } - } else { - vec![0; total_size] - }; - - // Create instance - let cdata = CDataObject::from_bytes(data, None); - let instance = PyCArray { - _base: PyCData::new(cdata.clone()), - typ: PyRwLock::new(element_type), - length: AtomicCell::new(length), - element_size: AtomicCell::new(element_size), - cdata: PyRwLock::new(cdata), } - .into_pyobject(vm); - // Store base reference to keep dll alive - if let Ok(array_ref) = instance.clone().downcast::() { - array_ref.cdata.write().base = Some(dll); + // 3. Check for _as_parameter_ attribute + if let Ok(as_parameter) = value.get_attr("_as_parameter_", vm) { + return PyCArrayType::from_param(cls.as_object().to_owned(), as_parameter, vm); } - Ok(instance) + Err(vm.new_type_error(format!( + "expected {} instance instead of {}", + cls.name(), + value.class().name() + ))) } } @@ -223,8 +326,28 @@ impl AsNumber for PyCArrayType { .try_index(vm)? .as_bigint() .to_isize() - .ok_or_else(|| vm.new_overflow_error("array size too large".to_owned()))?; - PyCArrayType::__mul__(a.to_owned(), n, vm) + .ok_or_else(|| vm.new_overflow_error("array size too large"))?; + + if n < 0 { + return Err(vm.new_value_error(format!("Array length must be >= 0, not {n}"))); + } + + // Check for overflow before creating the new array type + let zelf_type = a + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("Expected type"))?; + + if let Some(stg_info) = zelf_type.stg_info_opt() { + let current_size = stg_info.size; + // Check if current_size * n would overflow + if current_size != 0 && (n as usize) > isize::MAX as usize / current_size { + return Err(vm.new_overflow_error("array too large")); + } + } + + // Use cached array type creation + // The element type of the new array is the current array type itself + array_type_from_ctype(a.to_owned(), n as usize, vm) }), ..PyNumberMethods::NOT_IMPLEMENTED }; @@ -232,27 +355,28 @@ impl AsNumber for PyCArrayType { } } +/// PyCArray - Array instance +/// All array metadata (element_type, length, element_size) is stored in the type's StgInfo #[pyclass( name = "Array", base = PyCData, metaclass = "PyCArrayType", module = "_ctypes" )] -pub struct PyCArray { - _base: PyCData, - /// Element type - can be a simple type (c_int) or an array type (c_int * 5) - pub(super) typ: PyRwLock, - pub(super) length: AtomicCell, - pub(super) element_size: AtomicCell, - pub(super) cdata: PyRwLock, -} +#[derive(Debug)] +#[repr(transparent)] +pub struct PyCArray(pub PyCData); -impl std::fmt::Debug for PyCArray { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PyCArray") - .field("typ", &self.typ) - .field("length", &self.length) - .finish() +impl PyCArray { + /// Get the type code of array element type (e.g., "c" for c_char, "u" for c_wchar) + fn get_element_type_code(zelf: &Py, vm: &VirtualMachine) -> Option { + zelf.class() + .stg_info_opt() + .and_then(|info| info.element_type.clone())? + .as_object() + .get_attr("_type_", vm) + .ok() + .and_then(|t| t.downcast_ref::().map(|s| s.to_string())) } } @@ -260,60 +384,29 @@ impl Constructor for PyCArray { type Args = FuncArgs; fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { - // Get _type_ and _length_ from the class - let type_attr = cls.as_object().get_attr("_type_", vm).ok(); - let length_attr = cls.as_object().get_attr("_length_", vm).ok(); - - let element_type = type_attr.unwrap_or_else(|| vm.ctx.types.object_type.to_owned().into()); - let length = if let Some(len_obj) = length_attr { - len_obj.try_int(vm)?.as_bigint().to_usize().unwrap_or(0) - } else { - 0 + // Check for abstract class - StgInfo must exist and be initialized + // Extract values in a block to drop the borrow before using cls + let (length, total_size) = { + let stg = cls.stg_info(vm)?; + (stg.length, stg.size) }; - // Get element size from _type_ - let element_size = if let Ok(type_code) = element_type.get_attr("_type_", vm) { - if let Ok(s) = type_code.str(vm) { - let s = s.to_string(); - if s.len() == 1 { - get_size(&s) - } else { - std::mem::size_of::() - } - } else { - std::mem::size_of::() - } - } else { - std::mem::size_of::() - }; + // Check for too many initializers + if args.args.len() > length { + return Err(vm.new_index_error("too many initializers")); + } - let total_size = element_size * length; - let mut buffer = vec![0u8; total_size]; + // Create array with zero-initialized buffer + let buffer = vec![0u8; total_size]; + let instance = PyCArray(PyCData::from_bytes_with_length(buffer, None, length)) + .into_ref_with_type(vm, cls)?; - // Initialize from positional arguments + // Initialize elements using setitem_by_index (Array_init pattern) for (i, value) in args.args.iter().enumerate() { - if i >= length { - break; - } - let offset = i * element_size; - if let Ok(int_val) = value.try_int(vm) { - let bytes = PyCArray::int_to_bytes(int_val.as_bigint(), element_size); - if offset + element_size <= buffer.len() { - buffer[offset..offset + element_size].copy_from_slice(&bytes); - } - } + PyCArray::setitem_by_index(&instance, i as isize, value.clone(), vm)?; } - let cdata = CDataObject::from_bytes(buffer, None); - PyCArray { - _base: PyCData::new(cdata.clone()), - typ: PyRwLock::new(element_type), - length: AtomicCell::new(length), - element_size: AtomicCell::new(element_size), - cdata: PyRwLock::new(cdata), - } - .into_ref_with_type(vm, cls) - .map(Into::into) + Ok(instance.into()) } fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { @@ -325,15 +418,19 @@ impl AsSequence for PyCArray { fn as_sequence() -> &'static PySequenceMethods { use std::sync::LazyLock; static AS_SEQUENCE: LazyLock = LazyLock::new(|| PySequenceMethods { - length: atomic_func!(|seq, _vm| Ok(PyCArray::sequence_downcast(seq).length.load())), + length: atomic_func!(|seq, _vm| { + let zelf = PyCArray::sequence_downcast(seq); + Ok(zelf.class().stg_info_opt().map_or(0, |i| i.length)) + }), item: atomic_func!(|seq, i, vm| { - PyCArray::getitem_by_index(PyCArray::sequence_downcast(seq), i, vm) + let zelf = PyCArray::sequence_downcast(seq); + PyCArray::getitem_by_index(zelf, i, vm) }), ass_item: atomic_func!(|seq, i, value, vm| { let zelf = PyCArray::sequence_downcast(seq); match value { Some(v) => PyCArray::setitem_by_index(zelf, i, v, vm), - None => Err(vm.new_type_error("cannot delete array elements".to_owned())), + None => Err(vm.new_type_error("cannot delete array elements")), } }), ..PySequenceMethods::NOT_IMPLEMENTED @@ -347,468 +444,839 @@ impl AsSequence for PyCArray { with(Constructor, AsSequence, AsBuffer) )] impl PyCArray { - #[pygetset] - fn _objects(&self) -> Option { - self.cdata.read().objects.clone() - } - fn int_to_bytes(i: &malachite_bigint::BigInt, size: usize) -> Vec { + // Try unsigned first (handles values like 0xFFFFFFFF that overflow signed) + // then fall back to signed (handles negative values) match size { - 1 => vec![i.to_i8().unwrap_or(0) as u8], - 2 => i.to_i16().unwrap_or(0).to_ne_bytes().to_vec(), - 4 => i.to_i32().unwrap_or(0).to_ne_bytes().to_vec(), - 8 => i.to_i64().unwrap_or(0).to_ne_bytes().to_vec(), + 1 => { + if let Some(v) = i.to_u8() { + vec![v] + } else { + vec![i.to_i8().unwrap_or(0) as u8] + } + } + 2 => { + if let Some(v) = i.to_u16() { + v.to_ne_bytes().to_vec() + } else { + i.to_i16().unwrap_or(0).to_ne_bytes().to_vec() + } + } + 4 => { + if let Some(v) = i.to_u32() { + v.to_ne_bytes().to_vec() + } else { + i.to_i32().unwrap_or(0).to_ne_bytes().to_vec() + } + } + 8 => { + if let Some(v) = i.to_u64() { + v.to_ne_bytes().to_vec() + } else { + i.to_i64().unwrap_or(0).to_ne_bytes().to_vec() + } + } _ => vec![0u8; size], } } - fn bytes_to_int(bytes: &[u8], size: usize, vm: &VirtualMachine) -> PyObjectRef { - match size { - 1 => vm.ctx.new_int(bytes[0] as i8).into(), - 2 => { + fn bytes_to_int( + bytes: &[u8], + size: usize, + type_code: Option<&str>, + vm: &VirtualMachine, + ) -> PyObjectRef { + // Unsigned type codes: B (uchar), H (ushort), I (uint), L (ulong), Q (ulonglong) + let is_unsigned = matches!( + type_code, + Some("B") | Some("H") | Some("I") | Some("L") | Some("Q") + ); + + match (size, is_unsigned) { + (1, false) => vm.ctx.new_int(bytes[0] as i8).into(), + (1, true) => vm.ctx.new_int(bytes[0]).into(), + (2, false) => { let val = i16::from_ne_bytes([bytes[0], bytes[1]]); vm.ctx.new_int(val).into() } - 4 => { + (2, true) => { + let val = u16::from_ne_bytes([bytes[0], bytes[1]]); + vm.ctx.new_int(val).into() + } + (4, false) => { let val = i32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); vm.ctx.new_int(val).into() } - 8 => { + (4, true) => { + let val = u32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); + vm.ctx.new_int(val).into() + } + (8, false) => { let val = i64::from_ne_bytes([ bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], ]); vm.ctx.new_int(val).into() } + (8, true) => { + let val = u64::from_ne_bytes([ + bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], + ]); + vm.ctx.new_int(val).into() + } _ => vm.ctx.new_int(0).into(), } } - fn getitem_by_index(zelf: &PyCArray, i: isize, vm: &VirtualMachine) -> PyResult { - let length = zelf.length.load() as isize; + fn getitem_by_index(zelf: &Py, i: isize, vm: &VirtualMachine) -> PyResult { + let stg = zelf.class().stg_info_opt(); + let length = stg.as_ref().map_or(0, |i| i.length) as isize; let index = if i < 0 { length + i } else { i }; if index < 0 || index >= length { - return Err(vm.new_index_error("array index out of range".to_owned())); + return Err(vm.new_index_error("invalid index")); } let index = index as usize; - let element_size = zelf.element_size.load(); + let element_size = stg.as_ref().map_or(0, |i| i.element_size); let offset = index * element_size; - let buffer = zelf.cdata.read().buffer.clone(); - if offset + element_size <= buffer.len() { - let bytes = &buffer[offset..offset + element_size]; - Ok(Self::bytes_to_int(bytes, element_size, vm)) + let type_code = Self::get_element_type_code(zelf, vm); + + // Get target buffer and offset (base's buffer if available, otherwise own) + let base_obj = zelf.0.base.read().clone(); + let (buffer_lock, final_offset) = if let Some(cdata) = base_obj + .as_ref() + .and_then(|b| b.downcast_ref::()) + { + (&cdata.buffer, zelf.0.base_offset.load() + offset) } else { - Ok(vm.ctx.new_int(0).into()) + (&zelf.0.buffer, offset) + }; + + let buffer = buffer_lock.read(); + Self::read_element_from_buffer( + &buffer, + final_offset, + element_size, + type_code.as_deref(), + vm, + ) + } + + /// Helper to read an element value from a buffer at given offset + fn read_element_from_buffer( + buffer: &[u8], + offset: usize, + element_size: usize, + type_code: Option<&str>, + vm: &VirtualMachine, + ) -> PyResult { + match type_code { + Some("c") => { + // Return single byte as bytes + if offset < buffer.len() { + Ok(vm.ctx.new_bytes(vec![buffer[offset]]).into()) + } else { + Ok(vm.ctx.new_bytes(vec![0]).into()) + } + } + Some("u") => { + // Return single wchar as str + if let Some(code) = wchar_from_bytes(&buffer[offset..]) { + let s = char::from_u32(code) + .map(|c| c.to_string()) + .unwrap_or_default(); + Ok(vm.ctx.new_str(s).into()) + } else { + Ok(vm.ctx.new_str("").into()) + } + } + Some("z") => { + // c_char_p: pointer to bytes - dereference to get string + if offset + element_size > buffer.len() { + return Ok(vm.ctx.none()); + } + let ptr_bytes = &buffer[offset..offset + element_size]; + let ptr_val = usize::from_ne_bytes( + ptr_bytes + .try_into() + .unwrap_or([0; std::mem::size_of::()]), + ); + if ptr_val == 0 { + return Ok(vm.ctx.none()); + } + // Read null-terminated string from pointer address + unsafe { + let ptr = ptr_val as *const u8; + let mut len = 0; + while *ptr.add(len) != 0 { + len += 1; + } + let bytes = std::slice::from_raw_parts(ptr, len); + Ok(vm.ctx.new_bytes(bytes.to_vec()).into()) + } + } + Some("Z") => { + // c_wchar_p: pointer to wchar_t - dereference to get string + if offset + element_size > buffer.len() { + return Ok(vm.ctx.none()); + } + let ptr_bytes = &buffer[offset..offset + element_size]; + let ptr_val = usize::from_ne_bytes( + ptr_bytes + .try_into() + .unwrap_or([0; std::mem::size_of::()]), + ); + if ptr_val == 0 { + return Ok(vm.ctx.none()); + } + // Read null-terminated wide string using WCHAR_SIZE + unsafe { + let ptr = ptr_val as *const u8; + let mut chars = Vec::new(); + let mut pos = 0usize; + loop { + let code = if WCHAR_SIZE == 2 { + let bytes = std::slice::from_raw_parts(ptr.add(pos), 2); + u16::from_ne_bytes([bytes[0], bytes[1]]) as u32 + } else { + let bytes = std::slice::from_raw_parts(ptr.add(pos), 4); + u32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) + }; + if code == 0 { + break; + } + if let Some(ch) = char::from_u32(code) { + chars.push(ch); + } + pos += WCHAR_SIZE; + } + let s: String = chars.into_iter().collect(); + Ok(vm.ctx.new_str(s).into()) + } + } + Some("f") => { + // c_float + if offset + 4 <= buffer.len() { + let bytes: [u8; 4] = buffer[offset..offset + 4].try_into().unwrap(); + let val = f32::from_ne_bytes(bytes); + Ok(vm.ctx.new_float(val as f64).into()) + } else { + Ok(vm.ctx.new_float(0.0).into()) + } + } + Some("d") | Some("g") => { + // c_double / c_longdouble - read f64 from first 8 bytes + if offset + 8 <= buffer.len() { + let bytes: [u8; 8] = buffer[offset..offset + 8].try_into().unwrap(); + let val = f64::from_ne_bytes(bytes); + Ok(vm.ctx.new_float(val).into()) + } else { + Ok(vm.ctx.new_float(0.0).into()) + } + } + _ => { + if offset + element_size <= buffer.len() { + let bytes = &buffer[offset..offset + element_size]; + Ok(Self::bytes_to_int(bytes, element_size, type_code, vm)) + } else { + Ok(vm.ctx.new_int(0).into()) + } + } } } + /// Helper to write an element value to a buffer at given offset + /// This is extracted to share code between direct write and base-buffer write + #[allow(clippy::too_many_arguments)] + fn write_element_to_buffer( + buffer: &mut [u8], + offset: usize, + element_size: usize, + type_code: Option<&str>, + value: &PyObject, + zelf: &Py, + index: usize, + vm: &VirtualMachine, + ) -> PyResult<()> { + match type_code { + Some("c") => { + if let Some(b) = value.downcast_ref::() { + if offset < buffer.len() { + buffer[offset] = b.as_bytes().first().copied().unwrap_or(0); + } + } else if let Ok(int_val) = value.try_int(vm) { + if offset < buffer.len() { + buffer[offset] = int_val.as_bigint().to_u8().unwrap_or(0); + } + } else { + return Err(vm.new_type_error("an integer or bytes of length 1 is required")); + } + } + Some("u") => { + if let Some(s) = value.downcast_ref::() { + let code = s.as_str().chars().next().map(|c| c as u32).unwrap_or(0); + if offset + WCHAR_SIZE <= buffer.len() { + wchar_to_bytes(code, &mut buffer[offset..]); + } + } else { + return Err(vm.new_type_error("unicode string expected")); + } + } + Some("z") => { + let (ptr_val, converted) = if value.is(&vm.ctx.none) { + (0usize, None) + } else if let Some(bytes) = value.downcast_ref::() { + let (c, ptr) = super::base::ensure_z_null_terminated(bytes, vm); + (ptr, Some(c)) + } else if let Ok(int_val) = value.try_index(vm) { + (int_val.as_bigint().to_usize().unwrap_or(0), None) + } else { + return Err(vm.new_type_error( + "bytes or integer address expected instead of {}".to_owned(), + )); + }; + if offset + element_size <= buffer.len() { + buffer[offset..offset + element_size].copy_from_slice(&ptr_val.to_ne_bytes()); + } + if let Some(c) = converted { + return zelf.0.keep_ref(index, c, vm); + } + } + Some("Z") => { + let (ptr_val, converted) = if value.is(&vm.ctx.none) { + (0usize, None) + } else if let Some(s) = value.downcast_ref::() { + let (holder, ptr) = super::base::str_to_wchar_bytes(s.as_str(), vm); + (ptr, Some(holder)) + } else if let Ok(int_val) = value.try_index(vm) { + (int_val.as_bigint().to_usize().unwrap_or(0), None) + } else { + return Err(vm.new_type_error("unicode string or integer address expected")); + }; + if offset + element_size <= buffer.len() { + buffer[offset..offset + element_size].copy_from_slice(&ptr_val.to_ne_bytes()); + } + if let Some(c) = converted { + return zelf.0.keep_ref(index, c, vm); + } + } + Some("f") => { + // c_float: convert int/float to f32 bytes + let f32_val = if let Ok(float_val) = value.try_float(vm) { + float_val.to_f64() as f32 + } else if let Ok(int_val) = value.try_int(vm) { + int_val.as_bigint().to_f64().unwrap_or(0.0) as f32 + } else { + return Err(vm.new_type_error("a float is required")); + }; + if offset + 4 <= buffer.len() { + buffer[offset..offset + 4].copy_from_slice(&f32_val.to_ne_bytes()); + } + } + Some("d") | Some("g") => { + // c_double / c_longdouble: convert int/float to f64 bytes + let f64_val = if let Ok(float_val) = value.try_float(vm) { + float_val.to_f64() + } else if let Ok(int_val) = value.try_int(vm) { + int_val.as_bigint().to_f64().unwrap_or(0.0) + } else { + return Err(vm.new_type_error("a float is required")); + }; + if offset + 8 <= buffer.len() { + buffer[offset..offset + 8].copy_from_slice(&f64_val.to_ne_bytes()); + } + // For "g" type, remaining bytes stay zero + } + _ => { + // Handle ctypes instances (copy their buffer) + if let Some(cdata) = value.downcast_ref::() { + let src_buffer = cdata.buffer.read(); + let copy_len = src_buffer.len().min(element_size); + if offset + copy_len <= buffer.len() { + buffer[offset..offset + copy_len].copy_from_slice(&src_buffer[..copy_len]); + } + // Other types: use int_to_bytes + } else if let Ok(int_val) = value.try_int(vm) { + let bytes = Self::int_to_bytes(int_val.as_bigint(), element_size); + if offset + element_size <= buffer.len() { + buffer[offset..offset + element_size].copy_from_slice(&bytes); + } + } else { + return Err(vm.new_type_error(format!( + "expected {} instance, not {}", + type_code.unwrap_or("value"), + value.class().name() + ))); + } + } + } + + // KeepRef + if super::base::PyCData::should_keep_ref(value) { + let to_keep = super::base::PyCData::get_kept_objects(value, vm); + zelf.0.keep_ref(index, to_keep, vm)?; + } + + Ok(()) + } + fn setitem_by_index( - zelf: &PyCArray, + zelf: &Py, i: isize, value: PyObjectRef, vm: &VirtualMachine, ) -> PyResult<()> { - let length = zelf.length.load() as isize; + let stg = zelf.class().stg_info_opt(); + let length = stg.as_ref().map_or(0, |i| i.length) as isize; let index = if i < 0 { length + i } else { i }; if index < 0 || index >= length { - return Err(vm.new_index_error("array index out of range".to_owned())); + return Err(vm.new_index_error("invalid index")); } let index = index as usize; - let element_size = zelf.element_size.load(); + let element_size = stg.as_ref().map_or(0, |i| i.element_size); let offset = index * element_size; + let type_code = Self::get_element_type_code(zelf, vm); + + // Get target buffer and offset (base's buffer if available, otherwise own) + let base_obj = zelf.0.base.read().clone(); + let (buffer_lock, final_offset) = if let Some(cdata) = base_obj + .as_ref() + .and_then(|b| b.downcast_ref::()) + { + (&cdata.buffer, zelf.0.base_offset.load() + offset) + } else { + (&zelf.0.buffer, offset) + }; - let int_val = value.try_int(vm)?; - let bytes = Self::int_to_bytes(int_val.as_bigint(), element_size); - - let mut cdata = zelf.cdata.write(); - if offset + element_size <= cdata.buffer.len() { - cdata.buffer[offset..offset + element_size].copy_from_slice(&bytes); - } - Ok(()) + let mut buffer = buffer_lock.write(); + Self::write_element_to_buffer( + buffer.to_mut(), + final_offset, + element_size, + type_code.as_deref(), + &value, + zelf, + index, + vm, + ) } + // Array_subscript #[pymethod] - fn __getitem__(&self, index: PyObjectRef, vm: &VirtualMachine) -> PyResult { - if let Some(i) = index.downcast_ref::() { + fn __getitem__(zelf: &Py, item: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // PyIndex_Check + if let Some(i) = item.downcast_ref::() { let i = i.as_bigint().to_isize().ok_or_else(|| { - vm.new_index_error("cannot fit index into an index-sized integer".to_owned()) + vm.new_index_error("cannot fit index into an index-sized integer") })?; - Self::getitem_by_index(self, i, vm) + // getitem_by_index handles negative index normalization + Self::getitem_by_index(zelf, i, vm) + } + // PySlice_Check + else if let Some(slice) = item.downcast_ref::() { + Self::getitem_by_slice(zelf, slice, vm) } else { - Err(vm.new_type_error("array indices must be integers".to_owned())) + Err(vm.new_type_error("indices must be integers")) + } + } + + // Array_subscript slice handling + fn getitem_by_slice(zelf: &Py, slice: &PySlice, vm: &VirtualMachine) -> PyResult { + use crate::sliceable::SaturatedSliceIter; + + let stg = zelf.class().stg_info_opt(); + let length = stg.as_ref().map_or(0, |i| i.length); + + // PySlice_Unpack + PySlice_AdjustIndices + let sat_slice = slice.to_saturated(vm)?; + let (range, step, slice_len) = sat_slice.adjust_indices(length); + + let type_code = Self::get_element_type_code(zelf, vm); + let element_size = stg.as_ref().map_or(0, |i| i.element_size); + let start = range.start; + + match type_code.as_deref() { + // c_char → bytes (item_info->getfunc == "c") + Some("c") => { + if slice_len == 0 { + return Ok(vm.ctx.new_bytes(vec![]).into()); + } + let buffer = zelf.0.buffer.read(); + // step == 1 optimization: direct memcpy + if step == 1 { + let start_offset = start * element_size; + let end_offset = start_offset + slice_len; + if end_offset <= buffer.len() { + return Ok(vm + .ctx + .new_bytes(buffer[start_offset..end_offset].to_vec()) + .into()); + } + } + // Non-contiguous: iterate + let iter = SaturatedSliceIter::from_adjust_indices(range, step, slice_len); + let mut result = Vec::with_capacity(slice_len); + for idx in iter { + let offset = idx * element_size; + if offset < buffer.len() { + result.push(buffer[offset]); + } + } + Ok(vm.ctx.new_bytes(result).into()) + } + // c_wchar → str (item_info->getfunc == "u") + Some("u") => { + if slice_len == 0 { + return Ok(vm.ctx.new_str("").into()); + } + let buffer = zelf.0.buffer.read(); + // step == 1 optimization: direct conversion + if step == 1 { + let start_offset = start * WCHAR_SIZE; + let end_offset = start_offset + slice_len * WCHAR_SIZE; + if end_offset <= buffer.len() { + let wchar_bytes = &buffer[start_offset..end_offset]; + let result: String = wchar_bytes + .chunks(WCHAR_SIZE) + .filter_map(|chunk| wchar_from_bytes(chunk).and_then(char::from_u32)) + .collect(); + return Ok(vm.ctx.new_str(result).into()); + } + } + // Non-contiguous: iterate + let iter = SaturatedSliceIter::from_adjust_indices(range, step, slice_len); + let mut result = String::with_capacity(slice_len); + for idx in iter { + let offset = idx * WCHAR_SIZE; + if let Some(code_point) = wchar_from_bytes(&buffer[offset..]) + && let Some(c) = char::from_u32(code_point) + { + result.push(c); + } + } + Ok(vm.ctx.new_str(result).into()) + } + // Other types → list (PyList_New + Array_item for each) + _ => { + let iter = SaturatedSliceIter::from_adjust_indices(range, step, slice_len); + let mut result = Vec::with_capacity(slice_len); + for idx in iter { + result.push(Self::getitem_by_index(zelf, idx as isize, vm)?); + } + Ok(PyList::from(result).into_ref(&vm.ctx).into()) + } } } + // Array_ass_subscript #[pymethod] fn __setitem__( - &self, - index: PyObjectRef, + zelf: &Py, + item: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine, ) -> PyResult<()> { - if let Some(i) = index.downcast_ref::() { + // Array does not support item deletion + // (handled implicitly - value is always provided in __setitem__) + + // PyIndex_Check + if let Some(i) = item.downcast_ref::() { let i = i.as_bigint().to_isize().ok_or_else(|| { - vm.new_index_error("cannot fit index into an index-sized integer".to_owned()) + vm.new_index_error("cannot fit index into an index-sized integer") })?; - Self::setitem_by_index(self, i, value, vm) + // setitem_by_index handles negative index normalization + Self::setitem_by_index(zelf, i, value, vm) + } + // PySlice_Check + else if let Some(slice) = item.downcast_ref::() { + Self::setitem_by_slice(zelf, slice, value, vm) } else { - Err(vm.new_type_error("array indices must be integers".to_owned())) + Err(vm.new_type_error("indices must be integer")) } } + // Array does not support item deletion #[pymethod] - fn __len__(&self) -> usize { - self.length.load() + fn __delitem__(&self, _item: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + Err(vm.new_type_error("Array does not support item deletion")) } - #[pygetset(name = "_type_")] - fn typ(&self) -> PyObjectRef { - self.typ.read().clone() - } + // Array_ass_subscript slice handling + fn setitem_by_slice( + zelf: &Py, + slice: &PySlice, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + use crate::sliceable::SaturatedSliceIter; - #[pygetset(name = "_length_")] - fn length_getter(&self) -> usize { - self.length.load() - } + let length = zelf.class().stg_info_opt().map_or(0, |i| i.length); - #[pygetset] - fn value(&self, vm: &VirtualMachine) -> PyObjectRef { - // Return bytes representation of the buffer - let buffer = self.cdata.read().buffer.clone(); - vm.ctx.new_bytes(buffer.clone()).into() - } + // PySlice_Unpack + PySlice_AdjustIndices + let sat_slice = slice.to_saturated(vm)?; + let (range, step, slice_len) = sat_slice.adjust_indices(length); - #[pygetset(setter)] - fn set_value(&self, value: PyObjectRef, _vm: &VirtualMachine) -> PyResult<()> { - if let Some(bytes) = value.downcast_ref::() { - let mut cdata = self.cdata.write(); - let src = bytes.as_bytes(); - let len = std::cmp::min(src.len(), cdata.buffer.len()); - cdata.buffer[..len].copy_from_slice(&src[..len]); + // other_len = PySequence_Length(value); + let items: Vec = vm.extract_elements_with(&value, Ok)?; + let other_len = items.len(); + + if other_len != slice_len { + return Err(vm.new_value_error("Can only assign sequence of same size")); } - Ok(()) - } - #[pygetset] - fn raw(&self, vm: &VirtualMachine) -> PyObjectRef { - let cdata = self.cdata.read(); - vm.ctx.new_bytes(cdata.buffer.clone()).into() - } + // Use SaturatedSliceIter for correct index iteration (handles negative step) + let iter = SaturatedSliceIter::from_adjust_indices(range, step, slice_len); - #[pygetset(setter)] - fn set_raw(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { - if let Some(bytes) = value.downcast_ref::() { - let mut cdata = self.cdata.write(); - let src = bytes.as_bytes(); - let len = std::cmp::min(src.len(), cdata.buffer.len()); - cdata.buffer[..len].copy_from_slice(&src[..len]); - Ok(()) - } else { - Err(vm.new_type_error("expected bytes".to_owned())) + for (idx, item) in iter.zip(items) { + Self::setitem_by_index(zelf, idx as isize, item, vm)?; } + Ok(()) } - #[pyclassmethod] - fn from_address(cls: PyTypeRef, address: isize, vm: &VirtualMachine) -> PyResult { - use crate::stdlib::ctypes::_ctypes::size_of; + #[pymethod] + fn __len__(zelf: &Py, _vm: &VirtualMachine) -> usize { + zelf.class().stg_info_opt().map_or(0, |i| i.length) + } +} - // Get size from cls - let size = size_of(cls.clone().into(), vm)?; +impl PyCArray { + #[allow(unused)] + pub fn to_arg(&self, _vm: &VirtualMachine) -> PyResult { + let buffer = self.0.buffer.read(); + Ok(libffi::middle::Arg::new(&*buffer)) + } +} - // Create instance with data from address - if address == 0 || size == 0 { - return Err(vm.new_value_error("NULL pointer access".to_owned())); - } - unsafe { - let ptr = address as *const u8; - let bytes = std::slice::from_raw_parts(ptr, size); - // Get element type and length from cls - let element_type = cls.as_object().get_attr("_type_", vm)?; - let element_type: PyTypeRef = element_type - .downcast() - .map_err(|_| vm.new_type_error("_type_ must be a type".to_owned()))?; - let length = cls - .as_object() - .get_attr("_length_", vm)? - .try_int(vm)? - .as_bigint() - .to_usize() - .unwrap_or(0); - let element_size = if length > 0 { size / length } else { 0 }; - - let cdata = CDataObject::from_bytes(bytes.to_vec(), None); - Ok(PyCArray { - _base: PyCData::new(cdata.clone()), - typ: PyRwLock::new(element_type.into()), - length: AtomicCell::new(length), - element_size: AtomicCell::new(element_size), - cdata: PyRwLock::new(cdata), +impl AsBuffer for PyCArray { + fn as_buffer(zelf: &Py, _vm: &VirtualMachine) -> PyResult { + let buffer_len = zelf.0.buffer.read().len(); + + // Get format and shape from type's StgInfo + let stg_info = zelf + .class() + .stg_info_opt() + .expect("PyCArray type must have StgInfo"); + let format = stg_info.format.clone(); + let shape = stg_info.shape.clone(); + let element_size = stg_info.element_size; + + let desc = if let Some(fmt) = format + && !shape.is_empty() + { + // Build dim_desc from shape (C-contiguous: row-major order) + // stride[i] = product(shape[i+1:]) * itemsize + let mut dim_desc = Vec::with_capacity(shape.len()); + let mut stride = element_size as isize; + + // Calculate strides from innermost to outermost dimension + for &dim_size in shape.iter().rev() { + dim_desc.push((dim_size, stride, 0)); + stride *= dim_size as isize; } - .into_pyobject(vm)) - } - } + dim_desc.reverse(); + + BufferDescriptor { + len: buffer_len, + readonly: false, + itemsize: element_size, + format: std::borrow::Cow::Owned(fmt), + dim_desc, + } + } else { + // Fallback to simple buffer if no format/shape info + BufferDescriptor::simple(buffer_len, false) + }; - #[pyclassmethod] - fn from_buffer( - cls: PyTypeRef, - source: PyObjectRef, - offset: crate::function::OptionalArg, - vm: &VirtualMachine, - ) -> PyResult { - use crate::TryFromObject; - use crate::protocol::PyBuffer; - use crate::stdlib::ctypes::_ctypes::size_of; + let buf = PyBuffer::new(zelf.to_owned().into(), desc, &CDATA_BUFFER_METHODS); + Ok(buf) + } +} - let offset = offset.unwrap_or(0); - if offset < 0 { - return Err(vm.new_value_error("offset cannot be negative".to_owned())); - } - let offset = offset as usize; +// CharArray and WCharArray getsets - added dynamically via add_getset - // Get buffer from source - let buffer = PyBuffer::try_from_object(vm, source.clone())?; +// CharArray_get_value +fn char_array_get_value(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { + let zelf = obj.downcast_ref::().unwrap(); + let buffer = zelf.0.buffer.read(); + let len = buffer.iter().position(|&b| b == 0).unwrap_or(buffer.len()); + Ok(vm.ctx.new_bytes(buffer[..len].to_vec()).into()) +} - // Check if buffer is writable - if buffer.desc.readonly { - return Err(vm.new_type_error("underlying buffer is not writable".to_owned())); - } +// CharArray_set_value +fn char_array_set_value(obj: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + let zelf = obj.downcast_ref::().unwrap(); + let bytes = value + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("bytes expected"))?; + let mut buffer = zelf.0.buffer.write(); + let src = bytes.as_bytes(); + + if src.len() > buffer.len() { + return Err(vm.new_value_error("byte string too long")); + } - // Get size from cls - let size = size_of(cls.clone().into(), vm)?; - - // Check if buffer is large enough - let buffer_len = buffer.desc.len; - if offset + size > buffer_len { - return Err(vm.new_value_error(format!( - "Buffer size too small ({} instead of at least {} bytes)", - buffer_len, - offset + size - ))); - } + buffer.to_mut()[..src.len()].copy_from_slice(src); + if src.len() < buffer.len() { + buffer.to_mut()[src.len()] = 0; + } + Ok(()) +} - // Read bytes from buffer at offset - let bytes = buffer.obj_bytes(); - let data = &bytes[offset..offset + size]; +// CharArray_get_raw +fn char_array_get_raw(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { + let zelf = obj.downcast_ref::().unwrap(); + let buffer = zelf.0.buffer.read(); + Ok(vm.ctx.new_bytes(buffer.to_vec()).into()) +} - // Get element type and length from cls - let element_type = cls.as_object().get_attr("_type_", vm)?; - let element_type: PyTypeRef = element_type - .downcast() - .map_err(|_| vm.new_type_error("_type_ must be a type".to_owned()))?; - let length = cls - .as_object() - .get_attr("_length_", vm)? - .try_int(vm)? - .as_bigint() - .to_usize() - .unwrap_or(0); - let element_size = if length > 0 { size / length } else { 0 }; - - let cdata = CDataObject::from_bytes(data.to_vec(), Some(buffer.obj.clone())); - Ok(PyCArray { - _base: PyCData::new(cdata.clone()), - typ: PyRwLock::new(element_type.into()), - length: AtomicCell::new(length), - element_size: AtomicCell::new(element_size), - cdata: PyRwLock::new(cdata), - } - .into_pyobject(vm)) +// CharArray_set_raw +fn char_array_set_raw( + obj: PyObjectRef, + value: PySetterValue, + vm: &VirtualMachine, +) -> PyResult<()> { + let value = value.ok_or_else(|| vm.new_attribute_error("cannot delete attribute"))?; + let zelf = obj.downcast_ref::().unwrap(); + let bytes_like = ArgBytesLike::try_from_object(vm, value)?; + let mut buffer = zelf.0.buffer.write(); + let src = bytes_like.borrow_buf(); + if src.len() > buffer.len() { + return Err(vm.new_value_error("byte string too long")); } + buffer.to_mut()[..src.len()].copy_from_slice(&src); + Ok(()) +} - #[pyclassmethod] - fn from_buffer_copy( - cls: PyTypeRef, - source: crate::function::ArgBytesLike, - offset: crate::function::OptionalArg, - vm: &VirtualMachine, - ) -> PyResult { - use crate::stdlib::ctypes::_ctypes::size_of; - - let offset = offset.unwrap_or(0); - if offset < 0 { - return Err(vm.new_value_error("offset cannot be negative".to_owned())); - } - let offset = offset as usize; - - // Get size from cls - let size = size_of(cls.clone().into(), vm)?; - - // Borrow bytes from source - let source_bytes = source.borrow_buf(); - let buffer_len = source_bytes.len(); - - // Check if buffer is large enough - if offset + size > buffer_len { - return Err(vm.new_value_error(format!( - "Buffer size too small ({} instead of at least {} bytes)", - buffer_len, - offset + size - ))); - } +// WCharArray_get_value +fn wchar_array_get_value(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { + let zelf = obj.downcast_ref::().unwrap(); + let buffer = zelf.0.buffer.read(); + Ok(vm.ctx.new_str(wstring_from_bytes(&buffer)).into()) +} - // Copy bytes from buffer at offset - let data = &source_bytes[offset..offset + size]; +// WCharArray_set_value +fn wchar_array_set_value( + obj: PyObjectRef, + value: PyObjectRef, + vm: &VirtualMachine, +) -> PyResult<()> { + let zelf = obj.downcast_ref::().unwrap(); + let s = value + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("unicode string expected"))?; + let mut buffer = zelf.0.buffer.write(); + let wchar_count = buffer.len() / WCHAR_SIZE; + let char_count = s.as_str().chars().count(); + + if char_count > wchar_count { + return Err(vm.new_value_error("string too long")); + } - // Get element type and length from cls - let element_type = cls.as_object().get_attr("_type_", vm)?; - let element_type: PyTypeRef = element_type - .downcast() - .map_err(|_| vm.new_type_error("_type_ must be a type".to_owned()))?; - let length = cls - .as_object() - .get_attr("_length_", vm)? - .try_int(vm)? - .as_bigint() - .to_usize() - .unwrap_or(0); - let element_size = if length > 0 { size / length } else { 0 }; - - let cdata = CDataObject::from_bytes(data.to_vec(), None); - Ok(PyCArray { - _base: PyCData::new(cdata.clone()), - typ: PyRwLock::new(element_type.into()), - length: AtomicCell::new(length), - element_size: AtomicCell::new(element_size), - cdata: PyRwLock::new(cdata), - } - .into_pyobject(vm)) + for (i, ch) in s.as_str().chars().enumerate() { + let offset = i * WCHAR_SIZE; + wchar_to_bytes(ch as u32, &mut buffer.to_mut()[offset..]); } - #[pyclassmethod] - fn in_dll( - cls: PyTypeRef, - dll: PyObjectRef, - name: crate::builtins::PyStrRef, - vm: &VirtualMachine, - ) -> PyResult { - use crate::stdlib::ctypes::_ctypes::size_of; - use libloading::Symbol; - - // Get the library handle from dll object - let handle = if let Ok(int_handle) = dll.try_int(vm) { - // dll is an integer handle - int_handle - .as_bigint() - .to_usize() - .ok_or_else(|| vm.new_value_error("Invalid library handle".to_owned()))? - } else { - // dll is a CDLL/PyDLL/WinDLL object with _handle attribute - dll.get_attr("_handle", vm)? - .try_int(vm)? - .as_bigint() - .to_usize() - .ok_or_else(|| vm.new_value_error("Invalid library handle".to_owned()))? - }; + let terminator_offset = char_count * WCHAR_SIZE; + if terminator_offset + WCHAR_SIZE <= buffer.len() { + wchar_to_bytes(0, &mut buffer.to_mut()[terminator_offset..]); + } + Ok(()) +} - // Get the library from cache - let library_cache = crate::stdlib::ctypes::library::libcache().read(); - let library = library_cache - .get_lib(handle) - .ok_or_else(|| vm.new_attribute_error("Library not found".to_owned()))?; - - // Get symbol address from library - let symbol_name = format!("{}\0", name.as_str()); - let inner_lib = library.lib.lock(); - - let symbol_address = if let Some(lib) = &*inner_lib { - unsafe { - // Try to get the symbol from the library - let symbol: Symbol<'_, *mut u8> = lib.get(symbol_name.as_bytes()).map_err(|e| { - vm.new_attribute_error(format!("{}: symbol '{}' not found", e, name.as_str())) - })?; - *symbol as usize - } - } else { - return Err(vm.new_attribute_error("Library is closed".to_owned())); - }; +/// add_getset for c_char arrays - adds 'value' and 'raw' attributes +/// add_getset((PyTypeObject*)self, CharArray_getsets) +fn add_char_array_getsets(array_type: &Py, vm: &VirtualMachine) { + // SAFETY: getset is owned by array_type which outlives the getset + let value_getset = unsafe { + vm.ctx.new_getset( + "value", + array_type, + char_array_get_value, + char_array_set_value, + ) + }; + let raw_getset = unsafe { + vm.ctx + .new_getset("raw", array_type, char_array_get_raw, char_array_set_raw) + }; + + array_type + .attributes + .write() + .insert(vm.ctx.intern_str("value"), value_getset.into()); + array_type + .attributes + .write() + .insert(vm.ctx.intern_str("raw"), raw_getset.into()); +} - // Get size from cls - let size = size_of(cls.clone().into(), vm)?; +/// add_getset for c_wchar arrays - adds only 'value' attribute (no 'raw') +fn add_wchar_array_getsets(array_type: &Py, vm: &VirtualMachine) { + // SAFETY: getset is owned by array_type which outlives the getset + let value_getset = unsafe { + vm.ctx.new_getset( + "value", + array_type, + wchar_array_get_value, + wchar_array_set_value, + ) + }; - // Read data from symbol address - let data = if symbol_address != 0 && size > 0 { - unsafe { - let ptr = symbol_address as *const u8; - std::slice::from_raw_parts(ptr, size).to_vec() - } - } else { - vec![0; size] - }; + array_type + .attributes + .write() + .insert(vm.ctx.intern_str("value"), value_getset.into()); +} - // Get element type and length from cls - let element_type = cls.as_object().get_attr("_type_", vm)?; - let element_type: PyTypeRef = element_type - .downcast() - .map_err(|_| vm.new_type_error("_type_ must be a type".to_owned()))?; - let length = cls - .as_object() - .get_attr("_length_", vm)? - .try_int(vm)? - .as_bigint() - .to_usize() - .unwrap_or(0); - let element_size = if length > 0 { size / length } else { 0 }; - - // Create instance - let cdata = CDataObject::from_bytes(data, None); - let instance = PyCArray { - _base: PyCData::new(cdata.clone()), - typ: PyRwLock::new(element_type.into()), - length: AtomicCell::new(length), - element_size: AtomicCell::new(element_size), - cdata: PyRwLock::new(cdata), - } - .into_pyobject(vm); +// wchar_t helpers - Platform-independent wide character handling +// Windows: sizeof(wchar_t) == 2 (UTF-16) +// Linux/macOS: sizeof(wchar_t) == 4 (UTF-32) - // Store base reference to keep dll alive - if let Ok(array_ref) = instance.clone().downcast::() { - array_ref.cdata.write().base = Some(dll); - } +/// Size of wchar_t on this platform +pub(super) const WCHAR_SIZE: usize = std::mem::size_of::(); - Ok(instance) +/// Read a single wchar_t from bytes (platform-endian) +#[inline] +pub(super) fn wchar_from_bytes(bytes: &[u8]) -> Option { + if bytes.len() < WCHAR_SIZE { + return None; } + Some(if WCHAR_SIZE == 2 { + u16::from_ne_bytes([bytes[0], bytes[1]]) as u32 + } else { + u32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) + }) } -impl PyCArray { - #[allow(unused)] - pub fn to_arg(&self, _vm: &VirtualMachine) -> PyResult { - let cdata = self.cdata.read(); - Ok(libffi::middle::Arg::new(&cdata.buffer)) +/// Write a single wchar_t to bytes (platform-endian) +#[inline] +pub(super) fn wchar_to_bytes(ch: u32, buffer: &mut [u8]) { + if WCHAR_SIZE == 2 { + if buffer.len() >= 2 { + buffer[..2].copy_from_slice(&(ch as u16).to_ne_bytes()); + } + } else if buffer.len() >= 4 { + buffer[..4].copy_from_slice(&ch.to_ne_bytes()); } } -static ARRAY_BUFFER_METHODS: BufferMethods = BufferMethods { - obj_bytes: |buffer| { - rustpython_common::lock::PyMappedRwLockReadGuard::map( - rustpython_common::lock::PyRwLockReadGuard::map( - buffer.obj_as::().cdata.read(), - |x: &CDataObject| x, - ), - |x: &CDataObject| x.buffer.as_slice(), - ) - .into() - }, - obj_bytes_mut: |buffer| { - rustpython_common::lock::PyMappedRwLockWriteGuard::map( - rustpython_common::lock::PyRwLockWriteGuard::map( - buffer.obj_as::().cdata.write(), - |x: &mut CDataObject| x, - ), - |x: &mut CDataObject| x.buffer.as_mut_slice(), - ) - .into() - }, - release: |_| {}, - retain: |_| {}, -}; - -impl AsBuffer for PyCArray { - fn as_buffer(zelf: &Py, _vm: &VirtualMachine) -> PyResult { - let buffer_len = zelf.cdata.read().buffer.len(); - let buf = PyBuffer::new( - zelf.to_owned().into(), - BufferDescriptor::simple(buffer_len, false), // readonly=false for ctypes - &ARRAY_BUFFER_METHODS, - ); - Ok(buf) +/// Read a null-terminated wchar_t string from bytes, returns String +fn wstring_from_bytes(buffer: &[u8]) -> String { + let mut chars = Vec::new(); + for chunk in buffer.chunks(WCHAR_SIZE) { + if chunk.len() < WCHAR_SIZE { + break; + } + let code = if WCHAR_SIZE == 2 { + u16::from_ne_bytes([chunk[0], chunk[1]]) as u32 + } else { + u32::from_ne_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]) + }; + if code == 0 { + break; // null terminator + } + if let Some(ch) = char::from_u32(code) { + chars.push(ch); + } } + chars.into_iter().collect() } diff --git a/crates/vm/src/stdlib/ctypes/base.rs b/crates/vm/src/stdlib/ctypes/base.rs index e45ff0b3b70..38c371346e0 100644 --- a/crates/vm/src/stdlib/ctypes/base.rs +++ b/crates/vm/src/stdlib/ctypes/base.rs @@ -1,871 +1,1113 @@ -use super::_ctypes::bytes_to_pyobject; -use super::util::StgInfo; -use crate::builtins::{PyBytes, PyFloat, PyInt, PyNone, PyStr, PyStrRef, PyType, PyTypeRef}; -use crate::function::{ArgBytesLike, Either, FuncArgs, KwArgs, OptionalArg}; -use crate::protocol::{BufferDescriptor, BufferMethods, PyBuffer, PyNumberMethods}; -use crate::stdlib::ctypes::_ctypes::new_simple_type; -use crate::types::{AsBuffer, AsNumber, Constructor}; +use super::array::{WCHAR_SIZE, wchar_from_bytes, wchar_to_bytes}; +use crate::builtins::{PyBytes, PyDict, PyMemoryView, PyStr, PyType, PyTypeRef}; +use crate::class::StaticType; +use crate::function::{ArgBytesLike, OptionalArg, PySetterValue}; +use crate::protocol::{BufferMethods, PyBuffer}; +use crate::types::{GetDescriptor, Representable}; use crate::{ - AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine, + AsObject, Py, PyObject, PyObjectRef, PyPayload, PyResult, TryFromObject, VirtualMachine, }; use crossbeam_utils::atomic::AtomicCell; -use num_traits::ToPrimitive; +use num_traits::{Signed, ToPrimitive}; use rustpython_common::lock::PyRwLock; -use std::ffi::{c_uint, c_ulong, c_ulonglong, c_ushort}; +use std::borrow::Cow; +use std::ffi::{ + c_double, c_float, c_int, c_long, c_longlong, c_short, c_uint, c_ulong, c_ulonglong, c_ushort, +}; use std::fmt::Debug; +use std::mem; +use widestring::WideChar; + +// StgInfo - Storage information for ctypes types +// Stored in TypeDataSlot of heap types (PyType::init_type_data/get_type_data) + +// Flag constants +bitflags::bitflags! { + #[derive(Default, Copy, Clone, Debug, PartialEq, Eq)] + pub struct StgInfoFlags: u32 { + // Function calling convention flags + /// Standard call convention (Windows) + const FUNCFLAG_STDCALL = 0x0; + /// C calling convention + const FUNCFLAG_CDECL = 0x1; + /// Function returns HRESULT + const FUNCFLAG_HRESULT = 0x2; + /// Use Python API calling convention + const FUNCFLAG_PYTHONAPI = 0x4; + /// Capture errno after call + const FUNCFLAG_USE_ERRNO = 0x8; + /// Capture last error after call (Windows) + const FUNCFLAG_USE_LASTERROR = 0x10; + + // Type flags + /// Type is a pointer type + const TYPEFLAG_ISPOINTER = 0x100; + /// Type contains pointer fields + const TYPEFLAG_HASPOINTER = 0x200; + /// Type is or contains a union + const TYPEFLAG_HASUNION = 0x400; + /// Type contains bitfield members + const TYPEFLAG_HASBITFIELD = 0x800; + + // Dict flags + /// Type is finalized (_fields_ has been set) + const DICTFLAG_FINAL = 0x1000; + } +} -/// Get the type code string from a ctypes type (e.g., "i" for c_int) -pub fn get_type_code(cls: &PyTypeRef, vm: &VirtualMachine) -> Option { - cls.as_object() - .get_attr("_type_", vm) - .ok() - .and_then(|t| t.downcast_ref::().map(|s| s.to_string())) +/// ParamFunc - determines how a type is passed to foreign functions +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub(super) enum ParamFunc { + #[default] + None, + /// Array types are passed as pointers (tag = 'P') + Array, + /// Simple types use their specific conversion (tag = type code) + Simple, + /// Pointer types (tag = 'P') + Pointer, + /// Structure types (tag = 'V' for value) + Structure, + /// Union types (tag = 'V' for value) + Union, +} + +#[derive(Clone)] +pub struct StgInfo { + pub initialized: bool, + pub size: usize, // number of bytes + pub align: usize, // alignment requirements + pub length: usize, // number of fields (for arrays/structures) + pub proto: Option, // Only for Pointer/ArrayObject + pub flags: StgInfoFlags, // type flags (TYPEFLAG_*, DICTFLAG_*) + + // Array-specific fields + pub element_type: Option, // _type_ for arrays + pub element_size: usize, // size of each element + + // PEP 3118 buffer protocol fields + pub format: Option, // struct format string (e.g., "i", "(5)i") + pub shape: Vec, // shape for multi-dimensional arrays + + // Function parameter conversion + pub(super) paramfunc: ParamFunc, // how to pass to foreign functions + + // Byte order (for _swappedbytes_) + pub big_endian: bool, // true if big endian, false if little endian + + // FFI field types for structure/union passing (inherited from base class) + pub ffi_field_types: Vec, } -pub fn ffi_type_from_str(_type_: &str) -> Option { - match _type_ { - "c" => Some(libffi::middle::Type::u8()), - "u" => Some(libffi::middle::Type::u32()), - "b" => Some(libffi::middle::Type::i8()), - "B" => Some(libffi::middle::Type::u8()), - "h" => Some(libffi::middle::Type::i16()), - "H" => Some(libffi::middle::Type::u16()), - "i" => Some(libffi::middle::Type::i32()), - "I" => Some(libffi::middle::Type::u32()), - "l" => Some(libffi::middle::Type::i32()), - "L" => Some(libffi::middle::Type::u32()), - "q" => Some(libffi::middle::Type::i64()), - "Q" => Some(libffi::middle::Type::u64()), - "f" => Some(libffi::middle::Type::f32()), - "d" => Some(libffi::middle::Type::f64()), - "g" => Some(libffi::middle::Type::f64()), - "?" => Some(libffi::middle::Type::u8()), - "z" => Some(libffi::middle::Type::u64()), - "Z" => Some(libffi::middle::Type::u64()), - "P" => Some(libffi::middle::Type::u64()), - _ => None, +// StgInfo is stored in type_data which requires Send + Sync. +// The PyTypeRef in proto/element_type fields is protected by the type system's locking mechanism. +// ctypes objects are not thread-safe by design; users must synchronize access. +unsafe impl Send for StgInfo {} +unsafe impl Sync for StgInfo {} + +impl std::fmt::Debug for StgInfo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("StgInfo") + .field("initialized", &self.initialized) + .field("size", &self.size) + .field("align", &self.align) + .field("length", &self.length) + .field("proto", &self.proto) + .field("flags", &self.flags) + .field("element_type", &self.element_type) + .field("element_size", &self.element_size) + .field("format", &self.format) + .field("shape", &self.shape) + .field("paramfunc", &self.paramfunc) + .field("big_endian", &self.big_endian) + .field("ffi_field_types", &self.ffi_field_types.len()) + .finish() } } -#[allow(dead_code)] -fn set_primitive(_type_: &str, value: &PyObject, vm: &VirtualMachine) -> PyResult { - match _type_ { - "c" => { - if value - .to_owned() - .downcast_exact::(vm) - .is_ok_and(|v| v.len() == 1) - || value - .to_owned() - .downcast_exact::(vm) - .is_ok_and(|v| v.len() == 1) - || value - .to_owned() - .downcast_exact::(vm) - .map_or(Ok(false), |v| { - let n = v.as_bigint().to_i64(); - if let Some(n) = n { - Ok((0..=255).contains(&n)) - } else { - Ok(false) - } - })? - { - Ok(value.to_owned()) - } else { - Err(vm.new_type_error("one character bytes, bytearray or integer expected")) - } +impl Default for StgInfo { + fn default() -> Self { + StgInfo { + initialized: false, + size: 0, + align: 1, + length: 0, + proto: None, + flags: StgInfoFlags::empty(), + element_type: None, + element_size: 0, + format: None, + shape: Vec::new(), + paramfunc: ParamFunc::None, + big_endian: cfg!(target_endian = "big"), // native endian by default + ffi_field_types: Vec::new(), } - "u" => { - if let Ok(b) = value.str(vm).map(|v| v.to_string().chars().count() == 1) { - if b { - Ok(value.to_owned()) + } +} + +impl StgInfo { + pub fn new(size: usize, align: usize) -> Self { + StgInfo { + initialized: true, + size, + align, + length: 0, + proto: None, + flags: StgInfoFlags::empty(), + element_type: None, + element_size: 0, + format: None, + shape: Vec::new(), + paramfunc: ParamFunc::None, + big_endian: cfg!(target_endian = "big"), // native endian by default + ffi_field_types: Vec::new(), + } + } + + /// Create StgInfo for an array type + /// item_format: the innermost element's format string (kept as-is, e.g., ", + item_shape: &[usize], + item_flags: StgInfoFlags, + ) -> Self { + // Format is kept from innermost element (e.g., "flags & (TYPEFLAG_ISPOINTER | TYPEFLAG_HASPOINTER)) + // stginfo->flags |= TYPEFLAG_HASPOINTER; + let flags = if item_flags + .intersects(StgInfoFlags::TYPEFLAG_ISPOINTER | StgInfoFlags::TYPEFLAG_HASPOINTER) + { + StgInfoFlags::TYPEFLAG_HASPOINTER + } else { + StgInfoFlags::empty() + }; + + StgInfo { + initialized: true, + size, + align, + length, + proto: None, + flags, + element_type: Some(element_type), + element_size, + format, + shape, + paramfunc: ParamFunc::Array, + big_endian: cfg!(target_endian = "big"), // native endian by default + ffi_field_types: Vec::new(), + } + } + + /// Get libffi type for this StgInfo + /// Note: For very large types, returns pointer type to avoid overflow + pub fn to_ffi_type(&self) -> libffi::middle::Type { + // Limit to avoid overflow in libffi (MAX_STRUCT_SIZE is platform-dependent) + const MAX_FFI_STRUCT_SIZE: usize = 1024 * 1024; // 1MB limit for safety + + match self.paramfunc { + ParamFunc::Structure | ParamFunc::Union => { + if !self.ffi_field_types.is_empty() { + libffi::middle::Type::structure(self.ffi_field_types.iter().cloned()) + } else if self.size <= MAX_FFI_STRUCT_SIZE { + // Small struct without field types: use bytes array + libffi::middle::Type::structure(std::iter::repeat_n( + libffi::middle::Type::u8(), + self.size, + )) } else { - Err(vm.new_type_error("one character unicode string expected")) + // Large struct: treat as pointer (passed by reference) + libffi::middle::Type::pointer() } - } else { - Err(vm.new_type_error(format!( - "unicode string expected instead of {} instance", - value.class().name() - ))) - } - } - "b" | "h" | "H" | "i" | "I" | "l" | "q" | "L" | "Q" => { - if value.to_owned().downcast_exact::(vm).is_ok() { - Ok(value.to_owned()) - } else { - Err(vm.new_type_error(format!( - "an integer is required (got type {})", - value.class().name() - ))) } - } - "f" | "d" | "g" => { - // float allows int - if value.to_owned().downcast_exact::(vm).is_ok() - || value.to_owned().downcast_exact::(vm).is_ok() - { - Ok(value.to_owned()) - } else { - Err(vm.new_type_error(format!("must be real number, not {}", value.class().name()))) + ParamFunc::Array => { + if self.size > MAX_FFI_STRUCT_SIZE || self.length > MAX_FFI_STRUCT_SIZE { + // Large array: treat as pointer + libffi::middle::Type::pointer() + } else if let Some(ref fmt) = self.format { + let elem_type = Self::format_to_ffi_type(fmt); + libffi::middle::Type::structure(std::iter::repeat_n(elem_type, self.length)) + } else { + libffi::middle::Type::structure(std::iter::repeat_n( + libffi::middle::Type::u8(), + self.size, + )) + } } - } - "?" => Ok(PyObjectRef::from( - vm.ctx.new_bool(value.to_owned().try_to_bool(vm)?), - )), - "B" => { - if value.to_owned().downcast_exact::(vm).is_ok() { - // Store as-is, conversion to unsigned happens in the getter - Ok(value.to_owned()) - } else { - Err(vm.new_type_error(format!("int expected instead of {}", value.class().name()))) + ParamFunc::Pointer => libffi::middle::Type::pointer(), + _ => { + // Simple type: derive from format + if let Some(ref fmt) = self.format { + Self::format_to_ffi_type(fmt) + } else { + libffi::middle::Type::u8() + } } } - "z" => { - if value.to_owned().downcast_exact::(vm).is_ok() - || value.to_owned().downcast_exact::(vm).is_ok() - { - Ok(value.to_owned()) - } else { - Err(vm.new_type_error(format!( - "bytes or integer address expected instead of {} instance", - value.class().name() - ))) - } + } + + /// Convert format string to libffi type + fn format_to_ffi_type(fmt: &str) -> libffi::middle::Type { + // Strip endian prefix if present + let code = fmt.trim_start_matches(['<', '>', '!', '@', '=']); + match code { + "b" => libffi::middle::Type::i8(), + "B" => libffi::middle::Type::u8(), + "h" => libffi::middle::Type::i16(), + "H" => libffi::middle::Type::u16(), + "i" | "l" => libffi::middle::Type::i32(), + "I" | "L" => libffi::middle::Type::u32(), + "q" => libffi::middle::Type::i64(), + "Q" => libffi::middle::Type::u64(), + "f" => libffi::middle::Type::f32(), + "d" => libffi::middle::Type::f64(), + "P" | "z" | "Z" | "O" => libffi::middle::Type::pointer(), + _ => libffi::middle::Type::u8(), // default } - "Z" => { - if value.to_owned().downcast_exact::(vm).is_ok() { - Ok(value.to_owned()) - } else { - Err(vm.new_type_error(format!( - "unicode string or integer address expected instead of {} instance", - value.class().name() - ))) - } + } + + /// Check if this type is finalized (cannot set _fields_ again) + pub fn is_final(&self) -> bool { + self.flags.contains(StgInfoFlags::DICTFLAG_FINAL) + } + + /// Get proto type reference (for Pointer/Array types) + pub fn proto(&self) -> &Py { + self.proto.as_deref().expect("type has proto") + } +} + +/// Get PEP3118 format string for a field type +/// Returns the format string considering byte order +pub(super) fn get_field_format( + field_type: &PyObject, + big_endian: bool, + vm: &VirtualMachine, +) -> String { + // 1. Check StgInfo for format + if let Some(type_obj) = field_type.downcast_ref::() + && let Some(stg_info) = type_obj.stg_info_opt() + && let Some(fmt) = &stg_info.format + { + // Handle endian prefix for simple types + if fmt.len() == 1 { + let endian_prefix = if big_endian { ">" } else { "<" }; + return format!("{}{}", endian_prefix, fmt); } - _ => { - // "P" - if value.to_owned().downcast_exact::(vm).is_ok() - || value.to_owned().downcast_exact::(vm).is_ok() - { - Ok(value.to_owned()) - } else { - Err(vm.new_type_error("cannot be converted to pointer")) - } + return fmt.clone(); + } + + // 2. Try to get _type_ attribute for simple types + if let Ok(type_attr) = field_type.get_attr("_type_", vm) + && let Some(type_str) = type_attr.downcast_ref::() + { + let s = type_str.as_str(); + if s.len() == 1 { + let endian_prefix = if big_endian { ">" } else { "<" }; + return format!("{}{}", endian_prefix, s); } + return s.to_string(); } + + // Default: single byte + "B".to_string() } -/// Common data object for all ctypes types -#[derive(Debug, Clone)] -pub struct CDataObject { - /// pointer to memory block (b_ptr + b_size) - pub buffer: Vec, +/// Compute byte order based on swapped flag +#[inline] +pub(super) fn is_big_endian(is_swapped: bool) -> bool { + if is_swapped { + !cfg!(target_endian = "big") + } else { + cfg!(target_endian = "big") + } +} + +/// Shared BufferMethods for all ctypes types (PyCArray, PyCSimple, PyCStructure, PyCUnion) +/// All these types are #[repr(transparent)] wrappers around PyCData +pub(super) static CDATA_BUFFER_METHODS: BufferMethods = BufferMethods { + obj_bytes: |buffer| { + rustpython_common::lock::PyRwLockReadGuard::map( + buffer.obj_as::().buffer.read(), + |x| &**x, + ) + .into() + }, + obj_bytes_mut: |buffer| { + rustpython_common::lock::PyRwLockWriteGuard::map( + buffer.obj_as::().buffer.write(), + |x| x.to_mut().as_mut_slice(), + ) + .into() + }, + release: |_| {}, + retain: |_| {}, +}; + +/// Convert Vec to Vec by reinterpreting the memory (same allocation). +fn vec_to_bytes(vec: Vec) -> Vec { + let len = vec.len() * std::mem::size_of::(); + let cap = vec.capacity() * std::mem::size_of::(); + let ptr = vec.as_ptr() as *mut u8; + std::mem::forget(vec); + unsafe { Vec::from_raw_parts(ptr, len, cap) } +} + +/// Ensure PyBytes is null-terminated. Returns (PyBytes to keep, pointer). +/// If already contains null, returns original. Otherwise creates new with null appended. +pub(super) fn ensure_z_null_terminated( + bytes: &PyBytes, + vm: &VirtualMachine, +) -> (PyObjectRef, usize) { + let data = bytes.as_bytes(); + if data.contains(&0) { + // Already has null, use original + let original: PyObjectRef = vm.ctx.new_bytes(data.to_vec()).into(); + (original, data.as_ptr() as usize) + } else { + // Create new with null appended + let mut buffer = data.to_vec(); + buffer.push(0); + let ptr = buffer.as_ptr() as usize; + let new_bytes: PyObjectRef = vm.ctx.new_bytes(buffer).into(); + (new_bytes, ptr) + } +} + +/// Convert str to null-terminated wchar_t buffer. Returns (PyBytes holder, pointer). +pub(super) fn str_to_wchar_bytes(s: &str, vm: &VirtualMachine) -> (PyObjectRef, usize) { + let wchars: Vec = s + .chars() + .map(|c| c as libc::wchar_t) + .chain(std::iter::once(0)) + .collect(); + let ptr = wchars.as_ptr() as usize; + let bytes = vec_to_bytes(wchars); + let holder: PyObjectRef = vm.ctx.new_bytes(bytes).into(); + (holder, ptr) +} + +/// PyCData - base type for all ctypes data types +#[pyclass(name = "_CData", module = "_ctypes")] +#[derive(Debug, PyPayload)] +pub struct PyCData { + /// Memory buffer - Owned (self-owned) or Borrowed (external reference) + /// + /// SAFETY: Borrowed variant's 'static lifetime is not actually static. + /// When created via from_address or from_base_obj, only valid for the lifetime of the source memory. + /// Same behavior as CPython's b_ptr (user responsibility, kept alive via b_base). + pub buffer: PyRwLock>, /// pointer to base object or None (b_base) - #[allow(dead_code)] - pub base: Option, + pub base: PyRwLock>, + /// byte offset within base's buffer (for field access) + pub base_offset: AtomicCell, /// index into base's b_objects list (b_index) - #[allow(dead_code)] - pub index: usize, + pub index: AtomicCell, /// dictionary of references we need to keep (b_objects) - pub objects: Option, + pub objects: PyRwLock>, + /// number of references we need (b_length) + pub length: AtomicCell, } -impl CDataObject { +impl PyCData { /// Create from StgInfo (PyCData_MallocBuffer pattern) pub fn from_stg_info(stg_info: &StgInfo) -> Self { - CDataObject { - buffer: vec![0u8; stg_info.size], - base: None, - index: 0, - objects: None, + PyCData { + buffer: PyRwLock::new(Cow::Owned(vec![0u8; stg_info.size])), + base: PyRwLock::new(None), + base_offset: AtomicCell::new(0), + index: AtomicCell::new(0), + objects: PyRwLock::new(None), + length: AtomicCell::new(stg_info.length), } } /// Create from existing bytes (copies data) pub fn from_bytes(data: Vec, objects: Option) -> Self { - CDataObject { - buffer: data, - base: None, - index: 0, - objects, + PyCData { + buffer: PyRwLock::new(Cow::Owned(data)), + base: PyRwLock::new(None), + base_offset: AtomicCell::new(0), + index: AtomicCell::new(0), + objects: PyRwLock::new(objects), + length: AtomicCell::new(0), } } - /// Create from base object (copies data from base's buffer at offset) - #[allow(dead_code)] - pub fn from_base( - base: PyObjectRef, - _offset: usize, - size: usize, - index: usize, + /// Create from bytes with specified length (for arrays) + pub fn from_bytes_with_length( + data: Vec, objects: Option, + length: usize, ) -> Self { - CDataObject { - buffer: vec![0u8; size], - base: Some(base), - index, - objects, + PyCData { + buffer: PyRwLock::new(Cow::Owned(data)), + base: PyRwLock::new(None), + base_offset: AtomicCell::new(0), + index: AtomicCell::new(0), + objects: PyRwLock::new(objects), + length: AtomicCell::new(length), } } - #[inline] - pub fn size(&self) -> usize { - self.buffer.len() + /// Create from external memory address + /// + /// # Safety + /// The returned slice's 'static lifetime is a lie. + /// Actually only valid for the lifetime of the memory pointed to by ptr. + /// PyCData_AtAddress + pub unsafe fn at_address(ptr: *const u8, size: usize) -> Self { + // = PyCData_AtAddress + // SAFETY: Caller must ensure ptr is valid for the lifetime of returned PyCData + let slice: &'static [u8] = unsafe { std::slice::from_raw_parts(ptr, size) }; + PyCData { + buffer: PyRwLock::new(Cow::Borrowed(slice)), + base: PyRwLock::new(None), + base_offset: AtomicCell::new(0), + index: AtomicCell::new(0), + objects: PyRwLock::new(None), + length: AtomicCell::new(0), + } } -} - -#[pyclass(name = "_CData", module = "_ctypes")] -#[derive(Debug, PyPayload)] -pub struct PyCData { - pub cdata: PyRwLock, -} -impl PyCData { - pub fn new(cdata: CDataObject) -> Self { - Self { - cdata: PyRwLock::new(cdata), + /// Create from base object with offset and data copy + /// + /// Similar to from_base_with_offset, but also stores a copy of the data. + /// This is used for arrays where we need our own buffer for the buffer protocol, + /// but still maintain the base reference for KeepRef and tracking. + pub fn from_base_with_data( + base_obj: PyObjectRef, + offset: usize, + idx: usize, + length: usize, + data: Vec, + ) -> Self { + PyCData { + buffer: PyRwLock::new(Cow::Owned(data)), // Has its own buffer copy + base: PyRwLock::new(Some(base_obj)), // But still tracks base + base_offset: AtomicCell::new(offset), // And offset for writes + index: AtomicCell::new(idx), + objects: PyRwLock::new(None), + length: AtomicCell::new(length), } } -} -#[pyclass(flags(BASETYPE))] -impl PyCData { - #[pygetset] - fn _objects(&self) -> Option { - self.cdata.read().objects.clone() + /// Create from base object's buffer + /// + /// This creates a borrowed view into the base's buffer at the given address. + /// The base object is stored in b_base to keep the memory alive. + /// + /// # Safety + /// ptr must point into base_obj's buffer and remain valid as long as base_obj is alive. + pub unsafe fn from_base_obj( + ptr: *mut u8, + size: usize, + base_obj: PyObjectRef, + idx: usize, + ) -> Self { + // = PyCData_FromBaseObj + // SAFETY: ptr points into base_obj's buffer, kept alive via base reference + let slice: &'static [u8] = unsafe { std::slice::from_raw_parts(ptr, size) }; + PyCData { + buffer: PyRwLock::new(Cow::Borrowed(slice)), + base: PyRwLock::new(Some(base_obj)), + base_offset: AtomicCell::new(0), + index: AtomicCell::new(idx), + objects: PyRwLock::new(None), + length: AtomicCell::new(0), + } } -} -#[pyclass(module = "_ctypes", name = "PyCSimpleType", base = PyType)] -#[derive(Debug)] -#[repr(transparent)] -pub struct PyCSimpleType(PyType); - -#[pyclass(flags(BASETYPE), with(AsNumber))] -impl PyCSimpleType { - /// Get stg_info for a simple type by reading _type_ attribute - pub fn get_stg_info(cls: &PyTypeRef, vm: &VirtualMachine) -> StgInfo { - if let Ok(type_attr) = cls.as_object().get_attr("_type_", vm) - && let Ok(type_str) = type_attr.str(vm) - { - let tp_str = type_str.to_string(); - if tp_str.len() == 1 { - let size = super::_ctypes::get_size(&tp_str); - let align = super::_ctypes::get_align(&tp_str); - return StgInfo::new(size, align); - } + /// Create from buffer protocol object (for from_buffer method) + /// + /// Unlike from_bytes, this shares memory with the source buffer. + /// The source object is stored in objects dict to keep the buffer alive. + /// Python stores with key -1 via KeepRef(result, -1, mv). + /// + /// # Safety + /// ptr must point to valid memory that remains valid as long as source is alive. + pub unsafe fn from_buffer_shared( + ptr: *const u8, + size: usize, + length: usize, + source: PyObjectRef, + vm: &VirtualMachine, + ) -> Self { + // SAFETY: Caller must ensure ptr is valid for the lifetime of source + let slice: &'static [u8] = unsafe { std::slice::from_raw_parts(ptr, size) }; + + // Python stores the reference in a dict with key "-1" (unique_key pattern) + let objects_dict = vm.ctx.new_dict(); + objects_dict + .set_item("-1", source, vm) + .expect("Failed to store buffer reference"); + + PyCData { + buffer: PyRwLock::new(Cow::Borrowed(slice)), + base: PyRwLock::new(None), + base_offset: AtomicCell::new(0), + index: AtomicCell::new(0), + objects: PyRwLock::new(Some(objects_dict.into())), + length: AtomicCell::new(length), } - StgInfo::default() - } - #[allow(clippy::new_ret_no_self)] - #[pymethod] - fn new(cls: PyTypeRef, _: OptionalArg, vm: &VirtualMachine) -> PyResult { - Ok(PyObjectRef::from( - new_simple_type(Either::B(&cls), vm)? - .into_ref_with_type(vm, cls)? - .clone(), - )) } - #[pyclassmethod] - fn from_param(cls: PyTypeRef, value: PyObjectRef, vm: &VirtualMachine) -> PyResult { - // 1. If the value is already an instance of the requested type, return it - if value.fast_isinstance(&cls) { - return Ok(value); + /// Common implementation for from_buffer class method. + /// Validates buffer, creates memoryview, and returns PyCData sharing memory with source. + /// + /// CDataType_from_buffer_impl + pub fn from_buffer_impl( + cls: &Py, + source: PyObjectRef, + offset: isize, + vm: &VirtualMachine, + ) -> PyResult { + let (size, length) = { + let stg_info = cls + .stg_info_opt() + .ok_or_else(|| vm.new_type_error("not a ctypes type"))?; + (stg_info.size, stg_info.length) + }; + + if offset < 0 { + return Err(vm.new_value_error("offset cannot be negative")); } + let offset = offset as usize; - // 2. Get the type code to determine conversion rules - let type_code = get_type_code(&cls, vm); + // Get buffer from source (this exports the buffer) + let buffer = PyBuffer::try_from_object(vm, source)?; - // 3. Handle None for pointer types (c_char_p, c_wchar_p, c_void_p) - if vm.is_none(&value) && matches!(type_code.as_deref(), Some("z") | Some("Z") | Some("P")) { - return Ok(value); + // Check if buffer is writable + if buffer.desc.readonly { + return Err(vm.new_type_error("underlying buffer is not writable")); } - // 4. Try to convert value based on type code - match type_code.as_deref() { - // Integer types: accept integers - Some("b" | "B" | "h" | "H" | "i" | "I" | "l" | "L" | "q" | "Q") => { - if value.try_int(vm).is_ok() { - let simple = new_simple_type(Either::B(&cls), vm)?; - simple.value.store(value.clone()); - return simple.into_ref_with_type(vm, cls.clone()).map(Into::into); - } - } - // Float types: accept numbers - Some("f" | "d" | "g") => { - if value.try_float(vm).is_ok() || value.try_int(vm).is_ok() { - let simple = new_simple_type(Either::B(&cls), vm)?; - simple.value.store(value.clone()); - return simple.into_ref_with_type(vm, cls.clone()).map(Into::into); - } - } - // c_char: 1 byte character - Some("c") => { - if let Some(bytes) = value.downcast_ref::() - && bytes.len() == 1 - { - let simple = new_simple_type(Either::B(&cls), vm)?; - simple.value.store(value.clone()); - return simple.into_ref_with_type(vm, cls.clone()).map(Into::into); - } - if let Ok(int_val) = value.try_int(vm) - && int_val.as_bigint().to_u8().is_some() - { - let simple = new_simple_type(Either::B(&cls), vm)?; - simple.value.store(value.clone()); - return simple.into_ref_with_type(vm, cls.clone()).map(Into::into); - } - return Err(vm.new_type_error( - "one character bytes, bytearray or integer expected".to_string(), - )); - } - // c_wchar: 1 unicode character - Some("u") => { - if let Some(s) = value.downcast_ref::() - && s.as_str().chars().count() == 1 - { - let simple = new_simple_type(Either::B(&cls), vm)?; - simple.value.store(value.clone()); - return simple.into_ref_with_type(vm, cls.clone()).map(Into::into); - } - return Err(vm.new_type_error("one character unicode string expected".to_string())); - } - // c_char_p: bytes pointer - Some("z") => { - if value.downcast_ref::().is_some() { - let simple = new_simple_type(Either::B(&cls), vm)?; - simple.value.store(value.clone()); - return simple.into_ref_with_type(vm, cls.clone()).map(Into::into); - } - } - // c_wchar_p: unicode pointer - Some("Z") => { - if value.downcast_ref::().is_some() { - let simple = new_simple_type(Either::B(&cls), vm)?; - simple.value.store(value.clone()); - return simple.into_ref_with_type(vm, cls.clone()).map(Into::into); - } - } - // c_void_p: most flexible - accepts int, bytes, str - Some("P") => { - if value.try_int(vm).is_ok() - || value.downcast_ref::().is_some() - || value.downcast_ref::().is_some() - { - let simple = new_simple_type(Either::B(&cls), vm)?; - simple.value.store(value.clone()); - return simple.into_ref_with_type(vm, cls.clone()).map(Into::into); - } - } - // c_bool - Some("?") => { - let bool_val = value.is_true(vm)?; - let simple = new_simple_type(Either::B(&cls), vm)?; - simple.value.store(vm.ctx.new_bool(bool_val).into()); - return simple.into_ref_with_type(vm, cls.clone()).map(Into::into); - } - _ => {} + // Check if buffer is C contiguous + if !buffer.desc.is_contiguous() { + return Err(vm.new_type_error("underlying buffer is not C contiguous")); } - // 5. Check for _as_parameter_ attribute - if let Ok(as_parameter) = value.get_attr("_as_parameter_", vm) { - return PyCSimpleType::from_param(cls, as_parameter, vm); + // Check if buffer is large enough + let buffer_len = buffer.desc.len; + if offset + size > buffer_len { + return Err(vm.new_value_error(format!( + "Buffer size too small ({} instead of at least {} bytes)", + buffer_len, + offset + size + ))); } - // 6. Type-specific error messages - match type_code.as_deref() { - Some("z") => Err(vm.new_type_error(format!( - "'{}' object cannot be interpreted as ctypes.c_char_p", - value.class().name() - ))), - Some("Z") => Err(vm.new_type_error(format!( - "'{}' object cannot be interpreted as ctypes.c_wchar_p", - value.class().name() - ))), - _ => Err(vm.new_type_error("wrong type".to_string())), - } - } + // Get buffer pointer - the memory is owned by source + let ptr = { + let bytes = buffer.obj_bytes(); + bytes.as_ptr().wrapping_add(offset) + }; - #[pymethod] - fn __mul__(cls: PyTypeRef, n: isize, vm: &VirtualMachine) -> PyResult { - PyCSimple::repeat(cls, n, vm) + // Create memoryview to keep buffer exported (prevents source from being modified) + // mv = PyMemoryView_FromObject(obj); KeepRef(result, -1, mv); + let memoryview = PyMemoryView::from_buffer(buffer, vm)?; + let mv_obj = memoryview.into_pyobject(vm); + + // Create CData that shares memory with the buffer + Ok(unsafe { Self::from_buffer_shared(ptr, size, length, mv_obj, vm) }) } -} -impl AsNumber for PyCSimpleType { - fn as_number() -> &'static PyNumberMethods { - static AS_NUMBER: PyNumberMethods = PyNumberMethods { - multiply: Some(|a, b, vm| { - // a is a PyCSimpleType instance (type object like c_char) - // b is int (array size) - let cls = a - .downcast_ref::() - .ok_or_else(|| vm.new_type_error("expected type".to_owned()))?; - let n = b - .try_index(vm)? - .as_bigint() - .to_isize() - .ok_or_else(|| vm.new_overflow_error("array size too large".to_owned()))?; - PyCSimple::repeat(cls.to_owned(), n, vm) - }), - ..PyNumberMethods::NOT_IMPLEMENTED + /// Common implementation for from_buffer_copy class method. + /// Copies data from buffer and creates new independent instance. + /// + /// CDataType_from_buffer_copy_impl + pub fn from_buffer_copy_impl( + cls: &Py, + source: &[u8], + offset: isize, + vm: &VirtualMachine, + ) -> PyResult { + let (size, length) = { + let stg_info = cls + .stg_info_opt() + .ok_or_else(|| vm.new_type_error("not a ctypes type"))?; + (stg_info.size, stg_info.length) }; - &AS_NUMBER - } -} -#[pyclass( - module = "_ctypes", - name = "_SimpleCData", - base = PyCData, - metaclass = "PyCSimpleType" -)] -pub struct PyCSimple { - pub _base: PyCData, - pub _type_: String, - pub value: AtomicCell, - pub cdata: PyRwLock, -} + if offset < 0 { + return Err(vm.new_value_error("offset cannot be negative")); + } + let offset = offset as usize; -impl Debug for PyCSimple { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PyCSimple") - .field("_type_", &self._type_) - .finish() + // Check if buffer is large enough + if offset + size > source.len() { + return Err(vm.new_value_error(format!( + "Buffer size too small ({} instead of at least {} bytes)", + source.len(), + offset + size + ))); + } + + // Copy bytes from buffer at offset + let data = source[offset..offset + size].to_vec(); + + Ok(Self::from_bytes_with_length(data, None, length)) } -} -fn value_to_bytes_endian( - _type_: &str, - value: &PyObject, - swapped: bool, - vm: &VirtualMachine, -) -> Vec { - // Helper macro for endian conversion - macro_rules! to_bytes { - ($val:expr) => { - if swapped { - // Use opposite endianness - #[cfg(target_endian = "little")] - { - $val.to_be_bytes().to_vec() - } - #[cfg(target_endian = "big")] - { - $val.to_le_bytes().to_vec() - } - } else { - $val.to_ne_bytes().to_vec() - } - }; + #[inline] + pub fn size(&self) -> usize { + self.buffer.read().len() } - match _type_ { - "c" => { - // c_char - single byte - if let Some(bytes) = value.downcast_ref::() - && !bytes.is_empty() - { - return vec![bytes.as_bytes()[0]]; - } - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_u8() - { - return vec![v]; - } - vec![0] - } - "u" => { - // c_wchar - 4 bytes (wchar_t on most platforms) - if let Ok(s) = value.str(vm) - && let Some(c) = s.as_str().chars().next() - { - return to_bytes!(c as u32); - } - vec![0; 4] + /// Check if this buffer is borrowed (external memory reference) + #[inline] + pub fn is_borrowed(&self) -> bool { + matches!(&*self.buffer.read(), Cow::Borrowed(_)) + } + + /// Write bytes at offset - handles both borrowed and owned buffers + /// + /// For borrowed buffers (from from_address), writes directly to external memory. + /// For owned buffers, writes through to_mut() as normal. + /// + /// # Safety + /// For borrowed buffers, caller must ensure the memory is writable. + pub fn write_bytes_at_offset(&self, offset: usize, bytes: &[u8]) { + let buffer = self.buffer.read(); + if offset + bytes.len() > buffer.len() { + return; // Out of bounds } - "b" => { - // c_byte - signed char (1 byte) - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_i8() - { - return vec![v as u8]; + + match &*buffer { + Cow::Borrowed(slice) => { + // For borrowed memory, write directly + // SAFETY: We assume the caller knows this memory is writable + // (e.g., from from_address pointing to a ctypes buffer) + unsafe { + let ptr = slice.as_ptr() as *mut u8; + std::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr.add(offset), bytes.len()); + } } - vec![0] - } - "B" => { - // c_ubyte - unsigned char (1 byte) - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_u8() - { - return vec![v]; + Cow::Owned(_) => { + // For owned memory, use to_mut() through write lock + drop(buffer); + let mut buffer = self.buffer.write(); + buffer.to_mut()[offset..offset + bytes.len()].copy_from_slice(bytes); } - vec![0] } - "h" => { - // c_short (2 bytes) - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_i16() - { - return to_bytes!(v); - } - vec![0; 2] + } + + /// Generate unique key for nested references (unique_key) + /// Creates a hierarchical key by walking up the b_base chain. + /// Format: "index:parent_index:grandparent_index:..." + pub fn unique_key(&self, index: usize) -> String { + let mut key = format!("{index:x}"); + // Walk up the base chain to build hierarchical key + if self.base.read().is_some() { + let parent_index = self.index.load(); + key.push_str(&format!(":{parent_index:x}")); } - "H" => { - // c_ushort (2 bytes) - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_u16() - { - return to_bytes!(v); - } - vec![0; 2] + key + } + + /// Keep a reference in the objects dictionary (KeepRef) + /// + /// Stores 'keep' in this object's b_objects dict at key 'index'. + /// If keep is None, does nothing (optimization). + /// This function stores the value directly - caller should use get_kept_objects() + /// first if they want to store the _objects of a CData instead of the object itself. + /// + /// If this object has a base (is embedded in another structure/union/array), + /// the reference is stored in the root object's b_objects with a hierarchical key. + pub fn keep_ref(&self, index: usize, keep: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + // Optimization: no need to store None + if vm.is_none(&keep) { + return Ok(()); } - "i" => { - // c_int (4 bytes) - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_i32() - { - return to_bytes!(v); - } - vec![0; 4] + + // Build hierarchical key + let key = self.unique_key(index); + + // If we have a base object, find root and store there + if let Some(base_obj) = self.base.read().clone() { + // Find root by walking up the base chain + let root_obj = Self::find_root_object(&base_obj); + Self::store_in_object(&root_obj, &key, keep, vm)?; + return Ok(()); } - "I" => { - // c_uint (4 bytes) - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_u32() - { - return to_bytes!(v); + + // No base - store in own objects dict + let mut objects = self.objects.write(); + + // Initialize b_objects if needed + if objects.is_none() { + if self.length.load() > 0 { + // Need to store multiple references - create a dict + *objects = Some(vm.ctx.new_dict().into()); + } else { + // Only one reference needed - store directly + *objects = Some(keep); + return Ok(()); } - vec![0; 4] } - "l" => { - // c_long (platform dependent) - if let Ok(int_val) = value.try_to_value::(vm) { - return to_bytes!(int_val); - } - const SIZE: usize = std::mem::size_of::(); - vec![0; SIZE] + + // If b_objects is not a dict, convert it to a dict first + // This preserves the existing reference (e.g., from cast) when adding new references + if let Some(obj) = objects.as_ref() + && obj.downcast_ref::().is_none() + { + // Convert existing single reference to a dict + let dict = vm.ctx.new_dict(); + // Store the original object with a special key (id-based) + let id_key: PyObjectRef = vm.ctx.new_int(obj.get_id() as i64).into(); + dict.set_item(&*id_key, obj.clone(), vm)?; + *objects = Some(dict.into()); } - "L" => { - // c_ulong (platform dependent) - if let Ok(int_val) = value.try_to_value::(vm) { - return to_bytes!(int_val); - } - const SIZE: usize = std::mem::size_of::(); - vec![0; SIZE] + + // Store in dict with unique key + if let Some(dict_obj) = objects.as_ref() + && let Some(dict) = dict_obj.downcast_ref::() + { + let key_obj: PyObjectRef = vm.ctx.new_str(key).into(); + dict.set_item(&*key_obj, keep, vm)?; } - "q" => { - // c_longlong (8 bytes) - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_i64() - { - return to_bytes!(v); - } - vec![0; 8] + + Ok(()) + } + + /// Find the root object (one without a base) by walking up the base chain + fn find_root_object(obj: &PyObject) -> PyObjectRef { + // Try to get base from different ctypes types + let base = if let Some(cdata) = obj.downcast_ref::() { + cdata.base.read().clone() + } else { + None + }; + + // Recurse if there's a base, otherwise this is the root + if let Some(base_obj) = base { + Self::find_root_object(&base_obj) + } else { + obj.to_owned() } - "Q" => { - // c_ulonglong (8 bytes) - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_u64() - { - return to_bytes!(v); - } - vec![0; 8] - } - "f" => { - // c_float (4 bytes) - int도 허용 - if let Ok(float_val) = value.try_float(vm) { - return to_bytes!(float_val.to_f64() as f32); - } - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_f64() - { - return to_bytes!(v as f32); - } - vec![0; 4] - } - "d" | "g" => { - // c_double (8 bytes) - int도 허용 - if let Ok(float_val) = value.try_float(vm) { - return to_bytes!(float_val.to_f64()); - } - if let Ok(int_val) = value.try_int(vm) - && let Some(v) = int_val.as_bigint().to_f64() - { - return to_bytes!(v); - } - vec![0; 8] - } - "?" => { - // c_bool (1 byte) - if let Ok(b) = value.to_owned().try_to_bool(vm) { - return vec![if b { 1 } else { 0 }]; - } - vec![0] - } - "P" | "z" | "Z" => { - // Pointer types (platform pointer size) - vec![0; std::mem::size_of::()] - } - _ => vec![0], } -} -impl Constructor for PyCSimple { - type Args = (OptionalArg,); - - fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { - let args: Self::Args = args.bind(vm)?; - let attributes = cls.get_attributes(); - let _type_ = attributes - .iter() - .find(|(k, _)| { - k.to_object() - .str(vm) - .map(|s| s.to_string() == "_type_") - .unwrap_or(false) - }) - .ok_or_else(|| { - vm.new_type_error(format!( - "cannot create '{}' instances: no _type_ attribute", - cls.name() - )) - })? - .1 - .str(vm)? - .to_string(); - let value = if let Some(ref v) = args.0.into_option() { - set_primitive(_type_.as_str(), v, vm)? + /// Store a value in an object's _objects dict with the given key + fn store_in_object( + obj: &PyObject, + key: &str, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + // Get the objects dict from the object + let objects_lock = if let Some(cdata) = obj.downcast_ref::() { + &cdata.objects } else { - match _type_.as_str() { - "c" | "u" => PyObjectRef::from(vm.ctx.new_bytes(vec![0])), - "b" | "B" | "h" | "H" | "i" | "I" | "l" | "q" | "L" | "Q" => { - PyObjectRef::from(vm.ctx.new_int(0)) - } - "f" | "d" | "g" => PyObjectRef::from(vm.ctx.new_float(0.0)), - "?" => PyObjectRef::from(vm.ctx.new_bool(false)), - _ => vm.ctx.none(), // "z" | "Z" | "P" - } + return Ok(()); // Unknown type, skip }; - // Check if this is a swapped endian type - let swapped = cls - .as_object() - .get_attr("_swappedbytes_", vm) - .map(|v| v.is_true(vm).unwrap_or(false)) - .unwrap_or(false); + let mut objects = objects_lock.write(); - let buffer = value_to_bytes_endian(&_type_, &value, swapped, vm); - let cdata = CDataObject::from_bytes(buffer, None); - PyCSimple { - _base: PyCData::new(cdata.clone()), - _type_, - value: AtomicCell::new(value), - cdata: PyRwLock::new(cdata), + // Initialize if needed + if objects.is_none() { + *objects = Some(vm.ctx.new_dict().into()); } - .into_ref_with_type(vm, cls) - .map(Into::into) - } - fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { - unimplemented!("use slot_new") - } -} + // If not a dict, convert to dict + if let Some(obj) = objects.as_ref() + && obj.downcast_ref::().is_none() + { + let dict = vm.ctx.new_dict(); + let id_key: PyObjectRef = vm.ctx.new_int(obj.get_id() as i64).into(); + dict.set_item(&*id_key, obj.clone(), vm)?; + *objects = Some(dict.into()); + } -#[pyclass(flags(BASETYPE), with(Constructor, AsBuffer))] -impl PyCSimple { - #[pygetset] - fn _objects(&self) -> Option { - self.cdata.read().objects.clone() - } - - #[pygetset(name = "value")] - pub fn value(instance: PyObjectRef, vm: &VirtualMachine) -> PyResult { - let zelf: &Py = instance - .downcast_ref() - .ok_or_else(|| vm.new_type_error("cannot get value of instance"))?; - let raw_value = unsafe { (*zelf.value.as_ptr()).clone() }; - - // Convert to unsigned if needed for unsigned types - match zelf._type_.as_str() { - "B" | "H" | "I" | "L" | "Q" => { - if let Ok(int_val) = raw_value.try_int(vm) { - let n = int_val.as_bigint(); - // Use platform-specific C types for correct unsigned conversion - match zelf._type_.as_str() { - "B" => { - if let Some(v) = n.to_i64() { - return Ok(vm.ctx.new_int((v as u8) as u64).into()); - } - } - "H" => { - if let Some(v) = n.to_i64() { - return Ok(vm.ctx.new_int((v as c_ushort) as u64).into()); - } - } - "I" => { - if let Some(v) = n.to_i64() { - return Ok(vm.ctx.new_int((v as c_uint) as u64).into()); - } - } - "L" => { - if let Some(v) = n.to_i128() { - return Ok(vm.ctx.new_int(v as c_ulong).into()); - } - } - "Q" => { - if let Some(v) = n.to_i128() { - return Ok(vm.ctx.new_int(v as c_ulonglong).into()); - } - } - _ => {} - }; - } - Ok(raw_value) - } - _ => Ok(raw_value), + // Store in dict + if let Some(dict_obj) = objects.as_ref() + && let Some(dict) = dict_obj.downcast_ref::() + { + let key_obj: PyObjectRef = vm.ctx.new_str(key).into(); + dict.set_item(&*key_obj, value, vm)?; } + + Ok(()) } - #[pygetset(name = "value", setter)] - fn set_value(instance: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { - let zelf: PyRef = instance - .clone() - .downcast() - .map_err(|_| vm.new_type_error("cannot set value of instance"))?; - let content = set_primitive(zelf._type_.as_str(), &value, vm)?; + /// Get kept objects from a CData instance + /// Returns the _objects of the CData, or an empty dict if None. + pub fn get_kept_objects(value: &PyObject, vm: &VirtualMachine) -> PyObjectRef { + value + .downcast_ref::() + .and_then(|cdata| cdata.objects.read().clone()) + .unwrap_or_else(|| vm.ctx.new_dict().into()) + } - // Check if this is a swapped endian type - let swapped = instance - .class() - .as_object() - .get_attr("_swappedbytes_", vm) - .map(|v| v.is_true(vm).unwrap_or(false)) - .unwrap_or(false); - - // Update buffer when value changes - let buffer_bytes = value_to_bytes_endian(&zelf._type_, &content, swapped, vm); - zelf.cdata.write().buffer = buffer_bytes; - zelf.value.store(content); - Ok(()) + /// Check if a value should be stored in _objects + /// Returns true for ctypes objects and bytes (for c_char_p) + pub fn should_keep_ref(value: &PyObject) -> bool { + value.downcast_ref::().is_some() || value.downcast_ref::().is_some() } - #[pyclassmethod] - fn repeat(cls: PyTypeRef, n: isize, vm: &VirtualMachine) -> PyResult { - use super::_ctypes::get_size; - use super::array::create_array_type_with_stg_info; - - if n < 0 { - return Err(vm.new_value_error(format!("Array length must be >= 0, not {n}"))); - } - // Get element size from cls - let element_size = if let Ok(type_attr) = cls.as_object().get_attr("_type_", vm) { - if let Ok(s) = type_attr.str(vm) { - let s = s.to_string(); - if s.len() == 1 { - get_size(&s) - } else { - std::mem::size_of::() - } + /// PyCData_set + /// Sets a field value at the given offset, handling type conversion and KeepRef + #[allow(clippy::too_many_arguments)] + pub fn set_field( + &self, + proto: &PyObject, + value: PyObjectRef, + index: usize, + size: usize, + offset: usize, + needs_swap: bool, + vm: &VirtualMachine, + ) -> PyResult<()> { + // Check if this is a c_char or c_wchar array field + let is_char_array = PyCField::is_char_array(proto, vm); + let is_wchar_array = PyCField::is_wchar_array(proto, vm); + + // For c_char arrays with bytes input, copy only up to first null + if is_char_array { + if let Some(bytes_val) = value.downcast_ref::() { + let src = bytes_val.as_bytes(); + let to_copy = PyCField::bytes_for_char_array(src); + let copy_len = std::cmp::min(to_copy.len(), size); + self.write_bytes_at_offset(offset, &to_copy[..copy_len]); + self.keep_ref(index, value, vm)?; + return Ok(()); } else { - std::mem::size_of::() + return Err(vm.new_type_error("bytes expected instead of str instance")); } - } else { - std::mem::size_of::() - }; - let total_size = element_size * (n as usize); - let stg_info = super::util::StgInfo::new_array( - total_size, - element_size, - n as usize, - cls.clone().into(), - element_size, - ); - create_array_type_with_stg_info(stg_info, vm) - } + } - #[pyclassmethod] - fn from_address(cls: PyTypeRef, address: isize, vm: &VirtualMachine) -> PyResult { - use super::_ctypes::get_size; - // Get _type_ attribute directly - let type_attr = cls - .as_object() - .get_attr("_type_", vm) - .map_err(|_| vm.new_type_error(format!("'{}' has no _type_ attribute", cls.name())))?; - let type_str = type_attr.str(vm)?.to_string(); - let size = get_size(&type_str); - - // Create instance with value read from address - let value = if address != 0 && size > 0 { - // Safety: This is inherently unsafe - reading from arbitrary memory address - unsafe { - let ptr = address as *const u8; - let bytes = std::slice::from_raw_parts(ptr, size); - // Convert bytes to appropriate Python value based on type - bytes_to_pyobject(&cls, bytes, vm)? + // For c_wchar arrays with str input, convert to wchar_t + if is_wchar_array { + if let Some(str_val) = value.downcast_ref::() { + // Convert str to wchar_t bytes (platform-dependent size) + let mut wchar_bytes = Vec::with_capacity(size); + for ch in str_val.as_str().chars().take(size / WCHAR_SIZE) { + let mut bytes = [0u8; 4]; + wchar_to_bytes(ch as u32, &mut bytes); + wchar_bytes.extend_from_slice(&bytes[..WCHAR_SIZE]); + } + // Pad with nulls to fill the array + while wchar_bytes.len() < size { + wchar_bytes.push(0); + } + self.write_bytes_at_offset(offset, &wchar_bytes); + self.keep_ref(index, value, vm)?; + return Ok(()); + } else if value.downcast_ref::().is_some() { + return Err(vm.new_type_error("str expected instead of bytes instance")); } + } + + // Special handling for Pointer fields with Array values + if let Some(proto_type) = proto.downcast_ref::() + && proto_type + .class() + .fast_issubclass(super::pointer::PyCPointerType::static_type()) + && let Some(array) = value.downcast_ref::() + { + let buffer_addr = { + let array_buffer = array.0.buffer.read(); + array_buffer.as_ptr() as usize + }; + let addr_bytes = buffer_addr.to_ne_bytes(); + let len = std::cmp::min(addr_bytes.len(), size); + self.write_bytes_at_offset(offset, &addr_bytes[..len]); + self.keep_ref(index, value, vm)?; + return Ok(()); + } + + // Get field type code for special handling + let field_type_code = proto + .get_attr("_type_", vm) + .ok() + .and_then(|attr| attr.downcast_ref::().map(|s| s.to_string())); + + let (mut bytes, converted_value) = if let Some(type_code) = &field_type_code { + PyCField::value_to_bytes_for_type(type_code, &value, size, vm)? } else { - vm.ctx.none() + (PyCField::value_to_bytes(&value, size, vm)?, None) }; - // Create instance using the type's constructor - let args = FuncArgs::new(vec![value], KwArgs::default()); - PyCSimple::slot_new(cls.clone(), args, vm) + // Swap bytes for opposite endianness + if needs_swap { + bytes.reverse(); + } + + self.write_bytes_at_offset(offset, &bytes); + + // KeepRef: for z/Z types use converted value, otherwise use original + if let Some(converted) = converted_value { + self.keep_ref(index, converted, vm)?; + } else if Self::should_keep_ref(&value) { + let to_keep = Self::get_kept_objects(&value, vm); + self.keep_ref(index, to_keep, vm)?; + } + + Ok(()) } - #[pyclassmethod] - fn from_buffer( - cls: PyTypeRef, - source: PyObjectRef, - offset: OptionalArg, + /// PyCData_get + /// Gets a field value at the given offset + pub fn get_field( + &self, + proto: &PyObject, + index: usize, + size: usize, + offset: usize, + base_obj: PyObjectRef, vm: &VirtualMachine, ) -> PyResult { - use super::_ctypes::get_size; - let offset = offset.unwrap_or(0); - if offset < 0 { - return Err(vm.new_value_error("offset cannot be negative".to_owned())); + // Get buffer data at offset + let buffer = self.buffer.read(); + if offset + size > buffer.len() { + return Ok(vm.ctx.new_int(0).into()); } - let offset = offset as usize; - // Get buffer from source - let buffer = PyBuffer::try_from_object(vm, source.clone())?; + // Check if field type is an array type + if let Some(type_ref) = proto.downcast_ref::() + && let Some(stg) = type_ref.stg_info_opt() + && stg.element_type.is_some() + { + // c_char array → return bytes + if PyCField::is_char_array(proto, vm) { + let data = &buffer[offset..offset + size]; + // Find first null terminator (or use full length) + let end = data.iter().position(|&b| b == 0).unwrap_or(data.len()); + return Ok(vm.ctx.new_bytes(data[..end].to_vec()).into()); + } - // Check if buffer is writable - if buffer.desc.readonly { - return Err(vm.new_type_error("underlying buffer is not writable".to_owned())); + // c_wchar array → return str + if PyCField::is_wchar_array(proto, vm) { + let data = &buffer[offset..offset + size]; + // wchar_t → char conversion, skip null + let chars: String = data + .chunks(WCHAR_SIZE) + .filter_map(|chunk| { + wchar_from_bytes(chunk) + .filter(|&wchar| wchar != 0) + .and_then(char::from_u32) + }) + .collect(); + return Ok(vm.ctx.new_str(chars).into()); + } + + // Other array types - create array with a copy of data from the base's buffer + // The array also keeps a reference to the base for keeping it alive and for writes + let array_data = buffer[offset..offset + size].to_vec(); + drop(buffer); + + let cdata_obj = + Self::from_base_with_data(base_obj, offset, index, stg.length, array_data); + let array_type: PyTypeRef = proto + .to_owned() + .downcast() + .map_err(|_| vm.new_type_error("expected array type"))?; + + return super::array::PyCArray(cdata_obj) + .into_ref_with_type(vm, array_type) + .map(Into::into); } - // Get _type_ attribute directly - let type_attr = cls - .as_object() - .get_attr("_type_", vm) - .map_err(|_| vm.new_type_error(format!("'{}' has no _type_ attribute", cls.name())))?; - let type_str = type_attr.str(vm)?.to_string(); - let size = get_size(&type_str); + let buffer_data = buffer[offset..offset + size].to_vec(); + drop(buffer); - // Check if buffer is large enough - let buffer_len = buffer.desc.len; - if offset + size > buffer_len { - return Err(vm.new_value_error(format!( - "Buffer size too small ({} instead of at least {} bytes)", - buffer_len, - offset + size - ))); + // Get proto as type + let proto_type: PyTypeRef = proto + .to_owned() + .downcast() + .map_err(|_| vm.new_type_error("field proto is not a type"))?; + + let proto_metaclass = proto_type.class(); + + // Simple types: return primitive value + if proto_metaclass.fast_issubclass(super::simple::PyCSimpleType::static_type()) { + // Check for byte swapping + let needs_swap = base_obj + .class() + .as_object() + .get_attr("_swappedbytes_", vm) + .is_ok() + || proto_type + .as_object() + .get_attr("_swappedbytes_", vm) + .is_ok(); + + let data = if needs_swap && size > 1 { + let mut swapped = buffer_data.clone(); + swapped.reverse(); + swapped + } else { + buffer_data + }; + + return bytes_to_pyobject(&proto_type, &data, vm); } - // Read bytes from buffer at offset - let bytes = buffer.obj_bytes(); - let data = &bytes[offset..offset + size]; - let value = bytes_to_pyobject(&cls, data, vm)?; + // Complex types: create ctypes instance via PyCData_FromBaseObj + let ptr = self.buffer.read().as_ptr().wrapping_add(offset) as *mut u8; + let cdata_obj = unsafe { Self::from_base_obj(ptr, size, base_obj.clone(), index) }; - // Create instance - let args = FuncArgs::new(vec![value], KwArgs::default()); - let instance = PyCSimple::slot_new(cls.clone(), args, vm)?; + if proto_metaclass.fast_issubclass(super::structure::PyCStructType::static_type()) + || proto_metaclass.fast_issubclass(super::union::PyCUnionType::static_type()) + || proto_metaclass.fast_issubclass(super::pointer::PyCPointerType::static_type()) + { + cdata_obj.into_ref_with_type(vm, proto_type).map(Into::into) + } else { + // Fallback + Ok(vm.ctx.new_int(0).into()) + } + } +} - // TODO: Store reference to source in _objects to keep buffer alive - Ok(instance) +#[pyclass(flags(BASETYPE))] +impl PyCData { + #[pygetset] + fn _objects(&self) -> Option { + self.objects.read().clone() + } + + #[pygetset] + fn _b_base_(&self) -> Option { + self.base.read().clone() + } + + #[pygetset] + fn _b_needsfree_(&self) -> i32 { + // Borrowed (from_address) or has base object → 0 (don't free) + // Owned and no base → 1 (need to free) + if self.is_borrowed() || self.base.read().is_some() { + 0 + } else { + 1 + } + } + + // CDataType_methods - shared across all ctypes types + + #[pyclassmethod] + fn from_buffer( + cls: PyTypeRef, + source: PyObjectRef, + offset: OptionalArg, + vm: &VirtualMachine, + ) -> PyResult { + let cdata = Self::from_buffer_impl(&cls, source, offset.unwrap_or(0), vm)?; + cdata.into_ref_with_type(vm, cls).map(Into::into) } #[pyclassmethod] @@ -875,191 +1117,1237 @@ impl PyCSimple { offset: OptionalArg, vm: &VirtualMachine, ) -> PyResult { - use super::_ctypes::get_size; - let offset = offset.unwrap_or(0); - if offset < 0 { - return Err(vm.new_value_error("offset cannot be negative".to_owned())); - } - let offset = offset as usize; - - // Get _type_ attribute directly for simple types - let type_attr = cls - .as_object() - .get_attr("_type_", vm) - .map_err(|_| vm.new_type_error(format!("'{}' has no _type_ attribute", cls.name())))?; - let type_str = type_attr.str(vm)?.to_string(); - let size = get_size(&type_str); + let cdata = + Self::from_buffer_copy_impl(&cls, &source.borrow_buf(), offset.unwrap_or(0), vm)?; + cdata.into_ref_with_type(vm, cls).map(Into::into) + } - // Borrow bytes from source - let source_bytes = source.borrow_buf(); - let buffer_len = source_bytes.len(); + #[pyclassmethod] + fn from_address(cls: PyTypeRef, address: isize, vm: &VirtualMachine) -> PyResult { + let size = { + let stg_info = cls.stg_info(vm)?; + stg_info.size + }; - // Check if buffer is large enough - if offset + size > buffer_len { - return Err(vm.new_value_error(format!( - "Buffer size too small ({} instead of at least {} bytes)", - buffer_len, - offset + size - ))); + if size == 0 { + return Err(vm.new_type_error("abstract class")); } - // Copy bytes from buffer at offset - let data = &source_bytes[offset..offset + size]; - let value = bytes_to_pyobject(&cls, data, vm)?; - - // Create instance (independent copy, no reference tracking) - let args = FuncArgs::new(vec![value], KwArgs::default()); - PyCSimple::slot_new(cls.clone(), args, vm) + // PyCData_AtAddress + let cdata = unsafe { Self::at_address(address as *const u8, size) }; + cdata.into_ref_with_type(vm, cls).map(Into::into) } #[pyclassmethod] - fn in_dll(cls: PyTypeRef, dll: PyObjectRef, name: PyStrRef, vm: &VirtualMachine) -> PyResult { - use super::_ctypes::get_size; - use libloading::Symbol; + fn in_dll( + cls: PyTypeRef, + dll: PyObjectRef, + name: crate::builtins::PyStrRef, + vm: &VirtualMachine, + ) -> PyResult { + let size = { + let stg_info = cls.stg_info(vm)?; + stg_info.size + }; + + if size == 0 { + return Err(vm.new_type_error("abstract class")); + } // Get the library handle from dll object let handle = if let Ok(int_handle) = dll.try_int(vm) { - // dll is an integer handle int_handle .as_bigint() .to_usize() - .ok_or_else(|| vm.new_value_error("Invalid library handle".to_owned()))? + .ok_or_else(|| vm.new_value_error("Invalid library handle"))? } else { - // dll is a CDLL/PyDLL/WinDLL object with _handle attribute dll.get_attr("_handle", vm)? .try_int(vm)? .as_bigint() .to_usize() - .ok_or_else(|| vm.new_value_error("Invalid library handle".to_owned()))? + .ok_or_else(|| vm.new_value_error("Invalid library handle"))? }; - // Get the library from cache - let library_cache = crate::stdlib::ctypes::library::libcache().read(); - let library = library_cache - .get_lib(handle) - .ok_or_else(|| vm.new_attribute_error("Library not found".to_owned()))?; - - // Get symbol address from library - let symbol_name = format!("{}\0", name.as_str()); - let inner_lib = library.lib.lock(); - - let symbol_address = if let Some(lib) = &*inner_lib { - unsafe { - // Try to get the symbol from the library - let symbol: Symbol<'_, *mut u8> = lib.get(symbol_name.as_bytes()).map_err(|e| { - vm.new_attribute_error(format!("{}: symbol '{}' not found", e, name.as_str())) - })?; - *symbol as usize + // Get symbol address using platform-specific API + let symbol_name = std::ffi::CString::new(name.as_str()) + .map_err(|_| vm.new_value_error("Invalid symbol name"))?; + + #[cfg(windows)] + let ptr: *const u8 = unsafe { + match windows_sys::Win32::System::LibraryLoader::GetProcAddress( + handle as windows_sys::Win32::Foundation::HMODULE, + symbol_name.as_ptr() as *const u8, + ) { + Some(p) => p as *const u8, + None => std::ptr::null(), } + }; + + #[cfg(not(windows))] + let ptr: *const u8 = + unsafe { libc::dlsym(handle as *mut libc::c_void, symbol_name.as_ptr()) as *const u8 }; + + if ptr.is_null() { + return Err( + vm.new_value_error(format!("symbol '{}' not found in library", name.as_str())) + ); + } + + // PyCData_AtAddress + let cdata = unsafe { Self::at_address(ptr, size) }; + cdata.into_ref_with_type(vm, cls).map(Into::into) + } +} + +// PyCField - Field descriptor for Structure/Union types + +/// CField descriptor for Structure/Union field access +#[pyclass(name = "CField", module = "_ctypes")] +#[derive(Debug, PyPayload)] +pub struct PyCField { + /// Byte offset of the field within the structure/union + pub(crate) offset: isize, + /// Encoded size: for bitfields (bit_size << 16) | bit_offset, otherwise byte size + pub(crate) size: isize, + /// Index into PyCData's object array + pub(crate) index: usize, + /// The ctypes type for this field + pub(crate) proto: PyTypeRef, + /// Flag indicating if the field is anonymous (MakeAnonFields sets this) + pub(crate) anonymous: bool, +} + +#[inline(always)] +const fn num_bits(size: isize) -> isize { + size >> 16 +} + +#[inline(always)] +const fn field_size(size: isize) -> isize { + size & 0xFFFF +} + +#[inline(always)] +const fn is_bitfield(size: isize) -> bool { + (size >> 16) != 0 +} + +impl PyCField { + /// Create a new CField descriptor (non-bitfield) + pub fn new(proto: PyTypeRef, offset: isize, size: isize, index: usize) -> Self { + Self { + offset, + size, + index, + proto, + anonymous: false, + } + } + + /// Create a new CField descriptor for a bitfield + #[allow(dead_code)] + pub fn new_bitfield( + proto: PyTypeRef, + offset: isize, + bit_size: u16, + bit_offset: u16, + index: usize, + ) -> Self { + let encoded_size = ((bit_size as isize) << 16) | (bit_offset as isize); + Self { + offset, + size: encoded_size, + index, + proto, + anonymous: false, + } + } + + /// Get the actual byte size (for non-bitfields) or bit storage size (for bitfields) + pub fn byte_size(&self) -> usize { + field_size(self.size) as usize + } + + /// Create a new CField from an existing field with adjusted offset and index + /// Used by MakeFields to promote anonymous fields + pub fn new_from_field(fdescr: &PyCField, index_offset: usize, offset_delta: isize) -> Self { + Self { + offset: fdescr.offset + offset_delta, + size: fdescr.size, + index: fdescr.index + index_offset, + proto: fdescr.proto.clone(), + anonymous: false, // promoted fields are not anonymous themselves + } + } + + /// Set anonymous flag + pub fn set_anonymous(&mut self, anonymous: bool) { + self.anonymous = anonymous; + } +} + +impl Representable for PyCField { + fn repr_str(zelf: &Py, _vm: &VirtualMachine) -> PyResult { + // Get type name from proto (which is always PyTypeRef) + let tp_name = zelf.proto.name().to_string(); + + // Bitfield: + // Regular: + if is_bitfield(zelf.size) { + let bit_offset = field_size(zelf.size); + let bits = num_bits(zelf.size); + Ok(format!( + "", + tp_name, zelf.offset, bit_offset, bits + )) } else { - return Err(vm.new_attribute_error("Library is closed".to_owned())); + Ok(format!( + "", + tp_name, zelf.offset, zelf.size + )) + } + } +} + +/// PyCField_get +impl GetDescriptor for PyCField { + fn descr_get( + zelf: PyObjectRef, + obj: Option, + _cls: Option, + vm: &VirtualMachine, + ) -> PyResult { + let zelf = zelf + .downcast::() + .map_err(|_| vm.new_type_error("expected CField"))?; + + // If obj is None, return the descriptor itself (class attribute access) + let obj = match obj { + Some(obj) if !vm.is_none(&obj) => obj, + _ => return Ok(zelf.into()), }; - // Get _type_ attribute and size - let type_attr = cls - .as_object() - .get_attr("_type_", vm) - .map_err(|_| vm.new_type_error(format!("'{}' has no _type_ attribute", cls.name())))?; - let type_str = type_attr.str(vm)?.to_string(); - let size = get_size(&type_str); - - // Read value from symbol address - let value = if symbol_address != 0 && size > 0 { - // Safety: Reading from a symbol address provided by dlsym - unsafe { - let ptr = symbol_address as *const u8; - let bytes = std::slice::from_raw_parts(ptr, size); - bytes_to_pyobject(&cls, bytes, vm)? + let offset = zelf.offset as usize; + let size = zelf.byte_size(); + + // Get PyCData from obj (works for both Structure and Union) + let cdata = PyCField::get_cdata_from_obj(&obj, vm)?; + + // PyCData_get + cdata.get_field( + zelf.proto.as_object(), + zelf.index, + size, + offset, + obj.clone(), + vm, + ) + } +} + +impl PyCField { + /// Convert a Python value to bytes + fn value_to_bytes(value: &PyObject, size: usize, vm: &VirtualMachine) -> PyResult> { + // 1. Handle bytes objects + if let Some(bytes) = value.downcast_ref::() { + let src = bytes.as_bytes(); + let mut result = vec![0u8; size]; + let len = std::cmp::min(src.len(), size); + result[..len].copy_from_slice(&src[..len]); + Ok(result) + } + // 2. Handle ctypes array instances (copy their buffer) + else if let Some(cdata) = value.downcast_ref::() { + let buffer = cdata.buffer.read(); + let mut result = vec![0u8; size]; + let len = std::cmp::min(buffer.len(), size); + result[..len].copy_from_slice(&buffer[..len]); + Ok(result) + } + // 4. Handle float values (check before int, since float.try_int would truncate) + else if let Some(float_val) = value.downcast_ref::() { + let f = float_val.to_f64(); + match size { + 4 => { + let val = f as f32; + Ok(val.to_ne_bytes().to_vec()) + } + 8 => Ok(f.to_ne_bytes().to_vec()), + _ => unreachable!("wrong payload size"), + } + } + // 4. Handle integer values + else if let Ok(int_val) = value.try_int(vm) { + let i = int_val.as_bigint(); + match size { + 1 => { + let val = i.to_i8().unwrap_or(0); + Ok(val.to_ne_bytes().to_vec()) + } + 2 => { + let val = i.to_i16().unwrap_or(0); + Ok(val.to_ne_bytes().to_vec()) + } + 4 => { + let val = i.to_i32().unwrap_or(0); + Ok(val.to_ne_bytes().to_vec()) + } + 8 => { + let val = i.to_i64().unwrap_or(0); + Ok(val.to_ne_bytes().to_vec()) + } + _ => Ok(vec![0u8; size]), } } else { - vm.ctx.none() - }; + Ok(vec![0u8; size]) + } + } - // Create instance - let args = FuncArgs::new(vec![value], KwArgs::default()); - let instance = PyCSimple::slot_new(cls.clone(), args, vm)?; + /// Convert a Python value to bytes with type-specific handling for pointer types. + /// Returns (bytes, optional holder for wchar buffer). + fn value_to_bytes_for_type( + type_code: &str, + value: &PyObject, + size: usize, + vm: &VirtualMachine, + ) -> PyResult<(Vec, Option)> { + match type_code { + // c_float: always convert to float first (f_set) + "f" => { + let f = if let Some(float_val) = value.downcast_ref::() { + float_val.to_f64() + } else if let Ok(int_val) = value.try_int(vm) { + int_val.as_bigint().to_i64().unwrap_or(0) as f64 + } else { + return Err(vm.new_type_error(format!( + "float expected instead of {}", + value.class().name() + ))); + }; + let val = f as f32; + Ok((val.to_ne_bytes().to_vec(), None)) + } + // c_double: always convert to float first (d_set) + "d" => { + let f = if let Some(float_val) = value.downcast_ref::() { + float_val.to_f64() + } else if let Ok(int_val) = value.try_int(vm) { + int_val.as_bigint().to_i64().unwrap_or(0) as f64 + } else { + return Err(vm.new_type_error(format!( + "float expected instead of {}", + value.class().name() + ))); + }; + Ok((f.to_ne_bytes().to_vec(), None)) + } + // c_longdouble: convert to float (treated as f64 in RustPython) + "g" => { + let f = if let Some(float_val) = value.downcast_ref::() { + float_val.to_f64() + } else if let Ok(int_val) = value.try_int(vm) { + int_val.as_bigint().to_i64().unwrap_or(0) as f64 + } else { + return Err(vm.new_type_error(format!( + "float expected instead of {}", + value.class().name() + ))); + }; + Ok((f.to_ne_bytes().to_vec(), None)) + } + "z" => { + // c_char_p: store pointer to null-terminated bytes + if let Some(bytes) = value.downcast_ref::() { + let (converted, ptr) = ensure_z_null_terminated(bytes, vm); + let mut result = vec![0u8; size]; + let addr_bytes = ptr.to_ne_bytes(); + let len = std::cmp::min(addr_bytes.len(), size); + result[..len].copy_from_slice(&addr_bytes[..len]); + return Ok((result, Some(converted))); + } + // Integer address + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_usize().unwrap_or(0); + let mut result = vec![0u8; size]; + let bytes = v.to_ne_bytes(); + let len = std::cmp::min(bytes.len(), size); + result[..len].copy_from_slice(&bytes[..len]); + return Ok((result, None)); + } + // None -> NULL pointer + if vm.is_none(value) { + return Ok((vec![0u8; size], None)); + } + Ok((PyCField::value_to_bytes(value, size, vm)?, None)) + } + "Z" => { + // c_wchar_p: store pointer to null-terminated wchar_t buffer + if let Some(s) = value.downcast_ref::() { + let (holder, ptr) = str_to_wchar_bytes(s.as_str(), vm); + let mut result = vec![0u8; size]; + let addr_bytes = ptr.to_ne_bytes(); + let len = std::cmp::min(addr_bytes.len(), size); + result[..len].copy_from_slice(&addr_bytes[..len]); + return Ok((result, Some(holder))); + } + // Integer address + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_usize().unwrap_or(0); + let mut result = vec![0u8; size]; + let bytes = v.to_ne_bytes(); + let len = std::cmp::min(bytes.len(), size); + result[..len].copy_from_slice(&bytes[..len]); + return Ok((result, None)); + } + // None -> NULL pointer + if vm.is_none(value) { + return Ok((vec![0u8; size], None)); + } + Ok((PyCField::value_to_bytes(value, size, vm)?, None)) + } + "P" => { + // c_void_p: store integer as pointer + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_usize().unwrap_or(0); + let mut result = vec![0u8; size]; + let bytes = v.to_ne_bytes(); + let len = std::cmp::min(bytes.len(), size); + result[..len].copy_from_slice(&bytes[..len]); + return Ok((result, None)); + } + // None -> NULL pointer + if vm.is_none(value) { + return Ok((vec![0u8; size], None)); + } + Ok((PyCField::value_to_bytes(value, size, vm)?, None)) + } + _ => Ok((PyCField::value_to_bytes(value, size, vm)?, None)), + } + } + + /// Check if the field type is a c_char array (element type has _type_ == 'c') + fn is_char_array(proto: &PyObject, vm: &VirtualMachine) -> bool { + // Get element_type from StgInfo (for array types) + if let Some(proto_type) = proto.downcast_ref::() + && let Some(stg) = proto_type.stg_info_opt() + && let Some(element_type) = &stg.element_type + { + // Check if element type has _type_ == "c" + if let Ok(type_code) = element_type.as_object().get_attr("_type_", vm) + && let Some(s) = type_code.downcast_ref::() + { + return s.as_str() == "c"; + } + } + false + } - // Store base reference to keep dll alive - if let Ok(simple_ref) = instance.clone().downcast::() { - simple_ref.cdata.write().base = Some(dll); + /// Check if the field type is a c_wchar array (element type has _type_ == 'u') + fn is_wchar_array(proto: &PyObject, vm: &VirtualMachine) -> bool { + // Get element_type from StgInfo (for array types) + if let Some(proto_type) = proto.downcast_ref::() + && let Some(stg) = proto_type.stg_info_opt() + && let Some(element_type) = &stg.element_type + { + // Check if element type has _type_ == "u" + if let Ok(type_code) = element_type.as_object().get_attr("_type_", vm) + && let Some(s) = type_code.downcast_ref::() + { + return s.as_str() == "u"; + } } + false + } - Ok(instance) + /// Convert bytes for c_char array assignment (stops at first null terminator) + /// Returns (bytes_to_copy, copy_len) + fn bytes_for_char_array(src: &[u8]) -> &[u8] { + // Find first null terminator and include it + if let Some(null_pos) = src.iter().position(|&b| b == 0) { + &src[..=null_pos] + } else { + src + } } } -impl PyCSimple { - pub fn to_arg( - &self, - ty: libffi::middle::Type, +#[pyclass( + flags(DISALLOW_INSTANTIATION, IMMUTABLETYPE), + with(Representable, GetDescriptor) +)] +impl PyCField { + /// Get PyCData from object (works for both Structure and Union) + fn get_cdata_from_obj<'a>(obj: &'a PyObjectRef, vm: &VirtualMachine) -> PyResult<&'a PyCData> { + if let Some(s) = obj.downcast_ref::() { + Ok(&s.0) + } else if let Some(u) = obj.downcast_ref::() { + Ok(&u.0) + } else { + Err(vm.new_type_error(format!( + "descriptor works only on Structure or Union instances, got {}", + obj.class().name() + ))) + } + } + + /// PyCField_set + #[pyslot] + fn descr_set( + zelf: &crate::PyObject, + obj: PyObjectRef, + value: PySetterValue, + vm: &VirtualMachine, + ) -> PyResult<()> { + let zelf = zelf + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("expected CField"))?; + + let offset = zelf.offset as usize; + let size = zelf.byte_size(); + + // Get PyCData from obj (works for both Structure and Union) + let cdata = Self::get_cdata_from_obj(&obj, vm)?; + + match value { + PySetterValue::Assign(value) => { + // Check if needs byte swapping + let needs_swap = (obj + .class() + .as_object() + .get_attr("_swappedbytes_", vm) + .is_ok() + || zelf + .proto + .as_object() + .get_attr("_swappedbytes_", vm) + .is_ok()) + && size > 1; + + // PyCData_set + cdata.set_field( + zelf.proto.as_object(), + value, + zelf.index, + size, + offset, + needs_swap, + vm, + ) + } + PySetterValue::Delete => Err(vm.new_type_error("cannot delete field")), + } + } + + #[pymethod] + fn __set__( + zelf: PyObjectRef, + obj: PyObjectRef, + value: PyObjectRef, vm: &VirtualMachine, - ) -> Option { - let value = unsafe { (*self.value.as_ptr()).clone() }; - if let Ok(i) = value.try_int(vm) { - let i = i.as_bigint(); - return if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::u8().as_raw_ptr()) { - i.to_u8().map(|r: u8| libffi::middle::Arg::new(&r)) - } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::i8().as_raw_ptr()) { - i.to_i8().map(|r: i8| libffi::middle::Arg::new(&r)) - } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::u16().as_raw_ptr()) { - i.to_u16().map(|r: u16| libffi::middle::Arg::new(&r)) - } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::i16().as_raw_ptr()) { - i.to_i16().map(|r: i16| libffi::middle::Arg::new(&r)) - } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::u32().as_raw_ptr()) { - i.to_u32().map(|r: u32| libffi::middle::Arg::new(&r)) - } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::i32().as_raw_ptr()) { - i.to_i32().map(|r: i32| libffi::middle::Arg::new(&r)) - } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::u64().as_raw_ptr()) { - i.to_u64().map(|r: u64| libffi::middle::Arg::new(&r)) - } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::i64().as_raw_ptr()) { - i.to_i64().map(|r: i64| libffi::middle::Arg::new(&r)) + ) -> PyResult<()> { + Self::descr_set(&zelf, obj, PySetterValue::Assign(value), vm) + } + + #[pymethod] + fn __delete__(zelf: PyObjectRef, obj: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + Self::descr_set(&zelf, obj, PySetterValue::Delete, vm) + } + + #[pygetset] + fn offset(&self) -> isize { + self.offset + } + + #[pygetset] + fn size(&self) -> isize { + self.size + } +} + +// ParamFunc implementations (PyCArgObject creation) + +use super::_ctypes::CArgObject; + +/// Call the appropriate paramfunc based on StgInfo.paramfunc +/// info->paramfunc(st, obj) +pub(super) fn call_paramfunc(obj: &PyObject, vm: &VirtualMachine) -> PyResult { + let cls = obj.class(); + let stg_info = cls + .stg_info_opt() + .ok_or_else(|| vm.new_type_error("not a ctypes type"))?; + + match stg_info.paramfunc { + ParamFunc::Simple => simple_paramfunc(obj, vm), + ParamFunc::Array => array_paramfunc(obj, vm), + ParamFunc::Pointer => pointer_paramfunc(obj, vm), + ParamFunc::Structure | ParamFunc::Union => struct_union_paramfunc(obj, &stg_info, vm), + ParamFunc::None => Err(vm.new_type_error("no paramfunc")), + } +} + +/// PyCSimpleType_paramfunc +fn simple_paramfunc(obj: &PyObject, vm: &VirtualMachine) -> PyResult { + use super::simple::PyCSimple; + + let simple = obj + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("expected simple type"))?; + + // Get type code from _type_ attribute + let cls = obj.class().to_owned(); + let type_code = cls + .type_code(vm) + .ok_or_else(|| vm.new_type_error("no _type_ attribute"))?; + let tag = type_code.as_bytes().first().copied().unwrap_or(b'?'); + + // Read value from buffer: memcpy(&parg->value, self->b_ptr, self->b_size) + let buffer = simple.0.buffer.read(); + let ffi_value = buffer_to_ffi_value(&type_code, &buffer); + + Ok(CArgObject { + tag, + value: ffi_value, + obj: obj.to_owned(), + size: 0, + offset: 0, + }) +} + +/// PyCArrayType_paramfunc +fn array_paramfunc(obj: &PyObject, vm: &VirtualMachine) -> PyResult { + use super::array::PyCArray; + + let array = obj + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("expected array"))?; + + // p->value.p = (char *)self->b_ptr + let buffer = array.0.buffer.read(); + let ptr_val = buffer.as_ptr() as usize; + + Ok(CArgObject { + tag: b'P', + value: FfiArgValue::Pointer(ptr_val), + obj: obj.to_owned(), + size: 0, + offset: 0, + }) +} + +/// PyCPointerType_paramfunc +fn pointer_paramfunc(obj: &PyObject, vm: &VirtualMachine) -> PyResult { + use super::pointer::PyCPointer; + + let ptr = obj + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("expected pointer"))?; + + // parg->value.p = *(void **)self->b_ptr + let ptr_val = ptr.get_ptr_value(); + + Ok(CArgObject { + tag: b'P', + value: FfiArgValue::Pointer(ptr_val), + obj: obj.to_owned(), + size: 0, + offset: 0, + }) +} + +/// StructUnionType_paramfunc (for both Structure and Union) +fn struct_union_paramfunc( + obj: &PyObject, + stg_info: &StgInfo, + _vm: &VirtualMachine, +) -> PyResult { + // Get buffer pointer + // For large structs (> sizeof(void*)), we'd need to allocate and copy. + // For now, just point to buffer directly and keep obj reference for memory safety. + let buffer = if let Some(cdata) = obj.downcast_ref::() { + cdata.buffer.read() + } else { + return Ok(CArgObject { + tag: b'V', + value: FfiArgValue::Pointer(0), + obj: obj.to_owned(), + size: stg_info.size, + offset: 0, + }); + }; + + let ptr_val = buffer.as_ptr() as usize; + let size = buffer.len(); + + Ok(CArgObject { + tag: b'V', + value: FfiArgValue::Pointer(ptr_val), + obj: obj.to_owned(), + size, + offset: 0, + }) +} + +// FfiArgValue - Owned FFI argument value + +/// Owned FFI argument value. Keeps the value alive for the duration of the FFI call. +#[derive(Debug, Clone)] +pub enum FfiArgValue { + U8(u8), + I8(i8), + U16(u16), + I16(i16), + U32(u32), + I32(i32), + U64(u64), + I64(i64), + F32(f32), + F64(f64), + Pointer(usize), + /// Pointer with owned data. The PyObjectRef keeps the pointed data alive. + OwnedPointer(usize, #[allow(dead_code)] crate::PyObjectRef), +} + +impl FfiArgValue { + /// Create an Arg reference to this owned value + pub fn as_arg(&self) -> libffi::middle::Arg { + match self { + FfiArgValue::U8(v) => libffi::middle::Arg::new(v), + FfiArgValue::I8(v) => libffi::middle::Arg::new(v), + FfiArgValue::U16(v) => libffi::middle::Arg::new(v), + FfiArgValue::I16(v) => libffi::middle::Arg::new(v), + FfiArgValue::U32(v) => libffi::middle::Arg::new(v), + FfiArgValue::I32(v) => libffi::middle::Arg::new(v), + FfiArgValue::U64(v) => libffi::middle::Arg::new(v), + FfiArgValue::I64(v) => libffi::middle::Arg::new(v), + FfiArgValue::F32(v) => libffi::middle::Arg::new(v), + FfiArgValue::F64(v) => libffi::middle::Arg::new(v), + FfiArgValue::Pointer(v) => libffi::middle::Arg::new(v), + FfiArgValue::OwnedPointer(v, _) => libffi::middle::Arg::new(v), + } + } +} + +/// Convert buffer bytes to FfiArgValue based on type code +pub(super) fn buffer_to_ffi_value(type_code: &str, buffer: &[u8]) -> FfiArgValue { + match type_code { + "c" | "b" => { + let v = buffer.first().map(|&b| b as i8).unwrap_or(0); + FfiArgValue::I8(v) + } + "B" => { + let v = buffer.first().copied().unwrap_or(0); + FfiArgValue::U8(v) + } + "h" => { + let v = if buffer.len() >= 2 { + i16::from_ne_bytes(buffer[..2].try_into().unwrap()) + } else { + 0 + }; + FfiArgValue::I16(v) + } + "H" => { + let v = if buffer.len() >= 2 { + u16::from_ne_bytes(buffer[..2].try_into().unwrap()) + } else { + 0 + }; + FfiArgValue::U16(v) + } + "i" => { + let v = if buffer.len() >= 4 { + i32::from_ne_bytes(buffer[..4].try_into().unwrap()) + } else { + 0 + }; + FfiArgValue::I32(v) + } + "I" => { + let v = if buffer.len() >= 4 { + u32::from_ne_bytes(buffer[..4].try_into().unwrap()) + } else { + 0 + }; + FfiArgValue::U32(v) + } + "l" | "q" => { + let v = if buffer.len() >= 8 { + i64::from_ne_bytes(buffer[..8].try_into().unwrap()) + } else if buffer.len() >= 4 { + i32::from_ne_bytes(buffer[..4].try_into().unwrap()) as i64 + } else { + 0 + }; + FfiArgValue::I64(v) + } + "L" | "Q" => { + let v = if buffer.len() >= 8 { + u64::from_ne_bytes(buffer[..8].try_into().unwrap()) + } else if buffer.len() >= 4 { + u32::from_ne_bytes(buffer[..4].try_into().unwrap()) as u64 + } else { + 0 + }; + FfiArgValue::U64(v) + } + "f" => { + let v = if buffer.len() >= 4 { + f32::from_ne_bytes(buffer[..4].try_into().unwrap()) + } else { + 0.0 + }; + FfiArgValue::F32(v) + } + "d" | "g" => { + let v = if buffer.len() >= 8 { + f64::from_ne_bytes(buffer[..8].try_into().unwrap()) } else { - None + 0.0 }; + FfiArgValue::F64(v) } - if let Ok(_f) = value.try_float(vm) { - todo!(); + "z" | "Z" | "P" | "O" => FfiArgValue::Pointer(read_ptr_from_buffer(buffer)), + "?" => { + let v = buffer.first().map(|&b| b != 0).unwrap_or(false); + FfiArgValue::U8(if v { 1 } else { 0 }) } - if let Ok(_b) = value.try_to_bool(vm) { - todo!(); + "u" => { + // wchar_t - 4 bytes on most platforms + let v = if buffer.len() >= 4 { + u32::from_ne_bytes(buffer[..4].try_into().unwrap()) + } else { + 0 + }; + FfiArgValue::U32(v) } - None + _ => FfiArgValue::Pointer(0), } } -static SIMPLE_BUFFER_METHODS: BufferMethods = BufferMethods { - obj_bytes: |buffer| { - rustpython_common::lock::PyMappedRwLockReadGuard::map( - rustpython_common::lock::PyRwLockReadGuard::map( - buffer.obj_as::().cdata.read(), - |x: &CDataObject| x, - ), - |x: &CDataObject| x.buffer.as_slice(), - ) - .into() - }, - obj_bytes_mut: |buffer| { - rustpython_common::lock::PyMappedRwLockWriteGuard::map( - rustpython_common::lock::PyRwLockWriteGuard::map( - buffer.obj_as::().cdata.write(), - |x: &mut CDataObject| x, - ), - |x: &mut CDataObject| x.buffer.as_mut_slice(), - ) - .into() - }, - release: |_| {}, - retain: |_| {}, -}; +/// Convert bytes to appropriate Python object based on ctypes type +pub(super) fn bytes_to_pyobject( + cls: &Py, + bytes: &[u8], + vm: &VirtualMachine, +) -> PyResult { + // Try to get _type_ attribute + if let Ok(type_attr) = cls.as_object().get_attr("_type_", vm) + && let Ok(s) = type_attr.str(vm) + { + let ty = s.to_string(); + return match ty.as_str() { + "c" => Ok(vm.ctx.new_bytes(bytes.to_vec()).into()), + "b" => { + let val = if !bytes.is_empty() { bytes[0] as i8 } else { 0 }; + Ok(vm.ctx.new_int(val).into()) + } + "B" => { + let val = if !bytes.is_empty() { bytes[0] } else { 0 }; + Ok(vm.ctx.new_int(val).into()) + } + "h" => { + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_short::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0 + }; + Ok(vm.ctx.new_int(val).into()) + } + "H" => { + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_ushort::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0 + }; + Ok(vm.ctx.new_int(val).into()) + } + "i" => { + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_int::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0 + }; + Ok(vm.ctx.new_int(val).into()) + } + "I" => { + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_uint::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0 + }; + Ok(vm.ctx.new_int(val).into()) + } + "l" => { + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_long::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0 + }; + Ok(vm.ctx.new_int(val).into()) + } + "L" => { + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_ulong::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0 + }; + Ok(vm.ctx.new_int(val).into()) + } + "q" => { + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_longlong::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0 + }; + Ok(vm.ctx.new_int(val).into()) + } + "Q" => { + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_ulonglong::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0 + }; + Ok(vm.ctx.new_int(val).into()) + } + "f" => { + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_float::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0.0 + }; + Ok(vm.ctx.new_float(val as f64).into()) + } + "d" => { + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_double::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0.0 + }; + Ok(vm.ctx.new_float(val).into()) + } + "g" => { + // long double - read as f64 for now since Rust doesn't have native long double + // This may lose precision on platforms where long double > 64 bits + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_double::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0.0 + }; + Ok(vm.ctx.new_float(val).into()) + } + "?" => { + let val = !bytes.is_empty() && bytes[0] != 0; + Ok(vm.ctx.new_bool(val).into()) + } + "v" => { + // VARIANT_BOOL: non-zero = True, zero = False + const SIZE: usize = mem::size_of::(); + let val = if bytes.len() >= SIZE { + c_short::from_ne_bytes(bytes[..SIZE].try_into().expect("size checked")) + } else { + 0 + }; + Ok(vm.ctx.new_bool(val != 0).into()) + } + "z" => { + // c_char_p: read NULL-terminated string from pointer + let ptr = read_ptr_from_buffer(bytes); + if ptr == 0 { + return Ok(vm.ctx.none()); + } + let c_str = unsafe { std::ffi::CStr::from_ptr(ptr as _) }; + Ok(vm.ctx.new_bytes(c_str.to_bytes().to_vec()).into()) + } + "Z" => { + // c_wchar_p: read NULL-terminated wide string from pointer + let ptr = read_ptr_from_buffer(bytes); + if ptr == 0 { + return Ok(vm.ctx.none()); + } + let len = unsafe { libc::wcslen(ptr as *const libc::wchar_t) }; + let wchars = + unsafe { std::slice::from_raw_parts(ptr as *const libc::wchar_t, len) }; + let s: String = wchars + .iter() + .filter_map(|&c| char::from_u32(c as u32)) + .collect(); + Ok(vm.ctx.new_str(s).into()) + } + "P" => { + // c_void_p: return pointer value as integer + let val = read_ptr_from_buffer(bytes); + if val == 0 { + return Ok(vm.ctx.none()); + } + Ok(vm.ctx.new_int(val).into()) + } + "u" => { + let val = if bytes.len() >= mem::size_of::() { + let wc = if mem::size_of::() == 2 { + u16::from_ne_bytes([bytes[0], bytes[1]]) as u32 + } else { + u32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) + }; + char::from_u32(wc).unwrap_or('\0') + } else { + '\0' + }; + Ok(vm.ctx.new_str(val).into()) + } + _ => Ok(vm.ctx.none()), + }; + } + // Default: return bytes as-is + Ok(vm.ctx.new_bytes(bytes.to_vec()).into()) +} -impl AsBuffer for PyCSimple { - fn as_buffer(zelf: &Py, _vm: &VirtualMachine) -> PyResult { - let buffer_len = zelf.cdata.read().buffer.len(); - let buf = PyBuffer::new( - zelf.to_owned().into(), - BufferDescriptor::simple(buffer_len, false), // readonly=false for ctypes - &SIMPLE_BUFFER_METHODS, - ); - Ok(buf) +// Shared functions for Structure and Union types + +/// Parse a non-negative integer attribute, returning default if not present +pub(super) fn get_usize_attr( + obj: &PyObject, + attr: &str, + default: usize, + vm: &VirtualMachine, +) -> PyResult { + let Ok(attr_val) = obj.get_attr(vm.ctx.intern_str(attr), vm) else { + return Ok(default); + }; + let n = attr_val + .try_int(vm) + .map_err(|_| vm.new_value_error(format!("{attr} must be a non-negative integer")))?; + let val = n.as_bigint(); + if val.is_negative() { + return Err(vm.new_value_error(format!("{attr} must be a non-negative integer"))); + } + Ok(val.to_usize().unwrap_or(default)) +} + +/// Read a pointer value from buffer +#[inline] +pub(super) fn read_ptr_from_buffer(buffer: &[u8]) -> usize { + const PTR_SIZE: usize = std::mem::size_of::(); + if buffer.len() >= PTR_SIZE { + usize::from_ne_bytes(buffer[..PTR_SIZE].try_into().unwrap()) + } else { + 0 + } +} + +/// Set or initialize StgInfo on a type +pub(super) fn set_or_init_stginfo(type_ref: &PyType, stg_info: StgInfo) { + if type_ref.init_type_data(stg_info.clone()).is_err() + && let Some(mut existing) = type_ref.get_type_data_mut::() + { + *existing = stg_info; + } +} + +/// Check if a field type supports byte order swapping +pub(super) fn check_other_endian_support( + field_type: &PyObject, + vm: &VirtualMachine, +) -> PyResult<()> { + let other_endian_attr = if cfg!(target_endian = "little") { + "__ctype_be__" + } else { + "__ctype_le__" + }; + + if field_type.get_attr(other_endian_attr, vm).is_ok() { + return Ok(()); + } + + // Array type: recursively check element type + if let Ok(elem_type) = field_type.get_attr("_type_", vm) + && field_type.get_attr("_length_", vm).is_ok() + { + return check_other_endian_support(&elem_type, vm); } + + // Structure/Union: has StgInfo but no _type_ attribute + if let Some(type_obj) = field_type.downcast_ref::() + && type_obj.stg_info_opt().is_some() + && field_type.get_attr("_type_", vm).is_err() + { + return Ok(()); + } + + Err(vm.new_type_error(format!( + "This type does not support other endian: {}", + field_type.class().name() + ))) +} + +/// Get the size of a ctypes field type +pub(super) fn get_field_size(field_type: &PyObject, vm: &VirtualMachine) -> PyResult { + if let Some(type_obj) = field_type.downcast_ref::() + && let Some(stg_info) = type_obj.stg_info_opt() + { + return Ok(stg_info.size); + } + + if let Some(size) = field_type + .get_attr("_type_", vm) + .ok() + .and_then(|type_attr| type_attr.str(vm).ok()) + .and_then(|type_str| { + let s = type_str.to_string(); + (s.len() == 1).then(|| super::get_size(&s)) + }) + { + return Ok(size); + } + + if let Some(s) = field_type + .get_attr("size_of_instances", vm) + .ok() + .and_then(|size_method| size_method.call((), vm).ok()) + .and_then(|size| size.try_int(vm).ok()) + .and_then(|n| n.as_bigint().to_usize()) + { + return Ok(s); + } + + Ok(std::mem::size_of::()) +} + +/// Get the alignment of a ctypes field type +pub(super) fn get_field_align(field_type: &PyObject, vm: &VirtualMachine) -> usize { + if let Some(type_obj) = field_type.downcast_ref::() + && let Some(stg_info) = type_obj.stg_info_opt() + && stg_info.align > 0 + { + return stg_info.align; + } + + if let Some(align) = field_type + .get_attr("_type_", vm) + .ok() + .and_then(|type_attr| type_attr.str(vm).ok()) + .and_then(|type_str| { + let s = type_str.to_string(); + (s.len() == 1).then(|| super::get_size(&s)) + }) + { + return align; + } + + 1 +} + +/// Promote fields from anonymous struct/union to parent type +fn make_fields( + cls: &Py, + descr: &super::PyCField, + index: usize, + offset: isize, + vm: &VirtualMachine, +) -> PyResult<()> { + use crate::builtins::{PyList, PyTuple}; + use crate::convert::ToPyObject; + + let fields = descr.proto.as_object().get_attr("_fields_", vm)?; + let fieldlist: Vec = if let Some(list) = fields.downcast_ref::() { + list.borrow_vec().to_vec() + } else if let Some(tuple) = fields.downcast_ref::() { + tuple.to_vec() + } else { + return Err(vm.new_type_error("_fields_ must be a sequence")); + }; + + for pair in fieldlist.iter() { + let field_tuple = pair + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("_fields_ must contain tuples"))?; + + if field_tuple.len() < 2 { + continue; + } + + let fname = field_tuple + .first() + .expect("len checked") + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("field name must be a string"))?; + + let fdescr_obj = descr + .proto + .as_object() + .get_attr(vm.ctx.intern_str(fname.as_str()), vm)?; + let fdescr = fdescr_obj + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("unexpected type"))?; + + if fdescr.anonymous { + make_fields( + cls, + fdescr, + index + fdescr.index, + offset + fdescr.offset, + vm, + )?; + continue; + } + + let new_descr = super::PyCField::new_from_field(fdescr, index, offset); + cls.set_attr(vm.ctx.intern_str(fname.as_str()), new_descr.to_pyobject(vm)); + } + + Ok(()) +} + +/// Process _anonymous_ attribute for struct/union +pub(super) fn make_anon_fields(cls: &Py, vm: &VirtualMachine) -> PyResult<()> { + use crate::builtins::{PyList, PyTuple}; + use crate::convert::ToPyObject; + + let anon = match cls.as_object().get_attr("_anonymous_", vm) { + Ok(anon) => anon, + Err(_) => return Ok(()), + }; + + let anon_names: Vec = if let Some(list) = anon.downcast_ref::() { + list.borrow_vec().to_vec() + } else if let Some(tuple) = anon.downcast_ref::() { + tuple.to_vec() + } else { + return Err(vm.new_type_error("_anonymous_ must be a sequence")); + }; + + for fname_obj in anon_names.iter() { + let fname = fname_obj + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("_anonymous_ items must be strings"))?; + + let descr_obj = cls + .as_object() + .get_attr(vm.ctx.intern_str(fname.as_str()), vm)?; + + let descr = descr_obj.downcast_ref::().ok_or_else(|| { + vm.new_attribute_error(format!( + "'{}' is specified in _anonymous_ but not in _fields_", + fname.as_str() + )) + })?; + + let mut new_descr = super::PyCField::new_from_field(descr, 0, 0); + new_descr.set_anonymous(true); + cls.set_attr(vm.ctx.intern_str(fname.as_str()), new_descr.to_pyobject(vm)); + + make_fields(cls, descr, descr.index, descr.offset, vm)?; + } + + Ok(()) } diff --git a/crates/vm/src/stdlib/ctypes/function.rs b/crates/vm/src/stdlib/ctypes/function.rs index b4e600f77ba..9bddb0ef0e8 100644 --- a/crates/vm/src/stdlib/ctypes/function.rs +++ b/crates/vm/src/stdlib/ctypes/function.rs @@ -1,67 +1,310 @@ // spell-checker:disable -use crate::builtins::{PyNone, PyStr, PyTuple, PyTupleRef, PyType, PyTypeRef}; -use crate::convert::ToPyObject; -use crate::function::FuncArgs; -use crate::stdlib::ctypes::PyCData; -use crate::stdlib::ctypes::base::{CDataObject, PyCSimple, ffi_type_from_str}; -use crate::stdlib::ctypes::thunk::PyCThunk; -use crate::types::Representable; -use crate::types::{Callable, Constructor}; -use crate::{AsObject, Py, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine}; -use crossbeam_utils::atomic::AtomicCell; -use libffi::middle::{Arg, Cif, CodePtr, Type}; +use super::{ + _ctypes::CArgObject, PyCArray, PyCData, PyCPointer, PyCStructure, base::FfiArgValue, + simple::PyCSimple, type_info, +}; +use crate::{ + AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, + builtins::{PyBytes, PyDict, PyNone, PyStr, PyTuple, PyType, PyTypeRef}, + class::StaticType, + convert::ToPyObject, + function::FuncArgs, + types::{Callable, Constructor, Representable}, + vm::thread::with_current_vm, +}; +use libffi::{ + low, + middle::{Arg, Cif, Closure, CodePtr, Type}, +}; use libloading::Symbol; use num_traits::ToPrimitive; use rustpython_common::lock::PyRwLock; use std::ffi::{self, c_void}; use std::fmt::Debug; -// See also: https://github.com/python/cpython/blob/4f8bb3947cfbc20f970ff9d9531e1132a9e95396/Modules/_ctypes/callproc.c#L15 +// Internal function addresses for special ctypes functions +pub(super) const INTERNAL_CAST_ADDR: usize = 1; +pub(super) const INTERNAL_STRING_AT_ADDR: usize = 2; +pub(super) const INTERNAL_WSTRING_AT_ADDR: usize = 3; type FP = unsafe extern "C" fn(); -pub trait ArgumentType { +/// Get FFI type for a ctypes type code +fn get_ffi_type(ty: &str) -> Option { + type_info(ty).map(|t| (t.ffi_type_fn)()) +} + +// PyCFuncPtr - Function pointer implementation + +/// Get FFI type from CArgObject tag character +fn ffi_type_from_tag(tag: u8) -> Type { + match tag { + b'c' | b'b' => Type::i8(), + b'B' => Type::u8(), + b'h' => Type::i16(), + b'H' => Type::u16(), + b'i' => Type::i32(), + b'I' => Type::u32(), + b'l' => { + if std::mem::size_of::() == 8 { + Type::i64() + } else { + Type::i32() + } + } + b'L' => { + if std::mem::size_of::() == 8 { + Type::u64() + } else { + Type::u32() + } + } + b'q' => Type::i64(), + b'Q' => Type::u64(), + b'f' => Type::f32(), + b'd' | b'g' => Type::f64(), + b'?' => Type::u8(), + b'u' => { + if std::mem::size_of::() == 2 { + Type::u16() + } else { + Type::u32() + } + } + _ => Type::pointer(), // 'P', 'V', 'z', 'Z', 'O', etc. + } +} + +/// Convert any object to a pointer value for c_void_p arguments +/// Follows ConvParam logic for pointer types +fn convert_to_pointer(value: &PyObject, vm: &VirtualMachine) -> PyResult { + // 0. CArgObject (from byref()) -> buffer address + offset + if let Some(carg) = value.downcast_ref::() { + // Get buffer address from the underlying object + let base_addr = if let Some(cdata) = carg.obj.downcast_ref::() { + cdata.buffer.read().as_ptr() as usize + } else { + return Err(vm.new_type_error(format!( + "byref() argument must be a ctypes instance, not '{}'", + carg.obj.class().name() + ))); + }; + let addr = (base_addr as isize + carg.offset) as usize; + return Ok(FfiArgValue::Pointer(addr)); + } + + // 1. None -> NULL + if value.is(&vm.ctx.none) { + return Ok(FfiArgValue::Pointer(0)); + } + + // 2. PyCArray -> buffer address (PyCArrayType_paramfunc) + if let Some(array) = value.downcast_ref::() { + let addr = array.0.buffer.read().as_ptr() as usize; + return Ok(FfiArgValue::Pointer(addr)); + } + + // 3. PyCPointer -> stored pointer value + if let Some(ptr) = value.downcast_ref::() { + return Ok(FfiArgValue::Pointer(ptr.get_ptr_value())); + } + + // 4. PyCStructure -> buffer address + if let Some(struct_obj) = value.downcast_ref::() { + let addr = struct_obj.0.buffer.read().as_ptr() as usize; + return Ok(FfiArgValue::Pointer(addr)); + } + + // 5. PyCSimple (c_void_p, c_char_p, etc.) -> value from buffer + if let Some(simple) = value.downcast_ref::() { + let buffer = simple.0.buffer.read(); + if buffer.len() >= std::mem::size_of::() { + let addr = super::base::read_ptr_from_buffer(&buffer); + return Ok(FfiArgValue::Pointer(addr)); + } + } + + // 6. bytes -> buffer address (PyBytes_AsString) + if let Some(bytes) = value.downcast_ref::() { + let addr = bytes.as_bytes().as_ptr() as usize; + return Ok(FfiArgValue::Pointer(addr)); + } + + // 7. Integer -> direct value + if let Ok(int_val) = value.try_int(vm) { + return Ok(FfiArgValue::Pointer( + int_val.as_bigint().to_usize().unwrap_or(0), + )); + } + + // 8. Check _as_parameter_ attribute ( recursive ConvParam) + if let Ok(as_param) = value.get_attr("_as_parameter_", vm) { + return convert_to_pointer(&as_param, vm); + } + + Err(vm.new_type_error(format!( + "cannot convert '{}' to c_void_p", + value.class().name() + ))) +} + +/// ConvParam-like conversion for when argtypes is None +/// Returns both the FFI type and the converted value +fn conv_param(value: &PyObject, vm: &VirtualMachine) -> PyResult<(Type, FfiArgValue)> { + // 1. CArgObject (from byref() or paramfunc) -> use stored type and value + if let Some(carg) = value.downcast_ref::() { + let ffi_type = ffi_type_from_tag(carg.tag); + return Ok((ffi_type, carg.value.clone())); + } + + // 2. None -> NULL pointer + if value.is(&vm.ctx.none) { + return Ok((Type::pointer(), FfiArgValue::Pointer(0))); + } + + // 3. ctypes objects -> use paramfunc + if let Ok(carg) = super::base::call_paramfunc(value, vm) { + let ffi_type = ffi_type_from_tag(carg.tag); + return Ok((ffi_type, carg.value.clone())); + } + + // 4. Python str -> pointer (use internal UTF-8 buffer) + if let Some(s) = value.downcast_ref::() { + let addr = s.as_str().as_ptr() as usize; + return Ok((Type::pointer(), FfiArgValue::Pointer(addr))); + } + + // 9. Python bytes -> pointer to buffer + if let Some(bytes) = value.downcast_ref::() { + let addr = bytes.as_bytes().as_ptr() as usize; + return Ok((Type::pointer(), FfiArgValue::Pointer(addr))); + } + + // 10. Python int -> i32 (default integer type) + if let Ok(int_val) = value.try_int(vm) { + let val = int_val.as_bigint().to_i32().unwrap_or(0); + return Ok((Type::i32(), FfiArgValue::I32(val))); + } + + // 11. Python float -> f64 + if let Ok(float_val) = value.try_float(vm) { + return Ok((Type::f64(), FfiArgValue::F64(float_val.to_f64()))); + } + + // 12. Check _as_parameter_ attribute + if let Ok(as_param) = value.get_attr("_as_parameter_", vm) { + return conv_param(&as_param, vm); + } + + Err(vm.new_type_error(format!( + "Don't know how to convert parameter {}", + value.class().name() + ))) +} + +trait ArgumentType { fn to_ffi_type(&self, vm: &VirtualMachine) -> PyResult; - fn convert_object(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult; + fn convert_object(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult; } impl ArgumentType for PyTypeRef { fn to_ffi_type(&self, vm: &VirtualMachine) -> PyResult { + use super::pointer::PyCPointer; + use super::structure::PyCStructure; + + // CArgObject (from byref()) should be treated as pointer + if self.fast_issubclass(CArgObject::static_type()) { + return Ok(Type::pointer()); + } + + // Pointer types (POINTER(T)) are always pointer FFI type + // Check if type is a subclass of _Pointer (PyCPointer) + if self.fast_issubclass(PyCPointer::static_type()) { + return Ok(Type::pointer()); + } + + // Structure types are passed as pointers + if self.fast_issubclass(PyCStructure::static_type()) { + return Ok(Type::pointer()); + } + + // Use get_attr to traverse MRO (for subclasses like MyInt(c_int)) let typ = self - .get_class_attr(vm.ctx.intern_str("_type_")) - .ok_or(vm.new_type_error("Unsupported argument type".to_string()))?; + .as_object() + .get_attr(vm.ctx.intern_str("_type_"), vm) + .ok() + .ok_or(vm.new_type_error("Unsupported argument type"))?; let typ = typ .downcast_ref::() - .ok_or(vm.new_type_error("Unsupported argument type".to_string()))?; + .ok_or(vm.new_type_error("Unsupported argument type"))?; let typ = typ.to_string(); let typ = typ.as_str(); - let converted_typ = ffi_type_from_str(typ); - if let Some(typ) = converted_typ { - Ok(typ) - } else { - Err(vm.new_type_error(format!("Unsupported argument type: {}", typ))) - } + get_ffi_type(typ) + .ok_or_else(|| vm.new_type_error(format!("Unsupported argument type: {}", typ))) } - fn convert_object(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult { - // if self.fast_isinstance::(vm) { - // let array = value.downcast::()?; - // return Ok(Arg::from(array.as_ptr())); - // } - if let Ok(simple) = value.downcast::() { + fn convert_object(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // Call from_param first to convert the value (like CPython's callproc.c:1235) + // converter = PyTuple_GET_ITEM(argtypes, i); + // v = PyObject_CallOneArg(converter, arg); + let from_param = self + .as_object() + .get_attr(vm.ctx.intern_str("from_param"), vm)?; + let converted = from_param.call((value.clone(),), vm)?; + + // Then pass the converted value to ConvParam logic + // CArgObject (from from_param) -> use stored value directly + if let Some(carg) = converted.downcast_ref::() { + return Ok(carg.value.clone()); + } + + // None -> NULL pointer + if vm.is_none(&converted) { + return Ok(FfiArgValue::Pointer(0)); + } + + // For pointer types (POINTER(T)), we need to pass the ADDRESS of the value's buffer + if self.fast_issubclass(PyCPointer::static_type()) { + if let Some(cdata) = converted.downcast_ref::() { + let addr = cdata.buffer.read().as_ptr() as usize; + return Ok(FfiArgValue::Pointer(addr)); + } + return convert_to_pointer(&converted, vm); + } + + // For structure types, convert to pointer to structure + if self.fast_issubclass(PyCStructure::static_type()) { + return convert_to_pointer(&converted, vm); + } + + // Get the type code for this argument type + let type_code = self + .as_object() + .get_attr(vm.ctx.intern_str("_type_"), vm) + .ok() + .and_then(|t| t.downcast_ref::().map(|s| s.to_string())); + + // For pointer types (c_void_p, c_char_p, c_wchar_p), handle as pointer + if matches!(type_code.as_deref(), Some("P") | Some("z") | Some("Z")) { + return convert_to_pointer(&converted, vm); + } + + // PyCSimple (already a ctypes instance from from_param) + if let Ok(simple) = converted.clone().downcast::() { let typ = ArgumentType::to_ffi_type(self, vm)?; - let arg = simple - .to_arg(typ, vm) - .ok_or(vm.new_type_error("Unsupported argument type".to_string()))?; - return Ok(arg); + let ffi_value = simple + .to_ffi_value(typ, vm) + .ok_or(vm.new_type_error("Unsupported argument type"))?; + return Ok(ffi_value); } - Err(vm.new_type_error("Unsupported argument type".to_string())) + + Err(vm.new_type_error("Unsupported argument type")) } } -pub trait ReturnType { - fn to_ffi_type(&self) -> Option; +trait ReturnType { + fn to_ffi_type(&self, vm: &VirtualMachine) -> Option; #[allow(clippy::wrong_self_convention)] fn from_ffi_type( &self, @@ -71,8 +314,34 @@ pub trait ReturnType { } impl ReturnType for PyTypeRef { - fn to_ffi_type(&self) -> Option { - ffi_type_from_str(self.name().to_string().as_str()) + fn to_ffi_type(&self, vm: &VirtualMachine) -> Option { + // Try to get _type_ attribute first (for ctypes types like c_void_p) + if let Ok(type_attr) = self.as_object().get_attr(vm.ctx.intern_str("_type_"), vm) + && let Some(s) = type_attr.downcast_ref::() + && let Some(ffi_type) = get_ffi_type(s.as_str()) + { + return Some(ffi_type); + } + + // Check for Structure/Array types (have StgInfo but no _type_) + // _ctypes_get_ffi_type: returns appropriately sized type for struct returns + if let Some(stg_info) = self.stg_info_opt() { + let size = stg_info.size; + // Small structs can be returned in registers + // Match can_return_struct_as_int/can_return_struct_as_sint64 + return Some(if size <= 4 { + Type::i32() + } else if size <= 8 { + Type::i64() + } else { + // Large structs: use pointer-sized return + // (ABI typically returns via hidden pointer parameter) + Type::pointer() + }); + } + + // Fallback to class name + get_ffi_type(self.name().to_string().as_str()) } fn from_ffi_type( @@ -80,9 +349,11 @@ impl ReturnType for PyTypeRef { value: *mut ffi::c_void, vm: &VirtualMachine, ) -> PyResult> { - // Get the type code from _type_ attribute + // Get the type code from _type_ attribute (use get_attr to traverse MRO) let type_code = self - .get_class_attr(vm.ctx.intern_str("_type_")) + .as_object() + .get_attr(vm.ctx.intern_str("_type_"), vm) + .ok() .and_then(|t| t.downcast_ref::().map(|s| s.to_string())); let result = match type_code.as_deref() { @@ -129,27 +400,59 @@ impl ReturnType for PyTypeRef { .new_float(unsafe { *(value as *const f32) } as f64) .into(), Some("d") => vm.ctx.new_float(unsafe { *(value as *const f64) }).into(), - Some("P") | Some("z") | Some("Z") => vm.ctx.new_int(value as usize).into(), + Some("P") | Some("z") | Some("Z") => { + vm.ctx.new_int(unsafe { *(value as *const usize) }).into() + } Some("?") => vm .ctx .new_bool(unsafe { *(value as *const u8) } != 0) .into(), None => { - // No _type_ attribute, try to create an instance of the type - // This handles cases like Structure or Array return types - return Ok(Some( - vm.ctx.new_int(unsafe { *(value as *const i32) }).into(), - )); + // No _type_ attribute - check for Structure/Array types + // GetResult: PyCData_FromBaseObj creates instance from memory + if let Some(stg_info) = self.stg_info_opt() { + let size = stg_info.size; + // Create instance of the ctypes type + let instance = self.as_object().call((), vm)?; + + // Copy return value memory into instance buffer + // Use a block to properly scope the borrow + { + let src = unsafe { std::slice::from_raw_parts(value as *const u8, size) }; + if let Some(cdata) = instance.downcast_ref::() { + let mut buffer = cdata.buffer.write(); + if buffer.len() >= size { + buffer.to_mut()[..size].copy_from_slice(src); + } + } else if let Some(structure) = instance.downcast_ref::() { + let mut buffer = structure.0.buffer.write(); + if buffer.len() >= size { + buffer.to_mut()[..size].copy_from_slice(src); + } + } else if let Some(array) = instance.downcast_ref::() { + let mut buffer = array.0.buffer.write(); + if buffer.len() >= size { + buffer.to_mut()[..size].copy_from_slice(src); + } + } + } + return Ok(Some(instance)); + } + // Not a ctypes type - call type with int result + return self + .as_object() + .call((unsafe { *(value as *const i32) },), vm) + .map(Some); } - _ => return Err(vm.new_type_error("Unsupported return type".to_string())), + _ => return Err(vm.new_type_error("Unsupported return type")), }; Ok(Some(result)) } } impl ReturnType for PyNone { - fn to_ffi_type(&self) -> Option { - ffi_type_from_str("void") + fn to_ffi_type(&self, _vm: &VirtualMachine) -> Option { + get_ffi_type("void") } fn from_ffi_type( @@ -161,47 +464,319 @@ impl ReturnType for PyNone { } } +/// PyCFuncPtr - Function pointer instance +/// Saved in _base.buffer #[pyclass(module = "_ctypes", name = "CFuncPtr", base = PyCData)] -pub struct PyCFuncPtr { - _base: PyCData, - pub name: PyRwLock>, - pub ptr: PyRwLock>, - #[allow(dead_code)] - pub needs_free: AtomicCell, - pub arg_types: PyRwLock>>, - pub res_type: PyRwLock>, - pub _flags_: AtomicCell, - #[allow(dead_code)] - pub handler: PyObjectRef, +#[repr(C)] +pub(super) struct PyCFuncPtr { + pub _base: PyCData, + /// Thunk for callbacks (keeps thunk alive) + pub thunk: PyRwLock>>, + /// Original Python callable (for callbacks) + pub callable: PyRwLock>, + /// Converters cache + pub converters: PyRwLock>, + /// Instance-level argtypes override + pub argtypes: PyRwLock>, + /// Instance-level restype override + pub restype: PyRwLock>, + /// Checker function + pub checker: PyRwLock>, + /// Error checking function + pub errcheck: PyRwLock>, + /// COM method vtable index + /// When set, the function reads the function pointer from the vtable at call time + #[cfg(windows)] + pub index: PyRwLock>, + /// COM method IID (interface ID) for error handling + #[cfg(windows)] + pub iid: PyRwLock>, + /// Parameter flags for COM methods (direction: IN=1, OUT=2, IN|OUT=4) + /// Each element is (direction, name, default) tuple + pub paramflags: PyRwLock>, } impl Debug for PyCFuncPtr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("PyCFuncPtr") - .field("flags", &self._flags_) + .field("func_ptr", &self.get_func_ptr()) .finish() } } +/// Extract pointer value from a ctypes argument (c_void_p conversion) +fn extract_ptr_from_arg(arg: &PyObject, vm: &VirtualMachine) -> PyResult { + // Try to get pointer value from various ctypes types + if let Some(ptr) = arg.downcast_ref::() { + return Ok(ptr.get_ptr_value()); + } + if let Some(simple) = arg.downcast_ref::() { + let buffer = simple.0.buffer.read(); + if buffer.len() >= std::mem::size_of::() { + return Ok(usize::from_ne_bytes( + buffer[..std::mem::size_of::()].try_into().unwrap(), + )); + } + } + if let Some(cdata) = arg.downcast_ref::() { + // For arrays/structures, return address of buffer + return Ok(cdata.buffer.read().as_ptr() as usize); + } + // PyStr: return internal buffer address + if let Some(s) = arg.downcast_ref::() { + return Ok(s.as_str().as_ptr() as usize); + } + // PyBytes: return internal buffer address + if let Some(bytes) = arg.downcast_ref::() { + return Ok(bytes.as_bytes().as_ptr() as usize); + } + // Try as integer + if let Ok(int_val) = arg.try_int(vm) { + return Ok(int_val.as_bigint().to_usize().unwrap_or(0)); + } + Err(vm.new_type_error(format!( + "cannot convert '{}' to pointer", + arg.class().name() + ))) +} + +/// string_at implementation - read bytes from memory at ptr +fn string_at_impl(ptr: usize, size: isize, vm: &VirtualMachine) -> PyResult { + if ptr == 0 { + return Err(vm.new_value_error("NULL pointer access")); + } + let ptr = ptr as *const u8; + let len = if size < 0 { + // size == -1 means use strlen + unsafe { libc::strlen(ptr as _) } + } else { + // Overflow check for huge size values + let size_usize = size as usize; + if size_usize > isize::MAX as usize / 2 { + return Err(vm.new_overflow_error("string too long")); + } + size_usize + }; + let bytes = unsafe { std::slice::from_raw_parts(ptr, len) }; + Ok(vm.ctx.new_bytes(bytes.to_vec()).into()) +} + +/// wstring_at implementation - read wide string from memory at ptr +fn wstring_at_impl(ptr: usize, size: isize, vm: &VirtualMachine) -> PyResult { + if ptr == 0 { + return Err(vm.new_value_error("NULL pointer access")); + } + let w_ptr = ptr as *const libc::wchar_t; + let len = if size < 0 { + unsafe { libc::wcslen(w_ptr) } + } else { + // Overflow check for huge size values + let size_usize = size as usize; + if size_usize > isize::MAX as usize / std::mem::size_of::() { + return Err(vm.new_overflow_error("string too long")); + } + size_usize + }; + let wchars = unsafe { std::slice::from_raw_parts(w_ptr, len) }; + + // Windows: wchar_t = u16 (UTF-16) -> use Wtf8Buf::from_wide + // macOS/Linux: wchar_t = i32 (UTF-32) -> convert via char::from_u32 + #[cfg(windows)] + { + use rustpython_common::wtf8::Wtf8Buf; + let wide: Vec = wchars.to_vec(); + let wtf8 = Wtf8Buf::from_wide(&wide); + Ok(vm.ctx.new_str(wtf8).into()) + } + #[cfg(not(windows))] + { + let s: String = wchars + .iter() + .filter_map(|&c| char::from_u32(c as u32)) + .collect(); + Ok(vm.ctx.new_str(s).into()) + } +} + +// cast_check_pointertype +fn cast_check_pointertype(ctype: &PyObject, vm: &VirtualMachine) -> bool { + use super::pointer::PyCPointerType; + + // PyCPointerTypeObject_Check + if ctype.class().fast_issubclass(PyCPointerType::static_type()) { + return true; + } + + // PyCFuncPtrTypeObject_Check - TODO + + // simple pointer types via StgInfo.proto (c_void_p, c_char_p, etc.) + if let Ok(type_attr) = ctype.get_attr("_type_", vm) + && let Some(s) = type_attr.downcast_ref::() + { + let c = s.as_str(); + if c.len() == 1 && "sPzUZXO".contains(c) { + return true; + } + } + + false +} + +/// cast implementation +/// _ctypes.c cast() +pub(super) fn cast_impl( + obj: PyObjectRef, + src: PyObjectRef, + ctype: PyObjectRef, + vm: &VirtualMachine, +) -> PyResult { + // 1. cast_check_pointertype + if !cast_check_pointertype(&ctype, vm) { + return Err(vm.new_type_error(format!( + "cast() argument 2 must be a pointer type, not {}", + ctype.class().name() + ))); + } + + // 2. Extract pointer value - matches c_void_p_from_param_impl order + let ptr_value: usize = if vm.is_none(&obj) { + // None → NULL pointer + 0 + } else if let Ok(int_val) = obj.try_int(vm) { + // int/long → direct pointer value + int_val.as_bigint().to_usize().unwrap_or(0) + } else if let Some(bytes) = obj.downcast_ref::() { + // bytes → buffer address (c_void_p_from_param: PyBytes_Check) + bytes.as_bytes().as_ptr() as usize + } else if let Some(s) = obj.downcast_ref::() { + // unicode/str → buffer address (c_void_p_from_param: PyUnicode_Check) + s.as_str().as_ptr() as usize + } else if let Some(ptr) = obj.downcast_ref::() { + // Pointer instance → contained pointer value + ptr.get_ptr_value() + } else if let Some(simple) = obj.downcast_ref::() { + // Simple type (c_void_p, c_char_p, etc.) → value from buffer + let buffer = simple.0.buffer.read(); + super::base::read_ptr_from_buffer(&buffer) + } else if let Some(cdata) = obj.downcast_ref::() { + // Array, Structure, Union → buffer address (b_ptr) + cdata.buffer.read().as_ptr() as usize + } else { + return Err(vm.new_type_error(format!( + "cast() argument 1 must be a ctypes instance, not {}", + obj.class().name() + ))); + }; + + // 3. Create result instance + let result = ctype.call((), vm)?; + + // 4. _objects reference tracking + // Share _objects dict between source and result, add id(src): src + if src.class().fast_issubclass(PyCData::static_type()) { + // Get the source's _objects, create dict if needed + let shared_objects: PyObjectRef = if let Some(src_cdata) = src.downcast_ref::() { + let mut src_objects = src_cdata.objects.write(); + if src_objects.is_none() { + // Create new dict + let dict = vm.ctx.new_dict(); + *src_objects = Some(dict.clone().into()); + dict.into() + } else if let Some(obj) = src_objects.as_ref() { + if obj.downcast_ref::().is_none() { + // Convert to dict (keep existing reference) + let dict = vm.ctx.new_dict(); + let id_key: PyObjectRef = vm.ctx.new_int(obj.get_id() as i64).into(); + let _ = dict.set_item(&*id_key, obj.clone(), vm); + *src_objects = Some(dict.clone().into()); + dict.into() + } else { + obj.clone() + } + } else { + vm.ctx.new_dict().into() + } + } else { + vm.ctx.new_dict().into() + }; + + // Add id(src): src to the shared dict + if let Some(dict) = shared_objects.downcast_ref::() { + let id_key: PyObjectRef = vm.ctx.new_int(src.get_id() as i64).into(); + let _ = dict.set_item(&*id_key, src.clone(), vm); + } + + // Set result's _objects to the shared dict + if let Some(result_cdata) = result.downcast_ref::() { + *result_cdata.objects.write() = Some(shared_objects); + } + } + + // 5. Store pointer value + if let Some(ptr) = result.downcast_ref::() { + ptr.set_ptr_value(ptr_value); + } else if let Some(cdata) = result.downcast_ref::() { + let bytes = ptr_value.to_ne_bytes(); + let mut buffer = cdata.buffer.write(); + let buf = buffer.to_mut(); + if buf.len() >= bytes.len() { + buf[..bytes.len()].copy_from_slice(&bytes); + } + } + + Ok(result) +} + +impl PyCFuncPtr { + /// Get function pointer address from buffer + fn get_func_ptr(&self) -> usize { + let buffer = self._base.buffer.read(); + super::base::read_ptr_from_buffer(&buffer) + } + + /// Get CodePtr from buffer for FFI calls + fn get_code_ptr(&self) -> Option { + let addr = self.get_func_ptr(); + if addr != 0 { + Some(CodePtr(addr as *mut _)) + } else { + None + } + } + + /// Create buffer with function pointer address + fn make_ptr_buffer(addr: usize) -> Vec { + addr.to_ne_bytes().to_vec() + } +} + impl Constructor for PyCFuncPtr { type Args = FuncArgs; fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { - // Handle different argument forms like CPython: - // 1. Empty args: create uninitialized + // Handle different argument forms: + // 1. Empty args: create uninitialized (NULL pointer) // 2. One integer argument: function address // 3. Tuple argument: (name, dll) form + // 4. Callable: callback creation + + let ptr_size = std::mem::size_of::(); if args.args.is_empty() { return PyCFuncPtr { - _base: PyCData::new(CDataObject::from_bytes(vec![], None)), - ptr: PyRwLock::new(None), - needs_free: AtomicCell::new(false), - arg_types: PyRwLock::new(None), - _flags_: AtomicCell::new(0), - res_type: PyRwLock::new(None), - name: PyRwLock::new(None), - handler: vm.ctx.none(), + _base: PyCData::from_bytes(vec![0u8; ptr_size], None), + thunk: PyRwLock::new(None), + callable: PyRwLock::new(None), + converters: PyRwLock::new(None), + argtypes: PyRwLock::new(None), + restype: PyRwLock::new(None), + checker: PyRwLock::new(None), + errcheck: PyRwLock::new(None), + #[cfg(windows)] + index: PyRwLock::new(None), + #[cfg(windows)] + iid: PyRwLock::new(None), + paramflags: PyRwLock::new(None), } .into_ref_with_type(vm, cls) .map(Into::into); @@ -209,18 +784,68 @@ impl Constructor for PyCFuncPtr { let first_arg = &args.args[0]; + // Check for COM method form: (index, name, [paramflags], [iid]) + // First arg is integer (vtable index), second arg is string (method name) + if args.args.len() >= 2 + && first_arg.try_int(vm).is_ok() + && args.args[1].downcast_ref::().is_some() + { + #[cfg(windows)] + let index = first_arg.try_int(vm)?.as_bigint().to_usize().unwrap_or(0); + + // args[3] is iid (GUID struct, optional) + // Also check if args[2] is a GUID (has Data1 attribute) when args[3] is not present + #[cfg(windows)] + let iid = args.args.get(3).cloned().or_else(|| { + args.args.get(2).and_then(|arg| { + // If it's a GUID struct (has Data1 attribute), use it as IID + if arg.get_attr("Data1", vm).is_ok() { + Some(arg.clone()) + } else { + None + } + }) + }); + + // args[2] is paramflags (tuple or None) + let paramflags = args.args.get(2).filter(|arg| !vm.is_none(arg)).cloned(); + + return PyCFuncPtr { + _base: PyCData::from_bytes(vec![0u8; ptr_size], None), + thunk: PyRwLock::new(None), + callable: PyRwLock::new(None), + converters: PyRwLock::new(None), + argtypes: PyRwLock::new(None), + restype: PyRwLock::new(None), + checker: PyRwLock::new(None), + errcheck: PyRwLock::new(None), + #[cfg(windows)] + index: PyRwLock::new(Some(index)), + #[cfg(windows)] + iid: PyRwLock::new(iid), + paramflags: PyRwLock::new(paramflags), + } + .into_ref_with_type(vm, cls) + .map(Into::into); + } + // Check if first argument is an integer (function address) if let Ok(addr) = first_arg.try_int(vm) { let ptr_val = addr.as_bigint().to_usize().unwrap_or(0); return PyCFuncPtr { - _base: PyCData::new(CDataObject::from_bytes(vec![], None)), - ptr: PyRwLock::new(Some(CodePtr(ptr_val as *mut _))), - needs_free: AtomicCell::new(false), - arg_types: PyRwLock::new(None), - _flags_: AtomicCell::new(0), - res_type: PyRwLock::new(None), - name: PyRwLock::new(Some(format!("CFuncPtr@{:#x}", ptr_val))), - handler: vm.ctx.new_int(ptr_val).into(), + _base: PyCData::from_bytes(Self::make_ptr_buffer(ptr_val), None), + thunk: PyRwLock::new(None), + callable: PyRwLock::new(None), + converters: PyRwLock::new(None), + argtypes: PyRwLock::new(None), + restype: PyRwLock::new(None), + checker: PyRwLock::new(None), + errcheck: PyRwLock::new(None), + #[cfg(windows)] + index: PyRwLock::new(None), + #[cfg(windows)] + iid: PyRwLock::new(None), + paramflags: PyRwLock::new(None), } .into_ref_with_type(vm, cls) .map(Into::into); @@ -234,53 +859,58 @@ impl Constructor for PyCFuncPtr { .downcast_ref::() .ok_or(vm.new_type_error("Expected a string"))? .to_string(); - let handler = tuple + let dll = tuple .iter() .nth(1) .ok_or(vm.new_type_error("Expected a tuple with at least 2 elements"))? .clone(); // Get library handle and load function - let handle = handler.try_int(vm); + let handle = dll.try_int(vm); let handle = match handle { Ok(handle) => handle.as_bigint().clone(), - Err(_) => handler + Err(_) => dll .get_attr("_handle", vm)? .try_int(vm)? .as_bigint() .clone(), }; - let library_cache = crate::stdlib::ctypes::library::libcache().read(); + let library_cache = super::library::libcache().read(); let library = library_cache .get_lib( handle .to_usize() - .ok_or(vm.new_value_error("Invalid handle".to_string()))?, + .ok_or(vm.new_value_error("Invalid handle"))?, ) - .ok_or_else(|| vm.new_value_error("Library not found".to_string()))?; + .ok_or_else(|| vm.new_value_error("Library not found"))?; let inner_lib = library.lib.lock(); let terminated = format!("{}\0", &name); - let code_ptr = if let Some(lib) = &*inner_lib { + let ptr_val = if let Some(lib) = &*inner_lib { let pointer: Symbol<'_, FP> = unsafe { lib.get(terminated.as_bytes()) .map_err(|err| err.to_string()) .map_err(|err| vm.new_attribute_error(err))? }; - Some(CodePtr(*pointer as *mut _)) + *pointer as usize } else { - None + 0 }; return PyCFuncPtr { - _base: PyCData::new(CDataObject::from_bytes(vec![], None)), - ptr: PyRwLock::new(code_ptr), - needs_free: AtomicCell::new(false), - arg_types: PyRwLock::new(None), - _flags_: AtomicCell::new(0), - res_type: PyRwLock::new(None), - name: PyRwLock::new(Some(name)), - handler, + _base: PyCData::from_bytes(Self::make_ptr_buffer(ptr_val), None), + thunk: PyRwLock::new(None), + callable: PyRwLock::new(None), + converters: PyRwLock::new(None), + argtypes: PyRwLock::new(None), + restype: PyRwLock::new(None), + checker: PyRwLock::new(None), + errcheck: PyRwLock::new(None), + #[cfg(windows)] + index: PyRwLock::new(None), + #[cfg(windows)] + iid: PyRwLock::new(None), + paramflags: PyRwLock::new(None), } .into_ref_with_type(vm, cls) .map(Into::into); @@ -289,42 +919,36 @@ impl Constructor for PyCFuncPtr { // Check if first argument is a Python callable (callback creation) if first_arg.is_callable() { // Get argument types and result type from the class - let argtypes = cls.get_attr(vm.ctx.intern_str("_argtypes_")); - let restype = cls.get_attr(vm.ctx.intern_str("_restype_")); + let class_argtypes = cls.get_attr(vm.ctx.intern_str("_argtypes_")); + let class_restype = cls.get_attr(vm.ctx.intern_str("_restype_")); // Create the thunk (C-callable wrapper for the Python function) - let thunk = PyCThunk::new(first_arg.clone(), argtypes.clone(), restype.clone(), vm)?; + let thunk = PyCThunk::new( + first_arg.clone(), + class_argtypes.clone(), + class_restype.clone(), + vm, + )?; let code_ptr = thunk.code_ptr(); - - // Parse argument types for storage - let arg_type_vec: Option> = if let Some(ref args) = argtypes { - if vm.is_none(args) { - None - } else { - let mut types = Vec::new(); - for item in args.try_to_value::>(vm)? { - types.push(item.downcast::().map_err(|_| { - vm.new_type_error("_argtypes_ must be a sequence of types".to_string()) - })?); - } - Some(types) - } - } else { - None - }; + let ptr_val = code_ptr.0 as usize; // Store the thunk as a Python object to keep it alive let thunk_ref: PyRef = thunk.into_ref(&vm.ctx); return PyCFuncPtr { - _base: PyCData::new(CDataObject::from_bytes(vec![], None)), - ptr: PyRwLock::new(Some(code_ptr)), - needs_free: AtomicCell::new(true), - arg_types: PyRwLock::new(arg_type_vec), - _flags_: AtomicCell::new(0), - res_type: PyRwLock::new(restype), - name: PyRwLock::new(Some("".to_string())), - handler: thunk_ref.into(), + _base: PyCData::from_bytes(Self::make_ptr_buffer(ptr_val), None), + thunk: PyRwLock::new(Some(thunk_ref)), + callable: PyRwLock::new(Some(first_arg.clone())), + converters: PyRwLock::new(None), + argtypes: PyRwLock::new(class_argtypes), + restype: PyRwLock::new(class_restype), + checker: PyRwLock::new(None), + errcheck: PyRwLock::new(None), + #[cfg(windows)] + index: PyRwLock::new(None), + #[cfg(windows)] + iid: PyRwLock::new(None), + paramflags: PyRwLock::new(None), } .into_ref_with_type(vm, cls) .map(Into::into); @@ -338,142 +962,1054 @@ impl Constructor for PyCFuncPtr { } } -impl Callable for PyCFuncPtr { - type Args = FuncArgs; - fn call(zelf: &Py, args: Self::Args, vm: &VirtualMachine) -> PyResult { - // This is completely seperate from the C python implementation - - // Cif init - let arg_types: Vec<_> = match zelf.arg_types.read().clone() { - Some(tys) => tys, - None => args - .args - .clone() - .into_iter() - .map(|a| a.class().as_object().to_pyobject(vm).downcast().unwrap()) - .collect(), +// PyCFuncPtr call helpers (similar to callproc.c flow) + +/// Handle internal function addresses (PYFUNCTYPE special cases) +/// Returns Some(result) if handled, None if should continue with normal call +fn handle_internal_func(addr: usize, args: &FuncArgs, vm: &VirtualMachine) -> Option { + if addr == INTERNAL_CAST_ADDR { + let result: PyResult<(PyObjectRef, PyObjectRef, PyObjectRef)> = args.clone().bind(vm); + return Some(result.and_then(|(obj, src, ctype)| cast_impl(obj, src, ctype, vm))); + } + + if addr == INTERNAL_STRING_AT_ADDR { + let result: PyResult<(PyObjectRef, Option)> = args.clone().bind(vm); + return Some(result.and_then(|(ptr_arg, size_arg)| { + let ptr = extract_ptr_from_arg(&ptr_arg, vm)?; + let size = size_arg + .and_then(|s| s.try_int(vm).ok()) + .and_then(|i| i.as_bigint().to_isize()) + .unwrap_or(-1); + string_at_impl(ptr, size, vm) + })); + } + + if addr == INTERNAL_WSTRING_AT_ADDR { + let result: PyResult<(PyObjectRef, Option)> = args.clone().bind(vm); + return Some(result.and_then(|(ptr_arg, size_arg)| { + let ptr = extract_ptr_from_arg(&ptr_arg, vm)?; + let size = size_arg + .and_then(|s| s.try_int(vm).ok()) + .and_then(|i| i.as_bigint().to_isize()) + .unwrap_or(-1); + wstring_at_impl(ptr, size, vm) + })); + } + + None +} + +/// Call information extracted from PyCFuncPtr (argtypes, restype, etc.) +struct CallInfo { + explicit_arg_types: Option>, + restype_obj: Option, + restype_is_none: bool, + ffi_return_type: Type, + is_pointer_return: bool, +} + +/// Extract call information (argtypes, restype) from PyCFuncPtr +fn extract_call_info(zelf: &Py, vm: &VirtualMachine) -> PyResult { + // Get argtypes - first from instance, then from type's _argtypes_ + let explicit_arg_types: Option> = + if let Some(argtypes_obj) = zelf.argtypes.read().as_ref() { + if !vm.is_none(argtypes_obj) { + Some( + argtypes_obj + .try_to_value::>(vm)? + .into_iter() + .filter_map(|obj| obj.downcast::().ok()) + .collect(), + ) + } else { + None // argtypes is None -> use ConvParam + } + } else if let Some(class_argtypes) = zelf + .as_object() + .class() + .get_attr(vm.ctx.intern_str("_argtypes_")) + && !vm.is_none(&class_argtypes) + { + Some( + class_argtypes + .try_to_value::>(vm)? + .into_iter() + .filter_map(|obj| obj.downcast::().ok()) + .collect(), + ) + } else { + None // No argtypes -> use ConvParam }; - let ffi_arg_types = arg_types - .clone() - .iter() - .map(|t| ArgumentType::to_ffi_type(t, vm)) - .collect::>>()?; - let return_type = zelf.res_type.read(); - let ffi_return_type = return_type + + // Get restype - first from instance, then from class's _restype_ + let restype_obj = zelf.restype.read().clone().or_else(|| { + zelf.as_object() + .class() + .get_attr(vm.ctx.intern_str("_restype_")) + }); + + // Check if restype is explicitly None (return void) + let restype_is_none = restype_obj.as_ref().is_some_and(|t| vm.is_none(t)); + let ffi_return_type = if restype_is_none { + Type::void() + } else { + restype_obj .as_ref() .and_then(|t| t.clone().downcast::().ok()) - .and_then(|t| ReturnType::to_ffi_type(&t)) - .unwrap_or_else(Type::i32); - let cif = Cif::new(ffi_arg_types, ffi_return_type); - - // Call the function - let ffi_args = args - .args - .into_iter() - .enumerate() - .map(|(n, arg)| { - let arg_type = arg_types - .get(n) - .ok_or_else(|| vm.new_type_error("argument amount mismatch".to_string()))?; - arg_type.convert_object(arg, vm) - }) - .collect::, _>>()?; - let pointer = zelf.ptr.read(); - let code_ptr = pointer - .as_ref() - .ok_or_else(|| vm.new_type_error("Function pointer not set".to_string()))?; - let mut output: c_void = unsafe { cif.call(*code_ptr, &ffi_args) }; - let return_type = return_type + .and_then(|t| ReturnType::to_ffi_type(&t, vm)) + .unwrap_or_else(Type::i32) + }; + + // Check if return type is a pointer type (P, z, Z) - need special handling on 64-bit + let is_pointer_return = restype_obj + .as_ref() + .and_then(|t| t.clone().downcast::().ok()) + .and_then(|t| t.as_object().get_attr(vm.ctx.intern_str("_type_"), vm).ok()) + .and_then(|t| t.downcast_ref::().map(|s| s.to_string())) + .is_some_and(|tc| matches!(tc.as_str(), "P" | "z" | "Z")); + + Ok(CallInfo { + explicit_arg_types, + restype_obj, + restype_is_none, + ffi_return_type, + is_pointer_return, + }) +} + +/// Parsed paramflags: (direction, name, default) tuples +/// direction: 1=IN, 2=OUT, 4=IN|OUT (or 1|2=3) +type ParsedParamFlags = Vec<(u32, Option, Option)>; + +/// Parse paramflags from PyCFuncPtr +fn parse_paramflags( + zelf: &Py, + vm: &VirtualMachine, +) -> PyResult> { + let Some(pf) = zelf.paramflags.read().as_ref().cloned() else { + return Ok(None); + }; + + let pf_vec = pf.try_to_value::>(vm)?; + let parsed = pf_vec + .into_iter() + .map(|item| { + let Some(tuple) = item.downcast_ref::() else { + // Single value means just the direction + let direction = item + .try_int(vm) + .ok() + .and_then(|i| i.as_bigint().to_u32()) + .unwrap_or(1); + return (direction, None, None); + }; + let direction = tuple + .first() + .and_then(|d| d.try_int(vm).ok()) + .and_then(|i| i.as_bigint().to_u32()) + .unwrap_or(1); + let name = tuple + .get(1) + .and_then(|n| n.downcast_ref::().map(|s| s.to_string())); + let default = tuple.get(2).cloned(); + (direction, name, default) + }) + .collect(); + Ok(Some(parsed)) +} + +/// Resolve COM method pointer from vtable (Windows only) +/// Returns (Some(CodePtr), true) if this is a COM method call, (None, false) otherwise +#[cfg(windows)] +fn resolve_com_method( + zelf: &Py, + args: &FuncArgs, + vm: &VirtualMachine, +) -> PyResult<(Option, bool)> { + let com_index = zelf.index.read(); + let Some(idx) = *com_index else { + return Ok((None, false)); + }; + + // First arg must be the COM object pointer + if args.args.is_empty() { + return Err( + vm.new_type_error("COM method requires at least one argument (self)".to_string()) + ); + } + + // Extract COM pointer value from first argument + let self_arg = &args.args[0]; + let com_ptr = if let Some(simple) = self_arg.downcast_ref::() { + let buffer = simple.0.buffer.read(); + if buffer.len() >= std::mem::size_of::() { + super::base::read_ptr_from_buffer(&buffer) + } else { + 0 + } + } else if let Ok(int_val) = self_arg.try_int(vm) { + int_val.as_bigint().to_usize().unwrap_or(0) + } else { + return Err( + vm.new_type_error("COM method first argument must be a COM pointer".to_string()) + ); + }; + + if com_ptr == 0 { + return Err(vm.new_value_error("NULL COM pointer access")); + } + + // Read vtable pointer from COM object: vtable = *(void**)com_ptr + let vtable_ptr = unsafe { *(com_ptr as *const usize) }; + if vtable_ptr == 0 { + return Err(vm.new_value_error("NULL vtable pointer")); + } + + // Read function pointer from vtable: func = vtable[index] + let fptr = unsafe { + let vtable = vtable_ptr as *const usize; + *vtable.add(idx) + }; + + if fptr == 0 { + return Err(vm.new_value_error("NULL function pointer in vtable")); + } + + Ok((Some(CodePtr(fptr as *mut _)), true)) +} + +/// Prepared arguments for FFI call +struct PreparedArgs { + ffi_arg_types: Vec, + ffi_values: Vec, + out_buffers: Vec<(usize, PyObjectRef)>, +} + +/// Get buffer address from a ctypes object +fn get_buffer_addr(obj: &PyObjectRef) -> Option { + obj.downcast_ref::() + .map(|s| s.0.buffer.read().as_ptr() as usize) + .or_else(|| { + obj.downcast_ref::() + .map(|s| s.0.buffer.read().as_ptr() as usize) + }) + .or_else(|| { + obj.downcast_ref::() + .map(|s| s.0.buffer.read().as_ptr() as usize) + }) +} + +/// Create OUT buffer for a parameter type +fn create_out_buffer(arg_type: &PyTypeRef, vm: &VirtualMachine) -> PyResult { + // For POINTER(T) types, create T instance (the pointed-to type) + if arg_type.fast_issubclass(PyCPointer::static_type()) + && let Some(stg_info) = arg_type.stg_info_opt() + && let Some(ref proto) = stg_info.proto + { + return proto.as_object().call((), vm); + } + // Not a pointer type or no proto, create instance directly + arg_type.as_object().call((), vm) +} + +/// Build callargs when no argtypes specified (use ConvParam) +fn build_callargs_no_argtypes(args: &FuncArgs, vm: &VirtualMachine) -> PyResult { + let results: Vec<(Type, FfiArgValue)> = args + .args + .iter() + .map(|arg| conv_param(arg, vm)) + .collect::>>()?; + let (ffi_arg_types, ffi_values) = results.into_iter().unzip(); + Ok(PreparedArgs { + ffi_arg_types, + ffi_values, + out_buffers: Vec::new(), + }) +} + +/// Build callargs for regular function with argtypes (no paramflags) +fn build_callargs_simple( + args: &FuncArgs, + arg_types: &[PyTypeRef], + vm: &VirtualMachine, +) -> PyResult { + let ffi_arg_types = arg_types + .iter() + .map(|t| ArgumentType::to_ffi_type(t, vm)) + .collect::>>()?; + let ffi_values = args + .args + .iter() + .enumerate() + .map(|(n, arg)| { + let arg_type = arg_types + .get(n) + .ok_or_else(|| vm.new_type_error("argument amount mismatch"))?; + arg_type.convert_object(arg.clone(), vm) + }) + .collect::, _>>()?; + Ok(PreparedArgs { + ffi_arg_types, + ffi_values, + out_buffers: Vec::new(), + }) +} + +/// Build callargs with paramflags (handles IN/OUT parameters) +fn build_callargs_with_paramflags( + args: &FuncArgs, + arg_types: &[PyTypeRef], + paramflags: &ParsedParamFlags, + skip_first_arg: bool, // true for COM methods + vm: &VirtualMachine, +) -> PyResult { + let mut ffi_arg_types = Vec::new(); + let mut ffi_values = Vec::new(); + let mut out_buffers = Vec::new(); + + // For COM methods, first arg is self (pointer) + let mut caller_arg_idx = if skip_first_arg { + ffi_arg_types.push(Type::pointer()); + if !args.args.is_empty() { + ffi_values.push(conv_param(&args.args[0], vm)?.1); + } + 1usize + } else { + 0usize + }; + + // Add FFI types for all argtypes + for arg_type in arg_types { + ffi_arg_types.push(ArgumentType::to_ffi_type(arg_type, vm)?); + } + + // Process parameters based on paramflags + for (param_idx, (direction, _name, default)) in paramflags.iter().enumerate() { + let arg_type = arg_types + .get(param_idx) + .ok_or_else(|| vm.new_type_error("paramflags/argtypes mismatch"))?; + + let is_out = (*direction & 2) != 0; // OUT flag + let is_in = (*direction & 1) != 0 || *direction == 0; // IN flag or default + + if is_out && !is_in { + // Pure OUT parameter: create buffer, don't consume caller arg + let buffer = create_out_buffer(arg_type, vm)?; + let addr = get_buffer_addr(&buffer).ok_or_else(|| { + vm.new_type_error("Cannot create OUT buffer for this type".to_string()) + })?; + ffi_values.push(FfiArgValue::Pointer(addr)); + out_buffers.push((param_idx, buffer)); + } else { + // IN or IN|OUT: get from caller args or default + let arg = if caller_arg_idx < args.args.len() { + caller_arg_idx += 1; + args.args[caller_arg_idx - 1].clone() + } else if let Some(def) = default { + def.clone() + } else { + return Err(vm.new_type_error(format!("required argument {} missing", param_idx))); + }; + + if is_out { + // IN|OUT: track for return + out_buffers.push((param_idx, arg.clone())); + } + ffi_values.push(arg_type.convert_object(arg, vm)?); + } + } + + Ok(PreparedArgs { + ffi_arg_types, + ffi_values, + out_buffers, + }) +} + +/// Build call arguments (main dispatcher) +fn build_callargs( + args: &FuncArgs, + call_info: &CallInfo, + paramflags: Option<&ParsedParamFlags>, + is_com_method: bool, + vm: &VirtualMachine, +) -> PyResult { + let Some(ref arg_types) = call_info.explicit_arg_types else { + // No argtypes: use ConvParam + return build_callargs_no_argtypes(args, vm); + }; + + if let Some(pflags) = paramflags { + // Has paramflags: handle IN/OUT + build_callargs_with_paramflags(args, arg_types, pflags, is_com_method, vm) + } else if is_com_method { + // COM method without paramflags + let mut ffi_types = vec![Type::pointer()]; + ffi_types.extend( + arg_types + .iter() + .map(|t| ArgumentType::to_ffi_type(t, vm)) + .collect::>>()?, + ); + let mut ffi_vals = Vec::new(); + if !args.args.is_empty() { + ffi_vals.push(conv_param(&args.args[0], vm)?.1); + } + for (n, arg) in args.args.iter().skip(1).enumerate() { + let arg_type = arg_types + .get(n) + .ok_or_else(|| vm.new_type_error("argument amount mismatch"))?; + ffi_vals.push(arg_type.convert_object(arg.clone(), vm)?); + } + Ok(PreparedArgs { + ffi_arg_types: ffi_types, + ffi_values: ffi_vals, + out_buffers: Vec::new(), + }) + } else { + // Regular function + build_callargs_simple(args, arg_types, vm) + } +} + +/// Raw result from FFI call +enum RawResult { + Void, + Pointer(usize), + Value(libffi::low::ffi_arg), +} + +/// Execute FFI call +fn ctypes_callproc(code_ptr: CodePtr, prepared: &PreparedArgs, call_info: &CallInfo) -> RawResult { + let cif = Cif::new( + prepared.ffi_arg_types.clone(), + call_info.ffi_return_type.clone(), + ); + let ffi_args: Vec = prepared.ffi_values.iter().map(|v| v.as_arg()).collect(); + + if call_info.restype_is_none { + unsafe { cif.call::<()>(code_ptr, &ffi_args) }; + RawResult::Void + } else if call_info.is_pointer_return { + let result = unsafe { cif.call::(code_ptr, &ffi_args) }; + RawResult::Pointer(result) + } else { + let result = unsafe { cif.call::(code_ptr, &ffi_args) }; + RawResult::Value(result) + } +} + +/// Check and handle HRESULT errors (Windows) +#[cfg(windows)] +fn check_hresult(hresult: i32, zelf: &Py, vm: &VirtualMachine) -> PyResult<()> { + if hresult >= 0 { + return Ok(()); + } + + if zelf.iid.read().is_some() { + // Raise COMError + let ctypes_module = vm.import("_ctypes", 0)?; + let com_error_type = ctypes_module.get_attr("COMError", vm)?; + let com_error_type = com_error_type + .downcast::() + .map_err(|_| vm.new_type_error("COMError is not a type"))?; + let hresult_obj: PyObjectRef = vm.ctx.new_int(hresult).into(); + let text: PyObjectRef = vm + .ctx + .new_str(format!("HRESULT: 0x{:08X}", hresult as u32)) + .into(); + let details: PyObjectRef = vm.ctx.none(); + let exc = vm.invoke_exception( + com_error_type.to_owned(), + vec![text.clone(), details.clone()], + )?; + let _ = exc.as_object().set_attr("hresult", hresult_obj, vm); + let _ = exc.as_object().set_attr("text", text, vm); + let _ = exc.as_object().set_attr("details", details, vm); + Err(exc) + } else { + // Raise OSError + let exc = vm.new_os_error(format!("HRESULT: 0x{:08X}", hresult as u32)); + let _ = exc + .as_object() + .set_attr("winerror", vm.ctx.new_int(hresult), vm); + Err(exc) + } +} + +/// Convert raw FFI result to Python object +fn convert_raw_result( + raw_result: &mut RawResult, + call_info: &CallInfo, + vm: &VirtualMachine, +) -> Option { + match raw_result { + RawResult::Void => None, + RawResult::Pointer(ptr) => { + // Get type code from restype to determine conversion method + let type_code = call_info + .restype_obj + .as_ref() + .and_then(|t| t.clone().downcast::().ok()) + .and_then(|t| t.as_object().get_attr(vm.ctx.intern_str("_type_"), vm).ok()) + .and_then(|t| t.downcast_ref::().map(|s| s.to_string())); + + match type_code.as_deref() { + Some("z") => { + // c_char_p: NULL -> None, otherwise read C string -> bytes + if *ptr == 0 { + Some(vm.ctx.none()) + } else { + let cstr = unsafe { std::ffi::CStr::from_ptr(*ptr as _) }; + Some(vm.ctx.new_bytes(cstr.to_bytes().to_vec()).into()) + } + } + Some("Z") => { + // c_wchar_p: NULL -> None, otherwise read wide string -> str + if *ptr == 0 { + Some(vm.ctx.none()) + } else { + let wstr_ptr = *ptr as *const libc::wchar_t; + let mut len = 0; + unsafe { + while *wstr_ptr.add(len) != 0 { + len += 1; + } + } + let slice = unsafe { std::slice::from_raw_parts(wstr_ptr, len) }; + let s: String = slice + .iter() + .filter_map(|&c| char::from_u32(c as u32)) + .collect(); + Some(vm.ctx.new_str(s).into()) + } + } + _ => { + // c_void_p ("P") and other pointer types: NULL -> None, otherwise int + if *ptr == 0 { + Some(vm.ctx.none()) + } else { + Some(vm.ctx.new_int(*ptr).into()) + } + } + } + } + RawResult::Value(val) => call_info + .restype_obj .as_ref() .and_then(|f| f.clone().downcast::().ok()) - .map(|f| f.from_ffi_type(&mut output, vm).ok().flatten()) - .unwrap_or_else(|| Some(vm.ctx.new_int(output as i32).as_object().to_pyobject(vm))); - if let Some(return_type) = return_type { - Ok(return_type) - } else { - Ok(vm.ctx.none()) + .map(|f| { + f.from_ffi_type(val as *mut _ as *mut c_void, vm) + .ok() + .flatten() + }) + .unwrap_or_else(|| Some(vm.ctx.new_int(*val as usize).as_object().to_pyobject(vm))), + } +} + +/// Extract values from OUT buffers +fn extract_out_values( + out_buffers: Vec<(usize, PyObjectRef)>, + vm: &VirtualMachine, +) -> Vec { + out_buffers + .into_iter() + .map(|(_, buffer)| buffer.get_attr("value", vm).unwrap_or(buffer)) + .collect() +} + +/// Build final result (main function) +fn build_result( + mut raw_result: RawResult, + call_info: &CallInfo, + prepared: PreparedArgs, + zelf: &Py, + args: &FuncArgs, + vm: &VirtualMachine, +) -> PyResult { + // Check HRESULT on Windows + #[cfg(windows)] + if let RawResult::Value(val) = raw_result { + let is_hresult = call_info + .restype_obj + .as_ref() + .and_then(|t| t.clone().downcast::().ok()) + .is_some_and(|t| t.name().to_string() == "HRESULT"); + if is_hresult { + check_hresult(val as i32, zelf, vm)?; } } + + // Convert raw result to Python object + let mut result = convert_raw_result(&mut raw_result, call_info, vm); + + // Apply errcheck if set + if let Some(errcheck) = zelf.errcheck.read().as_ref() { + let args_tuple = PyTuple::new_ref(args.args.clone(), &vm.ctx); + let func_obj = zelf.as_object().to_owned(); + let result_obj = result.clone().unwrap_or_else(|| vm.ctx.none()); + result = Some(errcheck.call((result_obj, func_obj, args_tuple), vm)?); + } + + // Handle OUT parameter return values + if prepared.out_buffers.is_empty() { + return result.map(Ok).unwrap_or_else(|| Ok(vm.ctx.none())); + } + + let out_values = extract_out_values(prepared.out_buffers, vm); + Ok(match <[PyObjectRef; 1]>::try_from(out_values) { + Ok([single]) => single, + Err(v) => PyTuple::new_ref(v, &vm.ctx).into(), + }) } +impl Callable for PyCFuncPtr { + type Args = FuncArgs; + fn call(zelf: &Py, args: Self::Args, vm: &VirtualMachine) -> PyResult { + // 1. Check for internal PYFUNCTYPE addresses + if let Some(result) = handle_internal_func(zelf.get_func_ptr(), &args, vm) { + return result; + } + + // 2. Resolve function pointer (COM or direct) + #[cfg(windows)] + let (func_ptr, is_com_method) = resolve_com_method(zelf, &args, vm)?; + #[cfg(not(windows))] + let (func_ptr, is_com_method) = (None::, false); + + // 3. Extract call info (argtypes, restype) + let call_info = extract_call_info(zelf, vm)?; + + // 4. Parse paramflags + let paramflags = parse_paramflags(zelf, vm)?; + + // 5. Build call arguments + let prepared = build_callargs(&args, &call_info, paramflags.as_ref(), is_com_method, vm)?; + + // 6. Get code pointer + let code_ptr = match func_ptr.or_else(|| zelf.get_code_ptr()) { + Some(cp) => cp, + None => { + debug_assert!(false, "NULL function pointer"); + // In release mode, this will crash like CPython + CodePtr(std::ptr::null_mut()) + } + }; + + // 7. Call the function + let raw_result = ctypes_callproc(code_ptr, &prepared, &call_info); + + // 8. Build result + build_result(raw_result, &call_info, prepared, zelf, &args, vm) + } +} + +// PyCFuncPtr_repr impl Representable for PyCFuncPtr { fn repr_str(zelf: &Py, _vm: &VirtualMachine) -> PyResult { - let index = zelf.ptr.read(); - let index = index.map(|ptr| ptr.0 as usize).unwrap_or(0); let type_name = zelf.class().name(); - if cfg!(windows) { - let index = index - 0x1000; - Ok(format!("")) - } else { - Ok(format!("<{type_name} object at {index:#x}>")) - } + // Use object id, not function pointer address + let addr = zelf.get_id(); + Ok(format!("<{} object at {:#x}>", type_name, addr)) } } -// TODO: fix -unsafe impl Send for PyCFuncPtr {} -unsafe impl Sync for PyCFuncPtr {} - #[pyclass(flags(BASETYPE), with(Callable, Constructor, Representable))] impl PyCFuncPtr { - #[pygetset(name = "_restype_")] + // restype getter/setter + #[pygetset] fn restype(&self) -> Option { - self.res_type.read().as_ref().cloned() + self.restype.read().clone() } - #[pygetset(name = "_restype_", setter)] - fn set_restype(&self, restype: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { - // has to be type, callable, or none - // TODO: Callable support - if vm.is_none(&restype) || restype.downcast_ref::().is_some() { - *self.res_type.write() = Some(restype); - Ok(()) + #[pygetset(setter)] + fn set_restype(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + // Must be type, callable, or None + if vm.is_none(&value) { + *self.restype.write() = None; + } else if value.downcast_ref::().is_some() || value.is_callable() { + *self.restype.write() = Some(value); } else { - Err(vm.new_type_error("restype must be a type, a callable, or None".to_string())) + return Err(vm.new_type_error("restype must be a type, a callable, or None")); } + Ok(()) } - #[pygetset(name = "argtypes")] - fn argtypes(&self, vm: &VirtualMachine) -> PyTupleRef { - PyTuple::new_ref( - self.arg_types - .read() - .clone() - .unwrap_or_default() - .into_iter() - .map(|t| t.to_pyobject(vm)) - .collect(), - &vm.ctx, - ) + // argtypes getter/setter + #[pygetset] + fn argtypes(&self, vm: &VirtualMachine) -> PyObjectRef { + self.argtypes + .read() + .clone() + .unwrap_or_else(|| vm.ctx.empty_tuple.clone().into()) } #[pygetset(name = "argtypes", setter)] - fn set_argtypes(&self, argtypes: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { - let none = vm.is_none(&argtypes); - if none { - *self.arg_types.write() = None; - Ok(()) + fn set_argtypes(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + if vm.is_none(&value) { + *self.argtypes.write() = None; } else { - let tuple = argtypes.downcast::().unwrap(); - *self.arg_types.write() = Some( - tuple - .iter() - .map(|obj| obj.clone().downcast::().unwrap()) - .collect::>(), - ); - Ok(()) + // Store the argtypes object directly as it is + *self.argtypes.write() = Some(value); } + Ok(()) } + // errcheck getter/setter #[pygetset] - fn __name__(&self) -> Option { - self.name.read().clone() + fn errcheck(&self) -> Option { + self.errcheck.read().clone() } #[pygetset(setter)] - fn set___name__(&self, name: String) -> PyResult<()> { - *self.name.write() = Some(name); - // TODO: update handle and stuff + fn set_errcheck(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + if vm.is_none(&value) { + *self.errcheck.write() = None; + } else if value.is_callable() { + *self.errcheck.write() = Some(value); + } else { + return Err(vm.new_type_error("errcheck must be a callable or None")); + } Ok(()) } + + // _flags_ getter (read-only, from type's class attribute or StgInfo) + #[pygetset] + fn _flags_(zelf: &Py, vm: &VirtualMachine) -> u32 { + // First try to get _flags_ from type's class attribute (for dynamically created types) + // This is how CDLL sets use_errno: class _FuncPtr(_CFuncPtr): _flags_ = flags + if let Ok(flags_attr) = zelf.class().as_object().get_attr("_flags_", vm) + && let Ok(flags_int) = flags_attr.try_to_value::(vm) + { + return flags_int; + } + + // Fallback to StgInfo for native types + use super::base::StgInfoFlags; + zelf.class() + .stg_info_opt() + .map(|stg| stg.flags.bits()) + .unwrap_or(StgInfoFlags::empty().bits()) + } + + // bool conversion - check if function pointer is set + #[pymethod] + fn __bool__(&self) -> bool { + self.get_func_ptr() != 0 + } +} + +// CThunkObject - FFI callback (thunk) implementation + +/// Userdata passed to the libffi callback. +struct ThunkUserData { + /// The Python callable to invoke + callable: PyObjectRef, + /// Argument types for conversion + arg_types: Vec, + /// Result type for conversion (None means void) + res_type: Option, +} + +/// Check if ty is a subclass of a simple type (like MyInt(c_int)). +fn is_simple_subclass(ty: &Py, vm: &VirtualMachine) -> bool { + let Ok(base) = ty.as_object().get_attr(vm.ctx.intern_str("__base__"), vm) else { + return false; + }; + base.get_attr(vm.ctx.intern_str("_type_"), vm).is_ok() +} + +/// Convert a C value to a Python object based on the type code. +fn ffi_to_python(ty: &Py, ptr: *const c_void, vm: &VirtualMachine) -> PyObjectRef { + let type_code = ty.type_code(vm); + let raw_value: PyObjectRef = unsafe { + match type_code.as_deref() { + Some("b") => vm.ctx.new_int(*(ptr as *const i8) as i32).into(), + Some("B") => vm.ctx.new_int(*(ptr as *const u8) as i32).into(), + Some("c") => vm.ctx.new_bytes(vec![*(ptr as *const u8)]).into(), + Some("h") => vm.ctx.new_int(*(ptr as *const i16) as i32).into(), + Some("H") => vm.ctx.new_int(*(ptr as *const u16) as i32).into(), + Some("i") => vm.ctx.new_int(*(ptr as *const i32)).into(), + Some("I") => vm.ctx.new_int(*(ptr as *const u32)).into(), + Some("l") => vm.ctx.new_int(*(ptr as *const libc::c_long)).into(), + Some("L") => vm.ctx.new_int(*(ptr as *const libc::c_ulong)).into(), + Some("q") => vm.ctx.new_int(*(ptr as *const libc::c_longlong)).into(), + Some("Q") => vm.ctx.new_int(*(ptr as *const libc::c_ulonglong)).into(), + Some("f") => vm.ctx.new_float(*(ptr as *const f32) as f64).into(), + Some("d") => vm.ctx.new_float(*(ptr as *const f64)).into(), + Some("z") => { + // c_char_p: C string pointer → Python bytes + let cstr_ptr = *(ptr as *const *const libc::c_char); + if cstr_ptr.is_null() { + vm.ctx.none() + } else { + let cstr = std::ffi::CStr::from_ptr(cstr_ptr); + vm.ctx.new_bytes(cstr.to_bytes().to_vec()).into() + } + } + Some("Z") => { + // c_wchar_p: wchar_t* → Python str + let wstr_ptr = *(ptr as *const *const libc::wchar_t); + if wstr_ptr.is_null() { + vm.ctx.none() + } else { + let mut len = 0; + while *wstr_ptr.add(len) != 0 { + len += 1; + } + let slice = std::slice::from_raw_parts(wstr_ptr, len); + let s: String = slice + .iter() + .filter_map(|&c| char::from_u32(c as u32)) + .collect(); + vm.ctx.new_str(s).into() + } + } + Some("P") => vm.ctx.new_int(*(ptr as *const usize)).into(), + Some("?") => vm.ctx.new_bool(*(ptr as *const u8) != 0).into(), + _ => return vm.ctx.none(), + } + }; + + if !is_simple_subclass(ty, vm) { + return raw_value; + } + ty.as_object() + .call((raw_value.clone(),), vm) + .unwrap_or(raw_value) +} + +/// Convert a Python object to a C value and store it at the result pointer +fn python_to_ffi(obj: PyResult, ty: &Py, result: *mut c_void, vm: &VirtualMachine) { + let Ok(obj) = obj else { return }; + + let type_code = ty.type_code(vm); + unsafe { + match type_code.as_deref() { + Some("b") => { + if let Ok(i) = obj.try_int(vm) { + *(result as *mut i8) = i.as_bigint().to_i8().unwrap_or(0); + } + } + Some("B") => { + if let Ok(i) = obj.try_int(vm) { + *(result as *mut u8) = i.as_bigint().to_u8().unwrap_or(0); + } + } + Some("c") => { + if let Ok(i) = obj.try_int(vm) { + *(result as *mut u8) = i.as_bigint().to_u8().unwrap_or(0); + } + } + Some("h") => { + if let Ok(i) = obj.try_int(vm) { + *(result as *mut i16) = i.as_bigint().to_i16().unwrap_or(0); + } + } + Some("H") => { + if let Ok(i) = obj.try_int(vm) { + *(result as *mut u16) = i.as_bigint().to_u16().unwrap_or(0); + } + } + Some("i") => { + if let Ok(i) = obj.try_int(vm) { + let val = i.as_bigint().to_i32().unwrap_or(0); + *(result as *mut libffi::low::ffi_arg) = val as libffi::low::ffi_arg; + } + } + Some("I") => { + if let Ok(i) = obj.try_int(vm) { + *(result as *mut u32) = i.as_bigint().to_u32().unwrap_or(0); + } + } + Some("l") | Some("q") => { + if let Ok(i) = obj.try_int(vm) { + *(result as *mut i64) = i.as_bigint().to_i64().unwrap_or(0); + } + } + Some("L") | Some("Q") => { + if let Ok(i) = obj.try_int(vm) { + *(result as *mut u64) = i.as_bigint().to_u64().unwrap_or(0); + } + } + Some("f") => { + if let Ok(f) = obj.try_float(vm) { + *(result as *mut f32) = f.to_f64() as f32; + } + } + Some("d") => { + if let Ok(f) = obj.try_float(vm) { + *(result as *mut f64) = f.to_f64(); + } + } + Some("P") | Some("z") | Some("Z") => { + if let Ok(i) = obj.try_int(vm) { + *(result as *mut usize) = i.as_bigint().to_usize().unwrap_or(0); + } + } + Some("?") => { + if let Ok(b) = obj.is_true(vm) { + *(result as *mut u8) = u8::from(b); + } + } + _ => {} + } + } +} + +/// The callback function that libffi calls when the closure is invoked. +unsafe extern "C" fn thunk_callback( + _cif: &low::ffi_cif, + result: &mut c_void, + args: *const *const c_void, + userdata: &ThunkUserData, +) { + with_current_vm(|vm| { + let py_args: Vec = userdata + .arg_types + .iter() + .enumerate() + .map(|(i, ty)| { + let arg_ptr = unsafe { *args.add(i) }; + ffi_to_python(ty, arg_ptr, vm) + }) + .collect(); + + let py_result = userdata.callable.call(py_args, vm); + + // Call unraisable hook if exception occurred + if let Err(exc) = &py_result { + let repr = userdata + .callable + .repr(vm) + .map(|s| s.to_string()) + .unwrap_or_else(|_| "".to_string()); + let msg = format!( + "Exception ignored on calling ctypes callback function {}", + repr + ); + vm.run_unraisable(exc.clone(), Some(msg), vm.ctx.none()); + } + + if let Some(ref res_type) = userdata.res_type { + python_to_ffi(py_result, res_type, result as *mut c_void, vm); + } + }); +} + +/// Holds the closure and userdata together to ensure proper lifetime. +struct ThunkData { + #[allow(dead_code)] + closure: Closure<'static>, + userdata_ptr: *mut ThunkUserData, +} + +impl Drop for ThunkData { + fn drop(&mut self) { + unsafe { + drop(Box::from_raw(self.userdata_ptr)); + } + } +} + +/// CThunkObject wraps a Python callable to make it callable from C code. +#[pyclass(name = "CThunkObject", module = "_ctypes")] +#[derive(PyPayload)] +pub(super) struct PyCThunk { + callable: PyObjectRef, + #[allow(dead_code)] + thunk_data: PyRwLock>, + code_ptr: CodePtr, +} + +impl Debug for PyCThunk { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PyCThunk") + .field("callable", &self.callable) + .finish() + } +} + +impl PyCThunk { + pub fn new( + callable: PyObjectRef, + arg_types: Option, + res_type: Option, + vm: &VirtualMachine, + ) -> PyResult { + let arg_type_vec: Vec = match arg_types { + Some(args) if !vm.is_none(&args) => args + .try_to_value::>(vm)? + .into_iter() + .map(|item| { + item.downcast::() + .map_err(|_| vm.new_type_error("_argtypes_ must be a sequence of types")) + }) + .collect::>>()?, + _ => Vec::new(), + }; + + let res_type_ref: Option = match res_type { + Some(ref rt) if !vm.is_none(rt) => Some( + rt.clone() + .downcast::() + .map_err(|_| vm.new_type_error("restype must be a ctypes type"))?, + ), + _ => None, + }; + + let ffi_arg_types: Vec = arg_type_vec + .iter() + .map(|ty| { + ty.type_code(vm) + .and_then(|code| get_ffi_type(&code)) + .unwrap_or(Type::pointer()) + }) + .collect(); + + let ffi_res_type = res_type_ref + .as_ref() + .and_then(|ty| ty.type_code(vm)) + .and_then(|code| get_ffi_type(&code)) + .unwrap_or(Type::void()); + + let cif = Cif::new(ffi_arg_types, ffi_res_type); + + let userdata = Box::new(ThunkUserData { + callable: callable.clone(), + arg_types: arg_type_vec, + res_type: res_type_ref, + }); + let userdata_ptr = Box::into_raw(userdata); + let userdata_ref: &'static ThunkUserData = unsafe { &*userdata_ptr }; + + let closure = Closure::new(cif, thunk_callback, userdata_ref); + let code_ptr = CodePtr(*closure.code_ptr() as *mut _); + + let thunk_data = ThunkData { + closure, + userdata_ptr, + }; + + Ok(Self { + callable, + thunk_data: PyRwLock::new(Some(thunk_data)), + code_ptr, + }) + } + + pub fn code_ptr(&self) -> CodePtr { + self.code_ptr + } +} + +unsafe impl Send for PyCThunk {} +unsafe impl Sync for PyCThunk {} + +#[pyclass] +impl PyCThunk { + #[pygetset] + fn callable(&self) -> PyObjectRef { + self.callable.clone() + } } diff --git a/crates/vm/src/stdlib/ctypes/library.rs b/crates/vm/src/stdlib/ctypes/library.rs index e918470b6c8..ec8ca91af0d 100644 --- a/crates/vm/src/stdlib/ctypes/library.rs +++ b/crates/vm/src/stdlib/ctypes/library.rs @@ -2,12 +2,12 @@ use crate::VirtualMachine; use libloading::Library; use rustpython_common::lock::{PyMutex, PyRwLock}; use std::collections::HashMap; -use std::ffi::c_void; +use std::ffi::{OsStr, c_void}; use std::fmt; use std::ptr::null; -pub struct SharedLibrary { - pub(crate) lib: PyMutex>, +pub(super) struct SharedLibrary { + pub(super) lib: PyMutex>, } impl fmt::Debug for SharedLibrary { @@ -17,13 +17,13 @@ impl fmt::Debug for SharedLibrary { } impl SharedLibrary { - pub fn new(name: &str) -> Result { + fn new(name: impl AsRef) -> Result { Ok(SharedLibrary { - lib: PyMutex::new(unsafe { Some(Library::new(name)?) }), + lib: PyMutex::new(unsafe { Some(Library::new(name.as_ref())?) }), }) } - pub fn get_pointer(&self) -> usize { + fn get_pointer(&self) -> usize { let lib_lock = self.lib.lock(); if let Some(l) = &*lib_lock { l as *const Library as usize @@ -32,12 +32,12 @@ impl SharedLibrary { } } - pub fn is_closed(&self) -> bool { + fn is_closed(&self) -> bool { let lib_lock = self.lib.lock(); lib_lock.is_none() } - pub fn close(&self) { + fn close(&self) { *self.lib.lock() = None; } } @@ -48,25 +48,24 @@ impl Drop for SharedLibrary { } } -pub struct ExternalLibs { +pub(super) struct ExternalLibs { libraries: HashMap, } impl ExternalLibs { - pub fn new() -> Self { + fn new() -> Self { Self { libraries: HashMap::new(), } } - #[allow(dead_code)] pub fn get_lib(&self, key: usize) -> Option<&SharedLibrary> { self.libraries.get(&key) } pub fn get_or_insert_lib( &mut self, - library_path: &str, + library_path: impl AsRef, _vm: &VirtualMachine, ) -> Result<(usize, &SharedLibrary), libloading::Error> { let new_lib = SharedLibrary::new(library_path)?; @@ -83,7 +82,7 @@ impl ExternalLibs { } }; - Ok((key, self.libraries.get(&key).unwrap())) + Ok((key, self.libraries.get(&key).expect("just inserted"))) } pub fn drop_lib(&mut self, key: usize) { @@ -91,10 +90,9 @@ impl ExternalLibs { } } -rustpython_common::static_cell! { - static LIBCACHE: PyRwLock; -} - -pub fn libcache() -> &'static PyRwLock { +pub(super) fn libcache() -> &'static PyRwLock { + rustpython_common::static_cell! { + static LIBCACHE: PyRwLock; + } LIBCACHE.get_or_init(|| PyRwLock::new(ExternalLibs::new())) } diff --git a/crates/vm/src/stdlib/ctypes/pointer.rs b/crates/vm/src/stdlib/ctypes/pointer.rs index 735034e7936..3ee39af3a7a 100644 --- a/crates/vm/src/stdlib/ctypes/pointer.rs +++ b/crates/vm/src/stdlib/ctypes/pointer.rs @@ -1,37 +1,164 @@ -use num_traits::ToPrimitive; -use rustpython_common::lock::PyRwLock; - -use crate::builtins::{PyType, PyTypeRef}; -use crate::function::FuncArgs; +use super::{PyCArray, PyCData, PyCSimple, PyCStructure, StgInfo, StgInfoFlags}; use crate::protocol::PyNumberMethods; -use crate::stdlib::ctypes::{CDataObject, PyCData}; -use crate::types::{AsNumber, Constructor}; -use crate::{AsObject, Py, PyObjectRef, PyPayload, PyResult, VirtualMachine}; +use crate::types::{AsNumber, Constructor, Initializer}; +use crate::{ + AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, + builtins::{PyBytes, PyInt, PyList, PySlice, PyStr, PyType, PyTypeRef}, + class::StaticType, + function::{FuncArgs, OptionalArg}, +}; +use num_traits::ToPrimitive; #[pyclass(name = "PyCPointerType", base = PyType, module = "_ctypes")] #[derive(Debug)] #[repr(transparent)] -pub struct PyCPointerType(PyType); +pub(super) struct PyCPointerType(PyType); + +impl Initializer for PyCPointerType { + type Args = FuncArgs; + + fn init(zelf: crate::PyRef, _args: Self::Args, vm: &VirtualMachine) -> PyResult<()> { + // Get the type as PyTypeRef + let obj: PyObjectRef = zelf.clone().into(); + let new_type: PyTypeRef = obj + .downcast() + .map_err(|_| vm.new_type_error("expected type"))?; + + new_type.check_not_initialized(vm)?; + + // Get the _type_ attribute (element type) + // PyCPointerType_init gets the element type from _type_ attribute + let proto = new_type + .as_object() + .get_attr("_type_", vm) + .ok() + .and_then(|obj| obj.downcast::().ok()); + + // Initialize StgInfo for pointer type + let pointer_size = std::mem::size_of::(); + let mut stg_info = StgInfo::new(pointer_size, pointer_size); + stg_info.proto = proto; + stg_info.paramfunc = super::base::ParamFunc::Pointer; + stg_info.length = 1; + stg_info.flags |= StgInfoFlags::TYPEFLAG_ISPOINTER; + + // Set format string: "&" + if let Some(ref proto) = stg_info.proto { + let item_info = proto.stg_info_opt().expect("proto has StgInfo"); + let current_format = item_info.format.as_deref().unwrap_or("B"); + stg_info.format = Some(format!("&{}", current_format)); + } + + let _ = new_type.init_type_data(stg_info); -#[pyclass(flags(IMMUTABLETYPE), with(AsNumber))] + Ok(()) + } +} + +#[pyclass(flags(IMMUTABLETYPE), with(AsNumber, Initializer))] impl PyCPointerType { + #[pymethod] + fn from_param(zelf: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // zelf is the pointer type class that from_param was called on + let cls = zelf + .downcast::() + .map_err(|_| vm.new_type_error("from_param: expected a type"))?; + + // 1. None is allowed for pointer types + if vm.is_none(&value) { + return Ok(value); + } + + // 2. If already an instance of the requested type, return it + if value.is_instance(cls.as_object(), vm)? { + return Ok(value); + } + + // 3. If value is an instance of _type_ (the pointed-to type), wrap with byref + if let Ok(type_attr) = cls.as_object().get_attr("_type_", vm) + && let Ok(type_ref) = type_attr.downcast::() + && value.is_instance(type_ref.as_object(), vm)? + { + // Return byref(value) + return super::_ctypes::byref(value, crate::function::OptionalArg::Missing, vm); + } + + // 4. Array/Pointer instances with compatible proto + // "Array instances are also pointers when the item types are the same." + let is_pointer_or_array = value.downcast_ref::().is_some() + || value.downcast_ref::().is_some(); + + if is_pointer_or_array { + let is_compatible = { + if let Some(value_stginfo) = value.class().stg_info_opt() + && let Some(ref value_proto) = value_stginfo.proto + && let Some(cls_stginfo) = cls.stg_info_opt() + && let Some(ref cls_proto) = cls_stginfo.proto + { + // Check if value's proto is a subclass of target's proto + value_proto.fast_issubclass(cls_proto) + } else { + false + } + }; + if is_compatible { + return Ok(value); + } + } + + // 5. Check for _as_parameter_ attribute + if let Ok(as_parameter) = value.get_attr("_as_parameter_", vm) { + return PyCPointerType::from_param(cls.as_object().to_owned(), as_parameter, vm); + } + + Err(vm.new_type_error(format!( + "expected {} instance instead of {}", + cls.name(), + value.class().name() + ))) + } + #[pymethod] fn __mul__(cls: PyTypeRef, n: isize, vm: &VirtualMachine) -> PyResult { - use super::array::create_array_type_with_stg_info; + use super::array::array_type_from_ctype; + if n < 0 { return Err(vm.new_value_error(format!("Array length must be >= 0, not {n}"))); } - // Pointer size - let element_size = std::mem::size_of::(); - let total_size = element_size * (n as usize); - let stg_info = super::util::StgInfo::new_array( - total_size, - element_size, - n as usize, - cls.as_object().to_owned(), - element_size, - ); - create_array_type_with_stg_info(stg_info, vm) + // Use cached array type creation + array_type_from_ctype(cls.into(), n as usize, vm) + } + + // PyCPointerType_set_type: Complete an incomplete pointer type + #[pymethod] + fn set_type(zelf: PyTypeRef, typ: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + use crate::AsObject; + + // 1. Validate that typ is a type + let typ_type = typ + .clone() + .downcast::() + .map_err(|_| vm.new_type_error("_type_ must be a type"))?; + + // 2. Validate that typ has storage info + if typ_type.stg_info_opt().is_none() { + return Err(vm.new_type_error("_type_ must have storage info")); + } + + // 3. Update StgInfo.proto and format using mutable access + if let Some(mut stg_info) = zelf.get_type_data_mut::() { + stg_info.proto = Some(typ_type.clone()); + + // Update format string: "&" + let item_info = typ_type.stg_info_opt().expect("proto has StgInfo"); + let current_format = item_info.format.as_deref().unwrap_or("B"); + stg_info.format = Some(format!("&{}", current_format)); + } + + // 4. Set _type_ attribute on the pointer type + zelf.as_object().set_attr("_type_", typ_type, vm)?; + + Ok(()) } } @@ -41,12 +168,12 @@ impl AsNumber for PyCPointerType { multiply: Some(|a, b, vm| { let cls = a .downcast_ref::() - .ok_or_else(|| vm.new_type_error("expected type".to_owned()))?; + .ok_or_else(|| vm.new_type_error("expected type"))?; let n = b .try_index(vm)? .as_bigint() .to_isize() - .ok_or_else(|| vm.new_overflow_error("array size too large".to_owned()))?; + .ok_or_else(|| vm.new_overflow_error("array size too large"))?; PyCPointerType::__mul__(cls.to_owned(), n, vm) }), ..PyNumberMethods::NOT_IMPLEMENTED @@ -55,6 +182,8 @@ impl AsNumber for PyCPointerType { } } +/// PyCPointer - Pointer instance +/// `contents` is a computed property, not a stored field. #[pyclass( name = "_Pointer", base = PyCData, @@ -62,26 +191,27 @@ impl AsNumber for PyCPointerType { module = "_ctypes" )] #[derive(Debug)] -pub struct PyCPointer { - _base: PyCData, - contents: PyRwLock, -} +#[repr(transparent)] +pub struct PyCPointer(pub PyCData); impl Constructor for PyCPointer { - type Args = (crate::function::OptionalArg,); - - fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { - let args: Self::Args = args.bind(vm)?; - // Get the initial contents value if provided - let initial_contents = args.0.into_option().unwrap_or_else(|| vm.ctx.none()); + type Args = FuncArgs; - // Create a new PyCPointer instance with the provided value - PyCPointer { - _base: PyCData::new(CDataObject::from_bytes(vec![], None)), - contents: PyRwLock::new(initial_contents), + fn slot_new(cls: PyTypeRef, _args: FuncArgs, vm: &VirtualMachine) -> PyResult { + // Pointer_new: Check if _type_ is defined + let has_type = cls.stg_info_opt().is_some_and(|info| info.proto.is_some()); + if !has_type { + return Err(vm.new_type_error("Cannot create instance: has no _type_")); } - .into_ref_with_type(vm, cls) - .map(Into::into) + + // Create a new PyCPointer instance with NULL pointer (all zeros) + // Initial contents is set via __init__ if provided + let cdata = PyCData::from_bytes(vec![0u8; std::mem::size_of::()], None); + // pointer instance has b_length set to 2 (for index 0 and 1) + cdata.length.store(2); + PyCPointer(cdata) + .into_ref_with_type(vm, cls) + .map(Into::into) } fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { @@ -89,186 +219,496 @@ impl Constructor for PyCPointer { } } -#[pyclass(flags(BASETYPE, IMMUTABLETYPE), with(Constructor))] +impl Initializer for PyCPointer { + type Args = (OptionalArg,); + + fn init(zelf: PyRef, args: Self::Args, vm: &VirtualMachine) -> PyResult<()> { + let (value,) = args; + if let OptionalArg::Present(val) = value + && !vm.is_none(&val) + { + Self::set_contents(&zelf, val, vm)?; + } + Ok(()) + } +} + +#[pyclass(flags(BASETYPE, IMMUTABLETYPE), with(Constructor, Initializer))] impl PyCPointer { - // TODO: not correct + /// Get the pointer value stored in buffer as usize + pub fn get_ptr_value(&self) -> usize { + let buffer = self.0.buffer.read(); + super::base::read_ptr_from_buffer(&buffer) + } + + /// Set the pointer value in buffer + pub fn set_ptr_value(&self, value: usize) { + let mut buffer = self.0.buffer.write(); + let bytes = value.to_ne_bytes(); + if buffer.len() >= bytes.len() { + buffer.to_mut()[..bytes.len()].copy_from_slice(&bytes); + } + } + + /// Pointer_bool: returns True if pointer is not NULL + #[pymethod] + fn __bool__(&self) -> bool { + self.get_ptr_value() != 0 + } + + /// contents getter - reads address from b_ptr and creates an instance of the pointed-to type #[pygetset] - fn contents(&self) -> PyResult { - let contents = self.contents.read().clone(); - Ok(contents) + fn contents(zelf: &Py, vm: &VirtualMachine) -> PyResult { + // Pointer_get_contents + let ptr_val = zelf.get_ptr_value(); + if ptr_val == 0 { + return Err(vm.new_value_error("NULL pointer access")); + } + + // Get element type from StgInfo.proto + let stg_info = zelf.class().stg_info(vm)?; + let proto_type = stg_info.proto(); + let element_size = proto_type + .stg_info_opt() + .map_or(std::mem::size_of::(), |info| info.size); + + // Create instance that references the memory directly + // PyCData.into_ref_with_type works for all ctypes (simple, structure, union, array, pointer) + let cdata = unsafe { super::base::PyCData::at_address(ptr_val as *const u8, element_size) }; + cdata + .into_ref_with_type(vm, proto_type.to_owned()) + .map(Into::into) } + + /// contents setter - stores address in b_ptr and keeps reference + /// Pointer_set_contents #[pygetset(setter)] - fn set_contents(&self, contents: PyObjectRef, _vm: &VirtualMachine) -> PyResult<()> { - // Validate that the contents is a CData instance if we have a _type_ - // For now, just store it - *self.contents.write() = contents; + fn set_contents(zelf: &Py, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + // Get stginfo and proto for type validation + let stg_info = zelf.class().stg_info(vm)?; + let proto = stg_info.proto(); + + // Check if value is CData, or isinstance(value, proto) + let cdata = if let Some(c) = value.downcast_ref::() { + c + } else if value.is_instance(proto.as_object(), vm)? { + value + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("expected ctypes instance"))? + } else { + return Err(vm.new_type_error(format!( + "expected {} instead of {}", + proto.name(), + value.class().name() + ))); + }; + + // Set pointer value + { + let buffer = cdata.buffer.read(); + let addr = buffer.as_ptr() as usize; + drop(buffer); + zelf.set_ptr_value(addr); + } + + // KeepRef: store the object directly with index 1 + zelf.0.keep_ref(1, value.clone(), vm)?; + + // KeepRef: store GetKeepedObjects(dst) at index 0 + if let Some(kept) = cdata.objects.read().clone() { + zelf.0.keep_ref(0, kept, vm)?; + } + Ok(()) } + // Pointer_subscript #[pymethod] - fn __init__( - &self, - value: crate::function::OptionalArg, - _vm: &VirtualMachine, - ) -> PyResult<()> { - // Pointer can be initialized with 0 or 1 argument - // If 1 argument is provided, it should be a CData instance - if let crate::function::OptionalArg::Present(val) = value { - *self.contents.write() = val; + fn __getitem__(zelf: &Py, item: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // PyIndex_Check + if let Some(i) = item.downcast_ref::() { + let i = i.as_bigint().to_isize().ok_or_else(|| { + vm.new_index_error("cannot fit index into an index-sized integer") + })?; + // Note: Pointer does NOT adjust negative indices (no length) + Self::getitem_by_index(zelf, i, vm) + } + // PySlice_Check + else if let Some(slice) = item.downcast_ref::() { + Self::getitem_by_slice(zelf, slice, vm) + } else { + Err(vm.new_type_error("Pointer indices must be integer")) } - - Ok(()) } - #[pyclassmethod] - fn from_address(cls: PyTypeRef, address: isize, vm: &VirtualMachine) -> PyResult { - if address == 0 { - return Err(vm.new_value_error("NULL pointer access".to_owned())); + // Pointer_item + fn getitem_by_index(zelf: &Py, index: isize, vm: &VirtualMachine) -> PyResult { + // if (*(void **)self->b_ptr == NULL) { PyErr_SetString(...); } + let ptr_value = zelf.get_ptr_value(); + if ptr_value == 0 { + return Err(vm.new_value_error("NULL pointer access")); } - // Pointer just stores the address value - Ok(PyCPointer { - _base: PyCData::new(CDataObject::from_bytes(vec![], None)), - contents: PyRwLock::new(vm.ctx.new_int(address).into()), + + // Get element type and size from StgInfo.proto + let stg_info = zelf.class().stg_info(vm)?; + let proto_type = stg_info.proto(); + let element_size = proto_type + .stg_info_opt() + .map_or(std::mem::size_of::(), |info| info.size); + + // offset = index * iteminfo->size + let offset = index * element_size as isize; + let addr = (ptr_value as isize + offset) as usize; + + // Check if it's a simple type (has _type_ attribute) + if let Ok(type_attr) = proto_type.as_object().get_attr("_type_", vm) + && let Ok(type_str) = type_attr.str(vm) + { + let type_code = type_str.to_string(); + return Self::read_value_at_address(addr, element_size, Some(&type_code), vm); } - .into_ref_with_type(vm, cls)? - .into()) + + // Complex type: create instance that references the memory directly (not a copy) + // This allows p[i].val = x to modify the original memory + // PyCData.into_ref_with_type works for all ctypes (array, structure, union, pointer) + let cdata = unsafe { super::base::PyCData::at_address(addr as *const u8, element_size) }; + cdata + .into_ref_with_type(vm, proto_type.to_owned()) + .map(Into::into) } - #[pyclassmethod] - fn from_buffer( - cls: PyTypeRef, - source: PyObjectRef, - offset: crate::function::OptionalArg, - vm: &VirtualMachine, - ) -> PyResult { - use crate::TryFromObject; - use crate::protocol::PyBuffer; + // Pointer_subscript slice handling (manual parsing, not PySlice_Unpack) + fn getitem_by_slice(zelf: &Py, slice: &PySlice, vm: &VirtualMachine) -> PyResult { + // Since pointers have no length, we have to dissect the slice ourselves + + // step: defaults to 1, step == 0 is error + let step: isize = if let Some(ref step_obj) = slice.step + && !vm.is_none(step_obj) + { + let s = step_obj + .try_int(vm)? + .as_bigint() + .to_isize() + .ok_or_else(|| vm.new_value_error("slice step too large"))?; + if s == 0 { + return Err(vm.new_value_error("slice step cannot be zero")); + } + s + } else { + 1 + }; + + // start: defaults to 0, but required if step < 0 + let start: isize = if let Some(ref start_obj) = slice.start + && !vm.is_none(start_obj) + { + start_obj + .try_int(vm)? + .as_bigint() + .to_isize() + .ok_or_else(|| vm.new_value_error("slice start too large"))? + } else { + if step < 0 { + return Err(vm.new_value_error("slice start is required for step < 0")); + } + 0 + }; - let offset = offset.unwrap_or(0); - if offset < 0 { - return Err(vm.new_value_error("offset cannot be negative".to_owned())); + // stop: ALWAYS required for pointers + if vm.is_none(&slice.stop) { + return Err(vm.new_value_error("slice stop is required")); } - let offset = offset as usize; - let size = std::mem::size_of::(); + let stop: isize = slice + .stop + .try_int(vm)? + .as_bigint() + .to_isize() + .ok_or_else(|| vm.new_value_error("slice stop too large"))?; - let buffer = PyBuffer::try_from_object(vm, source.clone())?; + // calculate length + let len: usize = if (step > 0 && start > stop) || (step < 0 && start < stop) { + 0 + } else if step > 0 { + ((stop - start - 1) / step + 1) as usize + } else { + ((stop - start + 1) / step + 1) as usize + }; + + // Get element info + let stg_info = zelf.class().stg_info(vm)?; + let element_size = if let Some(ref proto_type) = stg_info.proto { + proto_type.stg_info_opt().expect("proto has StgInfo").size + } else { + std::mem::size_of::() + }; + let type_code = stg_info + .proto + .as_ref() + .and_then(|p| p.as_object().get_attr("_type_", vm).ok()) + .and_then(|t| t.str(vm).ok()) + .map(|s| s.to_string()); - if buffer.desc.readonly { - return Err(vm.new_type_error("underlying buffer is not writable".to_owned())); + let ptr_value = zelf.get_ptr_value(); + + // c_char → bytes + if type_code.as_deref() == Some("c") { + if len == 0 { + return Ok(vm.ctx.new_bytes(vec![]).into()); + } + let mut result = Vec::with_capacity(len); + if step == 1 { + // Optimized contiguous copy + let start_addr = (ptr_value as isize + start * element_size as isize) as *const u8; + unsafe { + result.extend_from_slice(std::slice::from_raw_parts(start_addr, len)); + } + } else { + let mut cur = start; + for _ in 0..len { + let addr = (ptr_value as isize + cur * element_size as isize) as *const u8; + unsafe { + result.push(*addr); + } + cur += step; + } + } + return Ok(vm.ctx.new_bytes(result).into()); } - let buffer_len = buffer.desc.len; - if offset + size > buffer_len { - return Err(vm.new_value_error(format!( - "Buffer size too small ({} instead of at least {} bytes)", - buffer_len, - offset + size - ))); + // c_wchar → str + if type_code.as_deref() == Some("u") { + if len == 0 { + return Ok(vm.ctx.new_str("").into()); + } + let mut result = String::with_capacity(len); + let wchar_size = std::mem::size_of::(); + let mut cur = start; + for _ in 0..len { + let addr = (ptr_value as isize + cur * wchar_size as isize) as *const libc::wchar_t; + unsafe { + if let Some(c) = char::from_u32(*addr as u32) { + result.push(c); + } + } + cur += step; + } + return Ok(vm.ctx.new_str(result).into()); } - // Read pointer value from buffer - let bytes = buffer.obj_bytes(); - let ptr_bytes = &bytes[offset..offset + size]; - let ptr_val = usize::from_ne_bytes(ptr_bytes.try_into().expect("size is checked above")); + // other types → list with Pointer_item for each + let mut items = Vec::with_capacity(len); + let mut cur = start; + for _ in 0..len { + items.push(Self::getitem_by_index(zelf, cur, vm)?); + cur += step; + } + Ok(PyList::from(items).into_ref(&vm.ctx).into()) + } - Ok(PyCPointer { - _base: PyCData::new(CDataObject::from_bytes(vec![], None)), - contents: PyRwLock::new(vm.ctx.new_int(ptr_val).into()), + // Pointer_ass_item + #[pymethod] + fn __setitem__( + zelf: &Py, + item: PyObjectRef, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + // Pointer does not support item deletion (value always provided) + // only integer indices supported for setitem + if let Some(i) = item.downcast_ref::() { + let i = i.as_bigint().to_isize().ok_or_else(|| { + vm.new_index_error("cannot fit index into an index-sized integer") + })?; + Self::setitem_by_index(zelf, i, value, vm) + } else { + Err(vm.new_type_error("Pointer indices must be integer")) } - .into_ref_with_type(vm, cls)? - .into()) } - #[pyclassmethod] - fn from_buffer_copy( - cls: PyTypeRef, - source: crate::function::ArgBytesLike, - offset: crate::function::OptionalArg, + fn setitem_by_index( + zelf: &Py, + index: isize, + value: PyObjectRef, vm: &VirtualMachine, - ) -> PyResult { - let offset = offset.unwrap_or(0); - if offset < 0 { - return Err(vm.new_value_error("offset cannot be negative".to_owned())); + ) -> PyResult<()> { + let ptr_value = zelf.get_ptr_value(); + if ptr_value == 0 { + return Err(vm.new_value_error("NULL pointer access")); } - let offset = offset as usize; - let size = std::mem::size_of::(); - let source_bytes = source.borrow_buf(); - let buffer_len = source_bytes.len(); + // Get element type, size and type_code from StgInfo.proto + let stg_info = zelf.class().stg_info(vm)?; + let proto_type = stg_info.proto(); - if offset + size > buffer_len { - return Err(vm.new_value_error(format!( - "Buffer size too small ({} instead of at least {} bytes)", - buffer_len, - offset + size - ))); - } + // Get type code from proto's _type_ attribute + let type_code: Option = proto_type + .as_object() + .get_attr("_type_", vm) + .ok() + .and_then(|t| t.downcast_ref::().map(|s| s.to_string())); - // Read pointer value from buffer - let ptr_bytes = &source_bytes[offset..offset + size]; - let ptr_val = usize::from_ne_bytes(ptr_bytes.try_into().expect("size is checked above")); + let element_size = proto_type + .stg_info_opt() + .map_or(std::mem::size_of::(), |info| info.size); - Ok(PyCPointer { - _base: PyCData::new(CDataObject::from_bytes(vec![], None)), - contents: PyRwLock::new(vm.ctx.new_int(ptr_val).into()), + // Calculate address + let offset = index * element_size as isize; + let addr = (ptr_value as isize + offset) as usize; + + // Write value at address + // Handle Structure/Array types by copying their buffer + if let Some(cdata) = value.downcast_ref::() + && (cdata.fast_isinstance(PyCStructure::static_type()) + || cdata.fast_isinstance(PyCArray::static_type()) + || cdata.fast_isinstance(PyCSimple::static_type())) + { + let src_buffer = cdata.buffer.read(); + let copy_len = src_buffer.len().min(element_size); + unsafe { + let dest_ptr = addr as *mut u8; + std::ptr::copy_nonoverlapping(src_buffer.as_ptr(), dest_ptr, copy_len); + } + } else { + // Handle z/Z specially to store converted value + if type_code.as_deref() == Some("z") + && let Some(bytes) = value.downcast_ref::() + { + let (converted, ptr_val) = super::base::ensure_z_null_terminated(bytes, vm); + unsafe { + *(addr as *mut usize) = ptr_val; + } + return zelf.0.keep_ref(index as usize, converted, vm); + } else if type_code.as_deref() == Some("Z") + && let Some(s) = value.downcast_ref::() + { + let (holder, ptr_val) = super::base::str_to_wchar_bytes(s.as_str(), vm); + unsafe { + *(addr as *mut usize) = ptr_val; + } + return zelf.0.keep_ref(index as usize, holder, vm); + } else { + Self::write_value_at_address(addr, element_size, &value, type_code.as_deref(), vm)?; + } } - .into_ref_with_type(vm, cls)? - .into()) + + // KeepRef: store reference to keep value alive using actual index + zelf.0.keep_ref(index as usize, value, vm) } - #[pyclassmethod] - fn in_dll( - cls: PyTypeRef, - dll: PyObjectRef, - name: crate::builtins::PyStrRef, + /// Read a value from memory address + fn read_value_at_address( + addr: usize, + size: usize, + type_code: Option<&str>, vm: &VirtualMachine, ) -> PyResult { - use libloading::Symbol; + unsafe { + let ptr = addr as *const u8; + match type_code { + Some("c") => Ok(vm.ctx.new_bytes(vec![*ptr]).into()), + Some("b") => Ok(vm.ctx.new_int(*(ptr as *const i8) as i32).into()), + Some("B") => Ok(vm.ctx.new_int(*ptr as i32).into()), + Some("h") => Ok(vm.ctx.new_int(*(ptr as *const i16) as i32).into()), + Some("H") => Ok(vm.ctx.new_int(*(ptr as *const u16) as i32).into()), + Some("i") | Some("l") => Ok(vm.ctx.new_int(*(ptr as *const i32)).into()), + Some("I") | Some("L") => Ok(vm.ctx.new_int(*(ptr as *const u32)).into()), + Some("q") => Ok(vm.ctx.new_int(*(ptr as *const i64)).into()), + Some("Q") => Ok(vm.ctx.new_int(*(ptr as *const u64)).into()), + Some("f") => Ok(vm.ctx.new_float(*(ptr as *const f32) as f64).into()), + Some("d") | Some("g") => Ok(vm.ctx.new_float(*(ptr as *const f64)).into()), + Some("P") | Some("z") | Some("Z") => { + Ok(vm.ctx.new_int(*(ptr as *const usize)).into()) + } + _ => { + // Default: read as bytes + let bytes = std::slice::from_raw_parts(ptr, size).to_vec(); + Ok(vm.ctx.new_bytes(bytes).into()) + } + } + } + } - // Get the library handle from dll object - let handle = if let Ok(int_handle) = dll.try_int(vm) { - // dll is an integer handle - int_handle - .as_bigint() - .to_usize() - .ok_or_else(|| vm.new_value_error("Invalid library handle".to_owned()))? - } else { - // dll is a CDLL/PyDLL/WinDLL object with _handle attribute - dll.get_attr("_handle", vm)? - .try_int(vm)? - .as_bigint() - .to_usize() - .ok_or_else(|| vm.new_value_error("Invalid library handle".to_owned()))? - }; + /// Write a value to memory address + fn write_value_at_address( + addr: usize, + size: usize, + value: &PyObject, + type_code: Option<&str>, + vm: &VirtualMachine, + ) -> PyResult<()> { + unsafe { + let ptr = addr as *mut u8; - // Get the library from cache - let library_cache = crate::stdlib::ctypes::library::libcache().read(); - let library = library_cache - .get_lib(handle) - .ok_or_else(|| vm.new_attribute_error("Library not found".to_owned()))?; + // Handle c_char_p (z) and c_wchar_p (Z) - store pointer address + // Note: PyBytes/PyStr cases are handled by caller (setitem_by_index) + match type_code { + Some("z") | Some("Z") => { + let ptr_val = if vm.is_none(value) { + 0usize + } else if let Ok(int_val) = value.try_index(vm) { + int_val.as_bigint().to_usize().unwrap_or(0) + } else { + return Err(vm.new_type_error( + "bytes/string or integer address expected".to_owned(), + )); + }; + *(ptr as *mut usize) = ptr_val; + return Ok(()); + } + _ => {} + } - // Get symbol address from library - let symbol_name = format!("{}\0", name.as_str()); - let inner_lib = library.lib.lock(); + // Try to get value as integer + if let Ok(int_val) = value.try_int(vm) { + let i = int_val.as_bigint(); + match size { + 1 => { + *ptr = i.to_u8().unwrap_or(0); + } + 2 => { + *(ptr as *mut i16) = i.to_i16().unwrap_or(0); + } + 4 => { + *(ptr as *mut i32) = i.to_i32().unwrap_or(0); + } + 8 => { + *(ptr as *mut i64) = i.to_i64().unwrap_or(0); + } + _ => { + let bytes = i.to_signed_bytes_le(); + let copy_len = bytes.len().min(size); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr, copy_len); + } + } + return Ok(()); + } - let symbol_address = if let Some(lib) = &*inner_lib { - unsafe { - // Try to get the symbol from the library - let symbol: Symbol<'_, *mut u8> = lib.get(symbol_name.as_bytes()).map_err(|e| { - vm.new_attribute_error(format!("{}: symbol '{}' not found", e, name.as_str())) - })?; - *symbol as usize + // Try to get value as float + if let Ok(float_val) = value.try_float(vm) { + let f = float_val.to_f64(); + match size { + 4 => { + *(ptr as *mut f32) = f as f32; + } + 8 => { + *(ptr as *mut f64) = f; + } + _ => {} + } + return Ok(()); + } + + // Try bytes + if let Ok(bytes) = value.try_bytes_like(vm, |b| b.to_vec()) { + let copy_len = bytes.len().min(size); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr, copy_len); + return Ok(()); } - } else { - return Err(vm.new_attribute_error("Library is closed".to_owned())); - }; - // For pointer types, we return a pointer to the symbol address - Ok(PyCPointer { - _base: PyCData::new(CDataObject::from_bytes(vec![], None)), - contents: PyRwLock::new(vm.ctx.new_int(symbol_address).into()), + Err(vm.new_type_error(format!( + "cannot convert {} to ctypes data", + value.class().name() + ))) } - .into_ref_with_type(vm, cls)? - .into()) } } diff --git a/crates/vm/src/stdlib/ctypes/simple.rs b/crates/vm/src/stdlib/ctypes/simple.rs new file mode 100644 index 00000000000..1c0ec250d72 --- /dev/null +++ b/crates/vm/src/stdlib/ctypes/simple.rs @@ -0,0 +1,1379 @@ +use super::_ctypes::CArgObject; +use super::array::{PyCArray, WCHAR_SIZE, wchar_to_bytes}; +use super::base::{ + CDATA_BUFFER_METHODS, FfiArgValue, PyCData, StgInfo, StgInfoFlags, buffer_to_ffi_value, + bytes_to_pyobject, +}; +use super::function::PyCFuncPtr; +use super::get_size; +use super::pointer::PyCPointer; +use crate::builtins::{PyByteArray, PyBytes, PyInt, PyNone, PyStr, PyType, PyTypeRef}; +use crate::convert::ToPyObject; +use crate::function::{Either, FuncArgs, OptionalArg}; +use crate::protocol::{BufferDescriptor, PyBuffer, PyNumberMethods}; +use crate::types::{AsBuffer, AsNumber, Constructor, Initializer, Representable}; +use crate::{AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine}; +use num_traits::ToPrimitive; +use std::fmt::Debug; + +/// Valid type codes for ctypes simple types +// spell-checker: disable-next-line +pub(super) const SIMPLE_TYPE_CHARS: &str = "cbBhHiIlLdfuzZqQPXOv?g"; + +/// Create a new simple type instance from a class +fn new_simple_type( + cls: Either<&PyObject, &Py>, + vm: &VirtualMachine, +) -> PyResult { + let cls = match cls { + Either::A(obj) => obj, + Either::B(typ) => typ.as_object(), + }; + + let _type_ = cls + .get_attr("_type_", vm) + .map_err(|_| vm.new_attribute_error("class must define a '_type_' attribute"))?; + + if !_type_.is_instance((&vm.ctx.types.str_type).as_ref(), vm)? { + return Err(vm.new_type_error("class must define a '_type_' string attribute")); + } + + let tp_str = _type_.str(vm)?.to_string(); + + if tp_str.len() != 1 { + return Err(vm.new_value_error(format!( + "class must define a '_type_' attribute which must be a string of length 1, str: {tp_str}" + ))); + } + + if !SIMPLE_TYPE_CHARS.contains(tp_str.as_str()) { + return Err(vm.new_attribute_error(format!( + "class must define a '_type_' attribute which must be\n a single character string containing one of {SIMPLE_TYPE_CHARS}, currently it is {tp_str}." + ))); + } + + let size = get_size(&tp_str); + Ok(PyCSimple(PyCData::from_bytes(vec![0u8; size], None))) +} + +fn set_primitive(_type_: &str, value: &PyObject, vm: &VirtualMachine) -> PyResult { + match _type_ { + "c" => { + // c_set: accepts bytes(len=1), bytearray(len=1), or int(0-255) + if value + .downcast_ref_if_exact::(vm) + .is_some_and(|v| v.len() == 1) + || value + .downcast_ref_if_exact::(vm) + .is_some_and(|v| v.borrow_buf().len() == 1) + || value.downcast_ref_if_exact::(vm).is_some_and(|v| { + v.as_bigint() + .to_i64() + .is_some_and(|n| (0..=255).contains(&n)) + }) + { + Ok(value.to_owned()) + } else { + Err(vm.new_type_error("one character bytes, bytearray or integer expected")) + } + } + "u" => { + if let Ok(b) = value.str(vm).map(|v| v.to_string().chars().count() == 1) { + if b { + Ok(value.to_owned()) + } else { + Err(vm.new_type_error("one character unicode string expected")) + } + } else { + Err(vm.new_type_error(format!( + "unicode string expected instead of {} instance", + value.class().name() + ))) + } + } + "b" | "h" | "H" | "i" | "I" | "l" | "q" | "L" | "Q" => { + // Support __index__ protocol + if value.try_index(vm).is_ok() { + Ok(value.to_owned()) + } else { + Err(vm.new_type_error(format!( + "an integer is required (got type {})", + value.class().name() + ))) + } + } + "f" | "d" | "g" => { + // Handle int specially to check overflow + if let Some(int_obj) = value.downcast_ref_if_exact::(vm) { + // Check if int can fit in f64 + if int_obj.as_bigint().to_f64().is_some() { + return Ok(value.to_owned()); + } else { + return Err(vm.new_overflow_error("int too large to convert to float")); + } + } + // __float__ protocol + if value.try_float(vm).is_ok() { + Ok(value.to_owned()) + } else { + Err(vm.new_type_error(format!("must be real number, not {}", value.class().name()))) + } + } + "?" => Ok(PyObjectRef::from( + vm.ctx.new_bool(value.to_owned().try_to_bool(vm)?), + )), + "v" => { + // VARIANT_BOOL: any truthy → True + Ok(PyObjectRef::from( + vm.ctx.new_bool(value.to_owned().try_to_bool(vm)?), + )) + } + "B" => { + // Support __index__ protocol + if value.try_index(vm).is_ok() { + // Store as-is, conversion to unsigned happens in the getter + Ok(value.to_owned()) + } else { + Err(vm.new_type_error(format!("int expected instead of {}", value.class().name()))) + } + } + "z" => { + if value.is(&vm.ctx.none) + || value.downcast_ref_if_exact::(vm).is_some() + || value.downcast_ref_if_exact::(vm).is_some() + { + Ok(value.to_owned()) + } else { + Err(vm.new_type_error(format!( + "bytes or integer address expected instead of {} instance", + value.class().name() + ))) + } + } + "Z" => { + if value.is(&vm.ctx.none) + || value.downcast_ref_if_exact::(vm).is_some() + || value.downcast_ref_if_exact::(vm).is_some() + { + Ok(value.to_owned()) + } else { + Err(vm.new_type_error(format!( + "unicode string or integer address expected instead of {} instance", + value.class().name() + ))) + } + } + // O_set: py_object accepts any Python object + "O" => Ok(value.to_owned()), + _ => { + // "P" + if value.downcast_ref_if_exact::(vm).is_some() + || value.downcast_ref_if_exact::(vm).is_some() + { + Ok(value.to_owned()) + } else { + Err(vm.new_type_error("cannot be converted to pointer")) + } + } + } +} + +#[pyclass(module = "_ctypes", name = "PyCSimpleType", base = PyType)] +#[derive(Debug)] +#[repr(transparent)] +pub struct PyCSimpleType(PyType); + +#[pyclass(flags(BASETYPE), with(AsNumber, Initializer))] +impl PyCSimpleType { + #[allow(clippy::new_ret_no_self)] + #[pymethod] + fn new(cls: PyTypeRef, _: OptionalArg, vm: &VirtualMachine) -> PyResult { + Ok(PyObjectRef::from( + new_simple_type(Either::B(&cls), vm)? + .into_ref_with_type(vm, cls)? + .clone(), + )) + } + + #[pymethod] + fn from_param(zelf: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // zelf is the class (e.g., c_int) that from_param was called on + let cls = zelf + .downcast::() + .map_err(|_| vm.new_type_error("from_param: expected a type"))?; + + // 1. If the value is already an instance of the requested type, return it + if value.is_instance(cls.as_object(), vm)? { + return Ok(value); + } + + // 2. Get the type code to determine conversion rules + let type_code = cls.type_code(vm); + + // 3. Handle None for pointer types (c_char_p, c_wchar_p, c_void_p) + if vm.is_none(&value) && matches!(type_code.as_deref(), Some("z") | Some("Z") | Some("P")) { + return Ok(value); + } + + // Helper to create CArgObject wrapping a simple instance + let create_simple_with_value = |type_str: &str, val: &PyObject| -> PyResult { + let simple = new_simple_type(Either::B(&cls), vm)?; + let buffer_bytes = value_to_bytes_endian(type_str, val, false, vm); + *simple.0.buffer.write() = std::borrow::Cow::Owned(buffer_bytes.clone()); + let simple_obj: PyObjectRef = simple.into_ref_with_type(vm, cls.clone())?.into(); + // from_param returns CArgObject, not the simple type itself + let tag = type_str.as_bytes().first().copied().unwrap_or(b'?'); + let ffi_value = buffer_to_ffi_value(type_str, &buffer_bytes); + Ok(CArgObject { + tag, + value: ffi_value, + obj: simple_obj, + size: 0, + offset: 0, + } + .to_pyobject(vm)) + }; + + // 4. Try to convert value based on type code + match type_code.as_deref() { + // Integer types: accept integers + Some(tc @ ("b" | "B" | "h" | "H" | "i" | "I" | "l" | "L" | "q" | "Q")) => { + if value.try_int(vm).is_ok() { + return create_simple_with_value(tc, &value); + } + } + // Float types: accept numbers + Some(tc @ ("f" | "d" | "g")) => { + if value.try_float(vm).is_ok() || value.try_int(vm).is_ok() { + return create_simple_with_value(tc, &value); + } + } + // c_char: 1 byte character + Some("c") => { + if let Some(bytes) = value.downcast_ref::() + && bytes.len() == 1 + { + return create_simple_with_value("c", &value); + } + if let Ok(int_val) = value.try_int(vm) + && int_val.as_bigint().to_u8().is_some() + { + return create_simple_with_value("c", &value); + } + return Err(vm.new_type_error( + "one character bytes, bytearray or integer expected".to_string(), + )); + } + // c_wchar: 1 unicode character + Some("u") => { + if let Some(s) = value.downcast_ref::() + && s.as_str().chars().count() == 1 + { + return create_simple_with_value("u", &value); + } + return Err(vm.new_type_error("one character unicode string expected")); + } + // c_char_p: bytes pointer + Some("z") => { + // 1. bytes → create CArgObject with null-terminated buffer + if let Some(bytes) = value.downcast_ref::() { + let (holder, ptr) = super::base::ensure_z_null_terminated(bytes, vm); + return Ok(CArgObject { + tag: b'z', + value: FfiArgValue::OwnedPointer(ptr, holder), + obj: value.clone(), + size: 0, + offset: 0, + } + .to_pyobject(vm)); + } + // 2. Array/Pointer with c_char element type + if is_cchar_array_or_pointer(&value, vm) { + return Ok(value); + } + // 3. CArgObject (byref(c_char(...))) + if let Some(carg) = value.downcast_ref::() + && carg.tag == b'c' + { + return Ok(value.clone()); + } + } + // c_wchar_p: unicode pointer + Some("Z") => { + // 1. str → create CArgObject with null-terminated wchar buffer + if let Some(s) = value.downcast_ref::() { + let (holder, ptr) = super::base::str_to_wchar_bytes(s.as_str(), vm); + return Ok(CArgObject { + tag: b'Z', + value: FfiArgValue::OwnedPointer(ptr, holder), + obj: value.clone(), + size: 0, + offset: 0, + } + .to_pyobject(vm)); + } + // 2. Array/Pointer with c_wchar element type + if is_cwchar_array_or_pointer(&value, vm) { + return Ok(value); + } + // 3. CArgObject (byref(c_wchar(...))) + if let Some(carg) = value.downcast_ref::() + && carg.tag == b'u' + { + return Ok(value.clone()); + } + } + // c_void_p: most flexible - accepts int, bytes, str, any array/pointer, funcptr + Some("P") => { + // 1. int → create c_void_p with that address + if value.try_int(vm).is_ok() { + return create_simple_with_value("P", &value); + } + // 2. bytes → create CArgObject with null-terminated buffer + if let Some(bytes) = value.downcast_ref::() { + let (holder, ptr) = super::base::ensure_z_null_terminated(bytes, vm); + return Ok(CArgObject { + tag: b'z', + value: FfiArgValue::OwnedPointer(ptr, holder), + obj: value.clone(), + size: 0, + offset: 0, + } + .to_pyobject(vm)); + } + // 3. str → create CArgObject with null-terminated wchar buffer + if let Some(s) = value.downcast_ref::() { + let (holder, ptr) = super::base::str_to_wchar_bytes(s.as_str(), vm); + return Ok(CArgObject { + tag: b'Z', + value: FfiArgValue::OwnedPointer(ptr, holder), + obj: value.clone(), + size: 0, + offset: 0, + } + .to_pyobject(vm)); + } + // 4. Any Array or Pointer → accept directly + if value.downcast_ref::().is_some() + || value.downcast_ref::().is_some() + { + return Ok(value); + } + // 5. CArgObject with 'P' tag (byref(c_void_p(...))) + if let Some(carg) = value.downcast_ref::() + && carg.tag == b'P' + { + return Ok(value.clone()); + } + // 6. PyCFuncPtr → extract function pointer address + if let Some(funcptr) = value.downcast_ref::() { + let ptr_val = { + let buffer = funcptr._base.buffer.read(); + if buffer.len() >= std::mem::size_of::() { + usize::from_ne_bytes( + buffer[..std::mem::size_of::()].try_into().unwrap(), + ) + } else { + 0 + } + }; + return Ok(CArgObject { + tag: b'P', + value: FfiArgValue::Pointer(ptr_val), + obj: value.clone(), + size: 0, + offset: 0, + } + .to_pyobject(vm)); + } + // 7. c_char_p or c_wchar_p instance → extract pointer value + if let Some(simple) = value.downcast_ref::() { + let value_type_code = value.class().type_code(vm); + if matches!(value_type_code.as_deref(), Some("z") | Some("Z")) { + let ptr_val = { + let buffer = simple.0.buffer.read(); + if buffer.len() >= std::mem::size_of::() { + usize::from_ne_bytes( + buffer[..std::mem::size_of::()].try_into().unwrap(), + ) + } else { + 0 + } + }; + return Ok(CArgObject { + tag: b'Z', + value: FfiArgValue::Pointer(ptr_val), + obj: value.clone(), + size: 0, + offset: 0, + } + .to_pyobject(vm)); + } + } + } + // c_bool + Some("?") => { + let bool_val = value.is_true(vm)?; + let bool_obj: PyObjectRef = vm.ctx.new_bool(bool_val).into(); + return create_simple_with_value("?", &bool_obj); + } + _ => {} + } + + // 5. Check for _as_parameter_ attribute + if let Ok(as_parameter) = value.get_attr("_as_parameter_", vm) { + return PyCSimpleType::from_param(cls.as_object().to_owned(), as_parameter, vm); + } + + // 6. Type-specific error messages + match type_code.as_deref() { + Some("z") => Err(vm.new_type_error(format!( + "'{}' object cannot be interpreted as ctypes.c_char_p", + value.class().name() + ))), + Some("Z") => Err(vm.new_type_error(format!( + "'{}' object cannot be interpreted as ctypes.c_wchar_p", + value.class().name() + ))), + _ => Err(vm.new_type_error("wrong type")), + } + } + + #[pymethod] + fn __mul__(cls: PyTypeRef, n: isize, vm: &VirtualMachine) -> PyResult { + PyCSimple::repeat(cls, n, vm) + } +} + +impl AsNumber for PyCSimpleType { + fn as_number() -> &'static PyNumberMethods { + static AS_NUMBER: PyNumberMethods = PyNumberMethods { + multiply: Some(|a, b, vm| { + // a is a PyCSimpleType instance (type object like c_char) + // b is int (array size) + let cls = a + .downcast_ref::() + .ok_or_else(|| vm.new_type_error("expected type"))?; + let n = b + .try_index(vm)? + .as_bigint() + .to_isize() + .ok_or_else(|| vm.new_overflow_error("array size too large"))?; + PyCSimple::repeat(cls.to_owned(), n, vm) + }), + ..PyNumberMethods::NOT_IMPLEMENTED + }; + &AS_NUMBER + } +} + +impl Initializer for PyCSimpleType { + type Args = FuncArgs; + + fn init(zelf: PyRef, args: Self::Args, vm: &VirtualMachine) -> PyResult<()> { + // type_init requires exactly 3 positional arguments: name, bases, dict + if args.args.len() != 3 { + return Err(vm.new_type_error(format!( + "type.__init__() takes 3 positional arguments but {} were given", + args.args.len() + ))); + } + + // Get the type from the metatype instance + let type_ref: PyTypeRef = zelf + .as_object() + .to_owned() + .downcast() + .map_err(|_| vm.new_type_error("expected type"))?; + + type_ref.check_not_initialized(vm)?; + + // Get _type_ attribute + let type_attr = match type_ref.as_object().get_attr("_type_", vm) { + Ok(attr) => attr, + Err(_) => { + return Err(vm.new_attribute_error("class must define a '_type_' attribute")); + } + }; + + // Validate _type_ is a string + let type_str = type_attr.str(vm)?.to_string(); + + // Validate _type_ is a single character + if type_str.len() != 1 { + return Err(vm.new_value_error( + "class must define a '_type_' attribute which must be a string of length 1" + .to_owned(), + )); + } + + // Validate _type_ is a valid type character + if !SIMPLE_TYPE_CHARS.contains(type_str.as_str()) { + return Err(vm.new_attribute_error(format!( + "class must define a '_type_' attribute which must be a single character string containing one of '{}', currently it is '{}'.", + SIMPLE_TYPE_CHARS, type_str + ))); + } + + // Initialize StgInfo + let size = super::get_size(&type_str); + let align = super::get_align(&type_str); + let mut stg_info = StgInfo::new(size, align); + + // Set format for PEP 3118 buffer protocol + // Format is endian prefix + type code (e.g., "" + }; + stg_info.format = Some(format!("{}{}", endian_prefix, type_str)); + stg_info.paramfunc = super::base::ParamFunc::Simple; + + // Set TYPEFLAG_ISPOINTER for pointer types: z (c_char_p), Z (c_wchar_p), + // P (c_void_p), s (char array), X (BSTR), O (py_object) + if matches!(type_str.as_str(), "z" | "Z" | "P" | "s" | "X" | "O") { + stg_info.flags |= StgInfoFlags::TYPEFLAG_ISPOINTER; + } + + super::base::set_or_init_stginfo(&type_ref, stg_info); + + // Create __ctype_le__ and __ctype_be__ swapped types + create_swapped_types(&type_ref, &type_str, vm)?; + + Ok(()) + } +} + +/// Create __ctype_le__ and __ctype_be__ swapped byte order types +/// On little-endian systems: __ctype_le__ = self, __ctype_be__ = swapped type +/// On big-endian systems: __ctype_be__ = self, __ctype_le__ = swapped type +/// +/// - Single-byte types (c, b, B): __ctype_le__ = __ctype_be__ = self +/// - Pointer/unsupported types (z, Z, P, u, O): NO __ctype_le__/__ctype_be__ attributes +/// - Multi-byte numeric types (h, H, i, I, l, L, q, Q, f, d, g, ?): create swapped types +fn create_swapped_types( + type_ref: &Py, + type_str: &str, + vm: &VirtualMachine, +) -> PyResult<()> { + use crate::builtins::PyDict; + + // Avoid infinite recursion - if __ctype_le__ already exists, skip + if type_ref.as_object().get_attr("__ctype_le__", vm).is_ok() { + return Ok(()); + } + + // Types that don't support byte order swapping - no __ctype_le__/__ctype_be__ + // c_void_p (P), c_char_p (z), c_wchar_p (Z), c_wchar (u), py_object (O) + let unsupported_types = ["P", "z", "Z", "u", "O"]; + if unsupported_types.contains(&type_str) { + return Ok(()); + } + + // Single-byte types - __ctype_le__ = __ctype_be__ = self (no swapping needed) + // c_char (c), c_byte (b), c_ubyte (B) + let single_byte_types = ["c", "b", "B"]; + if single_byte_types.contains(&type_str) { + type_ref + .as_object() + .set_attr("__ctype_le__", type_ref.as_object().to_owned(), vm)?; + type_ref + .as_object() + .set_attr("__ctype_be__", type_ref.as_object().to_owned(), vm)?; + return Ok(()); + } + + // Multi-byte types - create swapped type + // Check system byte order at compile time + let is_little_endian = cfg!(target_endian = "little"); + + // Create dict for the swapped (non-native) type + let swapped_dict: crate::PyRef = PyDict::default().into_ref(&vm.ctx); + swapped_dict.set_item("_type_", vm.ctx.new_str(type_str).into(), vm)?; + + // Create the swapped type using the same metaclass + let metaclass = type_ref.class(); + let bases = vm.ctx.new_tuple(vec![type_ref.as_object().to_owned()]); + + // Set placeholder first to prevent recursion + type_ref + .as_object() + .set_attr("__ctype_le__", vm.ctx.none(), vm)?; + type_ref + .as_object() + .set_attr("__ctype_be__", vm.ctx.none(), vm)?; + + // Create only the non-native endian type + let suffix = if is_little_endian { "_be" } else { "_le" }; + let swapped_type = metaclass.as_object().call( + ( + vm.ctx.new_str(format!("{}{}", type_ref.name(), suffix)), + bases, + swapped_dict.as_object().to_owned(), + ), + vm, + )?; + + // Set _swappedbytes_ on the swapped type to indicate byte swapping is needed + swapped_type.set_attr("_swappedbytes_", vm.ctx.none(), vm)?; + + // Update swapped type's StgInfo format to use opposite endian prefix + // Native uses '<' on little-endian, '>' on big-endian + // Swapped uses the opposite + if let Ok(swapped_type_ref) = swapped_type.clone().downcast::() + && let Some(mut sw_stg) = swapped_type_ref.get_type_data_mut::() + { + let swapped_prefix = if is_little_endian { ">" } else { "<" }; + sw_stg.format = Some(format!("{}{}", swapped_prefix, type_str)); + } + + // Set attributes based on system byte order + // Native endian attribute points to self, non-native points to swapped type + if is_little_endian { + // Little-endian system: __ctype_le__ = self, __ctype_be__ = swapped + type_ref + .as_object() + .set_attr("__ctype_le__", type_ref.as_object().to_owned(), vm)?; + type_ref + .as_object() + .set_attr("__ctype_be__", swapped_type.clone(), vm)?; + swapped_type.set_attr("__ctype_le__", type_ref.as_object().to_owned(), vm)?; + swapped_type.set_attr("__ctype_be__", swapped_type.clone(), vm)?; + } else { + // Big-endian system: __ctype_be__ = self, __ctype_le__ = swapped + type_ref + .as_object() + .set_attr("__ctype_be__", type_ref.as_object().to_owned(), vm)?; + type_ref + .as_object() + .set_attr("__ctype_le__", swapped_type.clone(), vm)?; + swapped_type.set_attr("__ctype_be__", type_ref.as_object().to_owned(), vm)?; + swapped_type.set_attr("__ctype_le__", swapped_type.clone(), vm)?; + } + + Ok(()) +} + +#[pyclass( + module = "_ctypes", + name = "_SimpleCData", + base = PyCData, + metaclass = "PyCSimpleType" +)] +#[repr(transparent)] +pub struct PyCSimple(pub PyCData); + +impl Debug for PyCSimple { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PyCSimple") + .field("size", &self.0.buffer.read().len()) + .finish() + } +} + +fn value_to_bytes_endian( + _type_: &str, + value: &PyObject, + swapped: bool, + vm: &VirtualMachine, +) -> Vec { + // Helper macro for endian conversion + macro_rules! to_bytes { + ($val:expr) => { + if swapped { + // Use opposite endianness + #[cfg(target_endian = "little")] + { + $val.to_be_bytes().to_vec() + } + #[cfg(target_endian = "big")] + { + $val.to_le_bytes().to_vec() + } + } else { + $val.to_ne_bytes().to_vec() + } + }; + } + + match _type_ { + "c" => { + // c_char - single byte (bytes, bytearray, or int 0-255) + if let Some(bytes) = value.downcast_ref::() + && !bytes.is_empty() + { + return vec![bytes.as_bytes()[0]]; + } + if let Some(bytearray) = value.downcast_ref::() { + let buf = bytearray.borrow_buf(); + if !buf.is_empty() { + return vec![buf[0]]; + } + } + if let Ok(int_val) = value.try_int(vm) + && let Some(v) = int_val.as_bigint().to_u8() + { + return vec![v]; + } + vec![0] + } + "u" => { + // c_wchar - platform-dependent size (2 on Windows, 4 on Unix) + if let Ok(s) = value.str(vm) + && let Some(c) = s.as_str().chars().next() + { + let mut buffer = vec![0u8; WCHAR_SIZE]; + wchar_to_bytes(c as u32, &mut buffer); + if swapped { + buffer.reverse(); + } + return buffer; + } + vec![0; WCHAR_SIZE] + } + "b" => { + // c_byte - signed char (1 byte) + // PyLong_AsLongMask pattern: wrapping for overflow values + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_i128().unwrap_or(0) as i8; + return vec![v as u8]; + } + vec![0] + } + "B" => { + // c_ubyte - unsigned char (1 byte) + // PyLong_AsUnsignedLongMask: wrapping for negative values + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_i128().map(|n| n as u8).unwrap_or(0); + return vec![v]; + } + vec![0] + } + "h" => { + // c_short (2 bytes) + // PyLong_AsLongMask pattern: wrapping for overflow values + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_i128().unwrap_or(0) as i16; + return to_bytes!(v); + } + vec![0; 2] + } + "H" => { + // c_ushort (2 bytes) + // PyLong_AsUnsignedLongMask: wrapping for negative values + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_i128().map(|n| n as u16).unwrap_or(0); + return to_bytes!(v); + } + vec![0; 2] + } + "i" => { + // c_int (4 bytes) + // PyLong_AsLongMask pattern: wrapping for overflow values + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_i128().unwrap_or(0) as i32; + return to_bytes!(v); + } + vec![0; 4] + } + "I" => { + // c_uint (4 bytes) + // PyLong_AsUnsignedLongMask: wrapping for negative values + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_i128().map(|n| n as u32).unwrap_or(0); + return to_bytes!(v); + } + vec![0; 4] + } + "l" => { + // c_long (platform dependent) + // PyLong_AsLongMask pattern: wrapping for overflow values + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_i128().unwrap_or(0) as libc::c_long; + return to_bytes!(v); + } + const SIZE: usize = std::mem::size_of::(); + vec![0; SIZE] + } + "L" => { + // c_ulong (platform dependent) + // PyLong_AsUnsignedLongMask: wrapping for negative values + if let Ok(int_val) = value.try_index(vm) { + let v = int_val + .as_bigint() + .to_i128() + .map(|n| n as libc::c_ulong) + .unwrap_or(0); + return to_bytes!(v); + } + const SIZE: usize = std::mem::size_of::(); + vec![0; SIZE] + } + "q" => { + // c_longlong (8 bytes) + // PyLong_AsLongMask pattern: wrapping for overflow values + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_i128().unwrap_or(0) as i64; + return to_bytes!(v); + } + vec![0; 8] + } + "Q" => { + // c_ulonglong (8 bytes) + // PyLong_AsUnsignedLongLongMask: wrapping for negative values + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_i128().map(|n| n as u64).unwrap_or(0); + return to_bytes!(v); + } + vec![0; 8] + } + "f" => { + // c_float (4 bytes) - also accepts int + if let Ok(float_val) = value.try_float(vm) { + return to_bytes!(float_val.to_f64() as f32); + } + if let Ok(int_val) = value.try_int(vm) + && let Some(v) = int_val.as_bigint().to_f64() + { + return to_bytes!(v as f32); + } + vec![0; 4] + } + "d" => { + // c_double (8 bytes) - also accepts int + if let Ok(float_val) = value.try_float(vm) { + return to_bytes!(float_val.to_f64()); + } + if let Ok(int_val) = value.try_int(vm) + && let Some(v) = int_val.as_bigint().to_f64() + { + return to_bytes!(v); + } + vec![0; 8] + } + "g" => { + // long double - platform dependent size + // Store as f64, zero-pad to platform long double size + // Note: This may lose precision on platforms where long double > 64 bits + let f64_val = if let Ok(float_val) = value.try_float(vm) { + float_val.to_f64() + } else if let Ok(int_val) = value.try_int(vm) { + int_val.as_bigint().to_f64().unwrap_or(0.0) + } else { + 0.0 + }; + let f64_bytes = if swapped { + #[cfg(target_endian = "little")] + { + f64_val.to_be_bytes().to_vec() + } + #[cfg(target_endian = "big")] + { + f64_val.to_le_bytes().to_vec() + } + } else { + f64_val.to_ne_bytes().to_vec() + }; + // Pad to long double size + let long_double_size = super::get_size("g"); + let mut result = f64_bytes; + result.resize(long_double_size, 0); + result + } + "?" => { + // c_bool (1 byte) + if let Ok(b) = value.to_owned().try_to_bool(vm) { + return vec![if b { 1 } else { 0 }]; + } + vec![0] + } + "v" => { + // VARIANT_BOOL: True = 0xFFFF (-1 as i16), False = 0x0000 + if let Ok(b) = value.to_owned().try_to_bool(vm) { + let val: i16 = if b { -1 } else { 0 }; + return to_bytes!(val); + } + vec![0; 2] + } + "P" => { + // c_void_p - pointer type (platform pointer size) + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_usize().unwrap_or(0); + return to_bytes!(v); + } + vec![0; std::mem::size_of::()] + } + "z" => { + // c_char_p - pointer to char (stores pointer value from int) + // PyBytes case is handled in slot_new/set_value with make_z_buffer() + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_usize().unwrap_or(0); + return to_bytes!(v); + } + vec![0; std::mem::size_of::()] + } + "Z" => { + // c_wchar_p - pointer to wchar_t (stores pointer value from int) + // PyStr case is handled in slot_new/set_value with make_wchar_buffer() + if let Ok(int_val) = value.try_index(vm) { + let v = int_val.as_bigint().to_usize().unwrap_or(0); + return to_bytes!(v); + } + vec![0; std::mem::size_of::()] + } + "O" => { + // py_object - store object id as non-zero marker + // The actual object is stored in _objects + // Use object's id as a non-zero placeholder (indicates non-NULL) + let id = value.get_id(); + to_bytes!(id) + } + _ => vec![0], + } +} + +/// Check if value is a c_char array or pointer(c_char) +fn is_cchar_array_or_pointer(value: &PyObject, vm: &VirtualMachine) -> bool { + // Check Array with c_char element type + if let Some(arr) = value.downcast_ref::() + && let Some(info) = arr.class().stg_info_opt() + && let Some(ref elem_type) = info.element_type + && let Some(elem_code) = elem_type.class().type_code(vm) + { + return elem_code == "c"; + } + // Check Pointer to c_char + if let Some(ptr) = value.downcast_ref::() + && let Some(info) = ptr.class().stg_info_opt() + && let Some(ref proto) = info.proto + && let Some(proto_code) = proto.class().type_code(vm) + { + return proto_code == "c"; + } + false +} + +/// Check if value is a c_wchar array or pointer(c_wchar) +fn is_cwchar_array_or_pointer(value: &PyObject, vm: &VirtualMachine) -> bool { + // Check Array with c_wchar element type + if let Some(arr) = value.downcast_ref::() { + let info = arr.class().stg_info_opt().expect("array has StgInfo"); + let elem_type = info.element_type.as_ref().expect("array has element_type"); + if let Some(elem_code) = elem_type.class().type_code(vm) { + return elem_code == "u"; + } + } + // Check Pointer to c_wchar + if let Some(ptr) = value.downcast_ref::() { + let info = ptr.class().stg_info_opt().expect("pointer has StgInfo"); + if let Some(ref proto) = info.proto + && let Some(proto_code) = proto.class().type_code(vm) + { + return proto_code == "u"; + } + } + false +} + +impl Constructor for PyCSimple { + type Args = (OptionalArg,); + + fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { + let args: Self::Args = args.bind(vm)?; + let _type_ = cls + .type_code(vm) + .ok_or_else(|| vm.new_type_error("abstract class"))?; + // Save the initial argument for c_char_p/c_wchar_p _objects + let init_arg = args.0.into_option(); + + // Handle z/Z types with PyBytes/PyStr separately to avoid memory leak + if let Some(ref v) = init_arg { + if _type_ == "z" { + if let Some(bytes) = v.downcast_ref::() { + let (converted, ptr) = super::base::ensure_z_null_terminated(bytes, vm); + let buffer = ptr.to_ne_bytes().to_vec(); + let cdata = PyCData::from_bytes(buffer, Some(converted)); + return PyCSimple(cdata).into_ref_with_type(vm, cls).map(Into::into); + } + } else if _type_ == "Z" + && let Some(s) = v.downcast_ref::() + { + let (holder, ptr) = super::base::str_to_wchar_bytes(s.as_str(), vm); + let buffer = ptr.to_ne_bytes().to_vec(); + let cdata = PyCData::from_bytes(buffer, Some(holder)); + return PyCSimple(cdata).into_ref_with_type(vm, cls).map(Into::into); + } + } + + let value = if let Some(ref v) = init_arg { + set_primitive(_type_.as_str(), v, vm)? + } else { + match _type_.as_str() { + "c" | "u" => PyObjectRef::from(vm.ctx.new_bytes(vec![0])), + "b" | "B" | "h" | "H" | "i" | "I" | "l" | "q" | "L" | "Q" => { + PyObjectRef::from(vm.ctx.new_int(0)) + } + "f" | "d" | "g" => PyObjectRef::from(vm.ctx.new_float(0.0)), + "?" => PyObjectRef::from(vm.ctx.new_bool(false)), + _ => vm.ctx.none(), // "z" | "Z" | "P" + } + }; + + // Check if this is a swapped endian type (presence of attribute indicates swapping) + let swapped = cls.as_object().get_attr("_swappedbytes_", vm).is_ok(); + + let buffer = value_to_bytes_endian(&_type_, &value, swapped, vm); + + // For c_char_p (type "z"), c_wchar_p (type "Z"), and py_object (type "O"), + // store the initial value in _objects + let objects = if (_type_ == "z" || _type_ == "Z" || _type_ == "O") && init_arg.is_some() { + init_arg + } else { + None + }; + + PyCSimple(PyCData::from_bytes(buffer, objects)) + .into_ref_with_type(vm, cls) + .map(Into::into) + } + + fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { + unimplemented!("use slot_new") + } +} + +// Simple_repr +impl Representable for PyCSimple { + fn repr_str(zelf: &Py, vm: &VirtualMachine) -> PyResult { + let cls = zelf.class(); + let type_name = cls.name(); + + // Check if base is _SimpleCData (direct simple type like c_int, c_char) + // vs subclass of simple type (like class X(c_int): pass) + let bases = cls.bases.read(); + let is_direct_simple = bases + .iter() + .any(|base| base.name().to_string() == "_SimpleCData"); + + if is_direct_simple { + // Direct SimpleCData: "typename(repr(value))" + let value = PyCSimple::value(zelf.to_owned().into(), vm)?; + let value_repr = value.repr(vm)?.to_string(); + Ok(format!("{}({})", type_name, value_repr)) + } else { + // Subclass: "" + let addr = zelf.get_id(); + Ok(format!("<{} object at {:#x}>", type_name, addr)) + } + } +} + +#[pyclass(flags(BASETYPE), with(Constructor, AsBuffer, AsNumber, Representable))] +impl PyCSimple { + #[pygetset] + fn _b0_(&self) -> Option { + self.0.base.read().clone() + } + + /// return True if any byte in buffer is non-zero + #[pymethod] + fn __bool__(&self) -> bool { + let buffer = self.0.buffer.read(); + // Simple_bool: memcmp(self->b_ptr, zeros, self->b_size) + buffer.iter().any(|&b| b != 0) + } + + #[pygetset] + pub fn value(instance: PyObjectRef, vm: &VirtualMachine) -> PyResult { + let zelf: &Py = instance + .downcast_ref() + .ok_or_else(|| vm.new_type_error("cannot get value of instance"))?; + + // Get _type_ from class + let cls = zelf.class(); + let type_attr = cls + .as_object() + .get_attr("_type_", vm) + .map_err(|_| vm.new_type_error("no _type_ attribute"))?; + let type_code = type_attr.str(vm)?.to_string(); + + // Special handling for c_char_p (z) and c_wchar_p (Z) + // z_get, Z_get - dereference pointer to get string + if type_code == "z" { + // c_char_p: read pointer from buffer, dereference to get bytes string + let buffer = zelf.0.buffer.read(); + let ptr = super::base::read_ptr_from_buffer(&buffer); + if ptr == 0 { + return Ok(vm.ctx.none()); + } + // Read null-terminated string at the address + unsafe { + let cstr = std::ffi::CStr::from_ptr(ptr as _); + return Ok(vm.ctx.new_bytes(cstr.to_bytes().to_vec()).into()); + } + } + if type_code == "Z" { + // c_wchar_p: read pointer from buffer, dereference to get wide string + let buffer = zelf.0.buffer.read(); + let ptr = super::base::read_ptr_from_buffer(&buffer); + if ptr == 0 { + return Ok(vm.ctx.none()); + } + // Read null-terminated wide string at the address + unsafe { + let w_ptr = ptr as *const libc::wchar_t; + let len = libc::wcslen(w_ptr); + let wchars = std::slice::from_raw_parts(w_ptr, len); + let s: String = wchars + .iter() + .filter_map(|&c| char::from_u32(c as u32)) + .collect(); + return Ok(vm.ctx.new_str(s).into()); + } + } + + // O_get: py_object - read PyObject pointer from buffer + if type_code == "O" { + let buffer = zelf.0.buffer.read(); + let ptr = super::base::read_ptr_from_buffer(&buffer); + if ptr == 0 { + return Err(vm.new_value_error("PyObject is NULL")); + } + // Non-NULL: return stored object from _objects if available + if let Some(obj) = zelf.0.objects.read().as_ref() { + return Ok(obj.clone()); + } + return Err(vm.new_value_error("PyObject is NULL")); + } + + // Check if this is a swapped endian type (presence of attribute indicates swapping) + let swapped = cls.as_object().get_attr("_swappedbytes_", vm).is_ok(); + + // Read value from buffer, swap bytes if needed + let buffer = zelf.0.buffer.read(); + let buffer_data: std::borrow::Cow<'_, [u8]> = if swapped { + // Reverse bytes for swapped endian types + let mut swapped_bytes = buffer.to_vec(); + swapped_bytes.reverse(); + std::borrow::Cow::Owned(swapped_bytes) + } else { + std::borrow::Cow::Borrowed(&*buffer) + }; + + let cls_ref = cls.to_owned(); + bytes_to_pyobject(&cls_ref, &buffer_data, vm).or_else(|_| { + // Fallback: return bytes as integer based on type + match type_code.as_str() { + "c" => { + if !buffer.is_empty() { + Ok(vm.ctx.new_bytes(vec![buffer[0]]).into()) + } else { + Ok(vm.ctx.new_bytes(vec![0]).into()) + } + } + "?" => { + let val = buffer.first().copied().unwrap_or(0); + Ok(vm.ctx.new_bool(val != 0).into()) + } + _ => Ok(vm.ctx.new_int(0).into()), + } + }) + } + + #[pygetset(setter)] + fn set_value(instance: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + let zelf: PyRef = instance + .clone() + .downcast() + .map_err(|_| vm.new_type_error("cannot set value of instance"))?; + + // Get _type_ from class + let cls = zelf.class(); + let type_attr = cls + .as_object() + .get_attr("_type_", vm) + .map_err(|_| vm.new_type_error("no _type_ attribute"))?; + let type_code = type_attr.str(vm)?.to_string(); + + // Handle z/Z types with PyBytes/PyStr separately to avoid memory leak + if type_code == "z" { + if let Some(bytes) = value.downcast_ref::() { + let (converted, ptr) = super::base::ensure_z_null_terminated(bytes, vm); + *zelf.0.buffer.write() = std::borrow::Cow::Owned(ptr.to_ne_bytes().to_vec()); + *zelf.0.objects.write() = Some(converted); + return Ok(()); + } + } else if type_code == "Z" + && let Some(s) = value.downcast_ref::() + { + let (holder, ptr) = super::base::str_to_wchar_bytes(s.as_str(), vm); + *zelf.0.buffer.write() = std::borrow::Cow::Owned(ptr.to_ne_bytes().to_vec()); + *zelf.0.objects.write() = Some(holder); + return Ok(()); + } + + let content = set_primitive(&type_code, &value, vm)?; + + // Check if this is a swapped endian type (presence of attribute indicates swapping) + let swapped = instance + .class() + .as_object() + .get_attr("_swappedbytes_", vm) + .is_ok(); + + // Update buffer when value changes + let buffer_bytes = value_to_bytes_endian(&type_code, &content, swapped, vm); + *zelf.0.buffer.write() = std::borrow::Cow::Owned(buffer_bytes); + + // For c_char_p (type "z"), c_wchar_p (type "Z"), and py_object (type "O"), + // keep the reference in _objects + if type_code == "z" || type_code == "Z" || type_code == "O" { + *zelf.0.objects.write() = Some(value); + } + Ok(()) + } + + #[pyclassmethod] + fn repeat(cls: PyTypeRef, n: isize, vm: &VirtualMachine) -> PyResult { + use super::array::array_type_from_ctype; + + if n < 0 { + return Err(vm.new_value_error(format!("Array length must be >= 0, not {n}"))); + } + // Use cached array type creation + array_type_from_ctype(cls.into(), n as usize, vm) + } + + /// Simple_from_outparm - convert output parameter back to Python value + /// For direct subclasses of _SimpleCData (e.g., c_int), returns the value. + /// For subclasses of those (e.g., class MyInt(c_int)), returns self. + #[pymethod] + fn __ctypes_from_outparam__(zelf: PyRef, vm: &VirtualMachine) -> PyResult { + // _ctypes_simple_instance: returns true if NOT a direct subclass of Simple_Type + // i.e., c_int (direct) -> false, MyInt(c_int) (subclass) -> true + let is_subclass_of_simple = { + let cls = zelf.class(); + let bases = cls.bases.read(); + // If base is NOT _SimpleCData, then it's a subclass of a subclass + !bases + .iter() + .any(|base| base.name().to_string() == "_SimpleCData") + }; + + if is_subclass_of_simple { + // Subclass of simple type (e.g., MyInt(c_int)): return self + Ok(zelf.into()) + } else { + // Direct simple type (e.g., c_int): return value + PyCSimple::value(zelf.into(), vm) + } + } +} + +impl PyCSimple { + /// Extract the value from this ctypes object as an owned FfiArgValue. + /// The value must be kept alive until after the FFI call completes. + pub fn to_ffi_value( + &self, + ty: libffi::middle::Type, + _vm: &VirtualMachine, + ) -> Option { + let buffer = self.0.buffer.read(); + let bytes: &[u8] = &buffer; + + if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::u8().as_raw_ptr()) { + if !bytes.is_empty() { + return Some(FfiArgValue::U8(bytes[0])); + } + } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::i8().as_raw_ptr()) { + if !bytes.is_empty() { + return Some(FfiArgValue::I8(bytes[0] as i8)); + } + } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::u16().as_raw_ptr()) { + if bytes.len() >= 2 { + return Some(FfiArgValue::U16(u16::from_ne_bytes([bytes[0], bytes[1]]))); + } + } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::i16().as_raw_ptr()) { + if bytes.len() >= 2 { + return Some(FfiArgValue::I16(i16::from_ne_bytes([bytes[0], bytes[1]]))); + } + } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::u32().as_raw_ptr()) { + if bytes.len() >= 4 { + return Some(FfiArgValue::U32(u32::from_ne_bytes([ + bytes[0], bytes[1], bytes[2], bytes[3], + ]))); + } + } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::i32().as_raw_ptr()) { + if bytes.len() >= 4 { + return Some(FfiArgValue::I32(i32::from_ne_bytes([ + bytes[0], bytes[1], bytes[2], bytes[3], + ]))); + } + } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::u64().as_raw_ptr()) { + if bytes.len() >= 8 { + return Some(FfiArgValue::U64(u64::from_ne_bytes([ + bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], + ]))); + } + } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::i64().as_raw_ptr()) { + if bytes.len() >= 8 { + return Some(FfiArgValue::I64(i64::from_ne_bytes([ + bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], + ]))); + } + } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::f32().as_raw_ptr()) { + if bytes.len() >= 4 { + return Some(FfiArgValue::F32(f32::from_ne_bytes([ + bytes[0], bytes[1], bytes[2], bytes[3], + ]))); + } + } else if std::ptr::eq(ty.as_raw_ptr(), libffi::middle::Type::f64().as_raw_ptr()) { + if bytes.len() >= 8 { + return Some(FfiArgValue::F64(f64::from_ne_bytes([ + bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], + ]))); + } + } else if std::ptr::eq( + ty.as_raw_ptr(), + libffi::middle::Type::pointer().as_raw_ptr(), + ) && bytes.len() >= std::mem::size_of::() + { + let val = + usize::from_ne_bytes(bytes[..std::mem::size_of::()].try_into().unwrap()); + return Some(FfiArgValue::Pointer(val)); + } + None + } +} + +impl AsBuffer for PyCSimple { + fn as_buffer(zelf: &Py, _vm: &VirtualMachine) -> PyResult { + let buffer_len = zelf.0.buffer.read().len(); + let buf = PyBuffer::new( + zelf.to_owned().into(), + BufferDescriptor::simple(buffer_len, false), // readonly=false for ctypes + &CDATA_BUFFER_METHODS, + ); + Ok(buf) + } +} + +/// Simple_bool: return non-zero if any byte in buffer is non-zero +impl AsNumber for PyCSimple { + fn as_number() -> &'static PyNumberMethods { + static AS_NUMBER: PyNumberMethods = PyNumberMethods { + boolean: Some(|obj, _vm| { + let zelf = obj + .downcast_ref::() + .expect("PyCSimple::as_number called on non-PyCSimple"); + let buffer = zelf.0.buffer.read(); + // Simple_bool: memcmp(self->b_ptr, zeros, self->b_size) + // Returns true if any byte is non-zero + Ok(buffer.iter().any(|&b| b != 0)) + }), + ..PyNumberMethods::NOT_IMPLEMENTED + }; + &AS_NUMBER + } +} diff --git a/crates/vm/src/stdlib/ctypes/structure.rs b/crates/vm/src/stdlib/ctypes/structure.rs index ca67a2fe7d6..10b8812e42c 100644 --- a/crates/vm/src/stdlib/ctypes/structure.rs +++ b/crates/vm/src/stdlib/ctypes/structure.rs @@ -1,42 +1,60 @@ -use super::base::{CDataObject, PyCData}; -use super::field::PyCField; -use super::util::StgInfo; +use super::base::{CDATA_BUFFER_METHODS, PyCData, PyCField, StgInfo, StgInfoFlags}; use crate::builtins::{PyList, PyStr, PyTuple, PyType, PyTypeRef}; use crate::convert::ToPyObject; use crate::function::FuncArgs; -use crate::protocol::{BufferDescriptor, BufferMethods, PyBuffer, PyNumberMethods}; -use crate::stdlib::ctypes::_ctypes::get_size; -use crate::types::{AsBuffer, AsNumber, Constructor}; -use crate::{AsObject, Py, PyObject, PyObjectRef, PyPayload, PyResult, VirtualMachine}; -use indexmap::IndexMap; +use crate::function::PySetterValue; +use crate::protocol::{BufferDescriptor, PyBuffer, PyNumberMethods}; +use crate::types::{AsBuffer, AsNumber, Constructor, Initializer, SetAttr}; +use crate::{AsObject, Py, PyObjectRef, PyPayload, PyResult, VirtualMachine}; use num_traits::ToPrimitive; -use rustpython_common::lock::PyRwLock; +use std::borrow::Cow; use std::fmt::Debug; +/// Calculate Structure type size from _fields_ (sum of field sizes) +pub(super) fn calculate_struct_size(cls: &Py, vm: &VirtualMachine) -> PyResult { + if let Ok(fields_attr) = cls.as_object().get_attr("_fields_", vm) { + let fields: Vec = fields_attr.try_to_value(vm)?; + let mut total_size = 0usize; + + for field in fields.iter() { + if let Some(tuple) = field.downcast_ref::() + && let Some(field_type) = tuple.get(1) + { + total_size += super::_ctypes::sizeof(field_type.clone(), vm)?; + } + } + return Ok(total_size); + } + Ok(0) +} + /// PyCStructType - metaclass for Structure #[pyclass(name = "PyCStructType", base = PyType, module = "_ctypes")] #[derive(Debug)] #[repr(transparent)] -pub struct PyCStructType(PyType); +pub(super) struct PyCStructType(PyType); impl Constructor for PyCStructType { type Args = FuncArgs; fn slot_new(metatype: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { - // 1. Create the new class using PyType::py_new + // 1. Create the new class using PyType::slot_new let new_class = crate::builtins::type_::PyType::slot_new(metatype, args, vm)?; - // 2. Process _fields_ if defined on the new class + // 2. Get the new type let new_type = new_class .clone() .downcast::() .map_err(|_| vm.new_type_error("expected type"))?; - // Only process _fields_ if defined directly on this class (not inherited) - if let Some(fields_attr) = new_type.get_direct_attr(vm.ctx.intern_str("_fields_")) { - Self::process_fields(&new_type, fields_attr, vm)?; - } + // 3. Mark base classes as finalized (subclassing finalizes the parent) + new_type.mark_bases_final(); + + // 4. Initialize StgInfo for the new type (initialized=false, to be set in init) + let stg_info = StgInfo::default(); + let _ = new_type.init_type_data(stg_info); + // Note: _fields_ processing moved to Initializer::init() Ok(new_class) } @@ -45,11 +63,102 @@ impl Constructor for PyCStructType { } } -#[pyclass(flags(BASETYPE), with(AsNumber, Constructor))] +impl Initializer for PyCStructType { + type Args = FuncArgs; + + fn init(zelf: crate::PyRef, _args: Self::Args, vm: &VirtualMachine) -> PyResult<()> { + // Get the type as PyTypeRef by converting PyRef -> PyObjectRef -> PyRef + let obj: PyObjectRef = zelf.clone().into(); + let new_type: PyTypeRef = obj + .downcast() + .map_err(|_| vm.new_type_error("expected type"))?; + + // Backward compatibility: skip initialization for abstract types + if new_type + .get_direct_attr(vm.ctx.intern_str("_abstract_")) + .is_some() + { + return Ok(()); + } + + new_type.check_not_initialized(vm)?; + + // Process _fields_ if defined directly on this class (not inherited) + if let Some(fields_attr) = new_type.get_direct_attr(vm.ctx.intern_str("_fields_")) { + Self::process_fields(&new_type, fields_attr, vm)?; + } else { + // No _fields_ defined - try to copy from base class (PyCStgInfo_clone) + let (has_base_info, base_clone) = { + let bases = new_type.bases.read(); + if let Some(base) = bases.first() { + (base.stg_info_opt().is_some(), Some(base.clone())) + } else { + (false, None) + } + }; + + if has_base_info && let Some(ref base) = base_clone { + // Clone base StgInfo (release guard before getting mutable reference) + let stg_info_opt = base.stg_info_opt().map(|baseinfo| { + let mut stg_info = baseinfo.clone(); + stg_info.flags &= !StgInfoFlags::DICTFLAG_FINAL; // Clear FINAL in subclass + stg_info.initialized = true; + stg_info + }); + + if let Some(stg_info) = stg_info_opt { + // Mark base as FINAL (now guard is released) + if let Some(mut base_stg) = base.get_type_data_mut::() { + base_stg.flags |= StgInfoFlags::DICTFLAG_FINAL; + } + + super::base::set_or_init_stginfo(&new_type, stg_info); + return Ok(()); + } + } + + // No base StgInfo - create default + let mut stg_info = StgInfo::new(0, 1); + stg_info.paramfunc = super::base::ParamFunc::Structure; + stg_info.format = Some("B".to_string()); + super::base::set_or_init_stginfo(&new_type, stg_info); + } + + Ok(()) + } +} + +#[pyclass(flags(BASETYPE), with(AsNumber, Constructor, Initializer, SetAttr))] impl PyCStructType { + #[pymethod] + fn from_param(zelf: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // zelf is the structure type class that from_param was called on + let cls = zelf + .downcast::() + .map_err(|_| vm.new_type_error("from_param: expected a type"))?; + + // 1. If already an instance of the requested type, return it + if value.is_instance(cls.as_object(), vm)? { + return Ok(value); + } + + // 2. Check for _as_parameter_ attribute + if let Ok(as_parameter) = value.get_attr("_as_parameter_", vm) { + return PyCStructType::from_param(cls.as_object().to_owned(), as_parameter, vm); + } + + Err(vm.new_type_error(format!( + "expected {} instance instead of {}", + cls.name(), + value.class().name() + ))) + } + /// Called when a new Structure subclass is created #[pyclassmethod] fn __init_subclass__(cls: PyTypeRef, vm: &VirtualMachine) -> PyResult<()> { + cls.mark_bases_final(); + // Check if _fields_ is defined if let Some(fields_attr) = cls.get_direct_attr(vm.ctx.intern_str("_fields_")) { Self::process_fields(&cls, fields_attr, vm)?; @@ -59,24 +168,63 @@ impl PyCStructType { /// Process _fields_ and create CField descriptors fn process_fields( - cls: &PyTypeRef, + cls: &Py, fields_attr: PyObjectRef, vm: &VirtualMachine, ) -> PyResult<()> { + // Check if this is a swapped byte order structure + let is_swapped = cls.as_object().get_attr("_swappedbytes_", vm).is_ok(); + // Try to downcast to list or tuple let fields: Vec = if let Some(list) = fields_attr.downcast_ref::() { list.borrow_vec().to_vec() } else if let Some(tuple) = fields_attr.downcast_ref::() { tuple.to_vec() } else { - return Err(vm.new_type_error("_fields_ must be a list or tuple".to_string())); + return Err(vm.new_type_error("_fields_ must be a list or tuple")); + }; + + let pack = super::base::get_usize_attr(cls.as_object(), "_pack_", 0, vm)?; + let forced_alignment = + super::base::get_usize_attr(cls.as_object(), "_align_", 1, vm)?.max(1); + + // Determine byte order for format string + let big_endian = super::base::is_big_endian(is_swapped); + + // Initialize offset, alignment, type flags, and ffi_field_types from base class + let ( + mut offset, + mut max_align, + mut has_pointer, + mut has_union, + mut has_bitfield, + mut ffi_field_types, + ) = { + let bases = cls.bases.read(); + if let Some(base) = bases.first() + && let Some(baseinfo) = base.stg_info_opt() + { + ( + baseinfo.size, + std::cmp::max(baseinfo.align, forced_alignment), + baseinfo.flags.contains(StgInfoFlags::TYPEFLAG_HASPOINTER), + baseinfo.flags.contains(StgInfoFlags::TYPEFLAG_HASUNION), + baseinfo.flags.contains(StgInfoFlags::TYPEFLAG_HASBITFIELD), + baseinfo.ffi_field_types.clone(), + ) + } else { + (0, forced_alignment, false, false, false, Vec::new()) + } }; - let mut offset = 0usize; + // Initialize PEP3118 format string + let mut format = String::from("T{"); + let mut last_end = 0usize; // Track end of last field for padding calculation + for (index, field) in fields.iter().enumerate() { let field_tuple = field .downcast_ref::() - .ok_or_else(|| vm.new_type_error("_fields_ must contain tuples".to_string()))?; + .ok_or_else(|| vm.new_type_error("_fields_ must contain tuples"))?; if field_tuple.len() < 2 { return Err(vm.new_type_error( @@ -86,99 +234,173 @@ impl PyCStructType { let name = field_tuple .first() - .unwrap() + .expect("len checked") .downcast_ref::() - .ok_or_else(|| vm.new_type_error("field name must be a string".to_string()))? + .ok_or_else(|| vm.new_type_error("field name must be a string"))? .to_string(); - let field_type = field_tuple.get(1).unwrap().clone(); + let field_type = field_tuple.get(1).expect("len checked").clone(); + + // For swapped byte order structures, validate field type supports byte swapping + if is_swapped { + super::base::check_other_endian_support(&field_type, vm)?; + } + + // Get size and alignment of the field type + let size = super::base::get_field_size(&field_type, vm)?; + let field_align = super::base::get_field_align(&field_type, vm); + + // Calculate effective alignment (PyCField_FromDesc) + let effective_align = if pack > 0 { + std::cmp::min(pack, field_align) + } else { + field_align + }; + + // Apply padding to align offset (cfield.c NO_BITFIELD case) + if effective_align > 0 && offset % effective_align != 0 { + let delta = effective_align - (offset % effective_align); + offset += delta; + } + + max_align = max_align.max(effective_align); + + // Propagate type flags from field type (HASPOINTER, HASUNION, HASBITFIELD) + if let Some(type_obj) = field_type.downcast_ref::() + && let Some(field_stg) = type_obj.stg_info_opt() + { + // HASPOINTER: propagate if field is pointer or contains pointer + if field_stg.flags.intersects( + StgInfoFlags::TYPEFLAG_ISPOINTER | StgInfoFlags::TYPEFLAG_HASPOINTER, + ) { + has_pointer = true; + } + // HASUNION, HASBITFIELD: propagate directly + if field_stg.flags.contains(StgInfoFlags::TYPEFLAG_HASUNION) { + has_union = true; + } + if field_stg.flags.contains(StgInfoFlags::TYPEFLAG_HASBITFIELD) { + has_bitfield = true; + } + // Collect FFI type for this field + ffi_field_types.push(field_stg.to_ffi_type()); + } + + // Mark field type as finalized (using type as field finalizes it) + if let Some(type_obj) = field_type.downcast_ref::() { + if let Some(mut stg_info) = type_obj.get_type_data_mut::() { + stg_info.flags |= StgInfoFlags::DICTFLAG_FINAL; + } else { + // Create StgInfo with FINAL flag if it doesn't exist + let mut stg_info = StgInfo::new(size, field_align); + stg_info.flags |= StgInfoFlags::DICTFLAG_FINAL; + let _ = type_obj.init_type_data(stg_info); + } + } + + // Build format string: add padding before field + let padding = offset - last_end; + if padding > 0 { + if padding != 1 { + format.push_str(&padding.to_string()); + } + format.push('x'); + } + + // Get field format and add to format string + let field_format = super::base::get_field_format(&field_type, big_endian, vm); + + // Handle arrays: prepend shape + if let Some(type_obj) = field_type.downcast_ref::() + && let Some(field_stg) = type_obj.stg_info_opt() + && !field_stg.shape.is_empty() + { + let shape_str = field_stg + .shape + .iter() + .map(|d| d.to_string()) + .collect::>() + .join(","); + format.push_str(&std::format!("({}){}", shape_str, field_format)); + } else { + format.push_str(&field_format); + } - // Get size of the field type - let size = Self::get_field_size(&field_type, vm)?; + // Add field name + format.push(':'); + format.push_str(&name); + format.push(':'); - // Create CField descriptor (accepts any ctypes type including arrays) - let c_field = PyCField::new(name.clone(), field_type, offset, size, index); + // Create CField descriptor with padding-adjusted offset + let field_type_ref = field_type + .clone() + .downcast::() + .map_err(|_| vm.new_type_error("_fields_ type must be a ctypes type"))?; + let c_field = PyCField::new(field_type_ref, offset as isize, size as isize, index); // Set the CField as a class attribute - cls.set_attr(vm.ctx.intern_str(name), c_field.to_pyobject(vm)); + cls.set_attr(vm.ctx.intern_str(name.clone()), c_field.to_pyobject(vm)); + // Update tracking + last_end = offset + size; offset += size; } - Ok(()) - } + // Calculate total_align = max(max_align, forced_alignment) + let total_align = std::cmp::max(max_align, forced_alignment); - /// Get the size of a ctypes type - fn get_field_size(field_type: &PyObject, vm: &VirtualMachine) -> PyResult { - // Try to get _type_ attribute for simple types - if let Some(size) = field_type - .get_attr("_type_", vm) - .ok() - .and_then(|type_attr| type_attr.str(vm).ok()) - .and_then(|type_str| { - let s = type_str.to_string(); - (s.len() == 1).then(|| get_size(&s)) - }) - { - return Ok(size); - } + // Calculate aligned_size (PyCStructUnionType_update_stginfo) + let aligned_size = if total_align > 0 { + offset.div_ceil(total_align) * total_align + } else { + offset + }; - // Try sizeof for other types - if let Some(s) = field_type - .get_attr("size_of_instances", vm) - .ok() - .and_then(|size_method| size_method.call((), vm).ok()) - .and_then(|size| size.try_int(vm).ok()) - .and_then(|n| n.as_bigint().to_usize()) - { - return Ok(s); + // Complete format string: add final padding and close + let final_padding = aligned_size - last_end; + if final_padding > 0 { + if final_padding != 1 { + format.push_str(&final_padding.to_string()); + } + format.push('x'); + } + format.push('}'); + + // Store StgInfo with aligned size and total alignment + let mut stg_info = StgInfo::new(aligned_size, total_align); + stg_info.format = Some(format); + stg_info.flags |= StgInfoFlags::DICTFLAG_FINAL; // Mark as finalized + if has_pointer { + stg_info.flags |= StgInfoFlags::TYPEFLAG_HASPOINTER; + } + if has_union { + stg_info.flags |= StgInfoFlags::TYPEFLAG_HASUNION; } + if has_bitfield { + stg_info.flags |= StgInfoFlags::TYPEFLAG_HASBITFIELD; + } + stg_info.paramfunc = super::base::ParamFunc::Structure; + // Set byte order: swap if _swappedbytes_ is defined + stg_info.big_endian = super::base::is_big_endian(is_swapped); + // Store FFI field types for structure passing + stg_info.ffi_field_types = ffi_field_types; + super::base::set_or_init_stginfo(cls, stg_info); - // Default to pointer size for unknown types - Ok(std::mem::size_of::()) - } + // Process _anonymous_ fields + super::base::make_anon_fields(cls, vm)?; - /// Get the alignment of a ctypes type - fn get_field_align(field_type: &PyObject, vm: &VirtualMachine) -> usize { - // Try to get _type_ attribute for simple types - if let Some(align) = field_type - .get_attr("_type_", vm) - .ok() - .and_then(|type_attr| type_attr.str(vm).ok()) - .and_then(|type_str| { - let s = type_str.to_string(); - (s.len() == 1).then(|| get_size(&s)) // alignment == size for simple types - }) - { - return align; - } - // Default alignment - 1 + Ok(()) } #[pymethod] fn __mul__(cls: PyTypeRef, n: isize, vm: &VirtualMachine) -> PyResult { - use super::array::create_array_type_with_stg_info; - use crate::stdlib::ctypes::_ctypes::size_of; + use super::array::array_type_from_ctype; if n < 0 { return Err(vm.new_value_error(format!("Array length must be >= 0, not {n}"))); } - - // Calculate element size from the Structure type - let element_size = size_of(cls.clone().into(), vm)?; - - let total_size = element_size - .checked_mul(n as usize) - .ok_or_else(|| vm.new_overflow_error("array size too large".to_owned()))?; - let stg_info = super::util::StgInfo::new_array( - total_size, - element_size, - n as usize, - cls.clone().into(), - element_size, - ); - create_array_type_with_stg_info(stg_info, vm) + // Use cached array type creation + array_type_from_ctype(cls.into(), n as usize, vm) } } @@ -188,12 +410,12 @@ impl AsNumber for PyCStructType { multiply: Some(|a, b, vm| { let cls = a .downcast_ref::() - .ok_or_else(|| vm.new_type_error("expected type".to_owned()))?; + .ok_or_else(|| vm.new_type_error("expected type"))?; let n = b .try_index(vm)? .as_bigint() .to_isize() - .ok_or_else(|| vm.new_overflow_error("array size too large".to_owned()))?; + .ok_or_else(|| vm.new_overflow_error("array size too large"))?; PyCStructType::__mul__(cls.to_owned(), n, vm) }), ..PyNumberMethods::NOT_IMPLEMENTED @@ -202,14 +424,70 @@ impl AsNumber for PyCStructType { } } -/// Structure field info stored in instance -#[allow(dead_code)] -#[derive(Debug, Clone)] -pub struct FieldInfo { - pub name: String, - pub offset: usize, - pub size: usize, - pub type_ref: PyTypeRef, +impl SetAttr for PyCStructType { + fn setattro( + zelf: &Py, + attr_name: &Py, + value: PySetterValue, + vm: &VirtualMachine, + ) -> PyResult<()> { + // Check if _fields_ is being set + if attr_name.as_str() == "_fields_" { + let pytype: &Py = zelf.to_base(); + + // Check finalization in separate scope to release read lock before process_fields + // This prevents deadlock: process_fields needs write lock on the same RwLock + let is_final = { + let Some(stg_info) = pytype.get_type_data::() else { + return Err(vm.new_type_error("ctypes state is not initialized")); + }; + stg_info.is_final() + }; // Read lock released here + + if is_final { + return Err(vm.new_attribute_error("_fields_ is final")); + } + + // Process _fields_ and set attribute + let PySetterValue::Assign(fields_value) = value else { + return Err(vm.new_attribute_error("cannot delete _fields_")); + }; + // Process fields (this will also set DICTFLAG_FINAL) + PyCStructType::process_fields(pytype, fields_value.clone(), vm)?; + // Set the _fields_ attribute on the type + pytype + .attributes + .write() + .insert(vm.ctx.intern_str("_fields_"), fields_value); + return Ok(()); + } + // Delegate to PyType's setattro logic for type attributes + let attr_name_interned = vm.ctx.intern_str(attr_name.as_str()); + let pytype: &Py = zelf.to_base(); + + // Check for data descriptor first + if let Some(attr) = pytype.get_class_attr(attr_name_interned) { + let descr_set = attr.class().mro_find_map(|cls| cls.slots.descr_set.load()); + if let Some(descriptor) = descr_set { + return descriptor(&attr, pytype.to_owned().into(), value, vm); + } + } + + // Store in type's attributes dict + if let PySetterValue::Assign(value) = value { + pytype.attributes.write().insert(attr_name_interned, value); + } else { + let prev = pytype.attributes.write().shift_remove(attr_name_interned); + if prev.is_none() { + return Err(vm.new_attribute_error(format!( + "type object '{}' has no attribute '{}'", + pytype.name(), + attr_name.as_str(), + ))); + } + } + Ok(()) + } } /// PyCStructure - base class for Structure instances @@ -219,19 +497,13 @@ pub struct FieldInfo { base = PyCData, metaclass = "PyCStructType" )] -pub struct PyCStructure { - _base: PyCData, - /// Common CDataObject for memory buffer - pub(super) cdata: PyRwLock, - /// Field information (name -> FieldInfo) - #[allow(dead_code)] - pub(super) fields: PyRwLock>, -} +#[repr(transparent)] +pub struct PyCStructure(pub PyCData); impl Debug for PyCStructure { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("PyCStructure") - .field("size", &self.cdata.read().size()) + .field("size", &self.0.size()) .finish() } } @@ -240,13 +512,22 @@ impl Constructor for PyCStructure { type Args = FuncArgs; fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { + // Check for abstract class and extract values in a block to drop the borrow + let (total_size, total_align, length) = { + let stg_info = cls.stg_info(vm)?; + (stg_info.size, stg_info.align, stg_info.length) + }; + + // Mark the class as finalized (instance creation finalizes the type) + if let Some(mut stg_info_mut) = cls.get_type_data_mut::() { + stg_info_mut.flags |= StgInfoFlags::DICTFLAG_FINAL; + } + // Get _fields_ from the class using get_attr to properly search MRO let fields_attr = cls.as_object().get_attr("_fields_", vm).ok(); - let mut fields_map = IndexMap::new(); - let mut total_size = 0usize; - let mut max_align = 1usize; - + // Collect field names for initialization + let mut field_names: Vec = Vec::new(); if let Some(fields_attr) = fields_attr { let fields: Vec = if let Some(list) = fields_attr.downcast_ref::() { @@ -257,7 +538,6 @@ impl Constructor for PyCStructure { vec![] }; - let mut offset = 0usize; for field in fields.iter() { let Some(field_tuple) = field.downcast_ref::() else { continue; @@ -265,43 +545,21 @@ impl Constructor for PyCStructure { if field_tuple.len() < 2 { continue; } - let Some(name) = field_tuple.first().unwrap().downcast_ref::() else { - continue; - }; - let name = name.to_string(); - let field_type = field_tuple.get(1).unwrap().clone(); - let size = PyCStructType::get_field_size(&field_type, vm)?; - let field_align = PyCStructType::get_field_align(&field_type, vm); - max_align = max_align.max(field_align); - - let type_ref = field_type - .downcast::() - .unwrap_or_else(|_| vm.ctx.types.object_type.to_owned()); - - fields_map.insert( - name.clone(), - FieldInfo { - name, - offset, - size, - type_ref, - }, - ); - - offset += size; + if let Some(name) = field_tuple.first().unwrap().downcast_ref::() { + field_names.push(name.to_string()); + } } - total_size = offset; } - // Initialize buffer with zeros - let mut stg_info = StgInfo::new(total_size, max_align); - stg_info.length = fields_map.len(); - let cdata = CDataObject::from_stg_info(&stg_info); - let instance = PyCStructure { - _base: PyCData::new(cdata.clone()), - cdata: PyRwLock::new(cdata), - fields: PyRwLock::new(fields_map.clone()), + // Initialize buffer with zeros using computed size + let mut stg_info = StgInfo::new(total_size, total_align); + stg_info.length = if length > 0 { + length + } else { + field_names.len() }; + stg_info.paramfunc = super::base::ParamFunc::Structure; + let instance = PyCStructure(PyCData::from_stg_info(&stg_info)); // Handle keyword arguments for field initialization let py_instance = instance.into_ref_with_type(vm, cls.clone())?; @@ -309,21 +567,21 @@ impl Constructor for PyCStructure { // Set field values from kwargs using standard attribute setting for (key, value) in args.kwargs.iter() { - if fields_map.contains_key(key.as_str()) { + if field_names.iter().any(|n| n == key.as_str()) { py_obj.set_attr(vm.ctx.intern_str(key.as_str()), value.clone(), vm)?; } } // Set field values from positional args - let field_names: Vec = fields_map.keys().cloned().collect(); + if args.args.len() > field_names.len() { + return Err(vm.new_type_error("too many initializers".to_string())); + } for (i, value) in args.args.iter().enumerate() { - if i < field_names.len() { - py_obj.set_attr( - vm.ctx.intern_str(field_names[i].as_str()), - value.clone(), - vm, - )?; - } + py_obj.set_attr( + vm.ctx.intern_str(field_names[i].as_str()), + value.clone(), + vm, + )?; } Ok(py_instance.into()) @@ -337,11 +595,11 @@ impl Constructor for PyCStructure { // Note: GetAttr and SetAttr are not implemented here. // Field access is handled by CField descriptors registered on the class. -#[pyclass(flags(BASETYPE, IMMUTABLETYPE), with(Constructor))] +#[pyclass(flags(BASETYPE, IMMUTABLETYPE), with(Constructor, AsBuffer))] impl PyCStructure { #[pygetset] - fn _objects(&self) -> Option { - self.cdata.read().objects.clone() + fn _b0_(&self) -> Option { + self.0.base.read().clone() } #[pygetset] @@ -349,165 +607,30 @@ impl PyCStructure { // Return the _fields_ from the class, not instance vm.ctx.none() } - - #[pyclassmethod] - fn from_address(cls: PyTypeRef, address: isize, vm: &VirtualMachine) -> PyResult { - use crate::stdlib::ctypes::_ctypes::size_of; - - // Get size from cls - let size = size_of(cls.clone().into(), vm)?; - - // Read data from address - if address == 0 || size == 0 { - return Err(vm.new_value_error("NULL pointer access".to_owned())); - } - let data = unsafe { - let ptr = address as *const u8; - std::slice::from_raw_parts(ptr, size).to_vec() - }; - - // Create instance - let cdata = CDataObject::from_bytes(data, None); - Ok(PyCStructure { - _base: PyCData::new(cdata.clone()), - cdata: PyRwLock::new(cdata), - fields: PyRwLock::new(IndexMap::new()), - } - .into_ref_with_type(vm, cls)? - .into()) - } - - #[pyclassmethod] - fn from_buffer( - cls: PyTypeRef, - source: PyObjectRef, - offset: crate::function::OptionalArg, - vm: &VirtualMachine, - ) -> PyResult { - use crate::TryFromObject; - use crate::protocol::PyBuffer; - use crate::stdlib::ctypes::_ctypes::size_of; - - let offset = offset.unwrap_or(0); - if offset < 0 { - return Err(vm.new_value_error("offset cannot be negative".to_owned())); - } - let offset = offset as usize; - - // Get buffer from source - let buffer = PyBuffer::try_from_object(vm, source.clone())?; - - // Check if buffer is writable - if buffer.desc.readonly { - return Err(vm.new_type_error("underlying buffer is not writable".to_owned())); - } - - // Get size from cls - let size = size_of(cls.clone().into(), vm)?; - - // Check if buffer is large enough - let buffer_len = buffer.desc.len; - if offset + size > buffer_len { - return Err(vm.new_value_error(format!( - "Buffer size too small ({} instead of at least {} bytes)", - buffer_len, - offset + size - ))); - } - - // Read bytes from buffer at offset - let bytes = buffer.obj_bytes(); - let data = bytes[offset..offset + size].to_vec(); - - // Create instance - let cdata = CDataObject::from_bytes(data, Some(source)); - Ok(PyCStructure { - _base: PyCData::new(cdata.clone()), - cdata: PyRwLock::new(cdata), - fields: PyRwLock::new(IndexMap::new()), - } - .into_ref_with_type(vm, cls)? - .into()) - } - - #[pyclassmethod] - fn from_buffer_copy( - cls: PyTypeRef, - source: crate::function::ArgBytesLike, - offset: crate::function::OptionalArg, - vm: &VirtualMachine, - ) -> PyResult { - use crate::stdlib::ctypes::_ctypes::size_of; - - let offset = offset.unwrap_or(0); - if offset < 0 { - return Err(vm.new_value_error("offset cannot be negative".to_owned())); - } - let offset = offset as usize; - - // Get size from cls - let size = size_of(cls.clone().into(), vm)?; - - // Borrow bytes from source - let source_bytes = source.borrow_buf(); - let buffer_len = source_bytes.len(); - - // Check if buffer is large enough - if offset + size > buffer_len { - return Err(vm.new_value_error(format!( - "Buffer size too small ({} instead of at least {} bytes)", - buffer_len, - offset + size - ))); - } - - // Copy bytes from buffer at offset - let data = source_bytes[offset..offset + size].to_vec(); - - // Create instance - let cdata = CDataObject::from_bytes(data, None); - Ok(PyCStructure { - _base: PyCData::new(cdata.clone()), - cdata: PyRwLock::new(cdata), - fields: PyRwLock::new(IndexMap::new()), - } - .into_ref_with_type(vm, cls)? - .into()) - } } -static STRUCTURE_BUFFER_METHODS: BufferMethods = BufferMethods { - obj_bytes: |buffer| { - rustpython_common::lock::PyMappedRwLockReadGuard::map( - rustpython_common::lock::PyRwLockReadGuard::map( - buffer.obj_as::().cdata.read(), - |x: &CDataObject| x, - ), - |x: &CDataObject| x.buffer.as_slice(), - ) - .into() - }, - obj_bytes_mut: |buffer| { - rustpython_common::lock::PyMappedRwLockWriteGuard::map( - rustpython_common::lock::PyRwLockWriteGuard::map( - buffer.obj_as::().cdata.write(), - |x: &mut CDataObject| x, - ), - |x: &mut CDataObject| x.buffer.as_mut_slice(), - ) - .into() - }, - release: |_| {}, - retain: |_| {}, -}; - impl AsBuffer for PyCStructure { fn as_buffer(zelf: &Py, _vm: &VirtualMachine) -> PyResult { - let buffer_len = zelf.cdata.read().buffer.len(); + let buffer_len = zelf.0.buffer.read().len(); + + // PyCData_NewGetBuffer: use info->format if available, otherwise "B" + let format = zelf + .class() + .stg_info_opt() + .and_then(|info| info.format.clone()) + .unwrap_or_else(|| "B".to_string()); + + // Structure: ndim=0, shape=(), itemsize=struct_size let buf = PyBuffer::new( zelf.to_owned().into(), - BufferDescriptor::simple(buffer_len, false), // readonly=false for ctypes - &STRUCTURE_BUFFER_METHODS, + BufferDescriptor { + len: buffer_len, + readonly: false, + itemsize: buffer_len, + format: Cow::Owned(format), + dim_desc: vec![], // ndim=0 means empty dim_desc + }, + &CDATA_BUFFER_METHODS, ); Ok(buf) } diff --git a/crates/vm/src/stdlib/ctypes/thunk.rs b/crates/vm/src/stdlib/ctypes/thunk.rs deleted file mode 100644 index 2de2308e1a3..00000000000 --- a/crates/vm/src/stdlib/ctypes/thunk.rs +++ /dev/null @@ -1,319 +0,0 @@ -//! FFI callback (thunk) implementation for ctypes. -//! -//! This module implements CThunkObject which wraps Python callables -//! to be callable from C code via libffi closures. - -use crate::builtins::{PyStr, PyType, PyTypeRef}; -use crate::vm::thread::with_current_vm; -use crate::{PyObjectRef, PyPayload, PyResult, VirtualMachine}; -use libffi::low; -use libffi::middle::{Cif, Closure, CodePtr, Type}; -use num_traits::ToPrimitive; -use rustpython_common::lock::PyRwLock; -use std::ffi::c_void; -use std::fmt::Debug; - -use super::base::ffi_type_from_str; -/// Userdata passed to the libffi callback. -/// This contains everything needed to invoke the Python callable. -pub struct ThunkUserData { - /// The Python callable to invoke - pub callable: PyObjectRef, - /// Argument types for conversion - pub arg_types: Vec, - /// Result type for conversion (None means void) - pub res_type: Option, -} - -/// Get the type code string from a ctypes type -fn get_type_code(ty: &PyTypeRef, vm: &VirtualMachine) -> Option { - ty.get_attr(vm.ctx.intern_str("_type_")) - .and_then(|t| t.downcast_ref::().map(|s| s.to_string())) -} - -/// Convert a C value to a Python object based on the type code -fn ffi_to_python(ty: &PyTypeRef, ptr: *const c_void, vm: &VirtualMachine) -> PyObjectRef { - let type_code = get_type_code(ty, vm); - // SAFETY: ptr is guaranteed to be valid by libffi calling convention - unsafe { - match type_code.as_deref() { - Some("b") => vm.ctx.new_int(*(ptr as *const i8) as i32).into(), - Some("B") => vm.ctx.new_int(*(ptr as *const u8) as i32).into(), - Some("c") => vm.ctx.new_bytes(vec![*(ptr as *const u8)]).into(), - Some("h") => vm.ctx.new_int(*(ptr as *const i16) as i32).into(), - Some("H") => vm.ctx.new_int(*(ptr as *const u16) as i32).into(), - Some("i") => vm.ctx.new_int(*(ptr as *const i32)).into(), - Some("I") => vm.ctx.new_int(*(ptr as *const u32)).into(), - Some("l") => vm.ctx.new_int(*(ptr as *const libc::c_long)).into(), - Some("L") => vm.ctx.new_int(*(ptr as *const libc::c_ulong)).into(), - Some("q") => vm.ctx.new_int(*(ptr as *const libc::c_longlong)).into(), - Some("Q") => vm.ctx.new_int(*(ptr as *const libc::c_ulonglong)).into(), - Some("f") => vm.ctx.new_float(*(ptr as *const f32) as f64).into(), - Some("d") => vm.ctx.new_float(*(ptr as *const f64)).into(), - Some("P") | Some("z") | Some("Z") => vm.ctx.new_int(ptr as usize).into(), - _ => vm.ctx.none(), - } - } -} - -/// Convert a Python object to a C value and store it at the result pointer -fn python_to_ffi(obj: PyResult, ty: &PyTypeRef, result: *mut c_void, vm: &VirtualMachine) { - let obj = match obj { - Ok(o) => o, - Err(_) => return, // Exception occurred, leave result as-is - }; - - let type_code = get_type_code(ty, vm); - // SAFETY: result is guaranteed to be valid by libffi calling convention - unsafe { - match type_code.as_deref() { - Some("b") => { - if let Ok(i) = obj.try_int(vm) { - *(result as *mut i8) = i.as_bigint().to_i8().unwrap_or(0); - } - } - Some("B") => { - if let Ok(i) = obj.try_int(vm) { - *(result as *mut u8) = i.as_bigint().to_u8().unwrap_or(0); - } - } - Some("c") => { - if let Ok(i) = obj.try_int(vm) { - *(result as *mut u8) = i.as_bigint().to_u8().unwrap_or(0); - } - } - Some("h") => { - if let Ok(i) = obj.try_int(vm) { - *(result as *mut i16) = i.as_bigint().to_i16().unwrap_or(0); - } - } - Some("H") => { - if let Ok(i) = obj.try_int(vm) { - *(result as *mut u16) = i.as_bigint().to_u16().unwrap_or(0); - } - } - Some("i") => { - if let Ok(i) = obj.try_int(vm) { - *(result as *mut i32) = i.as_bigint().to_i32().unwrap_or(0); - } - } - Some("I") => { - if let Ok(i) = obj.try_int(vm) { - *(result as *mut u32) = i.as_bigint().to_u32().unwrap_or(0); - } - } - Some("l") | Some("q") => { - if let Ok(i) = obj.try_int(vm) { - *(result as *mut i64) = i.as_bigint().to_i64().unwrap_or(0); - } - } - Some("L") | Some("Q") => { - if let Ok(i) = obj.try_int(vm) { - *(result as *mut u64) = i.as_bigint().to_u64().unwrap_or(0); - } - } - Some("f") => { - if let Ok(f) = obj.try_float(vm) { - *(result as *mut f32) = f.to_f64() as f32; - } - } - Some("d") => { - if let Ok(f) = obj.try_float(vm) { - *(result as *mut f64) = f.to_f64(); - } - } - Some("P") | Some("z") | Some("Z") => { - if let Ok(i) = obj.try_int(vm) { - *(result as *mut usize) = i.as_bigint().to_usize().unwrap_or(0); - } - } - _ => {} - } - } -} - -/// The callback function that libffi calls when the closure is invoked. -/// This function converts C arguments to Python objects, calls the Python -/// callable, and converts the result back to C. -unsafe extern "C" fn thunk_callback( - _cif: &low::ffi_cif, - result: &mut c_void, - args: *const *const c_void, - userdata: &ThunkUserData, -) { - with_current_vm(|vm| { - // Convert C arguments to Python objects - let py_args: Vec = userdata - .arg_types - .iter() - .enumerate() - .map(|(i, ty)| { - let arg_ptr = unsafe { *args.add(i) }; - ffi_to_python(ty, arg_ptr, vm) - }) - .collect(); - - // Call the Python callable - let py_result = userdata.callable.call(py_args, vm); - - // Convert result back to C type - if let Some(ref res_type) = userdata.res_type { - python_to_ffi(py_result, res_type, result as *mut c_void, vm); - } - }); -} - -/// Holds the closure and userdata together to ensure proper lifetime. -/// The userdata is leaked to create a 'static reference that the closure can use. -struct ThunkData { - #[allow(dead_code)] - closure: Closure<'static>, - /// Raw pointer to the leaked userdata, for cleanup - userdata_ptr: *mut ThunkUserData, -} - -impl Drop for ThunkData { - fn drop(&mut self) { - // SAFETY: We created this with Box::into_raw, so we can reclaim it - unsafe { - drop(Box::from_raw(self.userdata_ptr)); - } - } -} - -/// CThunkObject wraps a Python callable to make it callable from C code. -#[pyclass(name = "CThunkObject", module = "_ctypes")] -#[derive(PyPayload)] -pub struct PyCThunk { - /// The Python callable - callable: PyObjectRef, - /// The libffi closure (must be kept alive) - #[allow(dead_code)] - thunk_data: PyRwLock>, - /// The code pointer for the closure - code_ptr: CodePtr, -} - -impl Debug for PyCThunk { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PyCThunk") - .field("callable", &self.callable) - .finish() - } -} - -impl PyCThunk { - /// Create a new thunk wrapping a Python callable. - /// - /// # Arguments - /// * `callable` - The Python callable to wrap - /// * `arg_types` - Optional sequence of argument types - /// * `res_type` - Optional result type - /// * `vm` - The virtual machine - pub fn new( - callable: PyObjectRef, - arg_types: Option, - res_type: Option, - vm: &VirtualMachine, - ) -> PyResult { - // Parse argument types - let arg_type_vec: Vec = if let Some(args) = arg_types { - if vm.is_none(&args) { - Vec::new() - } else { - let mut types = Vec::new(); - for item in args.try_to_value::>(vm)? { - types.push(item.downcast::().map_err(|_| { - vm.new_type_error("_argtypes_ must be a sequence of types".to_string()) - })?); - } - types - } - } else { - Vec::new() - }; - - // Parse result type - let res_type_ref: Option = - if let Some(ref rt) = res_type { - if vm.is_none(rt) { - None - } else { - Some(rt.clone().downcast::().map_err(|_| { - vm.new_type_error("restype must be a ctypes type".to_string()) - })?) - } - } else { - None - }; - - // Build FFI types - let ffi_arg_types: Vec = arg_type_vec - .iter() - .map(|ty| { - get_type_code(ty, vm) - .and_then(|code| ffi_type_from_str(&code)) - .unwrap_or(Type::pointer()) - }) - .collect(); - - let ffi_res_type = res_type_ref - .as_ref() - .and_then(|ty| get_type_code(ty, vm)) - .and_then(|code| ffi_type_from_str(&code)) - .unwrap_or(Type::void()); - - // Create the CIF - let cif = Cif::new(ffi_arg_types, ffi_res_type); - - // Create userdata and leak it to get a 'static reference - let userdata = Box::new(ThunkUserData { - callable: callable.clone(), - arg_types: arg_type_vec, - res_type: res_type_ref, - }); - let userdata_ptr = Box::into_raw(userdata); - - // SAFETY: We maintain the userdata lifetime by storing it in ThunkData - // and cleaning it up in Drop - let userdata_ref: &'static ThunkUserData = unsafe { &*userdata_ptr }; - - // Create the closure - let closure = Closure::new(cif, thunk_callback, userdata_ref); - - // Get the code pointer - let code_ptr = CodePtr(*closure.code_ptr() as *mut _); - - // Store closure and userdata together - let thunk_data = ThunkData { - closure, - userdata_ptr, - }; - - Ok(Self { - callable, - thunk_data: PyRwLock::new(Some(thunk_data)), - code_ptr, - }) - } - - /// Get the code pointer for this thunk - pub fn code_ptr(&self) -> CodePtr { - self.code_ptr - } -} - -// SAFETY: PyCThunk is safe to send/sync because: -// - callable is a PyObjectRef which is Send+Sync -// - thunk_data contains the libffi closure which is heap-allocated -// - code_ptr is just a pointer to executable memory -unsafe impl Send for PyCThunk {} -unsafe impl Sync for PyCThunk {} - -#[pyclass] -impl PyCThunk { - #[pygetset] - fn callable(&self) -> PyObjectRef { - self.callable.clone() - } -} diff --git a/crates/vm/src/stdlib/ctypes/union.rs b/crates/vm/src/stdlib/ctypes/union.rs index 308a5e4e98f..500aa8e6244 100644 --- a/crates/vm/src/stdlib/ctypes/union.rs +++ b/crates/vm/src/stdlib/ctypes/union.rs @@ -1,40 +1,60 @@ -use super::base::{CDataObject, PyCData}; -use super::field::PyCField; -use super::util::StgInfo; +use super::base::{CDATA_BUFFER_METHODS, StgInfoFlags}; +use super::{PyCData, PyCField, StgInfo}; use crate::builtins::{PyList, PyStr, PyTuple, PyType, PyTypeRef}; use crate::convert::ToPyObject; use crate::function::FuncArgs; -use crate::protocol::{BufferDescriptor, BufferMethods, PyBuffer as ProtocolPyBuffer}; -use crate::stdlib::ctypes::_ctypes::get_size; -use crate::types::{AsBuffer, Constructor}; -use crate::{AsObject, Py, PyObject, PyObjectRef, PyPayload, PyResult, VirtualMachine}; -use num_traits::ToPrimitive; -use rustpython_common::lock::PyRwLock; +use crate::function::PySetterValue; +use crate::protocol::{BufferDescriptor, PyBuffer}; +use crate::types::{AsBuffer, Constructor, Initializer, SetAttr}; +use crate::{AsObject, Py, PyObjectRef, PyPayload, PyResult, VirtualMachine}; +use std::borrow::Cow; + +/// Calculate Union type size from _fields_ (max field size) +pub(super) fn calculate_union_size(cls: &Py, vm: &VirtualMachine) -> PyResult { + if let Ok(fields_attr) = cls.as_object().get_attr("_fields_", vm) { + let fields: Vec = fields_attr.try_to_value(vm)?; + let mut max_size = 0usize; + + for field in fields.iter() { + if let Some(tuple) = field.downcast_ref::() + && let Some(field_type) = tuple.get(1) + { + let field_size = super::_ctypes::sizeof(field_type.clone(), vm)?; + max_size = max_size.max(field_size); + } + } + return Ok(max_size); + } + Ok(0) +} /// PyCUnionType - metaclass for Union #[pyclass(name = "UnionType", base = PyType, module = "_ctypes")] #[derive(Debug)] #[repr(transparent)] -pub struct PyCUnionType(PyType); +pub(super) struct PyCUnionType(PyType); impl Constructor for PyCUnionType { type Args = FuncArgs; fn slot_new(metatype: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { - // 1. Create the new class using PyType::py_new - let new_class = crate::builtins::type_::PyType::slot_new(metatype, args, vm)?; + // 1. Create the new class using PyType::slot_new + let new_class = crate::builtins::PyType::slot_new(metatype, args, vm)?; - // 2. Process _fields_ if defined on the new class + // 2. Get the new type let new_type = new_class .clone() .downcast::() .map_err(|_| vm.new_type_error("expected type"))?; - // Only process _fields_ if defined directly on this class (not inherited) - if let Some(fields_attr) = new_type.get_direct_attr(vm.ctx.intern_str("_fields_")) { - Self::process_fields(&new_type, fields_attr, vm)?; - } + // 3. Mark base classes as finalized (subclassing finalizes the parent) + new_type.mark_bases_final(); + + // 4. Initialize StgInfo for the new type (initialized=false, to be set in init) + let stg_info = StgInfo::default(); + let _ = new_type.init_type_data(stg_info); + // Note: _fields_ processing moved to Initializer::init() Ok(new_class) } @@ -43,26 +63,132 @@ impl Constructor for PyCUnionType { } } +impl Initializer for PyCUnionType { + type Args = FuncArgs; + + fn init(zelf: crate::PyRef, _args: Self::Args, vm: &VirtualMachine) -> PyResult<()> { + // Get the type as PyTypeRef by converting PyRef -> PyObjectRef -> PyRef + let obj: PyObjectRef = zelf.clone().into(); + let new_type: PyTypeRef = obj + .downcast() + .map_err(|_| vm.new_type_error("expected type"))?; + + // Check for _abstract_ attribute - skip initialization if present + if new_type + .get_direct_attr(vm.ctx.intern_str("_abstract_")) + .is_some() + { + return Ok(()); + } + + new_type.check_not_initialized(vm)?; + + // Process _fields_ if defined directly on this class (not inherited) + // Use set_attr to trigger setattro + if let Some(fields_attr) = new_type.get_direct_attr(vm.ctx.intern_str("_fields_")) { + new_type + .as_object() + .set_attr(vm.ctx.intern_str("_fields_"), fields_attr, vm)?; + } else { + // No _fields_ defined - try to copy from base class + let (has_base_info, base_clone) = { + let bases = new_type.bases.read(); + if let Some(base) = bases.first() { + (base.stg_info_opt().is_some(), Some(base.clone())) + } else { + (false, None) + } + }; + + if has_base_info && let Some(ref base) = base_clone { + // Clone base StgInfo (release guard before getting mutable reference) + let stg_info_opt = base.stg_info_opt().map(|baseinfo| { + let mut stg_info = baseinfo.clone(); + stg_info.flags &= !StgInfoFlags::DICTFLAG_FINAL; // Clear FINAL flag in subclass + stg_info.initialized = true; + stg_info + }); + + if let Some(stg_info) = stg_info_opt { + // Mark base as FINAL (now guard is released) + if let Some(mut base_stg) = base.get_type_data_mut::() { + base_stg.flags |= StgInfoFlags::DICTFLAG_FINAL; + } + + super::base::set_or_init_stginfo(&new_type, stg_info); + return Ok(()); + } + } + + // No base StgInfo - create default + let mut stg_info = StgInfo::new(0, 1); + stg_info.flags |= StgInfoFlags::TYPEFLAG_HASUNION; + stg_info.paramfunc = super::base::ParamFunc::Union; + // PEP 3118 doesn't support union. Use 'B' for bytes. + stg_info.format = Some("B".to_string()); + super::base::set_or_init_stginfo(&new_type, stg_info); + } + + Ok(()) + } +} + impl PyCUnionType { /// Process _fields_ and create CField descriptors /// For Union, all fields start at offset 0 fn process_fields( - cls: &PyTypeRef, + cls: &Py, fields_attr: PyObjectRef, vm: &VirtualMachine, ) -> PyResult<()> { + // Check if already finalized + { + let Some(stg_info) = cls.get_type_data::() else { + return Err(vm.new_type_error("ctypes state is not initialized")); + }; + if stg_info.is_final() { + return Err(vm.new_attribute_error("_fields_ is final")); + } + } // Read lock released here + + // Check if this is a swapped byte order union + let is_swapped = cls.as_object().get_attr("_swappedbytes_", vm).is_ok(); + let fields: Vec = if let Some(list) = fields_attr.downcast_ref::() { list.borrow_vec().to_vec() } else if let Some(tuple) = fields_attr.downcast_ref::() { tuple.to_vec() } else { - return Err(vm.new_type_error("_fields_ must be a list or tuple".to_string())); + return Err(vm.new_type_error("_fields_ must be a list or tuple")); + }; + + let pack = super::base::get_usize_attr(cls.as_object(), "_pack_", 0, vm)?; + let forced_alignment = + super::base::get_usize_attr(cls.as_object(), "_align_", 1, vm)?.max(1); + + // Initialize size, alignment, type flags, and ffi_field_types from base class + // Note: Union fields always start at offset 0, but we inherit base size/align + let (mut max_size, mut max_align, mut has_pointer, mut has_bitfield, mut ffi_field_types) = { + let bases = cls.bases.read(); + if let Some(base) = bases.first() + && let Some(baseinfo) = base.stg_info_opt() + { + ( + baseinfo.size, + std::cmp::max(baseinfo.align, forced_alignment), + baseinfo.flags.contains(StgInfoFlags::TYPEFLAG_HASPOINTER), + baseinfo.flags.contains(StgInfoFlags::TYPEFLAG_HASBITFIELD), + baseinfo.ffi_field_types.clone(), + ) + } else { + (0, forced_alignment, false, false, Vec::new()) + } }; for (index, field) in fields.iter().enumerate() { let field_tuple = field .downcast_ref::() - .ok_or_else(|| vm.new_type_error("_fields_ must contain tuples".to_string()))?; + .ok_or_else(|| vm.new_type_error("_fields_ must contain tuples"))?; if field_tuple.len() < 2 { return Err(vm.new_type_error( @@ -72,66 +198,230 @@ impl PyCUnionType { let name = field_tuple .first() - .unwrap() + .expect("len checked") .downcast_ref::() - .ok_or_else(|| vm.new_type_error("field name must be a string".to_string()))? + .ok_or_else(|| vm.new_type_error("field name must be a string"))? .to_string(); - let field_type = field_tuple.get(1).unwrap().clone(); - let size = Self::get_field_size(&field_type, vm)?; + let field_type = field_tuple.get(1).expect("len checked").clone(); + + // For swapped byte order unions, validate field type supports byte swapping + if is_swapped { + super::base::check_other_endian_support(&field_type, vm)?; + } + + let size = super::base::get_field_size(&field_type, vm)?; + let field_align = super::base::get_field_align(&field_type, vm); + + // Calculate effective alignment + let effective_align = if pack > 0 { + std::cmp::min(pack, field_align) + } else { + field_align + }; + + max_size = max_size.max(size); + max_align = max_align.max(effective_align); + + // Propagate type flags from field type (HASPOINTER, HASBITFIELD) + if let Some(type_obj) = field_type.downcast_ref::() + && let Some(field_stg) = type_obj.stg_info_opt() + { + // HASPOINTER: propagate if field is pointer or contains pointer + if field_stg.flags.intersects( + StgInfoFlags::TYPEFLAG_ISPOINTER | StgInfoFlags::TYPEFLAG_HASPOINTER, + ) { + has_pointer = true; + } + // HASBITFIELD: propagate directly + if field_stg.flags.contains(StgInfoFlags::TYPEFLAG_HASBITFIELD) { + has_bitfield = true; + } + // Collect FFI type for this field + ffi_field_types.push(field_stg.to_ffi_type()); + } + + // Mark field type as finalized (using type as field finalizes it) + if let Some(type_obj) = field_type.downcast_ref::() { + if let Some(mut stg_info) = type_obj.get_type_data_mut::() { + stg_info.flags |= StgInfoFlags::DICTFLAG_FINAL; + } else { + // Create StgInfo with FINAL flag if it doesn't exist + let mut stg_info = StgInfo::new(size, field_align); + stg_info.flags |= StgInfoFlags::DICTFLAG_FINAL; + let _ = type_obj.init_type_data(stg_info); + } + } // For Union, all fields start at offset 0 - // Create CField descriptor (accepts any ctypes type including arrays) - let c_field = PyCField::new(name.clone(), field_type, 0, size, index); + let field_type_ref = field_type + .clone() + .downcast::() + .map_err(|_| vm.new_type_error("_fields_ type must be a ctypes type"))?; + let c_field = PyCField::new(field_type_ref, 0, size as isize, index); cls.set_attr(vm.ctx.intern_str(name), c_field.to_pyobject(vm)); } + // Calculate total_align and aligned_size + let total_align = std::cmp::max(max_align, forced_alignment); + let aligned_size = if total_align > 0 { + max_size.div_ceil(total_align) * total_align + } else { + max_size + }; + + // Store StgInfo with aligned size + let mut stg_info = StgInfo::new(aligned_size, total_align); + stg_info.flags |= StgInfoFlags::DICTFLAG_FINAL | StgInfoFlags::TYPEFLAG_HASUNION; + // PEP 3118 doesn't support union. Use 'B' for bytes. + stg_info.format = Some("B".to_string()); + if has_pointer { + stg_info.flags |= StgInfoFlags::TYPEFLAG_HASPOINTER; + } + if has_bitfield { + stg_info.flags |= StgInfoFlags::TYPEFLAG_HASBITFIELD; + } + stg_info.paramfunc = super::base::ParamFunc::Union; + // Set byte order: swap if _swappedbytes_ is defined + stg_info.big_endian = super::base::is_big_endian(is_swapped); + // Store FFI field types for union passing + stg_info.ffi_field_types = ffi_field_types; + super::base::set_or_init_stginfo(cls, stg_info); + + // Process _anonymous_ fields + super::base::make_anon_fields(cls, vm)?; + Ok(()) } +} - fn get_field_size(field_type: &PyObject, vm: &VirtualMachine) -> PyResult { - if let Some(size) = field_type - .get_attr("_type_", vm) - .ok() - .and_then(|type_attr| type_attr.str(vm).ok()) - .and_then(|type_str| { - let s = type_str.to_string(); - (s.len() == 1).then(|| get_size(&s)) - }) - { - return Ok(size); +#[pyclass(flags(BASETYPE), with(Constructor, Initializer, SetAttr))] +impl PyCUnionType { + #[pymethod] + fn from_param(zelf: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // zelf is the union type class that from_param was called on + let cls = zelf + .downcast::() + .map_err(|_| vm.new_type_error("from_param: expected a type"))?; + + // 1. If already an instance of the requested type, return it + if value.is_instance(cls.as_object(), vm)? { + return Ok(value); } - if let Some(s) = field_type - .get_attr("size_of_instances", vm) - .ok() - .and_then(|size_method| size_method.call((), vm).ok()) - .and_then(|size| size.try_int(vm).ok()) - .and_then(|n| n.as_bigint().to_usize()) - { - return Ok(s); + // 2. Check for CArgObject (PyCArg_CheckExact) + if let Some(carg) = value.downcast_ref::() { + // Check against proto (for pointer types) + if let Some(stg_info) = cls.stg_info_opt() + && let Some(ref proto) = stg_info.proto + && carg.obj.is_instance(proto.as_object(), vm)? + { + return Ok(value); + } + // Fallback: check if the wrapped object is an instance of the requested type + if carg.obj.is_instance(cls.as_object(), vm)? { + return Ok(value); // Return the CArgObject as-is + } + // CArgObject but wrong type + return Err(vm.new_type_error(format!( + "expected {} instance instead of pointer to {}", + cls.name(), + carg.obj.class().name() + ))); } - Ok(std::mem::size_of::()) + // 3. Check for _as_parameter_ attribute + if let Ok(as_parameter) = value.get_attr("_as_parameter_", vm) { + return PyCUnionType::from_param(cls.as_object().to_owned(), as_parameter, vm); + } + + Err(vm.new_type_error(format!( + "expected {} instance instead of {}", + cls.name(), + value.class().name() + ))) + } + + /// Called when a new Union subclass is created + #[pyclassmethod] + fn __init_subclass__(cls: PyTypeRef, vm: &VirtualMachine) -> PyResult<()> { + cls.mark_bases_final(); + + // Check if _fields_ is defined + if let Some(fields_attr) = cls.get_direct_attr(vm.ctx.intern_str("_fields_")) { + Self::process_fields(&cls, fields_attr, vm)?; + } + Ok(()) } } -#[pyclass(flags(BASETYPE), with(Constructor))] -impl PyCUnionType {} +impl SetAttr for PyCUnionType { + fn setattro( + zelf: &Py, + attr_name: &Py, + value: PySetterValue, + vm: &VirtualMachine, + ) -> PyResult<()> { + let pytype: &Py = zelf.to_base(); + let attr_name_interned = vm.ctx.intern_str(attr_name.as_str()); + + // 1. First, do PyType's setattro (PyType_Type.tp_setattro first) + // Check for data descriptor first + if let Some(attr) = pytype.get_class_attr(attr_name_interned) { + let descr_set = attr.class().mro_find_map(|cls| cls.slots.descr_set.load()); + if let Some(descriptor) = descr_set { + descriptor(&attr, pytype.to_owned().into(), value.clone(), vm)?; + // After successful setattro, check if _fields_ and call process_fields + if attr_name.as_str() == "_fields_" + && let PySetterValue::Assign(fields_value) = value + { + PyCUnionType::process_fields(pytype, fields_value, vm)?; + } + return Ok(()); + } + } + + // Store in type's attributes dict + match &value { + PySetterValue::Assign(v) => { + pytype + .attributes + .write() + .insert(attr_name_interned, v.clone()); + } + PySetterValue::Delete => { + let prev = pytype.attributes.write().shift_remove(attr_name_interned); + if prev.is_none() { + return Err(vm.new_attribute_error(format!( + "type object '{}' has no attribute '{}'", + pytype.name(), + attr_name.as_str(), + ))); + } + } + } + + // 2. If _fields_, call process_fields (which checks FINAL internally) + if attr_name.as_str() == "_fields_" + && let PySetterValue::Assign(fields_value) = value + { + PyCUnionType::process_fields(pytype, fields_value, vm)?; + } + + Ok(()) + } +} /// PyCUnion - base class for Union #[pyclass(module = "_ctypes", name = "Union", base = PyCData, metaclass = "PyCUnionType")] -pub struct PyCUnion { - _base: PyCData, - /// Common CDataObject for memory buffer - pub(super) cdata: PyRwLock, -} +#[repr(transparent)] +pub struct PyCUnion(pub PyCData); impl std::fmt::Debug for PyCUnion { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("PyCUnion") - .field("size", &self.cdata.read().size()) + .field("size", &self.0.size()) .finish() } } @@ -140,47 +430,22 @@ impl Constructor for PyCUnion { type Args = FuncArgs; fn slot_new(cls: PyTypeRef, _args: FuncArgs, vm: &VirtualMachine) -> PyResult { - // Get _fields_ from the class - let fields_attr = cls.as_object().get_attr("_fields_", vm).ok(); - - // Calculate union size (max of all field sizes) and alignment - let mut max_size = 0usize; - let mut max_align = 1usize; - - if let Some(fields_attr) = fields_attr { - let fields: Vec = if let Some(list) = fields_attr.downcast_ref::() - { - list.borrow_vec().to_vec() - } else if let Some(tuple) = fields_attr.downcast_ref::() { - tuple.to_vec() - } else { - vec![] - }; + // Check for abstract class and extract values in a block to drop the borrow + let (total_size, total_align) = { + let stg_info = cls.stg_info(vm)?; + (stg_info.size, stg_info.align) + }; - for field in fields.iter() { - let Some(field_tuple) = field.downcast_ref::() else { - continue; - }; - if field_tuple.len() < 2 { - continue; - } - let field_type = field_tuple.get(1).unwrap().clone(); - let size = PyCUnionType::get_field_size(&field_type, vm)?; - max_size = max_size.max(size); - // For simple types, alignment == size - max_align = max_align.max(size); - } + // Mark the class as finalized (instance creation finalizes the type) + if let Some(mut stg_info_mut) = cls.get_type_data_mut::() { + stg_info_mut.flags |= StgInfoFlags::DICTFLAG_FINAL; } - // Initialize buffer with zeros - let stg_info = StgInfo::new(max_size, max_align); - let cdata = CDataObject::from_stg_info(&stg_info); - PyCUnion { - _base: PyCData::new(cdata.clone()), - cdata: PyRwLock::new(cdata), - } - .into_ref_with_type(vm, cls) - .map(Into::into) + // Initialize buffer with zeros using computed size + let new_stg_info = StgInfo::new(total_size, total_align); + PyCUnion(PyCData::from_stg_info(&new_stg_info)) + .into_ref_with_type(vm, cls) + .map(Into::into) } fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { @@ -188,147 +453,125 @@ impl Constructor for PyCUnion { } } -#[pyclass(flags(BASETYPE, IMMUTABLETYPE), with(Constructor, AsBuffer))] impl PyCUnion { - #[pygetset] - fn _objects(&self) -> Option { - self.cdata.read().objects.clone() - } - - #[pyclassmethod] - fn from_address(cls: PyTypeRef, address: isize, vm: &VirtualMachine) -> PyResult { - use crate::stdlib::ctypes::_ctypes::size_of; - - // Get size from cls - let size = size_of(cls.clone().into(), vm)?; - - // Create instance with data from address - if address == 0 || size == 0 { - return Err(vm.new_value_error("NULL pointer access".to_owned())); - } - let stg_info = StgInfo::new(size, 1); - let cdata = CDataObject::from_stg_info(&stg_info); - Ok(PyCUnion { - _base: PyCData::new(cdata.clone()), - cdata: PyRwLock::new(cdata), - } - .into_ref_with_type(vm, cls)? - .into()) - } - - #[pyclassmethod] - fn from_buffer( - cls: PyTypeRef, - source: PyObjectRef, - offset: crate::function::OptionalArg, + /// Recursively initialize positional arguments through inheritance chain + /// Returns the number of arguments consumed + fn init_pos_args( + self_obj: &Py, + type_obj: &Py, + args: &[PyObjectRef], + kwargs: &indexmap::IndexMap, + index: usize, vm: &VirtualMachine, - ) -> PyResult { - use crate::TryFromObject; - use crate::protocol::PyBuffer; - use crate::stdlib::ctypes::_ctypes::size_of; - - let offset = offset.unwrap_or(0); - if offset < 0 { - return Err(vm.new_value_error("offset cannot be negative".to_owned())); - } - let offset = offset as usize; - - let buffer = PyBuffer::try_from_object(vm, source.clone())?; + ) -> PyResult { + let mut current_index = index; + + // 1. First process base class fields recursively + // Recurse if base has StgInfo + let base_clone = { + let bases = type_obj.bases.read(); + if let Some(base) = bases.first() && + // Check if base has StgInfo + base.stg_info_opt().is_some() + { + Some(base.clone()) + } else { + None + } + }; - if buffer.desc.readonly { - return Err(vm.new_type_error("underlying buffer is not writable".to_owned())); + if let Some(ref base) = base_clone { + current_index = Self::init_pos_args(self_obj, base, args, kwargs, current_index, vm)?; } - let size = size_of(cls.clone().into(), vm)?; - let buffer_len = buffer.desc.len; + // 2. Process this class's _fields_ + if let Some(fields_attr) = type_obj.get_direct_attr(vm.ctx.intern_str("_fields_")) { + let fields: Vec = fields_attr.try_to_value(vm)?; - if offset + size > buffer_len { - return Err(vm.new_value_error(format!( - "Buffer size too small ({} instead of at least {} bytes)", - buffer_len, - offset + size - ))); + for field in fields.iter() { + if current_index >= args.len() { + break; + } + if let Some(tuple) = field.downcast_ref::() + && let Some(name) = tuple.first() + && let Some(name_str) = name.downcast_ref::() + { + let field_name = name_str.as_str().to_owned(); + // Check for duplicate in kwargs + if kwargs.contains_key(&field_name) { + return Err(vm.new_type_error(format!( + "duplicate values for field {:?}", + field_name + ))); + } + self_obj.as_object().set_attr( + vm.ctx.intern_str(field_name), + args[current_index].clone(), + vm, + )?; + current_index += 1; + } + } } - // Copy data from source buffer - let bytes = buffer.obj_bytes(); - let data = bytes[offset..offset + size].to_vec(); - - let cdata = CDataObject::from_bytes(data, None); - Ok(PyCUnion { - _base: PyCData::new(cdata.clone()), - cdata: PyRwLock::new(cdata), - } - .into_ref_with_type(vm, cls)? - .into()) + Ok(current_index) } +} - #[pyclassmethod] - fn from_buffer_copy( - cls: PyTypeRef, - source: crate::function::ArgBytesLike, - offset: crate::function::OptionalArg, - vm: &VirtualMachine, - ) -> PyResult { - use crate::stdlib::ctypes::_ctypes::size_of; +impl Initializer for PyCUnion { + type Args = FuncArgs; - let offset = offset.unwrap_or(0); - if offset < 0 { - return Err(vm.new_value_error("offset cannot be negative".to_owned())); - } - let offset = offset as usize; + fn init(zelf: crate::PyRef, args: Self::Args, vm: &VirtualMachine) -> PyResult<()> { + // Struct_init: handle positional and keyword arguments + let cls = zelf.class().to_owned(); - let size = size_of(cls.clone().into(), vm)?; - let source_bytes = source.borrow_buf(); - let buffer_len = source_bytes.len(); + // 1. Process positional arguments recursively through inheritance chain + if !args.args.is_empty() { + let consumed = PyCUnion::init_pos_args(&zelf, &cls, &args.args, &args.kwargs, 0, vm)?; - if offset + size > buffer_len { - return Err(vm.new_value_error(format!( - "Buffer size too small ({} instead of at least {} bytes)", - buffer_len, - offset + size - ))); + if consumed < args.args.len() { + return Err(vm.new_type_error("too many initializers")); + } } - // Copy data from source - let data = source_bytes[offset..offset + size].to_vec(); - - let cdata = CDataObject::from_bytes(data, None); - Ok(PyCUnion { - _base: PyCData::new(cdata.clone()), - cdata: PyRwLock::new(cdata), + // 2. Process keyword arguments + for (key, value) in args.kwargs.iter() { + zelf.as_object() + .set_attr(vm.ctx.intern_str(key.as_str()), value.clone(), vm)?; } - .into_ref_with_type(vm, cls)? - .into()) + + Ok(()) } } -static UNION_BUFFER_METHODS: BufferMethods = BufferMethods { - obj_bytes: |buffer| { - rustpython_common::lock::PyRwLockReadGuard::map( - buffer.obj_as::().cdata.read(), - |x: &CDataObject| x.buffer.as_slice(), - ) - .into() - }, - obj_bytes_mut: |buffer| { - rustpython_common::lock::PyRwLockWriteGuard::map( - buffer.obj_as::().cdata.write(), - |x: &mut CDataObject| x.buffer.as_mut_slice(), - ) - .into() - }, - release: |_| {}, - retain: |_| {}, -}; +#[pyclass( + flags(BASETYPE, IMMUTABLETYPE), + with(Constructor, Initializer, AsBuffer) +)] +impl PyCUnion {} impl AsBuffer for PyCUnion { - fn as_buffer(zelf: &Py, _vm: &VirtualMachine) -> PyResult { - let buffer_len = zelf.cdata.read().buffer.len(); - let buf = ProtocolPyBuffer::new( + fn as_buffer(zelf: &Py, _vm: &VirtualMachine) -> PyResult { + let buffer_len = zelf.0.buffer.read().len(); + + // PyCData_NewGetBuffer: use info->format if available, otherwise "B" + let format = zelf + .class() + .stg_info_opt() + .and_then(|info| info.format.clone()) + .unwrap_or_else(|| "B".to_string()); + + // Union: ndim=0, shape=(), itemsize=union_size + let buf = PyBuffer::new( zelf.to_owned().into(), - BufferDescriptor::simple(buffer_len, false), // readonly=false for ctypes - &UNION_BUFFER_METHODS, + BufferDescriptor { + len: buffer_len, + readonly: false, + itemsize: buffer_len, + format: Cow::Owned(format), + dim_desc: vec![], // ndim=0 means empty dim_desc + }, + &CDATA_BUFFER_METHODS, ); Ok(buf) } diff --git a/crates/vm/src/stdlib/ctypes/util.rs b/crates/vm/src/stdlib/ctypes/util.rs deleted file mode 100644 index b8c6def63ca..00000000000 --- a/crates/vm/src/stdlib/ctypes/util.rs +++ /dev/null @@ -1,88 +0,0 @@ -use crate::PyObjectRef; - -/// Storage information for ctypes types -/// Stored in TypeDataSlot of heap types (PyType::init_type_data/get_type_data) -#[derive(Clone)] -pub struct StgInfo { - pub initialized: bool, - pub size: usize, // number of bytes - pub align: usize, // alignment requirements - pub length: usize, // number of fields (for arrays/structures) - pub proto: Option, // Only for Pointer/ArrayObject - pub flags: i32, // calling convention and such - - // Array-specific fields (moved from PyCArrayType) - pub element_type: Option, // _type_ for arrays - pub element_size: usize, // size of each element -} - -// StgInfo is stored in type_data which requires Send + Sync. -// The PyObjectRef in proto/element_type fields is protected by the type system's locking mechanism. -// CPython: ctypes objects are not thread-safe by design; users must synchronize access. -unsafe impl Send for StgInfo {} -unsafe impl Sync for StgInfo {} - -impl std::fmt::Debug for StgInfo { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("StgInfo") - .field("initialized", &self.initialized) - .field("size", &self.size) - .field("align", &self.align) - .field("length", &self.length) - .field("proto", &self.proto) - .field("flags", &self.flags) - .field("element_type", &self.element_type) - .field("element_size", &self.element_size) - .finish() - } -} - -impl Default for StgInfo { - fn default() -> Self { - StgInfo { - initialized: false, - size: 0, - align: 1, - length: 0, - proto: None, - flags: 0, - element_type: None, - element_size: 0, - } - } -} - -impl StgInfo { - pub fn new(size: usize, align: usize) -> Self { - StgInfo { - initialized: true, - size, - align, - length: 0, - proto: None, - flags: 0, - element_type: None, - element_size: 0, - } - } - - /// Create StgInfo for an array type - pub fn new_array( - size: usize, - align: usize, - length: usize, - element_type: PyObjectRef, - element_size: usize, - ) -> Self { - StgInfo { - initialized: true, - size, - align, - length, - proto: None, - flags: 0, - element_type: Some(element_type), - element_size, - } - } -} diff --git a/crates/vm/src/stdlib/functools.rs b/crates/vm/src/stdlib/functools.rs index d5a42739e96..26dff8b4426 100644 --- a/crates/vm/src/stdlib/functools.rs +++ b/crates/vm/src/stdlib/functools.rs @@ -73,8 +73,8 @@ mod _functools { self.inner.read().keywords.clone() } - #[pymethod(name = "__reduce__")] - fn reduce(zelf: &Py, vm: &VirtualMachine) -> PyResult { + #[pymethod] + fn __reduce__(zelf: &Py, vm: &VirtualMachine) -> PyResult { let inner = zelf.inner.read(); let partial_type = zelf.class(); diff --git a/crates/vm/src/stdlib/operator.rs b/crates/vm/src/stdlib/operator.rs index 0c048ea2a3f..7877ddb0114 100644 --- a/crates/vm/src/stdlib/operator.rs +++ b/crates/vm/src/stdlib/operator.rs @@ -323,7 +323,7 @@ mod _operator { ) -> PyResult { let res = match (a, b) { (Either::A(a), Either::A(b)) => { - if !a.is_ascii() || !b.is_ascii() { + if !a.isascii() || !b.isascii() { return Err(vm.new_type_error( "comparing strings with non-ASCII characters is not supported", )); diff --git a/crates/vm/src/types/structseq.rs b/crates/vm/src/types/structseq.rs index be0a1c9a70c..2b6a2530b02 100644 --- a/crates/vm/src/types/structseq.rs +++ b/crates/vm/src/types/structseq.rs @@ -199,7 +199,7 @@ pub trait PyStructSequence: StaticType + PyClassImpl + Sized + 'static { .ok_or_else(|| vm.new_type_error("unexpected payload for __repr__"))?; let field_names = Self::Data::REQUIRED_FIELD_NAMES; - let format_field = |(value, name): (&PyObjectRef, _)| { + let format_field = |(value, name): (&PyObject, _)| { let s = value.repr(vm)?; Ok(format!("{name}={s}")) }; @@ -212,6 +212,7 @@ pub trait PyStructSequence: StaticType + PyClassImpl + Sized + 'static { } else { let fields: PyResult> = zelf .iter() + .map(|value| value.as_ref()) .zip(field_names.iter().copied()) .map(format_field) .collect();