From 93a6ec6717b84c8d153916651235ab0925807af0 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Tue, 3 Feb 2026 16:04:59 +0000 Subject: [PATCH 01/13] Revert "WIP delete codegen related changes temporarily" This reverts commit bf0866f726aeab0e705095d5bd529d8cca098f92. --- mypyc/codegen/emit.py | 123 +++++++++++++++++++++++++++++++++++- mypyc/codegen/emitclass.py | 4 +- mypyc/codegen/emitfunc.py | 55 ++++++++-------- mypyc/codegen/emitmodule.py | 16 ++++- mypyc/test/test_emitfunc.py | 97 ++++++++++++++++++++++++++-- 5 files changed, 258 insertions(+), 37 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 409018d28410..f75f8d8a6bde 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -21,16 +21,25 @@ REG_PREFIX, STATIC_PREFIX, TYPE_PREFIX, + TYPE_VAR_PREFIX, ) from mypyc.ir.class_ir import ClassIR, all_concrete_classes from mypyc.ir.func_ir import FUNC_STATICMETHOD, FuncDecl, FuncIR, get_text_signature -from mypyc.ir.ops import BasicBlock, Value +from mypyc.ir.ops import ( + NAMESPACE_MODULE, + NAMESPACE_STATIC, + NAMESPACE_TYPE, + NAMESPACE_TYPE_VAR, + BasicBlock, + Value, +) from mypyc.ir.rtypes import ( RInstance, RPrimitive, RTuple, RType, RUnion, + RVec, int_rprimitive, is_bool_or_bit_rprimitive, is_bytearray_rprimitive, @@ -56,14 +65,24 @@ is_uint8_rprimitive, object_rprimitive, optional_value_type, + vec_api_by_item_type, + vec_item_type_tags, ) from mypyc.namegen import NameGenerator, exported_name +from mypyc.primitives.registry import builtin_names from mypyc.sametype import is_same_type # Whether to insert debug asserts for all error handling, to quickly # catch errors propagating without exceptions set. DEBUG_ERRORS: Final = False +PREFIX_MAP: Final = { + NAMESPACE_STATIC: STATIC_PREFIX, + NAMESPACE_TYPE: TYPE_PREFIX, + NAMESPACE_MODULE: MODULE_PREFIX, + NAMESPACE_TYPE_VAR: TYPE_VAR_PREFIX, +} + class HeaderDeclaration: """A representation of a declaration in C. 
@@ -326,6 +345,13 @@ def ctype_spaced(self, rtype: RType) -> str: else: return ctype + " " + def set_undefined_value(self, target: str, rtype: RType) -> None: + if isinstance(rtype, RVec): + self.emit_line(f"{target}.len = -1;") + self.emit_line(f"{target}.buf = NULL;") + else: + self.emit_line(f"{target} = {self.c_undefined_value(rtype)};") + def c_undefined_value(self, rtype: RType) -> str: if not rtype.is_unboxed: return "NULL" @@ -333,6 +359,8 @@ def c_undefined_value(self, rtype: RType) -> str: return rtype.c_undefined elif isinstance(rtype, RTuple): return self.tuple_undefined_value(rtype) + elif isinstance(rtype, RVec): + return f"({self.ctype(rtype)}) {{ -1, NULL }}" assert False, rtype def c_error_value(self, rtype: RType) -> str: @@ -435,6 +463,12 @@ def error_value_check(self, rtype: RType, value: str, compare: str) -> str: return self.tuple_undefined_check_cond( rtype, value, self.c_error_value, compare, check_exception=False ) + elif isinstance(rtype, RVec): + if compare == "==": + return f"{value}.len < 0" + elif compare == "!=": + return f"{value}.len >= 0" + assert False, compare else: return f"{value} {compare} {self.c_error_value(rtype)}" @@ -466,6 +500,8 @@ def tuple_undefined_check_cond( return self.tuple_undefined_check_cond( item_type, tuple_expr_in_c + f".f{i}", c_type_compare_val, compare ) + elif isinstance(item_type, RVec): + return f"{tuple_expr_in_c}.f{i}.len {compare} -1" else: check = f"{tuple_expr_in_c}.f{i} {compare} {c_type_compare_val(item_type)}" if rtuple.error_overlap and check_exception: @@ -485,6 +521,8 @@ def c_initializer_undefined_value(self, rtype: RType) -> str: return f"{{ {int_rprimitive.c_undefined} }}" items = ", ".join([self.c_initializer_undefined_value(t) for t in rtype.types]) return f"{{ {items} }}" + elif isinstance(rtype, RVec): + return "{ -1, NULL }" else: return self.c_undefined_value(rtype) @@ -518,6 +556,9 @@ def emit_inc_ref(self, dest: str, rtype: RType, *, rare: bool = False) -> None: elif 
isinstance(rtype, RTuple): for i, item_type in enumerate(rtype.types): self.emit_inc_ref(f"{dest}.f{i}", item_type) + elif isinstance(rtype, RVec): + # TODO: Only use the X variant if buf can be NULL + self.emit_line(f"Py_XINCREF({dest}.buf);") elif not rtype.is_unboxed: # Always inline, since this is a simple but very hot op if rtype.may_be_immortal or not HAVE_IMMORTAL: @@ -546,6 +587,12 @@ def emit_dec_ref( elif isinstance(rtype, RTuple): for i, item_type in enumerate(rtype.types): self.emit_dec_ref(f"{dest}.f{i}", item_type, is_xdec=is_xdec, rare=rare) + elif isinstance(rtype, RVec): + # TODO: Only use the X variant if buf can be NULL + if rare: + self.emit_line(f"CPy_XDecRef({dest}.buf);") + else: + self.emit_line(f"CPy_XDECREF({dest}.buf);") elif not rtype.is_unboxed: if rare: self.emit_line(f"CPy_{x}DecRef({dest});") @@ -555,6 +602,8 @@ def emit_dec_ref( self.emit_line(f"CPy_{x}DECREF({dest});") else: self.emit_line(f"CPy_{x}DECREF_NO_IMM({dest});") + elif rtype.is_refcounted: + assert False, f"dec_ref not implemented for {rtype}" # Otherwise assume it's an unboxed, pointerless value and do nothing. 
def pretty_name(self, typ: RType) -> str: @@ -751,6 +800,14 @@ def emit_cast( elif isinstance(typ, RTuple): assert not optional self.emit_tuple_cast(src, dest, typ, declare_dest, error, src_type) + elif isinstance(typ, RVec): + # TODO: Actually perform the type check, this is a no-op + if declare_dest: + self.emit_line("PyObject *{};".format(dest)) + self.emit_arg_check(src, dest, typ, "", optional) + self.emit_line("{} = {};".format(dest, src)) + if optional: + self.emit_line("}") else: assert False, "Cast not implemented: %s" % typ @@ -894,6 +951,7 @@ def emit_unbox( declare_dest: If True, also declare the variable 'dest' error: What happens on error raise_exception: If True, also raise TypeError on failure + optional: If True, NULL src value is allowed and will map to error value borrow: If True, create a borrowed reference """ @@ -1025,10 +1083,53 @@ def emit_unbox( self.emit_line("}") if optional: self.emit_line("}") + elif isinstance(typ, RVec): + if declare_dest: + self.emit_line(f"{self.ctype(typ)} {dest};") + + if optional: + self.emit_line(f"if ({src} == NULL) {{") + self.emit_line(f"{dest} = {self.c_error_value(typ)};") + self.emit_line("} else {") + + specialized_api_name = vec_api_by_item_type.get(typ.item_type) + if specialized_api_name is not None: + self.emit_line(f"{dest} = {specialized_api_name}.unbox({src});") + else: + depth = typ.depth() + unwrapped = typ.unwrap_item_type() + if unwrapped in vec_item_type_tags: + type_value = str(vec_item_type_tags[unwrapped]) + else: + type_value = self.vec_item_type_c(typ) + if depth == 0: + self.emit_line(f"{dest} = VecTApi.unbox({src}, {type_value});") + else: + self.emit_line(f"{dest} = VecNestedApi.unbox({src}, {type_value}, {depth});") + + self.emit_line(f"if (VEC_IS_ERROR({dest})) {{") + self.emit_line(failure) + self.emit_line("}") + if optional: + self.emit_line("}") else: assert False, "Unboxing not implemented: %s" % typ + def vec_item_type_c(self, typ: RVec) -> str: + item_type = 
typ.unwrap_item_type() + type_value = f"(size_t){self.type_c_ptr(item_type)}" + if typ.is_optional(): + type_value = f"{type_value} | 1" + return type_value + + def type_c_ptr(self, typ: RPrimitive | RInstance) -> str | None: + if isinstance(typ, RPrimitive) and typ.is_refcounted: + return "&" + builtin_names[typ.name][1] + elif isinstance(typ, RInstance): + return self.type_struct_name(typ.class_ir) + return None + def emit_box( self, src: str, dest: str, typ: RType, declare_dest: bool = False, can_borrow: bool = False ) -> None: @@ -1083,6 +1184,20 @@ def emit_box( inner_name = self.temp_name() self.emit_box(f"{src}.f{i}", inner_name, typ.types[i], declare_dest=True) self.emit_line(f"PyTuple_SET_ITEM({dest}, {i}, {inner_name});") + elif isinstance(typ, RVec): + specialized_api_name = vec_api_by_item_type.get(typ.item_type) + if specialized_api_name is not None: + api = specialized_api_name + elif typ.depth() > 0: + api = "VecNestedApi" + else: + api = "VecTApi" + # Empty vecs of this sort don't describe item type, so it needs to be + # passed explicitly. + item_type = self.vec_item_type_c(typ) + self.emit_line(f"{declaration}{dest} = {api}.box({src}, {item_type});") + return + self.emit_line(f"{declaration}{dest} = {api}.box({src});") else: assert not typ.is_unboxed # Type is boxed -- trivially just assign. @@ -1096,6 +1211,8 @@ def emit_error_check(self, value: str, rtype: RType, failure: str) -> None: else: cond = self.tuple_undefined_check_cond(rtype, value, self.c_error_value, "==") self.emit_line(f"if ({cond}) {{") + elif isinstance(rtype, RVec): + self.emit_line(f"if ({value}.len < 0) {{") elif rtype.error_overlap: # The error value is also valid as a normal value, so we need to also check # for a raised exception. 
@@ -1120,6 +1237,8 @@ def emit_gc_visit(self, target: str, rtype: RType) -> None: elif isinstance(rtype, RTuple): for i, item_type in enumerate(rtype.types): self.emit_gc_visit(f"{target}.f{i}", item_type) + elif isinstance(rtype, RVec): + self.emit_line(f"Py_VISIT({target}.buf);") elif self.ctype(rtype) == "PyObject *": # The simplest case. self.emit_line(f"Py_VISIT({target});") @@ -1144,6 +1263,8 @@ def emit_gc_clear(self, target: str, rtype: RType) -> None: elif isinstance(rtype, RTuple): for i, item_type in enumerate(rtype.types): self.emit_gc_clear(f"{target}.f{i}", item_type) + elif isinstance(rtype, RVec): + self.emit_line(f"Py_CLEAR({target}.buf);") elif self.ctype(rtype) == "PyObject *" and self.c_undefined_value(rtype) == "NULL": # The simplest case. self.emit_line(f"Py_CLEAR({target});") diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index 8c3fa5de98f8..387065d93bcf 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -653,7 +653,7 @@ def generate_setup_for_class( # We don't need to set this field to NULL since tp_alloc() already # zero-initializes `self`. 
if value != "NULL": - emitter.emit_line(rf"self->{emitter.attr(attr)} = {value};") + emitter.set_undefined_value(f"self->{emitter.attr(attr)}", rtype) # Initialize attributes to default values, if necessary if defaults_fn is not None: @@ -1194,7 +1194,7 @@ def generate_setter(cl: ClassIR, attr: str, rtype: RType, emitter: Emitter) -> N if deletable: emitter.emit_line("} else") - emitter.emit_line(f" self->{attr_field} = {emitter.c_undefined_value(rtype)};") + emitter.set_undefined_value(f" self->{attr_field}", rtype) if rtype.error_overlap: emitter.emit_attr_bitmap_clear("self", rtype, cl, attr) emitter.emit_line("return 0;") diff --git a/mypyc/codegen/emitfunc.py b/mypyc/codegen/emitfunc.py index 3f1bbab58895..c1202d1c928c 100644 --- a/mypyc/codegen/emitfunc.py +++ b/mypyc/codegen/emitfunc.py @@ -5,25 +5,19 @@ from typing import Final from mypyc.analysis.blockfreq import frequently_executed_blocks -from mypyc.codegen.emit import DEBUG_ERRORS, Emitter, TracebackAndGotoHandler, c_array_initializer -from mypyc.common import ( - GENERATOR_ATTRIBUTE_PREFIX, - HAVE_IMMORTAL, - MODULE_PREFIX, - NATIVE_PREFIX, - REG_PREFIX, - STATIC_PREFIX, - TYPE_PREFIX, - TYPE_VAR_PREFIX, +from mypyc.codegen.emit import ( + DEBUG_ERRORS, + PREFIX_MAP, + Emitter, + TracebackAndGotoHandler, + c_array_initializer, ) +from mypyc.common import GENERATOR_ATTRIBUTE_PREFIX, HAVE_IMMORTAL, NATIVE_PREFIX, REG_PREFIX from mypyc.ir.class_ir import ClassIR from mypyc.ir.func_ir import FUNC_CLASSMETHOD, FUNC_STATICMETHOD, FuncDecl, FuncIR, all_values from mypyc.ir.ops import ( ERR_FALSE, - NAMESPACE_MODULE, - NAMESPACE_STATIC, NAMESPACE_TYPE, - NAMESPACE_TYPE_VAR, Assign, AssignMulti, BasicBlock, @@ -82,6 +76,7 @@ RStruct, RTuple, RType, + RVec, is_bool_or_bit_rprimitive, is_int32_rprimitive, is_int64_rprimitive, @@ -221,6 +216,10 @@ def error_value_check(self, value: Value, compare: str) -> str: return self.emitter.tuple_undefined_check_cond( typ, self.reg(value), self.c_error_value, compare ) + 
elif isinstance(typ, RVec): + # Error values for vecs are represented by a negative length. + vec_compare = ">=" if compare == "!=" else "<" + return f"{self.reg(value)}.len {vec_compare} 0" else: return f"{self.reg(value)} {compare} {self.c_error_value(typ)}" @@ -289,10 +288,16 @@ def visit_assign(self, op: Assign) -> None: # clang whines about self assignment (which we might generate # for some casts), so don't emit it. if dest != src: - # We sometimes assign from an integer prepresentation of a pointer - # to a real pointer, and C compilers insist on a cast. - if op.src.type.is_unboxed and not op.dest.type.is_unboxed: + src_type = op.src.type + dest_type = op.dest.type + if src_type.is_unboxed and not dest_type.is_unboxed: + # We sometimes assign from an integer representation of a pointer + # to a real pointer, and C compilers insist on a cast. src = f"(void *){src}" + elif not src_type.is_unboxed and dest_type.is_unboxed: + # We sometimes assign a pointer to an integer type (e.g. to create + # tagged pointers), and here we need an explicit cast. 
+ src = f"({self.emitter.ctype(dest_type)}){src}" self.emit_line(f"{dest} = {src};") def visit_assign_multi(self, op: AssignMulti) -> None: @@ -312,11 +317,14 @@ def visit_assign_multi(self, op: AssignMulti) -> None: ) def visit_load_error_value(self, op: LoadErrorValue) -> None: + reg = self.reg(op) if isinstance(op.type, RTuple): values = [self.c_undefined_value(item) for item in op.type.types] tmp = self.temp_name() self.emit_line("{} {} = {{ {} }};".format(self.ctype(op.type), tmp, ", ".join(values))) - self.emit_line(f"{self.reg(op)} = {tmp};") + self.emit_line(f"{reg} = {tmp};") + elif isinstance(op.type, RVec): + self.emitter.set_undefined_value(reg, op.type) else: self.emit_line(f"{self.reg(op)} = {self.c_error_value(op.type)};") @@ -523,16 +531,9 @@ def visit_set_attr(self, op: SetAttr) -> None: if op.error_kind == ERR_FALSE: self.emitter.emit_line(f"{dest} = 1;") - PREFIX_MAP: Final = { - NAMESPACE_STATIC: STATIC_PREFIX, - NAMESPACE_TYPE: TYPE_PREFIX, - NAMESPACE_MODULE: MODULE_PREFIX, - NAMESPACE_TYPE_VAR: TYPE_VAR_PREFIX, - } - def visit_load_static(self, op: LoadStatic) -> None: dest = self.reg(op) - prefix = self.PREFIX_MAP[op.namespace] + prefix = PREFIX_MAP[op.namespace] name = self.emitter.static_name(op.identifier, op.module_name, prefix) if op.namespace == NAMESPACE_TYPE: name = "(PyObject *)%s" % name @@ -540,7 +541,7 @@ def visit_load_static(self, op: LoadStatic) -> None: def visit_init_static(self, op: InitStatic) -> None: value = self.reg(op.value) - prefix = self.PREFIX_MAP[op.namespace] + prefix = PREFIX_MAP[op.namespace] name = self.emitter.static_name(op.identifier, op.module_name, prefix) if op.namespace == NAMESPACE_TYPE: value = "(PyTypeObject *)%s" % value @@ -845,7 +846,7 @@ def visit_load_address(self, op: LoadAddress) -> None: if isinstance(op.src, Register): src = self.reg(op.src) elif isinstance(op.src, LoadStatic): - prefix = self.PREFIX_MAP[op.src.namespace] + prefix = PREFIX_MAP[op.src.namespace] src = 
self.emitter.static_name(op.src.identifier, op.src.module_name, prefix) else: src = op.src diff --git a/mypyc/codegen/emitmodule.py b/mypyc/codegen/emitmodule.py index da82f14e92f2..554379b8848a 100644 --- a/mypyc/codegen/emitmodule.py +++ b/mypyc/codegen/emitmodule.py @@ -27,7 +27,7 @@ from mypy.options import Options from mypy.plugin import Plugin, ReportConfigContext from mypy.util import hash_digest, json_dumps -from mypyc.analysis.capsule_deps import find_implicit_op_dependencies +from mypyc.analysis.capsule_deps import find_class_dependencies, find_implicit_op_dependencies from mypyc.codegen.cstring import c_string_initializer from mypyc.codegen.emit import ( Emitter, @@ -56,7 +56,7 @@ short_id_from_name, ) from mypyc.errors import Errors -from mypyc.ir.deps import LIBRT_BASE64, LIBRT_STRINGS, LIBRT_TIME, SourceDep +from mypyc.ir.deps import LIBRT_BASE64, LIBRT_STRINGS, LIBRT_TIME, LIBRT_VECS, SourceDep from mypyc.ir.func_ir import FuncIR from mypyc.ir.module_ir import ModuleIR, ModuleIRs, deserialize_modules from mypyc.ir.ops import DeserMaps, LoadLiteral @@ -271,6 +271,12 @@ def compile_scc_to_ir( do_copy_propagation(fn, compiler_options) do_flag_elimination(fn, compiler_options) + # Calculate implicit dependencies from class attribute types + for cl in module.classes: + deps = find_class_dependencies(cl) + if deps is not None: + module.dependencies.update(deps) + return modules @@ -634,6 +640,8 @@ def generate_c_for_modules(self) -> list[tuple[str, str]]: ext_declarations.emit_line("#include ") if any(LIBRT_TIME in mod.dependencies for mod in self.modules.values()): ext_declarations.emit_line("#include