From b10810667abb0fd1f6d7a6e8cba71b54676397a6 Mon Sep 17 00:00:00 2001 From: Esteban Zimanyi Date: Wed, 1 Jul 2026 12:10:02 +0200 Subject: [PATCH] Map foreign Arrow C Data Interface struct pointers to void * The Arrow C Data Interface structs (ArrowSchema, ArrowArray) are forward-declared, layout-less foreign types with no MEOS semantics, so a pointer to one is ABI-identical to void *. Emitting them as void * lets every binding's existing opaque-pointer-family handling wrap them uniformly instead of each generator needing an Arrow-specific case: a permissive generator maps an unrecognised pointer to a raw pointer, while a conservative one skips it. Normalise struct ArrowSchema * / ArrowArray * to void * in the declared and canonical spellings, mirroring the existing _Bool -> bool normalisation. Add a generation-time guard that warns when a forward-declared foreign struct pointer appears in the API but is not listed in _EXTERNAL_OPAQUE_STRUCTS, so it is classified explicitly rather than diverging silently per binding. --- parser/extractors.py | 49 ++++++++++++++++++++++++++++++++++++++++++-- run.py | 10 +++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/parser/extractors.py b/parser/extractors.py index a5855e9..dd6e360 100644 --- a/parser/extractors.py +++ b/parser/extractors.py @@ -19,6 +19,51 @@ def _canonical_spelling(ty) -> str: _BOOL_SPELLINGS = {"bool", "_Bool"} +# External ABI structs that appear in the MEOS public API only at the FFI +# boundary — the Arrow C Data Interface structs. They are forward-declared +# with no field layout and carry no MEOS semantics, so a pointer to one is +# ABI-identical to ``void *``. Emitting them as ``void *`` lets every binding's +# opaque-pointer-family handling (JNR ``Pointer``, cffi ``_ffi.CData``, Go +# ``unsafe.Pointer``, .NET ``IntPtr``, Rust ``*mut c_void``) wrap them +# uniformly.
The idiomatic Arrow bridge — allocating the struct and importing +# it through the language's Arrow library — lives in each binding's hand-written +# layer, keyed off the ``*_to_arrow`` / ``*_from_arrow`` function name, as with +# any other opaque-pointer-family value. +_EXTERNAL_OPAQUE_STRUCTS = ("ArrowSchema", "ArrowArray") + + +def _demote_external_opaque(spelling: str) -> str: + # Map a pointer to an external, layout-less ABI struct to the equivalent + # ``void``-pointer spelling, preserving const qualifiers and pointer depth. + for name in _EXTERNAL_OPAQUE_STRUCTS: + spelling = re.sub(rf"\bstruct\s+{name}\b", "void", spelling) + spelling = re.sub(rf"\b{name}\b", "void", spelling) + return spelling + + +def find_unlisted_foreign_structs(idl) -> list: + # A MEOS type is typedef'd, so its declared ``cType`` appears bare (``Pose + # *``) in at least one signature; a foreign, forward-declared ABI struct is + # never typedef'd, so it only ever appears elaborated (``struct ArrowSchema + # *``). Any base name seen only in the elaborated form, and not already + # normalised to ``void *`` by ``_EXTERNAL_OPAQUE_STRUCTS``, is an external + # type the bindings handle divergently (permissive ones map it to a raw + # pointer, conservative ones skip it). Surface it so it is classified + # explicitly instead of silently diverging per binding. + elaborated, bare = set(), set() + for fn in idl.get("functions", []): + spellings = [p.get("cType") for p in fn.get("params", [])] + spellings.append(fn.get("returnType", {}).get("c")) + for sp in spellings: + if not isinstance(sp, str) or "*" not in sp: + continue + base = re.sub(r"\b(const|struct)\b|\*", " ", sp).strip() + if not base: + continue + (elaborated if re.search(r"\bstruct\b", sp) else bare).add(base) + return sorted(elaborated - bare - set(_EXTERNAL_OPAQUE_STRUCTS)) + + def _c_spelling(ty) -> str: # Return the declared C spelling, with ``_Bool`` normalised to ``"bool"``. 
# Two bool representations arise depending on which postgres_int_defs.h is @@ -28,7 +73,7 @@ def _c_spelling(ty) -> str: spelling = ty.spelling if spelling == "_Bool": return "bool" - return spelling + return _demote_external_opaque(spelling) def _canonical_c_spelling(ty) -> str: @@ -42,7 +87,7 @@ def _canonical_c_spelling(ty) -> str: # Fallback: also catch _Bool reached through other typedef chains if ty.get_canonical().kind == clang.cindex.TypeKind.BOOL: return "bool" - return _canonical_spelling(ty) + return _demote_external_opaque(_canonical_spelling(ty)) def extract_function(node) -> dict: diff --git a/run.py b/run.py index 640dd9e..8e3ee48 100644 --- a/run.py +++ b/run.py @@ -5,6 +5,7 @@ from parser.parser import parse_all_headers, merge_meta from parser.portable import attach_portable_aliases from parser.typerecover import recover_collapsed_types +from parser.extractors import find_unlisted_foreign_structs HEADERS_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./meos/include") @@ -40,6 +41,15 @@ def main(): file=sys.stderr) idl = attach_portable_aliases(idl, PORTABLE_PATH) + # Surface any forward-declared external ABI struct pointer not yet + # normalised to void * (see parser.extractors._EXTERNAL_OPAQUE_STRUCTS), + # so a new one is classified explicitly instead of diverging per binding. + unlisted = find_unlisted_foreign_structs(idl) + if unlisted: + print(f" WARNING: unlisted external struct pointer(s) in the API: " + f"{', '.join(unlisted)} — add to _EXTERNAL_OPAQUE_STRUCTS to map " + f"them to void * uniformly across bindings", file=sys.stderr) + idl_path = OUTPUT_DIR / "meos-idl.json" with open(idl_path, "w") as f: json.dump(idl, f, indent=2)