diff --git a/language-extensions/python/src/PythonParam.cpp b/language-extensions/python/src/PythonParam.cpp index 24a72c2b..d458325c 100644 --- a/language-extensions/python/src/PythonParam.cpp +++ b/language-extensions/python/src/PythonParam.cpp @@ -345,8 +345,12 @@ void PythonStringParam::RetrieveValueAndStrLenInd(bp::object mainNames char *utf16str = PyBytes_AsString(PyUnicode_AsUTF16String(tempObj.ptr())); // Reinterpret the bytes as wchar_t *, which we will return. + // The buffer contains UTF-16 code units in native byte order. The string always starts with a BOM mark. + // (https://docs.python.org/3/c-api/unicode.html#c.PyUnicode_AsUTF16String) + // We treat it as an array of 16-bit code units (CharType expected to be the size of wchar_t). // - CharType *wData = reinterpret_cast(utf16str); + static_assert(sizeof(CharType) == sizeof(wchar_t), "CharType must match wchar_t size for UTF-16 reinterpretation."); + CharType *wData = reinterpret_cast(utf16str); // CodeQL [SM02986]: The buffer is properly aligned (divisible by 2), already contains real UTF-16 data (SQL NVARCHAR), and we know its exact length (not relying on null termination); so treating it as wchar_t* is safe. // Ignore 2 byte BOM at front of wData that was added by PyUnicode_AsUTF16String //