From 760b49f80b12a040abfc7966088245a52e9628b3 Mon Sep 17 00:00:00 2001
From: Mohammad Hossein Namaki <monamaki@microsoft.com>
Date: Fri, 17 Apr 2026 13:59:30 -0700
Subject: [PATCH] codeql for cast

---
 language-extensions/python/src/PythonParam.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/language-extensions/python/src/PythonParam.cpp b/language-extensions/python/src/PythonParam.cpp
index 24a72c2b..d458325c 100644
--- a/language-extensions/python/src/PythonParam.cpp
+++ b/language-extensions/python/src/PythonParam.cpp
@@ -345,8 +345,12 @@ void PythonStringParam<CharType>::RetrieveValueAndStrLenInd(bp::object mainNames
 					char *utf16str = PyBytes_AsString(PyUnicode_AsUTF16String(tempObj.ptr()));
 
 					// Reinterpret the bytes as wchar_t *, which we will return.
+					// The buffer contains UTF-16 code units in native byte order and always starts with a byte order mark (BOM).
+					// (https://docs.python.org/3/c-api/unicode.html#c.PyUnicode_AsUTF16String)
+					// We treat it as an array of 16-bit code units (CharType is expected to be the same size as wchar_t).
 					//
-					CharType *wData = reinterpret_cast<CharType *>(utf16str);
+					static_assert(sizeof(CharType) == sizeof(wchar_t), "CharType must match wchar_t size for UTF-16 reinterpretation.");
+					CharType *wData = reinterpret_cast<CharType *>(utf16str); // CodeQL [SM02986]: The buffer is properly aligned (divisible by 2), already contains real UTF-16 data (SQL NVARCHAR), and we know its exact length (not relying on null termination); so treating it as wchar_t* is safe.
 
 					// Ignore 2 byte BOM at front of wData that was added by PyUnicode_AsUTF16String
 					//