Binary file modified .gitignore
78 changes: 78 additions & 0 deletions FIX.md
@@ -0,0 +1,78 @@
# Refactoring Plan: Migrating from Pickle/Cloudpickle to Msgpack

This document outlines the technical strategy to eliminate insecure deserialization vulnerabilities in the `google-cloud-aiplatform` SDK by replacing `pickle` and `cloudpickle` with **Msgpack**.

## 1. Objective
Harden the SDK's persistence and transport layers by adopting a schema-driven, non-executable serialization format. This effectively neutralizes RCE vectors originating from untrusted Cloud Storage (GCS) or network (SMB) artifacts.

## 2. Dependency Management
- **Add Dependency**: Add `msgpack >= 1.0.0` to `setup.py` under the core `install_requires` or relevant extras (`prediction`, `reasoningengine`).
- **Remove Dependency**: Deprecate and ultimately remove `cloudpickle` usage in `vertexai` preview modules.

## 3. Implementation Strategy

### Phase 0: Environment & Branch Management
- **Action**: Create a dedicated security branch to isolate the refactoring changes.
- **Command**:
```bash
git checkout -b security/fix-rce-msgpack-migration
```

### Phase 1: Harden Static Predictors (`pickle`)
Target: `google/cloud/aiplatform/prediction/`
- **Action**: Replace `pickle.load` and `joblib.load` with `msgpack.unpackb`.
- **Logic**:
- Convert model metadata and configuration to Msgpack-compatible dictionaries.
- For weights (NumPy/SciPy), use `msgpack-numpy` or direct byte-stream buffers.
- **Files**:
- `google/cloud/aiplatform/prediction/sklearn/predictor.py`
- `google/cloud/aiplatform/prediction/xgboost/predictor.py`
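The direct byte-stream-buffer option above can be sketched as follows. This is a minimal illustration under stated assumptions, not SDK code: the `pack_state`/`unpack_state` helpers and the `__ndarray__` tagging convention are hypothetical, and real model state would come from the estimator's fitted attributes.

```python
import msgpack
import numpy as np


def pack_state(state: dict) -> bytes:
    """Serialize a dict of plain values and NumPy arrays to msgpack bytes."""

    def encode(obj):
        # Encode arrays as (dtype, shape, raw bytes) so the payload stays
        # pure data -- nothing executable crosses the wire.
        if isinstance(obj, np.ndarray):
            return {
                "__ndarray__": True,
                "dtype": str(obj.dtype),
                "shape": list(obj.shape),
                "data": obj.tobytes(),
            }
        raise TypeError(f"Unsupported type: {type(obj)}")

    return msgpack.packb(state, default=encode, use_bin_type=True)


def unpack_state(blob: bytes) -> dict:
    """Inverse of pack_state; reconstructs arrays from tagged dicts."""

    def decode(obj):
        if obj.get("__ndarray__"):
            return np.frombuffer(obj["data"], dtype=obj["dtype"]).reshape(
                obj["shape"]
            )
        return obj

    return msgpack.unpackb(blob, object_hook=decode, raw=False)


state = {"coef_": np.array([[0.5, -1.2, 3.3]]), "params": {"C": 1.0}}
restored = unpack_state(pack_state(state))
assert np.array_equal(restored["coef_"], state["coef_"])
```

The same tagging approach is what `msgpack-numpy` automates; the manual version is shown here to make the "data only" property explicit.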

### Phase 2: Secure Dynamic Engines (`cloudpickle`)
Target: `vertexai/agent_engines/` and `vertexai/reasoning_engines/`
- **Challenge**: `cloudpickle` is used to ship live Python code. `msgpack` is data-only.
- **Action**:
- Separate **Logic** from **State**.
- Use `msgpack` for the state (variables, parameters).
- For logic, transition to a **manifest-based loading** or **module-import** pattern: the code must already exist in the environment, or be provided as a source string that is validated before execution.
- **Files**:
- `vertexai/agent_engines/_agent_engines.py`
- `vertexai/reasoning_engines/_reasoning_engines.py`
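The logic/state split above can be sketched as follows. The manifest layout, field names, and the `my_agents.weather_agent` entrypoint are all assumptions for illustration, not an existing SDK format.

```python
import importlib

import msgpack

# Hypothetical manifest: logic is referenced by import path (the module must
# already exist in the runtime image); state travels as plain msgpack data.
manifest = {
    "entrypoint": "my_agents.weather_agent:WeatherAgent",  # assumed module
    "state": msgpack.packb({"temperature_unit": "celsius"}, use_bin_type=True),
}


def load_agent(spec: dict):
    """Rehydrate an agent from a manifest without deserializing any code."""
    module_name, _, class_name = spec["entrypoint"].partition(":")
    cls = getattr(importlib.import_module(module_name), class_name)
    state = msgpack.unpackb(spec["state"], raw=False)
    return cls(**state)  # only data is deserialized; the class comes via import
```

Unlike `cloudpickle.loads`, a hostile manifest can at worst name a class that is already installed; it cannot smuggle in new bytecode.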

### Phase 3: Metadata and Transport Hardening
Target: `google/cloud/aiplatform/metadata/`
- **Action**: Replace debug/logging `pickle.dumps` in GRPC transports with `msgpack.packb`.
- **Files**:
- `google/cloud/aiplatform/metadata/_models.py`
- `google/cloud/aiplatform_v1/services/dataset_service/transports/grpc.py`
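The swap itself is mechanical; a short illustration with a made-up payload (the metadata keys are illustrative, not the transport's actual fields):

```python
import logging

import msgpack

request_metadata = {"dataset_id": "123", "page_size": 50}

# Before: logging.debug("payload=%r", pickle.dumps(request_metadata))
# After: same debug information, but in a non-executable binary encoding.
payload = msgpack.packb(request_metadata, use_bin_type=True)
logging.debug("payload (%d bytes): %r", len(payload), payload)

# Round-trips losslessly for plain dict/str/int data.
assert msgpack.unpackb(payload, raw=False) == request_metadata
```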

### Phase 4: Code Hygiene & Formatting
- **Action**: Enforce Google-specific code style across all modified files to ensure maintainability and compliance with the upstream repository.
- **Tools**:
- `isort`: Standardize import ordering.
- `pyink`: Apply Google-compliant code formatting (a fork of Black maintained by Google, with its line-length and style overrides).

---

## 4. Security Enhancements (The "Double Lock")

### A. Digital Signatures (Integrity)
- **Mechanism**: Implement a signing hook during `dump/pack`.
- **Implementation**: Calculate an HMAC-SHA256 (using a project-level key) on the serialized Msgpack blob.
- **Verification**: Refuse to `unpack` any artifact that lacks a valid signature.
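A sketch of the sign/verify pair. Note that `security_utils.verify_blob` in the diffs below takes only the blob (the key is presumably resolved internally, e.g. from Secret Manager); this standalone version passes the key explicitly for clarity.

```python
import hashlib
import hmac

_DIGEST_LEN = hashlib.sha256().digest_size  # 32 bytes for HMAC-SHA256


def sign_blob(blob: bytes, key: bytes) -> bytes:
    # Prepend the MAC so verification can happen before any unpacking.
    return hmac.new(key, blob, hashlib.sha256).digest() + blob


def verify_blob(signed: bytes, key: bytes) -> bytes:
    mac, blob = signed[:_DIGEST_LEN], signed[_DIGEST_LEN:]
    expected = hmac.new(key, blob, hashlib.sha256).digest()
    # Constant-time comparison to avoid timing side channels.
    if not hmac.compare_digest(mac, expected):
        raise ValueError("Signature missing or invalid; refusing to unpack.")
    return blob


key = b"project-level-secret"        # placeholder; never hard-code real keys
payload = b"\x82\xa1a\x01\xa1b\x02"  # an arbitrary msgpack blob
assert verify_blob(sign_blob(payload, key), key) == payload
```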

### B. URI/Path Sanitization
- **Mechanism**: Block UNC/SMB paths.
- **Action**: Modify `google/cloud/aiplatform/utils/prediction_utils.py` and `path_utils.py` to:
- Strictly enforce `gs://` or local filesystem paths.
- Explicitly deny paths starting with `\\` or containing `smb://` protocols.
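A hypothetical `validate_uri` consistent with those rules (the real helper would live in the new `security_utils` module, and the exact error messages are illustrative):

```python
def validate_uri(uri: str) -> None:
    """Reject UNC/SMB locations; allow only gs:// or local filesystem paths."""
    normalized = uri.strip().lower()
    # UNC paths start with two backslashes; also catch explicit smb:// URIs.
    if normalized.startswith("\\\\") or "smb://" in normalized:
        raise ValueError(f"Blocked path scheme (UNC/SMB): {uri!r}")
    # Any other remote scheme besides gs:// is denied outright.
    if "://" in normalized and not normalized.startswith("gs://"):
        raise ValueError(f"Only gs:// or local filesystem paths are allowed: {uri!r}")


validate_uri("gs://my-bucket/model.msgpack")  # ok
validate_uri("/srv/models/model.msgpack")     # ok
```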

---

## 5. Verification Plan
1. **Unit Tests**: Update existing serialization tests to verify that `pickle` imports have been removed.
2. **Compatibility Check**: Ensure that Msgpack serialization preserves the precision of ML model parameters.
3. **Exploit Regression**: Verify that the SMB-based PoC from `GUIDE.md` now fails with a "Format not supported" or "Signature missing" error.

---
*Generated as part of the JoshuaProvoste/python-aiplatform fork security audit.*
2 changes: 1 addition & 1 deletion google/cloud/aiplatform/constants/prediction.py
@@ -13,7 +13,6 @@
# limitations under the License.

import re

from collections import defaultdict

# [region]-docker.pkg.dev/vertex-ai/prediction/[framework]-[accelerator].[version]:latest
@@ -305,3 +304,4 @@
MODEL_FILENAME_BST = "model.bst"
MODEL_FILENAME_JOBLIB = "model.joblib"
MODEL_FILENAME_PKL = "model.pkl"
MODEL_FILENAME_MSGPACK = "model.msgpack"
67 changes: 33 additions & 34 deletions google/cloud/aiplatform/prediction/sklearn/predictor.py
@@ -15,15 +15,17 @@
# limitations under the License.
#

import os
import warnings

import joblib
import msgpack
import numpy as np

from google.cloud.aiplatform.constants import prediction
from google.cloud.aiplatform.prediction.predictor import Predictor
from google.cloud.aiplatform.utils import prediction_utils, security_utils


class SklearnPredictor(Predictor):
@@ -54,45 +56,42 @@ def load(self, artifacts_uri: str, **kwargs) -> None:

if allowed_extensions is None:
warnings.warn(
"No 'allowed_extensions' provided. Models are now required to be in "
"signed msgpack format for security.",
UserWarning,
)

# Download artifacts before checking which local model files exist.
prediction_utils.download_model_artifacts(artifacts_uri)

# 1. First, check for the new secure format (Signed Msgpack)
if os.path.exists(prediction.MODEL_FILENAME_MSGPACK):
with open(prediction.MODEL_FILENAME_MSGPACK, "rb") as f:
signed_data = f.read()
# Verify HMAC integrity before unpacking
verified_data = security_utils.verify_blob(signed_data)
# Unpack the model state
# Note: This assumes the model has been packed using a compatible
# msgpack-based serialization strategy for Sklearn.
self._model = msgpack.unpackb(verified_data, raw=False)
return

# 2. Block insecure formats if redirection is possible
if os.path.exists(prediction.MODEL_FILENAME_JOBLIB) or os.path.exists(
prediction.MODEL_FILENAME_PKL
):
raise RuntimeError(
"Security Error: Insecure model formats (.pkl, .joblib) are no longer "
"supported by this version of the SDK. Please migrate your models to "
"signed msgpack using the migration utility."
)

valid_filenames = [
prediction.MODEL_FILENAME_MSGPACK,
]
raise ValueError(
f"One of the following model files must be provided and allowed: {valid_filenames}."
)

def preprocess(self, prediction_input: dict) -> np.ndarray:
"""Converts the request body to a numpy array before prediction.
Args:
87 changes: 37 additions & 50 deletions google/cloud/aiplatform/prediction/xgboost/predictor.py
@@ -15,18 +15,19 @@
# limitations under the License.
#

import logging
import os
import warnings

import joblib
import msgpack
import numpy as np
import xgboost as xgb

from google.cloud.aiplatform.constants import prediction
from google.cloud.aiplatform.prediction.predictor import Predictor
from google.cloud.aiplatform.utils import prediction_utils, security_utils


class XgboostPredictor(Predictor):
@@ -56,62 +57,48 @@ def load(self, artifacts_uri: str, **kwargs) -> None:

if allowed_extensions is None:
warnings.warn(
"No 'allowed_extensions' provided. Models are now required to be in "
"signed msgpack or native .bst format for security.",
UserWarning,
)

# Download artifacts before checking which local model files exist.
prediction_utils.download_model_artifacts(artifacts_uri)

# 1. First, check for the new secure format (Signed Msgpack)
if os.path.exists(prediction.MODEL_FILENAME_MSGPACK):
with open(prediction.MODEL_FILENAME_MSGPACK, "rb") as f:
signed_data = f.read()
# Verify HMAC integrity before unpacking
verified_data = security_utils.verify_blob(signed_data)
# Unpack the booster state
# Note: This requires a compatible msgpack-to-XGBoost strategy.
self._booster = msgpack.unpackb(verified_data, raw=False)
return

# 2. Check for native .bst (Safer but requires validation)
if os.path.exists(prediction.MODEL_FILENAME_BST):
self._booster = xgb.Booster(model_file=prediction.MODEL_FILENAME_BST)
return

# 3. Block insecure formats

if os.path.exists(prediction.MODEL_FILENAME_JOBLIB) or os.path.exists(
prediction.MODEL_FILENAME_PKL
):
raise RuntimeError(
"Security Error: Insecure model formats (.pkl, .joblib) are no longer "
"supported by this version of the SDK. Please migrate your models to "
"signed msgpack or native .bst using the migration utility."
)

valid_filenames = [
prediction.MODEL_FILENAME_MSGPACK,
prediction.MODEL_FILENAME_BST,
]
raise ValueError(
f"One of the following model files must be provided and allowed: {valid_filenames}."
)

def preprocess(self, prediction_input: dict) -> xgb.DMatrix:
"""Converts the request body to a Data Matrix before prediction.
21 changes: 15 additions & 6 deletions google/cloud/aiplatform/utils/gcs_utils.py
@@ -17,21 +17,20 @@

import datetime
import glob
import logging
import os
import pathlib
import tempfile
import uuid
import warnings

# Version detection and compatibility layer for google-cloud-storage v2/v3
from importlib.metadata import version as get_version
from typing import TYPE_CHECKING, Optional

from google.auth import credentials as auth_credentials
from packaging.version import Version

from google.cloud import storage
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform.utils import resource_manager_utils

@@ -106,6 +105,9 @@ def blob_from_uri(uri: str, client: storage.Client) -> storage.Blob:
Returns:
storage.Blob: Blob instance
"""
from google.cloud.aiplatform.utils import security_utils

security_utils.validate_uri(uri)
if _USE_FROM_URI:
return storage.Blob.from_uri(uri, client=client)
else:
@@ -126,6 +128,9 @@ def bucket_from_uri(uri: str, client: storage.Client) -> storage.Bucket:
Returns:
storage.Bucket: Bucket instance
"""
from google.cloud.aiplatform.utils import security_utils

security_utils.validate_uri(uri)
if _USE_FROM_URI:
return storage.Bucket.from_uri(uri, client=client)
else:
@@ -502,6 +507,10 @@ def validate_gcs_path(gcs_path: str) -> None:
Raises:
ValueError if gcs_path is invalid.
"""
from google.cloud.aiplatform.utils import security_utils

security_utils.validate_uri(gcs_path)

if not gcs_path.startswith("gs://"):
raise ValueError(
f"Invalid GCS path {gcs_path}. Please provide a valid GCS path starting with 'gs://'"