From 133433d93413e9c35f8aa1e1ea024bbc8a1cab40 Mon Sep 17 00:00:00 2001 From: Piotr Duda Date: Sat, 14 Mar 2026 15:15:00 +0100 Subject: [PATCH 1/2] Support for mlx --- README.md | 3 +- examples/mlx_example.py | 78 ++++++++ wildedge/client.py | 3 + wildedge/integrations/mlx.py | 308 ++++++++++++++++++++++++++++++ wildedge/integrations/registry.py | 1 + 5 files changed, 392 insertions(+), 1 deletion(-) create mode 100644 examples/mlx_example.py create mode 100644 wildedge/integrations/mlx.py diff --git a/README.md b/README.md index 66f181f..6865964 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Coverage](https://codecov.io/gh/wildedge/wildedge-python/branch/main/graph/badge.svg)](https://codecov.io/gh/wildedge/wildedge-python) -On-device ML inference monitoring for Python. Track in-dept, model quality & performance information. +On-device ML inference monitoring for Python. Tracks latency, errors, and model metadata — no inputs or outputs captured. ## Install @@ -57,6 +57,7 @@ client.instrument("transformers", hubs=["huggingface"]) | Integration | Patches | Hub tracking | Example | |---|---|---|---| | `transformers` | `pipeline()`, `AutoModel.from_pretrained()` | `huggingface` | [transformers_example.py](examples/transformers_example.py) | +| `mlx` | `mlx_lm.load()`, `mlx_lm.generate()` | `huggingface` | [mlx_example.py](examples/mlx_example.py) | | `timm` | `timm.create_model()` | `huggingface`, `torchhub` | [timm_example.py](examples/timm_example.py) | | `gguf` | `llama_cpp.Llama.__init__` | `huggingface` | [gguf_example.py](examples/gguf_example.py) | | `onnx` | `ort.InferenceSession` | `huggingface` | [onnx_example.py](examples/onnx_example.py) | diff --git a/examples/mlx_example.py b/examples/mlx_example.py new file mode 100644 index 0000000..75f7cca --- /dev/null +++ b/examples/mlx_example.py @@ -0,0 +1,78 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = ["wildedge-sdk", "mlx-lm"] +# +# [tool.uv.sources] +# wildedge-sdk = { path = "..", editable = true } +# /// +""" +MLX / mlx-lm integration example — Apple Silicon only. + +WildEdge patches mlx_lm.load and mlx_lm.generate at client initialisation. +Load timing, HuggingFace Hub download tracking, inference metrics (tokens/sec, +token counts), and unload tracking all happen automatically. + +Usage: + uv run mlx_example.py + uv run mlx_example.py --model mlx-community/Llama-3.2-1B-Instruct-4bit + uv run mlx_example.py --model mlx-community/Mistral-7B-Instruct-v0.3-4bit +""" + +from __future__ import annotations + +import argparse + +import mlx_lm + +import wildedge + +PROMPTS = [ + "Explain on-device ML inference in one sentence.", + "What makes Apple Silicon well-suited for local AI?", + "Name three advantages of privacy-preserving inference.", +] + +DEFAULT_MODEL = "mlx-community/Llama-3.2-1B-Instruct-4bit" + + +def main() -> None: + parser = argparse.ArgumentParser(description="WildEdge + mlx-lm example") + parser.add_argument( + "--model", + default=DEFAULT_MODEL, + help=f"HuggingFace repo or local path (default: {DEFAULT_MODEL})", + ) + parser.add_argument( + "--max-tokens", + type=int, + default=80, + help="Max tokens to generate per prompt (default: 80)", + ) + args = parser.parse_args() + + # instrument() patches mlx_lm.load and mlx_lm.generate — must be called + # before any model is loaded. 
+ client = wildedge.WildEdge(app_version="1.0.0") # set WILDEDGE_DSN env var + client.instrument("mlx", hubs=["huggingface"]) + + print(f"\nLoading {args.model} ...") + model, tokenizer = mlx_lm.load(args.model) # load + download tracked automatically + + print(f"\nRunning {len(PROMPTS)} prompts (max_tokens={args.max_tokens}):\n") + for i, prompt in enumerate(PROMPTS, 1): + response = mlx_lm.generate( + model, + tokenizer, + prompt=prompt, + max_tokens=args.max_tokens, + verbose=False, + ) + print(f"[{i}] Q: {prompt}") + print(f" A: {response}\n") + + client.flush() + print("Done — events flushed to WildEdge.") + + +if __name__ == "__main__": + main() diff --git a/wildedge/client.py b/wildedge/client.py index df38253..92de53e 100644 --- a/wildedge/client.py +++ b/wildedge/client.py @@ -20,6 +20,7 @@ from wildedge.integrations.base import BaseExtractor from wildedge.integrations.gguf import GgufExtractor from wildedge.integrations.keras import KerasExtractor +from wildedge.integrations.mlx import MlxExtractor from wildedge.integrations.onnx import OnnxExtractor from wildedge.integrations.pytorch import PytorchExtractor from wildedge.integrations.registry import noop_integrations, supported_integrations @@ -84,6 +85,7 @@ def parse_dsn(dsn: str) -> tuple[str, str, str]: GgufExtractor(), UltralyticsExtractor(), TransformersExtractor(), + MlxExtractor(), PytorchExtractor(), TensorflowExtractor(), KerasExtractor(), @@ -107,6 +109,7 @@ class WildEdge: NOOP_INTEGRATIONS = noop_integrations() PATCH_INSTALLERS = { "gguf": GgufExtractor.install_auto_load_patch, + "mlx": MlxExtractor.install_auto_load_patch, "onnx": OnnxExtractor.install_auto_load_patch, "timm": PytorchExtractor.install_timm_patch, "tensorflow": TensorflowExtractor.install_auto_load_patch, diff --git a/wildedge/integrations/mlx.py b/wildedge/integrations/mlx.py new file mode 100644 index 0000000..e46d48d --- /dev/null +++ b/wildedge/integrations/mlx.py @@ -0,0 +1,308 @@ +"""MLX / mlx-lm integration.""" + +from __future__ import annotations + +import threading +import time +from typing import TYPE_CHECKING + +from wildedge import constants +from wildedge.events.inference import GenerationOutputMeta, TextInputMeta +from wildedge.integrations.base import BaseExtractor, patch_instance_call_once +from wildedge.logging import logger +from wildedge.model import ModelInfo +from wildedge.timing import elapsed_ms + +try: + import mlx.core as _mx + import mlx.nn as _mlx_nn + from mlx.utils import tree_flatten as _tree_flatten +except ImportError: + _mx = None # type: ignore[assignment] + _mlx_nn = None # type: ignore[assignment] + _tree_flatten = None # type: ignore[assignment] + +try: + import mlx_lm as _mlx_lm +except ImportError: + _mlx_lm = None # type: ignore[assignment] + +if TYPE_CHECKING: + from wildedge.model import ModelHandle + +# --- Patch state --- +_mlx_patched = False +_MLX_PATCH_LOCK = threading.Lock() +MLX_AUTO_LOAD_PATCH_NAME = "mlx_auto_load" +MLX_GENERATE_PATCH_NAME = "mlx_generate" +MLX_CALL_PATCH_NAME = "mlx_call" +MLX_HANDLE_ATTR = "__wildedge_mlx_handle__" + +# Thread-local flag: suppress __call__ tracking inside mlx_lm.generate's +# autoregressive loop (which calls model() once per token). 
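#
# The flow (as implemented in patched_generate / _build_mlx_call_patch below):
#
#     _inside_mlx_generate.active = True
#     try:
#         result = original_generate(model, tokenizer, prompt, ...)
#     finally:
#         _inside_mlx_generate.active = False
#
# While the flag is set, the patched __call__ returns early, so a single
# generate() call emits one inference event instead of one per token.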
+_inside_mlx_generate = threading.local() + + +def _debug_failure(context: str, exc: BaseException) -> None: + logger.debug("wildedge: mlx %s failed: %s", context, exc) + + +def _is_mlx_module(obj: object) -> bool: + for cls in type(obj).__mro__: + if cls.__name__ == "Module" and "mlx" in cls.__module__: + return True + return False + + +def _extract_model_args(obj: object) -> tuple[str | None, str | None]: + """Returns (model_type, quantization_str) from model.args. Never raises.""" + try: + args = getattr(obj, "args", None) + if args is None: + return None, None + model_type = getattr(args, "model_type", None) or None + quant = getattr(args, "quantization", None) + if quant is not None: + bits = getattr(quant, "bits", None) + q_str = f"q{int(bits)}" if bits else "quantized" + else: + q_str = _detect_quantization_from_layers(obj) + return model_type, q_str + except Exception as exc: + _debug_failure("model args extraction", exc) + return None, None + + +def _detect_quantization_from_layers(obj: object) -> str | None: + """Inspect layer class names for quantized linear layers.""" + try: + for _, module in obj.named_modules(): # type: ignore[union-attr] + cls_name = type(module).__name__ + if "Quantized" in cls_name or "quantized" in cls_name: + return "quantized" + except Exception: + pass + return None + + +def _count_tokens(tokenizer: object, text: str) -> int | None: + try: + return len(tokenizer.encode(text)) # type: ignore[union-attr] + except Exception: + return None + + +# --------------------------------------------------------------------------- +# Direct __call__ patch (non-LM / manual-registration use case) +# --------------------------------------------------------------------------- + + +def _build_mlx_call_patch(original_call): # type: ignore[no-untyped-def] + def patched_call(self_inner, *args, **kwargs): # type: ignore[no-untyped-def] + # Suppress during mlx_lm.generate's autoregressive token loop + if getattr(_inside_mlx_generate, "active", False): + return original_call(self_inner, *args, **kwargs) + + handle = getattr(self_inner, MLX_HANDLE_ATTR, None) + if handle is None: + return original_call(self_inner, *args, **kwargs) + + t0 = time.perf_counter() + try: + result = original_call(self_inner, *args, **kwargs) + handle.track_inference(duration_ms=elapsed_ms(t0), success=True) + return result + except Exception as exc: + handle.track_error( + error_code="UNKNOWN", + error_message=str(exc)[: constants.ERROR_MSG_MAX_LEN], + ) + raise + + return patched_call + + +# --------------------------------------------------------------------------- +# Extractor +# --------------------------------------------------------------------------- + + +class MlxExtractor(BaseExtractor): + def can_handle(self, obj: object) -> bool: + return _is_mlx_module(obj) + + def extract_info( + self, obj: object, overrides: dict + ) -> tuple[str | None, ModelInfo]: + model_type, quantization = _extract_model_args(obj) + + model_name = model_type or type(obj).__name__ + model_id = overrides.pop("id", None) or model_name + family = overrides.pop("family", None) or model_type + version = overrides.pop("version", "unknown") + source = overrides.pop("source", "huggingface") + quantization = overrides.pop("quantization", None) or quantization + + info = ModelInfo( + model_name=model_name, + model_version=version, + model_source=source, + model_format="mlx", + model_family=family, + quantization=quantization, + ) + for k, v in overrides.items(): + if hasattr(info, k): + setattr(info, k, v) + + return 
model_id, info + + def memory_bytes(self, obj: object) -> int | None: + if _tree_flatten is None: + return None + try: + return sum( + v.nbytes + for _, v in _tree_flatten(obj.parameters()) # type: ignore[union-attr] + if hasattr(v, "nbytes") + ) + except Exception as exc: + _debug_failure("memory estimation", exc) + return None + + def install_hooks(self, obj: object, handle: ModelHandle) -> None: + setattr(obj, MLX_HANDLE_ATTR, handle) + patch_instance_call_once( + obj, + patch_name=MLX_CALL_PATCH_NAME, + make_patched_call=_build_mlx_call_patch, + ) + + # ----------------------------------------------------------------------- + # Auto-load patches + # ----------------------------------------------------------------------- + + @classmethod + def install_auto_load_patch(cls, client_ref: object) -> None: + """Patch mlx_lm.load and mlx_lm.generate for automatic tracking. + + Called once at WildEdge client initialisation. + + - ``mlx_lm.load(path_or_repo)`` is timed; model ID is captured from + the path argument; HuggingFace Hub downloads are recorded. + - ``mlx_lm.generate(model, tokenizer, prompt, ...)`` is patched to + emit a single inference event per call with token counts and + tokens/second. The autoregressive ``model()`` loop inside generate + is suppressed via a thread-local guard so it does not double-count. + """ + global _mlx_patched + if _mlx_patched or _mlx_lm is None: + return + + with _MLX_PATCH_LOCK: + if _mlx_patched: + return + cls._patch_load(client_ref) + cls._patch_generate(client_ref) + _mlx_patched = True + + @classmethod + def _patch_load(cls, client_ref: object) -> None: + original_load = _mlx_lm.load + if ( + getattr(original_load, "__wildedge_patch_name__", None) + == MLX_AUTO_LOAD_PATCH_NAME + ): + return + + def patched_load(path_or_hf_repo, *args, **kwargs): # type: ignore[no-untyped-def] + c = client_ref() # type: ignore[call-arg] + hub_before = ( + c._snapshot_hub_caches() if c is not None and not c.closed else {} + ) + t0 = time.perf_counter() + result = original_load(path_or_hf_repo, *args, **kwargs) + load_ms = elapsed_ms(t0) + + # mlx_lm.load returns (model, tokenizer) + model = result[0] if isinstance(result, tuple) else result + + if c is not None and not c.closed: + downloads = c._diff_hub_caches(hub_before, load_ms) or None + model_id = str(path_or_hf_repo) if path_or_hf_repo else None + c._on_model_auto_loaded( + model, + load_ms=load_ms, + downloads=downloads, + model_id=model_id, + ) + + return result + + patched_load.__wildedge_patch_name__ = MLX_AUTO_LOAD_PATCH_NAME # type: ignore[attr-defined] + patched_load.__wildedge_original_call__ = original_load # type: ignore[attr-defined] + _mlx_lm.load = patched_load + + @classmethod + def _patch_generate(cls, client_ref: object) -> None: # noqa: ARG003 + original_generate = _mlx_lm.generate + if ( + getattr(original_generate, "__wildedge_patch_name__", None) + == MLX_GENERATE_PATCH_NAME + ): + return + + def patched_generate(model, tokenizer, prompt, *args, **kwargs): # type: ignore[no-untyped-def] + handle: ModelHandle | None = getattr(model, MLX_HANDLE_ATTR, None) + + tokens_in = _count_tokens(tokenizer, prompt) if tokenizer else None + input_meta = TextInputMeta(token_count=tokens_in) if tokens_in else None + + _inside_mlx_generate.active = True + t0 = time.perf_counter() + try: + result = original_generate(model, tokenizer, prompt, *args, **kwargs) + duration_ms = elapsed_ms(t0) + except Exception as exc: + _inside_mlx_generate.active = False + if handle is not None: + handle.track_error( + 
error_code="UNKNOWN", + error_message=str(exc)[: constants.ERROR_MSG_MAX_LEN], + ) + raise + finally: + _inside_mlx_generate.active = False + + if handle is not None: + output_text = ( + result.text + if hasattr(result, "text") + else (result if isinstance(result, str) else None) + ) + tokens_out: int | None = None + tps: float | None = None + if output_text and tokenizer: + tokens_out = _count_tokens(tokenizer, output_text) + if tokens_out and duration_ms > 0: + tps = round(tokens_out / (duration_ms / 1000), 1) + + handle.track_inference( + duration_ms=duration_ms, + batch_size=1, + input_modality="text", + output_modality="generation", + input_meta=input_meta, + output_meta=GenerationOutputMeta( + tokens_in=tokens_in, + tokens_out=tokens_out, + tokens_per_second=tps, + ), + success=True, + ) + + return result + + patched_generate.__wildedge_patch_name__ = MLX_GENERATE_PATCH_NAME # type: ignore[attr-defined] + patched_generate.__wildedge_original_call__ = original_generate # type: ignore[attr-defined] + _mlx_lm.generate = patched_generate diff --git a/wildedge/integrations/registry.py b/wildedge/integrations/registry.py index 6b09b7a..f39a54a 100644 --- a/wildedge/integrations/registry.py +++ b/wildedge/integrations/registry.py @@ -32,6 +32,7 @@ class IntegrationSpec: IntegrationSpec("tensorflow", ("tensorflow",), "client_patch"), IntegrationSpec("ultralytics", ("ultralytics",), "client_patch"), IntegrationSpec("transformers", ("transformers",), "client_patch"), + IntegrationSpec("mlx", ("mlx_lm",), "client_patch"), ) INTEGRATIONS_BY_NAME: dict[str, IntegrationSpec] = { From e73aa731f135eebd820694161b8aeaa19c747bd5 Mon Sep 17 00:00:00 2001 From: Piotr Duda Date: Sat, 14 Mar 2026 15:18:55 +0100 Subject: [PATCH 2/2] Support for mlx --- README.md | 30 ++++++++++++------------- examples/chatgpt_example.py | 2 +- examples/django_gemma/gemmaapp/views.py | 4 ++-- examples/django_gemma/gunicorn.conf.py | 2 +- examples/feedback_example.py | 2 +- examples/gguf_example.py | 2 +- examples/gguf_gemma_manual_example.py | 4 ++-- examples/keras_example.py | 2 +- examples/mlx_example.py | 6 ++--- examples/onnx_example.py | 2 +- examples/pytorch_example.py | 2 +- examples/timm_example.py | 4 ++-- examples/transformers_example.py | 6 ++--- tests/test_consumer.py | 2 +- tests/test_integrations.py | 2 +- tests/test_integrations_ultralytics.py | 2 +- wildedge/autoload/sitecustomize.py | 2 +- wildedge/hubs/torchhub.py | 4 ++-- wildedge/integrations/registry.py | 3 +-- wildedge/integrations/ultralytics.py | 2 +- 20 files changed, 42 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 6865964..524df87 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Coverage](https://codecov.io/gh/wildedge/wildedge-python/branch/main/graph/badge.svg)](https://codecov.io/gh/wildedge/wildedge-python) -On-device ML inference monitoring for Python. Tracks latency, errors, and model metadata — no inputs or outputs captured. +On-device ML inference monitoring for Python. Tracks latency, errors, and model metadata. No inputs or outputs captured. ## Install @@ -16,9 +16,9 @@ On-device ML inference monitoring for Python. Tracks latency, errors, and model uv add wildedge-sdk ``` -## CLI — zero code changes +## CLI -Drop `wildedge run` in front of your existing command. WildEdge instruments the runtime before your code starts — no SDK calls required in user code. 
+Drop `wildedge run` in front of your existing command. WildEdge instruments the runtime before your code starts. No SDK calls required in user code.

```bash
WILDEDGE_DSN="https://<key>@ingest.wildedge.dev/" \
@@ -61,10 +61,10 @@ client.instrument("transformers", hubs=["huggingface"])
| `timm` | `timm.create_model()` | `huggingface`, `torchhub` | [timm_example.py](examples/timm_example.py) |
| `gguf` | `llama_cpp.Llama.__init__` | `huggingface` | [gguf_example.py](examples/gguf_example.py) |
| `onnx` | `ort.InferenceSession` | `huggingface` | [onnx_example.py](examples/onnx_example.py) |
-| `ultralytics` | `ultralytics.YOLO.__init__` | — | — |
-| `tensorflow` | `tf.keras.models.load_model`, `tf.saved_model.load` | — | [tensorflow_example.py](examples/tensorflow_example.py) |
+| `ultralytics` | `ultralytics.YOLO.__init__` | - | - |
+| `tensorflow` | `tf.keras.models.load_model`, `tf.saved_model.load` | - | [tensorflow_example.py](examples/tensorflow_example.py) |
| `torch` | forward hooks via `client.load()` | `torchhub` | [pytorch_example.py](examples/pytorch_example.py) |
-| `keras` | forward hooks via `client.load()` | — | [keras_example.py](examples/keras_example.py) |
+| `keras` | forward hooks via `client.load()` | - | [keras_example.py](examples/keras_example.py) |

For `torch` and `keras`, models are user-defined subclasses so there's no
constructor to patch. Use `client.load()` to get load/unload tracking
alongside inference:
@@ -89,20 +89,20 @@ def run(input):

| Parameter | Default | Env var | Description |
|---|---|---|---|
-| `dsn` | — | `WILDEDGE_DSN` | `https://<key>@ingest.wildedge.dev/` |
-| `app_version` | `None` | — | Your app's version string |
+| `dsn` | - | `WILDEDGE_DSN` | `https://<key>@ingest.wildedge.dev/` |
+| `app_version` | `None` | - | Your app's version string |
| `app_identity` | `` | `WILDEDGE_APP_IDENTITY` | Namespace for offline persistence; set per-app in multi-process workloads |
| `debug` | `false` | `WILDEDGE_DEBUG` | Log events to console |
-| `batch_size` | `10` | — | Events per transmission (1–100) |
-| `flush_interval_sec` | `60` | — | Max seconds between flushes (1–3600) |
-| `max_queue_size` | `200` | — | In-memory buffer limit (10–10000) |
-| `enable_offline_persistence` | `true` | — | Persist unsent events to disk and replay on restart |
-| `max_event_age_sec` | `900` | — | Max age before dead-lettering |
-| `enable_dead_letter_persistence` | `false` | — | Persist dropped batches to disk |
+| `batch_size` | `10` | - | Events per transmission (1-100) |
+| `flush_interval_sec` | `60` | - | Max seconds between flushes (1-3600) |
+| `max_queue_size` | `200` | - | In-memory buffer limit (10-10000) |
+| `enable_offline_persistence` | `true` | - | Persist unsent events to disk and replay on restart |
+| `max_event_age_sec` | `900` | - | Max age before dead-lettering |
+| `enable_dead_letter_persistence` | `false` | - | Persist dropped batches to disk |

## Privacy

-WildEdge captures **no inputs or outputs** — only metadata: latency, errors, model info, and download provenance. All inference runs locally; only telemetry is transmitted over HTTPS.
+WildEdge captures **no inputs or outputs**. Only metadata: latency, errors, model info, and download provenance. All inference runs locally; only telemetry is transmitted over HTTPS.

Report security issues to security@wildedge.dev.
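Putting the configuration table together, a minimal sketch (this assumes the `WildEdge` constructor accepts the table's parameters as keyword arguments; the values shown are illustrative, not recommendations):

```python
import wildedge

client = wildedge.WildEdge(
    dsn="https://<key>@ingest.wildedge.dev/",  # or set WILDEDGE_DSN instead
    app_version="1.0.0",
    batch_size=20,              # events per transmission (1-100)
    flush_interval_sec=30,      # max seconds between flushes (1-3600)
    max_queue_size=500,         # in-memory buffer limit (10-10000)
)
```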
diff --git a/examples/chatgpt_example.py b/examples/chatgpt_example.py index dc00b39..edb6cb3 100644 --- a/examples/chatgpt_example.py +++ b/examples/chatgpt_example.py @@ -5,7 +5,7 @@ # [tool.uv.sources] # wildedge-sdk = { path = "..", editable = true } # /// -"""ChatGPT (OpenAI API) — fully manual integration. +"""ChatGPT (OpenAI API): fully manual integration. Shows how to instrument a remote LLM with no local model file. Tracks input/output token counts, generation config, latency, errors, diff --git a/examples/django_gemma/gemmaapp/views.py b/examples/django_gemma/gemmaapp/views.py index 264079e..2bc3cf2 100644 --- a/examples/django_gemma/gemmaapp/views.py +++ b/examples/django_gemma/gemmaapp/views.py @@ -1,7 +1,7 @@ """Gemma inference view. The Llama constructor is patched automatically by `wildedge run --integrations gguf` -via sitecustomize.py — load/unload/inference events are tracked without any +via sitecustomize.py; load/unload/inference events are tracked without any wildedge imports here. On macOS, waitress (thread-pool, no fork) is used as the WSGI server. @@ -30,7 +30,7 @@ verbose=False, ) -# Llama inference is not thread-safe on a single context — serialise requests. +# Llama inference is not thread-safe on a single context; serialise requests. _llm_lock = threading.Lock() diff --git a/examples/django_gemma/gunicorn.conf.py b/examples/django_gemma/gunicorn.conf.py index 3f496b7..3fce87d 100644 --- a/examples/django_gemma/gunicorn.conf.py +++ b/examples/django_gemma/gunicorn.conf.py @@ -1,4 +1,4 @@ -"""Gunicorn configuration — Linux only. +"""Gunicorn configuration. Linux only. On macOS use waitress instead (demo.sh selects automatically). Requires llama-cpp-python built without Metal: diff --git a/examples/feedback_example.py b/examples/feedback_example.py index 6dd57c1..11f9e7c 100644 --- a/examples/feedback_example.py +++ b/examples/feedback_example.py @@ -6,7 +6,7 @@ # wildedge-sdk = { path = "..", editable = true } # /// """ -Feedback example — run with: uv run feedback_example.py +Feedback example. Run with: uv run feedback_example.py Simulates an automated quality gate: after each inference, the top-1 confidence score drives a thumbs_up / thumbs_down feedback event with no human input. diff --git a/examples/gguf_example.py b/examples/gguf_example.py index cc3543b..074d535 100644 --- a/examples/gguf_example.py +++ b/examples/gguf_example.py @@ -5,7 +5,7 @@ # [tool.uv.sources] # wildedge-sdk = { path = "..", editable = true } # /// -"""GGUF / llama.cpp integration example — run with: uv run gguf_example.py""" +"""GGUF / llama.cpp integration example. Run with: uv run gguf_example.py""" from huggingface_hub import hf_hub_download from llama_cpp import Llama diff --git a/examples/gguf_gemma_manual_example.py b/examples/gguf_gemma_manual_example.py index 2418187..b1f0c0a 100644 --- a/examples/gguf_gemma_manual_example.py +++ b/examples/gguf_gemma_manual_example.py @@ -5,7 +5,7 @@ # [tool.uv.sources] # wildedge-sdk = { path = "..", editable = true } # /// -"""Gemma 2 GGUF — fully manual integration, no auto-instrumentation. +"""Gemma 2 GGUF: fully manual integration, no auto-instrumentation. Shows explicit download / load / inference / error tracking without client.instrument() or any automatic hooks. @@ -40,7 +40,7 @@ llm = Llama(model_path, n_ctx=2048, n_gpu_layers=-1, verbose=False) load_ms = t.elapsed_ms -# All metadata supplied explicitly — no extractor runs, no hooks installed. +# All metadata supplied explicitly. No extractor runs, no hooks installed. 
handle = client.register_model( llm, model_id="gemma-2-2b-it-q4", diff --git a/examples/keras_example.py b/examples/keras_example.py index c854c89..b782139 100644 --- a/examples/keras_example.py +++ b/examples/keras_example.py @@ -6,7 +6,7 @@ # wildedge-sdk = { path = "..", editable = true } # /// """ -Keras integration example — run with: uv run keras_example.py +Keras integration example. Run with: uv run keras_example.py Keras models are user-defined subclasses, so wildedge cannot patch the constructor directly like with timm or PyTorch. Use client.load() to diff --git a/examples/mlx_example.py b/examples/mlx_example.py index 75f7cca..2c87a3a 100644 --- a/examples/mlx_example.py +++ b/examples/mlx_example.py @@ -6,7 +6,7 @@ # wildedge-sdk = { path = "..", editable = true } # /// """ -MLX / mlx-lm integration example — Apple Silicon only. +MLX / mlx-lm integration example. Apple Silicon only. WildEdge patches mlx_lm.load and mlx_lm.generate at client initialisation. Load timing, HuggingFace Hub download tracking, inference metrics (tokens/sec, @@ -50,7 +50,7 @@ def main() -> None: ) args = parser.parse_args() - # instrument() patches mlx_lm.load and mlx_lm.generate — must be called + # instrument() patches mlx_lm.load and mlx_lm.generate; must be called # before any model is loaded. client = wildedge.WildEdge(app_version="1.0.0") # set WILDEDGE_DSN env var client.instrument("mlx", hubs=["huggingface"]) @@ -71,7 +71,7 @@ def main() -> None: print(f" A: {response}\n") client.flush() - print("Done — events flushed to WildEdge.") + print("Done. Events flushed to WildEdge.") if __name__ == "__main__": diff --git a/examples/onnx_example.py b/examples/onnx_example.py index 73dc77c..a5a9232 100644 --- a/examples/onnx_example.py +++ b/examples/onnx_example.py @@ -5,7 +5,7 @@ # [tool.uv.sources] # wildedge-sdk = { path = "..", editable = true } # /// -"""ONNX Runtime integration example — run with: uv run onnx_example.py""" +"""ONNX Runtime integration example. Run with: uv run onnx_example.py""" import numpy as np import onnxruntime as ort diff --git a/examples/pytorch_example.py b/examples/pytorch_example.py index b28760e..c384b15 100644 --- a/examples/pytorch_example.py +++ b/examples/pytorch_example.py @@ -5,7 +5,7 @@ # [tool.uv.sources] # wildedge-sdk = { path = "..", editable = true } # /// -"""PyTorch integration example — run with: uv run pytorch_example.py""" +"""PyTorch integration example. Run with: uv run pytorch_example.py""" import torch import torch.nn as nn diff --git a/examples/timm_example.py b/examples/timm_example.py index 47ba3a7..1a302e7 100644 --- a/examples/timm_example.py +++ b/examples/timm_example.py @@ -6,9 +6,9 @@ # wildedge-sdk = { path = "..", editable = true } # /// """ -timm integration example — run with: uv run timm_example.py +timm integration example. Run with: uv run timm_example.py -timm models are standard PyTorch nn.Module subclasses — wildedge patches +timm models are standard PyTorch nn.Module subclasses; wildedge patches timm.create_model at client initialisation, so load timing, download tracking, and unload tracking happen automatically. Inference tracking uses the existing PyTorch forward hooks. 
diff --git a/examples/transformers_example.py b/examples/transformers_example.py index 86912a0..f8bdc68 100644 --- a/examples/transformers_example.py +++ b/examples/transformers_example.py @@ -69,7 +69,7 @@ def run_embed() -> None: print("Feature extraction (BERT):") for sent in sentences: result = pipe(sent) - # result shape: [1, seq_len, hidden_size] — take CLS token embedding + # result shape: [1, seq_len, hidden_size]; take CLS token embedding cls_embedding = result[0][0] dims = len(cls_embedding) norm = sum(v**2 for v in cls_embedding) ** 0.5 @@ -89,7 +89,7 @@ def main() -> None: args = parser.parse_args() # instrument() patches transformers.pipeline and AutoModel.from_pretrained - # before any model is loaded — everything below is tracked automatically. + # before any model is loaded; everything below is tracked automatically. client = wildedge.WildEdge(app_version="1.0.0") # set WILDEDGE_DSN env var client.instrument("transformers", hubs=["huggingface"]) @@ -99,7 +99,7 @@ def main() -> None: ]() client.flush() - print("\nDone — events flushed to WildEdge.") + print("\nDone. Events flushed to WildEdge.") if __name__ == "__main__": diff --git a/tests/test_consumer.py b/tests/test_consumer.py index 0084c3d..5553a47 100644 --- a/tests/test_consumer.py +++ b/tests/test_consumer.py @@ -411,7 +411,7 @@ def test_flush_is_noop_after_pause_before_resume(self, monkeypatch): consumer._pause() # stopped is False (reset by _pause) and queue is empty, so flush - # calls drain_once which returns False immediately — no transmit calls. + # calls drain_once which returns False immediately; no transmit calls. consumer.flush(timeout=0.1) mock_tx.send.assert_not_called() diff --git a/tests/test_integrations.py b/tests/test_integrations.py index 838b915..46e4157 100644 --- a/tests/test_integrations.py +++ b/tests/test_integrations.py @@ -42,7 +42,7 @@ def make_handle(publish_spy) -> ModelHandle: # --------------------------------------------------------------------------- -# Fake objects — no ML libraries required +# Fake objects; no ML libraries required # --------------------------------------------------------------------------- # ONNX diff --git a/tests/test_integrations_ultralytics.py b/tests/test_integrations_ultralytics.py index e8b2b6a..1a05458 100644 --- a/tests/test_integrations_ultralytics.py +++ b/tests/test_integrations_ultralytics.py @@ -21,7 +21,7 @@ from wildedge.model import ModelHandle, ModelInfo # --------------------------------------------------------------------------- -# Fake objects — no ultralytics required +# Fake objects; no ultralytics required # --------------------------------------------------------------------------- diff --git a/wildedge/autoload/sitecustomize.py b/wildedge/autoload/sitecustomize.py index 92fcf86..c77db79 100644 --- a/wildedge/autoload/sitecustomize.py +++ b/wildedge/autoload/sitecustomize.py @@ -34,7 +34,7 @@ # Chain any pre-existing sitecustomize that would otherwise be shadowed. -# Use importlib to find and exec it directly — avoids sys.modules manipulation +# Use importlib to find and exec it directly; avoids sys.modules manipulation # which can trigger CPython's module GC and clear globals mid-execution. 
def _load_existing_sitecustomize() -> None:
    import importlib.util as _iutil
diff --git a/wildedge/hubs/torchhub.py b/wildedge/hubs/torchhub.py
index 0ac5293..1a91bfb 100644
--- a/wildedge/hubs/torchhub.py
+++ b/wildedge/hubs/torchhub.py
@@ -20,9 +20,9 @@
 ------------
 ``torch.hub.get_dir()`` (default ``~/.cache/torch/hub``) contains:
-- ``checkpoints/<name>-<8hexchars>.ext`` — weight files downloaded by
+- ``checkpoints/<name>-<8hexchars>.ext``: weight files downloaded by
 ``torch.hub.download_url_to_file`` or ``torch.utils.model_zoo.load_url``.
-- ``<owner>_<repo>_<ref>/`` — cloned GitHub repo directories created by
+- ``<owner>_<repo>_<ref>/``: cloned GitHub repo directories created by
 ``torch.hub.load``.
 """
diff --git a/wildedge/integrations/registry.py b/wildedge/integrations/registry.py
index f39a54a..bd3e07f 100644
--- a/wildedge/integrations/registry.py
+++ b/wildedge/integrations/registry.py
@@ -2,8 +2,7 @@
 Contains only ML *framework* integrations (inference tracking, load/unload
 timing). Model hub and repository trackers (download provenance) live in
-``wildedge.hubs.registry`` — they are orthogonal concerns with different
-activation semantics.
+``wildedge.hubs.registry``. They have different activation semantics.
 """

from __future__ import annotations
diff --git a/wildedge/integrations/ultralytics.py b/wildedge/integrations/ultralytics.py
index c4aa6b7..3f9902f 100644
--- a/wildedge/integrations/ultralytics.py
+++ b/wildedge/integrations/ultralytics.py
@@ -234,7 +234,7 @@ def classify_output_meta(
 def weights_file_exists(model_arg: object) -> bool:
     """Return True if the weights file appears to already be on disk."""
     if not isinstance(model_arg, str):
-        return True  # not a path string — weights already in memory or a loaded object
+        return True  # not a path string; weights already in memory or a loaded object
     p = Path(model_arg)
     if p.is_file():
         return True
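To make the `torch.hub` cache layout described in `torchhub.py` concrete, here is a minimal sketch that walks it. This is an illustration of the documented layout only, not WildEdge's actual tracker code:

```python
# Sketch: enumerate torch hub artifacts following the documented layout.
from pathlib import Path

import torch

hub_dir = Path(torch.hub.get_dir())  # default: ~/.cache/torch/hub
ckpt_dir = hub_dir / "checkpoints"

# Weight files downloaded by torch.hub / model_zoo: <name>-<8hexchars>.ext
if ckpt_dir.is_dir():
    for ckpt in sorted(ckpt_dir.iterdir()):
        if ckpt.is_file():
            print("checkpoint:", ckpt.name, ckpt.stat().st_size, "bytes")

# Cloned GitHub repos created by torch.hub.load: <owner>_<repo>_<ref>/
if hub_dir.is_dir():
    for entry in sorted(hub_dir.iterdir()):
        if entry.is_dir() and entry.name != "checkpoints":
            print("hub repo:", entry.name)
```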