Merged
31 changes: 16 additions & 15 deletions README.md
@@ -8,17 +8,17 @@
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Coverage](https://codecov.io/gh/wildedge/wildedge-python/branch/main/graph/badge.svg)](https://codecov.io/gh/wildedge/wildedge-python)

On-device ML inference monitoring for Python. Track in-dept, model quality & performance information.
On-device ML inference monitoring for Python. Tracks latency, errors, and model metadata. No inputs or outputs captured.

## Install

```bash
uv add wildedge-sdk
```

## CLI — zero code changes
## CLI

Drop `wildedge run` in front of your existing command. WildEdge instruments the runtime before your code starts — no SDK calls required in user code.
Drop `wildedge run` in front of your existing command. WildEdge instruments the runtime before your code starts. No SDK calls required in user code.

```bash
WILDEDGE_DSN="https://<secret>@ingest.wildedge.dev/<key>" \
@@ -57,13 +57,14 @@ client.instrument("transformers", hubs=["huggingface"])
| Integration | Patches | Hub tracking | Example |
|---|---|---|---|
| `transformers` | `pipeline()`, `AutoModel.from_pretrained()` | `huggingface` | [transformers_example.py](examples/transformers_example.py) |
| `mlx` | `mlx_lm.load()`, `mlx_lm.generate()` | `huggingface` | [mlx_example.py](examples/mlx_example.py) |
| `timm` | `timm.create_model()` | `huggingface`, `torchhub` | [timm_example.py](examples/timm_example.py) |
| `gguf` | `llama_cpp.Llama.__init__` | `huggingface` | [gguf_example.py](examples/gguf_example.py) |
| `onnx` | `ort.InferenceSession` | `huggingface` | [onnx_example.py](examples/onnx_example.py) |
| `ultralytics` | `ultralytics.YOLO.__init__` | | |
| `tensorflow` | `tf.keras.models.load_model`, `tf.saved_model.load` | | [tensorflow_example.py](examples/tensorflow_example.py) |
| `ultralytics` | `ultralytics.YOLO.__init__` | - | - |
| `tensorflow` | `tf.keras.models.load_model`, `tf.saved_model.load` | - | [tensorflow_example.py](examples/tensorflow_example.py) |
| `torch` | forward hooks via `client.load()` | `torchhub` | [pytorch_example.py](examples/pytorch_example.py) |
| `keras` | forward hooks via `client.load()` | | [keras_example.py](examples/keras_example.py) |
| `keras` | forward hooks via `client.load()` | - | [keras_example.py](examples/keras_example.py) |

For `torch` and `keras`, models are user-defined subclasses so there's no constructor to patch. Use `client.load()` to get load/unload tracking alongside inference:
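The forward-hook idea can be illustrated in plain Python, independent of wildedge or torch. This is a minimal sketch with hypothetical names (`timed`, `Model`, `events`), not the SDK's actual hook implementation:

```python
import time


def timed(forward, events):
    """Wrap a model's forward callable; append latency metadata per call."""
    def hooked(*args, **kwargs):
        start = time.perf_counter()
        try:
            return forward(*args, **kwargs)
        finally:
            events.append({"latency_ms": (time.perf_counter() - start) * 1000})
    return hooked


class Model:
    def forward(self, x):
        return x * 2


events: list[dict] = []
model = Model()
# Installing the "hook": shadow the bound method with the timed wrapper.
model.forward = timed(model.forward, events)

print(model.forward(21), len(events))  # → 42 1
```

A real forward hook (as in `torch.nn.Module.register_forward_hook`) fires around the framework's own dispatch, but the timing-wrapper shape is the same.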

@@ -88,20 +89,20 @@ def run(input):

| Parameter | Default | Env var | Description |
|---|---|---|---|
| `dsn` | | `WILDEDGE_DSN` | `https://<secret>@ingest.wildedge.dev/<key>` |
| `app_version` | `None` | | Your app's version string |
| `dsn` | - | `WILDEDGE_DSN` | `https://<secret>@ingest.wildedge.dev/<key>` |
| `app_version` | `None` | - | Your app's version string |
| `app_identity` | `<project_key>` | `WILDEDGE_APP_IDENTITY` | Namespace for offline persistence; set per-app in multi-process workloads |
| `debug` | `false` | `WILDEDGE_DEBUG` | Log events to console |
| `batch_size` | `10` | | Events per transmission (1100) |
| `flush_interval_sec` | `60` | | Max seconds between flushes (13600) |
| `max_queue_size` | `200` | | In-memory buffer limit (1010000) |
| `enable_offline_persistence` | `true` | | Persist unsent events to disk and replay on restart |
| `max_event_age_sec` | `900` | | Max age before dead-lettering |
| `enable_dead_letter_persistence` | `false` | | Persist dropped batches to disk |
| `batch_size` | `10` | - | Events per transmission (1-100) |
| `flush_interval_sec` | `60` | - | Max seconds between flushes (1-3600) |
| `max_queue_size` | `200` | - | In-memory buffer limit (10-10000) |
| `enable_offline_persistence` | `true` | - | Persist unsent events to disk and replay on restart |
| `max_event_age_sec` | `900` | - | Max age before dead-lettering |
| `enable_dead_letter_persistence` | `false` | - | Persist dropped batches to disk |
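
How `batch_size`, `flush_interval_sec`, and `max_queue_size` interact can be sketched as follows. This is a simplified toy model for illustration only, not the SDK's actual consumer:

```python
import time


class Batcher:
    """Toy model of batch_size / flush_interval_sec / max_queue_size semantics."""

    def __init__(self, batch_size=10, flush_interval_sec=60, max_queue_size=200):
        self.batch_size = batch_size
        self.flush_interval_sec = flush_interval_sec
        self.max_queue_size = max_queue_size
        self.queue: list[dict] = []
        self.sent: list[list[dict]] = []  # stand-in for HTTPS transmissions
        self.last_flush = time.monotonic()

    def enqueue(self, event: dict) -> None:
        if len(self.queue) >= self.max_queue_size:
            self.queue.pop(0)  # drop oldest when the in-memory buffer is full
        self.queue.append(event)
        interval_due = time.monotonic() - self.last_flush >= self.flush_interval_sec
        if len(self.queue) >= self.batch_size or interval_due:
            self.flush()

    def flush(self) -> None:
        while self.queue:
            batch = self.queue[: self.batch_size]
            self.queue = self.queue[self.batch_size:]
            self.sent.append(batch)  # one transmission per batch
        self.last_flush = time.monotonic()


b = Batcher(batch_size=3)
for i in range(7):
    b.enqueue({"event": i})
b.flush()  # final drain, as client.flush() would
print([len(batch) for batch in b.sent])  # → [3, 3, 1]
```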

## Privacy

WildEdge captures **no inputs or outputs** — only metadata: latency, errors, model info, and download provenance. All inference runs locally; only telemetry is transmitted over HTTPS.
WildEdge captures **no inputs or outputs**. Only metadata: latency, errors, model info, and download provenance. All inference runs locally; only telemetry is transmitted over HTTPS.

Report security issues to security@wildedge.dev.

2 changes: 1 addition & 1 deletion examples/chatgpt_example.py
@@ -5,7 +5,7 @@
# [tool.uv.sources]
# wildedge-sdk = { path = "..", editable = true }
# ///
"""ChatGPT (OpenAI API) fully manual integration.
"""ChatGPT (OpenAI API): fully manual integration.

Shows how to instrument a remote LLM with no local model file.
Tracks input/output token counts, generation config, latency, errors,
4 changes: 2 additions & 2 deletions examples/django_gemma/gemmaapp/views.py
@@ -1,7 +1,7 @@
"""Gemma inference view.

The Llama constructor is patched automatically by `wildedge run --integrations gguf`
via sitecustomize.py load/unload/inference events are tracked without any
via sitecustomize.py; load/unload/inference events are tracked without any
wildedge imports here.

On macOS, waitress (thread-pool, no fork) is used as the WSGI server.
@@ -30,7 +30,7 @@
verbose=False,
)

# Llama inference is not thread-safe on a single context serialise requests.
# Llama inference is not thread-safe on a single context; serialise requests.
_llm_lock = threading.Lock()


2 changes: 1 addition & 1 deletion examples/django_gemma/gunicorn.conf.py
@@ -1,4 +1,4 @@
"""Gunicorn configuration Linux only.
"""Gunicorn configuration. Linux only.

On macOS use waitress instead (demo.sh selects automatically).
Requires llama-cpp-python built without Metal:
2 changes: 1 addition & 1 deletion examples/feedback_example.py
@@ -6,7 +6,7 @@
# wildedge-sdk = { path = "..", editable = true }
# ///
"""
Feedback example — run with: uv run feedback_example.py
Feedback example. Run with: uv run feedback_example.py

Simulates an automated quality gate: after each inference, the top-1 confidence
score drives a thumbs_up / thumbs_down feedback event with no human input.
2 changes: 1 addition & 1 deletion examples/gguf_example.py
@@ -5,7 +5,7 @@
# [tool.uv.sources]
# wildedge-sdk = { path = "..", editable = true }
# ///
"""GGUF / llama.cpp integration example — run with: uv run gguf_example.py"""
"""GGUF / llama.cpp integration example. Run with: uv run gguf_example.py"""

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
4 changes: 2 additions & 2 deletions examples/gguf_gemma_manual_example.py
@@ -5,7 +5,7 @@
# [tool.uv.sources]
# wildedge-sdk = { path = "..", editable = true }
# ///
"""Gemma 2 GGUF fully manual integration, no auto-instrumentation.
"""Gemma 2 GGUF: fully manual integration, no auto-instrumentation.

Shows explicit download / load / inference / error tracking without
client.instrument() or any automatic hooks.
@@ -40,7 +40,7 @@
llm = Llama(model_path, n_ctx=2048, n_gpu_layers=-1, verbose=False)
load_ms = t.elapsed_ms

# All metadata supplied explicitly — no extractor runs, no hooks installed.
# All metadata supplied explicitly. No extractor runs, no hooks installed.
handle = client.register_model(
llm,
model_id="gemma-2-2b-it-q4",
2 changes: 1 addition & 1 deletion examples/keras_example.py
@@ -6,7 +6,7 @@
# wildedge-sdk = { path = "..", editable = true }
# ///
"""
Keras integration example — run with: uv run keras_example.py
Keras integration example. Run with: uv run keras_example.py

Keras models are user-defined subclasses, so wildedge cannot patch the
constructor directly like with timm or PyTorch. Use client.load() to
78 changes: 78 additions & 0 deletions examples/mlx_example.py
@@ -0,0 +1,78 @@
# /// script
# requires-python = ">=3.10"
# dependencies = ["wildedge-sdk", "mlx-lm"]
#
# [tool.uv.sources]
# wildedge-sdk = { path = "..", editable = true }
# ///
"""
MLX / mlx-lm integration example. Apple Silicon only.

WildEdge patches mlx_lm.load and mlx_lm.generate at client initialisation.
Load timing, HuggingFace Hub download tracking, inference metrics (tokens/sec,
token counts), and unload tracking all happen automatically.

Usage:
uv run mlx_example.py
uv run mlx_example.py --model mlx-community/Llama-3.2-1B-Instruct-4bit
uv run mlx_example.py --model mlx-community/Mistral-7B-Instruct-v0.3-4bit
"""

from __future__ import annotations

import argparse

import mlx_lm

import wildedge

PROMPTS = [
    "Explain on-device ML inference in one sentence.",
    "What makes Apple Silicon well-suited for local AI?",
    "Name three advantages of privacy-preserving inference.",
]

DEFAULT_MODEL = "mlx-community/Llama-3.2-1B-Instruct-4bit"


def main() -> None:
    parser = argparse.ArgumentParser(description="WildEdge + mlx-lm example")
    parser.add_argument(
        "--model",
        default=DEFAULT_MODEL,
        help=f"HuggingFace repo or local path (default: {DEFAULT_MODEL})",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=80,
        help="Max tokens to generate per prompt (default: 80)",
    )
    args = parser.parse_args()

    # instrument() patches mlx_lm.load and mlx_lm.generate; must be called
    # before any model is loaded.
    client = wildedge.WildEdge(app_version="1.0.0")  # set WILDEDGE_DSN env var
    client.instrument("mlx", hubs=["huggingface"])

    print(f"\nLoading {args.model} ...")
    model, tokenizer = mlx_lm.load(args.model)  # load + download tracked automatically

    print(f"\nRunning {len(PROMPTS)} prompts (max_tokens={args.max_tokens}):\n")
    for i, prompt in enumerate(PROMPTS, 1):
        response = mlx_lm.generate(
            model,
            tokenizer,
            prompt=prompt,
            max_tokens=args.max_tokens,
            verbose=False,
        )
        print(f"[{i}] Q: {prompt}")
        print(f"    A: {response}\n")

    client.flush()
    print("Done. Events flushed to WildEdge.")


if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion examples/onnx_example.py
@@ -5,7 +5,7 @@
# [tool.uv.sources]
# wildedge-sdk = { path = "..", editable = true }
# ///
"""ONNX Runtime integration example — run with: uv run onnx_example.py"""
"""ONNX Runtime integration example. Run with: uv run onnx_example.py"""

import numpy as np
import onnxruntime as ort
2 changes: 1 addition & 1 deletion examples/pytorch_example.py
@@ -5,7 +5,7 @@
# [tool.uv.sources]
# wildedge-sdk = { path = "..", editable = true }
# ///
"""PyTorch integration example — run with: uv run pytorch_example.py"""
"""PyTorch integration example. Run with: uv run pytorch_example.py"""

import torch
import torch.nn as nn
4 changes: 2 additions & 2 deletions examples/timm_example.py
@@ -6,9 +6,9 @@
# wildedge-sdk = { path = "..", editable = true }
# ///
"""
timm integration example — run with: uv run timm_example.py
timm integration example. Run with: uv run timm_example.py

timm models are standard PyTorch nn.Module subclasses wildedge patches
timm models are standard PyTorch nn.Module subclasses; wildedge patches
timm.create_model at client initialisation, so load timing, download tracking,
and unload tracking happen automatically. Inference tracking uses the existing
PyTorch forward hooks.
6 changes: 3 additions & 3 deletions examples/transformers_example.py
@@ -69,7 +69,7 @@ def run_embed() -> None:
print("Feature extraction (BERT):")
for sent in sentences:
result = pipe(sent)
# result shape: [1, seq_len, hidden_size] take CLS token embedding
# result shape: [1, seq_len, hidden_size]; take CLS token embedding
cls_embedding = result[0][0]
dims = len(cls_embedding)
norm = sum(v**2 for v in cls_embedding) ** 0.5
@@ -89,7 +89,7 @@ def main() -> None:
args = parser.parse_args()

# instrument() patches transformers.pipeline and AutoModel.from_pretrained
# before any model is loaded everything below is tracked automatically.
# before any model is loaded; everything below is tracked automatically.
client = wildedge.WildEdge(app_version="1.0.0") # set WILDEDGE_DSN env var
client.instrument("transformers", hubs=["huggingface"])

@@ -99,7 +99,7 @@
]()

client.flush()
print("\nDone — events flushed to WildEdge.")
print("\nDone. Events flushed to WildEdge.")


if __name__ == "__main__":
2 changes: 1 addition & 1 deletion tests/test_consumer.py
@@ -411,7 +411,7 @@ def test_flush_is_noop_after_pause_before_resume(self, monkeypatch):

consumer._pause()
# stopped is False (reset by _pause) and queue is empty, so flush
# calls drain_once which returns False immediately no transmit calls.
# calls drain_once which returns False immediately; no transmit calls.
consumer.flush(timeout=0.1)
mock_tx.send.assert_not_called()

2 changes: 1 addition & 1 deletion tests/test_integrations.py
@@ -42,7 +42,7 @@ def make_handle(publish_spy) -> ModelHandle:


# ---------------------------------------------------------------------------
# Fake objects no ML libraries required
# Fake objects; no ML libraries required
# ---------------------------------------------------------------------------

# ONNX
2 changes: 1 addition & 1 deletion tests/test_integrations_ultralytics.py
@@ -21,7 +21,7 @@
from wildedge.model import ModelHandle, ModelInfo

# ---------------------------------------------------------------------------
# Fake objects no ultralytics required
# Fake objects; no ultralytics required
# ---------------------------------------------------------------------------


2 changes: 1 addition & 1 deletion wildedge/autoload/sitecustomize.py
@@ -34,7 +34,7 @@


# Chain any pre-existing sitecustomize that would otherwise be shadowed.
# Use importlib to find and exec it directly avoids sys.modules manipulation
# Use importlib to find and exec it directly; avoids sys.modules manipulation
# which can trigger CPython's module GC and clear globals mid-execution.
def _load_existing_sitecustomize() -> None:
import importlib.util as _iutil
3 changes: 3 additions & 0 deletions wildedge/client.py
@@ -20,6 +20,7 @@
from wildedge.integrations.base import BaseExtractor
from wildedge.integrations.gguf import GgufExtractor
from wildedge.integrations.keras import KerasExtractor
from wildedge.integrations.mlx import MlxExtractor
from wildedge.integrations.onnx import OnnxExtractor
from wildedge.integrations.pytorch import PytorchExtractor
from wildedge.integrations.registry import noop_integrations, supported_integrations
@@ -84,6 +85,7 @@ def parse_dsn(dsn: str) -> tuple[str, str, str]:
GgufExtractor(),
UltralyticsExtractor(),
TransformersExtractor(),
MlxExtractor(),
PytorchExtractor(),
TensorflowExtractor(),
KerasExtractor(),
@@ -107,6 +109,7 @@ class WildEdge:
NOOP_INTEGRATIONS = noop_integrations()
PATCH_INSTALLERS = {
"gguf": GgufExtractor.install_auto_load_patch,
"mlx": MlxExtractor.install_auto_load_patch,
"onnx": OnnxExtractor.install_auto_load_patch,
"timm": PytorchExtractor.install_timm_patch,
"tensorflow": TensorflowExtractor.install_auto_load_patch,
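The `parse_dsn(dsn: str) -> tuple[str, str, str]` signature in the client.py hunk suggests splitting a DSN of the form `https://<secret>@ingest.wildedge.dev/<key>` into its three parts. A hypothetical sketch of that split, not the SDK's actual implementation:

```python
from urllib.parse import urlsplit


def parse_dsn(dsn: str) -> tuple[str, str, str]:
    """Split 'https://<secret>@host/<key>' into (secret, host, key). Hypothetical."""
    parts = urlsplit(dsn)
    key = parts.path.strip("/")
    if not parts.username or not key:
        raise ValueError("DSN must look like https://<secret>@host/<key>")
    return parts.username, parts.hostname, key


print(parse_dsn("https://abc123@ingest.wildedge.dev/proj42"))
# → ('abc123', 'ingest.wildedge.dev', 'proj42')
```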
4 changes: 2 additions & 2 deletions wildedge/hubs/torchhub.py
@@ -20,9 +20,9 @@
------------
``torch.hub.get_dir()`` (default ``~/.cache/torch/hub``) contains:

- ``checkpoints/<name>-<8hexchars>.ext`` weight files downloaded by
- ``checkpoints/<name>-<8hexchars>.ext``: weight files downloaded by
``torch.hub.download_url_to_file`` or ``torch.utils.model_zoo.load_url``.
- ``<owner>_<repo>_<ref>/`` cloned GitHub repo directories created by
- ``<owner>_<repo>_<ref>/``: cloned GitHub repo directories created by
``torch.hub.load``.
"""
