From 133433d93413e9c35f8aa1e1ea024bbc8a1cab40 Mon Sep 17 00:00:00 2001 From: Piotr Duda Date: Sat, 14 Mar 2026 15:15:00 +0100 Subject: [PATCH 1/2] Support for mlx --- README.md | 3 +- examples/mlx_example.py | 78 ++++++++ wildedge/client.py | 3 + wildedge/integrations/mlx.py | 308 ++++++++++++++++++++++++++++++ wildedge/integrations/registry.py | 1 + 5 files changed, 392 insertions(+), 1 deletion(-) create mode 100644 examples/mlx_example.py create mode 100644 wildedge/integrations/mlx.py diff --git a/README.md b/README.md index 66f181f..6865964 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Coverage](https://codecov.io/gh/wildedge/wildedge-python/branch/main/graph/badge.svg)](https://codecov.io/gh/wildedge/wildedge-python) -On-device ML inference monitoring for Python. Track in-dept, model quality & performance information. +On-device ML inference monitoring for Python. Tracks latency, errors, and model metadata — no inputs or outputs captured. ## Install @@ -57,6 +57,7 @@ client.instrument("transformers", hubs=["huggingface"]) | Integration | Patches | Hub tracking | Example | |---|---|---|---| | `transformers` | `pipeline()`, `AutoModel.from_pretrained()` | `huggingface` | [transformers_example.py](examples/transformers_example.py) | +| `mlx` | `mlx_lm.load()`, `mlx_lm.generate()` | `huggingface` | [mlx_example.py](examples/mlx_example.py) | | `timm` | `timm.create_model()` | `huggingface`, `torchhub` | [timm_example.py](examples/timm_example.py) | | `gguf` | `llama_cpp.Llama.__init__` | `huggingface` | [gguf_example.py](examples/gguf_example.py) | | `onnx` | `ort.InferenceSession` | `huggingface` | [onnx_example.py](examples/onnx_example.py) | diff --git a/examples/mlx_example.py b/examples/mlx_example.py new file mode 100644 index 0000000..75f7cca --- /dev/null +++ b/examples/mlx_example.py @@ -0,0 +1,78 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = ["wildedge-sdk", "mlx-lm"] +# +# [tool.uv.sources] +# wildedge-sdk = { path = "..", editable = true } +# /// +""" +MLX / mlx-lm integration example — Apple Silicon only. + +WildEdge patches mlx_lm.load and mlx_lm.generate at client initialisation. +Load timing, HuggingFace Hub download tracking, inference metrics (tokens/sec, +token counts), and unload tracking all happen automatically. + +Usage: + uv run mlx_example.py + uv run mlx_example.py --model mlx-community/Llama-3.2-1B-Instruct-4bit + uv run mlx_example.py --model mlx-community/Mistral-7B-Instruct-v0.3-4bit +""" + +from __future__ import annotations + +import argparse + +import mlx_lm + +import wildedge + +PROMPTS = [ + "Explain on-device ML inference in one sentence.", + "What makes Apple Silicon well-suited for local AI?", + "Name three advantages of privacy-preserving inference.", +] + +DEFAULT_MODEL = "mlx-community/Llama-3.2-1B-Instruct-4bit" + + +def main() -> None: + parser = argparse.ArgumentParser(description="WildEdge + mlx-lm example") + parser.add_argument( + "--model", + default=DEFAULT_MODEL, + help=f"HuggingFace repo or local path (default: {DEFAULT_MODEL})", + ) + parser.add_argument( + "--max-tokens", + type=int, + default=80, + help="Max tokens to generate per prompt (default: 80)", + ) + args = parser.parse_args() + + # instrument() patches mlx_lm.load and mlx_lm.generate — must be called + # before any model is loaded. 
+ client = wildedge.WildEdge(app_version="1.0.0") # set WILDEDGE_DSN env var + client.instrument("mlx", hubs=["huggingface"]) + + print(f"\nLoading {args.model} ...") + model, tokenizer = mlx_lm.load(args.model) # load + download tracked automatically + + print(f"\nRunning {len(PROMPTS)} prompts (max_tokens={args.max_tokens}):\n") + for i, prompt in enumerate(PROMPTS, 1): + response = mlx_lm.generate( + model, + tokenizer, + prompt=prompt, + max_tokens=args.max_tokens, + verbose=False, + ) + print(f"[{i}] Q: {prompt}") + print(f" A: {response}\n") + + client.flush() + print("Done — events flushed to WildEdge.") + + +if __name__ == "__main__": + main() diff --git a/wildedge/client.py b/wildedge/client.py index df38253..92de53e 100644 --- a/wildedge/client.py +++ b/wildedge/client.py @@ -20,6 +20,7 @@ from wildedge.integrations.base import BaseExtractor from wildedge.integrations.gguf import GgufExtractor from wildedge.integrations.keras import KerasExtractor +from wildedge.integrations.mlx import MlxExtractor from wildedge.integrations.onnx import OnnxExtractor from wildedge.integrations.pytorch import PytorchExtractor from wildedge.integrations.registry import noop_integrations, supported_integrations @@ -84,6 +85,7 @@ def parse_dsn(dsn: str) -> tuple[str, str, str]: GgufExtractor(), UltralyticsExtractor(), TransformersExtractor(), + MlxExtractor(), PytorchExtractor(), TensorflowExtractor(), KerasExtractor(), @@ -107,6 +109,7 @@ class WildEdge: NOOP_INTEGRATIONS = noop_integrations() PATCH_INSTALLERS = { "gguf": GgufExtractor.install_auto_load_patch, + "mlx": MlxExtractor.install_auto_load_patch, "onnx": OnnxExtractor.install_auto_load_patch, "timm": PytorchExtractor.install_timm_patch, "tensorflow": TensorflowExtractor.install_auto_load_patch, diff --git a/wildedge/integrations/mlx.py b/wildedge/integrations/mlx.py new file mode 100644 index 0000000..e46d48d --- /dev/null +++ b/wildedge/integrations/mlx.py @@ -0,0 +1,308 @@ +"""MLX / mlx-lm integration.""" + +from __future__ import annotations + +import threading +import time +from typing import TYPE_CHECKING + +from wildedge import constants +from wildedge.events.inference import GenerationOutputMeta, TextInputMeta +from wildedge.integrations.base import BaseExtractor, patch_instance_call_once +from wildedge.logging import logger +from wildedge.model import ModelInfo +from wildedge.timing import elapsed_ms + +try: + import mlx.core as _mx + import mlx.nn as _mlx_nn + from mlx.utils import tree_flatten as _tree_flatten +except ImportError: + _mx = None # type: ignore[assignment] + _mlx_nn = None # type: ignore[assignment] + _tree_flatten = None # type: ignore[assignment] + +try: + import mlx_lm as _mlx_lm +except ImportError: + _mlx_lm = None # type: ignore[assignment] + +if TYPE_CHECKING: + from wildedge.model import ModelHandle + +# --- Patch state --- +_mlx_patched = False +_MLX_PATCH_LOCK = threading.Lock() +MLX_AUTO_LOAD_PATCH_NAME = "mlx_auto_load" +MLX_GENERATE_PATCH_NAME = "mlx_generate" +MLX_CALL_PATCH_NAME = "mlx_call" +MLX_HANDLE_ATTR = "__wildedge_mlx_handle__" + +# Thread-local flag: suppress __call__ tracking inside mlx_lm.generate's +# autoregressive loop (which calls model() once per token). 
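#
# The flow (as implemented in patched_generate / _build_mlx_call_patch below):
#
#     _inside_mlx_generate.active = True
#     try:
#         result = original_generate(model, tokenizer, prompt, ...)
#     finally:
#         _inside_mlx_generate.active = False
#
# While the flag is set, the patched __call__ returns early, so a single
# generate() call emits one inference event instead of one per token.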
+_inside_mlx_generate = threading.local() + + +def _debug_failure(context: str, exc: BaseException) -> None: + logger.debug("wildedge: mlx %s failed: %s", context, exc) + + +def _is_mlx_module(obj: object) -> bool: + for cls in type(obj).__mro__: + if cls.__name__ == "Module" and "mlx" in cls.__module__: + return True + return False + + +def _extract_model_args(obj: object) -> tuple[str | None, str | None]: + """Returns (model_type, quantization_str) from model.args. Never raises.""" + try: + args = getattr(obj, "args", None) + if args is None: + return None, None + model_type = getattr(args, "model_type", None) or None + quant = getattr(args, "quantization", None) + if quant is not None: + bits = getattr(quant, "bits", None) + q_str = f"q{int(bits)}" if bits else "quantized" + else: + q_str = _detect_quantization_from_layers(obj) + return model_type, q_str + except Exception as exc: + _debug_failure("model args extraction", exc) + return None, None + + +def _detect_quantization_from_layers(obj: object) -> str | None: + """Inspect layer class names for quantized linear layers.""" + try: + for _, module in obj.named_modules(): # type: ignore[union-attr] + cls_name = type(module).__name__ + if "Quantized" in cls_name or "quantized" in cls_name: + return "quantized" + except Exception: + pass + return None + + +def _count_tokens(tokenizer: object, text: str) -> int | None: + try: + return len(tokenizer.encode(text)) # type: ignore[union-attr] + except Exception: + return None + + +# --------------------------------------------------------------------------- +# Direct __call__ patch (non-LM / manual-registration use case) +# --------------------------------------------------------------------------- + + +def _build_mlx_call_patch(original_call): # type: ignore[no-untyped-def] + def patched_call(self_inner, *args, **kwargs): # type: ignore[no-untyped-def] + # Suppress during mlx_lm.generate's autoregressive token loop + if getattr(_inside_mlx_generate, "active", False): + return original_call(self_inner, *args, **kwargs) + + handle = getattr(self_inner, MLX_HANDLE_ATTR, None) + if handle is None: + return original_call(self_inner, *args, **kwargs) + + t0 = time.perf_counter() + try: + result = original_call(self_inner, *args, **kwargs) + handle.track_inference(duration_ms=elapsed_ms(t0), success=True) + return result + except Exception as exc: + handle.track_error( + error_code="UNKNOWN", + error_message=str(exc)[: constants.ERROR_MSG_MAX_LEN], + ) + raise + + return patched_call + + +# --------------------------------------------------------------------------- +# Extractor +# --------------------------------------------------------------------------- + + +class MlxExtractor(BaseExtractor): + def can_handle(self, obj: object) -> bool: + return _is_mlx_module(obj) + + def extract_info( + self, obj: object, overrides: dict + ) -> tuple[str | None, ModelInfo]: + model_type, quantization = _extract_model_args(obj) + + model_name = model_type or type(obj).__name__ + model_id = overrides.pop("id", None) or model_name + family = overrides.pop("family", None) or model_type + version = overrides.pop("version", "unknown") + source = overrides.pop("source", "huggingface") + quantization = overrides.pop("quantization", None) or quantization + + info = ModelInfo( + model_name=model_name, + model_version=version, + model_source=source, + model_format="mlx", + model_family=family, + quantization=quantization, + ) + for k, v in overrides.items(): + if hasattr(info, k): + setattr(info, k, v) + + return 
model_id, info + + def memory_bytes(self, obj: object) -> int | None: + if _tree_flatten is None: + return None + try: + return sum( + v.nbytes + for _, v in _tree_flatten(obj.parameters()) # type: ignore[union-attr] + if hasattr(v, "nbytes") + ) + except Exception as exc: + _debug_failure("memory estimation", exc) + return None + + def install_hooks(self, obj: object, handle: ModelHandle) -> None: + setattr(obj, MLX_HANDLE_ATTR, handle) + patch_instance_call_once( + obj, + patch_name=MLX_CALL_PATCH_NAME, + make_patched_call=_build_mlx_call_patch, + ) + + # ----------------------------------------------------------------------- + # Auto-load patches + # ----------------------------------------------------------------------- + + @classmethod + def install_auto_load_patch(cls, client_ref: object) -> None: + """Patch mlx_lm.load and mlx_lm.generate for automatic tracking. + + Called once at WildEdge client initialisation. + + - ``mlx_lm.load(path_or_repo)`` is timed; model ID is captured from + the path argument; HuggingFace Hub downloads are recorded. + - ``mlx_lm.generate(model, tokenizer, prompt, ...)`` is patched to + emit a single inference event per call with token counts and + tokens/second. The autoregressive ``model()`` loop inside generate + is suppressed via a thread-local guard so it does not double-count. + """ + global _mlx_patched + if _mlx_patched or _mlx_lm is None: + return + + with _MLX_PATCH_LOCK: + if _mlx_patched: + return + cls._patch_load(client_ref) + cls._patch_generate(client_ref) + _mlx_patched = True + + @classmethod + def _patch_load(cls, client_ref: object) -> None: + original_load = _mlx_lm.load + if ( + getattr(original_load, "__wildedge_patch_name__", None) + == MLX_AUTO_LOAD_PATCH_NAME + ): + return + + def patched_load(path_or_hf_repo, *args, **kwargs): # type: ignore[no-untyped-def] + c = client_ref() # type: ignore[call-arg] + hub_before = ( + c._snapshot_hub_caches() if c is not None and not c.closed else {} + ) + t0 = time.perf_counter() + result = original_load(path_or_hf_repo, *args, **kwargs) + load_ms = elapsed_ms(t0) + + # mlx_lm.load returns (model, tokenizer) + model = result[0] if isinstance(result, tuple) else result + + if c is not None and not c.closed: + downloads = c._diff_hub_caches(hub_before, load_ms) or None + model_id = str(path_or_hf_repo) if path_or_hf_repo else None + c._on_model_auto_loaded( + model, + load_ms=load_ms, + downloads=downloads, + model_id=model_id, + ) + + return result + + patched_load.__wildedge_patch_name__ = MLX_AUTO_LOAD_PATCH_NAME # type: ignore[attr-defined] + patched_load.__wildedge_original_call__ = original_load # type: ignore[attr-defined] + _mlx_lm.load = patched_load + + @classmethod + def _patch_generate(cls, client_ref: object) -> None: # noqa: ARG003 + original_generate = _mlx_lm.generate + if ( + getattr(original_generate, "__wildedge_patch_name__", None) + == MLX_GENERATE_PATCH_NAME + ): + return + + def patched_generate(model, tokenizer, prompt, *args, **kwargs): # type: ignore[no-untyped-def] + handle: ModelHandle | None = getattr(model, MLX_HANDLE_ATTR, None) + + tokens_in = _count_tokens(tokenizer, prompt) if tokenizer else None + input_meta = TextInputMeta(token_count=tokens_in) if tokens_in else None + + _inside_mlx_generate.active = True + t0 = time.perf_counter() + try: + result = original_generate(model, tokenizer, prompt, *args, **kwargs) + duration_ms = elapsed_ms(t0) + except Exception as exc: + _inside_mlx_generate.active = False + if handle is not None: + handle.track_error( + 
error_code="UNKNOWN", + error_message=str(exc)[: constants.ERROR_MSG_MAX_LEN], + ) + raise + finally: + _inside_mlx_generate.active = False + + if handle is not None: + output_text = ( + result.text + if hasattr(result, "text") + else (result if isinstance(result, str) else None) + ) + tokens_out: int | None = None + tps: float | None = None + if output_text and tokenizer: + tokens_out = _count_tokens(tokenizer, output_text) + if tokens_out and duration_ms > 0: + tps = round(tokens_out / (duration_ms / 1000), 1) + + handle.track_inference( + duration_ms=duration_ms, + batch_size=1, + input_modality="text", + output_modality="generation", + input_meta=input_meta, + output_meta=GenerationOutputMeta( + tokens_in=tokens_in, + tokens_out=tokens_out, + tokens_per_second=tps, + ), + success=True, + ) + + return result + + patched_generate.__wildedge_patch_name__ = MLX_GENERATE_PATCH_NAME # type: ignore[attr-defined] + patched_generate.__wildedge_original_call__ = original_generate # type: ignore[attr-defined] + _mlx_lm.generate = patched_generate diff --git a/wildedge/integrations/registry.py b/wildedge/integrations/registry.py index 6b09b7a..f39a54a 100644 --- a/wildedge/integrations/registry.py +++ b/wildedge/integrations/registry.py @@ -32,6 +32,7 @@ class IntegrationSpec: IntegrationSpec("tensorflow", ("tensorflow",), "client_patch"), IntegrationSpec("ultralytics", ("ultralytics",), "client_patch"), IntegrationSpec("transformers", ("transformers",), "client_patch"), + IntegrationSpec("mlx", ("mlx_lm",), "client_patch"), ) INTEGRATIONS_BY_NAME: dict[str, IntegrationSpec] = { From e73aa731f135eebd820694161b8aeaa19c747bd5 Mon Sep 17 00:00:00 2001 From: Piotr Duda Date: Sat, 14 Mar 2026 15:18:55 +0100 Subject: [PATCH 2/2] Support for mlx --- README.md | 30 ++++++++++++------------- examples/chatgpt_example.py | 2 +- examples/django_gemma/gemmaapp/views.py | 4 ++-- examples/django_gemma/gunicorn.conf.py | 2 +- examples/feedback_example.py | 2 +- examples/gguf_example.py | 2 +- examples/gguf_gemma_manual_example.py | 4 ++-- examples/keras_example.py | 2 +- examples/mlx_example.py | 6 ++--- examples/onnx_example.py | 2 +- examples/pytorch_example.py | 2 +- examples/timm_example.py | 4 ++-- examples/transformers_example.py | 6 ++--- tests/test_consumer.py | 2 +- tests/test_integrations.py | 2 +- tests/test_integrations_ultralytics.py | 2 +- wildedge/autoload/sitecustomize.py | 2 +- wildedge/hubs/torchhub.py | 4 ++-- wildedge/integrations/registry.py | 3 +-- wildedge/integrations/ultralytics.py | 2 +- 20 files changed, 42 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 6865964..524df87 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Coverage](https://codecov.io/gh/wildedge/wildedge-python/branch/main/graph/badge.svg)](https://codecov.io/gh/wildedge/wildedge-python) -On-device ML inference monitoring for Python. Tracks latency, errors, and model metadata — no inputs or outputs captured. +On-device ML inference monitoring for Python. Tracks latency, errors, and model metadata. No inputs or outputs captured. ## Install @@ -16,9 +16,9 @@ On-device ML inference monitoring for Python. Tracks latency, errors, and model uv add wildedge-sdk ``` -## CLI — zero code changes +## CLI -Drop `wildedge run` in front of your existing command. WildEdge instruments the runtime before your code starts — no SDK calls required in user code. 
+Drop `wildedge run` in front of your existing command. WildEdge instruments the runtime before your code starts. No SDK calls required in user code.

```bash
WILDEDGE_DSN="https://<key>@ingest.wildedge.dev/" \
@@ -61,10 +61,10 @@ client.instrument("transformers", hubs=["huggingface"])
| `timm` | `timm.create_model()` | `huggingface`, `torchhub` | [timm_example.py](examples/timm_example.py) |
| `gguf` | `llama_cpp.Llama.__init__` | `huggingface` | [gguf_example.py](examples/gguf_example.py) |
| `onnx` | `ort.InferenceSession` | `huggingface` | [onnx_example.py](examples/onnx_example.py) |
-| `ultralytics` | `ultralytics.YOLO.__init__` | — | — |
-| `tensorflow` | `tf.keras.models.load_model`, `tf.saved_model.load` | — | [tensorflow_example.py](examples/tensorflow_example.py) |
+| `ultralytics` | `ultralytics.YOLO.__init__` | - | - |
+| `tensorflow` | `tf.keras.models.load_model`, `tf.saved_model.load` | - | [tensorflow_example.py](examples/tensorflow_example.py) |
| `torch` | forward hooks via `client.load()` | `torchhub` | [pytorch_example.py](examples/pytorch_example.py) |
-| `keras` | forward hooks via `client.load()` | — | [keras_example.py](examples/keras_example.py) |
+| `keras` | forward hooks via `client.load()` | - | [keras_example.py](examples/keras_example.py) |

For `torch` and `keras`, models are user-defined subclasses so there's no
constructor to patch. Use `client.load()` to get load/unload tracking
alongside inference:
@@ -89,20 +89,20 @@ def run(input):

| Parameter | Default | Env var | Description |
|---|---|---|---|
-| `dsn` | — | `WILDEDGE_DSN` | `https://<key>@ingest.wildedge.dev/` |
-| `app_version` | `None` | — | Your app's version string |
+| `dsn` | - | `WILDEDGE_DSN` | `https://<key>@ingest.wildedge.dev/` |
+| `app_version` | `None` | - | Your app's version string |
| `app_identity` | `` | `WILDEDGE_APP_IDENTITY` | Namespace for offline persistence; set per-app in multi-process workloads |
| `debug` | `false` | `WILDEDGE_DEBUG` | Log events to console |
-| `batch_size` | `10` | — | Events per transmission (1–100) |
-| `flush_interval_sec` | `60` | — | Max seconds between flushes (1–3600) |
-| `max_queue_size` | `200` | — | In-memory buffer limit (10–10000) |
-| `enable_offline_persistence` | `true` | — | Persist unsent events to disk and replay on restart |
-| `max_event_age_sec` | `900` | — | Max age before dead-lettering |
-| `enable_dead_letter_persistence` | `false` | — | Persist dropped batches to disk |
+| `batch_size` | `10` | - | Events per transmission (1-100) |
+| `flush_interval_sec` | `60` | - | Max seconds between flushes (1-3600) |
+| `max_queue_size` | `200` | - | In-memory buffer limit (10-10000) |
+| `enable_offline_persistence` | `true` | - | Persist unsent events to disk and replay on restart |
+| `max_event_age_sec` | `900` | - | Max age before dead-lettering |
+| `enable_dead_letter_persistence` | `false` | - | Persist dropped batches to disk |

## Privacy

-WildEdge captures **no inputs or outputs** — only metadata: latency, errors, model info, and download provenance. All inference runs locally; only telemetry is transmitted over HTTPS.
+WildEdge captures **no inputs or outputs**. Only metadata: latency, errors, model info, and download provenance. All inference runs locally; only telemetry is transmitted over HTTPS.

Report security issues to security@wildedge.dev.
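Putting the configuration table together, a minimal sketch (this assumes the `WildEdge` constructor accepts the table's parameters as keyword arguments; the values shown are illustrative, not recommendations):

```python
import wildedge

client = wildedge.WildEdge(
    dsn="https://<key>@ingest.wildedge.dev/",  # or set WILDEDGE_DSN instead
    app_version="1.0.0",
    batch_size=20,              # events per transmission (1-100)
    flush_interval_sec=30,      # max seconds between flushes (1-3600)
    max_queue_size=500,         # in-memory buffer limit (10-10000)
)
```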
diff --git a/examples/chatgpt_example.py b/examples/chatgpt_example.py index dc00b39..edb6cb3 100644 --- a/examples/chatgpt_example.py +++ b/examples/chatgpt_example.py @@ -5,7 +5,7 @@ # [tool.uv.sources] # wildedge-sdk = { path = "..", editable = true } # /// -"""ChatGPT (OpenAI API) — fully manual integration. +"""ChatGPT (OpenAI API): fully manual integration. Shows how to instrument a remote LLM with no local model file. Tracks input/output token counts, generation config, latency, errors, diff --git a/examples/django_gemma/gemmaapp/views.py b/examples/django_gemma/gemmaapp/views.py index 264079e..2bc3cf2 100644 --- a/examples/django_gemma/gemmaapp/views.py +++ b/examples/django_gemma/gemmaapp/views.py @@ -1,7 +1,7 @@ """Gemma inference view. The Llama constructor is patched automatically by `wildedge run --integrations gguf` -via sitecustomize.py — load/unload/inference events are tracked without any +via sitecustomize.py; load/unload/inference events are tracked without any wildedge imports here. On macOS, waitress (thread-pool, no fork) is used as the WSGI server. @@ -30,7 +30,7 @@ verbose=False, ) -# Llama inference is not thread-safe on a single context — serialise requests. +# Llama inference is not thread-safe on a single context; serialise requests. _llm_lock = threading.Lock() diff --git a/examples/django_gemma/gunicorn.conf.py b/examples/django_gemma/gunicorn.conf.py index 3f496b7..3fce87d 100644 --- a/examples/django_gemma/gunicorn.conf.py +++ b/examples/django_gemma/gunicorn.conf.py @@ -1,4 +1,4 @@ -"""Gunicorn configuration — Linux only. +"""Gunicorn configuration. Linux only. On macOS use waitress instead (demo.sh selects automatically). Requires llama-cpp-python built without Metal: diff --git a/examples/feedback_example.py b/examples/feedback_example.py index 6dd57c1..11f9e7c 100644 --- a/examples/feedback_example.py +++ b/examples/feedback_example.py @@ -6,7 +6,7 @@ # wildedge-sdk = { path = "..", editable = true } # /// """ -Feedback example — run with: uv run feedback_example.py +Feedback example. Run with: uv run feedback_example.py Simulates an automated quality gate: after each inference, the top-1 confidence score drives a thumbs_up / thumbs_down feedback event with no human input. diff --git a/examples/gguf_example.py b/examples/gguf_example.py index cc3543b..074d535 100644 --- a/examples/gguf_example.py +++ b/examples/gguf_example.py @@ -5,7 +5,7 @@ # [tool.uv.sources] # wildedge-sdk = { path = "..", editable = true } # /// -"""GGUF / llama.cpp integration example — run with: uv run gguf_example.py""" +"""GGUF / llama.cpp integration example. Run with: uv run gguf_example.py""" from huggingface_hub import hf_hub_download from llama_cpp import Llama diff --git a/examples/gguf_gemma_manual_example.py b/examples/gguf_gemma_manual_example.py index 2418187..b1f0c0a 100644 --- a/examples/gguf_gemma_manual_example.py +++ b/examples/gguf_gemma_manual_example.py @@ -5,7 +5,7 @@ # [tool.uv.sources] # wildedge-sdk = { path = "..", editable = true } # /// -"""Gemma 2 GGUF — fully manual integration, no auto-instrumentation. +"""Gemma 2 GGUF: fully manual integration, no auto-instrumentation. Shows explicit download / load / inference / error tracking without client.instrument() or any automatic hooks. @@ -40,7 +40,7 @@ llm = Llama(model_path, n_ctx=2048, n_gpu_layers=-1, verbose=False) load_ms = t.elapsed_ms -# All metadata supplied explicitly — no extractor runs, no hooks installed. +# All metadata supplied explicitly. No extractor runs, no hooks installed. 
handle = client.register_model( llm, model_id="gemma-2-2b-it-q4", diff --git a/examples/keras_example.py b/examples/keras_example.py index c854c89..b782139 100644 --- a/examples/keras_example.py +++ b/examples/keras_example.py @@ -6,7 +6,7 @@ # wildedge-sdk = { path = "..", editable = true } # /// """ -Keras integration example — run with: uv run keras_example.py +Keras integration example. Run with: uv run keras_example.py Keras models are user-defined subclasses, so wildedge cannot patch the constructor directly like with timm or PyTorch. Use client.load() to diff --git a/examples/mlx_example.py b/examples/mlx_example.py index 75f7cca..2c87a3a 100644 --- a/examples/mlx_example.py +++ b/examples/mlx_example.py @@ -6,7 +6,7 @@ # wildedge-sdk = { path = "..", editable = true } # /// """ -MLX / mlx-lm integration example — Apple Silicon only. +MLX / mlx-lm integration example. Apple Silicon only. WildEdge patches mlx_lm.load and mlx_lm.generate at client initialisation. Load timing, HuggingFace Hub download tracking, inference metrics (tokens/sec, @@ -50,7 +50,7 @@ def main() -> None: ) args = parser.parse_args() - # instrument() patches mlx_lm.load and mlx_lm.generate — must be called + # instrument() patches mlx_lm.load and mlx_lm.generate; must be called # before any model is loaded. client = wildedge.WildEdge(app_version="1.0.0") # set WILDEDGE_DSN env var client.instrument("mlx", hubs=["huggingface"]) @@ -71,7 +71,7 @@ def main() -> None: print(f" A: {response}\n") client.flush() - print("Done — events flushed to WildEdge.") + print("Done. Events flushed to WildEdge.") if __name__ == "__main__": diff --git a/examples/onnx_example.py b/examples/onnx_example.py index 73dc77c..a5a9232 100644 --- a/examples/onnx_example.py +++ b/examples/onnx_example.py @@ -5,7 +5,7 @@ # [tool.uv.sources] # wildedge-sdk = { path = "..", editable = true } # /// -"""ONNX Runtime integration example — run with: uv run onnx_example.py""" +"""ONNX Runtime integration example. Run with: uv run onnx_example.py""" import numpy as np import onnxruntime as ort diff --git a/examples/pytorch_example.py b/examples/pytorch_example.py index b28760e..c384b15 100644 --- a/examples/pytorch_example.py +++ b/examples/pytorch_example.py @@ -5,7 +5,7 @@ # [tool.uv.sources] # wildedge-sdk = { path = "..", editable = true } # /// -"""PyTorch integration example — run with: uv run pytorch_example.py""" +"""PyTorch integration example. Run with: uv run pytorch_example.py""" import torch import torch.nn as nn diff --git a/examples/timm_example.py b/examples/timm_example.py index 47ba3a7..1a302e7 100644 --- a/examples/timm_example.py +++ b/examples/timm_example.py @@ -6,9 +6,9 @@ # wildedge-sdk = { path = "..", editable = true } # /// """ -timm integration example — run with: uv run timm_example.py +timm integration example. Run with: uv run timm_example.py -timm models are standard PyTorch nn.Module subclasses — wildedge patches +timm models are standard PyTorch nn.Module subclasses; wildedge patches timm.create_model at client initialisation, so load timing, download tracking, and unload tracking happen automatically. Inference tracking uses the existing PyTorch forward hooks. 
diff --git a/examples/transformers_example.py b/examples/transformers_example.py index 86912a0..f8bdc68 100644 --- a/examples/transformers_example.py +++ b/examples/transformers_example.py @@ -69,7 +69,7 @@ def run_embed() -> None: print("Feature extraction (BERT):") for sent in sentences: result = pipe(sent) - # result shape: [1, seq_len, hidden_size] — take CLS token embedding + # result shape: [1, seq_len, hidden_size]; take CLS token embedding cls_embedding = result[0][0] dims = len(cls_embedding) norm = sum(v**2 for v in cls_embedding) ** 0.5 @@ -89,7 +89,7 @@ def main() -> None: args = parser.parse_args() # instrument() patches transformers.pipeline and AutoModel.from_pretrained - # before any model is loaded — everything below is tracked automatically. + # before any model is loaded; everything below is tracked automatically. client = wildedge.WildEdge(app_version="1.0.0") # set WILDEDGE_DSN env var client.instrument("transformers", hubs=["huggingface"]) @@ -99,7 +99,7 @@ def main() -> None: ]() client.flush() - print("\nDone — events flushed to WildEdge.") + print("\nDone. Events flushed to WildEdge.") if __name__ == "__main__": diff --git a/tests/test_consumer.py b/tests/test_consumer.py index 0084c3d..5553a47 100644 --- a/tests/test_consumer.py +++ b/tests/test_consumer.py @@ -411,7 +411,7 @@ def test_flush_is_noop_after_pause_before_resume(self, monkeypatch): consumer._pause() # stopped is False (reset by _pause) and queue is empty, so flush - # calls drain_once which returns False immediately — no transmit calls. + # calls drain_once which returns False immediately; no transmit calls. consumer.flush(timeout=0.1) mock_tx.send.assert_not_called() diff --git a/tests/test_integrations.py b/tests/test_integrations.py index 838b915..46e4157 100644 --- a/tests/test_integrations.py +++ b/tests/test_integrations.py @@ -42,7 +42,7 @@ def make_handle(publish_spy) -> ModelHandle: # --------------------------------------------------------------------------- -# Fake objects — no ML libraries required +# Fake objects; no ML libraries required # --------------------------------------------------------------------------- # ONNX diff --git a/tests/test_integrations_ultralytics.py b/tests/test_integrations_ultralytics.py index e8b2b6a..1a05458 100644 --- a/tests/test_integrations_ultralytics.py +++ b/tests/test_integrations_ultralytics.py @@ -21,7 +21,7 @@ from wildedge.model import ModelHandle, ModelInfo # --------------------------------------------------------------------------- -# Fake objects — no ultralytics required +# Fake objects; no ultralytics required # --------------------------------------------------------------------------- diff --git a/wildedge/autoload/sitecustomize.py b/wildedge/autoload/sitecustomize.py index 92fcf86..c77db79 100644 --- a/wildedge/autoload/sitecustomize.py +++ b/wildedge/autoload/sitecustomize.py @@ -34,7 +34,7 @@ # Chain any pre-existing sitecustomize that would otherwise be shadowed. -# Use importlib to find and exec it directly — avoids sys.modules manipulation +# Use importlib to find and exec it directly; avoids sys.modules manipulation # which can trigger CPython's module GC and clear globals mid-execution. 
def _load_existing_sitecustomize() -> None:
    import importlib.util as _iutil
diff --git a/wildedge/hubs/torchhub.py b/wildedge/hubs/torchhub.py
index 0ac5293..1a91bfb 100644
--- a/wildedge/hubs/torchhub.py
+++ b/wildedge/hubs/torchhub.py
@@ -20,9 +20,9 @@
 ------------
 ``torch.hub.get_dir()`` (default ``~/.cache/torch/hub``) contains:
-- ``checkpoints/<name>-<8hexchars>.ext`` — weight files downloaded by
+- ``checkpoints/<name>-<8hexchars>.ext``: weight files downloaded by
 ``torch.hub.download_url_to_file`` or ``torch.utils.model_zoo.load_url``.
-- ``<owner>_<repo>_<ref>/`` — cloned GitHub repo directories created by
+- ``<owner>_<repo>_<ref>/``: cloned GitHub repo directories created by
 ``torch.hub.load``.
 """
diff --git a/wildedge/integrations/registry.py b/wildedge/integrations/registry.py
index f39a54a..bd3e07f 100644
--- a/wildedge/integrations/registry.py
+++ b/wildedge/integrations/registry.py
@@ -2,8 +2,7 @@
 Contains only ML *framework* integrations (inference tracking, load/unload
 timing). Model hub and repository trackers (download provenance) live in
-``wildedge.hubs.registry`` — they are orthogonal concerns with different
-activation semantics.
+``wildedge.hubs.registry``. They have different activation semantics.
 """

from __future__ import annotations
diff --git a/wildedge/integrations/ultralytics.py b/wildedge/integrations/ultralytics.py
index c4aa6b7..3f9902f 100644
--- a/wildedge/integrations/ultralytics.py
+++ b/wildedge/integrations/ultralytics.py
@@ -234,7 +234,7 @@ def classify_output_meta(
 def weights_file_exists(model_arg: object) -> bool:
     """Return True if the weights file appears to already be on disk."""
     if not isinstance(model_arg, str):
-        return True  # not a path string — weights already in memory or a loaded object
+        return True  # not a path string; weights already in memory or a loaded object
     p = Path(model_arg)
     if p.is_file():
         return True
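To make the `torch.hub` cache layout described in `torchhub.py` concrete, here is a minimal sketch that walks it. This is an illustration of the documented layout only, not WildEdge's actual tracker code:

```python
# Sketch: enumerate torch hub artifacts following the documented layout.
from pathlib import Path

import torch

hub_dir = Path(torch.hub.get_dir())  # default: ~/.cache/torch/hub
ckpt_dir = hub_dir / "checkpoints"

# Weight files downloaded by torch.hub / model_zoo: <name>-<8hexchars>.ext
if ckpt_dir.is_dir():
    for ckpt in sorted(ckpt_dir.iterdir()):
        if ckpt.is_file():
            print("checkpoint:", ckpt.name, ckpt.stat().st_size, "bytes")

# Cloned GitHub repos created by torch.hub.load: <owner>_<repo>_<ref>/
if hub_dir.is_dir():
    for entry in sorted(hub_dir.iterdir()):
        if entry.is_dir() and entry.name != "checkpoints":
            print("hub repo:", entry.name)
```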