9 changes: 9 additions & 0 deletions AGENTS.md
@@ -686,3 +686,12 @@ Entry format:
- Details: Added repo purpose/runtime constraints, ownership table, safe-edit boundaries, required verification matrix, role-based agent cards, A2A-style task contract, mandatory handoff envelope, single-agent loop, actor-critic workflow, reusable lessons-learned section, worked examples, and explicit AGENTS review triggers. Critic concerns addressed in the update: keep single-agent as default to avoid unnecessary delegation, require executable evidence for actor-vs-critic disputes, and keep memory logging critical-only instead of turning the file into an activity log.
- Verification: `sed -n '1,260p' AGENTS.md`; `rg -n "Module And File Ownership|Safe-Edit Boundaries|Required Verification Commands|Agent Cards|A2A-Style Task Contract|Actor-Critic|AGENTS Review Triggers|ML-20260317-001" AGENTS.md`
- Links: `AGENTS.md`

- ID: `ML-20260508-001`
- Timestamp: `2026-05-08T23:08:05Z`
- Type: `change`
- Summary: Shared HF text serving now auto-selects ONNX artifacts for CPU-only runtime when the HF repo declares a compatible runtime manifest.
- Criticality: Serving runtime architecture change affecting model loading, artifact downloads, output decoding, and API metadata across generic text-classification deployments.
- Details: `ThHfModelBase` keeps Transformers/PT as the default GPU path and the fallback path; with CPU-only `HF_RUNTIME=auto` it now loads `artifact_manifest.json`, selects a declared ONNX Runtime artifact, downloads only files matching safe allow-patterns, loads the schema and contract decoder from HF artifacts, and exposes the decoded artifact contract through the existing text-classifier flow. Business API response shaping now passes through the generic model/runtime metadata emitted by serving.
- Verification: `python3 -m unittest extensions.serving.test_th_hf_model_base extensions.serving.test_th_text_classifier extensions.serving.test_th_privacy_filter extensions.business.edge_inference_api.test_text_classifier_inference_api extensions.business.edge_inference_api.test_privacy_filter_inference_api`; `python3 -m py_compile extensions/serving/default_inference/nlp/th_hf_model_base.py extensions/business/edge_inference_api/text_classifier_inference_api.py`; required serving gate `python3 -m unittest extensions.serving.model_testing.test_llm_servings` currently fails at import with `ImportError: cannot import name 'Logger' from 'naeural_core'`.
- Links: `extensions/serving/default_inference/nlp/th_hf_model_base.py`, `extensions/business/edge_inference_api/text_classifier_inference_api.py`, `extensions/serving/test_th_hf_model_base.py`
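The CPU-only auto-selection flow described in this entry can be summarized as a small decision function. A minimal sketch, assuming a hypothetical `select_runtime` helper and an illustrative `artifact_manifest.json` schema (`artifacts` entries with `runtime`, `device`, `name`, and `files` keys); the actual `ThHfModelBase` logic may differ:

```python
import json
import os


def select_runtime(manifest_path, cuda_available, hf_runtime="auto"):
  """Pick a serving runtime from a repo-declared artifact manifest.

  Sketch only: mirrors the flow described above, not the actual
  ThHfModelBase implementation; the manifest schema is an assumption.
  """
  pt_default = {"runtime": "transformers", "hf_runtime": "pt", "allow_patterns": None}
  # Transformers/PT stays the default on GPU, for explicit runtimes, and
  # whenever no usable manifest is declared.
  if hf_runtime != "auto" or cuda_available:
    return pt_default
  if not os.path.exists(manifest_path):
    return pt_default
  with open(manifest_path, encoding="utf-8") as fh:
    manifest = json.load(fh)
  # Keep only artifacts explicitly declared for ONNX Runtime on CPU.
  candidates = [
    a for a in manifest.get("artifacts", [])
    if a.get("runtime") == "onnxruntime" and a.get("device") == "cpu"
  ]
  if not candidates:
    return pt_default
  chosen = candidates[0]
  # Download only the files the manifest names, never the whole repo.
  return {
    "runtime": "onnxruntime",
    "hf_runtime": chosen.get("name", "onnx_fp32"),
    "allow_patterns": chosen.get("files", []),
  }
```

The key design point survives the simplification: Transformers/PT remains the fallback on any missing or non-matching manifest, so ONNX is strictly opt-in via the repo's declared manifest.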
@@ -95,4 +95,14 @@ def _build_result_from_inference( # pylint: disable=arguments-differ
result_payload["tokenizer_name"] = inference["TOKENIZER_NAME"]
if "PIPELINE_TASK" in inference:
result_payload["pipeline_task"] = inference["PIPELINE_TASK"]
if "MODEL" in inference:
result_payload["model"] = inference["MODEL"]
if "MODEL_VERSION" in inference:
result_payload["model_version"] = inference["MODEL_VERSION"]
if "MODEL_REVISION" in inference:
result_payload["model_revision"] = inference["MODEL_REVISION"]
if "HF_RUNTIME" in inference:
result_payload["hf_runtime"] = inference["HF_RUNTIME"]
if "RUNTIME" in inference:
result_payload["runtime"] = inference["RUNTIME"]
return result_payload
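The same key-by-key passthrough recurs in `text_classifier_inference_api.py` further down; a table-driven mapping would keep the two plugins in sync. A minimal sketch, not part of this diff; `copy_runtime_metadata` and `PASSTHROUGH_FIELDS` are hypothetical names:

```python
# Hypothetical consolidation of the if-chain above (same behavior, not in
# this diff): a table-driven passthrough shared by both API plugins.
PASSTHROUGH_FIELDS = {
  "MODEL": "model",
  "MODEL_VERSION": "model_version",
  "MODEL_REVISION": "model_revision",
  "HF_RUNTIME": "hf_runtime",
  "RUNTIME": "runtime",
}


def copy_runtime_metadata(inference, result_payload):
  # Copy each serving-emitted field into the API payload only when present.
  for src, dst in PASSTHROUGH_FIELDS.items():
    if src in inference:
      result_payload[dst] = inference[src]
  return result_payload
```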
@@ -55,6 +55,11 @@ def test_build_result_from_inference_uses_findings_key(self):
"FINDINGS_COUNT": 1,
"MODEL_NAME": "openai/privacy-filter",
"PIPELINE_TASK": "token-classification",
"MODEL": {"model_key": "privacy_filter", "model_version": "2026.05.09"},
"MODEL_VERSION": "2026.05.09",
"MODEL_REVISION": "rev-privacy",
"HF_RUNTIME": "pt",
"RUNTIME": "transformers",
},
metadata={},
request_data={"metadata": {}, "parameters": {"text": "example text"}},
@@ -73,6 +78,14 @@ def test_build_result_from_inference_uses_findings_key(self):
self.assertEqual(result_payload["findings_count"], 1)
self.assertEqual(result_payload["model_name"], "openai/privacy-filter")
self.assertEqual(result_payload["pipeline_task"], "token-classification")
self.assertEqual(
result_payload["model"],
{"model_key": "privacy_filter", "model_version": "2026.05.09"},
)
self.assertEqual(result_payload["model_version"], "2026.05.09")
self.assertEqual(result_payload["model_revision"], "rev-privacy")
self.assertEqual(result_payload["hf_runtime"], "pt")
self.assertEqual(result_payload["runtime"], "transformers")


if __name__ == "__main__":
@@ -152,6 +152,33 @@ def test_build_result_from_inference_preserves_classifier_output(self):
self.assertEqual(result_payload["model_name"], "openai/privacy-filter")
self.assertEqual(result_payload["pipeline_task"], "token-classification")

def test_build_result_from_inference_preserves_runtime_model_metadata(self):
plugin = TextClassifierInferenceApiPlugin()

result_payload = plugin._build_result_from_inference( # pylint: disable=protected-access
request_id="req-onnx",
inference={
"REQUEST_ID": "req-onnx",
"TEXT": "example text",
"result": {"prediction": "safe"},
"MODEL": {"key": "generic_text_classifier", "version": "2026.05.09"},
"MODEL_VERSION": "2026.05.09",
"HF_RUNTIME": "onnx_fp32",
"RUNTIME": "onnxruntime",
},
metadata={},
request_data={"metadata": {}, "parameters": {"text": "example text"}},
)

self.assertEqual(result_payload["classification"], {"prediction": "safe"})
self.assertEqual(
result_payload["model"],
{"key": "generic_text_classifier", "version": "2026.05.09"},
)
self.assertEqual(result_payload["model_version"], "2026.05.09")
self.assertEqual(result_payload["hf_runtime"], "onnx_fp32")
self.assertEqual(result_payload["runtime"], "onnxruntime")

def test_handle_inferences_falls_back_to_payload_request_id(self):
plugin = TextClassifierInferenceApiPlugin()
plugin._requests = {"req-1": {"status": "pending"}} # pylint: disable=protected-access
@@ -405,6 +405,16 @@ def _build_result_from_inference(
result_payload["tokenizer_name"] = inference["TOKENIZER_NAME"]
if "PIPELINE_TASK" in inference:
result_payload["pipeline_task"] = inference["PIPELINE_TASK"]
if "MODEL" in inference:
result_payload["model"] = inference["MODEL"]
if "MODEL_VERSION" in inference:
result_payload["model_version"] = inference["MODEL_VERSION"]
if "MODEL_REVISION" in inference:
result_payload["model_revision"] = inference["MODEL_REVISION"]
if "HF_RUNTIME" in inference:
result_payload["hf_runtime"] = inference["HF_RUNTIME"]
if "RUNTIME" in inference:
result_payload["runtime"] = inference["RUNTIME"]
return result_payload

def handle_inference_for_request(