Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cloud_pipelines_backend/instrumentation/error_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,30 @@
import json
import re

try:
from ..launchers.interfaces import LauncherError as _LauncherError

_LAUNCHER_ERROR_AVAILABLE = True
except ImportError:
_LauncherError = None # type: ignore[assignment,misc]
_LAUNCHER_ERROR_AVAILABLE = False

# Precompiled patterns for run-specific fragments of error messages.

# `task-`, `tangle-`, or `tangle-ce-` followed by two hyphen-separated
# alphanumeric segments (pod names, per the constant's name).
_POD_NAME_PATTERN = re.compile(r"(?:task|tangle(?:-ce)?)-[a-zA-Z0-9]+-[a-zA-Z0-9]+")
# Text shaped like a default object repr: `<... object at 0x...>`.
_OBJECT_REPR_PATTERN = re.compile(r"<[^>]+ object at 0x[0-9a-fA-F]+>")
# A standalone `0x`-prefixed hexadecimal literal, delimited by word boundaries.
_HEX_ADDRESS_PATTERN = re.compile(r"\b0x[0-9a-fA-F]+\b")
# Five hyphen-separated hex groups of 8-4-4-4-12 digits; matched
# case-insensitively so upper-case UUIDs are covered as well.
_UUID_PATTERN = re.compile(
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.IGNORECASE
)
# Any run of 16 or more ASCII letters/digits delimited by word boundaries.
_LONG_ALNUM_ID_PATTERN = re.compile(r"\b[a-zA-Z0-9]{16,}\b")
# Matches from the first `{"`, `{'`, or `{ "` / `{ '` to end of string.
# Both the embedded dict/JSON literal and any trailing message text are replaced
# with `{...}` — the greedy match is intentional: anything after a runtime-data
# dict in an error message is typically also variable and should not affect grouping.
# re.DOTALL lets `.` cross newlines, so a multi-line literal is consumed whole.
_JSON_OBJECT_PATTERN = re.compile(r"\{\s*['\"].*", re.DOTALL)


def _strip_generic(*, message: str) -> str:
message = _JSON_OBJECT_PATTERN.sub("{...}", message)
Comment thread
Mbeaulne marked this conversation as resolved.
message = _OBJECT_REPR_PATTERN.sub("{object}", message)
message = _HEX_ADDRESS_PATTERN.sub("{addr}", message)
message = _UUID_PATTERN.sub("{uuid}", message)
Expand Down Expand Up @@ -85,13 +99,21 @@ def _normalize_orchestrator_error(*, exception: BaseException) -> str | None:
return f"OrchestratorError: {message}"


def _normalize_launcher_error(*, exception: BaseException) -> str | None:
    """Build the grouping key for a ``LauncherError``.

    Args:
        exception: The exception to inspect.

    Returns:
        ``"LauncherError: <message>"`` where the message is ``str(exception)``
        with everything matched by ``_JSON_OBJECT_PATTERN`` replaced by
        ``{...}`` and surrounding whitespace trimmed. ``None`` when the
        ``LauncherError`` class could not be imported or ``exception`` is not
        an instance of it.
    """
    # The availability flag is checked first so isinstance() is never called
    # with the None placeholder that stands in for a failed import.
    handles_exception = _LAUNCHER_ERROR_AVAILABLE and isinstance(
        exception, _LauncherError
    )
    if handles_exception:
        raw_text = str(exception)
        collapsed = _JSON_OBJECT_PATTERN.sub("{...}", raw_text)
        return "LauncherError: " + collapsed.strip()
    return None


def normalize_error_message(*, exception: BaseException) -> str:
"""Return a stable normalized string for error grouping."""
for normalizer in (
_normalize_k8s_api_exception,
_normalize_max_retry_error,
_normalize_unicode_decode_error,
_normalize_orchestrator_error,
_normalize_launcher_error,
):
result = normalizer(exception=exception)
if result is not None:
Expand Down
56 changes: 56 additions & 0 deletions tests/instrumentation/test_error_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,52 @@ def test_strips_object_repr(self):
)


class TestNormalizeLauncherError:
    """Checks how ``normalize_error_message`` renders ``LauncherError`` instances."""

    def _make_launcher_error(
        self, message: str, cause: BaseException | None = None
    ) -> Exception:
        """Build a ``LauncherError`` with ``message``, optionally chained to ``cause``.

        Skips the calling test when ``LauncherError`` cannot be imported.

        Args:
            message: Text passed to the ``LauncherError`` constructor.
            cause: Exception to attach as ``__cause__``; ``None`` for no chaining.

        Returns:
            The constructed ``LauncherError`` instance.
        """
        try:
            from cloud_pipelines_backend.launchers.interfaces import LauncherError
        except ImportError:
            pytest.skip("LauncherError not importable")
        # Compare against None explicitly: an exception type may define
        # __bool__ or __len__, and a falsy cause must still be chained.
        if cause is not None:
            # Raise and catch so that ``raise ... from`` sets __cause__ on the
            # instance that is handed back to the test.
            try:
                raise LauncherError(message) from cause
            except LauncherError as exc:
                return exc
        return LauncherError(message)

    def test_strips_pod_spec_json(self):
        """A dict-literal pod spec embedded in the message collapses to ``{...}``."""
        pod_spec = (
            "{'apiVersion': 'v1', 'kind': 'Pod', 'metadata': {'name': 'task-abc-xyz'}}"
        )
        exc = self._make_launcher_error(f"Failed to create pod: {pod_spec}")
        result = error_normalization.normalize_error_message(exception=exc)
        assert result == "LauncherError: Failed to create pod: {...}"

    def test_with_timeout_cause(self):
        """A chained cause does not change the normalized message."""
        cause = TimeoutError("The read operation timed out")
        exc = self._make_launcher_error(
            "Failed to create pod: {'apiVersion': 'v1'}", cause=cause
        )
        result = error_normalization.normalize_error_message(exception=exc)
        assert result == "LauncherError: Failed to create pod: {...}"

    def test_no_colon_in_message(self):
        """A plain message is kept verbatim behind the ``LauncherError:`` prefix."""
        exc = self._make_launcher_error("launch failed")
        result = error_normalization.normalize_error_message(exception=exc)
        assert result == "LauncherError: launch failed"

    def test_multi_colon_diagnostic_preserved(self):
        """Colon-separated diagnostics without an embedded dict are left intact."""
        exc = self._make_launcher_error(
            "creating pod: spec invalid: missing field 'name'"
        )
        result = error_normalization.normalize_error_message(exception=exc)
        assert (
            result == "LauncherError: creating pod: spec invalid: missing field 'name'"
        )


Comment thread
Mbeaulne marked this conversation as resolved.
class TestFallback:
def test_strips_hex_address(self):
exc = ValueError("object at 0xdeadbeef failed")
Expand All @@ -204,3 +250,13 @@ def test_stable_message_unchanged(self):
exc = AttributeError("'NoneType' object has no attribute 'encode'")
result = error_normalization.normalize_error_message(exception=exc)
assert result == "AttributeError: 'NoneType' object has no attribute 'encode'"

def test_strips_json_object(self):
    """A single-quoted dict literal in the message collapses to ``{...}``."""
    error = RuntimeError("operation failed: {'key': 'value', 'nested': {'a': 1}}")
    expected = "RuntimeError: operation failed: {...}"
    normalized = error_normalization.normalize_error_message(exception=error)
    assert normalized == expected

def test_strips_json_object_double_quotes(self):
    """A double-quoted JSON object in the message collapses to ``{...}``."""
    error = RuntimeError('operation failed: {"key": "value"}')
    expected = "RuntimeError: operation failed: {...}"
    normalized = error_normalization.normalize_error_message(exception=error)
    assert normalized == expected
Loading