From fc13dfe3b489e8d25a85c1d95b9854e7d476c6be Mon Sep 17 00:00:00 2001 From: Claude Code Date: Fri, 15 May 2026 10:35:07 +0300 Subject: [PATCH] fix(diagnostics): classify HTTP timeouts and string-wrapped 5xx as known codes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HTTP transport adapter wraps context.DeadlineExceeded and non-2xx responses as plain "transport error: ..." strings before bubbling them up. The typed errors.Is / statusError paths the classifier relied on never fire for those, so every hf.co timeout or 504 surfaced to the UI as MCPX_UNKNOWN_UNCLASSIFIED ("Server Error") even though the cause was a well-known transient upstream condition. Add two recovery rules in classifyHTTP: - context.DeadlineExceeded on http transport -> new MCPX_HTTP_TIMEOUT (severity: warn — usually transient, not a config bug) - substring "context deadline exceeded" on http transport -> same - "request failed with status NNN" / "notification failed with status NNN" -> route through DiagnoseHTTPStatus so 401/403/404/5xx all get their existing typed codes Tests cover both the typed and stringified forms taken verbatim from production server-hugginface.log. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/diagnostics/classifier.go | 59 +++++++++++++++++++++++-- internal/diagnostics/classifier_test.go | 45 +++++++++++++++++++ internal/diagnostics/codes.go | 1 + internal/diagnostics/registry.go | 10 +++++ 4 files changed, 111 insertions(+), 4 deletions(-) diff --git a/internal/diagnostics/classifier.go b/internal/diagnostics/classifier.go index 8a242831..aa158db7 100644 --- a/internal/diagnostics/classifier.go +++ b/internal/diagnostics/classifier.go @@ -199,11 +199,11 @@ func classifyStdio(err error, hints ClassifierHints) Code { } // classifyHTTP handles HTTP/SSE transport errors including TLS, DNS, and -// structured HTTP status errors. HTTP status classification requires the -// caller to wrap a statusError — see DiagnoseHTTPStatus below. +// structured HTTP status errors. HTTP status classification prefers a typed +// statusError (DiagnoseHTTPStatus below) but also falls back to a string match +// because the upstream layer commonly stringifies the error before bubbling it +// up. func classifyHTTP(err error, hints ClassifierHints) Code { - _ = hints - // DNS lookup errors are reported as *net.DNSError. var dnsErr *net.DNSError if errors.As(err, &dnsErr) { @@ -213,6 +213,7 @@ func classifyHTTP(err error, hints ClassifierHints) Code { // TLS verification: mcp-go surfaces these as *tls.CertificateVerificationError // in recent releases; we avoid a direct import dependency by string match. msg := err.Error() + lmsg := strings.ToLower(msg) if strings.Contains(msg, "x509:") || strings.Contains(msg, "tls: ") || strings.Contains(msg, "certificate") { return HTTPTLSFailed } @@ -222,9 +223,59 @@ func classifyHTTP(err error, hints ClassifierHints) Code { return HTTPConnRefuse } + // HTTP request timeouts. The upstream HTTP transport bubbles + // context.DeadlineExceeded up wrapped in a free-text "transport error: ... + // context deadline exceeded" string. Try the typed errors.Is path first + // (cheap, exact); fall back to substring on the http transport hint to + // catch the stringified form. Without this, hf.co/mcp slowdowns surface + // to the UI as MCPX_UNKNOWN_UNCLASSIFIED. + if errors.Is(err, context.DeadlineExceeded) && hints.Transport == "http" { + return HTTPTimeout + } + if hints.Transport == "http" && strings.Contains(lmsg, "context deadline exceeded") { + return HTTPTimeout + } + + // HTTP status text fallback. The upstream layer wraps non-2xx responses + // as a plain string ("transport error: request failed with status 504: ..."). + // The typed statusError path used by DiagnoseHTTPStatus() never fires for + // those, so we substring-match the canonical phrasing here. + if hints.Transport == "http" { + if code := matchHTTPStatusText(lmsg); code != "" { + return code + } + } + return "" } +// matchHTTPStatusText extracts a status code from the canonical +// "request failed with status NNN" / "notification failed with status NNN" +// phrasing emitted by the HTTP transport adapter. Returns empty when no +// recognised status appears. +func matchHTTPStatusText(lmsg string) Code { + const marker = "status " + idx := strings.Index(lmsg, marker) + for idx != -1 { + rest := lmsg[idx+len(marker):] + // Need at least three digits. + if len(rest) >= 3 && isDigit(rest[0]) && isDigit(rest[1]) && isDigit(rest[2]) { + status := int(rest[0]-'0')*100 + int(rest[1]-'0')*10 + int(rest[2]-'0') + if c := DiagnoseHTTPStatus(status); c != "" { + return c + } + } + next := strings.Index(lmsg[idx+1:], marker) + if next == -1 { + break + } + idx += 1 + next + } + return "" +} + +func isDigit(b byte) bool { return b >= '0' && b <= '9' } + // classifyNetwork handles host-environment network issues. func classifyNetwork(err error, hints ClassifierHints) Code { _ = hints diff --git a/internal/diagnostics/classifier_test.go b/internal/diagnostics/classifier_test.go index a181ed7f..964504b4 100644 --- a/internal/diagnostics/classifier_test.go +++ b/internal/diagnostics/classifier_test.go @@ -71,6 +71,51 @@ func TestClassify_HTTP_ConnRefused(t *testing.T) { } } +func TestClassify_HTTP_Timeout(t *testing.T) { + // Real upstream timeout — the http transport wraps context.DeadlineExceeded + // with the operation name. Must classify as MCPX_HTTP_TIMEOUT, not + // MCPX_UNKNOWN_UNCLASSIFIED. + err := fmt.Errorf("post %q: %w", "https://hf.co/mcp", context.DeadlineExceeded) + got := Classify(err, ClassifierHints{Transport: "http"}) + if got != HTTPTimeout { + t.Errorf("Classify(timeout) = %q, want %q", got, HTTPTimeout) + } +} + +func TestClassify_HTTP_TimeoutStringWrapped(t *testing.T) { + // The upstream manager often re-wraps as a plain string ("transport error: ... + // context deadline exceeded"). The typed errors.Is path can't see through that, + // so the classifier must also catch the substring on the http transport hint. + err := errors.New(`failed to list tools: transport error: failed to send request: failed to send request: Post "https://hf.co/mcp": context deadline exceeded`) + got := Classify(err, ClassifierHints{Transport: "http"}) + if got != HTTPTimeout { + t.Errorf("Classify(string-wrapped timeout) = %q, want %q", got, HTTPTimeout) + } +} + +func TestClassify_HTTP_StatusFromText(t *testing.T) { + // 5xx responses arrive at the classifier as a plain string from the + // upstream layer (the typed statusError path is bypassed by the wrapping). + // Must map to HTTPServerErr / HTTPUnauth / etc. instead of UNCLASSIFIED. + cases := []struct { + err string + want Code + }{ + {`transport error: request failed with status 504: 504`, HTTPServerErr}, + {`transport error: request failed with status 502 Bad Gateway`, HTTPServerErr}, + {`failed to send initialized notification: notification failed with status 504: ...`, HTTPServerErr}, + {`transport error: request failed with status 401`, HTTPUnauth}, + {`transport error: request failed with status 403 Forbidden`, HTTPForbidden}, + {`request failed with status 404`, HTTPNotFound}, + } + for _, tc := range cases { + got := Classify(errors.New(tc.err), ClassifierHints{Transport: "http"}) + if got != tc.want { + t.Errorf("Classify(%q) = %q, want %q", tc.err, got, tc.want) + } + } +} + func TestClassify_NetworkOffline(t *testing.T) { err := &net.OpError{Op: "dial", Err: syscall.ENETUNREACH} got := Classify(err, ClassifierHints{}) diff --git a/internal/diagnostics/codes.go b/internal/diagnostics/codes.go index 392bc16d..383a4dab 100644 --- a/internal/diagnostics/codes.go +++ b/internal/diagnostics/codes.go @@ -33,6 +33,7 @@ const ( HTTPNotFound Code = "MCPX_HTTP_404" HTTPServerErr Code = "MCPX_HTTP_5XX" HTTPConnRefuse Code = "MCPX_HTTP_CONN_REFUSED" + HTTPTimeout Code = "MCPX_HTTP_TIMEOUT" ) // DOCKER domain — Docker isolation subsystem failures. diff --git a/internal/diagnostics/registry.go b/internal/diagnostics/registry.go index 4d145cff..2c914243 100644 --- a/internal/diagnostics/registry.go +++ b/internal/diagnostics/registry.go @@ -207,6 +207,16 @@ func seedHTTP() { }, DocsURL: docsURL(HTTPConnRefuse), }) + register(CatalogEntry{ + Code: HTTPTimeout, + Severity: SeverityWarn, + UserMessage: "The upstream server did not respond in time. This is usually transient.", + FixSteps: []FixStep{ + {Type: FixStepCommand, Label: "Test reachability", Command: "curl -v "}, + {Type: FixStepLink, Label: "Upstream status page", URL: docsURL(HTTPTimeout)}, + }, + DocsURL: docsURL(HTTPTimeout), + }) } // --- DOCKER --------------------------------------------------------------