Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions .github/workflows/build-cli.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ jobs:
name: E2E Tests
runs-on: ubuntu-latest
needs: [build]
# The act + gitea scenarios run serially (-parallel 1, see the test step),
# which trades wall-clock for reliability, so the job sets an explicit time
# budget instead of relying on the runner default. 70m covers the 60m go test
# budget plus checkout, Go install, and container teardown.
timeout-minutes: 70
steps:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3

Expand All @@ -61,7 +66,10 @@ jobs:

- name: Run E2E tests
working-directory: e2e
# Cap subtest parallelism at 2 to match e2e.yaml's E2E_PARALLEL. The
# 4-core / ~7.9GB runner is OOM-killed at 4 (silent FAIL, see #104), and
# the extra container load widens the gitea push-vs-Contents-API race.
run: go test -v -parallel 2 -timeout 30m ./...
# Run scenarios serially (-parallel 1). The 4-core / ~7.9GB runner is
# OOM-killed at 4 (silent FAIL, see #104), and even at 2 the concurrent
# act + gitea container load throttles gitea (405 "try again later") and
# destabilises act runs, producing intermittent failures unrelated to
# the product. Serial execution removes that contention; the longer
# 60m timeout covers the resulting slower wall-clock.
run: go test -v -parallel 1 -timeout 60m ./...
8 changes: 5 additions & 3 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ on:
parallel:
description: 'Subtest parallelism (lower = slower but more reliable)'
required: false
default: '2'
default: '1'

permissions:
contents: read
Expand Down Expand Up @@ -76,8 +76,10 @@ jobs:
# RAM. Each scenario spins up gitea + act + N job containers; at
# the default GOMAXPROCS=4, four scenarios concurrently exhaust
# memory and the test process is OOM-killed (silent FAIL with no
# per-test output, see #104).
E2E_PARALLEL: ${{ github.event.inputs.parallel || '2' }}
# per-test output, see #104). Even at 2 the concurrent container
# load throttles gitea and destabilises act runs, so the default is
# serial (1); raise it per-dispatch only when chasing wall-clock.
E2E_PARALLEL: ${{ github.event.inputs.parallel || '1' }}
run: |
go test -v \
-timeout "$E2E_TIMEOUT" \
Expand Down
108 changes: 48 additions & 60 deletions e2e/harness/gitea.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,26 @@ func (g *GiteaContainer) Container() testcontainers.Container {
return g.container
}

// newJSONRequest constructs a gitea API request that authenticates with the
// admin credentials via HTTP basic auth and, when body is non-nil, sends it as
// a JSON payload.
//
// It is meant to be used as the request factory handed to doRetry: every call
// wraps body in a brand-new reader, so a request that has to be replayed starts
// from an unread payload. When body is nil the request carries no payload and
// no Content-Type header is set.
//
// The returned error wraps the underlying http.NewRequestWithContext failure
// and names the HTTP method that was being built.
func (g *GiteaContainer) newJSONRequest(ctx context.Context, method, url string, body []byte) (*http.Request, error) {
	hasBody := body != nil

	// payload stays a nil interface when there is no body, so net/http treats
	// the request as having no payload at all.
	var payload io.Reader
	if hasBody {
		payload = bytes.NewReader(body)
	}

	req, err := http.NewRequestWithContext(ctx, method, url, payload)
	if err != nil {
		return nil, fmt.Errorf("build %s request: %w", method, err)
	}

	req.SetBasicAuth(AdminUsername, AdminPassword)
	if hasBody {
		req.Header.Set("Content-Type", "application/json")
	}
	return req, nil
}

// Terminate stops and removes the container
func (g *GiteaContainer) Terminate(ctx context.Context) error {
return g.container.Terminate(ctx)
Expand Down Expand Up @@ -296,16 +316,10 @@ func (g *GiteaContainer) CreateCommitOnBranch(ctx context.Context, repo *Repo, b
return "", fmt.Errorf("marshal change-files payload: %w", err)
}

req, err := http.NewRequestWithContext(ctx, "POST",
fmt.Sprintf("%s/api/v1/repos/%s/%s/contents", g.url, AdminUsername, repo.Name),
bytes.NewReader(body))
if err != nil {
return "", fmt.Errorf("build change-files request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.SetBasicAuth(AdminUsername, AdminPassword)

resp, err := http.DefaultClient.Do(req)
url := fmt.Sprintf("%s/api/v1/repos/%s/%s/contents", g.url, AdminUsername, repo.Name)
resp, err := doRetry(ctx, func() (*http.Request, error) {
return g.newJSONRequest(ctx, "POST", url, body)
})
if err != nil {
return "", fmt.Errorf("change-files request: %w", err)
}
Expand Down Expand Up @@ -410,16 +424,10 @@ func (g *GiteaContainer) CreateBranch(ctx context.Context, repo *Repo, name, fro
return fmt.Errorf("marshal create-branch payload: %w", err)
}

req, err := http.NewRequestWithContext(ctx, "POST",
fmt.Sprintf("%s/api/v1/repos/%s/%s/branches", g.url, AdminUsername, repo.Name),
bytes.NewReader(body))
if err != nil {
return fmt.Errorf("build create-branch request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.SetBasicAuth(AdminUsername, AdminPassword)

resp, err := http.DefaultClient.Do(req)
url := fmt.Sprintf("%s/api/v1/repos/%s/%s/branches", g.url, AdminUsername, repo.Name)
resp, err := doRetry(ctx, func() (*http.Request, error) {
return g.newJSONRequest(ctx, "POST", url, body)
})
if err != nil {
return fmt.Errorf("create-branch request: %w", err)
}
Expand Down Expand Up @@ -700,16 +708,10 @@ func (g *GiteaContainer) CreatePR(ctx context.Context, repo *Repo, head, base, t
return 0, fmt.Errorf("marshal create-pr payload: %w", err)
}

req, err := http.NewRequestWithContext(ctx, "POST",
fmt.Sprintf("%s/api/v1/repos/%s/%s/pulls", g.url, AdminUsername, repo.Name),
bytes.NewReader(reqBody))
if err != nil {
return 0, fmt.Errorf("build create-pr request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.SetBasicAuth(AdminUsername, AdminPassword)

resp, err := http.DefaultClient.Do(req)
url := fmt.Sprintf("%s/api/v1/repos/%s/%s/pulls", g.url, AdminUsername, repo.Name)
resp, err := doRetry(ctx, func() (*http.Request, error) {
return g.newJSONRequest(ctx, "POST", url, reqBody)
})
if err != nil {
return 0, fmt.Errorf("create-pr request: %w", err)
}
Expand Down Expand Up @@ -812,16 +814,10 @@ func (g *GiteaContainer) createLabel(ctx context.Context, repo *Repo, name strin
return 0, fmt.Errorf("marshal create-label payload: %w", err)
}

req, err := http.NewRequestWithContext(ctx, "POST",
fmt.Sprintf("%s/api/v1/repos/%s/%s/labels", g.url, AdminUsername, repo.Name),
bytes.NewReader(body))
if err != nil {
return 0, fmt.Errorf("build create-label request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.SetBasicAuth(AdminUsername, AdminPassword)

resp, err := http.DefaultClient.Do(req)
url := fmt.Sprintf("%s/api/v1/repos/%s/%s/labels", g.url, AdminUsername, repo.Name)
resp, err := doRetry(ctx, func() (*http.Request, error) {
return g.newJSONRequest(ctx, "POST", url, body)
})
if err != nil {
return 0, fmt.Errorf("create-label request: %w", err)
}
Expand Down Expand Up @@ -852,16 +848,10 @@ func (g *GiteaContainer) applyLabels(ctx context.Context, repo *Repo, index int6
return fmt.Errorf("marshal apply-labels payload: %w", err)
}

req, err := http.NewRequestWithContext(ctx, "POST",
fmt.Sprintf("%s/api/v1/repos/%s/%s/issues/%d/labels", g.url, AdminUsername, repo.Name, index),
bytes.NewReader(body))
if err != nil {
return fmt.Errorf("build apply-labels request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.SetBasicAuth(AdminUsername, AdminPassword)

resp, err := http.DefaultClient.Do(req)
url := fmt.Sprintf("%s/api/v1/repos/%s/%s/issues/%d/labels", g.url, AdminUsername, repo.Name, index)
resp, err := doRetry(ctx, func() (*http.Request, error) {
return g.newJSONRequest(ctx, "POST", url, body)
})
if err != nil {
return fmt.Errorf("apply-labels request: %w", err)
}
Expand Down Expand Up @@ -893,16 +883,14 @@ func (g *GiteaContainer) MergePR(ctx context.Context, repo *Repo, index int64, s
return fmt.Errorf("marshal merge-pr payload: %w", err)
}

req, err := http.NewRequestWithContext(ctx, "POST",
fmt.Sprintf("%s/api/v1/repos/%s/%s/pulls/%d/merge", g.url, AdminUsername, repo.Name, index),
bytes.NewReader(body))
if err != nil {
return fmt.Errorf("build merge-pr request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.SetBasicAuth(AdminUsername, AdminPassword)

resp, err := http.DefaultClient.Do(req)
// The merge POST is the call most exposed to gitea's "Please try again
// later" throttle under load, so it is wrapped in the bounded transient
// retry. The retry is safe: gitea returns the 405 throttle BEFORE applying
// the merge, so a re-issue cannot double-merge.
url := fmt.Sprintf("%s/api/v1/repos/%s/%s/pulls/%d/merge", g.url, AdminUsername, repo.Name, index)
resp, err := doRetry(ctx, func() (*http.Request, error) {
return g.newJSONRequest(ctx, "POST", url, body)
})
if err != nil {
return fmt.Errorf("merge-pr request: %w", err)
}
Expand Down
115 changes: 115 additions & 0 deletions e2e/harness/gitea_retry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package harness

import (
"context"
"fmt"
"io"
"net/http"
"strings"
"time"
)

// Retry budget for throttled gitea REST calls.
const (
	// giteaRetryAttempts is the maximum number of times a gitea REST call is
	// issued before the failure is surfaced to the caller.
	giteaRetryAttempts = 5

	// giteaRetryBackoff is the base delay between attempts. The wait after
	// attempt n is n*giteaRetryBackoff, i.e. a short linear backoff intended
	// to outlast gitea's transient "try again later" throttle without
	// materially slowing the suite.
	giteaRetryBackoff = 250 * time.Millisecond
)

// transientResponse classifies a gitea HTTP response as transient (worth
// retrying) or final, based on its status code and body text.
//
// Two shapes are treated as transient:
//   - 405 Method Not Allowed whose body contains "try again later"
//     (case-insensitive), e.g. {"message":"Please try again later"}; and
//   - any 5xx status (500-599), regardless of body.
//
// Everything else is final. In particular a 405 without the "try again later"
// marker is a genuine client error, and all other 4xx codes are never retried,
// so tests that assert on an expected failure stay deterministic.
func transientResponse(status int, body string) bool {
	switch {
	case status == http.StatusMethodNotAllowed:
		// Only the throttle variant of 405 is retryable; tell it apart by body.
		lowered := strings.ToLower(body)
		return strings.Contains(lowered, "try again later")
	case status < 500, status > 599:
		return false
	default:
		return true
	}
}

// doRetry issues the request built by newReq through http.DefaultClient,
// re-issuing it when gitea answers with a transient throttling response or the
// transport fails, up to giteaRetryAttempts attempts in total.
//
// newReq must return a fresh *http.Request on every call so the request body
// can be replayed. An error from newReq is returned immediately and is never
// retried.
//
// Outcomes:
//   - A non-transient response (success or a real 4xx) is returned at once.
//   - A transient response (see transientResponseBody) is drained, closed and
//     retried after a linear backoff. On the final attempt the response is
//     returned as-is so the caller can report gitea's own status and body.
//   - A transport error is retried after the same backoff. Once the attempt
//     budget is spent the last transport error is returned wrapped, without
//     waiting out a further backoff that no attempt would follow.
//   - If ctx is cancelled, ctx.Err() is returned.
//
// Retrying assumes a transient response was produced BEFORE gitea applied any
// state change, so replaying a mutation does not double-apply it.
// NOTE(review): that assumption is not verifiable from this file, and a
// transport error may occur after the server processed the request - confirm
// every caller tolerates a replay.
//
// When a response is returned the caller owns its body and must close it.
func doRetry(ctx context.Context, newReq func() (*http.Request, error)) (*http.Response, error) {
	var lastErr error
	for attempt := 1; attempt <= giteaRetryAttempts; attempt++ {
		req, err := newReq()
		if err != nil {
			return nil, err
		}

		resp, err := http.DefaultClient.Do(req)
		switch {
		case err != nil:
			// A cancelled or expired context is final, not transient; report
			// it directly instead of spending the remaining attempts.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return nil, ctxErr
			}
			// Other transport-level errors (connection reset, refused) under
			// load are treated as transient.
			lastErr = err

		case attempt < giteaRetryAttempts && transientResponseBody(resp):
			// Drain and close so the connection can be reused. The body was
			// already buffered in memory by transientResponseBody, so this
			// read cannot fail and its error is deliberately ignored.
			drained, _ := io.ReadAll(resp.Body)
			_ = resp.Body.Close()
			lastErr = fmt.Errorf("gitea transient response: %s - %s", resp.Status, strings.TrimSpace(string(drained)))

		default:
			return resp, nil
		}

		// No attempt follows the last one, so do not pay its backoff.
		if attempt == giteaRetryAttempts {
			break
		}
		if !sleepBeforeRetry(ctx, attempt) {
			return nil, ctx.Err()
		}
	}

	return nil, fmt.Errorf("gitea request failed after %d attempts: %w", giteaRetryAttempts, lastErr)
}

// transientResponseBody reports whether resp is a transient gitea throttle
// that doRetry should retry.
//
// Responses whose status is neither 405 nor 5xx are rejected without touching
// the body. Otherwise the body is read fully into memory, the original body is
// closed, and resp.Body is replaced with an in-memory reader over the bytes
// that were read, so the caller can still consume the body afterwards. The
// verdict itself comes from transientResponse.
//
// If reading the body fails part-way, the bytes received so far are kept
// rather than discarded: a 5xx is classified by status alone and therefore
// stays retryable, while a 405 is judged on whatever text arrived.
func transientResponseBody(resp *http.Response) bool {
	status := resp.StatusCode
	if status != http.StatusMethodNotAllowed && (status < 500 || status > 599) {
		return false
	}

	// io.ReadAll returns the data read before any error, so a failed read
	// still yields a usable (possibly truncated) body; the error itself is
	// intentionally not surfaced because classification is best-effort.
	raw, _ := io.ReadAll(resp.Body)
	_ = resp.Body.Close()

	text := string(raw)
	resp.Body = io.NopCloser(strings.NewReader(text))
	return transientResponse(status, text)
}

// sleepBeforeRetry blocks for the linear backoff that follows the given
// attempt (attempt * giteaRetryBackoff). It returns true once the backoff has
// elapsed, or false if ctx is cancelled first.
//
// An explicit timer is used instead of time.After so that it is stopped as
// soon as the function returns, rather than staying pending until it expires
// when the context wins the race.
func sleepBeforeRetry(ctx context.Context, attempt int) bool {
	timer := time.NewTimer(time.Duration(attempt) * giteaRetryBackoff)
	defer timer.Stop()

	select {
	case <-ctx.Done():
		return false
	case <-timer.C:
		return true
	}
}
Loading
Loading