diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..962f096 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,2 @@ +# Default code owners for all files +* @kaio6fellipe diff --git a/.github/workflows/ami-build.yml b/.github/workflows/ami-build.yml index 2579239..96d9371 100644 --- a/.github/workflows/ami-build.yml +++ b/.github/workflows/ami-build.yml @@ -35,9 +35,7 @@ on: paths: - "infra/packer/**" -permissions: - contents: read - id-token: write +permissions: {} env: AMI_DISTRIBUTION_REGIONS: >- @@ -45,6 +43,9 @@ env: jobs: build: + permissions: + contents: read + id-token: write runs-on: ubuntu-latest steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 @@ -72,7 +73,7 @@ jobs: aws-region: us-east-2 - name: Setup Packer - uses: hashicorp/setup-packer@main + uses: hashicorp/setup-packer@c3d53c525d422944e50ee27b840746d6522b08de # v3.2.0 - name: Packer init working-directory: infra/packer diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..e2f983c --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,37 @@ +name: CodeQL + +on: + push: + branches: + - main + pull_request: + branches: + - main + schedule: + - cron: '0 6 * * 3' + +permissions: {} + +jobs: + analyze: + name: Analyze Go + runs-on: ubuntu-latest + permissions: + contents: read + security-events: write + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - name: Initialize CodeQL + uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1 + with: + languages: go + + - name: Build + run: cd lambda && go build ./... 
+ + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1 + with: + category: /language:go diff --git a/.github/workflows/label-old-prs.yml b/.github/workflows/label-old-prs.yml index fe36767..579e717 100644 --- a/.github/workflows/label-old-prs.yml +++ b/.github/workflows/label-old-prs.yml @@ -20,13 +20,14 @@ on: default: '200' type: string -permissions: - contents: read - pull-requests: write - issues: write +permissions: {} jobs: labeler: + permissions: + contents: read + pull-requests: write + issues: write runs-on: [self-hosted, medium] steps: - name: Checkout diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 937e524..81d6e63 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -4,10 +4,7 @@ on: pull_request: types: [opened, synchronize, reopened] -permissions: - contents: read - pull-requests: write - issues: write +permissions: {} jobs: labeler: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f150f91..e81e705 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,11 +5,12 @@ on: tags: - "v*" -permissions: - contents: write +permissions: {} jobs: release: + permissions: + contents: write runs-on: [self-hosted, release] steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 diff --git a/.github/workflows/scorecard-pr.yml b/.github/workflows/scorecard-pr.yml new file mode 100644 index 0000000..af6da9f --- /dev/null +++ b/.github/workflows/scorecard-pr.yml @@ -0,0 +1,123 @@ +name: Scorecard PR Check + +on: + pull_request: + branches: + - main + +permissions: {} + +jobs: + scorecard-check: + name: Scorecard Check + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + issues: read + checks: read + env: + SCORECARD_VERSION: "5.4.0" + SCORECARD_CHECKSUM: "e5183aeaa5aa548fbb7318a6deb3e1038be0ef9aca24e655422ae88dfbe67502" + 
SCORE_THRESHOLD: "7.0" + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + + - name: Install scorecard CLI + run: | + set -euo pipefail + TARBALL="scorecard_${SCORECARD_VERSION}_linux_amd64.tar.gz" + curl -sLO "https://github.com/ossf/scorecard/releases/download/v${SCORECARD_VERSION}/${TARBALL}" + echo "${SCORECARD_CHECKSUM} ${TARBALL}" | sha256sum --check --strict + tar xzf "${TARBALL}" scorecard + chmod +x scorecard + sudo mv scorecard /usr/local/bin/scorecard + rm "${TARBALL}" + + - name: Run scorecard + id: scorecard + env: + GITHUB_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + scorecard --repo="github.com/${{ github.repository }}" \ + --commit="${{ github.event.pull_request.head.sha }}" \ + --format=json --show-details > scorecard.json + SCORE=$(jq -r '.score' scorecard.json) + echo "score=${SCORE}" >> "$GITHUB_OUTPUT" + echo "Scorecard overall score: ${SCORE}" + + - name: Comment on PR + if: github.event.pull_request.head.repo.full_name == github.repository + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7 + with: + script: | + const fs = require('fs'); + const data = JSON.parse(fs.readFileSync('scorecard.json', 'utf8')); + const score = data.score; + const threshold = parseFloat('${{ env.SCORE_THRESHOLD }}'); + const passed = score >= threshold; + const icon = passed ? ':white_check_mark:' : ':x:'; + const repo = '${{ github.repository }}'; + + const checks = data.checks + .sort((a, b) => a.name.localeCompare(b.name)) + .map(c => { + const s = c.score === -1 ? 'N/A' : `${c.score}/10`; + const raw = (c.reason || '').replace(/\|/g, '\\|'); + const reason = raw.length > 80 + ? raw.substring(0, 77) + '...' 
+ : raw; + return `| ${c.name} | ${s} | ${reason} |`; + }) + .join('\n'); + + let body = `## OpenSSF Scorecard — ${score}/10 ${icon}\n\n`; + body += `| Check | Score | Details |\n`; + body += `|-------|-------|---------|`; + body += `\n${checks}\n\n`; + + if (!passed) { + body += `> :rotating_light: Score ${score} is below threshold ${threshold} — this check will fail.\n\n`; + } + + body += `> Threshold: ${threshold} | [Full report](https://securityscorecards.dev/viewer/?uri=github.com/${repo})\n`; + + const marker = '<!-- openssf-scorecard-pr-check -->'; /* hidden tag identifying this workflow's sticky comment; an empty marker would match every comment on the PR */ + body = marker + '\n' + body; + + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + const existing = comments.find(c => c.body?.includes(marker)); + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body: body, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: body, + }); + } + + - name: Enforce threshold + run: | + SCORE="${{ steps.scorecard.outputs.score }}" + THRESHOLD="${{ env.SCORE_THRESHOLD }}" + if [ "$(echo "${SCORE} < ${THRESHOLD}" | bc -l)" -eq 1 ]; then + echo "::error::OpenSSF Scorecard score ${SCORE} is below threshold ${THRESHOLD}" + exit 1 + fi + echo "Scorecard score ${SCORE} meets threshold ${THRESHOLD}" diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 0000000..7a02aba --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,45 @@ +name: OSSF Scorecard + +on: + schedule: + - cron: '0 6 * * 1' + push: + branches: + - main + workflow_dispatch: + +permissions: {} + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + permissions: + security-events: write + id-token: write + contents: read + actions: read + steps: + - name: Checkout 
+ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + + - name: Run Scorecard + uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3 + with: + results_file: results.sarif + results_format: sarif + publish_results: true + + - name: Upload SARIF artifact + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: scorecard-sarif + path: results.sarif + retention-days: 5 + + - name: Upload SARIF to Security tab + uses: github/codeql-action/upload-sarif@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1 + with: + sarif_file: results.sarif diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000..a32fe58 --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,58 @@ +name: Security Scanning + +on: + push: + branches: + - main + pull_request: + branches: + - main + +permissions: {} + +jobs: + gitleaks: + name: Secret Scan + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + fetch-depth: 0 + + - name: Install Gitleaks + run: | + set -euo pipefail + GITLEAKS_VERSION="8.30.1" + curl -sLO "https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz" + tar xzf "gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz" gitleaks + sudo mv gitleaks /usr/local/bin/gitleaks + rm "gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz" + + - name: Run Gitleaks + run: gitleaks detect --source . 
--verbose + + govulncheck: + name: Vulnerability Scan (Go) + runs-on: ubuntu-latest + permissions: + contents: read + defaults: + run: + working-directory: lambda + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - name: Set up Go + uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6 + with: + go-version-file: lambda/go.mod + + - name: Install govulncheck + run: go install golang.org/x/vuln/cmd/govulncheck@v1.1.4 + + - name: Run govulncheck + run: govulncheck ./... diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ab23dc3..4617c38 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,12 +6,13 @@ on: pull_request: branches: [main] -permissions: - contents: read +permissions: {} jobs: test: name: fmt, vet, test and coverage + permissions: + contents: read runs-on: [self-hosted, large] defaults: run: @@ -37,6 +38,8 @@ jobs: lint: name: lint + permissions: + contents: read runs-on: [self-hosted, large] steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 diff --git a/.golangci.yml b/.golangci.yml index 6756b67..b31a21d 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -8,20 +8,54 @@ linters: default: none enable: - errcheck + - gocyclo + - gosec - govet - ineffassign + - misspell + - revive - staticcheck + - unconvert - unused settings: errcheck: check-type-assertions: true check-blank: true + gocyclo: + min-complexity: 15 + gosec: + excludes: + - G115 + revive: + rules: + - name: exported + severity: warning + - name: var-naming + severity: warning + - name: error-return + severity: warning + - name: error-naming + severity: warning + - name: unused-parameter + severity: warning exclusions: rules: - path: _test\.go linters: - errcheck + - gosec + - gocyclo + - revive + - path: _test\.go + text: "exported" + linters: + - revive formatters: enable: - gofmt + - goimports + settings: + goimports: + local-prefixes: + - 
github.com/devopsfactory-io/jit-runners diff --git a/lambda/internal/config/config.go b/lambda/internal/config/config.go index 9f51ccc..e50a8ff 100644 --- a/lambda/internal/config/config.go +++ b/lambda/internal/config/config.go @@ -16,7 +16,7 @@ import ( type Config struct { // GitHub App credentials. AppID string - PrivateKey string + PrivateKey string //nolint:gosec // G117: not a hardcoded credential, loaded from env/secrets manager WebhookSecret string // SQS queue URL for scale-up messages. @@ -72,14 +72,8 @@ func LoadWithClient(ctx context.Context, client SecretsReader) (*Config, error) DefaultAMI: os.Getenv("EC2_DEFAULT_AMI"), } - if cfg.AppID == "" { - return nil, fmt.Errorf("GITHUB_APP_ID is required") - } - if cfg.QueueURL == "" { - return nil, fmt.Errorf("SQS_QUEUE_URL is required") - } - if cfg.TableName == "" { - return nil, fmt.Errorf("DYNAMODB_TABLE_NAME is required") + if err := validateRequiredEnv(cfg); err != nil { + return nil, err } // Parse subnet IDs (comma-separated). @@ -94,7 +88,29 @@ func LoadWithClient(ctx context.Context, client SecretsReader) (*Config, error) } } - // Load secrets. + if err := loadSecrets(ctx, cfg, client); err != nil { + return nil, err + } + + return cfg, nil +} + +// validateRequiredEnv checks that required environment variables are set on the config. +func validateRequiredEnv(cfg *Config) error { + if cfg.AppID == "" { + return fmt.Errorf("GITHUB_APP_ID is required") + } + if cfg.QueueURL == "" { + return fmt.Errorf("SQS_QUEUE_URL is required") + } + if cfg.TableName == "" { + return fmt.Errorf("DYNAMODB_TABLE_NAME is required") + } + return nil +} + +// loadSecrets loads webhook secret and private key from Secrets Manager or environment. 
+func loadSecrets(ctx context.Context, cfg *Config, client SecretsReader) error { webhookSecretARN := os.Getenv("GITHUB_APP_WEBHOOK_SECRET_ARN") privateKeyARN := os.Getenv("GITHUB_APP_PRIVATE_KEY_SECRET_ARN") @@ -102,14 +118,14 @@ func LoadWithClient(ctx context.Context, client SecretsReader) (*Config, error) if client == nil { awsCfg, err := config.LoadDefaultConfig(ctx) if err != nil { - return nil, fmt.Errorf("load AWS config: %w", err) + return fmt.Errorf("load AWS config: %w", err) } client = secretsmanager.NewFromConfig(awsCfg) } if webhookSecretARN != "" { secret, err := getSecret(ctx, client, webhookSecretARN) if err != nil { - return nil, fmt.Errorf("webhook secret: %w", err) + return fmt.Errorf("webhook secret: %w", err) } cfg.WebhookSecret = secret } else { @@ -118,7 +134,7 @@ func LoadWithClient(ctx context.Context, client SecretsReader) (*Config, error) if privateKeyARN != "" { secret, err := getSecret(ctx, client, privateKeyARN) if err != nil { - return nil, fmt.Errorf("private key: %w", err) + return fmt.Errorf("private key: %w", err) } cfg.PrivateKey = secret } else { @@ -130,12 +146,12 @@ func LoadWithClient(ctx context.Context, client SecretsReader) (*Config, error) } if cfg.WebhookSecret == "" { - return nil, fmt.Errorf("webhook secret is required (GITHUB_APP_WEBHOOK_SECRET or GITHUB_APP_WEBHOOK_SECRET_ARN)") + return fmt.Errorf("webhook secret is required (GITHUB_APP_WEBHOOK_SECRET or GITHUB_APP_WEBHOOK_SECRET_ARN)") } if cfg.PrivateKey == "" { - return nil, fmt.Errorf("private key is required (GITHUB_APP_PRIVATE_KEY or GITHUB_APP_PRIVATE_KEY_SECRET_ARN)") + return fmt.Errorf("private key is required (GITHUB_APP_PRIVATE_KEY or GITHUB_APP_PRIVATE_KEY_SECRET_ARN)") } - return cfg, nil + return nil } func getSecret(ctx context.Context, client SecretsReader, arn string) (string, error) { diff --git a/lambda/internal/ec2/launcher.go b/lambda/internal/ec2/launcher.go index 58c7101..c5a2f6f 100644 --- a/lambda/internal/ec2/launcher.go +++ 
b/lambda/internal/ec2/launcher.go @@ -9,8 +9,8 @@ import ( "github.com/aws/aws-sdk-go-v2/service/ec2/types" ) -// EC2API abstracts the EC2 RunInstances API for testing. -type EC2API interface { +// API abstracts the EC2 RunInstances API for testing. +type API interface { RunInstances(ctx context.Context, input *ec2.RunInstancesInput, opts ...func(*ec2.Options)) (*ec2.RunInstancesOutput, error) TerminateInstances(ctx context.Context, input *ec2.TerminateInstancesInput, opts ...func(*ec2.Options)) (*ec2.TerminateInstancesOutput, error) DescribeInstances(ctx context.Context, input *ec2.DescribeInstancesInput, opts ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error) @@ -18,11 +18,11 @@ type EC2API interface { // Launcher manages EC2 instance lifecycle for runners. type Launcher struct { - client EC2API + client API } // NewLauncher creates a Launcher with the given EC2 client. -func NewLauncher(client EC2API) *Launcher { +func NewLauncher(client API) *Launcher { return &Launcher{client: client} } diff --git a/lambda/internal/github/client.go b/lambda/internal/github/client.go index 4be232f..982ad0f 100644 --- a/lambda/internal/github/client.go +++ b/lambda/internal/github/client.go @@ -77,7 +77,7 @@ func installationTokenWithBase(ctx context.Context, appID, privateKeyPEM string, req.Header.Set("Authorization", "Bearer "+jwtStr) req.Header.Set("X-GitHub-Api-Version", "2022-11-28") - resp, err := http.DefaultClient.Do(req) + resp, err := http.DefaultClient.Do(req) //nolint:gosec // G704: URL from GitHub API constant if err != nil { return "", fmt.Errorf("request installation token: %w", err) } @@ -126,7 +126,7 @@ func (c *Client) GenerateJITConfig(ctx context.Context, ownerRepo string, name s req.Header.Set("X-GitHub-Api-Version", "2022-11-28") req.Header.Set("Content-Type", "application/json") - resp, err := c.httpClient.Do(req) + resp, err := c.httpClient.Do(req) //nolint:gosec // G704: URL from GitHub API constant if err != nil { return nil, 
fmt.Errorf("request JIT config: %w", err) } diff --git a/lambda/internal/runner/cleanup.go b/lambda/internal/runner/cleanup.go index 74f0552..19aff41 100644 --- a/lambda/internal/runner/cleanup.go +++ b/lambda/internal/runner/cleanup.go @@ -58,21 +58,8 @@ func (c *Cleaner) Run(ctx context.Context) (*CleanupResult, error) { return result, err } staleThreshold := now - int64(c.staleThresholdMinutes*60) - for _, r := range pending { - if r.CreatedAt < staleThreshold { - log.Printf("cleanup: terminating stale pending runner %s (instance %s)", r.RunnerID, r.InstanceID) - if err := c.ec2.Terminate(ctx, r.InstanceID); err != nil { - log.Printf("cleanup: failed to terminate %s: %v", r.InstanceID, err) - result.Errors++ - continue - } - if err := c.store.UpdateStatus(ctx, r.Repository, r.JobID, StatusFailed); err != nil { - log.Printf("cleanup: failed to update status for %s: %v", r.RunnerID, err) - result.Errors++ - continue - } - result.StaleTerminated++ - } + if err := c.cleanupStaleInstances(ctx, pending, staleThreshold, "pending", result); err != nil { + return result, err } // 2. Clean up stuck "running" instances. @@ -81,9 +68,35 @@ func (c *Cleaner) Run(ctx context.Context) (*CleanupResult, error) { return result, err } maxAgeThreshold := now - int64(c.maxAgeMinutes*60) - for _, r := range running { - if r.CreatedAt < maxAgeThreshold { - log.Printf("cleanup: terminating stuck running runner %s (instance %s)", r.RunnerID, r.InstanceID) + if err := c.cleanupStaleInstances(ctx, running, maxAgeThreshold, "running", result); err != nil { + return result, err + } + + // 3. Detect orphaned EC2 instances (tagged but not in DynamoDB). + allRecords := append(pending, running...) 
+ completed, err := c.store.ListByStatus(ctx, StatusCompleted) + if err != nil { + return result, fmt.Errorf("list completed runners: %w", err) + } + failed, err := c.store.ListByStatus(ctx, StatusFailed) + if err != nil { + return result, fmt.Errorf("list failed runners: %w", err) + } + allRecords = append(allRecords, completed...) + allRecords = append(allRecords, failed...) + + if err := c.reconcileOrphanInstances(ctx, allRecords, result); err != nil { + return result, err + } + + return result, nil +} + +// cleanupStaleInstances terminates instances that have been in the given status longer than the threshold. +func (c *Cleaner) cleanupStaleInstances(ctx context.Context, records []*Record, threshold int64, statusLabel string, result *CleanupResult) error { + for _, r := range records { + if r.CreatedAt < threshold { + log.Printf("cleanup: terminating stale %s runner %s (instance %s)", statusLabel, r.RunnerID, r.InstanceID) if err := c.ec2.Terminate(ctx, r.InstanceID); err != nil { log.Printf("cleanup: failed to terminate %s: %v", r.InstanceID, err) result.Errors++ @@ -97,25 +110,17 @@ func (c *Cleaner) Run(ctx context.Context) (*CleanupResult, error) { result.StaleTerminated++ } } + return nil +} - // 3. Detect orphaned EC2 instances (tagged but not in DynamoDB). +// reconcileOrphanInstances finds EC2 instances not tracked in DynamoDB and terminates them. +func (c *Cleaner) reconcileOrphanInstances(ctx context.Context, knownRecords []*Record, result *CleanupResult) error { managedIDs, err := c.ec2.ListManagedInstances(ctx) if err != nil { - return result, err + return err } knownIDs := make(map[string]bool) - allRecords := append(pending, running...) 
- completed, err := c.store.ListByStatus(ctx, StatusCompleted) - if err != nil { - return result, fmt.Errorf("list completed runners: %w", err) - } - failed, err := c.store.ListByStatus(ctx, StatusFailed) - if err != nil { - return result, fmt.Errorf("list failed runners: %w", err) - } - allRecords = append(allRecords, completed...) - allRecords = append(allRecords, failed...) - for _, r := range allRecords { + for _, r := range knownRecords { knownIDs[r.InstanceID] = true } for _, id := range managedIDs { @@ -129,6 +134,5 @@ func (c *Cleaner) Run(ctx context.Context) (*CleanupResult, error) { result.OrphanTerminated++ } } - - return result, nil + return nil } diff --git a/lambda/internal/sqs/publisher.go b/lambda/internal/sqs/publisher.go index fbd2003..7d0caba 100644 --- a/lambda/internal/sqs/publisher.go +++ b/lambda/internal/sqs/publisher.go @@ -11,19 +11,19 @@ import ( const defaultDelaySeconds = 30 -// SQSSender abstracts the SQS SendMessage API for testing. -type SQSSender interface { +// Sender abstracts the SQS SendMessage API for testing. +type Sender interface { SendMessage(ctx context.Context, input *sqs.SendMessageInput, opts ...func(*sqs.Options)) (*sqs.SendMessageOutput, error) } // Publisher sends scale-up messages to SQS. type Publisher struct { - client SQSSender + client Sender queueURL string } // NewPublisher creates a Publisher for the given queue URL. -func NewPublisher(client SQSSender, queueURL string) *Publisher { +func NewPublisher(client Sender, queueURL string) *Publisher { return &Publisher{ client: client, queueURL: queueURL,