From 5492c114ab9e11b75f8683d9da1a752fd4822d3e Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Thu, 14 May 2026 13:05:54 +0100 Subject: [PATCH] feat: add paths-ignore input; default-skip vendored/fixture content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validators that scan content patterns must distinguish a target file from a vendored / fixture / training-corpus file that legitimately contains the very pattern being checked. Same architectural lesson as hyperpolymath/hypatia#243 — that scanner had 11/11 `secret_detected` findings turn out to be FPs in `.audittraining/`, `lib/rules/`, `scripts/fix-scripts/`, and `test/` paths until provenance was added. This action was firing "Missing required identity field" on every .a2ml file in `verified-container-spec/` (a vendored project tree) consumed by stapeln and other sibling repos. The vendored manifests have their own identity declarations in their upstream context — flagging them here is noise that no consumer can usefully act on. New input: paths-ignore: | vendor/ vendored/ verified-container-spec/ .audittraining/ integration/fixtures/ test/fixtures/ tests/fixtures/ Newline-separated, substring match against each candidate file's path, default-on so consumers benefit without editing every workflow. Pass an empty string to disable. README updated with rationale + usage. validate-a2ml.sh prints a notice for each skipped file count so the carve-out is auditable in the action log. Co-Authored-By: Claude Opus 4.7 --- README.adoc | 21 +++++++++++++++++++++ action.yml | 20 ++++++++++++++++++++ validate-a2ml.sh | 41 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/README.adoc b/README.adoc index 9a2e4ed..8becf25 100644 --- a/README.adoc +++ b/README.adoc @@ -40,6 +40,8 @@ jobs: with: path: '.' 
# Directory to scan (default: repo root) strict: 'false' # Promote warnings to errors (default: false) + # paths-ignore: defaults to vendored / fixture patterns; override + # via newline-separated string. Use '' to disable. ---- === Inputs @@ -55,8 +57,27 @@ jobs: | `strict` | `false` | When `true`, warnings become errors and the action fails on any issue + +| `paths-ignore` +| _vendored & fixture defaults_ +| Newline-separated path fragments to skip. Substring match against each + file path. Default set: `vendor/`, `vendored/`, `verified-container-spec/`, + `.audittraining/`, `integration/fixtures/`, `test/fixtures/`, + `tests/fixtures/`. Pass an empty string (`paths-ignore: ''`) to disable + and scan everything. See https://github.com/hyperpolymath/hypatia/pull/243 + for the architectural rationale (content-pattern validators must + distinguish targets from fixtures / vendored / training-corpus files + that legitimately contain the very pattern being checked). |=== +==== Why default-on path exemptions? + +A2ML files inside vendored projects (e.g. `verified-container-spec/`) have +their own identity declarations elsewhere or are themselves training corpora. +Flagging every such file as "missing identity field" is provenance noise, +not signal. The defaults match the canonical RSR vendored-content paths; +override for project-specific carve-outs. + === Outputs [cols="1,3"] diff --git a/action.yml b/action.yml index 973549d..524f6b2 100644 --- a/action.yml +++ b/action.yml @@ -29,6 +29,25 @@ inputs: will fail on any validation issue. Defaults to false. required: false default: 'false' + paths-ignore: + description: >- + Newline-separated path fragments to skip. Each line is matched as a + substring against the file's path. Defaults to common vendored / + training-corpus / fixture patterns so consumers don't have to repeat + this carve-out in every repo. Pass an empty string to disable. 
SCAN_PATH="${INPUT_PATH:-.}"
STRICT="${INPUT_STRICT:-false}"
PATHS_IGNORE_RAW="${INPUT_PATHS_IGNORE:-}"

# Ignore fragments taken from the paths-ignore input. Filled by
# parse_paths_ignore and consulted by path_ignored. Rationale (adopted from
# hyperpolymath/hypatia#243): a content-pattern validator must distinguish a
# target file from a vendored / fixture file that legitimately contains the
# very pattern being checked.
PATHS_IGNORE=()

#######################################
# Parse a newline-separated paths-ignore value into PATHS_IGNORE.
# Every line is trimmed of leading/trailing whitespace; lines that are then
# empty, or that start with '#', are dropped. Remaining lines are stored
# verbatim, in input order.
# Globals:   PATHS_IGNORE (reset, then appended to)
# Arguments: $1 - raw fragment list, one fragment per line (may be empty)
# Returns:   0
#######################################
parse_paths_ignore() {
  local raw="${1-}" frag
  PATHS_IGNORE=()
  while IFS= read -r frag; do
    # Strip leading, then trailing, whitespace using parameter expansion only.
    frag="${frag#"${frag%%[![:space:]]*}"}"
    frag="${frag%"${frag##*[![:space:]]}"}"
    if [[ -z "$frag" || "$frag" == \#* ]]; then
      continue
    fi
    PATHS_IGNORE+=("$frag")
  done <<< "$raw"
  return 0
}

#######################################
# Decide whether a path is excluded by the paths-ignore list.
# A path is excluded when any PATHS_IGNORE fragment occurs anywhere inside it
# as a literal substring (so "vendor/" also matches "myvendor/x.a2ml").
# Globals:   PATHS_IGNORE (read)
# Arguments: $1 - file path to test
# Returns:   0 if the path should be skipped, 1 otherwise
#######################################
path_ignored() {
  local p="$1" frag
  # The ${arr[@]+...} form expands to nothing when the list is empty (e.g.
  # paths-ignore: ''). A plain "${PATHS_IGNORE[@]}" aborts with "unbound
  # variable" under `set -u` on bash releases older than 4.4.
  for frag in ${PATHS_IGNORE[@]+"${PATHS_IGNORE[@]}"}; do
    [[ "$p" == *"$frag"* ]] && return 0
  done
  return 1
}

parse_paths_ignore "$PATHS_IGNORE_RAW"

# Counters
FILES_SCANNED=0
echo ""

#######################################
# Discover the .a2ml files to validate and apply the paths-ignore filter.
# Runs find under SCAN_PATH for regular files named *.a2ml, leaving out
# anything below a .git directory, and sorts the result. Each candidate is
# handed to path_ignored exactly as find printed it (i.e. including the
# SCAN_PATH prefix); ignored candidates are only counted.
# Globals:   SCAN_PATH (read)
#            a2ml_candidates (written: every file found)
#            a2ml_files (written: files that survive the filter)
#            SKIPPED (written: number of ignored files)
# Arguments: none
# Returns:   0
#######################################
collect_a2ml_files() {
  local candidate
  # Find all .a2ml files, excluding .git directory
  mapfile -t a2ml_candidates < <(find "$SCAN_PATH" -name '*.a2ml' -not -path '*/.git/*' -type f | sort)

  a2ml_files=()
  SKIPPED=0
  # ${arr[@]+...} keeps an empty find result from aborting the script with
  # "unbound variable" under `set -u` on bash releases older than 4.4.
  for candidate in ${a2ml_candidates[@]+"${a2ml_candidates[@]}"}; do
    if path_ignored "$candidate"; then
      SKIPPED=$((SKIPPED + 1))
    else
      a2ml_files+=("$candidate")
    fi
  done
  return 0
}

collect_a2ml_files

# Surface the carve-out in the action log so skipped files are auditable.
if [[ $SKIPPED -gt 0 ]]; then
  echo "::notice::Skipped ${SKIPPED} file(s) matching paths-ignore"
fi