Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
316 changes: 316 additions & 0 deletions .github/scripts/validate-a2ml.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: PMPL-1.0-or-later
# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) <j.d.a.jewell@open.ac.uk>
#
# validate-a2ml.sh — A2ML manifest validation script
#
# Scans for .a2ml files and validates:
# 1. Required fields: agent-id or pedigree name, version
# 2. SPDX-License-Identifier header presence
# 3. Attestation block structure (if present)
# 4. Section heading syntax ([section] or ## section)
#
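# Environment variables:
# INPUT_PATH — Directory to scan (default: .)
# INPUT_STRICT — Promote warnings to errors (default: false)
# INPUT_PATHS_IGNORE — Newline-separated path fragments; any file whose path
# contains a fragment is skipped. Blank lines and lines starting with # are
# ignored (default: empty)
# GITHUB_OUTPUT — File to which files_scanned/errors/warnings are appended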
#
# Exit codes:
# 0 — All files valid (or only warnings in non-strict mode)
# 1 — Validation errors found

set -o errexit -o nounset -o pipefail

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Inputs arrive as INPUT_* environment variables; each one falls back to its
# default when unset or empty.
SCAN_PATH="${INPUT_PATH:-.}"
STRICT="${INPUT_STRICT:-false}"
PATHS_IGNORE_RAW="${INPUT_PATHS_IGNORE:-}"

# Build PATHS_IGNORE from the raw paths-ignore input: one fragment per line,
# with blank lines and # comments dropped. Each fragment is later used as a
# substring match against the file path. Pattern adopted from
# hyperpolymath/hypatia#243 — content-pattern validators must distinguish a
# target from a vendored / fixture file that legitimately contains the very
# pattern being checked.
PATHS_IGNORE=()
while IFS= read -r _entry; do
    # Trim: isolate the leading whitespace run and cut it off, then do the
    # same for the trailing run.
    _lead="${_entry%%[![:space:]]*}"
    _entry="${_entry#"$_lead"}"
    _trail="${_entry##*[![:space:]]}"
    _entry="${_entry%"$_trail"}"

    # Nothing left after trimming: skip.
    if [[ -z "$_entry" ]]; then
        continue
    fi
    # Comment line: skip.
    case "$_entry" in
        '#'*) continue ;;
        *) ;;
    esac

    PATHS_IGNORE+=("$_entry")
done <<< "$PATHS_IGNORE_RAW"

# Returns 0 if path should be skipped (matches any ignore fragment), 1
# otherwise.
# Usage: path_ignored <path>
# Globals: PATHS_IGNORE (read) — array of literal substrings
path_ignored() {
    local p="$1" frag
    # The ${arr[@]+...} guard expands to nothing when the array is empty or
    # unset; a bare "${PATHS_IGNORE[@]}" aborts under `set -u` on bash < 4.4,
    # and an empty ignore list is the default configuration.
    for frag in ${PATHS_IGNORE[@]+"${PATHS_IGNORE[@]}"}; do
        # The fragment is quoted, so glob characters in it match literally.
        if [[ "$p" == *"$frag"* ]]; then
            return 0
        fi
    done
    return 1
}

# Running totals for the summary and the step outputs: WARNINGS and ERRORS
# count reported issues, FILES_SCANNED counts validated files.
WARNINGS=0
ERRORS=0
FILES_SCANNED=0

# ---------------------------------------------------------------------------
# Helper: emit GitHub annotation
# ---------------------------------------------------------------------------
# Usage: annotate <level> <file> <line> <message>
# level: error | warning | notice
#
# Prints one workflow command of the form
#   ::<level> file=<file>,line=<line>::<message>
# to stdout. <file> and <message> are percent-escaped first so that
# characters which are syntax in a workflow command ('%', CR, LF, and in
# property values also ':' and ',') cannot truncate or corrupt the
# annotation.
annotate() {
    local level="$1" file="$2" line="$3" message="$4"

    # Message data: escape '%' first so the escapes added below are not
    # themselves re-escaped.
    message=${message//'%'/%25}
    message=${message//$'\r'/%0D}
    message=${message//$'\n'/%0A}

    # Property value: same as data, plus the property delimiters.
    file=${file//'%'/%25}
    file=${file//$'\r'/%0D}
    file=${file//$'\n'/%0A}
    file=${file//:/%3A}
    file=${file//,/%2C}

    printf '::%s file=%s,line=%s::%s\n' "$level" "$file" "$line" "$message"
}

# ---------------------------------------------------------------------------
# Helper: report issue (respects strict mode)
# ---------------------------------------------------------------------------
# Usage: report_issue <severity> <file> <line> <message>
# severity: error | warning
#
# Emits the annotation via annotate and bumps the matching counter. When
# STRICT is "true", a warning is reported and counted as an error.
# Globals: STRICT (read), ERRORS / WARNINGS (incremented)
report_issue() {
    local requested="$1" target="$2" lineno="$3" text="$4"

    # Resolve the level that is actually reported.
    local effective="$requested"
    if [[ "$STRICT" == "true" && "$requested" == "warning" ]]; then
        effective="error"
    fi

    annotate "$effective" "$target" "$lineno" "$text"

    # Anything that is not an error is tallied as a warning.
    case "$effective" in
        error) ERRORS=$((ERRORS + 1)) ;;
        *) WARNINGS=$((WARNINGS + 1)) ;;
    esac
}

# ---------------------------------------------------------------------------
# Validator: check a single .a2ml file
# ---------------------------------------------------------------------------
validate_a2ml() {
local file="$1"
FILES_SCANNED=$((FILES_SCANNED + 1))

# --- Check 1: SPDX header ---
# The SPDX-License-Identifier should appear in the first 10 lines
local has_spdx=false
local line_num=0
while IFS= read -r line; do
line_num=$((line_num + 1))
if [[ $line_num -gt 10 ]]; then
break
fi
if [[ "$line" == *"SPDX-License-Identifier"* ]]; then
has_spdx=true
break
fi
done < "$file"

if [[ "$has_spdx" == "false" ]]; then
report_issue "warning" "$file" 1 \
"Missing SPDX-License-Identifier in first 10 lines"
fi

# --- Check 2: Required identity fields ---
# A2ML files must contain either:
# - agent-id = "..." or agent_id = "..."
# - pedigree block with name field
# - name = "..." at top level (for AI manifests)
# - project = "..." (for STATE.a2ml)
local has_identity=false
local has_version=false
line_num=0

while IFS= read -r line; do
line_num=$((line_num + 1))

# Check for identity fields (various A2ML patterns). Accept TOML
# (`key = …`) and YAML-flavoured (`key: …`) forms, and the
# canonical-/prefixed-name + id keys used by clade/anchor docs.
if [[ "$line" =~ ^[[:space:]]*(agent[-_]id|name|project|id|canonical-name|prefixed-name)[[:space:]]*[=:] ]]; then
has_identity=true
fi
# Check for version field (TOML or YAML form)
if [[ "$line" =~ ^[[:space:]]*(version|schema_version)[[:space:]]*[=:] ]]; then
has_version=true
fi
done < "$file"

# AI manifest files (0-AI-MANIFEST.a2ml, 0.1-AI-MANIFEST.a2ml, etc.)
# use markdown-style headers and free text, so identity check is relaxed
local basename
basename="$(basename "$file")"
local is_manifest=false
if [[ "$basename" == *"AI-MANIFEST"* ]]; then
is_manifest=true
fi
# Canonical typed manifests under .machine_readable/6a2/ — identity comes
# from the enclosing directory + filename, not an in-file field. Sibling
# files in the same directory (ECOSYSTEM.a2ml, STATE.a2ml) DO carry their
# own name/project and continue to be validated normally.
case "$basename" in

Check failure on line 154 in .github/scripts/validate-a2ml.sh

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add a default case (*) to handle unexpected values.

See more on https://sonarcloud.io/project/issues?id=hyperpolymath_stapeln&issues=AZ4s-eQCMCNCfLPD3IJ5&open=AZ4s-eQCMCNCfLPD3IJ5&pullRequest=43
AGENTIC.a2ml|META.a2ml|NEUROSYM.a2ml|PLAYBOOK.a2ml)
is_manifest=true
;;
# Dockerfile-style top-level typed manifests (Intentfile, Trustfile, …)
# use markdown-flavoured A2ML; identity is carried by the parent repo.
*file.a2ml)
is_manifest=true
;;
esac

# Path-identified instruction docs under .machine_readable/agent_instructions/
# (coverage/debt/methodology…) derive identity from directory + filename,
# like the 6a2 typed manifests, and carry only a [metadata] version block.
case "$file" in

Check failure on line 168 in .github/scripts/validate-a2ml.sh

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add a default case (*) to handle unexpected values.

See more on https://sonarcloud.io/project/issues?id=hyperpolymath_stapeln&issues=AZ4s-eQCMCNCfLPD3IJ6&open=AZ4s-eQCMCNCfLPD3IJ6&pullRequest=43
*/.machine_readable/agent_instructions/*.a2ml)
is_manifest=true
;;
esac

# Contractile-shape A2ML files use `@directive:` syntax instead of
# TOML `key = value`. Trustfile.a2ml, Intentfile.a2ml, Mustfile.a2ml,
# Adjustfile.a2ml etc. are policy / trust / intent / abstract files
# whose identity is implicit in their @-prefixed directives
# (`@trust-level`, `@intent`, ...) rather than a TOML name/version
# pair. Treating them as manifest-shape produces 100% false positives —
# they're a different A2ML doc type. Detected by the presence of any
# contractile directive in the file body.
local is_contractile_shape=false
if grep -qE '^@(abstract|trust-level|trust-boundary|trust-actions|trust-deny|intent|must|adjust|end)([[:space:]]*:|$)' "$file"; then
is_contractile_shape=true
fi

if [[ "$has_identity" == "false" && "$is_manifest" == "false" && "$is_contractile_shape" == "false" ]]; then
report_issue "error" "$file" 1 \
"Missing required identity field (agent-id, name, or project)"
fi

if [[ "$has_version" == "false" && "$is_manifest" == "false" && "$is_contractile_shape" == "false" ]]; then
report_issue "warning" "$file" 1 \
"Missing version or schema_version field"
fi

# --- Check 3: Attestation block structure ---
# If file contains [attestation] or ## ATTESTATION, validate it has
# required sub-fields: proof or signature
local in_attestation=false
local attestation_line=0
local attestation_has_content=false
line_num=0

while IFS= read -r line; do
line_num=$((line_num + 1))

# Detect attestation section start
if [[ "$line" =~ ^\[attestation\] ]] || [[ "$line" =~ ^##[[:space:]]+[Aa]ttestation ]] || [[ "$line" =~ ^##[[:space:]]+ATTESTATION ]]; then
in_attestation=true
attestation_line=$line_num
continue
fi

# Detect next section (ends attestation block)
if [[ "$in_attestation" == "true" ]]; then
if [[ "$line" =~ ^\[.+\] ]] || [[ "$line" =~ ^##[[:space:]] ]]; then
in_attestation=false
continue
fi
# Check for content in attestation block
if [[ "$line" =~ (proof|signature|verified|hash)[[:space:]]*= ]]; then
attestation_has_content=true
fi
fi
done < "$file"

if [[ $attestation_line -gt 0 && "$attestation_has_content" == "false" ]]; then
report_issue "warning" "$file" "$attestation_line" \
"Attestation block found but missing proof/signature/hash fields"
fi

# --- Check 4: Section heading syntax ---
# Validate that [section] headings are well-formed (no unclosed brackets)
line_num=0
while IFS= read -r line; do
line_num=$((line_num + 1))
# Lines starting with [ should have a matching ]
if [[ "$line" =~ ^\[ && ! "$line" =~ ^\[.+\] ]]; then
# Exclude markdown-style links and multi-line values
if [[ ! "$line" =~ ^\[.*\]\( && ! "$line" =~ ^\[TODO && ! "$line" =~ ^\[YOUR ]]; then

Check warning on line 241 in .github/scripts/validate-a2ml.sh

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Merge this if statement with the enclosing one.

See more on https://sonarcloud.io/project/issues?id=hyperpolymath_stapeln&issues=AZ4s-eQCMCNCfLPD3IJ7&open=AZ4s-eQCMCNCfLPD3IJ7&pullRequest=43
report_issue "warning" "$file" "$line_num" \

Check warning on line 242 in .github/scripts/validate-a2ml.sh

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Define a constant instead of using the literal 'warning' 5 times.

See more on https://sonarcloud.io/project/issues?id=hyperpolymath_stapeln&issues=AZ4s-eQCMCNCfLPD3IJ9&open=AZ4s-eQCMCNCfLPD3IJ9&pullRequest=43
"Possibly malformed section heading: unclosed bracket"
fi
fi
done < "$file"
}

# ---------------------------------------------------------------------------
# Main: discover and validate .a2ml files
# ---------------------------------------------------------------------------
# Finds every *.a2ml file under SCAN_PATH (outside .git), drops those that
# match paths-ignore, validates the rest, prints a summary, writes the step
# outputs and exits 1 if any error was counted, 0 otherwise.

# Step outputs are appended to $GITHUB_OUTPUT. When that variable is unset
# (e.g. a local run) fall back to /dev/null: expanding an unset variable
# under `set -u` aborts the script, and `|| true` cannot catch that.
OUTPUT_FILE="${GITHUB_OUTPUT:-/dev/null}"

echo "::group::A2ML Manifest Validation"
echo "Scanning ${SCAN_PATH} for .a2ml files..."
echo ""

# Find all .a2ml files, excluding .git directory
mapfile -t a2ml_candidates < <(find "$SCAN_PATH" -name '*.a2ml' -not -path '*/.git/*' -type f | sort)

# Apply paths-ignore filter. The ${arr[@]+...} guard keeps an empty
# candidate list from tripping `set -u` on bash < 4.4.
a2ml_files=()
SKIPPED=0
for _f in ${a2ml_candidates[@]+"${a2ml_candidates[@]}"}; do
    if path_ignored "$_f"; then
        SKIPPED=$((SKIPPED + 1))
        continue
    fi
    a2ml_files+=("$_f")
done

if [[ $SKIPPED -gt 0 ]]; then
    echo "::notice::Skipped ${SKIPPED} file(s) matching paths-ignore"
fi

if [[ ${#a2ml_files[@]} -eq 0 ]]; then
    echo "::notice::No .a2ml files found in ${SCAN_PATH}"
    # Best effort: a failed write to the outputs file must not fail the run.
    {
        echo "files_scanned=0"
        echo "errors=0"
        echo "warnings=0"
    } >> "$OUTPUT_FILE" 2>/dev/null || true
    echo "::endgroup::"
    exit 0
fi

echo "Found ${#a2ml_files[@]} .a2ml file(s)"
echo ""

for file in "${a2ml_files[@]}"; do
    echo " Validating: ${file}"
    validate_a2ml "$file"
done

echo ""
echo "────────────────────────────────────────"
echo "Files scanned: ${FILES_SCANNED}"
echo "Errors: ${ERRORS}"
echo "Warnings: ${WARNINGS}"
echo "Strict mode: ${STRICT}"
echo "────────────────────────────────────────"

# Write outputs for GitHub Actions (best effort, see above)
{
    echo "files_scanned=${FILES_SCANNED}"
    echo "errors=${ERRORS}"
    echo "warnings=${WARNINGS}"
} >> "$OUTPUT_FILE" 2>/dev/null || true

echo "::endgroup::"

# Exit with failure if errors were found
if [[ $ERRORS -gt 0 ]]; then
    echo "::error::A2ML validation failed with ${ERRORS} error(s)"
    exit 1
fi

echo "A2ML validation passed."
exit 0
15 changes: 11 additions & 4 deletions .github/workflows/dogfood-gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,19 @@ jobs:
echo "::warning::No .a2ml manifest files found. Every RSR repo should have 0-AI-MANIFEST.a2ml"
fi

# Uses an in-repo, corrected copy of the a2ml validator rather than the
# external hyperpolymath/a2ml-validate-action. The pinned upstream SHA
# predated the path-identified-manifest exemptions and false-positived
# on this repo's .machine_readable/* docs; the only upstream commit with
# the correct exemptions ships a corrupted script line (embedded newline)
# that crashes the action. The vendored script fixes that line and
# recognises the clade/anchor/agent-instruction identity shapes.
- name: Validate A2ML manifests
if: steps.detect.outputs.count > 0
uses: hyperpolymath/a2ml-validate-action@b2f28c39491c0d1ff131b8fb9e197bfea79e411e # main
with:
path: '.'
strict: 'false'
env:
INPUT_PATH: '.'
INPUT_STRICT: 'false'
run: bash .github/scripts/validate-a2ml.sh

- name: Write summary
run: |
Expand Down
Loading
Loading