diff --git a/.github/workflows/lint-skills.yml b/.github/workflows/lint-skills.yml index 4d15f822..4e716a92 100644 --- a/.github/workflows/lint-skills.yml +++ b/.github/workflows/lint-skills.yml @@ -13,42 +13,5 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Validate frontmatter in skill and role files - run: | - EXIT_CODE=0 - REQUIRED_FIELDS=("name" "description" "version" "author" "license" "injection-hardened" "allowed-tools" "tags" "role" "phase" "frameworks" "difficulty" "time_estimate") - - FILES=$(find skills/ roles/ -name 'SKILL.md' 2>/dev/null || true) - - if [ -z "$FILES" ]; then - echo "No .md files found in skills/ or roles/." - exit 0 - fi - - while IFS= read -r file; do - echo "Checking: $file" - - FRONTMATTER=$(awk '/^---$/{if(++c==2) exit} c==1' "$file") - - if [ -z "$FRONTMATTER" ]; then - echo " ERROR: No YAML frontmatter found (missing --- delimiters)" - EXIT_CODE=1 - continue - fi - - for field in "${REQUIRED_FIELDS[@]}"; do - if ! echo "$FRONTMATTER" | grep -qE "^${field}:"; then - echo " ERROR: Missing required field: $field" - EXIT_CODE=1 - fi - done - done <<< "$FILES" - - if [ "$EXIT_CODE" -ne 0 ]; then - echo "" - echo "FAIL: One or more files have missing required frontmatter fields." - exit 1 - fi - - echo "" - echo "All frontmatter checks passed." + - name: Validate skill schema + run: ruby scripts/validate_skill_schema.rb diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 99d3793a..033e3034 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -144,7 +144,13 @@ argument-hint: "[target-file-or-directory]" ``` Use [SKILL_TEMPLATE.md](SKILL_TEMPLATE.md) as the source of truth for the -required body sections and submission checklist. +required body sections and submission checklist. The machine-readable +frontmatter contract lives in [schemas/skill.schema.json](schemas/skill.schema.json) +and is enforced by CI. 
Run it locally before opening a PR: + +```bash +ruby scripts/validate_skill_schema.rb +``` --- diff --git a/README.md b/README.md index 786fd5b3..dc29dfcb 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,14 @@ argument-hint: "[target-file-or-directory]" # context: fork # optional ``` +The machine-readable schema for this frontmatter lives at +[`schemas/skill.schema.json`](schemas/skill.schema.json). Validate all skills +and role bundles locally with: + +```bash +ruby scripts/validate_skill_schema.rb +``` + ### Progressive disclosure (keep `SKILL.md` lean) Claude's skill guidance: when a `SKILL.md` would exceed ~500 lines, **don't inline everything** — split detail into sibling reference files in the same directory and link to them from `SKILL.md`. The agent loads a reference only when it needs it, so the entrypoint stays cheap to load. diff --git a/schemas/skill.schema.json b/schemas/skill.schema.json new file mode 100644 index 00000000..077ff508 --- /dev/null +++ b/schemas/skill.schema.json @@ -0,0 +1,138 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/UnitOneAI/SecuritySkills/schemas/skill.schema.json", + "title": "SecuritySkills SKILL.md Frontmatter", + "description": "Canonical machine-readable contract for SecuritySkills SKILL.md frontmatter. The Markdown body remains the place for detailed outputs, evidence, remediation, and references, while this schema validates the metadata agents and CI need for discovery.", + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "description", + "tags", + "role", + "phase", + "frameworks", + "difficulty", + "time_estimate", + "version", + "author", + "license", + "allowed-tools", + "injection-hardened" + ], + "properties": { + "name": { + "type": "string", + "pattern": "^[a-z0-9]+(?:-[a-z0-9]+)*$", + "description": "Kebab-case skill identifier. For skills, this must match the skill directory name." 
+ }, + "description": { + "type": "string", + "minLength": 40, + "description": "Agent-facing summary of what the skill does and when it should be invoked." + }, + "tags": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "pattern": "^[a-z0-9]+(?:-[a-z0-9]+)*$" + }, + "description": "Discovery tags covering domain, activity, technology, or role context." + }, + "role": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "pattern": "^[a-z0-9]+(?:-[a-z0-9]+)*$" + }, + "description": "Role bundles that should include or invoke this skill." + }, + "phase": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "pattern": "^[a-z0-9]+(?:-[a-z0-9]+)*$" + }, + "description": "Lifecycle phases where the skill applies." + }, + "frameworks": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "minLength": 2 + }, + "description": "Frameworks, standards, taxonomies, or benchmark versions cited by the skill. Control IDs in findings must resolve to these references." + }, + "difficulty": { + "type": "string", + "enum": ["beginner", "intermediate", "advanced"], + "description": "Expected operator skill level." + }, + "time_estimate": { + "type": "string", + "minLength": 3, + "description": "Expected time to run the skill for a typical target." + }, + "version": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "description": "Semantic version for the skill contract and content." + }, + "author": { + "type": "string", + "minLength": 2, + "description": "GitHub handle, organization, or maintainer identity." + }, + "license": { + "type": "string", + "minLength": 2, + "description": "License covering the skill content." + }, + "allowed-tools": { + "oneOf": [ + { + "type": "string", + "minLength": 2 + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "minLength": 2 + } + } + ], + "description": "Tool names the skill may use. 
Existing skills may use a comma-separated string; new tooling may normalize this to an array." + }, + "injection-hardened": { + "type": "boolean", + "description": "True only after the skill has been reviewed against prompt injection guidance." + }, + "argument-hint": { + "type": "string", + "minLength": 2, + "description": "Optional invocation argument hint shown to users or agents." + }, + "context": { + "type": "string", + "minLength": 2, + "description": "Optional execution context hint." + }, + "disable-model-invocation": { + "type": "boolean", + "description": "Optional role-bundle guard for workflows that should not directly invoke a model." + } + }, + "x-securityskills": { + "bodySections": { + "outputs": "Document expected findings, evidence, remediation, or deliverables in the Markdown body.", + "references": "Document authoritative framework and control references in the Markdown body or sibling reference files." + }, + "referenceFiles": "Long framework tables, tool rules, benchmark checklists, and language-specific guidance should live in sibling Markdown files linked from SKILL.md." + } +} diff --git a/scripts/validate_skill_schema.rb b/scripts/validate_skill_schema.rb new file mode 100755 index 00000000..075a979c --- /dev/null +++ b/scripts/validate_skill_schema.rb @@ -0,0 +1,166 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "json" +require "yaml" + +ROOT = File.expand_path("..", __dir__) +SCHEMA_PATH = File.join(ROOT, "schemas", "skill.schema.json") +DEFAULT_GLOBS = [ + File.join(ROOT, "skills", "*", "*", "SKILL.md"), + File.join(ROOT, "roles", "*", "SKILL.md") +].freeze + +def usage + warn "Usage: ruby scripts/validate_skill_schema.rb [SKILL.md ...]" +end + +def load_schema + JSON.parse(File.read(SCHEMA_PATH)) +rescue Errno::ENOENT + abort "Schema not found: #{SCHEMA_PATH}" +rescue JSON::ParserError => e + abort "Invalid JSON schema #{SCHEMA_PATH}: #{e.message}" +end + +def skill_files(args) + files = args.empty? ? 
DEFAULT_GLOBS.flat_map { |pattern| Dir.glob(pattern) } : args + files.map { |path| File.expand_path(path, Dir.pwd) }.sort +end + +def frontmatter_for(path) + text = File.read(path) + match = text.match(/\A---\s*\n(.*?)\n---\s*(?:\n|\z)/m) + raise "missing YAML frontmatter delimited by ---" unless match + + YAML.safe_load(match[1], permitted_classes: [], aliases: false) || {} +rescue Psych::SyntaxError => e + raise "invalid YAML frontmatter: #{e.message}" +end + +def type_name(value) + case value + when String then "string" + when Array then "array" + when Hash then "object" + when TrueClass, FalseClass then "boolean" + when Integer then "integer" + when Float then "number" + when NilClass then "null" + else value.class.name + end +end + +def validate_type(value, expected) + Array(expected).any? { |type| type == type_name(value) || (type == "number" && value.is_a?(Integer)) } # JSON Schema "number" also accepts integers +end + +def validate_string(path, value, schema, errors) + return unless value.is_a?(String) + + min = schema["minLength"] + errors << "#{path} must be at least #{min} characters" if min && value.length < min + + pattern = schema["pattern"] + return unless pattern + + errors << "#{path} must match /#{pattern}/" unless Regexp.new(pattern.sub(/\A\^/) { "\\A" }.sub(/(?<!\\)\$\z/) { "\\z" }).match?(value) # JSON Schema ^/$ anchor the whole string; Ruby's are per-line +end + +def validate_array(path, value, schema, errors) + return unless value.is_a?(Array) + + min = schema["minItems"] + errors << "#{path} must contain at least #{min} item(s)" if min && value.length < min + + item_schema = schema["items"] + return unless item_schema + + value.each_with_index do |item, index| + validate_value("#{path}[#{index}]", item, item_schema, errors) + end +end + +def validate_value(path, value, schema, errors) + if schema["oneOf"] + nested = schema["oneOf"].map do |candidate| + candidate_errors = [] + validate_value(path, value, candidate, candidate_errors) + candidate_errors + end + errors << "#{path} must match one allowed schema" unless nested.one?(&:empty?) # oneOf: exactly one branch may validate
+ return + end + + expected_type = schema["type"] + if expected_type && !validate_type(value, expected_type) + errors << "#{path} must be #{Array(expected_type).join(' or ')}, got #{type_name(value)}" + return + end + + enum = schema["enum"] + errors << "#{path} must be one of #{enum.join(', ')}" if enum && !enum.include?(value) + + validate_string(path, value, schema, errors) + validate_array(path, value, schema, errors) +end + +def validate_document(frontmatter, schema) + errors = [] + + schema.fetch("required", []).each do |field| + errors << "missing required field: #{field}" unless frontmatter.key?(field) + end + + properties = schema.fetch("properties", {}) + unless schema.fetch("additionalProperties", true) + frontmatter.each_key do |key| + errors << "unknown field: #{key}" unless properties.key?(key) + end + end + + frontmatter.each do |key, value| + next unless properties.key?(key) + + validate_value(key, value, properties[key], errors) + end + + errors +end + +def validate_name_matches_path(path, frontmatter) + return [] unless path.include?("#{File::SEPARATOR}skills#{File::SEPARATOR}") + + expected = File.basename(File.dirname(path)) + actual = frontmatter["name"] + actual == expected ? [] : ["name must match skill directory '#{expected}', got '#{actual}'"] +end + +schema = load_schema +files = skill_files(ARGV) + +if files.empty? + usage + abort "No SKILL.md files found." +end + +failed = false +files.each do |path| + relative = path.delete_prefix("#{ROOT}#{File::SEPARATOR}") + begin + frontmatter = frontmatter_for(path) + errors = validate_document(frontmatter, schema) + validate_name_matches_path(path, frontmatter) + rescue StandardError => e + errors = [e.message] + end + + if errors.empty? + puts "OK: #{relative}" + else + failed = true + puts "FAIL: #{relative}" + errors.each { |error| puts " - #{error}" } + end +end + +exit(failed ? 
1 : 0) diff --git a/skills/compliance/iso27001-gap/SKILL.md b/skills/compliance/iso27001-gap/SKILL.md index ff8d0279..b7649907 100644 --- a/skills/compliance/iso27001-gap/SKILL.md +++ b/skills/compliance/iso27001-gap/SKILL.md @@ -10,7 +10,7 @@ description: > tags: [compliance, iso27001, isms] role: [vciso, security-engineer] phase: [assess, operate] -frameworks: [ISO/IEC-27001:2022, ISO/IEC-27002:2022] +frameworks: ["ISO/IEC-27001:2022", "ISO/IEC-27002:2022"] difficulty: intermediate time_estimate: "90-180min" version: "1.0.0"