From 5b12f2f42fb955002f6c158c2b08932fb012c580 Mon Sep 17 00:00:00 2001 From: Vicente-Pastor <32805335+Vicente-Pastor@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:29:46 +0100 Subject: [PATCH 1/5] test: add unit tests for URL-based MarketplaceSource and parse_agent_skills_index - TestMarketplaceSourceURL: covers source_type='url' creation, immutability, is_url_source helper, to_dict/from_dict serialization, and roundtrip for both URL and GitHub sources (backward-compat preservation) - TestParseAgentSkillsIndex: covers happy-path parsing of skill-md and archive entries, multi-skill indexes, empty lists, $schema version enforcement, skill name validation (RFC: 1-64 chars, lowercase alnum + hyphens), and mixed valid/invalid entry filtering Part of issue #676 (URL-based marketplace sources). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../test_marketplace_url_models.py | 343 ++++++++++++++++++ 1 file changed, 343 insertions(+) create mode 100644 tests/unit/marketplace/test_marketplace_url_models.py diff --git a/tests/unit/marketplace/test_marketplace_url_models.py b/tests/unit/marketplace/test_marketplace_url_models.py new file mode 100644 index 00000000..a7709499 --- /dev/null +++ b/tests/unit/marketplace/test_marketplace_url_models.py @@ -0,0 +1,343 @@ +"""Tests for URL-based marketplace source and Agent Skills index parser. + +Covers Step 1 (MarketplaceSource URL extension) and Step 4 +(parse_agent_skills_index) from the issue #676 implementation plan. 
+""" + +import pytest + +from apm_cli.marketplace.models import ( + MarketplaceSource, + parse_agent_skills_index, +) + +# --------------------------------------------------------------------------- +# Shared constants +# --------------------------------------------------------------------------- + +_VALID_DIGEST = "sha256:" + "a" * 64 +_KNOWN_SCHEMA = "https://schemas.agentskills.io/discovery/0.2.0/schema.json" + +_SINGLE_SKILL_INDEX = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + { + "name": "code-review", + "type": "skill-md", + "description": "Code review helper", + "url": "/.well-known/agent-skills/code-review/SKILL.md", + "digest": _VALID_DIGEST, + } + ], +} + + +# --------------------------------------------------------------------------- +# MarketplaceSource — URL extension (Step 1) +# --------------------------------------------------------------------------- + + +class TestMarketplaceSourceURL: + """MarketplaceSource extended with source_type='url'.""" + + def test_url_source_creation(self): + src = MarketplaceSource( + name="example-skills", + source_type="url", + url="https://example.com/.well-known/agent-skills/index.json", + ) + assert src.source_type == "url" + assert src.url == "https://example.com/.well-known/agent-skills/index.json" + assert src.owner == "" + assert src.repo == "" + + def test_github_source_type_default(self): + """Existing GitHub sources default to source_type='github'.""" + src = MarketplaceSource(name="acme", owner="acme-org", repo="plugins") + assert src.source_type == "github" + + def test_url_source_frozen(self): + src = MarketplaceSource( + name="x", source_type="url", url="https://example.com" + ) + with pytest.raises(AttributeError): + src.url = "https://other.com" + + def test_is_url_source_true(self): + src = MarketplaceSource( + name="x", source_type="url", url="https://example.com" + ) + assert src.is_url_source is True + + def test_is_url_source_false_for_github(self): + src = MarketplaceSource(name="x", owner="o", 
repo="r") + assert src.is_url_source is False + + # --- to_dict --- + + def test_url_source_to_dict_contains_source_type_and_url(self): + src = MarketplaceSource( + name="example-skills", + source_type="url", + url="https://example.com/.well-known/agent-skills/index.json", + ) + d = src.to_dict() + assert d["source_type"] == "url" + assert d["url"] == "https://example.com/.well-known/agent-skills/index.json" + + def test_url_source_to_dict_omits_owner_and_repo(self): + src = MarketplaceSource( + name="example-skills", + source_type="url", + url="https://example.com/.well-known/agent-skills/index.json", + ) + d = src.to_dict() + assert "owner" not in d + assert "repo" not in d + + def test_github_source_to_dict_omits_source_type(self): + """GitHub sources must not add source_type to preserve backward compat.""" + src = MarketplaceSource(name="acme", owner="acme-org", repo="plugins") + d = src.to_dict() + assert "source_type" not in d + + # --- from_dict --- + + def test_url_source_from_dict(self): + d = { + "name": "example-skills", + "source_type": "url", + "url": "https://example.com/.well-known/agent-skills/index.json", + } + src = MarketplaceSource.from_dict(d) + assert src.source_type == "url" + assert src.url == "https://example.com/.well-known/agent-skills/index.json" + assert src.owner == "" + assert src.repo == "" + + def test_github_from_dict_backward_compat_no_source_type(self): + """Old dicts without source_type field deserialize as 'github'.""" + d = {"name": "acme", "owner": "acme-org", "repo": "plugins"} + src = MarketplaceSource.from_dict(d) + assert src.source_type == "github" + + # --- roundtrip --- + + def test_url_source_roundtrip(self): + original = MarketplaceSource( + name="example-skills", + source_type="url", + url="https://example.com/.well-known/agent-skills/index.json", + ) + restored = MarketplaceSource.from_dict(original.to_dict()) + assert restored == original + + def test_github_source_roundtrip_unchanged(self): + """Existing GitHub 
roundtrip still works after model changes.""" + original = MarketplaceSource( + name="acme", + owner="acme-org", + repo="plugins", + host="ghe.corp.com", + branch="release", + ) + restored = MarketplaceSource.from_dict(original.to_dict()) + assert restored == original + + +# --------------------------------------------------------------------------- +# parse_agent_skills_index (Step 4) +# --------------------------------------------------------------------------- + + +class TestParseAgentSkillsIndex: + """Parser for Agent Skills Discovery RFC v0.2.0 index format.""" + + # --- happy path --- + + def test_basic_parse_returns_manifest(self): + manifest = parse_agent_skills_index(_SINGLE_SKILL_INDEX, "example-skills") + assert manifest.name == "example-skills" + assert len(manifest.plugins) == 1 + + def test_skill_entry_fields(self): + manifest = parse_agent_skills_index(_SINGLE_SKILL_INDEX, "test") + p = manifest.plugins[0] + assert p.name == "code-review" + assert p.description == "Code review helper" + assert p.source_marketplace == "test" + + def test_skill_source_contains_url_digest_and_type(self): + manifest = parse_agent_skills_index(_SINGLE_SKILL_INDEX, "test") + s = manifest.plugins[0].source + assert isinstance(s, dict) + assert s["url"] == "/.well-known/agent-skills/code-review/SKILL.md" + assert s["digest"] == _VALID_DIGEST + assert s["type"] == "skill-md" + + def test_archive_type_entry(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + { + "name": "my-toolset", + "type": "archive", + "description": "A set of tools", + "url": "/.well-known/agent-skills/my-toolset.tar.gz", + "digest": _VALID_DIGEST, + } + ], + } + manifest = parse_agent_skills_index(data, "test") + assert len(manifest.plugins) == 1 + assert manifest.plugins[0].source["type"] == "archive" + + def test_multiple_skills(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + { + "name": "skill-one", + "type": "skill-md", + "description": "First", + "url": "/a/SKILL.md", + 
"digest": _VALID_DIGEST, + }, + { + "name": "skill-two", + "type": "archive", + "description": "Second", + "url": "/b.tar.gz", + "digest": _VALID_DIGEST, + }, + ], + } + manifest = parse_agent_skills_index(data, "multi") + assert len(manifest.plugins) == 2 + assert manifest.find_plugin("skill-one") is not None + assert manifest.find_plugin("skill-two") is not None + + def test_empty_skills_list(self): + data = {"$schema": _KNOWN_SCHEMA, "skills": []} + manifest = parse_agent_skills_index(data, "test") + assert len(manifest.plugins) == 0 + + # --- $schema enforcement --- + + def test_known_schema_accepted(self): + manifest = parse_agent_skills_index(_SINGLE_SKILL_INDEX, "test") + assert len(manifest.plugins) == 1 + + def test_unknown_schema_version_raises(self): + data = { + "$schema": "https://schemas.agentskills.io/discovery/9.9.9/schema.json", + "skills": [], + } + with pytest.raises(ValueError, match="schema"): + parse_agent_skills_index(data, "test") + + def test_missing_schema_raises(self): + with pytest.raises(ValueError, match="schema"): + parse_agent_skills_index({"skills": []}, "test") + + def test_non_string_schema_raises(self): + with pytest.raises(ValueError, match="schema"): + parse_agent_skills_index({"$schema": 42, "skills": []}, "test") + + # --- skill name validation (RFC: 1-64 chars, lowercase alnum + hyphens) --- + + def test_valid_name_simple(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "my-skill", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 1 + + def test_valid_name_with_numbers(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "skill-v2-final", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 1 + + def test_invalid_name_uppercase_skipped(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "MySkill", "type": "skill-md", 
"url": "/x", "digest": _VALID_DIGEST} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 0 + + def test_invalid_name_spaces_skipped(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "bad name", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 0 + + def test_invalid_name_leading_hyphen_skipped(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "-bad", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 0 + + def test_invalid_name_trailing_hyphen_skipped(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "bad-", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 0 + + def test_invalid_name_consecutive_hyphens_skipped(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "bad--name", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 0 + + def test_invalid_name_too_long_skipped(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "a" * 65, "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 0 + + def test_missing_name_skipped(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 0 + + # --- mixed valid/invalid entries --- + + def test_only_valid_entries_returned(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "good-skill", "type": "skill-md", "url": "/a", "digest": _VALID_DIGEST}, + {"name": "Bad Skill!", "type": "skill-md", "url": "/b", "digest": _VALID_DIGEST}, + {"type": "skill-md", "url": "/c", "digest": 
_VALID_DIGEST}, # no name + ], + } + manifest = parse_agent_skills_index(data, "test") + assert len(manifest.plugins) == 1 + assert manifest.plugins[0].name == "good-skill" From eeb921c3b676877b17d5544f9c75fc609acc3aa5 Mon Sep 17 00:00:00 2001 From: Vicente-Pastor <32805335+Vicente-Pastor@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:29:46 +0100 Subject: [PATCH 2/5] feat(marketplace): URL-based Agent Skills discovery index support (#676) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements RFC v0.2.0 Agent Skills discovery for arbitrary HTTPS endpoints alongside the existing GitHub-hosted marketplace.json sources. Core changes: - models.py: extend MarketplaceSource with source_type='url' and url field; add parse_agent_skills_index() for RFC v0.2.0 index format with strict $schema enforcement, skill-name validation, and sha256 digest verification - client.py: add _fetch_url_direct() (HTTPS-only, conditional GET, ETag/ Last-Modified caching, digest mismatch detection); add _detect_index_format() and _parse_manifest() dispatcher; on_stale_warning callback on network errors - commands/marketplace.py: add URL branch to 'marketplace add'; HTTPS enforced at command layer; .well-known auto-resolution via _resolve_index_url() - marketplace/archive.py (new): safe download and extraction of .tar.gz / .zip archive skill packages with path-traversal and decompression-bomb guards - deps/lockfile.py: extend provenance tracking for URL-sourced skills - resolver.py: URL source support in skill resolution path Tests (new files): - test_marketplace_url_client.py — _fetch_url_direct, caching, 304 handling - test_marketplace_url_commands.py — marketplace add URL branch, HTTPS enforcement - test_marketplace_url_resolver.py — resolver URL source path - test_marketplace_archive.py — archive extraction safety cases Tests (extended): - test_marketplace_url_models.py — MarketplaceSource URL fields + round-trips - 
test_marketplace_client.py — stale-cache / on_stale_warning coverage - test_lockfile_provenance.py — URL provenance fields - test_marketplace_install_integration.py — updated for new source shape - test_marketplace_resolver.py — pruned redundant cases covered elsewhere Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/apm_cli/commands/marketplace.py | 110 ++- src/apm_cli/deps/lockfile.py | 8 + src/apm_cli/marketplace/archive.py | 212 +++++ src/apm_cli/marketplace/client.py | 278 +++++- src/apm_cli/marketplace/models.py | 180 +++- src/apm_cli/marketplace/resolver.py | 29 +- .../marketplace/test_lockfile_provenance.py | 36 + .../marketplace/test_marketplace_archive.py | 349 ++++++++ .../marketplace/test_marketplace_client.py | 23 +- .../test_marketplace_install_integration.py | 27 +- .../marketplace/test_marketplace_resolver.py | 15 - .../test_marketplace_url_client.py | 823 ++++++++++++++++++ .../test_marketplace_url_commands.py | 505 +++++++++++ .../test_marketplace_url_models.py | 268 +++++- .../test_marketplace_url_resolver.py | 196 +++++ 15 files changed, 2923 insertions(+), 136 deletions(-) create mode 100644 src/apm_cli/marketplace/archive.py create mode 100644 tests/unit/marketplace/test_marketplace_archive.py create mode 100644 tests/unit/marketplace/test_marketplace_url_client.py create mode 100644 tests/unit/marketplace/test_marketplace_url_commands.py create mode 100644 tests/unit/marketplace/test_marketplace_url_resolver.py diff --git a/src/apm_cli/commands/marketplace.py b/src/apm_cli/commands/marketplace.py index ef4e201d..273a2b4e 100644 --- a/src/apm_cli/commands/marketplace.py +++ b/src/apm_cli/commands/marketplace.py @@ -6,6 +6,7 @@ import builtins import sys +from urllib.parse import urlparse import click @@ -15,6 +16,32 @@ # Restore builtins shadowed by subcommand names list = builtins.list +_WELL_KNOWN_PATH = "/.well-known/agent-skills/index.json" + + +def _resolve_index_url(raw_url: str) -> str: + """Resolve a bare 
origin URL to the Agent Skills .well-known index URL. + + If the URL already has a non-trivial path it is returned unchanged. + Trailing slashes on bare origins are normalised away. + + Args: + raw_url: A user-supplied ``https://`` URL — either a bare origin + (``https://example.com``) or a fully-qualified index URL. + + Returns: + Fully-qualified index URL ending in + ``/.well-known/agent-skills/index.json``, or *raw_url* unchanged + when it already contains a meaningful path. + """ + parsed = urlparse(raw_url) + path = parsed.path.rstrip("/") + if not path or path == "/.well-known/agent-skills": + # Bare origin or just the .well-known dir — append full path + base = f"{parsed.scheme}://{parsed.netloc}" + return base + _WELL_KNOWN_PATH + return raw_url + @click.group(help="Manage plugin marketplaces for discovery and governance") def marketplace(): @@ -29,18 +56,65 @@ def marketplace(): @marketplace.command(help="Register a plugin marketplace") @click.argument("repo", required=True) -@click.option("--name", "-n", default=None, help="Display name (defaults to repo name)") +@click.option("--name", "-n", default=None, help="Display name (defaults to repo name or hostname)") @click.option("--branch", "-b", default="main", show_default=True, help="Branch to use") @click.option("--host", default=None, help="Git host FQDN (default: github.com)") @click.option("--verbose", "-v", is_flag=True, help="Show detailed output") def add(repo, name, branch, host, verbose): - """Register a marketplace from OWNER/REPO or HOST/OWNER/REPO.""" + """Register a marketplace from OWNER/REPO, HOST/OWNER/REPO, or HTTPS URL.""" logger = CommandLogger("marketplace-add", verbose=verbose) try: + import re + from ..marketplace.client import _auto_detect_path, fetch_marketplace from ..marketplace.models import MarketplaceSource from ..marketplace.registry import add_marketplace + # URL-based path (Agent Skills discovery) + if repo.startswith("https://") or repo.startswith("http://"): + if 
repo.startswith("http://"): + logger.error( + "URL marketplaces must use HTTPS. " + "Please provide an https:// URL." + ) + sys.exit(1) + + resolved_url = _resolve_index_url(repo) + parsed = urlparse(resolved_url) + display_name = name or parsed.netloc + + if not re.match(r"^[a-zA-Z0-9._-]+$", display_name): + logger.error( + f"Invalid marketplace name: '{display_name}'. " + f"Names must only contain letters, digits, '.', '_', and '-' " + f"(required for 'apm install skill@marketplace' syntax)." + ) + sys.exit(1) + + logger.start(f"Registering marketplace '{display_name}'...", symbol="gear") + logger.verbose_detail(f" URL: {resolved_url}") + + source = MarketplaceSource( + name=display_name, + source_type="url", + url=resolved_url, + ) + + manifest = fetch_marketplace(source, force_refresh=True) + skill_count = len(manifest.plugins) + + add_marketplace(source) + + logger.success( + f"Marketplace '{display_name}' registered ({skill_count} skills)", + symbol="check", + ) + if manifest.description: + logger.verbose_detail(f" {manifest.description}") + return + + # GitHub path (OWNER/REPO or HOST/OWNER/REPO) + # Parse OWNER/REPO or HOST/OWNER/REPO if "/" not in repo: logger.error( @@ -86,8 +160,6 @@ def add(repo, name, branch, host, verbose): display_name = name or repo_name # Validate name is identifier-compatible for NAME@MARKETPLACE syntax - import re - if not re.match(r"^[a-zA-Z0-9._-]+$", display_name): logger.error( f"Invalid marketplace name: '{display_name}'. 
" @@ -102,7 +174,6 @@ def add(repo, name, branch, host, verbose): if resolved_host != "github.com": logger.verbose_detail(f" Host: {resolved_host}") - # Auto-detect marketplace.json location probe_source = MarketplaceSource( name=display_name, owner=owner, @@ -122,7 +193,6 @@ def add(repo, name, branch, host, verbose): logger.verbose_detail(f" Detected path: {detected_path}") - # Create source with detected path source = MarketplaceSource( name=display_name, owner=owner, @@ -132,11 +202,9 @@ def add(repo, name, branch, host, verbose): path=detected_path, ) - # Fetch and validate manifest = fetch_marketplace(source, force_refresh=True) plugin_count = len(manifest.plugins) - # Register add_marketplace(source) logger.success( @@ -147,7 +215,14 @@ def add(repo, name, branch, host, verbose): logger.verbose_detail(f" {manifest.description}") except Exception as e: - logger.error(f"Failed to register marketplace: {e}") + from ..marketplace.errors import MarketplaceFetchError + + if isinstance(e, MarketplaceFetchError): + logger.error(str(e)) + elif isinstance(e, ValueError): + logger.error(f"Invalid index format: {e}") + else: + logger.error(f"Failed to register marketplace: {e}") sys.exit(1) @@ -181,7 +256,8 @@ def list_cmd(verbose): f"{len(sources)} marketplace(s) registered:", symbol="info" ) for s in sources: - click.echo(f" {s.name} ({s.owner}/{s.repo})") + location = s.url if s.is_url_source else f"{s.owner}/{s.repo}" + click.echo(f" {s.name} ({location})") return from rich.table import Table @@ -198,7 +274,10 @@ def list_cmd(verbose): table.add_column("Path", style="dim") for s in sources: - table.add_row(s.name, f"{s.owner}/{s.repo}", s.branch, s.path) + if s.is_url_source: + table.add_row(s.name, s.url, "—", "—") + else: + table.add_row(s.name, f"{s.owner}/{s.repo}", s.branch, s.path) console.print() console.print(table) @@ -299,7 +378,7 @@ def update(name, verbose): if name: source = get_marketplace_by_name(name) logger.start(f"Refreshing marketplace 
'{name}'...", symbol="gear") - clear_marketplace_cache(name, host=source.host) + clear_marketplace_cache(source=source) manifest = fetch_marketplace(source, force_refresh=True) logger.success( f"Marketplace '{name}' updated ({len(manifest.plugins)} plugins)", @@ -317,7 +396,7 @@ def update(name, verbose): ) for s in sources: try: - clear_marketplace_cache(s.name, host=s.host) + clear_marketplace_cache(source=s) manifest = fetch_marketplace(s, force_refresh=True) logger.tree_item( f" {s.name} ({len(manifest.plugins)} plugins)" @@ -351,8 +430,9 @@ def remove(name, yes, verbose): source = get_marketplace_by_name(name) if not yes: + location = source.url if source.is_url_source else f"{source.owner}/{source.repo}" confirmed = click.confirm( - f"Remove marketplace '{source.name}' ({source.owner}/{source.repo})?", + f"Remove marketplace '{source.name}' ({location})?", default=False, ) if not confirmed: @@ -360,7 +440,7 @@ def remove(name, yes, verbose): return remove_marketplace(name) - clear_marketplace_cache(name, host=source.host) + clear_marketplace_cache(source=source) logger.success(f"Marketplace '{name}' removed", symbol="check") except Exception as e: diff --git a/src/apm_cli/deps/lockfile.py b/src/apm_cli/deps/lockfile.py index e4782280..ef74ed84 100644 --- a/src/apm_cli/deps/lockfile.py +++ b/src/apm_cli/deps/lockfile.py @@ -38,6 +38,8 @@ class LockedDependency: is_dev: bool = False # True for devDependencies discovered_via: Optional[str] = None # Marketplace name (provenance) marketplace_plugin_name: Optional[str] = None # Plugin name in marketplace + source_url: Optional[str] = None # URL the index was fetched from + source_digest: Optional[str] = None # sha256 digest of the fetched index def get_unique_key(self) -> str: """Returns unique key for this dependency.""" @@ -84,6 +86,10 @@ def to_dict(self) -> Dict[str, Any]: result["discovered_via"] = self.discovered_via if self.marketplace_plugin_name: result["marketplace_plugin_name"] = 
self.marketplace_plugin_name + if self.source_url is not None: + result["source_url"] = self.source_url + if self.source_digest is not None: + result["source_digest"] = self.source_digest return result @classmethod @@ -122,6 +128,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "LockedDependency": is_dev=data.get("is_dev", False), discovered_via=data.get("discovered_via"), marketplace_plugin_name=data.get("marketplace_plugin_name"), + source_url=data.get("source_url"), + source_digest=data.get("source_digest"), ) @classmethod diff --git a/src/apm_cli/marketplace/archive.py b/src/apm_cli/marketplace/archive.py new file mode 100644 index 00000000..b9d89e30 --- /dev/null +++ b/src/apm_cli/marketplace/archive.py @@ -0,0 +1,212 @@ +"""Download and safely extract Agent Skills archive packages (.tar.gz / .zip). + +Handles ``type: "archive"`` skill entries from the Agent Skills discovery RFC. +All extraction paths are validated against the destination directory to prevent +path traversal, and total uncompressed size is bounded to prevent decompression +bombs. +""" + +import io +import os +import tarfile +import zipfile +from typing import List + +import requests + +_MAX_UNCOMPRESSED_BYTES = 512 * 1024 * 1024 # 512 MB + + +class ArchiveError(Exception): + """Raised when an archive cannot be downloaded or extracted safely.""" + + +def _check_archive_member(member_path: str) -> None: + """Validate a single archive member path. + + Raises: + ArchiveError: If the path is absolute (including Windows drive-letter + and UNC forms), contains path traversal sequences (``..``), or + contains a null byte. + """ + if "\x00" in member_path: + raise ArchiveError(f"Archive member path contains null byte: {member_path!r}") + # os.path.isabs catches Unix-style absolute paths. Windows drive-letter + # paths (e.g. "C:\..." or "C:/...") and UNC paths ("\\server\share") are + # not caught by os.path.isabs on non-Windows hosts, so check explicitly. 
+ if os.path.isabs(member_path): + raise ArchiveError(f"Archive member has absolute path: {member_path!r}") + forward = member_path.replace("\\", "/") + if forward.startswith("//") or ( + len(forward) >= 2 and forward[1] == ":" and forward[0].isalpha() + ): + raise ArchiveError(f"Archive member has absolute path: {member_path!r}") + normalized = os.path.normpath(member_path) + if normalized.startswith(".."): + raise ArchiveError( + f"Archive member path traversal detected: {member_path!r}" + ) + parts = forward.split("/") + if ".." in parts: + raise ArchiveError( + f"Archive member path traversal detected: {member_path!r}" + ) + + +def _detect_archive_format(content_type: str, url: str) -> str: + """Detect archive format from Content-Type header or URL extension. + + Content-Type takes priority over the URL when both are provided. + + Returns: + ``"tar.gz"`` or ``"zip"``. + + Raises: + ArchiveError: When the format cannot be determined. + """ + ct = content_type.lower().split(";")[0].strip() + if ct in ("application/gzip", "application/x-gzip", "application/x-tar"): + return "tar.gz" + if ct in ("application/zip", "application/x-zip-compressed"): + return "zip" + + lower_url = url.lower().split("?")[0] + if lower_url.endswith(".tar.gz") or lower_url.endswith(".tgz"): + return "tar.gz" + if lower_url.endswith(".zip"): + return "zip" + + raise ArchiveError( + f"Cannot determine archive format from Content-Type={content_type!r} " + f"and URL={url!r}" + ) + + +def _extract_tar_gz(data: bytes, dest_dir: str) -> List[str]: + """Extract a tar.gz archive to *dest_dir* with safety checks. + + Args: + data: Raw archive bytes. + dest_dir: Directory to extract into (must already exist). + + Returns: + List of relative paths extracted. + + Raises: + ArchiveError: On path traversal, symlink escape, or decompression bomb. 
+ """ + extracted: List[str] = [] + total_size = 0 + + try: + with tarfile.open(fileobj=io.BytesIO(data), mode="r:gz") as tf: + for member in tf.getmembers(): + if member.isdir(): + continue + if member.issym() or member.islnk(): + raise ArchiveError( + f"Symlinks and hard links are not supported: {member.name!r}" + ) + _check_archive_member(member.name) + + total_size += member.size + if total_size > _MAX_UNCOMPRESSED_BYTES: + raise ArchiveError( + f"Archive exceeds size limit of {_MAX_UNCOMPRESSED_BYTES} bytes " + f"(decompression bomb guard)" + ) + + dest_path = os.path.realpath(os.path.join(dest_dir, member.name)) + real_dest = os.path.realpath(dest_dir) + if not dest_path.startswith(real_dest + os.sep) and dest_path != real_dest: + raise ArchiveError( + f"Archive member would extract outside destination: {member.name!r}" + ) + + os.makedirs(os.path.dirname(dest_path), exist_ok=True) + with tf.extractfile(member) as src, open(dest_path, "wb") as dst: + dst.write(src.read()) + extracted.append(member.name) + except (tarfile.TarError, KeyError) as exc: + raise ArchiveError(f"Failed to read tar.gz archive: {exc}") from exc + + return extracted + + +def _extract_zip(data: bytes, dest_dir: str) -> List[str]: + """Extract a zip archive to *dest_dir* with safety checks. + + Args: + data: Raw archive bytes. + dest_dir: Directory to extract into (must already exist). + + Returns: + List of relative paths extracted. + + Raises: + ArchiveError: On path traversal, absolute paths, or decompression bomb. 
+ """ + extracted: List[str] = [] + total_size = 0 + + try: + with zipfile.ZipFile(io.BytesIO(data)) as zf: + for info in zf.infolist(): + if info.filename.endswith("/"): + continue + _check_archive_member(info.filename) + + total_size += info.file_size + if total_size > _MAX_UNCOMPRESSED_BYTES: + raise ArchiveError( + f"Archive exceeds size limit of {_MAX_UNCOMPRESSED_BYTES} bytes " + f"(decompression bomb guard)" + ) + + dest_path = os.path.realpath(os.path.join(dest_dir, info.filename)) + real_dest = os.path.realpath(dest_dir) + if not dest_path.startswith(real_dest + os.sep) and dest_path != real_dest: + raise ArchiveError( + f"Archive member would extract outside destination: {info.filename!r}" + ) + + os.makedirs(os.path.dirname(dest_path), exist_ok=True) + with zf.open(info) as src, open(dest_path, "wb") as dst: + dst.write(src.read()) + extracted.append(info.filename) + except zipfile.BadZipFile as exc: + raise ArchiveError(f"Failed to read zip archive: {exc}") from exc + + return extracted + + +def download_and_extract_archive(url: str, dest_dir: str) -> List[str]: + """Download an archive from *url* and extract it to *dest_dir*. + + Detects format from Content-Type header or URL extension. Applies full + safety checks (path traversal, decompression bomb). + + Args: + url: HTTPS URL of the archive. + dest_dir: Directory to extract into (created if it does not exist). + + Returns: + List of relative paths extracted. + + Raises: + ArchiveError: On download failure, unrecognised format, or unsafe content. 
+ """ + try: + resp = requests.get(url, headers={"User-Agent": "apm-cli"}, timeout=60) + resp.raise_for_status() + except requests.exceptions.RequestException as exc: + raise ArchiveError(f"Failed to download archive from {url!r}: {exc}") from exc + + content_type = resp.headers.get("Content-Type", "") + fmt = _detect_archive_format(content_type, url) + + os.makedirs(dest_dir, exist_ok=True) + + if fmt == "tar.gz": + return _extract_tar_gz(resp.content, dest_dir) + return _extract_zip(resp.content, dest_dir) diff --git a/src/apm_cli/marketplace/client.py b/src/apm_cli/marketplace/client.py index 76e1c351..3f2ab062 100644 --- a/src/apm_cli/marketplace/client.py +++ b/src/apm_cli/marketplace/client.py @@ -1,4 +1,5 @@ -"""Fetch, parse, and cache marketplace.json from GitHub repositories. +"""Fetch, parse, and cache marketplace indexes from GitHub repositories or +arbitrary HTTPS URLs (Agent Skills discovery endpoints). Uses ``AuthResolver.try_with_fallback(unauth_first=True)`` for public-first access with automatic credential fallback for private marketplace repos. @@ -6,23 +7,47 @@ proxy (Artifactory Archive Entry Download) before falling back to the GitHub Contents API. When ``PROXY_REGISTRY_ONLY=1``, the GitHub fallback is blocked entirely. + +For URL sources (``source_type='url'``), fetches are made directly to the +fully-qualified HTTPS URL without GitHub auth or proxy. Index format is +auto-detected: Agent Skills (``"skills"`` key) or legacy marketplace.json +(``"plugins"`` key). + Cache lives at ``~/.apm/cache/marketplace/`` with a 1-hour TTL. 
""" import json +import hashlib import logging import os import time +from dataclasses import dataclass from typing import Dict, List, Optional import requests from .errors import MarketplaceFetchError -from .models import MarketplaceManifest, MarketplacePlugin, MarketplaceSource, parse_marketplace_json +from .models import ( + MarketplaceManifest, + MarketplacePlugin, + MarketplaceSource, + parse_agent_skills_index, + parse_marketplace_json, +) from .registry import get_registered_marketplaces logger = logging.getLogger(__name__) + +@dataclass(frozen=True) +class FetchResult: + """Result of a direct URL fetch.""" + + data: Dict + digest: str + etag: str = "" + last_modified: str = "" + _CACHE_TTL_SECONDS = 3600 # 1 hour _CACHE_DIR_NAME = os.path.join("cache", "marketplace") @@ -61,7 +86,14 @@ def _sanitize_cache_name(name: str) -> str: def _cache_key(source: MarketplaceSource) -> str: - """Cache key that includes host to avoid collisions across hosts.""" + """Cache key that avoids collisions across hosts and URL sources. + + - GitHub sources: ``name`` (same host) or ``{host}__{name}`` (GHE). + - URL sources: first 16 hex chars of ``sha256(url)`` — avoids + host-based collisions between two URL sources on the same domain. + """ + if source.is_url_source: + return hashlib.sha256(source.url.encode()).hexdigest()[:16] normalized_host = source.host.lower() if normalized_host == "github.com": return source.name @@ -76,8 +108,12 @@ def _cache_meta_path(name: str) -> str: return os.path.join(_cache_dir(), f"{_sanitize_cache_name(name)}.meta.json") -def _read_cache(name: str) -> Optional[Dict]: - """Read cached marketplace data if valid (not expired).""" +def _read_cache(name: str): + """Read cached marketplace data if valid (not expired). + + Returns: + Tuple of (data dict, stored index_digest) if cache is fresh, else None. 
+ """ data_path = _cache_data_path(name) meta_path = _cache_meta_path(name) if not os.path.exists(data_path) or not os.path.exists(meta_path): @@ -89,9 +125,10 @@ def _read_cache(name: str) -> Optional[Dict]: ttl = meta.get("ttl_seconds", _CACHE_TTL_SECONDS) if time.time() - fetched_at > ttl: return None # Expired + stored_digest = meta.get("index_digest", "") with open(data_path, "r") as f: - return json.load(f) - except (json.JSONDecodeError, OSError, KeyError) as exc: + return json.load(f), stored_digest + except (json.JSONDecodeError, OSError) as exc: logger.debug("Cache read failed for '%s': %s", name, exc) return None @@ -108,18 +145,23 @@ def _read_stale_cache(name: str) -> Optional[Dict]: return None -def _write_cache(name: str, data: Dict) -> None: +def _write_cache(name: str, data: Dict, *, index_digest: str = "", + etag: str = "", last_modified: str = "") -> None: """Write marketplace data and metadata to cache.""" data_path = _cache_data_path(name) meta_path = _cache_meta_path(name) try: with open(data_path, "w") as f: json.dump(data, f, indent=2) + meta: Dict = {"fetched_at": time.time(), "ttl_seconds": _CACHE_TTL_SECONDS} + if index_digest: + meta["index_digest"] = index_digest + if etag: + meta["etag"] = etag + if last_modified: + meta["last_modified"] = last_modified with open(meta_path, "w") as f: - json.dump( - {"fetched_at": time.time(), "ttl_seconds": _CACHE_TTL_SECONDS}, - f, - ) + json.dump(meta, f) except OSError as exc: logger.debug("Cache write failed for '%s': %s", name, exc) @@ -133,6 +175,22 @@ def _clear_cache(name: str) -> None: pass +def _read_stale_meta(name: str) -> Optional[Dict]: + """Read cache metadata even if the cache has expired. + + Returns the raw meta dict (may contain etag, last_modified, etc.), + or ``None`` if no meta file exists. 
+ """ + meta_path = _cache_meta_path(name) + if not os.path.exists(meta_path): + return None + try: + with open(meta_path, "r") as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + return None + + # --------------------------------------------------------------------------- # Network fetch # --------------------------------------------------------------------------- @@ -178,6 +236,76 @@ def _try_proxy_fetch( return None +def _fetch_url_direct(url: str, *, etag: str = "", last_modified: str = "", + expected_digest: str = ""): + """Fetch a URL marketplace index directly over HTTPS. + + No GitHub auth or proxy involved — used for URL sources only. + + Supports conditional requests: when *etag* or *last_modified* are provided + the corresponding ``If-None-Match`` / ``If-Modified-Since`` headers are sent. + A 304 Not Modified response returns ``None`` (caller should use cached data). + + When *expected_digest* is non-empty the computed ``sha256:`` digest of the + response body is compared against it; a mismatch raises ``MarketplaceFetchError``. + + Returns: + FetchResult with data, digest, etag, and last_modified; or ``None`` on 304. + + Raises: + MarketplaceFetchError: On non-HTTPS scheme, 404, any other HTTP error, + network failure, non-JSON response body, or digest mismatch. 
+ """ + from urllib.parse import urlparse + + if urlparse(url).scheme != "https": + raise MarketplaceFetchError(url, "URL sources must use HTTPS") + try: + headers = {"User-Agent": "apm-cli"} + if etag: + headers["If-None-Match"] = etag + if last_modified: + headers["If-Modified-Since"] = last_modified + resp = requests.get(url, headers=headers, timeout=30) + if resp.status_code == 304: + return None + if resp.status_code == 404: + raise MarketplaceFetchError(url, "404 Not Found") + resp.raise_for_status() + raw = resp.content + digest = "sha256:" + hashlib.sha256(raw).hexdigest() + if expected_digest and digest != expected_digest: + raise MarketplaceFetchError( + url, + f"digest mismatch: expected {expected_digest!r}, got {digest!r}", + ) + data = json.loads(raw) + resp_etag = resp.headers.get("ETag", "") + resp_last_modified = resp.headers.get("Last-Modified", "") + return FetchResult(data=data, digest=digest, + etag=resp_etag, last_modified=resp_last_modified) + except MarketplaceFetchError: + raise + except requests.exceptions.RequestException as exc: + raise MarketplaceFetchError(url, str(exc)) from exc + except ValueError as exc: + raise MarketplaceFetchError(url, f"Invalid JSON response: {exc}") from exc + +def _detect_index_format(data: Dict) -> str: + """Detect whether *data* is an Agent Skills index or a legacy marketplace.json. + + Returns: + ``"agent-skills"`` if the ``"skills"`` key is present. + ``"github"`` if the ``"plugins"`` key is present. + ``"unknown"`` otherwise. 
+ """ + if "skills" in data: + return "agent-skills" + if "plugins" in data: + return "github" + return "unknown" + + def _github_contents_url(source: MarketplaceSource, file_path: str) -> str: """Build the GitHub Contents API URL for a file.""" from ..core.auth import AuthResolver @@ -270,11 +398,57 @@ def _auto_detect_path( # --------------------------------------------------------------------------- +def _parse_manifest( + data: Dict, + source: MarketplaceSource, + *, + source_url: str = "", + source_digest: str = "", +) -> MarketplaceManifest: + """Parse *data* using the correct parser for *source*. + + For URL sources the index format is auto-detected via + ``_detect_index_format`` and dispatched to the appropriate parser. + For GitHub sources ``parse_marketplace_json`` is used directly. + + Args: + data: Parsed JSON dict from the marketplace index. + source: The marketplace source that produced *data*. + source_url: Optional URL to attach as provenance metadata. + source_digest: Optional digest to attach as provenance metadata. + + Returns: + Parsed ``MarketplaceManifest``. + + Raises: + MarketplaceFetchError: If the URL source index has an + unrecognised format (neither ``skills`` nor ``plugins`` key). + """ + if source.is_url_source: + fmt = _detect_index_format(data) + if fmt == "agent-skills": + return parse_agent_skills_index( + data, source.name, + source_url=source_url, source_digest=source_digest, + ) + if fmt == "github": + return parse_marketplace_json( + data, source.name, + source_url=source_url, source_digest=source_digest, + ) + raise MarketplaceFetchError( + source.url or source.name, + "Unrecognised index format; run `apm marketplace update` to refresh", + ) + return parse_marketplace_json(data, source.name) + + def fetch_marketplace( source: MarketplaceSource, *, force_refresh: bool = False, auth_resolver: Optional[object] = None, + on_stale_warning: Optional[object] = None, ) -> MarketplaceManifest: """Fetch and parse a marketplace manifest. 
@@ -285,6 +459,9 @@ def fetch_marketplace( source: Marketplace source to fetch. force_refresh: Skip cache and re-fetch from network. auth_resolver: Optional ``AuthResolver`` instance (created if None). + on_stale_warning: Optional callable invoked with a warning message when + stale cache is served due to a network error. Use this to surface + the warning to the terminal in the command layer. Returns: MarketplaceManifest: Parsed manifest. @@ -298,11 +475,60 @@ def fetch_marketplace( if not force_refresh: cached = _read_cache(cache_name) if cached is not None: + cached_data, stored_digest = cached logger.debug("Using cached marketplace data for '%s'", source.name) - return parse_marketplace_json(cached, source.name) + return _parse_manifest( + cached_data, source, + source_url=source.url if source.is_url_source else "", + source_digest=stored_digest, + ) # Fetch from network try: + # URL source — direct HTTPS fetch, no GitHub auth or proxy + if source.is_url_source: + # Read stale ETag/Last-Modified for conditional request + stale_meta = _read_stale_meta(cache_name) + stored_etag = (stale_meta or {}).get("etag", "") + stored_last_modified = (stale_meta or {}).get("last_modified", "") + + result = _fetch_url_direct( + source.url, etag=stored_etag, last_modified=stored_last_modified + ) + + if result is None: + # 304 Not Modified — reset TTL on existing cache, serve stale data + stale = _read_stale_cache(cache_name) + if stale is not None: + _write_cache( + cache_name, stale, + index_digest=(stale_meta or {}).get("index_digest", ""), + etag=stored_etag, + last_modified=stored_last_modified, + ) + logger.debug("304 Not Modified for '%s'; serving cached data", source.name) + return _parse_manifest( + stale, source, + source_url=source.url, + source_digest=(stale_meta or {}).get("index_digest", ""), + ) + raise MarketplaceFetchError( + source.name, + "Got 304 Not Modified but no cached data is available", + ) + + _write_cache( + cache_name, result.data, + 
index_digest=result.digest, + etag=result.etag, + last_modified=result.last_modified, + ) + return _parse_manifest( + result.data, source, + source_url=source.url, source_digest=result.digest, + ) + + # GitHub source — proxy-first, then GitHub Contents API data = _fetch_file(source, source.path, auth_resolver=auth_resolver) if data is None: raise MarketplaceFetchError( @@ -316,10 +542,18 @@ def fetch_marketplace( # Stale-while-revalidate: serve expired cache on network error stale = _read_stale_cache(cache_name) if stale is not None: - logger.warning( - "Network error fetching '%s'; using stale cache", source.name - ) - return parse_marketplace_json(stale, source.name) + warning_msg = f"Network error fetching '{source.name}'; using stale cache" + logger.warning(warning_msg) + if on_stale_warning is not None: + on_stale_warning(warning_msg) + if source.is_url_source: + _stale_meta = _read_stale_meta(cache_name) + return _parse_manifest( + stale, source, + source_url=source.url, + source_digest=(_stale_meta or {}).get("index_digest", ""), + ) + return _parse_manifest(stale, source) raise @@ -365,18 +599,24 @@ def search_all_marketplaces( def clear_marketplace_cache( name: Optional[str] = None, host: str = "github.com", + source: Optional[MarketplaceSource] = None, ) -> int: """Clear cached data for one or all marketplaces. Returns the number of caches cleared. 
""" + if source is not None: + # Use the actual source object to derive the correct cache key + # (required for URL sources whose key is sha256-based, not name-based) + _clear_cache(_cache_key(source)) + return 1 if name: # Build a minimal source to derive the cache key _src = MarketplaceSource(name=name, owner="", repo="", host=host) _clear_cache(_cache_key(_src)) return 1 count = 0 - for source in get_registered_marketplaces(): - _clear_cache(_cache_key(source)) + for src in get_registered_marketplaces(): + _clear_cache(_cache_key(src)) count += 1 return count diff --git a/src/apm_cli/marketplace/models.py b/src/apm_cli/marketplace/models.py index 18a4484e..fe761404 100644 --- a/src/apm_cli/marketplace/models.py +++ b/src/apm_cli/marketplace/models.py @@ -1,32 +1,59 @@ """Frozen dataclasses and JSON parser for marketplace manifests. -Supports both Copilot CLI and Claude Code marketplace.json formats. +Supports both Copilot CLI and Claude Code marketplace.json formats, +plus the Agent Skills Discovery RFC v0.2.0 index format. All dataclasses are frozen for thread-safety. """ import logging +import re from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Tuple logger = logging.getLogger(__name__) +# Agent Skills Discovery RFC v0.2.0 — the only schema version we accept. +_AGENT_SKILLS_SCHEMA = "https://schemas.agentskills.io/discovery/0.2.0/schema.json" + +# RFC skill-name rule: 1-64 chars, lowercase alphanumeric + hyphens, +# no leading/trailing/consecutive hyphens. +_SKILL_NAME_RE = re.compile(r"^[a-z0-9]([a-z0-9-]{0,62}[a-z0-9])?$") + @dataclass(frozen=True) class MarketplaceSource: """A registered marketplace repository. Stored in ``~/.apm/marketplaces.json``. + + Two source types are supported: + - ``"github"`` (default) — a GitHub-hosted marketplace.json index. + - ``"url"`` — an arbitrary HTTPS Agent Skills discovery endpoint. 
""" name: str # Display name (e.g., "acme-tools") - owner: str # GitHub owner - repo: str # GitHub repo - host: str = "github.com" - branch: str = "main" - path: str = "marketplace.json" # Detected on add + owner: str = "" # GitHub owner (GitHub sources only) + repo: str = "" # GitHub repo (GitHub sources only) + host: str = "github.com" # Git host FQDN (GitHub sources only) + branch: str = "main" # Git branch (GitHub sources only) + path: str = "marketplace.json" # Detected on add (GitHub sources only) + source_type: str = "github" # "github" | "url" + url: str = "" # Fully-qualified index URL (URL sources only) + + @property + def is_url_source(self) -> bool: + """Return True if this is a URL-based (Agent Skills) source.""" + return self.source_type == "url" def to_dict(self) -> Dict[str, Any]: """Serialize to dict for JSON storage.""" + if self.is_url_source: + return { + "name": self.name, + "source_type": "url", + "url": self.url, + } + # GitHub sources omit source_type so existing consumers parse unchanged result: Dict[str, Any] = { "name": self.name, "owner": self.owner, @@ -43,13 +70,21 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "MarketplaceSource": """Deserialize from JSON dict.""" + source_type = data.get("source_type", "github") + if source_type == "url": + return cls( + name=data["name"], + source_type="url", + url=data.get("url", ""), + ) return cls( name=data["name"], - owner=data["owner"], - repo=data["repo"], + owner=data.get("owner", ""), + repo=data.get("repo", ""), host=data.get("host", "github.com"), branch=data.get("branch", "main"), path=data.get("path", "marketplace.json"), + source_type="github", ) @@ -83,6 +118,8 @@ class MarketplaceManifest: owner_name: str = "" description: str = "" plugin_root: str = "" # metadata.pluginRoot - base path for bare-name sources + source_url: str = "" + source_digest: str = "" def find_plugin(self, plugin_name: str) -> Optional[MarketplacePlugin]: """Find 
a plugin by exact name (case-insensitive).""" @@ -176,7 +213,11 @@ def _parse_plugin_entry( def parse_marketplace_json( - data: Dict[str, Any], source_name: str = "" + data: Dict[str, Any], + source_name: str = "", + *, + source_url: str = "", + source_digest: str = "", ) -> MarketplaceManifest: """Parse a marketplace.json dict into a ``MarketplaceManifest``. @@ -186,6 +227,8 @@ def parse_marketplace_json( Args: data: Parsed JSON content of marketplace.json. source_name: Display name of the marketplace (for provenance). + source_url: URL from which the index was fetched (optional, for provenance). + source_digest: SHA-256 digest of the raw index bytes (optional, for provenance). Returns: MarketplaceManifest: Parsed manifest with valid plugin entries. @@ -226,4 +269,123 @@ def parse_marketplace_json( owner_name=owner_name, description=description, plugin_root=plugin_root, + source_url=source_url, + source_digest=source_digest, + ) + + +# --------------------------------------------------------------------------- +# Agent Skills Discovery RFC v0.2.0 index parser +# --------------------------------------------------------------------------- + +def _is_valid_skill_name(name: str) -> bool: + """Return True if *name* satisfies the RFC skill-name rules.""" + if not name or len(name) > 64: + return False + if not _SKILL_NAME_RE.match(name): + return False + if "--" in name: + return False + return True + + +_DIGEST_RE = re.compile(r"^sha256:[0-9a-f]{64}$") + + +def _is_valid_digest(digest: str) -> bool: + """Return True if *digest* matches the ``sha256:{64 hex chars}`` format.""" + return bool(digest and _DIGEST_RE.match(digest)) + + +def parse_agent_skills_index( + data: Dict[str, Any], + source_name: str = "", + *, + source_url: str = "", + source_digest: str = "", +) -> "MarketplaceManifest": + """Parse an Agent Skills Discovery RFC v0.2.0 index into a ``MarketplaceManifest``. 
+ + Args: + data: Parsed JSON of an ``index.json`` served at + ``/.well-known/agent-skills/index.json``. + source_name: Display name of the marketplace (for provenance). + source_url: URL from which the index was fetched (optional, for provenance). + source_digest: SHA-256 digest of the raw index bytes (optional, for provenance). + + Returns: + MarketplaceManifest: Parsed manifest with valid skill entries. + + Raises: + ValueError: If ``$schema`` is missing or not the known v0.2.0 URI. + Clients MUST NOT process an index with an unrecognized schema + (RFC requirement). + """ + schema = data.get("$schema") + if not isinstance(schema, str) or schema != _AGENT_SKILLS_SCHEMA: + raise ValueError( + f"Unrecognized or missing Agent Skills index $schema: {schema!r}. " + f"Expected: {_AGENT_SKILLS_SCHEMA!r}" + ) + + raw_skills = data.get("skills", []) + if not isinstance(raw_skills, list): + logger.warning("Agent Skills index 'skills' field is not a list in '%s'", source_name) + raw_skills = [] + + plugins: List[MarketplacePlugin] = [] + for entry in raw_skills: + if not isinstance(entry, dict): + logger.debug( + "Skipping non-dict entry in Agent Skills array in '%s'", source_name + ) + continue + name = entry.get("name", "") + if not isinstance(name, str): + logger.debug( + "Skipping Agent Skills entry with non-string name %r in '%s'", + name, + source_name, + ) + continue + name = name.strip() + if not name: + logger.debug( + "Skipping Agent Skills entry with empty name in '%s'", source_name + ) + continue + if not _is_valid_skill_name(name): + logger.warning( + "Skipping Agent Skills entry with invalid name %r in '%s' " + "(name must be 1-64 lowercase alphanumeric/hyphen characters)", + name, + source_name, + ) + continue + skill_type = entry.get("type", "") + url = entry.get("url", "") + digest = entry.get("digest", "") + if not _is_valid_digest(digest): + logger.debug( + "Skipping Agent Skills entry %r with invalid digest %r in '%s'", + name, + digest, + source_name, + 
) + continue + description = entry.get("description", "") + plugins.append( + MarketplacePlugin( + name=name, + source={"type": skill_type, "url": url, "digest": digest}, + description=description, + source_marketplace=source_name, + ) + ) + + return MarketplaceManifest( + name=source_name or "unknown", + plugins=tuple(plugins), + source_url=source_url, + source_digest=source_digest, ) diff --git a/src/apm_cli/marketplace/resolver.py b/src/apm_cli/marketplace/resolver.py index 6c84fe7c..a25bfeb4 100644 --- a/src/apm_cli/marketplace/resolver.py +++ b/src/apm_cli/marketplace/resolver.py @@ -68,10 +68,14 @@ def _resolve_github_source(source: dict) -> str: def _resolve_url_source(source: dict) -> str: """Resolve a ``url`` source type. - APM is Git-native -- URL sources that point to GitHub repos are - resolved to ``owner/repo``. Non-GitHub URLs are rejected. + GitHub repo URLs are resolved to ``owner/repo``. + Any other HTTPS URL is returned as-is so that Agent Skills CDN entries + and arbitrary HTTPS plugin sources are passed through to the installer. """ url = source.get("url", "") + if not url: + raise ValueError("url source has an empty 'url' field") + # Try to extract owner/repo from common GitHub URL patterns for prefix in ("https://github.com/", "http://github.com/"): if url.lower().startswith(prefix): @@ -83,9 +87,13 @@ def _resolve_url_source(source: dict) -> str: if len(parts) >= 2: return f"{parts[0]}/{parts[1]}" + # Non-GitHub HTTPS URL — return as-is (CDN, arbitrary HTTPS host, etc.) + if url.startswith("https://"): + return url + raise ValueError( f"Cannot resolve URL source '{url}' to a Git coordinate. " - f"APM requires Git-based sources (owner/repo format)." + f"APM requires Git-based sources (owner/repo format) or HTTPS URLs." ) @@ -155,7 +163,7 @@ def resolve_plugin_source( ) -> str: """Resolve a plugin's source to a canonical ``owner/repo[#ref]`` string. - Handles 4 source types: relative, github, url, git-subdir. 
+ Handles 6 source types: relative, github, url, skill-md, archive, git-subdir. NPM sources are rejected with a clear message. Args: @@ -176,6 +184,11 @@ def resolve_plugin_source( # String source = relative path if isinstance(source, str): + if not marketplace_owner: + raise ValueError( + f"Plugin '{plugin.name}' has a relative source but no " + "marketplace_owner is available" + ) return _resolve_relative_source( source, marketplace_owner, marketplace_repo, plugin_root=plugin_root ) @@ -189,6 +202,14 @@ def resolve_plugin_source( if source_type == "github": return _resolve_github_source(source) + elif source_type in ("skill-md", "archive"): + # Agent Skills RFC types — the canonical reference is the download URL + url = source.get("url", "") + if not url: + raise ValueError( + f"Plugin '{plugin.name}' has a '{source_type}' source with no 'url' field" + ) + return url elif source_type == "url": return _resolve_url_source(source) elif source_type == "git-subdir": diff --git a/tests/unit/marketplace/test_lockfile_provenance.py b/tests/unit/marketplace/test_lockfile_provenance.py index 0600b376..f11dc34a 100644 --- a/tests/unit/marketplace/test_lockfile_provenance.py +++ b/tests/unit/marketplace/test_lockfile_provenance.py @@ -71,3 +71,39 @@ def test_backward_compat_existing_fields(self): assert dep.content_hash == "sha256:def456" assert dep.is_dev is True assert dep.discovered_via == "mkt" + + +class TestLockedDependencySourceProvenance: + """source_url and source_digest record where a skill was fetched from.""" + + def test_defaults_are_none(self): + dep = LockedDependency(repo_url="owner/repo") + assert dep.source_url is None + assert dep.source_digest is None + + def test_to_dict_omits_when_none(self): + dep = LockedDependency(repo_url="owner/repo") + d = dep.to_dict() + assert "source_url" not in d + assert "source_digest" not in d + + @pytest.mark.parametrize("field,value", [ + ("source_url", "https://example.com/.well-known/agent-skills/index.json"), + 
("source_digest", "sha256:" + "a" * 64), + ]) + def test_to_dict_includes_field(self, field, value): + dep = LockedDependency(repo_url="owner/repo", **{field: value}) + assert dep.to_dict()[field] == value + + @pytest.mark.parametrize("field,value", [ + ("source_url", "https://example.com/.well-known/agent-skills/index.json"), + ("source_digest", "sha256:" + "c" * 64), + ]) + def test_from_dict_round_trip(self, field, value): + dep = LockedDependency.from_dict({"repo_url": "o/r", field: value}) + assert getattr(dep, field) == value + + def test_from_dict_missing_fields_default_none(self): + dep = LockedDependency.from_dict({"repo_url": "o/r"}) + assert dep.source_url is None + assert dep.source_digest is None diff --git a/tests/unit/marketplace/test_marketplace_archive.py b/tests/unit/marketplace/test_marketplace_archive.py new file mode 100644 index 00000000..551b8d8c --- /dev/null +++ b/tests/unit/marketplace/test_marketplace_archive.py @@ -0,0 +1,349 @@ +"""Tests for archive download and extraction (Step 7 — gap #14). + +Covers: _check_archive_member safety checks, _detect_archive_format, _extract_tar_gz, +_extract_zip, and the download_and_extract_archive public API. 
+""" + +import io +import os +import tarfile +import zipfile + +import pytest + +from apm_cli.marketplace.archive import ( + ArchiveError, + _check_archive_member, + _detect_archive_format, + _extract_tar_gz, + _extract_zip, + download_and_extract_archive, +) +from apm_cli.marketplace.errors import MarketplaceFetchError + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_tar_gz(members: dict) -> bytes: + """Build an in-memory .tar.gz from {path: content_bytes}.""" + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tf: + for name, content in members.items(): + info = tarfile.TarInfo(name=name) + info.size = len(content) + tf.addfile(info, io.BytesIO(content)) + return buf.getvalue() + + +def _make_zip(members: dict) -> bytes: + """Build an in-memory .zip from {path: content_bytes}.""" + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + for name, content in members.items(): + zf.writestr(name, content) + return buf.getvalue() + + +# --------------------------------------------------------------------------- +# Step 7 tests — safety checks +# --------------------------------------------------------------------------- + + +class TestArchiveMemberSafetyChecks: + """_check_archive_member must reject dangerous paths.""" + + @pytest.mark.parametrize("path", [ + "skills/my-skill.md", + "README.md", + "a/b/c/d.txt", + "subdir/file-with-time-12:00.txt", + ]) + def test_safe_paths_accepted(self, path): + _check_archive_member(path) + + @pytest.mark.parametrize("path", [ + "../etc/passwd", + "skills/../../etc/passwd", + ]) + def test_path_traversal_rejected(self, path): + with pytest.raises(ArchiveError, match="path traversal"): + _check_archive_member(path) + + def test_absolute_path_rejected(self): + with pytest.raises(ArchiveError, match="absolute"): + _check_archive_member("/etc/passwd") + + def 
test_null_byte_in_path_rejected(self): + with pytest.raises(ArchiveError): + _check_archive_member("skills/skill\x00.md") + + @pytest.mark.parametrize("path", [ + "C:\\windows\\system32\\evil.exe", + "C:/windows/system32/evil.exe", + "D:\\", + "\\\\server\\share\\file.txt", + ]) + def test_windows_absolute_paths_rejected(self, path): + with pytest.raises(ArchiveError, match="absolute"): + _check_archive_member(path) + + +# --------------------------------------------------------------------------- +# Step 7 tests — format detection +# --------------------------------------------------------------------------- + + +class TestDetectArchiveFormat: + """_detect_archive_format must identify tar.gz and zip from Content-Type or URL.""" + + @pytest.mark.parametrize("content_type,expected", [ + ("application/gzip", "tar.gz"), + ("application/x-gzip", "tar.gz"), + ("application/x-tar", "tar.gz"), + ("application/zip", "zip"), + ("application/x-zip-compressed", "zip"), + ("application/gzip; charset=utf-8", "tar.gz"), + ]) + def test_detect_archive_format_from_content_type(self, content_type, expected): + assert _detect_archive_format(content_type, "") == expected + + @pytest.mark.parametrize("url,expected", [ + ("https://example.com/skill.tar.gz", "tar.gz"), + ("https://example.com/skill.tgz", "tar.gz"), + ("https://example.com/skill.zip", "zip"), + ]) + def test_detect_archive_format_from_url_extension(self, url, expected): + assert _detect_archive_format("", url) == expected + + def test_content_type_takes_priority_over_url(self): + assert _detect_archive_format("application/zip", "https://example.com/skill.tar.gz") == "zip" + + def test_unknown_format_raises_archive_error(self): + with pytest.raises(ArchiveError, match="format"): + _detect_archive_format("text/html", "https://example.com/skill.html") + + +# --------------------------------------------------------------------------- +# Step 7 tests — tar.gz extraction +# 
--------------------------------------------------------------------------- + + +class TestExtractTarGz: + """_extract_tar_gz must extract files and enforce safety checks.""" + + @pytest.mark.parametrize("path,content", [ + ("skill.md", b"# My Skill"), + ("skills/my-skill.md", b"content"), + ]) + def test_extract_file(self, tmp_path, path, content): + data = _make_tar_gz({path: content}) + paths = _extract_tar_gz(data, str(tmp_path)) + assert path in paths + assert (tmp_path / path).read_bytes() == content + + def test_path_traversal_member_rejected(self, tmp_path): + data = _make_tar_gz({"../evil.txt": b"bad"}) + with pytest.raises(ArchiveError, match="path traversal|absolute"): + _extract_tar_gz(data, str(tmp_path)) + + def test_decompression_bomb_rejected(self, tmp_path): + large_content = b"x" * (600 * 1024 * 1024) + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tf: + info = tarfile.TarInfo(name="bomb.txt") + info.size = len(large_content) + tf.addfile(info, io.BytesIO(large_content)) + with pytest.raises(ArchiveError, match="size limit|bomb"): + _extract_tar_gz(buf.getvalue(), str(tmp_path)) + + def test_multiple_files_returned(self, tmp_path): + data = _make_tar_gz({"a.md": b"a", "b.md": b"b"}) + paths = _extract_tar_gz(data, str(tmp_path)) + assert sorted(paths) == ["a.md", "b.md"] + + def test_directory_entries_skipped(self, tmp_path): + """Directory members in tar must be skipped without error.""" + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tf: + d = tarfile.TarInfo(name="subdir/") + d.type = tarfile.DIRTYPE + tf.addfile(d) + f = tarfile.TarInfo(name="subdir/file.txt") + f.size = 5 + tf.addfile(f, io.BytesIO(b"hello")) + paths = _extract_tar_gz(buf.getvalue(), str(tmp_path)) + assert paths == ["subdir/file.txt"] + + def test_empty_archive_returns_empty_list(self, tmp_path): + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tf: + pass + assert _extract_tar_gz(buf.getvalue(), str(tmp_path)) 
== [] + + +# --------------------------------------------------------------------------- +# Step 7 tests — zip extraction +# --------------------------------------------------------------------------- + + +class TestExtractZip: + """_extract_zip must extract files and enforce safety checks.""" + + @pytest.mark.parametrize("path,content", [ + ("skill.md", b"# My Skill"), + ("skills/my-skill.md", b"content"), + ]) + def test_extract_file(self, tmp_path, path, content): + data = _make_zip({path: content}) + paths = _extract_zip(data, str(tmp_path)) + assert path in paths + assert (tmp_path / path).read_bytes() == content + + def test_path_traversal_member_rejected(self, tmp_path): + data = _make_zip({"../evil.txt": b"bad"}) + with pytest.raises(ArchiveError, match="path traversal|absolute"): + _extract_zip(data, str(tmp_path)) + + def test_absolute_path_member_rejected(self, tmp_path): + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr("/etc/passwd", "root:x:0:0") + with pytest.raises(ArchiveError, match="absolute"): + _extract_zip(buf.getvalue(), str(tmp_path)) + + def test_decompression_bomb_rejected(self, tmp_path): + large = b"x" * (600 * 1024 * 1024) + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr("bomb.txt", large) + with pytest.raises(ArchiveError, match="size limit|bomb"): + _extract_zip(buf.getvalue(), str(tmp_path)) + + def test_corrupted_zip_raises_archive_error(self, tmp_path): + with pytest.raises(ArchiveError, match="zip"): + _extract_zip(b"not a zip file", str(tmp_path)) + + +# --------------------------------------------------------------------------- +# Step 7 tests — download_and_extract_archive +# --------------------------------------------------------------------------- + + +class TestDownloadAndExtractArchive: + """download_and_extract_archive must download, detect format, and extract.""" + + def test_successful_download_and_extract_tar_gz(self, tmp_path, monkeypatch): + 
import unittest.mock as mock + + archive_bytes = _make_tar_gz({"skill.md": b"# Hello"}) + resp = mock.MagicMock() + resp.status_code = 200 + resp.headers = {"Content-Type": "application/gzip"} + resp.content = archive_bytes + resp.raise_for_status.return_value = None + monkeypatch.setattr("apm_cli.marketplace.archive.requests.get", + lambda *a, **kw: resp) + + paths = download_and_extract_archive( + "https://example.com/skill.tar.gz", str(tmp_path) + ) + assert "skill.md" in paths + assert (tmp_path / "skill.md").read_bytes() == b"# Hello" + + def test_successful_download_and_extract_zip(self, tmp_path, monkeypatch): + import unittest.mock as mock + + archive_bytes = _make_zip({"skill.md": b"# Hello"}) + resp = mock.MagicMock() + resp.status_code = 200 + resp.headers = {"Content-Type": "application/zip"} + resp.content = archive_bytes + resp.raise_for_status.return_value = None + monkeypatch.setattr("apm_cli.marketplace.archive.requests.get", + lambda *a, **kw: resp) + + paths = download_and_extract_archive( + "https://example.com/skill.zip", str(tmp_path) + ) + assert "skill.md" in paths + + def test_404_raises_archive_error(self, tmp_path, monkeypatch): + import unittest.mock as mock + import requests as req + + resp = mock.MagicMock() + resp.status_code = 404 + resp.raise_for_status.side_effect = req.exceptions.HTTPError("404") + monkeypatch.setattr("apm_cli.marketplace.archive.requests.get", + lambda *a, **kw: resp) + + with pytest.raises(ArchiveError, match="404|download"): + download_and_extract_archive( + "https://example.com/missing.tar.gz", str(tmp_path) + ) + + def test_unknown_content_type_raises_archive_error(self, tmp_path, monkeypatch): + import unittest.mock as mock + + resp = mock.MagicMock() + resp.status_code = 200 + resp.headers = {"Content-Type": "text/html"} + resp.content = b"not an archive" + resp.raise_for_status.return_value = None + monkeypatch.setattr("apm_cli.marketplace.archive.requests.get", + lambda *a, **kw: resp) + + with 
pytest.raises(ArchiveError, match="format"): + download_and_extract_archive( + "https://example.com/index.html", str(tmp_path) + ) + + +# --------------------------------------------------------------------------- +# Symlink and hard link safety in tar.gz +# --------------------------------------------------------------------------- + + +class TestSymlinkAndHardlinkSafety: + """tar.gz archives containing symlinks or hard links must be rejected.""" + + def _make_symlink_tar_gz(self, link_name: str, link_target: str) -> bytes: + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tf: + info = tarfile.TarInfo(name=link_name) + info.type = tarfile.SYMTYPE + info.linkname = link_target + tf.addfile(info) + return buf.getvalue() + + def _make_hardlink_tar_gz( + self, real_name: str, link_name: str, link_target: str + ) -> bytes: + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tf: + real = tarfile.TarInfo(name=real_name) + real.size = 5 + tf.addfile(real, io.BytesIO(b"hello")) + link = tarfile.TarInfo(name=link_name) + link.type = tarfile.LNKTYPE + link.linkname = link_target + tf.addfile(link) + return buf.getvalue() + + def test_symlink_raises_archive_error(self, tmp_path): + data = self._make_symlink_tar_gz("link.txt", "../etc/passwd") + with pytest.raises(ArchiveError, match="[Ss]ymlink|[Ll]ink"): + _extract_tar_gz(data, str(tmp_path)) + + def test_hardlink_raises_archive_error(self, tmp_path): + data = self._make_hardlink_tar_gz("real.txt", "link.txt", "../evil.txt") + with pytest.raises(ArchiveError, match="[Ss]ymlink|[Ll]ink|[Hh]ard"): + _extract_tar_gz(data, str(tmp_path)) + + def test_symlink_error_is_archive_error_not_key_error(self, tmp_path): + """KeyError from extractfile must be wrapped in ArchiveError.""" + data = self._make_symlink_tar_gz("link.txt", "real.txt") + with pytest.raises(ArchiveError): + _extract_tar_gz(data, str(tmp_path)) diff --git a/tests/unit/marketplace/test_marketplace_client.py 
b/tests/unit/marketplace/test_marketplace_client.py index d41889fe..259d0c32 100644 --- a/tests/unit/marketplace/test_marketplace_client.py +++ b/tests/unit/marketplace/test_marketplace_client.py @@ -35,7 +35,8 @@ def test_write_and_read(self, tmp_path): cached = client_mod._read_cache("test-mkt") assert cached is not None - assert cached["name"] == "Test" + data_out, _ = cached + assert data_out["name"] == "Test" def test_expired_cache(self, tmp_path, monkeypatch): data = {"name": "Test", "plugins": []} @@ -313,23 +314,3 @@ def test_fetch_marketplace_via_proxy_end_to_end(self): assert manifest.name == "Test" assert len(manifest.plugins) == 1 assert manifest.plugins[0].name == "p1" - - -class TestCacheKey: - """Cache key includes host for non-github.com sources.""" - - def test_github_default_unchanged(self): - source = MarketplaceSource(name="skills", owner="o", repo="r") - assert client_mod._cache_key(source) == "skills" - - def test_non_default_host_includes_host(self): - source = MarketplaceSource(name="skills", owner="o", repo="r", host="ghes.corp.com") - key = client_mod._cache_key(source) - assert key.startswith("ghes.corp.com") or key.startswith("ghes_corp_com") - assert key.endswith("skills") - assert key != "skills" - - def test_different_hosts_different_keys(self): - s1 = MarketplaceSource(name="mkt", owner="o", repo="r", host="a.com") - s2 = MarketplaceSource(name="mkt", owner="o", repo="r", host="b.com") - assert client_mod._cache_key(s1) != client_mod._cache_key(s2) diff --git a/tests/unit/marketplace/test_marketplace_install_integration.py b/tests/unit/marketplace/test_marketplace_install_integration.py index 6399e131..d4d86367 100644 --- a/tests/unit/marketplace/test_marketplace_install_integration.py +++ b/tests/unit/marketplace/test_marketplace_install_integration.py @@ -15,25 +15,14 @@ def test_marketplace_ref_detected(self): result = parse_marketplace_ref("security-checks@acme-tools") assert result == ("security-checks", "acme-tools") - def 
test_owner_repo_not_intercepted(self): - """owner/repo should NOT be intercepted.""" - result = parse_marketplace_ref("owner/repo") - assert result is None - - def test_owner_repo_at_alias_not_intercepted(self): - """owner/repo@alias should NOT be intercepted (has slash).""" - result = parse_marketplace_ref("owner/repo@alias") - assert result is None - - def test_bare_name_not_intercepted(self): - """Just a name without @ should NOT be intercepted.""" - result = parse_marketplace_ref("just-a-name") - assert result is None - - def test_ssh_not_intercepted(self): - """SSH URLs should NOT be intercepted (has colon).""" - result = parse_marketplace_ref("git@github.com:o/r") - assert result is None + @pytest.mark.parametrize("ref", [ + "owner/repo", + "owner/repo@alias", + "just-a-name", + "git@github.com:o/r", + ]) + def test_non_marketplace_ref_returns_none(self, ref): + assert parse_marketplace_ref(ref) is None class TestValidationOutcomeProvenance: diff --git a/tests/unit/marketplace/test_marketplace_resolver.py b/tests/unit/marketplace/test_marketplace_resolver.py index 8bc20d3c..7985f6e8 100644 --- a/tests/unit/marketplace/test_marketplace_resolver.py +++ b/tests/unit/marketplace/test_marketplace_resolver.py @@ -7,7 +7,6 @@ _resolve_github_source, _resolve_git_subdir_source, _resolve_relative_source, - _resolve_url_source, parse_marketplace_ref, resolve_plugin_source, ) @@ -124,20 +123,6 @@ def test_invalid_repo(self): _resolve_github_source({"repo": "just-a-name"}) -class TestResolveUrlSource: - """Resolve url source type.""" - - def test_github_https(self): - assert _resolve_url_source({"url": "https://github.com/owner/repo"}) == "owner/repo" - - def test_github_https_with_git_suffix(self): - assert _resolve_url_source({"url": "https://github.com/owner/repo.git"}) == "owner/repo" - - def test_non_github_url(self): - with pytest.raises(ValueError, match="Cannot resolve URL source"): - _resolve_url_source({"url": "https://gitlab.com/owner/repo"}) - - class 
TestResolveGitSubdirSource: """Resolve git-subdir source type.""" diff --git a/tests/unit/marketplace/test_marketplace_url_client.py b/tests/unit/marketplace/test_marketplace_url_client.py new file mode 100644 index 00000000..ce5be27e --- /dev/null +++ b/tests/unit/marketplace/test_marketplace_url_client.py @@ -0,0 +1,823 @@ +"""Tests for URL-based marketplace client fetch path. + +Covers: _cache_key() URL sources, _fetch_url_direct(), fetch_marketplace() +URL branch, format auto-detection, cache read/write, stale-while-revalidate. +GitHub paths are not touched — regression tests confirm they still work. +Tests are separate from test_marketplace_client.py which covers GitHub only. +""" + +import hashlib +import json +import os +import time + +import pytest +import requests + +from apm_cli.marketplace.client import ( + FetchResult, + _cache_data_path, + _cache_dir, + _cache_key, + _cache_meta_path, + _detect_index_format, + _fetch_url_direct, + _read_stale_meta, + _write_cache, + fetch_marketplace, +) +from apm_cli.marketplace.errors import MarketplaceFetchError +from apm_cli.marketplace.models import MarketplaceSource + +# --------------------------------------------------------------------------- +# Shared constants +# --------------------------------------------------------------------------- + +_KNOWN_SCHEMA = "https://schemas.agentskills.io/discovery/0.2.0/schema.json" +_VALID_DIGEST = "sha256:" + "a" * 64 + +_AGENT_SKILLS_INDEX = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + { + "name": "code-review", + "type": "skill-md", + "description": "Code review helper", + "url": "/.well-known/agent-skills/code-review/SKILL.md", + "digest": _VALID_DIGEST, + } + ], +} + +_GITHUB_MARKETPLACE_JSON = { + "plugins": [ + { + "name": "my-plugin", + "description": "A plugin", + "source": {"type": "github", "repo": "owner/my-plugin"}, + } + ] +} + + +# --------------------------------------------------------------------------- +# Fixtures +# 
--------------------------------------------------------------------------- + + +@pytest.fixture +def url_source(): + return MarketplaceSource( + name="example-skills", + source_type="url", + url="https://example.com/.well-known/agent-skills/index.json", + ) + + +@pytest.fixture +def github_source(): + return MarketplaceSource( + name="acme", + owner="acme-org", + repo="plugins", + ) + + +@pytest.fixture(autouse=True) +def _isolate_cache(tmp_path, monkeypatch): + """Redirect cache and registry to tmp dir for every test in this file.""" + config_dir = str(tmp_path / ".apm") + monkeypatch.setattr("apm_cli.config.CONFIG_DIR", config_dir) + monkeypatch.setattr("apm_cli.marketplace.registry._registry_cache", None) + + +def _write_cache_files(source, data, *, expired=False): + """Helper: write cache data + meta for a source (fresh or expired).""" + cache_name = _cache_key(source) + os.makedirs(_cache_dir(), exist_ok=True) + with open(_cache_data_path(cache_name), "w") as f: + json.dump(data, f) + fetched_at = 0 if expired else time.time() + with open(_cache_meta_path(cache_name), "w") as f: + json.dump({"fetched_at": fetched_at, "ttl_seconds": 3600}, f) + + +def _write_cache_files_with_digest(source, data, *, digest="", expired=False): + """Helper: write cache data + meta including an index_digest field.""" + cache_name = _cache_key(source) + os.makedirs(_cache_dir(), exist_ok=True) + with open(_cache_data_path(cache_name), "w") as f: + json.dump(data, f) + fetched_at = 0 if expired else time.time() + meta = {"fetched_at": fetched_at, "ttl_seconds": 3600} + if digest: + meta["index_digest"] = digest + with open(_cache_meta_path(cache_name), "w") as f: + json.dump(meta, f) + + +# --------------------------------------------------------------------------- +# _cache_key — URL sources +# --------------------------------------------------------------------------- + + +class TestCacheKey: + """_cache_key() must use SHA-256 of URL for URL sources.""" + + def 
test_url_source_returns_sha256_of_url(self, url_source): + expected = hashlib.sha256(url_source.url.encode()).hexdigest()[:16] + assert _cache_key(url_source) == expected + + def test_url_source_key_is_not_name(self, url_source): + """Key must not fall back to the name-based GitHub key.""" + assert _cache_key(url_source) != url_source.name + + def test_two_url_sources_same_host_get_different_keys(self): + """Two marketplaces on the same hostname must not share a cache slot.""" + src_a = MarketplaceSource( + name="a", source_type="url", + url="https://example.com/a/index.json", + ) + src_b = MarketplaceSource( + name="b", source_type="url", + url="https://example.com/b/index.json", + ) + assert _cache_key(src_a) != _cache_key(src_b) + + def test_github_com_source_returns_name(self, github_source): + """github.com sources keep the existing name-based key (regression).""" + assert _cache_key(github_source) == github_source.name + + def test_github_custom_host_includes_host_in_key(self): + """Custom-host GitHub sources keep the host__name format (regression).""" + src = MarketplaceSource(name="acme", owner="o", repo="r", host="ghe.corp.com") + key = _cache_key(src) + assert "acme" in key + assert "ghe" in key.lower() + + def test_empty_url_produces_deterministic_key(self): + """Empty-string URL has a deterministic SHA-256 key, not a crash.""" + src = MarketplaceSource(name="x", source_type="url", url="") + key = _cache_key(src) + assert key == hashlib.sha256(b"").hexdigest()[:16] + + def test_very_long_url_key_is_always_16_chars(self): + """Key is always truncated to 16 hex characters regardless of URL length.""" + long_url = "https://example.com/" + "a" * 2000 + src = MarketplaceSource(name="x", source_type="url", url=long_url) + assert len(_cache_key(src)) == 16 + + +# --------------------------------------------------------------------------- +# _fetch_url_direct — network layer + + +class TestFetchUrlDirectEmptyUrl: + """_fetch_url_direct raises clearly when given 
an empty URL.""" + + def test_empty_url_raises_fetch_error(self): + """An empty URL has no scheme and raises immediately without a network call.""" + with pytest.raises(MarketplaceFetchError): + _fetch_url_direct("") + + +# --------------------------------------------------------------------------- +# _detect_index_format — direct unit tests +# --------------------------------------------------------------------------- + + +class TestDetectIndexFormat: + """_detect_index_format dispatches correctly on index shape.""" + + def test_skills_key_returns_agent_skills(self): + assert _detect_index_format({"skills": []}) == "agent-skills" + + def test_plugins_key_returns_github(self): + assert _detect_index_format({"plugins": []}) == "github" + + def test_neither_key_returns_unknown(self): + assert _detect_index_format({}) == "unknown" + + def test_both_keys_present_agent_skills_wins(self): + """When both keys are present, the Agent Skills format takes precedence.""" + assert _detect_index_format({"skills": [], "plugins": []}) == "agent-skills" +# --------------------------------------------------------------------------- + + +class TestFetchUrlDirect: + """_fetch_url_direct() must handle all HTTP and network outcomes.""" + + def test_http_200_returns_parsed_json(self, monkeypatch): + mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + result = _fetch_url_direct("https://example.com/index.json") + assert result.data == _AGENT_SKILLS_INDEX + + def test_http_404_raises_fetch_error(self, monkeypatch): + mock_resp = _mock_response(404) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + with pytest.raises(MarketplaceFetchError, match="404"): + _fetch_url_direct("https://example.com/index.json") + + def test_http_500_raises_fetch_error(self, monkeypatch): + mock_resp = _mock_response(500, 
raise_for_status=requests.exceptions.HTTPError("500")) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + with pytest.raises(MarketplaceFetchError): + _fetch_url_direct("https://example.com/index.json") + + def test_network_timeout_raises_fetch_error(self, monkeypatch): + def _timeout(*a, **kw): + raise requests.exceptions.Timeout("timed out") + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", _timeout) + with pytest.raises(MarketplaceFetchError): + _fetch_url_direct("https://example.com/index.json") + + def test_non_json_response_raises_fetch_error(self, monkeypatch): + mock_resp = _mock_response(200, json_error=ValueError("not JSON")) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + with pytest.raises(MarketplaceFetchError): + _fetch_url_direct("https://example.com/index.json") + + +# --------------------------------------------------------------------------- +# fetch_marketplace — URL branch +# --------------------------------------------------------------------------- + + +class TestFetchMarketplaceURL: + """fetch_marketplace() must route URL sources through _fetch_url_direct.""" + + def test_url_source_calls_fetch_url_direct_not_fetch_file(self, url_source, monkeypatch): + """_fetch_file must never be called for a URL source.""" + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, **kw: FetchResult(data=_AGENT_SKILLS_INDEX, digest=_VALID_DIGEST), + ) + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_file", + _never_called("_fetch_file"), + ) + result = fetch_marketplace(url_source) + assert len(result.plugins) == 1 + + def test_url_source_skills_key_uses_agent_skills_parser(self, url_source, monkeypatch): + """Index with 'skills' key must be parsed by parse_agent_skills_index.""" + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, **kw: FetchResult(data=_AGENT_SKILLS_INDEX, 
digest=_VALID_DIGEST), + ) + result = fetch_marketplace(url_source) + assert result.plugins[0].name == "code-review" + + def test_url_source_plugins_key_uses_marketplace_parser(self, url_source, monkeypatch): + """Index with 'plugins' key must be parsed by parse_marketplace_json.""" + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, **kw: FetchResult(data=_GITHUB_MARKETPLACE_JSON, digest=_VALID_DIGEST), + ) + result = fetch_marketplace(url_source) + assert result.plugins[0].name == "my-plugin" + + def test_url_source_uses_fresh_cache_without_network_call(self, url_source, monkeypatch): + """A fresh cache must short-circuit the network entirely.""" + _write_cache_files(url_source, _AGENT_SKILLS_INDEX, expired=False) + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + _never_called("_fetch_url_direct"), + ) + result = fetch_marketplace(url_source) + assert result.plugins[0].name == "code-review" + + def test_url_source_writes_data_and_meta_to_cache(self, url_source, monkeypatch): + """After a successful network fetch both .json and .meta.json must exist.""" + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, **kw: FetchResult(data=_AGENT_SKILLS_INDEX, digest=_VALID_DIGEST), + ) + fetch_marketplace(url_source) + cache_name = _cache_key(url_source) + assert os.path.exists(_cache_data_path(cache_name)) + assert os.path.exists(_cache_meta_path(cache_name)) + + def test_url_source_stale_cache_returned_on_network_error(self, url_source, monkeypatch): + """Stale-while-revalidate: expired cache beats a network failure.""" + _write_cache_files(url_source, _AGENT_SKILLS_INDEX, expired=True) + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + _raises_fetch_error(url_source.name), + ) + result = fetch_marketplace(url_source) + assert result.plugins[0].name == "code-review" + + def test_url_source_unknown_format_raises(self, url_source, monkeypatch): + 
"""Fresh-fetch with unrecognized format raises MarketplaceFetchError.""" + bad_index = {"unexpected": "data"} + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, **kw: FetchResult(data=bad_index, digest=_VALID_DIGEST), + ) + with pytest.raises(MarketplaceFetchError, match="[Uu]nrecogni"): + fetch_marketplace(url_source) + + def test_url_source_no_cache_network_error_raises(self, url_source, monkeypatch): + """No cache + network failure must propagate MarketplaceFetchError.""" + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + _raises_fetch_error(url_source.name), + ) + with pytest.raises(MarketplaceFetchError): + fetch_marketplace(url_source) + + def test_github_source_uses_fetch_file_not_fetch_url_direct( + self, github_source, monkeypatch + ): + """GitHub sources must never touch _fetch_url_direct (regression).""" + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + _never_called("_fetch_url_direct"), + ) + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_file", + lambda source, path, **kw: _GITHUB_MARKETPLACE_JSON, + ) + result = fetch_marketplace(github_source) + assert result.plugins[0].name == "my-plugin" + + +# --------------------------------------------------------------------------- +# FetchResult — digest capture (t5-test-01) +# --------------------------------------------------------------------------- + + +class TestFetchUrlDirectDigest: + """_fetch_url_direct must return a FetchResult with .data and .digest.""" + + def test_returns_fetch_result_not_plain_dict(self, monkeypatch): + mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + result = _fetch_url_direct("https://example.com/index.json") + assert isinstance(result, FetchResult) + + def test_data_field_equals_parsed_json(self, monkeypatch): + mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) + 
monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + result = _fetch_url_direct("https://example.com/index.json") + assert result.data == _AGENT_SKILLS_INDEX + + def test_digest_is_sha256_of_raw_bytes(self, monkeypatch): + mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + result = _fetch_url_direct("https://example.com/index.json") + expected = "sha256:" + hashlib.sha256(mock_resp.content).hexdigest() + assert result.digest == expected + + def test_digest_format_is_sha256_colon_64hex(self, monkeypatch): + mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + result = _fetch_url_direct("https://example.com/index.json") + assert result.digest.startswith("sha256:") + hex_part = result.digest[len("sha256:"):] + assert len(hex_part) == 64 + assert all(c in "0123456789abcdef" for c in hex_part) + + def test_matching_expected_digest_does_not_raise(self, monkeypatch): + mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + expected = "sha256:" + hashlib.sha256(mock_resp.content).hexdigest() + result = _fetch_url_direct("https://example.com/index.json", + expected_digest=expected) + assert result.digest == expected + + def test_mismatched_expected_digest_raises_fetch_error(self, monkeypatch): + mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + wrong = "sha256:" + "0" * 64 + with pytest.raises(MarketplaceFetchError, match="digest mismatch"): + _fetch_url_direct("https://example.com/index.json", expected_digest=wrong) + + def test_empty_expected_digest_skips_verification(self, monkeypatch): + mock_resp = 
_mock_response(200, json_body=_AGENT_SKILLS_INDEX) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + result = _fetch_url_direct("https://example.com/index.json", expected_digest="") + assert isinstance(result, FetchResult) + + +# --------------------------------------------------------------------------- +# fetch_marketplace — index digest storage + manifest fields (t5-test-02/03) +# --------------------------------------------------------------------------- + +_FIXED_DIGEST = "sha256:" + "b" * 64 + + +def _fetch_url_direct_stub(url, *, etag="", last_modified=""): + return FetchResult(data=_AGENT_SKILLS_INDEX, digest=_FIXED_DIGEST) + + +class TestFetchMarketplaceDigestStorage: + """fetch_marketplace must persist index_digest to .meta.json for URL sources.""" + + def test_meta_contains_index_digest_after_live_fetch(self, url_source, monkeypatch): + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + _fetch_url_direct_stub, + ) + fetch_marketplace(url_source, force_refresh=True) + meta_path = _cache_meta_path(_cache_key(url_source)) + with open(meta_path) as f: + meta = json.load(f) + assert meta.get("index_digest") == _FIXED_DIGEST + + def test_manifest_source_url_equals_source_url(self, url_source, monkeypatch): + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + _fetch_url_direct_stub, + ) + manifest = fetch_marketplace(url_source, force_refresh=True) + assert manifest.source_url == url_source.url + + def test_manifest_source_digest_equals_fetched_digest(self, url_source, monkeypatch): + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + _fetch_url_direct_stub, + ) + manifest = fetch_marketplace(url_source, force_refresh=True) + assert manifest.source_digest == _FIXED_DIGEST + + def test_cache_hit_manifest_carries_stored_digest(self, url_source): + """Warm cache that includes index_digest surfaces it on the manifest.""" + 
_write_cache_files_with_digest(url_source, _AGENT_SKILLS_INDEX, digest=_FIXED_DIGEST) + manifest = fetch_marketplace(url_source) + assert manifest.source_digest == _FIXED_DIGEST + + def test_cache_hit_without_digest_has_empty_source_digest(self, url_source): + """Older cache entries without index_digest return empty string (backward compat).""" + _write_cache_files(url_source, _AGENT_SKILLS_INDEX) + manifest = fetch_marketplace(url_source) + assert manifest.source_digest == "" + + +def _mock_response(status_code, *, json_body=None, json_error=None, raise_for_status=None): + """Build a minimal mock for requests.Response.""" + import unittest.mock as mock + + resp = mock.MagicMock() + resp.status_code = status_code + if json_body is not None: + body_bytes = json.dumps(json_body).encode() + resp.content = body_bytes + resp.json.return_value = json_body + else: + resp.content = b"" + if json_error is not None: + resp.json.side_effect = json_error + if raise_for_status is not None: + resp.raise_for_status.side_effect = raise_for_status + else: + resp.raise_for_status.return_value = None + return resp + + +def _never_called(name: str): + """Return a callable that fails loudly if invoked.""" + def _fn(*a, **kw): + raise AssertionError(f"{name} must not be called in this test") + return _fn + + +def _raises_fetch_error(source_name: str): + """Return a callable that raises MarketplaceFetchError.""" + def _fn(*a, **kw): + raise MarketplaceFetchError(source_name, "simulated network error") + return _fn + + +# --------------------------------------------------------------------------- +# Step 6: ETag / conditional refresh +# --------------------------------------------------------------------------- + +def _mock_response_with_headers(status_code, *, json_body=None, response_headers=None): + """Build a mock requests.Response with custom response headers.""" + import unittest.mock as mock + + resp = mock.MagicMock() + resp.status_code = status_code + response_headers = 
response_headers or {} + resp.headers = response_headers + if json_body is not None: + body_bytes = json.dumps(json_body).encode() + resp.content = body_bytes + resp.json.return_value = json_body + else: + resp.content = b"" + resp.raise_for_status.return_value = None + return resp + + +class TestConditionalFetchHeaders: + """_fetch_url_direct must send conditional request headers and handle 304.""" + + @pytest.mark.parametrize("kwarg,value,header_name", [ + ("etag", '"abc"', "If-None-Match"), + ("last_modified", "Mon, 13 Apr 2026 00:00:00 GMT", "If-Modified-Since"), + ]) + def test_conditional_header_sent(self, monkeypatch, kwarg, value, header_name): + captured = {} + + def fake_get(url, headers=None, timeout=None): + captured["headers"] = headers or {} + return _mock_response_with_headers(200, json_body=_AGENT_SKILLS_INDEX) + + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", fake_get) + _fetch_url_direct("https://example.com/index.json", **{kwarg: value}) + assert captured["headers"].get(header_name) == value + + def test_304_response_returns_none(self, monkeypatch): + monkeypatch.setattr( + "apm_cli.marketplace.client.requests.get", + lambda *a, **kw: _mock_response_with_headers(304), + ) + result = _fetch_url_direct("https://example.com/index.json", etag='"abc"') + assert result is None + + @pytest.mark.parametrize("resp_header,resp_value,result_field", [ + ("ETag", '"new-etag"', "etag"), + ("Last-Modified", "Sun, 12 Apr 2026 00:00:00 GMT", "last_modified"), + ]) + def test_fetch_result_captures_response_header(self, monkeypatch, + resp_header, resp_value, result_field): + monkeypatch.setattr( + "apm_cli.marketplace.client.requests.get", + lambda *a, **kw: _mock_response_with_headers( + 200, json_body=_AGENT_SKILLS_INDEX, + response_headers={resp_header: resp_value}, + ), + ) + result = _fetch_url_direct("https://example.com/index.json") + assert getattr(result, result_field) == resp_value + + def 
test_fetch_result_etag_empty_when_header_absent(self, monkeypatch): + monkeypatch.setattr( + "apm_cli.marketplace.client.requests.get", + lambda *a, **kw: _mock_response_with_headers(200, json_body=_AGENT_SKILLS_INDEX), + ) + result = _fetch_url_direct("https://example.com/index.json") + assert result.etag == "" + + +class TestConditionalCacheMeta: + """_write_cache/_read_stale_meta must round-trip ETag and Last-Modified.""" + + @pytest.mark.parametrize("kwarg,meta_key,value", [ + ("etag", "etag", '"v1"'), + ("last_modified", "last_modified", "Mon, 13 Apr 2026 00:00:00 GMT"), + ]) + def test_write_cache_stores_header_in_meta(self, tmp_path, kwarg, meta_key, value): + _write_cache("test-mkt", {"skills": []}, **{kwarg: value}) + meta_path = _cache_meta_path("test-mkt") + with open(meta_path) as f: + meta = json.load(f) + assert meta.get(meta_key) == value + + @pytest.mark.parametrize("kwarg,meta_key,value", [ + ("etag", "etag", '"v1"'), + ("last_modified", "last_modified", "Mon, 13 Apr 2026 00:00:00 GMT"), + ]) + def test_read_stale_meta_returns_header_from_expired_cache(self, tmp_path, + kwarg, meta_key, value): + _write_cache("test-mkt", {"skills": []}, **{kwarg: value}) + meta_path = _cache_meta_path("test-mkt") + with open(meta_path) as f: + meta = json.load(f) + meta["fetched_at"] = time.time() - 7200 + with open(meta_path, "w") as f: + json.dump(meta, f) + + stale_meta = _read_stale_meta("test-mkt") + assert stale_meta is not None + assert stale_meta.get(meta_key) == value + + def test_read_stale_meta_returns_none_when_no_cache(self): + assert _read_stale_meta("nonexistent-abc123") is None + + +def _write_cache_files_with_etag(source, data, *, etag="", last_modified="", expired=False): + """Write cache files including optional ETag/Last-Modified for test setup.""" + import apm_cli.marketplace.client as client_mod + cache_name = _cache_key(source) + data_path = _cache_data_path(cache_name) + meta_path = _cache_meta_path(cache_name) + with open(data_path, "w") as 
f: + json.dump(data, f) + fetched_at = time.time() - 7200 if expired else time.time() + meta = {"fetched_at": fetched_at, "ttl_seconds": 3600} + if etag: + meta["etag"] = etag + if last_modified: + meta["last_modified"] = last_modified + with open(meta_path, "w") as f: + json.dump(meta, f) + + +class TestFetchMarketplaceConditionalRefresh: + """fetch_marketplace must use conditional headers on stale URL cache re-fetch.""" + + @pytest.mark.parametrize("header_kwarg,header_value", [ + ("etag", '"stored-etag"'), + ("last_modified", "Mon, 13 Apr 2026 00:00:00 GMT"), + ]) + def test_expired_cache_sends_stored_header_on_refetch( + self, url_source, monkeypatch, header_kwarg, header_value + ): + _write_cache_files_with_etag( + url_source, _AGENT_SKILLS_INDEX, **{header_kwarg: header_value}, expired=True + ) + captured = {} + + def fake_fetch(url, *, etag=None, last_modified=None): + captured["etag"] = etag + captured["last_modified"] = last_modified + return FetchResult(data=_AGENT_SKILLS_INDEX, digest=_VALID_DIGEST, + etag=etag or "", last_modified=last_modified or "") + + monkeypatch.setattr("apm_cli.marketplace.client._fetch_url_direct", fake_fetch) + fetch_marketplace(url_source) + assert captured.get(header_kwarg) == header_value + + def test_expired_cache_304_serves_stale_data(self, url_source, monkeypatch): + _write_cache_files_with_etag( + url_source, _AGENT_SKILLS_INDEX, etag='"stored-etag"', expired=True + ) + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, *, etag=None, last_modified=None: None, + ) + manifest = fetch_marketplace(url_source) + assert manifest.plugins[0].name == "code-review" + + def test_expired_cache_304_resets_ttl(self, url_source, monkeypatch): + _write_cache_files_with_etag( + url_source, _AGENT_SKILLS_INDEX, etag='"stored-etag"', expired=True + ) + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, *, etag=None, last_modified=None: None, + ) + before = time.time() + 
fetch_marketplace(url_source) + meta_path = _cache_meta_path(_cache_key(url_source)) + with open(meta_path) as f: + meta = json.load(f) + assert meta["fetched_at"] >= before + + def test_new_etag_from_200_response_stored_in_meta(self, url_source, monkeypatch): + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, *, etag=None, last_modified=None: FetchResult( + data=_AGENT_SKILLS_INDEX, digest=_VALID_DIGEST, + etag='"brand-new"', last_modified="", + ), + ) + fetch_marketplace(url_source, force_refresh=True) + meta_path = _cache_meta_path(_cache_key(url_source)) + with open(meta_path) as f: + meta = json.load(f) + assert meta.get("etag") == '"brand-new"' + + +# --------------------------------------------------------------------------- +# D4/T8: HTTPS scheme enforcement +# --------------------------------------------------------------------------- + + +class TestFetchUrlDirectHttpsEnforcement: + """_fetch_url_direct must reject non-HTTPS schemes before making any network call.""" + + @pytest.mark.parametrize("url", [ + "http://example.com/index.json", + "ftp://example.com/index.json", + "file:///etc/passwd", + ]) + def test_non_https_scheme_raises(self, monkeypatch, url): + monkeypatch.setattr( + "apm_cli.marketplace.client.requests.get", + _never_called("requests.get"), + ) + with pytest.raises(MarketplaceFetchError, match="HTTPS"): + _fetch_url_direct(url) + + def test_https_url_is_accepted(self, monkeypatch): + mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", lambda *a, **kw: mock_resp) + result = _fetch_url_direct("https://example.com/index.json") + assert isinstance(result, FetchResult) + + +# --------------------------------------------------------------------------- +# T12: timeout error message includes URL +# --------------------------------------------------------------------------- + + +class TestFetchUrlDirectErrorMessages: + 
"""_fetch_url_direct must produce informative MarketplaceFetchError messages.""" + + def test_timeout_error_message_includes_url(self, monkeypatch): + url = "https://example.com/index.json" + + def fake_get(*a, **kw): + raise requests.exceptions.Timeout("timed out after 30s") + + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", fake_get) + exc = pytest.raises(MarketplaceFetchError, _fetch_url_direct, url) + assert url in str(exc.value) + assert "timed" in str(exc.value).lower() or "timeout" in str(exc.value).lower() or "30" in str(exc.value) + + def test_connection_error_message_includes_url(self, monkeypatch): + url = "https://example.com/index.json" + + def fake_get(*a, **kw): + raise requests.exceptions.ConnectionError("connection refused") + + monkeypatch.setattr("apm_cli.marketplace.client.requests.get", fake_get) + exc = pytest.raises(MarketplaceFetchError, _fetch_url_direct, url) + assert url in str(exc.value) + + +# --------------------------------------------------------------------------- +# E5: on_stale_warning callback +# --------------------------------------------------------------------------- + + +class TestFetchMarketplaceStaleWarning: + """fetch_marketplace must call on_stale_warning when serving expired cache.""" + + def test_on_stale_warning_called_on_network_error(self, url_source, monkeypatch): + _write_cache_files(url_source, _AGENT_SKILLS_INDEX, expired=True) + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + _raises_fetch_error(url_source.name), + ) + warnings = [] + fetch_marketplace(url_source, on_stale_warning=warnings.append) + assert len(warnings) == 1 + assert url_source.name in warnings[0] + + def test_on_stale_warning_not_called_on_fresh_fetch(self, url_source, monkeypatch): + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, **kw: FetchResult(data=_AGENT_SKILLS_INDEX, digest=_VALID_DIGEST), + ) + warnings = [] + fetch_marketplace(url_source, 
on_stale_warning=warnings.append) + assert warnings == [] + + def test_on_stale_warning_not_called_on_cache_hit(self, url_source, monkeypatch): + _write_cache_files(url_source, _AGENT_SKILLS_INDEX, expired=False) + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + _never_called("_fetch_url_direct"), + ) + warnings = [] + fetch_marketplace(url_source, on_stale_warning=warnings.append) + assert warnings == [] + + +# --------------------------------------------------------------------------- +# Stale-while-revalidate — source_digest preservation +# --------------------------------------------------------------------------- + + +class TestFetchMarketplaceStaleDigestPreservation: + """Stale fallback on network error must preserve source_digest from cached meta.""" + + def test_stale_fallback_preserves_source_digest(self, url_source, monkeypatch): + """source_digest must equal stored index_digest when stale cache is served.""" + stored_digest = "sha256:" + "b" * 64 + _write_cache_files_with_digest( + url_source, _AGENT_SKILLS_INDEX, digest=stored_digest, expired=True + ) + from apm_cli.marketplace.errors import MarketplaceFetchError as MFE + + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, **kw: (_ for _ in ()).throw( + MFE(url_source.name, "network error") + ), + ) + manifest = fetch_marketplace(url_source) + assert manifest.source_digest == stored_digest + + def test_stale_fallback_preserves_source_url(self, url_source, monkeypatch): + """source_url must equal source.url when stale cache is served.""" + _write_cache_files_with_digest( + url_source, _AGENT_SKILLS_INDEX, digest=_VALID_DIGEST, expired=True + ) + from apm_cli.marketplace.errors import MarketplaceFetchError as MFE + + monkeypatch.setattr( + "apm_cli.marketplace.client._fetch_url_direct", + lambda url, **kw: (_ for _ in ()).throw( + MFE(url_source.name, "network error") + ), + ) + manifest = fetch_marketplace(url_source) + assert 
manifest.source_url == url_source.url diff --git a/tests/unit/marketplace/test_marketplace_url_commands.py b/tests/unit/marketplace/test_marketplace_url_commands.py new file mode 100644 index 00000000..0352e089 --- /dev/null +++ b/tests/unit/marketplace/test_marketplace_url_commands.py @@ -0,0 +1,505 @@ +"""Tests for URL-based marketplace add command. + +Covers: URL detection, .well-known auto-resolution, name derivation, +HTTPS enforcement, fetch wiring, and GitHub regression guard. +Tests are kept in a separate file from test_marketplace_commands.py +which covers GitHub/OWNER/REPO sources only. +""" + +from unittest.mock import MagicMock, patch + +import pytest +from click.testing import CliRunner + +from apm_cli.marketplace.models import ( + MarketplaceManifest, + MarketplacePlugin, + MarketplaceSource, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def runner(): + return CliRunner() + + +@pytest.fixture(autouse=True) +def _isolate_config(tmp_path, monkeypatch): + """Redirect all filesystem state to a temp dir.""" + config_dir = str(tmp_path / ".apm") + monkeypatch.setattr("apm_cli.config.CONFIG_DIR", config_dir) + monkeypatch.setattr( + "apm_cli.config.CONFIG_FILE", str(tmp_path / ".apm" / "config.json") + ) + monkeypatch.setattr("apm_cli.config._config_cache", None) + monkeypatch.setattr("apm_cli.marketplace.registry._registry_cache", None) + + +@pytest.fixture +def mock_url_manifest(): + return MarketplaceManifest( + name="example-skills", + plugins=( + MarketplacePlugin(name="code-review", description="Reviews code"), + MarketplacePlugin(name="security-scan", description="Scans"), + ), + ) + + +# --------------------------------------------------------------------------- +# URL detection +# --------------------------------------------------------------------------- + + +class TestURLDetection: + """add 
command detects https:// arguments and routes to URL path.""" + + @patch("apm_cli.marketplace.registry.add_marketplace") + @patch("apm_cli.marketplace.client.fetch_marketplace") + def test_https_url_not_rejected_as_invalid_format( + self, mock_fetch, mock_add, runner, mock_url_manifest + ): + """https:// argument must NOT produce 'Invalid format' error.""" + from apm_cli.commands.marketplace import marketplace + + mock_fetch.return_value = mock_url_manifest + result = runner.invoke( + marketplace, + ["add", "https://example.com/.well-known/agent-skills/index.json"], + ) + assert "Invalid format" not in result.output + assert "OWNER/REPO" not in result.output + + @patch("apm_cli.marketplace.registry.add_marketplace") + @patch("apm_cli.marketplace.client.fetch_marketplace") + def test_url_source_type_is_url( + self, mock_fetch, mock_add, runner, mock_url_manifest + ): + """Source passed to add_marketplace must have source_type='url'.""" + from apm_cli.commands.marketplace import marketplace + + mock_fetch.return_value = mock_url_manifest + runner.invoke( + marketplace, + ["add", "https://example.com/.well-known/agent-skills/index.json"], + ) + assert mock_add.called + source = mock_add.call_args[0][0] + assert source.source_type == "url" + + def test_http_url_rejected(self, runner): + """Plain http:// (non-TLS) must be rejected — RFC requires HTTPS.""" + from apm_cli.commands.marketplace import marketplace + + result = runner.invoke(marketplace, ["add", "http://example.com"]) + assert result.exit_code != 0 + assert "https" in result.output.lower() + + +# --------------------------------------------------------------------------- +# .well-known auto-resolution +# --------------------------------------------------------------------------- + + +class TestWellKnownResolution: + """Bare origin URLs are automatically resolved to the .well-known path.""" + + @pytest.mark.parametrize("input_url,expected_url", [ + ("https://example.com", + 
"https://example.com/.well-known/agent-skills/index.json"), + ("https://example.com/", + "https://example.com/.well-known/agent-skills/index.json"), + ("https://example.com/.well-known/agent-skills/index.json", + "https://example.com/.well-known/agent-skills/index.json"), + ("https://example.com/.well-known/agent-skills/", + "https://example.com/.well-known/agent-skills/index.json"), + ("https://example.com/.well-known/agent-skills", + "https://example.com/.well-known/agent-skills/index.json"), + ]) + @patch("apm_cli.marketplace.registry.add_marketplace") + @patch("apm_cli.marketplace.client.fetch_marketplace") + def test_well_known_url_resolution( + self, mock_fetch, mock_add, runner, mock_url_manifest, + input_url, expected_url + ): + from apm_cli.commands.marketplace import marketplace + + mock_fetch.return_value = mock_url_manifest + runner.invoke(marketplace, ["add", input_url]) + source = mock_add.call_args[0][0] + assert source.url == expected_url + + +# --------------------------------------------------------------------------- +# Display name derivation +# --------------------------------------------------------------------------- + + +class TestDisplayNameDerivation: + """Display name comes from --name or is derived from the URL hostname.""" + + @pytest.mark.parametrize("args,expected_name", [ + (["add", "https://skills.example.com"], "skills.example.com"), + (["add", "https://example.com", "--name", "my-skills"], "my-skills"), + ]) + @patch("apm_cli.marketplace.registry.add_marketplace") + @patch("apm_cli.marketplace.client.fetch_marketplace") + def test_name_resolved_correctly( + self, mock_fetch, mock_add, runner, mock_url_manifest, args, expected_name + ): + from apm_cli.commands.marketplace import marketplace + + mock_fetch.return_value = mock_url_manifest + runner.invoke(marketplace, args) + source = mock_add.call_args[0][0] + assert source.name == expected_name + + def test_invalid_name_rejected(self, runner): + """Names with spaces or special 
chars are rejected.""" + from apm_cli.commands.marketplace import marketplace + + result = runner.invoke( + marketplace, + ["add", "https://example.com", "--name", "bad name!"], + ) + assert result.exit_code != 0 + assert "invalid" in result.output.lower() or "name" in result.output.lower() + + +# --------------------------------------------------------------------------- +# Fetch wiring +# --------------------------------------------------------------------------- + + +class TestFetchWiring: + """fetch_marketplace and add_marketplace are called correctly.""" + + @patch("apm_cli.marketplace.registry.add_marketplace") + @patch("apm_cli.marketplace.client.fetch_marketplace") + def test_fetch_marketplace_called_with_url_source( + self, mock_fetch, mock_add, runner, mock_url_manifest + ): + """fetch_marketplace must be called with a URL-typed source.""" + from apm_cli.commands.marketplace import marketplace + + mock_fetch.return_value = mock_url_manifest + result = runner.invoke( + marketplace, + ["add", "https://example.com/.well-known/agent-skills/index.json"], + ) + assert result.exit_code == 0 + assert mock_fetch.called + source = mock_fetch.call_args[0][0] + assert source.source_type == "url" + + @patch("apm_cli.marketplace.registry.add_marketplace") + @patch("apm_cli.marketplace.client.fetch_marketplace") + def test_add_marketplace_called_on_success( + self, mock_fetch, mock_add, runner, mock_url_manifest + ): + """add_marketplace must be called when fetch succeeds.""" + from apm_cli.commands.marketplace import marketplace + + mock_fetch.return_value = mock_url_manifest + result = runner.invoke(marketplace, ["add", "https://example.com"]) + assert result.exit_code == 0 + assert mock_add.called + + @patch("apm_cli.marketplace.registry.add_marketplace") + @patch("apm_cli.marketplace.client.fetch_marketplace") + def test_success_message_shown( + self, mock_fetch, mock_add, runner, mock_url_manifest + ): + """Success output must mention the marketplace name and skill 
count.""" + from apm_cli.commands.marketplace import marketplace + + mock_fetch.return_value = mock_url_manifest + result = runner.invoke( + marketplace, + ["add", "https://example.com", "--name", "my-skills"], + ) + assert result.exit_code == 0 + assert "my-skills" in result.output + assert "2" in result.output # 2 plugins in mock manifest + + @patch("apm_cli.marketplace.client.fetch_marketplace") + def test_fetch_failure_exits_with_error(self, mock_fetch, runner): + """If fetch raises, command exits non-zero with an error message.""" + from apm_cli.commands.marketplace import marketplace + + mock_fetch.side_effect = Exception("connection refused") + result = runner.invoke(marketplace, ["add", "https://example.com"]) + assert result.exit_code != 0 + assert "failed" in result.output.lower() or "error" in result.output.lower() + + +# --------------------------------------------------------------------------- +# GitHub regression guard +# --------------------------------------------------------------------------- + + +class TestGitHubRegression: + """Existing OWNER/REPO path must be completely unaffected.""" + + @patch("apm_cli.marketplace.registry.add_marketplace") + @patch("apm_cli.marketplace.client.fetch_marketplace") + @patch("apm_cli.marketplace.client._auto_detect_path") + def test_owner_repo_still_works( + self, mock_detect, mock_fetch, mock_add, runner + ): + """OWNER/REPO argument routes to GitHub path, not URL path.""" + from apm_cli.commands.marketplace import marketplace + + mock_detect.return_value = "marketplace.json" + mock_fetch.return_value = MarketplaceManifest( + name="Test", plugins=(MarketplacePlugin(name="p1"),) + ) + result = runner.invoke(marketplace, ["add", "acme-org/plugins"]) + assert result.exit_code == 0 + source = mock_add.call_args[0][0] + assert source.source_type == "github" + assert source.owner == "acme-org" + assert source.repo == "plugins" + + @patch("apm_cli.marketplace.registry.add_marketplace") + 
@patch("apm_cli.marketplace.client.fetch_marketplace") + @patch("apm_cli.marketplace.client._auto_detect_path") + def test_owner_repo_does_not_call_url_path( + self, mock_detect, mock_fetch, mock_add, runner + ): + """_auto_detect_path must still be called for GitHub sources.""" + from apm_cli.commands.marketplace import marketplace + + mock_detect.return_value = "marketplace.json" + mock_fetch.return_value = MarketplaceManifest( + name="Test", plugins=(MarketplacePlugin(name="p1"),) + ) + runner.invoke(marketplace, ["add", "acme-org/plugins"]) + assert mock_detect.called + + +# --------------------------------------------------------------------------- +# list command — URL source display (t8-test-07) +# --------------------------------------------------------------------------- + + +class TestListURLSource: + """list command must show URL for URL-based sources, not owner/repo.""" + + def _register_url_source(self, config_dir): + """Write a URL source into the marketplaces registry file.""" + import json, os + os.makedirs(config_dir, exist_ok=True) + registry = os.path.join(config_dir, "marketplaces.json") + with open(registry, "w") as f: + json.dump( + {"marketplaces": [{"name": "example-skills", "source_type": "url", + "url": "https://example.com/.well-known/agent-skills/index.json"}]}, + f, + ) + + def test_list_plaintext_shows_url_not_owner_repo(self, runner, tmp_path, monkeypatch): + from apm_cli.commands.marketplace import marketplace + + config_dir = str(tmp_path / ".apm") + monkeypatch.setattr("apm_cli.config.CONFIG_DIR", config_dir) + monkeypatch.setattr("apm_cli.config.CONFIG_FILE", str(tmp_path / ".apm" / "config.json")) + monkeypatch.setattr("apm_cli.config._config_cache", None) + monkeypatch.setattr("apm_cli.marketplace.registry._registry_cache", None) + self._register_url_source(config_dir) + + # Force plain output (no Rich console) by patching _get_console to return None + with patch("apm_cli.commands.marketplace._get_console", return_value=None): + 
result = runner.invoke(marketplace, ["list"]) + + assert "https://example.com" in result.output + # Must not show "(/)"-style owner/repo for a URL source + assert "(/" not in result.output + + def test_list_does_not_show_default_github_fields_for_url_source( + self, runner, tmp_path, monkeypatch + ): + from apm_cli.commands.marketplace import marketplace + + config_dir = str(tmp_path / ".apm") + monkeypatch.setattr("apm_cli.config.CONFIG_DIR", config_dir) + monkeypatch.setattr("apm_cli.config.CONFIG_FILE", str(tmp_path / ".apm" / "config.json")) + monkeypatch.setattr("apm_cli.config._config_cache", None) + monkeypatch.setattr("apm_cli.marketplace.registry._registry_cache", None) + self._register_url_source(config_dir) + + with patch("apm_cli.commands.marketplace._get_console", return_value=None): + result = runner.invoke(marketplace, ["list"]) + + # Default GitHub placeholder values must not bleed through + assert "marketplace.json" not in result.output + assert "main" not in result.output + + +# --------------------------------------------------------------------------- +# remove command — URL source display (t8-test-08, t8-test-09) +# --------------------------------------------------------------------------- + + +class TestRemoveURLSource: + """remove command must show URL in confirmation and clear correct cache.""" + + def _register_url_source(self, config_dir): + import json, os + os.makedirs(config_dir, exist_ok=True) + registry = os.path.join(config_dir, "marketplaces.json") + with open(registry, "w") as f: + json.dump( + {"marketplaces": [{"name": "example-skills", "source_type": "url", + "url": "https://example.com/.well-known/agent-skills/index.json"}]}, + f, + ) + + def test_remove_confirmation_shows_url(self, runner, tmp_path, monkeypatch): + from apm_cli.commands.marketplace import marketplace + + config_dir = str(tmp_path / ".apm") + monkeypatch.setattr("apm_cli.config.CONFIG_DIR", config_dir) + monkeypatch.setattr("apm_cli.config.CONFIG_FILE", 
str(tmp_path / ".apm" / "config.json")) + monkeypatch.setattr("apm_cli.config._config_cache", None) + monkeypatch.setattr("apm_cli.marketplace.registry._registry_cache", None) + self._register_url_source(config_dir) + + # Provide "n" so it cancels without actually removing + result = runner.invoke(marketplace, ["remove", "example-skills"], input="n\n") + + assert "https://example.com" in result.output + # Confirm text must not show default "(/)"-style placeholder + assert "(/" not in result.output + + def test_remove_yes_clears_url_cache_key(self, runner, tmp_path, monkeypatch): + """--yes must clear the sha256-based cache slot, not a host-based one.""" + import hashlib, json, os, time + from apm_cli.marketplace.client import _cache_data_path, _cache_meta_path + + config_dir = str(tmp_path / ".apm") + monkeypatch.setattr("apm_cli.config.CONFIG_DIR", config_dir) + monkeypatch.setattr("apm_cli.config.CONFIG_FILE", str(tmp_path / ".apm" / "config.json")) + monkeypatch.setattr("apm_cli.config._config_cache", None) + monkeypatch.setattr("apm_cli.marketplace.registry._registry_cache", None) + self._register_url_source(config_dir) + + # Write a cache file at the correct sha256-based key + url = "https://example.com/.well-known/agent-skills/index.json" + cache_key = hashlib.sha256(url.encode()).hexdigest()[:16] + cache_dir = os.path.join(config_dir, "cache", "marketplace") + os.makedirs(cache_dir, exist_ok=True) + data_path = _cache_data_path(cache_key) + meta_path = _cache_meta_path(cache_key) + with open(data_path, "w") as f: + json.dump({"skills": []}, f) + with open(meta_path, "w") as f: + json.dump({"fetched_at": time.time(), "ttl_seconds": 3600}, f) + + from apm_cli.commands.marketplace import marketplace + result = runner.invoke(marketplace, ["remove", "--yes", "example-skills"]) + + assert result.exit_code == 0 + assert not os.path.exists(data_path), "Cache data file must be removed" + assert not os.path.exists(meta_path), "Cache meta file must be removed" + + + +# 
--------------------------------------------------------------------------- +# E1 / T9 / T10: targeted exception messages in marketplace add +# --------------------------------------------------------------------------- + + +class TestAddCommandErrorMessages: + """marketplace add must produce targeted error messages, not generic wrapping.""" + + @patch("apm_cli.marketplace.client.fetch_marketplace") + def test_schema_error_shows_invalid_index_format(self, mock_fetch, runner): + """T9: wrong $schema → 'Invalid index format', not generic 'Failed to register'.""" + from apm_cli.commands.marketplace import marketplace + + mock_fetch.side_effect = ValueError( + "Unrecognized or missing Agent Skills index $schema: 'bad-schema'" + ) + result = runner.invoke(marketplace, ["add", "https://example.com/skills.json"]) + assert result.exit_code == 1 + assert "Failed to register marketplace" not in result.output + assert "schema" in result.output.lower() or "index format" in result.output.lower() + + @patch("apm_cli.marketplace.client.fetch_marketplace") + def test_fetch_error_not_double_wrapped(self, mock_fetch, runner): + """T10: MarketplaceFetchError message not wrapped in 'Failed to register marketplace:'.""" + from apm_cli.commands.marketplace import marketplace + from apm_cli.marketplace.errors import MarketplaceFetchError + + mock_fetch.side_effect = MarketplaceFetchError( + "example.com", "Unrecognized index format at 'https://example.com/'" + ) + result = runner.invoke(marketplace, ["add", "https://example.com/skills.json"]) + assert result.exit_code == 1 + assert "Failed to register marketplace" not in result.output + assert "Failed to fetch marketplace" in result.output + + +# --------------------------------------------------------------------------- +# update command — URL source cache clearing +# --------------------------------------------------------------------------- + + +class TestUpdateURLSource: + """marketplace update must clear the correct (SHA256-based) 
cache for URL sources.""" + + def _make_url_source(self, name="example-skills"): + return MarketplaceSource( + name=name, + source_type="url", + url="https://example.com/.well-known/agent-skills/index.json", + ) + + @patch("apm_cli.marketplace.client.fetch_marketplace") + @patch("apm_cli.marketplace.client.clear_marketplace_cache") + @patch("apm_cli.marketplace.registry.get_marketplace_by_name") + def test_update_single_url_source_passes_source_to_clear( + self, mock_get, mock_clear, mock_fetch, runner + ): + """Single-name update must call clear_marketplace_cache(source=).""" + from apm_cli.commands.marketplace import marketplace + + url_source = self._make_url_source() + mock_get.return_value = url_source + mock_fetch.return_value = MarketplaceManifest( + name="example-skills", plugins=() + ) + runner.invoke(marketplace, ["update", "example-skills"]) + mock_clear.assert_called_once() + _, kwargs = mock_clear.call_args + assert kwargs.get("source") == url_source, ( + "clear_marketplace_cache must receive source= for URL sources, " + "not name+host which generates a wrong cache key" + ) + + @patch("apm_cli.marketplace.client.fetch_marketplace") + @patch("apm_cli.marketplace.client.clear_marketplace_cache") + @patch("apm_cli.marketplace.registry.get_registered_marketplaces") + def test_update_all_url_sources_passes_source_to_clear( + self, mock_list, mock_clear, mock_fetch, runner + ): + """Bulk update must call clear_marketplace_cache(source=s) for URL sources.""" + from apm_cli.commands.marketplace import marketplace + + url_source = self._make_url_source() + mock_list.return_value = [url_source] + mock_fetch.return_value = MarketplaceManifest( + name="example-skills", plugins=() + ) + runner.invoke(marketplace, ["update"]) + mock_clear.assert_called_once() + _, kwargs = mock_clear.call_args + assert kwargs.get("source") == url_source, ( + "clear_marketplace_cache must receive source= for URL sources" + ) diff --git 
a/tests/unit/marketplace/test_marketplace_url_models.py b/tests/unit/marketplace/test_marketplace_url_models.py index a7709499..52909ef0 100644 --- a/tests/unit/marketplace/test_marketplace_url_models.py +++ b/tests/unit/marketplace/test_marketplace_url_models.py @@ -1,7 +1,8 @@ """Tests for URL-based marketplace source and Agent Skills index parser. -Covers Step 1 (MarketplaceSource URL extension) and Step 4 -(parse_agent_skills_index) from the issue #676 implementation plan. +Covers MarketplaceSource URL fields, serialization round-trips, and the +parse_agent_skills_index() parser including schema enforcement, skill name +validation, and source type handling. """ import pytest @@ -33,7 +34,7 @@ # --------------------------------------------------------------------------- -# MarketplaceSource — URL extension (Step 1) +# MarketplaceSource — URL fields # --------------------------------------------------------------------------- @@ -121,6 +122,23 @@ def test_github_from_dict_backward_compat_no_source_type(self): src = MarketplaceSource.from_dict(d) assert src.source_type == "github" + def test_github_from_dict_explicit_source_type(self): + """Explicit source_type='github' in dict is honoured by from_dict.""" + d = {"name": "acme", "owner": "acme-org", "repo": "plugins", "source_type": "github"} + src = MarketplaceSource.from_dict(d) + assert src.source_type == "github" + assert src.owner == "acme-org" + + def test_url_source_to_dict_omits_github_only_fields(self): + """URL to_dict must not include host, branch, or path.""" + src = MarketplaceSource( + name="x", source_type="url", url="https://example.com" + ) + d = src.to_dict() + assert "host" not in d + assert "branch" not in d + assert "path" not in d + # --- roundtrip --- def test_url_source_roundtrip(self): @@ -146,7 +164,7 @@ def test_github_source_roundtrip_unchanged(self): # --------------------------------------------------------------------------- -# parse_agent_skills_index (Step 4) +# 
parse_agent_skills_index # --------------------------------------------------------------------------- @@ -222,6 +240,12 @@ def test_empty_skills_list(self): manifest = parse_agent_skills_index(data, "test") assert len(manifest.plugins) == 0 + def test_missing_skills_key_returns_empty_manifest(self): + """No 'skills' key present — returns an empty manifest rather than raising.""" + data = {"$schema": _KNOWN_SCHEMA} + manifest = parse_agent_skills_index(data, "test") + assert len(manifest.plugins) == 0 + # --- $schema enforcement --- def test_known_schema_accepted(self): @@ -246,98 +270,274 @@ def test_non_string_schema_raises(self): # --- skill name validation (RFC: 1-64 chars, lowercase alnum + hyphens) --- - def test_valid_name_simple(self): + @pytest.mark.parametrize("name", [ + "my-skill", + "skill-v2-final", + "a", + "a" * 64, + "123", + ]) + def test_valid_name_accepted(self, name): data = { "$schema": _KNOWN_SCHEMA, "skills": [ - {"name": "my-skill", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + {"name": name, "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} ], } assert len(parse_agent_skills_index(data, "t").plugins) == 1 - def test_valid_name_with_numbers(self): + @pytest.mark.parametrize("name", [ + "MySkill", + "bad name", + "-bad", + "bad-", + "bad--name", + "a" * 65, + "my_skill", + "my.skill", + "", + ]) + def test_invalid_name_skipped(self, name): data = { "$schema": _KNOWN_SCHEMA, "skills": [ - {"name": "skill-v2-final", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + {"name": name, "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} ], } - assert len(parse_agent_skills_index(data, "t").plugins) == 1 + assert len(parse_agent_skills_index(data, "t").plugins) == 0 - def test_invalid_name_uppercase_skipped(self): + def test_missing_name_skipped(self): data = { "$schema": _KNOWN_SCHEMA, "skills": [ - {"name": "MySkill", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + {"type": "skill-md", "url": "/x", 
"digest": _VALID_DIGEST} ], } assert len(parse_agent_skills_index(data, "t").plugins) == 0 - def test_invalid_name_spaces_skipped(self): + # --- mixed valid/invalid entries --- + + def test_only_valid_entries_returned(self): data = { "$schema": _KNOWN_SCHEMA, "skills": [ - {"name": "bad name", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + {"name": "good-skill", "type": "skill-md", "url": "/a", "digest": _VALID_DIGEST}, + {"name": "Bad Skill!", "type": "skill-md", "url": "/b", "digest": _VALID_DIGEST}, + {"type": "skill-md", "url": "/c", "digest": _VALID_DIGEST}, # no name ], } - assert len(parse_agent_skills_index(data, "t").plugins) == 0 + manifest = parse_agent_skills_index(data, "test") + assert len(manifest.plugins) == 1 + assert manifest.plugins[0].name == "good-skill" + + # --- non-string / non-dict entry handling --- - def test_invalid_name_leading_hyphen_skipped(self): + def test_non_string_name_skipped(self): + """Integer name field must not raise AttributeError (bug guard).""" data = { "$schema": _KNOWN_SCHEMA, "skills": [ - {"name": "-bad", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + {"name": 42, "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} ], } assert len(parse_agent_skills_index(data, "t").plugins) == 0 - def test_invalid_name_trailing_hyphen_skipped(self): + def test_non_dict_entry_in_skills_skipped(self): + """Non-dict items in skills list are silently skipped.""" + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + "not-a-dict", + {"name": "good-skill", "type": "skill-md", "url": "/a", "digest": _VALID_DIGEST}, + ], + } + manifest = parse_agent_skills_index(data, "t") + assert len(manifest.plugins) == 1 + + def test_skills_not_a_list_returns_empty(self): + """If 'skills' is not a list, parser warns and returns empty manifest.""" + data = {"$schema": _KNOWN_SCHEMA, "skills": {"name": "oops"}} + manifest = parse_agent_skills_index(data, "t") + assert len(manifest.plugins) == 0 + + # --- source_name / 
manifest.name --- + + def test_empty_source_name_yields_unknown_manifest_name(self): + """Empty source_name falls back to 'unknown' in manifest.name.""" + manifest = parse_agent_skills_index(_SINGLE_SKILL_INDEX, "") + assert manifest.name == "unknown" + + # --- optional entry fields default to empty string --- + + def test_missing_description_defaults_to_empty(self): data = { "$schema": _KNOWN_SCHEMA, "skills": [ - {"name": "bad-", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + {"name": "my-skill", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} ], } - assert len(parse_agent_skills_index(data, "t").plugins) == 0 + p = parse_agent_skills_index(data, "t").plugins[0] + assert p.description == "" - def test_invalid_name_consecutive_hyphens_skipped(self): + def test_missing_url_in_entry_defaults_to_empty(self): data = { "$schema": _KNOWN_SCHEMA, "skills": [ - {"name": "bad--name", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + {"name": "my-skill", "type": "skill-md", "digest": _VALID_DIGEST} ], } - assert len(parse_agent_skills_index(data, "t").plugins) == 0 + p = parse_agent_skills_index(data, "t").plugins[0] + assert p.source["url"] == "" - def test_invalid_name_too_long_skipped(self): + def test_entry_without_digest_is_skipped(self): data = { "$schema": _KNOWN_SCHEMA, "skills": [ - {"name": "a" * 65, "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + {"name": "my-skill", "type": "skill-md", "url": "/x"} ], } assert len(parse_agent_skills_index(data, "t").plugins) == 0 - def test_missing_name_skipped(self): + # --- duplicate names --- + + def test_duplicate_skill_names_both_accepted(self): + """Parser does not deduplicate; both entries are returned.""" data = { "$schema": _KNOWN_SCHEMA, "skills": [ - {"type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + {"name": "my-skill", "type": "skill-md", "url": "/a", "digest": _VALID_DIGEST}, + {"name": "my-skill", "type": "archive", "url": "/b.tar.gz", "digest": 
_VALID_DIGEST}, + ], + } + manifest = parse_agent_skills_index(data, "t") + assert len(manifest.plugins) == 2 + + +# --------------------------------------------------------------------------- +# MarketplaceManifest — source provenance fields (t5-test-04) +# --------------------------------------------------------------------------- + + +class TestManifestSourceFields: + """MarketplaceManifest must carry source_url and source_digest for provenance.""" + + def test_parse_agent_skills_index_default_source_fields_are_empty(self): + manifest = parse_agent_skills_index(_SINGLE_SKILL_INDEX, "test") + assert manifest.source_url == "" + assert manifest.source_digest == "" + + def test_parse_agent_skills_index_accepts_source_url_kwarg(self): + manifest = parse_agent_skills_index( + _SINGLE_SKILL_INDEX, "test", + source_url="https://example.com/.well-known/agent-skills/index.json", + ) + assert manifest.source_url == "https://example.com/.well-known/agent-skills/index.json" + + def test_parse_agent_skills_index_accepts_source_digest_kwarg(self): + manifest = parse_agent_skills_index( + _SINGLE_SKILL_INDEX, "test", + source_digest="sha256:" + "a" * 64, + ) + assert manifest.source_digest == "sha256:" + "a" * 64 + + +# --------------------------------------------------------------------------- +# Digest format validation (t5-test-06) +# --------------------------------------------------------------------------- + + +class TestDigestFormatValidation: + """parse_agent_skills_index must skip entries with malformed digest values.""" + + def test_valid_digest_entry_is_included(self): + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "ok-skill", "type": "skill-md", "url": "/x", "digest": _VALID_DIGEST} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 1 + + @pytest.mark.parametrize("digest", [ + "md5:abc123", + "sha256:abc", + "SHA256:" + "a" * 64, + ]) + def test_malformed_digest_entry_skipped(self, digest): + data = { + "$schema": _KNOWN_SCHEMA, + 
"skills": [ + {"name": "bad-skill", "type": "skill-md", "url": "/x", "digest": digest} ], } assert len(parse_agent_skills_index(data, "t").plugins) == 0 - # --- mixed valid/invalid entries --- + def test_missing_digest_entry_skipped(self): + """A skill entry with no digest is skipped — digest is required by the RFC.""" + data = { + "$schema": _KNOWN_SCHEMA, + "skills": [ + {"name": "no-digest", "type": "skill-md", "url": "/x"} + ], + } + assert len(parse_agent_skills_index(data, "t").plugins) == 0 - def test_only_valid_entries_returned(self): + def test_valid_and_invalid_digest_only_valid_included(self): + """Mixed entries: only those with a valid digest are returned.""" data = { "$schema": _KNOWN_SCHEMA, "skills": [ - {"name": "good-skill", "type": "skill-md", "url": "/a", "digest": _VALID_DIGEST}, - {"name": "Bad Skill!", "type": "skill-md", "url": "/b", "digest": _VALID_DIGEST}, - {"type": "skill-md", "url": "/c", "digest": _VALID_DIGEST}, # no name + {"name": "good", "type": "skill-md", "url": "/a", "digest": _VALID_DIGEST}, + {"name": "bad", "type": "skill-md", "url": "/b", "digest": "sha256:short"}, ], } - manifest = parse_agent_skills_index(data, "test") - assert len(manifest.plugins) == 1 - assert manifest.plugins[0].name == "good-skill" + plugins = parse_agent_skills_index(data, "t").plugins + assert len(plugins) == 1 + assert plugins[0].name == "good" + + +# --------------------------------------------------------------------------- +# E2 / T11: warning level for invalid skill name entries +# --------------------------------------------------------------------------- + + +class TestInvalidNameLogLevel: + """parse_agent_skills_index must emit WARNING (not DEBUG) for invalid names.""" + + def test_invalid_names_emit_warning_not_debug(self, caplog): + import logging + + _V = "sha256:" + "a" * 64 + data = { + "$schema": "https://schemas.agentskills.io/discovery/0.2.0/schema.json", + "skills": [ + {"name": "INVALID_UPPER", "type": "skill-md", "url": "/a", 
"digest": _V}, + {"name": "also-invalid!", "type": "skill-md", "url": "/b", "digest": _V}, + {"name": "valid-skill", "type": "skill-md", "url": "/c", "digest": _V}, + ], + } + with caplog.at_level(logging.WARNING, logger="apm_cli.marketplace.models"): + result = parse_agent_skills_index(data, "test-src") + + assert len(result.plugins) == 1 + assert result.plugins[0].name == "valid-skill" + warning_messages = [r.message for r in caplog.records if r.levelno == logging.WARNING] + assert len(warning_messages) == 2 + + def test_structural_issues_do_not_produce_warnings(self, caplog): + """Non-dict entries and missing name are structural noise — keep at DEBUG.""" + import logging + + data = { + "$schema": "https://schemas.agentskills.io/discovery/0.2.0/schema.json", + "skills": [ + "not-a-dict", + None, + ], + } + with caplog.at_level(logging.WARNING, logger="apm_cli.marketplace.models"): + result = parse_agent_skills_index(data, "test-src") + + assert len(result.plugins) == 0 + warning_messages = [r.message for r in caplog.records if r.levelno == logging.WARNING] + assert len(warning_messages) == 0 diff --git a/tests/unit/marketplace/test_marketplace_url_resolver.py b/tests/unit/marketplace/test_marketplace_url_resolver.py new file mode 100644 index 00000000..1523a3ae --- /dev/null +++ b/tests/unit/marketplace/test_marketplace_url_resolver.py @@ -0,0 +1,196 @@ +"""Tests for URL-source resolver behaviour. 
+ +Covers: +- resolve_plugin_source() with skill-md and archive Agent Skills types +- _resolve_url_source() with non-GitHub HTTPS URLs +- resolve_marketplace_plugin() end-to-end with a URL marketplace source +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from apm_cli.marketplace.errors import PluginNotFoundError +from apm_cli.marketplace.models import MarketplaceManifest, MarketplacePlugin, MarketplaceSource +from apm_cli.marketplace.resolver import ( + _resolve_url_source, + resolve_marketplace_plugin, + resolve_plugin_source, +) + +_VALID_DIGEST = "sha256:" + "a" * 64 +_SKILL_URL = "https://example.com/.well-known/agent-skills/code-review/SKILL.md" +_ARCHIVE_URL = "https://example.com/.well-known/agent-skills/my-toolset.tar.gz" + + +@pytest.fixture +def skill_md_plugin(): + return MarketplacePlugin( + name="code-review", + description="Code review helper", + source={"type": "skill-md", "url": _SKILL_URL, "digest": _VALID_DIGEST}, + source_marketplace="example-skills", + ) + + +@pytest.fixture +def archive_plugin(): + return MarketplacePlugin( + name="my-toolset", + description="A set of tools", + source={"type": "archive", "url": _ARCHIVE_URL, "digest": _VALID_DIGEST}, + source_marketplace="example-skills", + ) + + +@pytest.fixture +def url_source(): + return MarketplaceSource( + name="example-skills", + source_type="url", + url="https://example.com/.well-known/agent-skills/index.json", + ) + + +# --------------------------------------------------------------------------- +# resolve_plugin_source — skill-md and archive types (t8-test-02, t8-test-03) +# --------------------------------------------------------------------------- + + +class TestResolvePluginSourceAgentSkills: + """skill-md and archive source types must resolve to their download URL.""" + + def test_skill_md_returns_url(self, skill_md_plugin): + result = resolve_plugin_source(skill_md_plugin) + assert result == _SKILL_URL + + def test_archive_returns_url(self, archive_plugin): 
+ result = resolve_plugin_source(archive_plugin) + assert result == _ARCHIVE_URL + + def test_skill_md_missing_url_raises(self): + """skill-md with no url field must raise ValueError, not return empty string.""" + plugin = MarketplacePlugin( + name="broken", + source={"type": "skill-md", "digest": _VALID_DIGEST}, + ) + with pytest.raises(ValueError, match="url"): + resolve_plugin_source(plugin) + + def test_archive_missing_url_raises(self): + plugin = MarketplacePlugin( + name="broken", + source={"type": "archive", "digest": _VALID_DIGEST}, + ) + with pytest.raises(ValueError, match="url"): + resolve_plugin_source(plugin) + + +# --------------------------------------------------------------------------- +# _resolve_url_source — non-GitHub HTTPS allowed (t8-test-04, t8-test-05) +# --------------------------------------------------------------------------- + + +class TestResolveUrlSource: + """_resolve_url_source() must pass non-GitHub HTTPS URLs through unchanged.""" + + def test_non_github_https_returns_url_directly(self): + source = {"type": "url", "url": "https://cdn.example.com/plugin"} + assert _resolve_url_source(source) == "https://cdn.example.com/plugin" + + def test_cdn_url_with_path_returns_unchanged(self): + url = "https://cdn.example.com/skills/v1/my-plugin" + assert _resolve_url_source({"url": url}) == url + + def test_github_com_url_still_extracts_owner_repo(self): + """GitHub.com URLs must keep the existing owner/repo extraction (regression).""" + source = {"url": "https://github.com/owner/plugin-repo"} + assert _resolve_url_source(source) == "owner/plugin-repo" + + def test_github_com_url_with_git_suffix(self): + source = {"url": "https://github.com/owner/plugin-repo.git"} + assert _resolve_url_source(source) == "owner/plugin-repo" + + def test_empty_url_raises(self): + with pytest.raises(ValueError): + _resolve_url_source({"url": ""}) + + @pytest.mark.parametrize("url", [ + "http://example.com/index.json", + "ftp://example.com/index.json", + 
"file:///etc/passwd", + ]) + def test_non_https_url_raises(self, url): + with pytest.raises(ValueError, match="HTTPS"): + _resolve_url_source({"url": url}) + + +# --------------------------------------------------------------------------- +# resolve_marketplace_plugin — URL marketplace end-to-end (t8-test-06) +# --------------------------------------------------------------------------- + + +class TestResolveMarketplacePluginURL: + """resolve_marketplace_plugin() must work for URL-based marketplaces.""" + + def test_url_marketplace_skill_md_resolves(self, url_source, skill_md_plugin): + manifest = MarketplaceManifest( + name="example-skills", + plugins=(skill_md_plugin,), + ) + with ( + patch( + "apm_cli.marketplace.resolver.get_marketplace_by_name", + return_value=url_source, + ), + patch( + "apm_cli.marketplace.resolver.fetch_or_cache", + return_value=manifest, + ), + ): + canonical, plugin = resolve_marketplace_plugin( + "code-review", "example-skills" + ) + assert canonical == _SKILL_URL + assert plugin.name == "code-review" + + def test_url_marketplace_passes_empty_owner_repo(self, url_source, skill_md_plugin): + """URL sources have owner='' and repo='' — resolver must not crash on this.""" + manifest = MarketplaceManifest( + name="example-skills", + plugins=(skill_md_plugin,), + ) + with ( + patch( + "apm_cli.marketplace.resolver.get_marketplace_by_name", + return_value=url_source, + ), + patch( + "apm_cli.marketplace.resolver.fetch_or_cache", + return_value=manifest, + ), + ): + # Must not raise even though source.owner == "" and source.repo == "" + canonical, _ = resolve_marketplace_plugin( + "code-review", "example-skills" + ) + assert canonical # some non-empty result returned + + def test_plugin_not_found_raises(self, url_source, skill_md_plugin): + """Looking up a non-existent plugin raises PluginNotFoundError.""" + manifest = MarketplaceManifest( + name="example-skills", + plugins=(skill_md_plugin,), + ) + with ( + patch( + 
"apm_cli.marketplace.resolver.get_marketplace_by_name", + return_value=url_source, + ), + patch( + "apm_cli.marketplace.resolver.fetch_or_cache", + return_value=manifest, + ), + ): + with pytest.raises(PluginNotFoundError): + resolve_marketplace_plugin("nonexistent", "example-skills") From 0e0288469f1bd0f61334c8992398ac4750c8af85 Mon Sep 17 00:00:00 2001 From: Vicente-Pastor <32805335+Vicente-Pastor@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:32:27 +0100 Subject: [PATCH 3/5] fix: resolve S1/DR1/DR3/DR4/DR5/DR6/DR7 from external review - S1: validate resp.url scheme post-redirect in _fetch_url_direct - DR1: from_dict raises ValueError on unknown source_type - DR3: add Optional[Tuple[Dict, str]] return type to _read_cache - DR4: preserve query string in _resolve_index_url - DR5: document GitHub kwargs behavior in _parse_manifest docstring - DR6: case-insensitive scheme detection in add command - DR7: PEP 8 two blank lines after FetchResult class 364 tests passing. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/apm_cli/commands/marketplace.py | 10 ++++-- src/apm_cli/marketplace/client.py | 15 +++++++-- src/apm_cli/marketplace/models.py | 2 ++ .../test_marketplace_url_client.py | 31 +++++++++++++++++++ .../test_marketplace_url_commands.py | 16 ++++++++++ .../test_marketplace_url_models.py | 6 ++++ 6 files changed, 74 insertions(+), 6 deletions(-) diff --git a/src/apm_cli/commands/marketplace.py b/src/apm_cli/commands/marketplace.py index 273a2b4e..fa1d65d1 100644 --- a/src/apm_cli/commands/marketplace.py +++ b/src/apm_cli/commands/marketplace.py @@ -39,7 +39,10 @@ def _resolve_index_url(raw_url: str) -> str: if not path or path == "/.well-known/agent-skills": # Bare origin or just the .well-known dir — append full path base = f"{parsed.scheme}://{parsed.netloc}" - return base + _WELL_KNOWN_PATH + resolved = base + _WELL_KNOWN_PATH + if parsed.query: + resolved += "?" 
+ parsed.query + return resolved return raw_url @@ -71,8 +74,9 @@ def add(repo, name, branch, host, verbose): from ..marketplace.registry import add_marketplace # URL-based path (Agent Skills discovery) - if repo.startswith("https://") or repo.startswith("http://"): - if repo.startswith("http://"): + repo_lower = repo.lower() + if repo_lower.startswith("https://") or repo_lower.startswith("http://"): + if repo_lower.startswith("http://"): logger.error( "URL marketplaces must use HTTPS. " "Please provide an https:// URL." diff --git a/src/apm_cli/marketplace/client.py b/src/apm_cli/marketplace/client.py index 3f2ab062..2a815746 100644 --- a/src/apm_cli/marketplace/client.py +++ b/src/apm_cli/marketplace/client.py @@ -22,7 +22,7 @@ import os import time from dataclasses import dataclass -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple import requests @@ -48,6 +48,7 @@ class FetchResult: etag: str = "" last_modified: str = "" + _CACHE_TTL_SECONDS = 3600 # 1 hour _CACHE_DIR_NAME = os.path.join("cache", "marketplace") @@ -108,7 +109,7 @@ def _cache_meta_path(name: str) -> str: return os.path.join(_cache_dir(), f"{_sanitize_cache_name(name)}.meta.json") -def _read_cache(name: str): +def _read_cache(name: str) -> Optional[Tuple[Dict, str]]: """Read cached marketplace data if valid (not expired). 
Returns: @@ -267,6 +268,12 @@ def _fetch_url_direct(url: str, *, etag: str = "", last_modified: str = "", if last_modified: headers["If-Modified-Since"] = last_modified resp = requests.get(url, headers=headers, timeout=30) + # Guard against HTTPS→HTTP redirect (S1) + final_url = getattr(resp, "url", None) + if isinstance(final_url, str) and urlparse(final_url).scheme != "https": + raise MarketplaceFetchError( + url, "Redirect to non-HTTPS URL rejected" + ) if resp.status_code == 304: return None if resp.status_code == 404: @@ -409,7 +416,9 @@ def _parse_manifest( For URL sources the index format is auto-detected via ``_detect_index_format`` and dispatched to the appropriate parser. - For GitHub sources ``parse_marketplace_json`` is used directly. + For GitHub sources ``parse_marketplace_json`` is used directly + (``source_url`` and ``source_digest`` are ignored — GitHub sources + have no URL provenance). Args: data: Parsed JSON dict from the marketplace index. diff --git a/src/apm_cli/marketplace/models.py b/src/apm_cli/marketplace/models.py index fe761404..0b9a6d89 100644 --- a/src/apm_cli/marketplace/models.py +++ b/src/apm_cli/marketplace/models.py @@ -77,6 +77,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "MarketplaceSource": source_type="url", url=data.get("url", ""), ) + if source_type != "github": + raise ValueError(f"Unsupported marketplace source_type: {source_type!r}") return cls( name=data["name"], owner=data.get("owner", ""), diff --git a/tests/unit/marketplace/test_marketplace_url_client.py b/tests/unit/marketplace/test_marketplace_url_client.py index ce5be27e..55e7cd0a 100644 --- a/tests/unit/marketplace/test_marketplace_url_client.py +++ b/tests/unit/marketplace/test_marketplace_url_client.py @@ -821,3 +821,34 @@ def test_stale_fallback_preserves_source_url(self, url_source, monkeypatch): ) manifest = fetch_marketplace(url_source) assert manifest.source_url == url_source.url + + +# 
--------------------------------------------------------------------------- +# S1: HTTPS redirect bypass +# --------------------------------------------------------------------------- + + +class TestFetchUrlDirectRedirectEnforcement: + """_fetch_url_direct must reject responses redirected to non-HTTPS URLs.""" + + def test_redirect_to_http_raises(self, monkeypatch): + """An HTTPS→HTTP redirect must be caught after the request completes.""" + mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) + mock_resp.url = "http://evil.com/index.json" + monkeypatch.setattr( + "apm_cli.marketplace.client.requests.get", + lambda *a, **kw: mock_resp, + ) + with pytest.raises(MarketplaceFetchError, match="non-HTTPS"): + _fetch_url_direct("https://example.com/index.json") + + def test_redirect_to_https_accepted(self, monkeypatch): + """HTTPS→HTTPS redirect is fine.""" + mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) + mock_resp.url = "https://cdn.example.com/index.json" + monkeypatch.setattr( + "apm_cli.marketplace.client.requests.get", + lambda *a, **kw: mock_resp, + ) + result = _fetch_url_direct("https://example.com/index.json") + assert isinstance(result, FetchResult) diff --git a/tests/unit/marketplace/test_marketplace_url_commands.py b/tests/unit/marketplace/test_marketplace_url_commands.py index 0352e089..d7e12d31 100644 --- a/tests/unit/marketplace/test_marketplace_url_commands.py +++ b/tests/unit/marketplace/test_marketplace_url_commands.py @@ -100,6 +100,20 @@ def test_http_url_rejected(self, runner): assert result.exit_code != 0 assert "https" in result.output.lower() + @pytest.mark.parametrize("url", [ + "HTTPS://EXAMPLE.COM", + "Https://example.com", + "HTTP://example.com", + ]) + def test_mixed_case_scheme_detected_as_url(self, runner, url): + """URL detection must be case-insensitive per RFC 3986 §3.1.""" + from apm_cli.commands.marketplace import marketplace + + result = runner.invoke(marketplace, ["add", url]) + # Should NOT produce 
"Invalid format ... OWNER/REPO" — + # it should enter the URL path (may fail on http or succeed on https) + assert "OWNER/REPO" not in result.output + # --------------------------------------------------------------------------- # .well-known auto-resolution @@ -120,6 +134,8 @@ class TestWellKnownResolution: "https://example.com/.well-known/agent-skills/index.json"), ("https://example.com/.well-known/agent-skills", "https://example.com/.well-known/agent-skills/index.json"), + ("https://example.com?token=abc", + "https://example.com/.well-known/agent-skills/index.json?token=abc"), ]) @patch("apm_cli.marketplace.registry.add_marketplace") @patch("apm_cli.marketplace.client.fetch_marketplace") diff --git a/tests/unit/marketplace/test_marketplace_url_models.py b/tests/unit/marketplace/test_marketplace_url_models.py index 52909ef0..f567f817 100644 --- a/tests/unit/marketplace/test_marketplace_url_models.py +++ b/tests/unit/marketplace/test_marketplace_url_models.py @@ -129,6 +129,12 @@ def test_github_from_dict_explicit_source_type(self): assert src.source_type == "github" assert src.owner == "acme-org" + def test_unknown_source_type_raises(self): + """from_dict with unrecognised source_type must raise ValueError.""" + d = {"name": "x", "source_type": "artifactory", "url": "https://art.corp.com/index.json"} + with pytest.raises(ValueError, match="artifactory"): + MarketplaceSource.from_dict(d) + def test_url_source_to_dict_omits_github_only_fields(self): """URL to_dict must not include host, branch, or path.""" src = MarketplaceSource( From fa8b7a4f577e0da5bbdaae3fc5921f305b5f54e7 Mon Sep 17 00:00:00 2001 From: Vicente-Pastor <32805335+Vicente-Pastor@users.noreply.github.com> Date: Mon, 13 Apr 2026 17:14:49 +0100 Subject: [PATCH 4/5] fix: address all PR #691 review comments - C01/C02: Fix CodeQL URL substring sanitization in test assertions - C03/C11-C16/C19: Replace all Unicode em-dashes with ASCII in PR files - C04/C05: Case-insensitive HTTPS scheme validation 
(urlparse-based) - C06: Reject empty URL in MarketplaceSource.from_dict() - C07: Strict field validation in parse_agent_skills_index() - C08: HTTPS enforcement + post-redirect check in archive download - C09: Reject non-regular tar members (device files, FIFOs, symlinks) - C10/C17: Centralize path security using path_security.py guards - C18: 10 MB response size limit on _fetch_url_direct() - C20: Add CHANGELOG.md entries for URL marketplace feature - C21: Update marketplace docs and command reference for URL support - C22: Restore deleted TestCacheKey tests - C23: Verified browse command works for URL sources 387 tests passing. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 6 ++ docs/src/content/docs/guides/marketplaces.md | 24 +++++ .../.apm/skills/apm-usage/commands.md | 1 + src/apm_cli/commands/marketplace.py | 6 +- src/apm_cli/marketplace/archive.py | 68 +++++++++---- src/apm_cli/marketplace/client.py | 34 +++++-- src/apm_cli/marketplace/models.py | 30 +++++- src/apm_cli/marketplace/resolver.py | 8 +- .../marketplace/test_marketplace_archive.py | 88 +++++++++++++++-- .../marketplace/test_marketplace_client.py | 20 ++++ .../test_marketplace_url_client.py | 67 +++++++++++-- .../test_marketplace_url_commands.py | 16 +-- .../test_marketplace_url_models.py | 98 +++++++++++++++++-- .../test_marketplace_url_resolver.py | 16 ++- 14 files changed, 411 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58cb4996..d7c5ecfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `apm marketplace add ` accepts HTTPS URLs for registering Agent Skills discovery indexes, with automatic `.well-known/agent-skills/index.json` resolution for bare origins (#676) +- Agent Skills Discovery RFC v0.2.0 index parser with strict `$schema` validation, skill name rules, and digest verification (#676) +- SHA-256 digest 
computation and integrity verification for URL-based marketplace indexes (#676) +- ETag/Last-Modified conditional refresh for URL marketplace indexes with stale-while-revalidate fallback (#676) +- Archive download and extraction support for `type: "archive"` skill entries with path traversal and decompression bomb safety guards (#676) +- Lockfile provenance fields (`source_url`, `source_digest`) for URL-sourced marketplace dependencies (#676) - `apm install` now automatically discovers and deploys local `.apm/` primitives (skills, instructions, agents, prompts, hooks, commands) to target directories, with local content taking priority over dependencies on collision (#626, #644) ### Fixed diff --git a/docs/src/content/docs/guides/marketplaces.md b/docs/src/content/docs/guides/marketplaces.md index a2607822..c8c5914a 100644 --- a/docs/src/content/docs/guides/marketplaces.md +++ b/docs/src/content/docs/guides/marketplaces.md @@ -65,6 +65,8 @@ With `pluginRoot` set to `./plugins`, the source `"my-tool"` resolves to `owner/ ## Register a marketplace +### From a GitHub repository + ```bash apm marketplace add acme/plugin-marketplace ``` @@ -85,6 +87,28 @@ apm marketplace add acme/plugin-marketplace --host ghes.corp.example.com apm marketplace add ghes.corp.example.com/acme/plugin-marketplace ``` +### From a URL + +```bash +apm marketplace add https://plugins.example.com +``` + +APM automatically appends `/.well-known/agent-skills/index.json` to bare origins, following the Agent Skills Discovery RFC v0.2.0. You can also pass the full index URL: + +```bash +apm marketplace add https://plugins.example.com/.well-known/agent-skills/index.json +``` + +The index must conform to the Agent Skills RFC schema (`$schema: "https://schemas.agentskills.io/discovery/0.2.0/schema.json"`). Plugins may use `type: "skill-md"` (direct Markdown content) or `type: "archive"` (downloadable `.tar.gz` archive). 
APM enforces HTTPS-only, requires and verifies a valid `sha256:` digest on each skill entry, and supports conditional refresh via `ETag`/`Last-Modified` headers. + +**Options:** +- `--name/-n` -- Custom display name for the marketplace + +```bash +# Register with a custom name +apm marketplace add https://plugins.example.com --name company-skills +``` + ## List registered marketplaces ```bash diff --git a/packages/apm-guide/.apm/skills/apm-usage/commands.md b/packages/apm-guide/.apm/skills/apm-usage/commands.md index 029891bb..11d4e6a8 100644 --- a/packages/apm-guide/.apm/skills/apm-usage/commands.md +++ b/packages/apm-guide/.apm/skills/apm-usage/commands.md @@ -53,6 +53,7 @@ | Command | Purpose | Key flags | |---------|---------|-----------| | `apm marketplace add OWNER/REPO` | Register a marketplace | `-n NAME`, `-b BRANCH`, `--host HOST` | +| `apm marketplace add URL` | Register a URL-based marketplace | `-n NAME` | | `apm marketplace list` | List registered marketplaces | -- | | `apm marketplace browse NAME` | Browse marketplace packages | -- | | `apm marketplace update [NAME]` | Update marketplace index | -- | diff --git a/src/apm_cli/commands/marketplace.py b/src/apm_cli/commands/marketplace.py index fa1d65d1..b3fc0030 100644 --- a/src/apm_cli/commands/marketplace.py +++ b/src/apm_cli/commands/marketplace.py @@ -26,7 +26,7 @@ def _resolve_index_url(raw_url: str) -> str: Trailing slashes on bare origins are normalised away. Args: - raw_url: A user-supplied ``https://`` URL — either a bare origin + raw_url: A user-supplied ``https://`` URL -- either a bare origin (``https://example.com``) or a fully-qualified index URL. 
Returns: @@ -37,7 +37,7 @@ def _resolve_index_url(raw_url: str) -> str: parsed = urlparse(raw_url) path = parsed.path.rstrip("/") if not path or path == "/.well-known/agent-skills": - # Bare origin or just the .well-known dir — append full path + # Bare origin or just the .well-known dir -- append full path base = f"{parsed.scheme}://{parsed.netloc}" resolved = base + _WELL_KNOWN_PATH if parsed.query: @@ -279,7 +279,7 @@ def list_cmd(verbose): for s in sources: if s.is_url_source: - table.add_row(s.name, s.url, "—", "—") + table.add_row(s.name, s.url, "--", "--") else: table.add_row(s.name, f"{s.owner}/{s.repo}", s.branch, s.path) diff --git a/src/apm_cli/marketplace/archive.py b/src/apm_cli/marketplace/archive.py index b9d89e30..a9122570 100644 --- a/src/apm_cli/marketplace/archive.py +++ b/src/apm_cli/marketplace/archive.py @@ -7,13 +7,24 @@ """ import io +import logging import os import tarfile import zipfile +from pathlib import Path from typing import List +from urllib.parse import urlparse import requests +from apm_cli.utils.path_security import ( + PathTraversalError, + ensure_path_within, + validate_path_segments, +) + +logger = logging.getLogger(__name__) + _MAX_UNCOMPRESSED_BYTES = 512 * 1024 * 1024 # 512 MB @@ -24,6 +35,10 @@ class ArchiveError(Exception): def _check_archive_member(member_path: str) -> None: """Validate a single archive member path. + Uses the centralized ``path_security`` guards where possible and adds + archive-specific checks (null bytes, Windows UNC/drive-letter paths) + that the centralized module does not cover. 
+ Raises: ArchiveError: If the path is absolute (including Windows drive-letter and UNC forms), contains path traversal sequences (``..``), or @@ -41,16 +56,10 @@ def _check_archive_member(member_path: str) -> None: len(forward) >= 2 and forward[1] == ":" and forward[0].isalpha() ): raise ArchiveError(f"Archive member has absolute path: {member_path!r}") - normalized = os.path.normpath(member_path) - if normalized.startswith(".."): - raise ArchiveError( - f"Archive member path traversal detected: {member_path!r}" - ) - parts = forward.split("/") - if ".." in parts: - raise ArchiveError( - f"Archive member path traversal detected: {member_path!r}" - ) + try: + validate_path_segments(member_path, context="archive member") + except PathTraversalError as exc: + raise ArchiveError(str(exc)) from exc def _detect_archive_format(content_type: str, url: str) -> str: @@ -107,6 +116,13 @@ def _extract_tar_gz(data: bytes, dest_dir: str) -> List[str]: raise ArchiveError( f"Symlinks and hard links are not supported: {member.name!r}" ) + if not member.isreg(): + logger.warning( + "Skipping non-regular tar member: %s (type=%s)", + member.name, + member.type, + ) + continue _check_archive_member(member.name) total_size += member.size @@ -116,15 +132,24 @@ def _extract_tar_gz(data: bytes, dest_dir: str) -> List[str]: f"(decompression bomb guard)" ) - dest_path = os.path.realpath(os.path.join(dest_dir, member.name)) - real_dest = os.path.realpath(dest_dir) - if not dest_path.startswith(real_dest + os.sep) and dest_path != real_dest: + dest_path_obj = Path(os.path.join(dest_dir, member.name)) + try: + ensure_path_within(dest_path_obj, Path(dest_dir)) + except PathTraversalError: raise ArchiveError( f"Archive member would extract outside destination: {member.name!r}" ) + dest_path = str(dest_path_obj.resolve()) os.makedirs(os.path.dirname(dest_path), exist_ok=True) - with tf.extractfile(member) as src, open(dest_path, "wb") as dst: + src = tf.extractfile(member) + if src is None: + 
logger.warning( + "Cannot extract %s: extractfile returned None", + member.name, + ) + continue + with src, open(dest_path, "wb") as dst: dst.write(src.read()) extracted.append(member.name) except (tarfile.TarError, KeyError) as exc: @@ -163,12 +188,14 @@ def _extract_zip(data: bytes, dest_dir: str) -> List[str]: f"(decompression bomb guard)" ) - dest_path = os.path.realpath(os.path.join(dest_dir, info.filename)) - real_dest = os.path.realpath(dest_dir) - if not dest_path.startswith(real_dest + os.sep) and dest_path != real_dest: + dest_path_obj = Path(os.path.join(dest_dir, info.filename)) + try: + ensure_path_within(dest_path_obj, Path(dest_dir)) + except PathTraversalError: raise ArchiveError( f"Archive member would extract outside destination: {info.filename!r}" ) + dest_path = str(dest_path_obj.resolve()) os.makedirs(os.path.dirname(dest_path), exist_ok=True) with zf.open(info) as src, open(dest_path, "wb") as dst: @@ -196,12 +223,19 @@ def download_and_extract_archive(url: str, dest_dir: str) -> List[str]: Raises: ArchiveError: On download failure, unrecognised format, or unsafe content. 
""" + if urlparse(url).scheme.lower() != "https": + raise ArchiveError(f"Only HTTPS URLs are supported for archive download, got: {url!r}") try: resp = requests.get(url, headers={"User-Agent": "apm-cli"}, timeout=60) resp.raise_for_status() except requests.exceptions.RequestException as exc: raise ArchiveError(f"Failed to download archive from {url!r}: {exc}") from exc + # Guard against HTTPS->HTTP redirect + final_url = getattr(resp, "url", None) + if isinstance(final_url, str) and urlparse(final_url).scheme.lower() != "https": + raise ArchiveError(f"Redirect to non-HTTPS URL rejected: {final_url!r}") + content_type = resp.headers.get("Content-Type", "") fmt = _detect_archive_format(content_type, url) diff --git a/src/apm_cli/marketplace/client.py b/src/apm_cli/marketplace/client.py index 2a815746..4aa2ee96 100644 --- a/src/apm_cli/marketplace/client.py +++ b/src/apm_cli/marketplace/client.py @@ -50,6 +50,7 @@ class FetchResult: _CACHE_TTL_SECONDS = 3600 # 1 hour +_MAX_INDEX_BYTES = 10 * 1024 * 1024 # 10 MB _CACHE_DIR_NAME = os.path.join("cache", "marketplace") # Candidate locations for marketplace.json in a repository (priority order) @@ -90,7 +91,7 @@ def _cache_key(source: MarketplaceSource) -> str: """Cache key that avoids collisions across hosts and URL sources. - GitHub sources: ``name`` (same host) or ``{host}__{name}`` (GHE). - - URL sources: first 16 hex chars of ``sha256(url)`` — avoids + - URL sources: first 16 hex chars of ``sha256(url)`` -- avoids host-based collisions between two URL sources on the same domain. """ if source.is_url_source: @@ -241,7 +242,7 @@ def _fetch_url_direct(url: str, *, etag: str = "", last_modified: str = "", expected_digest: str = ""): """Fetch a URL marketplace index directly over HTTPS. - No GitHub auth or proxy involved — used for URL sources only. + No GitHub auth or proxy involved -- used for URL sources only. 
Supports conditional requests: when *etag* or *last_modified* are provided the corresponding ``If-None-Match`` / ``If-Modified-Since`` headers are sent. @@ -259,7 +260,7 @@ def _fetch_url_direct(url: str, *, etag: str = "", last_modified: str = "", """ from urllib.parse import urlparse - if urlparse(url).scheme != "https": + if urlparse(url).scheme.lower() != "https": raise MarketplaceFetchError(url, "URL sources must use HTTPS") try: headers = {"User-Agent": "apm-cli"} @@ -270,7 +271,7 @@ def _fetch_url_direct(url: str, *, etag: str = "", last_modified: str = "", resp = requests.get(url, headers=headers, timeout=30) # Guard against HTTPS→HTTP redirect (S1) final_url = getattr(resp, "url", None) - if isinstance(final_url, str) and urlparse(final_url).scheme != "https": + if isinstance(final_url, str) and urlparse(final_url).scheme.lower() != "https": raise MarketplaceFetchError( url, "Redirect to non-HTTPS URL rejected" ) @@ -279,7 +280,24 @@ def _fetch_url_direct(url: str, *, etag: str = "", last_modified: str = "", if resp.status_code == 404: raise MarketplaceFetchError(url, "404 Not Found") resp.raise_for_status() + content_length = resp.headers.get("Content-Length") + if content_length: + try: + size = int(content_length) + except ValueError: + pass + else: + if size > _MAX_INDEX_BYTES: + raise MarketplaceFetchError( + url, + f"Index exceeds size limit ({size} bytes, max {_MAX_INDEX_BYTES})", + ) raw = resp.content + if len(raw) > _MAX_INDEX_BYTES: + raise MarketplaceFetchError( + url, + f"Index exceeds size limit ({len(raw)} bytes, max {_MAX_INDEX_BYTES})", + ) digest = "sha256:" + hashlib.sha256(raw).hexdigest() if expected_digest and digest != expected_digest: raise MarketplaceFetchError( @@ -417,7 +435,7 @@ def _parse_manifest( For URL sources the index format is auto-detected via ``_detect_index_format`` and dispatched to the appropriate parser. 
For GitHub sources ``parse_marketplace_json`` is used directly - (``source_url`` and ``source_digest`` are ignored — GitHub sources + (``source_url`` and ``source_digest`` are ignored -- GitHub sources have no URL provenance). Args: @@ -494,7 +512,7 @@ def fetch_marketplace( # Fetch from network try: - # URL source — direct HTTPS fetch, no GitHub auth or proxy + # URL source -- direct HTTPS fetch, no GitHub auth or proxy if source.is_url_source: # Read stale ETag/Last-Modified for conditional request stale_meta = _read_stale_meta(cache_name) @@ -506,7 +524,7 @@ def fetch_marketplace( ) if result is None: - # 304 Not Modified — reset TTL on existing cache, serve stale data + # 304 Not Modified -- reset TTL on existing cache, serve stale data stale = _read_stale_cache(cache_name) if stale is not None: _write_cache( @@ -537,7 +555,7 @@ def fetch_marketplace( source_url=source.url, source_digest=result.digest, ) - # GitHub source — proxy-first, then GitHub Contents API + # GitHub source -- proxy-first, then GitHub Contents API data = _fetch_file(source, source.path, auth_resolver=auth_resolver) if data is None: raise MarketplaceFetchError( diff --git a/src/apm_cli/marketplace/models.py b/src/apm_cli/marketplace/models.py index 0b9a6d89..5ff353f1 100644 --- a/src/apm_cli/marketplace/models.py +++ b/src/apm_cli/marketplace/models.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -# Agent Skills Discovery RFC v0.2.0 — the only schema version we accept. +# Agent Skills Discovery RFC v0.2.0 -- the only schema version we accept. _AGENT_SKILLS_SCHEMA = "https://schemas.agentskills.io/discovery/0.2.0/schema.json" # RFC skill-name rule: 1-64 chars, lowercase alphanumeric + hyphens, @@ -27,8 +27,8 @@ class MarketplaceSource: Stored in ``~/.apm/marketplaces.json``. Two source types are supported: - - ``"github"`` (default) — a GitHub-hosted marketplace.json index. - - ``"url"`` — an arbitrary HTTPS Agent Skills discovery endpoint. 
+ - ``"github"`` (default) -- a GitHub-hosted marketplace.json index. + - ``"url"`` -- an arbitrary HTTPS Agent Skills discovery endpoint. """ name: str # Display name (e.g., "acme-tools") @@ -72,10 +72,15 @@ def from_dict(cls, data: Dict[str, Any]) -> "MarketplaceSource": """Deserialize from JSON dict.""" source_type = data.get("source_type", "github") if source_type == "url": + url = data.get("url", "") + if not url: + raise ValueError( + "URL source requires a non-empty 'url' field" + ) return cls( name=data["name"], source_type="url", - url=data.get("url", ""), + url=url, ) if source_type != "github": raise ValueError(f"Unsupported marketplace source_type: {source_type!r}") @@ -365,7 +370,22 @@ def parse_agent_skills_index( ) continue skill_type = entry.get("type", "") + if not isinstance(skill_type, str) or skill_type not in ("skill-md", "archive"): + logger.warning( + "Skipping Agent Skills entry %r with unsupported type %r in '%s'", + name, + skill_type, + source_name, + ) + continue url = entry.get("url", "") + if not isinstance(url, str) or not url: + logger.warning( + "Skipping Agent Skills entry %r with missing/invalid url in '%s'", + name, + source_name, + ) + continue digest = entry.get("digest", "") if not _is_valid_digest(digest): logger.debug( @@ -376,6 +396,8 @@ def parse_agent_skills_index( ) continue description = entry.get("description", "") + if not isinstance(description, str): + description = "" plugins.append( MarketplacePlugin( name=name, diff --git a/src/apm_cli/marketplace/resolver.py b/src/apm_cli/marketplace/resolver.py index a25bfeb4..a266d05a 100644 --- a/src/apm_cli/marketplace/resolver.py +++ b/src/apm_cli/marketplace/resolver.py @@ -87,8 +87,10 @@ def _resolve_url_source(source: dict) -> str: if len(parts) >= 2: return f"{parts[0]}/{parts[1]}" - # Non-GitHub HTTPS URL — return as-is (CDN, arbitrary HTTPS host, etc.) - if url.startswith("https://"): + # Non-GitHub HTTPS URL -- return as-is (CDN, arbitrary HTTPS host, etc.) 
+ from urllib.parse import urlparse as _urlparse + + if _urlparse(url).scheme.lower() == "https": return url raise ValueError( @@ -203,7 +205,7 @@ def resolve_plugin_source( if source_type == "github": return _resolve_github_source(source) elif source_type in ("skill-md", "archive"): - # Agent Skills RFC types — the canonical reference is the download URL + # Agent Skills RFC types -- the canonical reference is the download URL url = source.get("url", "") if not url: raise ValueError( diff --git a/tests/unit/marketplace/test_marketplace_archive.py b/tests/unit/marketplace/test_marketplace_archive.py index 551b8d8c..4f033618 100644 --- a/tests/unit/marketplace/test_marketplace_archive.py +++ b/tests/unit/marketplace/test_marketplace_archive.py @@ -1,4 +1,4 @@ -"""Tests for archive download and extraction (Step 7 — gap #14). +"""Tests for archive download and extraction (Step 7 -- gap #14). Covers: _check_archive_member safety checks, _detect_archive_format, _extract_tar_gz, _extract_zip, and the download_and_extract_archive public API. 
@@ -47,7 +47,7 @@ def _make_zip(members: dict) -> bytes: # --------------------------------------------------------------------------- -# Step 7 tests — safety checks +# Step 7 tests -- safety checks # --------------------------------------------------------------------------- @@ -68,7 +68,7 @@ def test_safe_paths_accepted(self, path): "skills/../../etc/passwd", ]) def test_path_traversal_rejected(self, path): - with pytest.raises(ArchiveError, match="path traversal"): + with pytest.raises(ArchiveError, match="traversal"): _check_archive_member(path) def test_absolute_path_rejected(self): @@ -91,7 +91,7 @@ def test_windows_absolute_paths_rejected(self, path): # --------------------------------------------------------------------------- -# Step 7 tests — format detection +# Step 7 tests -- format detection # --------------------------------------------------------------------------- @@ -126,7 +126,7 @@ def test_unknown_format_raises_archive_error(self): # --------------------------------------------------------------------------- -# Step 7 tests — tar.gz extraction +# Step 7 tests -- tar.gz extraction # --------------------------------------------------------------------------- @@ -145,7 +145,7 @@ def test_extract_file(self, tmp_path, path, content): def test_path_traversal_member_rejected(self, tmp_path): data = _make_tar_gz({"../evil.txt": b"bad"}) - with pytest.raises(ArchiveError, match="path traversal|absolute"): + with pytest.raises(ArchiveError, match="traversal"): _extract_tar_gz(data, str(tmp_path)) def test_decompression_bomb_rejected(self, tmp_path): @@ -184,7 +184,7 @@ def test_empty_archive_returns_empty_list(self, tmp_path): # --------------------------------------------------------------------------- -# Step 7 tests — zip extraction +# Step 7 tests -- zip extraction # --------------------------------------------------------------------------- @@ -203,7 +203,7 @@ def test_extract_file(self, tmp_path, path, content): def 
test_path_traversal_member_rejected(self, tmp_path): data = _make_zip({"../evil.txt": b"bad"}) - with pytest.raises(ArchiveError, match="path traversal|absolute"): + with pytest.raises(ArchiveError, match="traversal"): _extract_zip(data, str(tmp_path)) def test_absolute_path_member_rejected(self, tmp_path): @@ -227,7 +227,7 @@ def test_corrupted_zip_raises_archive_error(self, tmp_path): # --------------------------------------------------------------------------- -# Step 7 tests — download_and_extract_archive +# Step 7 tests -- download_and_extract_archive # --------------------------------------------------------------------------- @@ -347,3 +347,73 @@ def test_symlink_error_is_archive_error_not_key_error(self, tmp_path): data = self._make_symlink_tar_gz("link.txt", "real.txt") with pytest.raises(ArchiveError): _extract_tar_gz(data, str(tmp_path)) + + +# --------------------------------------------------------------------------- +# Non-regular tar members (device files, FIFOs) -- C09 +# --------------------------------------------------------------------------- + + +class TestNonRegularTarMembers: + """Non-regular tar members (device files, FIFOs) must be skipped.""" + + def _make_tar_with_member_type(self, name: str, member_type: int) -> bytes: + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tf: + info = tarfile.TarInfo(name=name) + info.type = member_type + tf.addfile(info) + return buf.getvalue() + + def test_device_file_skipped(self, tmp_path): + data = self._make_tar_with_member_type("dev/sda", tarfile.CHRTYPE) + extracted = _extract_tar_gz(data, str(tmp_path)) + assert extracted == [] + + def test_fifo_skipped(self, tmp_path): + data = self._make_tar_with_member_type("pipe", tarfile.FIFOTYPE) + extracted = _extract_tar_gz(data, str(tmp_path)) + assert extracted == [] + + def test_block_device_skipped(self, tmp_path): + data = self._make_tar_with_member_type("dev/blk", tarfile.BLKTYPE) + extracted = _extract_tar_gz(data, str(tmp_path)) + 
assert extracted == [] + + def test_regular_file_still_extracted(self, tmp_path): + data = _make_tar_gz({"hello.txt": b"world"}) + extracted = _extract_tar_gz(data, str(tmp_path)) + assert "hello.txt" in extracted + assert (tmp_path / "hello.txt").read_bytes() == b"world" + + +# --------------------------------------------------------------------------- +# Archive HTTPS enforcement -- C08 +# --------------------------------------------------------------------------- + + +class TestArchiveHttpsEnforcement: + """download_and_extract_archive must enforce HTTPS.""" + + def test_http_url_raises(self, tmp_path): + with pytest.raises(ArchiveError, match="HTTPS"): + download_and_extract_archive("http://example.com/a.tar.gz", str(tmp_path)) + + def test_ftp_url_raises(self, tmp_path): + with pytest.raises(ArchiveError, match="HTTPS"): + download_and_extract_archive("ftp://example.com/a.tar.gz", str(tmp_path)) + + def test_redirect_to_http_raises(self, tmp_path, monkeypatch): + import unittest.mock as mock + + resp = mock.MagicMock() + resp.status_code = 200 + resp.url = "http://evil.com/a.tar.gz" + resp.headers = {"Content-Type": "application/gzip"} + resp.content = b"" + resp.raise_for_status.return_value = None + monkeypatch.setattr("apm_cli.marketplace.archive.requests.get", + lambda *a, **kw: resp) + + with pytest.raises(ArchiveError, match="non-HTTPS"): + download_and_extract_archive("https://example.com/a.tar.gz", str(tmp_path)) diff --git a/tests/unit/marketplace/test_marketplace_client.py b/tests/unit/marketplace/test_marketplace_client.py index 259d0c32..3833ad84 100644 --- a/tests/unit/marketplace/test_marketplace_client.py +++ b/tests/unit/marketplace/test_marketplace_client.py @@ -314,3 +314,23 @@ def test_fetch_marketplace_via_proxy_end_to_end(self): assert manifest.name == "Test" assert len(manifest.plugins) == 1 assert manifest.plugins[0].name == "p1" + + +class TestCacheKey: + """Cache key includes host for non-github.com sources.""" + + def 
test_github_default_unchanged(self): + source = MarketplaceSource(name="skills", owner="o", repo="r") + assert client_mod._cache_key(source) == "skills" + + def test_non_default_host_includes_host(self): + source = MarketplaceSource(name="skills", owner="o", repo="r", host="ghes.corp.com") + key = client_mod._cache_key(source) + assert key.startswith("ghes.corp.com") or key.startswith("ghes_corp_com") + assert key.endswith("skills") + assert key != "skills" + + def test_different_hosts_different_keys(self): + s1 = MarketplaceSource(name="mkt", owner="o", repo="r", host="a.com") + s2 = MarketplaceSource(name="mkt", owner="o", repo="r", host="b.com") + assert client_mod._cache_key(s1) != client_mod._cache_key(s2) diff --git a/tests/unit/marketplace/test_marketplace_url_client.py b/tests/unit/marketplace/test_marketplace_url_client.py index 55e7cd0a..69cfe83f 100644 --- a/tests/unit/marketplace/test_marketplace_url_client.py +++ b/tests/unit/marketplace/test_marketplace_url_client.py @@ -2,7 +2,7 @@ Covers: _cache_key() URL sources, _fetch_url_direct(), fetch_marketplace() URL branch, format auto-detection, cache read/write, stale-while-revalidate. -GitHub paths are not touched — regression tests confirm they still work. +GitHub paths are not touched -- regression tests confirm they still work. Tests are separate from test_marketplace_client.py which covers GitHub only. 
""" @@ -117,7 +117,7 @@ def _write_cache_files_with_digest(source, data, *, digest="", expired=False): # --------------------------------------------------------------------------- -# _cache_key — URL sources +# _cache_key -- URL sources # --------------------------------------------------------------------------- @@ -169,7 +169,7 @@ def test_very_long_url_key_is_always_16_chars(self): # --------------------------------------------------------------------------- -# _fetch_url_direct — network layer +# _fetch_url_direct -- network layer class TestFetchUrlDirectEmptyUrl: @@ -182,7 +182,7 @@ def test_empty_url_raises_fetch_error(self): # --------------------------------------------------------------------------- -# _detect_index_format — direct unit tests +# _detect_index_format -- direct unit tests # --------------------------------------------------------------------------- @@ -240,7 +240,7 @@ def test_non_json_response_raises_fetch_error(self, monkeypatch): # --------------------------------------------------------------------------- -# fetch_marketplace — URL branch +# fetch_marketplace -- URL branch # --------------------------------------------------------------------------- @@ -345,7 +345,7 @@ def test_github_source_uses_fetch_file_not_fetch_url_direct( # --------------------------------------------------------------------------- -# FetchResult — digest capture (t5-test-01) +# FetchResult -- digest capture (t5-test-01) # --------------------------------------------------------------------------- @@ -403,7 +403,7 @@ def test_empty_expected_digest_skips_verification(self, monkeypatch): # --------------------------------------------------------------------------- -# fetch_marketplace — index digest storage + manifest fields (t5-test-02/03) +# fetch_marketplace -- index digest storage + manifest fields (t5-test-02/03) # --------------------------------------------------------------------------- _FIXED_DIGEST = "sha256:" + "b" * 64 @@ -462,6 +462,7 @@ def 
_mock_response(status_code, *, json_body=None, json_error=None, raise_for_st resp = mock.MagicMock() resp.status_code = status_code + resp.headers = {} if json_body is not None: body_bytes = json.dumps(json_body).encode() resp.content = body_bytes @@ -782,7 +783,7 @@ def test_on_stale_warning_not_called_on_cache_hit(self, url_source, monkeypatch) # --------------------------------------------------------------------------- -# Stale-while-revalidate — source_digest preservation +# Stale-while-revalidate -- source_digest preservation # --------------------------------------------------------------------------- @@ -852,3 +853,53 @@ def test_redirect_to_https_accepted(self, monkeypatch): ) result = _fetch_url_direct("https://example.com/index.json") assert isinstance(result, FetchResult) + + +# --------------------------------------------------------------------------- +# Response size limit -- C18 +# --------------------------------------------------------------------------- + + +class TestFetchUrlDirectSizeLimit: + """_fetch_url_direct must reject oversized responses.""" + + def test_content_length_over_limit_raises(self, monkeypatch): + mock_resp = _mock_response_with_headers( + 200, json_body={"skills": []}, + response_headers={"Content-Length": str(11 * 1024 * 1024)}, + ) + monkeypatch.setattr( + "apm_cli.marketplace.client.requests.get", + lambda *a, **kw: mock_resp, + ) + with pytest.raises(MarketplaceFetchError, match="size limit"): + _fetch_url_direct("https://example.com/index.json") + + def test_content_length_under_limit_succeeds(self, monkeypatch): + mock_resp = _mock_response_with_headers( + 200, json_body={"skills": []}, + response_headers={"Content-Length": "1024"}, + ) + monkeypatch.setattr( + "apm_cli.marketplace.client.requests.get", + lambda *a, **kw: mock_resp, + ) + result = _fetch_url_direct("https://example.com/index.json") + assert isinstance(result, FetchResult) + + def test_body_over_limit_without_content_length_raises(self, monkeypatch): + 
import unittest.mock as mock + + oversized_body = b"x" * (11 * 1024 * 1024) + mock_resp = mock.MagicMock() + mock_resp.status_code = 200 + mock_resp.url = "https://example.com/index.json" + mock_resp.headers = {} + mock_resp.content = oversized_body + mock_resp.raise_for_status.return_value = None + monkeypatch.setattr( + "apm_cli.marketplace.client.requests.get", + lambda *a, **kw: mock_resp, + ) + with pytest.raises(MarketplaceFetchError, match="size limit"): + _fetch_url_direct("https://example.com/index.json") diff --git a/tests/unit/marketplace/test_marketplace_url_commands.py b/tests/unit/marketplace/test_marketplace_url_commands.py index d7e12d31..ccfad242 100644 --- a/tests/unit/marketplace/test_marketplace_url_commands.py +++ b/tests/unit/marketplace/test_marketplace_url_commands.py @@ -93,7 +93,7 @@ def test_url_source_type_is_url( assert source.source_type == "url" def test_http_url_rejected(self, runner): - """Plain http:// (non-TLS) must be rejected — RFC requires HTTPS.""" + """Plain http:// (non-TLS) must be rejected -- RFC requires HTTPS.""" from apm_cli.commands.marketplace import marketplace result = runner.invoke(marketplace, ["add", "http://example.com"]) @@ -110,7 +110,7 @@ def test_mixed_case_scheme_detected_as_url(self, runner, url): from apm_cli.commands.marketplace import marketplace result = runner.invoke(marketplace, ["add", url]) - # Should NOT produce "Invalid format ... OWNER/REPO" — + # Should NOT produce "Invalid format ... 
OWNER/REPO" -- # it should enter the URL path (may fail on http or succeed on https) assert "OWNER/REPO" not in result.output @@ -300,7 +300,7 @@ def test_owner_repo_does_not_call_url_path( # --------------------------------------------------------------------------- -# list command — URL source display (t8-test-07) +# list command -- URL source display (t8-test-07) # --------------------------------------------------------------------------- @@ -333,7 +333,8 @@ def test_list_plaintext_shows_url_not_owner_repo(self, runner, tmp_path, monkeyp with patch("apm_cli.commands.marketplace._get_console", return_value=None): result = runner.invoke(marketplace, ["list"]) - assert "https://example.com" in result.output + url = "https://example.com/.well-known/agent-skills/index.json" + assert url in result.output # Must not show "(/)"-style owner/repo for a URL source assert "(/" not in result.output @@ -358,7 +359,7 @@ def test_list_does_not_show_default_github_fields_for_url_source( # --------------------------------------------------------------------------- -# remove command — URL source display (t8-test-08, t8-test-09) +# remove command -- URL source display (t8-test-08, t8-test-09) # --------------------------------------------------------------------------- @@ -389,7 +390,8 @@ def test_remove_confirmation_shows_url(self, runner, tmp_path, monkeypatch): # Provide "n" so it cancels without actually removing result = runner.invoke(marketplace, ["remove", "example-skills"], input="n\n") - assert "https://example.com" in result.output + url = "https://example.com/.well-known/agent-skills/index.json" + assert url in result.output # Confirm text must not show default "(/)"-style placeholder assert "(/" not in result.output @@ -463,7 +465,7 @@ def test_fetch_error_not_double_wrapped(self, mock_fetch, runner): # --------------------------------------------------------------------------- -# update command — URL source cache clearing +# update command -- URL source cache clearing 
# --------------------------------------------------------------------------- diff --git a/tests/unit/marketplace/test_marketplace_url_models.py b/tests/unit/marketplace/test_marketplace_url_models.py index f567f817..98af1b37 100644 --- a/tests/unit/marketplace/test_marketplace_url_models.py +++ b/tests/unit/marketplace/test_marketplace_url_models.py @@ -34,7 +34,7 @@ # --------------------------------------------------------------------------- -# MarketplaceSource — URL fields +# MarketplaceSource -- URL fields # --------------------------------------------------------------------------- @@ -135,6 +135,18 @@ def test_unknown_source_type_raises(self): with pytest.raises(ValueError, match="artifactory"): MarketplaceSource.from_dict(d) + def test_url_source_missing_url_raises(self): + """from_dict with source_type='url' but no url key must raise.""" + d = {"name": "x", "source_type": "url"} + with pytest.raises(ValueError, match="non-empty"): + MarketplaceSource.from_dict(d) + + def test_url_source_empty_url_raises(self): + """from_dict with source_type='url' and empty url must raise.""" + d = {"name": "x", "source_type": "url", "url": ""} + with pytest.raises(ValueError, match="non-empty"): + MarketplaceSource.from_dict(d) + def test_url_source_to_dict_omits_github_only_fields(self): """URL to_dict must not include host, branch, or path.""" src = MarketplaceSource( @@ -247,7 +259,7 @@ def test_empty_skills_list(self): assert len(manifest.plugins) == 0 def test_missing_skills_key_returns_empty_manifest(self): - """No 'skills' key present — returns an empty manifest rather than raising.""" + """No 'skills' key present -- returns an empty manifest rather than raising.""" data = {"$schema": _KNOWN_SCHEMA} manifest = parse_agent_skills_index(data, "test") assert len(manifest.plugins) == 0 @@ -385,15 +397,15 @@ def test_missing_description_defaults_to_empty(self): p = parse_agent_skills_index(data, "t").plugins[0] assert p.description == "" - def 
test_missing_url_in_entry_defaults_to_empty(self): + def test_missing_url_in_entry_is_skipped(self): + """Entries without a url field are now skipped (C07 fix).""" data = { "$schema": _KNOWN_SCHEMA, "skills": [ {"name": "my-skill", "type": "skill-md", "digest": _VALID_DIGEST} ], } - p = parse_agent_skills_index(data, "t").plugins[0] - assert p.source["url"] == "" + assert len(parse_agent_skills_index(data, "t").plugins) == 0 def test_entry_without_digest_is_skipped(self): data = { @@ -420,7 +432,7 @@ def test_duplicate_skill_names_both_accepted(self): # --------------------------------------------------------------------------- -# MarketplaceManifest — source provenance fields (t5-test-04) +# MarketplaceManifest -- source provenance fields (t5-test-04) # --------------------------------------------------------------------------- @@ -479,7 +491,7 @@ def test_malformed_digest_entry_skipped(self, digest): assert len(parse_agent_skills_index(data, "t").plugins) == 0 def test_missing_digest_entry_skipped(self): - """A skill entry with no digest is skipped — digest is required by the RFC.""" + """A skill entry with no digest is skipped -- digest is required by the RFC.""" data = { "$schema": _KNOWN_SCHEMA, "skills": [ @@ -531,7 +543,7 @@ def test_invalid_names_emit_warning_not_debug(self, caplog): assert len(warning_messages) == 2 def test_structural_issues_do_not_produce_warnings(self, caplog): - """Non-dict entries and missing name are structural noise — keep at DEBUG.""" + """Non-dict entries and missing name are structural noise -- keep at DEBUG.""" import logging data = { @@ -547,3 +559,73 @@ def test_structural_issues_do_not_produce_warnings(self, caplog): assert len(result.plugins) == 0 warning_messages = [r.message for r in caplog.records if r.levelno == logging.WARNING] assert len(warning_messages) == 0 + + +class TestSkillFieldValidation: + """parse_agent_skills_index must validate type/url/description fields.""" + + _SCHEMA = 
"https://schemas.agentskills.io/discovery/0.2.0/schema.json" + _DIGEST = "sha256:" + "a" * 64 + + def test_unsupported_type_skipped(self): + data = { + "$schema": self._SCHEMA, + "skills": [ + {"name": "my-skill", "type": "unknown-type", "url": "/a", "digest": self._DIGEST}, + ], + } + result = parse_agent_skills_index(data, "s") + assert len(result.plugins) == 0 + + def test_non_string_type_skipped(self): + data = { + "$schema": self._SCHEMA, + "skills": [ + {"name": "my-skill", "type": 123, "url": "/a", "digest": self._DIGEST}, + ], + } + result = parse_agent_skills_index(data, "s") + assert len(result.plugins) == 0 + + def test_missing_url_skipped(self): + data = { + "$schema": self._SCHEMA, + "skills": [ + {"name": "my-skill", "type": "skill-md", "digest": self._DIGEST}, + ], + } + result = parse_agent_skills_index(data, "s") + assert len(result.plugins) == 0 + + def test_non_string_url_skipped(self): + data = { + "$schema": self._SCHEMA, + "skills": [ + {"name": "my-skill", "type": "skill-md", "url": 42, "digest": self._DIGEST}, + ], + } + result = parse_agent_skills_index(data, "s") + assert len(result.plugins) == 0 + + def test_non_string_description_defaults_to_empty(self): + data = { + "$schema": self._SCHEMA, + "skills": [ + {"name": "my-skill", "type": "skill-md", "url": "/a", + "digest": self._DIGEST, "description": 42}, + ], + } + result = parse_agent_skills_index(data, "s") + assert len(result.plugins) == 1 + assert result.plugins[0].description == "" + + def test_valid_skill_md_and_archive_accepted(self): + data = { + "$schema": self._SCHEMA, + "skills": [ + {"name": "skill-a", "type": "skill-md", "url": "/a", "digest": self._DIGEST}, + {"name": "skill-b", "type": "archive", "url": "/b", "digest": self._DIGEST}, + ], + } + result = parse_agent_skills_index(data, "s") + assert len(result.plugins) == 2 diff --git a/tests/unit/marketplace/test_marketplace_url_resolver.py b/tests/unit/marketplace/test_marketplace_url_resolver.py index 1523a3ae..8e1682b1 
100644 --- a/tests/unit/marketplace/test_marketplace_url_resolver.py +++ b/tests/unit/marketplace/test_marketplace_url_resolver.py @@ -53,7 +53,7 @@ def url_source(): # --------------------------------------------------------------------------- -# resolve_plugin_source — skill-md and archive types (t8-test-02, t8-test-03) +# resolve_plugin_source -- skill-md and archive types (t8-test-02, t8-test-03) # --------------------------------------------------------------------------- @@ -87,7 +87,7 @@ def test_archive_missing_url_raises(self): # --------------------------------------------------------------------------- -# _resolve_url_source — non-GitHub HTTPS allowed (t8-test-04, t8-test-05) +# _resolve_url_source -- non-GitHub HTTPS allowed (t8-test-04, t8-test-05) # --------------------------------------------------------------------------- @@ -124,9 +124,17 @@ def test_non_https_url_raises(self, url): with pytest.raises(ValueError, match="HTTPS"): _resolve_url_source({"url": url}) + @pytest.mark.parametrize("url", [ + "HTTPS://cdn.example.com/skills", + "Https://CDN.Example.Com/index.json", + ]) + def test_mixed_case_https_scheme_accepted(self, url): + """RFC 3986: scheme is case-insensitive; HTTPS:// must be accepted.""" + assert _resolve_url_source({"url": url}) == url + # --------------------------------------------------------------------------- -# resolve_marketplace_plugin — URL marketplace end-to-end (t8-test-06) +# resolve_marketplace_plugin -- URL marketplace end-to-end (t8-test-06) # --------------------------------------------------------------------------- @@ -155,7 +163,7 @@ def test_url_marketplace_skill_md_resolves(self, url_source, skill_md_plugin): assert plugin.name == "code-review" def test_url_marketplace_passes_empty_owner_repo(self, url_source, skill_md_plugin): - """URL sources have owner='' and repo='' — resolver must not crash on this.""" + """URL sources have owner='' and repo='' -- resolver must not crash on this.""" manifest = 
MarketplaceManifest( name="example-skills", plugins=(skill_md_plugin,), From 516370aa043a8b43a448b96336b281711e22eed2 Mon Sep 17 00:00:00 2001 From: Vicente-Pastor <32805335+Vicente-Pastor@users.noreply.github.com> Date: Mon, 13 Apr 2026 17:27:53 +0100 Subject: [PATCH 5/5] fix: AGENTS.md compliance -- encoding, docs, changelog - Replace non-ASCII arrows (U+2192) and section sign (U+00A7) with ASCII - Update cli-commands.md with URL marketplace add syntax and examples - Update marketplace group description to mention URL sources - Add PR number (#691) alongside issue number (#676) in CHANGELOG Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 12 ++++++------ .../src/content/docs/reference/cli-commands.md | 18 +++++++++++++----- src/apm_cli/marketplace/client.py | 2 +- .../marketplace/test_marketplace_url_client.py | 4 ++-- .../test_marketplace_url_commands.py | 4 ++-- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7c5ecfb..ec5def7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,12 +10,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- `apm marketplace add ` accepts HTTPS URLs for registering Agent Skills discovery indexes, with automatic `.well-known/agent-skills/index.json` resolution for bare origins (#676) -- Agent Skills Discovery RFC v0.2.0 index parser with strict `$schema` validation, skill name rules, and digest verification (#676) -- SHA-256 digest computation and integrity verification for URL-based marketplace indexes (#676) -- ETag/Last-Modified conditional refresh for URL marketplace indexes with stale-while-revalidate fallback (#676) -- Archive download and extraction support for `type: "archive"` skill entries with path traversal and decompression bomb safety guards (#676) -- Lockfile provenance fields (`source_url`, `source_digest`) for URL-sourced marketplace dependencies (#676) +- `apm marketplace add ` 
accepts HTTPS URLs for registering Agent Skills discovery indexes, with automatic `.well-known/agent-skills/index.json` resolution for bare origins (#676, #691) +- Agent Skills Discovery RFC v0.2.0 index parser with strict `$schema` validation, skill name rules, and digest verification (#676, #691) +- SHA-256 digest computation and integrity verification for URL-based marketplace indexes (#676, #691) +- ETag/Last-Modified conditional refresh for URL marketplace indexes with stale-while-revalidate fallback (#676, #691) +- Archive download and extraction support for `type: "archive"` skill entries with path traversal and decompression bomb safety guards (#676, #691) +- Lockfile provenance fields (`source_url`, `source_digest`) for URL-sourced marketplace dependencies (#676, #691) - `apm install` now automatically discovers and deploys local `.apm/` primitives (skills, instructions, agents, prompts, hooks, commands) to target directories, with local content taking priority over dependencies on collision (#626, #644) ### Fixed diff --git a/docs/src/content/docs/reference/cli-commands.md b/docs/src/content/docs/reference/cli-commands.md index 438ded4f..2466a364 100644 --- a/docs/src/content/docs/reference/cli-commands.md +++ b/docs/src/content/docs/reference/cli-commands.md @@ -937,7 +937,7 @@ apm mcp show a5e8a7f0-d4e4-4a1d-b12f-2896a23fd4f1 ### `apm marketplace` - Plugin marketplace management -Register, browse, and manage plugin marketplaces. Marketplaces are GitHub repositories containing a `marketplace.json` index of plugins. +Register, browse, and manage plugin marketplaces. Marketplaces are GitHub repositories containing a `marketplace.json` index of plugins, or HTTPS URLs serving an Agent Skills Discovery index. > See the [Marketplaces guide](../../guides/marketplaces/) for concepts and workflows. @@ -947,26 +947,28 @@ apm marketplace COMMAND [OPTIONS] #### `apm marketplace add` - Register a marketplace -Register a GitHub repository as a plugin marketplace. 
+Register a GitHub repository or HTTPS URL as a plugin marketplace. ```bash apm marketplace add OWNER/REPO [OPTIONS] apm marketplace add HOST/OWNER/REPO [OPTIONS] +apm marketplace add URL [OPTIONS] ``` **Arguments:** - `OWNER/REPO` - GitHub repository containing `marketplace.json` - `HOST/OWNER/REPO` - Repository on a non-github.com host (e.g., GitHub Enterprise) +- `URL` - HTTPS URL to an Agent Skills Discovery index (bare origins auto-resolve to `/.well-known/agent-skills/index.json`) **Options:** - `-n, --name TEXT` - Custom display name for the marketplace -- `-b, --branch TEXT` - Branch to track (default: main) -- `--host TEXT` - Git host FQDN (default: github.com or `GITHUB_HOST` env var) +- `-b, --branch TEXT` - Branch to track (default: main; GitHub sources only) +- `--host TEXT` - Git host FQDN (default: github.com or `GITHUB_HOST` env var; GitHub sources only) - `-v, --verbose` - Show detailed output **Examples:** ```bash -# Register a marketplace +# Register a GitHub marketplace apm marketplace add acme/plugin-marketplace # Register with a custom name and branch @@ -975,6 +977,12 @@ apm marketplace add acme/plugin-marketplace --name acme-plugins --branch release # Register from a GitHub Enterprise host apm marketplace add acme/plugin-marketplace --host ghes.corp.example.com apm marketplace add ghes.corp.example.com/acme/plugin-marketplace + +# Register a URL-based marketplace (bare origin) +apm marketplace add https://plugins.example.com + +# Register with full index URL and custom name +apm marketplace add https://plugins.example.com/.well-known/agent-skills/index.json --name company-skills ``` #### `apm marketplace list` - List registered marketplaces diff --git a/src/apm_cli/marketplace/client.py b/src/apm_cli/marketplace/client.py index 4aa2ee96..f0a10984 100644 --- a/src/apm_cli/marketplace/client.py +++ b/src/apm_cli/marketplace/client.py @@ -269,7 +269,7 @@ def _fetch_url_direct(url: str, *, etag: str = "", last_modified: str = "", if 
last_modified: headers["If-Modified-Since"] = last_modified resp = requests.get(url, headers=headers, timeout=30) - # Guard against HTTPS→HTTP redirect (S1) + # Guard against HTTPS->HTTP redirect (S1) final_url = getattr(resp, "url", None) if isinstance(final_url, str) and urlparse(final_url).scheme.lower() != "https": raise MarketplaceFetchError( diff --git a/tests/unit/marketplace/test_marketplace_url_client.py b/tests/unit/marketplace/test_marketplace_url_client.py index 69cfe83f..78514930 100644 --- a/tests/unit/marketplace/test_marketplace_url_client.py +++ b/tests/unit/marketplace/test_marketplace_url_client.py @@ -833,7 +833,7 @@ class TestFetchUrlDirectRedirectEnforcement: """_fetch_url_direct must reject responses redirected to non-HTTPS URLs.""" def test_redirect_to_http_raises(self, monkeypatch): - """An HTTPS→HTTP redirect must be caught after the request completes.""" + """An HTTPS->HTTP redirect must be caught after the request completes.""" mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) mock_resp.url = "http://evil.com/index.json" monkeypatch.setattr( @@ -844,7 +844,7 @@ def test_redirect_to_http_raises(self, monkeypatch): _fetch_url_direct("https://example.com/index.json") def test_redirect_to_https_accepted(self, monkeypatch): - """HTTPS→HTTPS redirect is fine.""" + """HTTPS->HTTPS redirect is fine.""" mock_resp = _mock_response(200, json_body=_AGENT_SKILLS_INDEX) mock_resp.url = "https://cdn.example.com/index.json" monkeypatch.setattr( diff --git a/tests/unit/marketplace/test_marketplace_url_commands.py b/tests/unit/marketplace/test_marketplace_url_commands.py index ccfad242..be61ecc0 100644 --- a/tests/unit/marketplace/test_marketplace_url_commands.py +++ b/tests/unit/marketplace/test_marketplace_url_commands.py @@ -106,7 +106,7 @@ def test_http_url_rejected(self, runner): "HTTP://example.com", ]) def test_mixed_case_scheme_detected_as_url(self, runner, url): - """URL detection must be case-insensitive per RFC 3986 §3.1.""" + 
"""URL detection must be case-insensitive per RFC 3986 Section 3.1.""" from apm_cli.commands.marketplace import marketplace result = runner.invoke(marketplace, ["add", url]) @@ -438,7 +438,7 @@ class TestAddCommandErrorMessages: @patch("apm_cli.marketplace.client.fetch_marketplace") def test_schema_error_shows_invalid_index_format(self, mock_fetch, runner): - """T9: wrong $schema → 'Invalid index format', not generic 'Failed to register'.""" + """T9: wrong $schema -> 'Invalid index format', not generic 'Failed to register'.""" from apm_cli.commands.marketplace import marketplace mock_fetch.side_effect = ValueError(