diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7d3e84f..47cf47e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,6 +21,9 @@ jobs:
           python -m pip install --upgrade pip
           pip install -e ".[dev]"
 
+      - name: Download spaCy model
+        run: python -m spacy download en_core_web_lg
+
       - name: Lint with ruff
         run: ruff check sentinelguard/
 
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index acd8dc4..fc8ddc2 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -43,6 +43,9 @@ jobs:
           python -m pip install --upgrade pip
           pip install -e ".[dev]"
 
+      - name: Download spaCy model
+        run: python -m spacy download en_core_web_lg
+
       - name: Run tests
         run: pytest tests/ -v --tb=short
 
diff --git a/pyproject.toml b/pyproject.toml
index 9d78a3e..54571eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,7 +36,6 @@ dependencies = [
     "presidio-analyzer>=2.2.0",
     "presidio-anonymizer>=2.2.0",
     "spacy>=3.6.0",
-    "en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl",
     # Model-based detection (HuggingFace)
     "transformers>=4.30.0",
     "torch>=2.0.0",
diff --git a/setup.py b/setup.py
index 667c987..ae0c9b4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,71 @@
-"""Backwards-compatible setup.py for sentinelguard."""
+"""Setup with post-install hook to download spaCy model for Presidio PII detection.
+
+NOTE: the hook runs only when setuptools executes the ``install`` command.
+pip normally installs from a built wheel, and editable installs
+(``pip install -e .``) use a different command, so neither triggers it; in
+those cases run ``python -m spacy download en_core_web_lg`` explicitly.
+"""
+
+import subprocess
+import sys
 
 from setuptools import setup
+from setuptools.command.install import install
+
+
+def _say(message):
+    """Print *message*, falling back to ASCII escapes on narrow consoles.
+
+    The status lines contain emoji. On a stdout whose encoding cannot
+    represent them (e.g. cp1252) ``print`` raises UnicodeEncodeError, which
+    must not abort an otherwise successful install.
+    """
+    try:
+        print(message)
+    except UnicodeEncodeError:
+        print(message.encode("ascii", "backslashreplace").decode("ascii"))
+
+
+class PostInstallCommand(install):
+    """Post-installation: download spaCy model required by Presidio."""
+
+    def run(self):
+        """Run the standard install, then fetch the spaCy model (best effort)."""
+        install.run(self)
+        self._download_spacy_model()
+
+    def _download_spacy_model(self):
+        """Download ``en_core_web_lg`` unless it is already installed.
+
+        Ordinary failures are reported on stdout together with the manual
+        command instead of being raised, so a model problem cannot fail the
+        package install itself.
+        """
+        model = "en_core_web_lg"
+        _say(f"🔍 Checking spaCy model: {model}")
+        try:
+            # Ask the package metadata instead of calling spacy.load():
+            # loading would pull the whole model into memory just to learn
+            # whether it exists.
+            from spacy.util import is_package
+
+            if is_package(model):
+                _say(f"✅ {model} — already installed")
+                return
+            _say(f"⬇️ {model} — downloading...")
+            subprocess.check_call(
+                [sys.executable, "-m", "spacy", "download", model]
+            )
+            _say(f"✅ {model} — installed")
+        except ImportError:
+            _say("⚠️ spaCy not installed, skipping model download")
+        except Exception as e:
+            # Deliberately broad: this is a best-effort install-time step.
+            _say(f"⚠️ Could not download {model}: {e}")
+            _say(f"   Run manually: python -m spacy download {model}")
+
+
 if __name__ == "__main__":
-    setup()
+    setup(
+        cmdclass={
+            "install": PostInstallCommand,
+        },
+    )