diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index a88ffe43..f5ccf735 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -26,7 +26,7 @@ jobs: #os: [macOS-latest, ubuntu-latest, windows-latest] #python-version: [3.8, 3.9, "3.10"] os: [ubuntu-latest] - python-version: [3.8, 3.9, "3.10", "3.11"] + python-version: ["3.12", "3.13"] steps: - uses: actions/checkout@v1 @@ -92,44 +92,29 @@ jobs: #Install numpy first - name: Install numpy + shell: bash -l {0} run: | - conda install "numpy<2" + conda install "numpy" - #Install pydca - - name: Check out pydca - uses: actions/checkout@v3 - with: - repository: cabb99/pydca - path: pydca_dir + # Install pyDCA from GitHub (optional – tests that need it will be skipped if this fails) - name: Install pydca + continue-on-error: true + shell: bash -l {0} run: | - cd pydca_dir - echo "numpy<2" > constraints.txt - conda run -n test pip install -r requirements.txt -c constraints.txt - conda run -n test python -m pip install . --no-deps - conda list - cd .. - pwd - conda run -n test python -c "import sys; print(sys.executable); print('\n'.join(sys.path))" - conda run -n test python -c "import pydca; print('pydca is installed and importable')" - conda run -n test pip freeze | grep pydca + python -m pip install "git+https://github.com/cabb99/pydca.git" + python -c "import pydca; print('pydca is installed and importable')" + pip freeze | grep pydca - name: Install package # conda setup requires this special shell shell: bash -l {0} run: | - conda install -c conda-forge --file requirements.txt + pip install -r requirements.txt + conda install -c conda-forge pdbfixer python -m pip install . --no-deps conda list - - name: Install pdbfixer - - # conda setup requires this special shell - shell: bash -l {0} - run: | - conda install -c conda-forge pdbfixer - - name: Run tests # conda setup requires this special shell @@ -139,7 +124,7 @@ jobs: python -c "import sys; print(sys.executable); print('\n'.join(sys.path))" python -c "import pydca; print('pydca is installed and importable')" pip freeze | grep pydca - pytest -v --cov=frustratometer --cov-report=xml --color=yes tests/ + pytest -v --skip-slow --skip-memory-heavy --cov=frustratometer --cov-report=xml --color=yes tests/ - name: CodeCov uses: codecov/codecov-action@v3 diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml new file mode 100644 index 00000000..05a50575 --- /dev/null +++ b/.github/workflows/publish-to-pypi.yml @@ -0,0 +1,124 @@ +name: Publish to PyPI + +on: + release: + types: [published] + +jobs: + build: + name: Build distribution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + fetch-depth: 0 # versioneer needs full history + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - name: Install build dependencies + run: python3 -m pip install build --user + + - name: Build wheel and source tarball + run: python3 -m build + + - name: Store distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-testpypi: + name: Publish to TestPyPI + needs: build + runs-on: ubuntu-latest + environment: + name: testpypi + url: https://test.pypi.org/p/frustratometer + permissions: + id-token: write # mandatory for trusted publishing + + steps: + - name: Download distributions + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + - name: Publish to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + + test-from-testpypi: + name: Test install from TestPyPI + needs: publish-to-testpypi + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python-version }} + channels: conda-forge,defaults + activate-environment: test-publish + auto-update-conda: false + auto-activate-base: false + + - name: Install conda dependencies + shell: bash -l {0} + run: | + conda install -c conda-forge openmm pdbfixer pytest + + - name: Wait for TestPyPI to index the package + shell: bash -l {0} + run: sleep 30 + + - name: Install frustratometer from TestPyPI + shell: bash -l {0} + run: | + pip install --pre \ + --index-url https://test.pypi.org/simple/ \ + --extra-index-url https://pypi.org/simple/ \ + frustratometer + + - name: Verify import and version + shell: bash -l {0} + run: | + python -c "import frustratometer; print(frustratometer.__version__)" + + - name: Run tests + shell: bash -l {0} + run: | + python -m pytest tests/ \ + --ignore=tests/test_dca_frustratometer.py \ + -x --tb=short + + publish-to-pypi: + name: Publish to PyPI + needs: test-from-testpypi + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/frustratometer + permissions: + id-token: write # mandatory for trusted publishing + + steps: + - name: Download distributions + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/README.md b/README.md index 8d25b47f..b7a1e3b0 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,25 @@ The frustratometer is based on the principle of minimal frustration postulated b Under most circunstances amino acids are minimally frustrated in the protein, but pockets where amino acids are of interest for protein function can remain frustrated, as they become minimally frustrated when achieving the corresponding function. In short, a frustrated residue or interaction indicates that the particular amino acid would be minimized under a different environment, which reveals possible competing evolutionary pressures on its selection. We have identified that these pockets usually correspond to regions of functional importance, for example, that may form part of a catalitic domain, a hinge domain, or a binding region. -This principle of minimum frustration has been shown using the AWSEM forcefield but can be extended to any other forcefield, including atomistic forcefields, or a pseudo-forcefield like DCA, as implemented here. +This principle of minimum frustration has been shown using the AWSEM forcefield but can be extended to any other forcefield, including atomistic forcefields, or energy models derived from coevolutionary analysis, such as DCA-based Potts models, as implemented here. ## Installation -To install this modules please clone this repository and install it using the following commands. +### Using conda (recommended) - git clone HanaJaafari/Frustratometer + conda install -c conda-forge frustratometer + +### Using pip + + pip install frustratometer + +Note: `openmm` and `pdbfixer` are not available on PyPI and must be installed separately via conda: + + conda install -c conda-forge openmm pdbfixer + +### From source + + git clone https://github.com/HanaJaafari/Frustratometer cd Frustratometer conda install -c conda-forge --file requirements.txt pip install -e . diff --git a/devtools/docker/Dockerfile b/devtools/docker/Dockerfile new file mode 100644 index 00000000..5398a099 --- /dev/null +++ b/devtools/docker/Dockerfile @@ -0,0 +1,35 @@ +ARG PYTHON_VERSION=3.12 +FROM condaforge/miniforge3:latest + +# Install OS-level deps +RUN apt-get update -qq \ + && apt-get install -qq -y --no-install-recommends \ + gcc g++ gfortran git wget hmmer \ + && rm -rf /var/lib/apt/lists/* + +# Create conda env with the requested Python version +ARG PYTHON_VERSION +RUN conda create -n test -y python=${PYTHON_VERSION} && conda clean -afy +SHELL ["conda", "run", "-n", "test", "/bin/bash", "-c"] + +WORKDIR /repo + +# Install dependencies first (better layer caching) +COPY requirements.txt . +RUN pip install --quiet --no-cache-dir -r requirements.txt +RUN pip install --quiet --no-cache-dir pytest pytest-cov +RUN conda install -n test -y -c conda-forge pdbfixer \ + || echo "WARNING: pdbfixer install failed – pdbfixer tests will be skipped" + +# On Python <3.10, old prody needs numpy<2; conda's pdbfixer may have upgraded it +RUN python -c "import sys; sys.exit(0 if sys.version_info < (3,10) else 1)" \ + && conda install -n test -y "numpy<2" \ + || true + +# Install pyDCA (optional – continues on failure) +RUN pip install --quiet --no-cache-dir "git+https://github.com/cabb99/pydca.git" \ + || echo "WARNING: pydca install failed – pyDCA tests will be skipped" + +# Install the package itself +COPY . . +RUN pip install --quiet --no-cache-dir --no-deps -e . diff --git a/devtools/scripts/smoke_test.sh b/devtools/scripts/smoke_test.sh new file mode 100755 index 00000000..955e0ee8 --- /dev/null +++ b/devtools/scripts/smoke_test.sh @@ -0,0 +1,125 @@ +#!/usr/bin/env bash +# smoke_test.sh – build and run the fast test suite for one or more Python versions. +# +# By default all versions run in PARALLEL. Set PARALLEL=0 to run sequentially. +# +# Usage: +# devtools/scripts/smoke_test.sh [PYTHON_VERSION ...] +# +# Examples: +# devtools/scripts/smoke_test.sh # defaults: 3.12 3.13 +# devtools/scripts/smoke_test.sh 3.10 3.11 3.12 3.13 3.14 +# +# Environment variables: +# TEST_FLAGS – pytest flags (default: --skip-slow --skip-stochastic --skip-network) +# LOG_DIR – directory for per-version log files (default: devtools/logs) +# PARALLEL – set to 0 to run sequentially (default: 1) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +DOCKERFILE="$REPO_ROOT/devtools/docker/Dockerfile" +TEST_FLAGS="${TEST_FLAGS:---skip-slow --skip-stochastic --skip-network}" +LOG_DIR="${LOG_DIR:-$REPO_ROOT/devtools/logs}" +PARALLEL="${PARALLEL:-1}" + +# Default to testing Python 3.7 through 3.14, but allow overriding via command-line args +if [ $# -gt 0 ]; then + VERSIONS=("$@") +else + VERSIONS=(3.7 3.8 3.9 3.10 3.11 3.12 3.13 3.14) +fi + +mkdir -p "$LOG_DIR" + +# ---------- worker function (one per Python version) ---------- +run_one() { + local PY="$1" + local LOG="$LOG_DIR/smoke_py${PY}.log" + local IMAGE="frustratometer-test:py${PY}" + + { + echo "════════════════════════════════════════" + echo " Python ${PY} – started $(date '+%H:%M:%S')" + echo "════════════════════════════════════════" + + echo "--- docker build ---" + if ! docker build \ + --build-arg PYTHON_VERSION="${PY}" \ + -f "$DOCKERFILE" \ + -t "$IMAGE" \ + "$REPO_ROOT" ; then + echo "BUILD FAILED for Python ${PY}" + return 1 + fi + + echo "" + echo "--- pytest ---" + if docker run --rm "$IMAGE" \ + conda run -n test --no-capture-output \ + python -m pytest tests/ $TEST_FLAGS -v --tb=short --color=no ; then + echo "" + echo "RESULT: PASS (Python ${PY})" + return 0 + else + echo "" + echo "RESULT: FAIL (Python ${PY})" + return 1 + fi + } > "$LOG" 2>&1 +} + +# ---------- launch ---------- +PIDS=() +for PY in "${VERSIONS[@]}"; do + if [ "$PARALLEL" = "1" ]; then + run_one "$PY" & + PIDS+=("$!:$PY") + else + run_one "$PY" || true + fi +done + +# ---------- wait & collect ---------- +PASS=() +FAIL=() + +if [ "$PARALLEL" = "1" ]; then + for entry in "${PIDS[@]}"; do + PID="${entry%%:*}" + PY="${entry##*:}" + if wait "$PID"; then + PASS+=("$PY") + else + FAIL+=("$PY") + fi + done +else + for PY in "${VERSIONS[@]}"; do + if grep -q "^RESULT: PASS" "$LOG_DIR/smoke_py${PY}.log" 2>/dev/null; then + PASS+=("$PY") + else + FAIL+=("$PY") + fi + done +fi + +# ---------- summary ---------- +echo "" +echo "════════════════════════════════════════" +echo " Summary" +echo "════════════════════════════════════════" +[ ${#PASS[@]} -gt 0 ] && echo " PASSED: ${PASS[*]}" +[ ${#FAIL[@]} -gt 0 ] && echo " FAILED: ${FAIL[*]}" +echo " Logs: $LOG_DIR/" +echo "" + +# Print failures inline for convenience +for PY in "${FAIL[@]+"${FAIL[@]}"}"; do + echo "" + echo "──── FAILURES for Python ${PY} (last 50 lines) ────" + tail -50 "$LOG_DIR/smoke_py${PY}.log" +done + +[ ${#FAIL[@]} -eq 0 ] # exit 0 only if nothing failed diff --git a/docs/frustratometer.rst b/docs/frustratometer.rst index 990a17f3..107f1a82 100644 --- a/docs/frustratometer.rst +++ b/docs/frustratometer.rst @@ -20,7 +20,7 @@ The frustratometer is based on the principle of minimal frustration postulated b Under most circumstances, amino acids are minimally frustrated in the protein, but pockets where amino acids are of interest for protein function. In short, a frustrated residue or interaction indicates that the particular amino acid would be minimized under a different configuration, or a different sequence, which reveals possible competing evolutionary pressures on its selection. We have identified that these pockets usually correspond to regions of functional importance, for example, that may form part of a catalytic domain, a hinge domain, or a binding region. -This principle of minimum frustration has been shown using the AWSEM forcefield but can be extended to any other forcefield, including atomistic forcefields, or a pseudo-forcefield like DCA, as implemented here. +This principle of minimum frustration has been shown using the AWSEM forcefield but can be extended to any other forcefield, including atomistic forcefields, or energy models derived from coevolutionary analysis, such as DCA-based Potts models, as implemented here. In this module, we implement a version of the frustratometer based on Direct Coupling analysis. diff --git a/frustratometer/classes/DCA.py b/frustratometer/classes/DCA.py index 7c9623c5..e2ffc33f 100644 --- a/frustratometer/classes/DCA.py +++ b/frustratometer/classes/DCA.py @@ -30,6 +30,24 @@ class DCA(Frustratometer): locally generate these parameters using the pyDCA package. In this case, the user can try using the "from_distance_matrix," "from_pfam_alignment," or "from_hmmer_alignment" methods. """ + + @staticmethod + def _compute_potts_model_from_alignment(filtered_alignment_file: Union[Path, str], DCA_format: str) -> dict: + """Compute a Potts model from a filtered alignment using pyDCA backends.""" + if DCA_format not in ("plmDCA", "mfDCA"): + raise ValueError("DCA_format must be either 'plmDCA' or 'mfDCA'.") + + try: + if DCA_format == "plmDCA": + return dca.pydca.plmdca(str(filtered_alignment_file)) + return dca.pydca.mfdca(str(filtered_alignment_file)) + except ImportError as exc: + raise ImportError( + "from_pfam_alignment requires the optional dependency 'pydca' to compute a Potts model. " + "Install it (for example: pip install \"git+https://github.com/cabb99/pydca.git\") or use from_potts_model_file/from_pottsmodel " + "with a precomputed Potts model." + ) from exc + # @classmethod # def from_distance_matrix(cls, # potts_model: dict, @@ -294,10 +312,10 @@ def from_pfam_alignment(cls,pdb_structure : object, self.alignment_file=pfam.download_aligment(self.PFAM_ID,self.alignment_output_file_name) self.filtered_alignment_file=filter.filter_alignment(self.alignment_output_file_name,self.filtered_alignment_output_file_name) - if self.DCA_format=="plmDCA": - self.potts_model=dca.pydca.plmdca(str(self.filtered_alignment_file)) - else: - self.potts_model=dca.pydca.mfdca(str(self.filtered_alignment_file)) + self.potts_model = cls._compute_potts_model_from_alignment( + filtered_alignment_file=self.filtered_alignment_file, + DCA_format=self.DCA_format, + ) self.aa_freq = None self.contact_freq = None @@ -365,10 +383,10 @@ def from_hmmer_alignment(cls,pdb_structure : object, self.alignment_file=align.jackhmmer(self.sequence,self.alignment_output_file_name,self.query_sequence_database_file) self.filtered_alignment_file=filter.filter_alignment(self.alignment_output_file_name,self.filtered_alignment_output_file_name) - if self.DCA_format=="plmDCA": - self.potts_model=dca.pydca.plmdca(str(self.filtered_alignment_file)) - else: - self.potts_model=dca.pydca.mfdca(str(self.filtered_alignment_file)) + self.potts_model = cls._compute_potts_model_from_alignment( + filtered_alignment_file=self.filtered_alignment_file, + DCA_format=self.DCA_format, + ) self.aa_freq = None self.contact_freq = None diff --git a/frustratometer/classes/Structure.py b/frustratometer/classes/Structure.py index 62ebec5b..da820a75 100644 --- a/frustratometer/classes/Structure.py +++ b/frustratometer/classes/Structure.py @@ -5,6 +5,7 @@ import numpy as np from typing import Union from pathlib import Path +import tempfile import warnings __all__ = ['Structure'] @@ -14,7 +15,7 @@ class Structure: def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_selection: str = None, aligned_sequence: str = None, filtered_aligned_sequence: str = None, - distance_matrix_method:str = 'CB', pdb_directory: Path = Path.cwd(), repair_pdb:bool = True)->object: + distance_matrix_method:str = 'CB', pdb_directory: Path = None, repair_pdb:bool = True)->object: """ Generates structure object. Both PDB and CIF format files are accepted as input. @@ -49,7 +50,7 @@ def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_s and 'minimum' for using the minimum distance between all atoms in each residue. pdb_directory: str - Directory where repaired pdb will be downloaded + Directory where repaired pdb will be stored. Defaults to the system temporary directory. repair_pdb: bool If True, provided pdb file will be repaired with missing residues inserted and heteroatoms removed. @@ -59,6 +60,8 @@ def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_s ------- Structure object """ + if pdb_directory is None: + pdb_directory = Path(tempfile.gettempdir()) try: #Check if file exists @@ -183,7 +186,7 @@ def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_s @classmethod def full_pdb(cls,pdb_file: Union[Path,str], chain: Union[str,None]=None, aligned_sequence: str = None, filtered_aligned_sequence: str = None, - distance_matrix_method:str = 'CB', pdb_directory: Path = Path.cwd(), repair_pdb:bool = True): + distance_matrix_method:str = 'CB', pdb_directory: Path = None, repair_pdb:bool = True): warnings.warn("The class method 'full_pdb' is now depreciated. You can now simply call the Structure class to create a full pdb or spliced pdb object.") return cls(pdb_file=pdb_file, chain=chain, @@ -196,7 +199,7 @@ def full_pdb(cls,pdb_file: Union[Path,str], chain: Union[str,None]=None, aligned @classmethod def spliced_pdb(cls,pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_selection: str = None, aligned_sequence: str = None, filtered_aligned_sequence: str = None, - distance_matrix_method:str = 'CB', pdb_directory: Path = Path.cwd(), repair_pdb:bool = True): + distance_matrix_method:str = 'CB', pdb_directory: Path = None, repair_pdb:bool = True): warnings.warn("The class method 'spliced_pdb' is now depreciated. You can now simply call the Structure class to create a full pdb or spliced pdb object.") return cls(pdb_file=pdb_file, chain=chain, diff --git a/frustratometer/frustration/frustration.py b/frustratometer/frustration/frustration.py index eaddd834..948911e0 100644 --- a/frustratometer/frustration/frustration.py +++ b/frustratometer/frustration/frustration.py @@ -395,7 +395,7 @@ def compute_singleresidue_decoy_energy_fluctuation(seq: str, def compute_mutational_decoy_energy_fluctuation(seq: str, potts_model: dict, mask: np.array, ) -> np.array: - """ + r""" Computes a (LxLx21x21) matrix for a sequence of length L. Matrix[i,j] describes all possible changes in energy upon mutating residue i and j simultaneously. .. math:: @@ -439,11 +439,11 @@ def compute_mutational_decoy_energy_fluctuation(seq: str, seq_len = len(seq_index) # Create masked decoys - pos1,pos2=np.where(mask>0) - contacts_len=len(pos1) + pos1_flat,pos2_flat=np.where(mask>0) + contacts_len=len(pos1_flat) - pos1,aa1,aa2=np.meshgrid(pos1, np.arange(21), np.arange(21), indexing='ij', sparse=True) - pos2,aa1,aa2=np.meshgrid(pos2, np.arange(21), np.arange(21), indexing='ij', sparse=True) + pos1,aa1,aa2=np.meshgrid(pos1_flat, np.arange(21), np.arange(21), indexing='ij', sparse=True) + pos2,aa1,aa2=np.meshgrid(pos2_flat, np.arange(21), np.arange(21), indexing='ij', sparse=True) #Compute fields decoy_energy = np.zeros([contacts_len, 21, 21]) @@ -451,14 +451,25 @@ def compute_mutational_decoy_energy_fluctuation(seq: str, decoy_energy -= (potts_model['h'][pos2, aa2] - potts_model['h'][pos2, seq_index[pos2]]) # h correction aa2 #Compute couplings - j_correction = np.zeros([contacts_len, 21, 21]) - for pos, aa in enumerate(seq_index): - # J correction interactions with other aminoacids - reduced_j = potts_model['J'][pos, :, aa, :].astype(np.float32) - j_correction += reduced_j[pos1, seq_index[pos1]] * mask[pos, pos1] - j_correction -= reduced_j[pos1, aa1] * mask[pos, pos1] - j_correction += reduced_j[pos2, seq_index[pos2]] * mask[pos, pos2] - j_correction -= reduced_j[pos2, aa2] * mask[pos, pos2] + # j_correction = np.zeros([contacts_len, 21, 21]) + # for pos, aa in enumerate(seq_index): + # # J correction interactions with other aminoacids + # reduced_j = potts_model['J'][pos, :, aa, :].astype(np.float32) + # j_correction += reduced_j[pos1, seq_index[pos1]] * mask[pos, pos1] + # j_correction -= reduced_j[pos1, aa1] * mask[pos, pos1] + # j_correction += reduced_j[pos2, seq_index[pos2]] * mask[pos, pos2] + # j_correction -= reduced_j[pos2, aa2] * mask[pos, pos2] + + # Vectorized: Compute reduced_j[pos1, aa1] * mask[pos, pos1] in einsum + # V[j, a] = sum_i J[i, j, seq_index[i], a] * mask[i, j] + j_reduced = potts_model['J'][np.arange(seq_len), :, seq_index, :].astype(np.float32) # (L, L, 21) + mask_float = mask.astype(np.float32) + V = np.einsum('ijk,ij->jk', j_reduced, mask_float) # (L, 21) + j_correction = ( V[pos1_flat, seq_index[pos1_flat]][:, None, None] + - V[pos1_flat, :][:, :, None] + + V[pos2_flat, seq_index[pos2_flat]][:, None, None] + - V[pos2_flat, :][:, None, :]) + # J correction, interaction with self aminoacids j_correction -= potts_model['J'][pos1, pos2, seq_index[pos1], seq_index[pos2]] * mask[pos1, pos2] # Taken two times j_correction += potts_model['J'][pos1, pos2, aa1, seq_index[pos2]] * mask[pos1, pos2] # Added mistakenly @@ -474,7 +485,7 @@ def compute_mutational_decoy_energy_fluctuation(seq: str, def compute_configurational_decoy_energy_fluctuation(seq: str, potts_model: dict, mask: np.array, ) -> np.array: - """ + r""" Computes a (LxLx21x21) matrix for a sequence of length L. Matrix[i,j] describes all possible changes in energy upon mutating and altering the local densities of residue i and j simultaneously. diff --git a/frustratometer/optimization/EnergyTerm.py b/frustratometer/optimization/EnergyTerm.py index 6cd87bc6..b564939f 100644 --- a/frustratometer/optimization/EnergyTerm.py +++ b/frustratometer/optimization/EnergyTerm.py @@ -1,8 +1,25 @@ import abc +import sys import numpy as np from frustratometer.utils.format_time import format_time -# from typing import Callable -# from functools import lru_cache #TODO: Implement lru_cache for memoization of energy functions + +if sys.version_info >= (3, 8): + from functools import cached_property +else: + # Python 3.7 backfill + class cached_property: # noqa: N801 + def __init__(self, func): + self.func = func + self.attrname = None + self.__doc__ = func.__doc__ + def __set_name__(self, owner, name): + self.attrname = name + def __get__(self, instance, owner=None): + if instance is None: + return self + val = self.func(instance) + instance.__dict__[self.attrname] = val + return val try: import numba @@ -58,7 +75,9 @@ def use_numba(self): def use_numba(self, value): if self._use_numba != value: self._use_numba = value - # self.clear_cache() + for attr in ('energy_function', 'denergy_mutation_function', + 'denergy_swap_function', 'energies_function'): + self.__dict__.pop(attr, None) def clear_cache(self): # Clear the cache for all cached properties @@ -79,10 +98,9 @@ def dummy_decorator(func, *args, **kwargs): """ Dummy decorator for functions that do not require numba compilation. """ return func - @property - #@lru_cache(maxsize=None) + @cached_property def energies_function(self): - """ Returns the energy function as a numba dispatcher. """ + """ Returns the energies function as a numba dispatcher. """ energy_function = self.energy_function def compute_energies(seq_indices:np.ndarray): """Compute the energies of multiple sequences.""" @@ -90,30 +108,25 @@ def compute_energies(seq_indices:np.ndarray): for i in numba.prange(len(seq_indices)): energies[i] = energy_function(seq_indices[i]) return energies - if self.use_numba: return numba.njit(types.Array(types.float64, 1, 'C')(types.Array(types.int64, 2, 'A', readonly=True)), parallel=True)(compute_energies) - else: - return compute_energies + return compute_energies - @property - #@lru_cache(maxsize=None) + @cached_property def energy_function(self): """ Returns the energy function as a numba dispatcher. """ if self.use_numba: return numba.njit(types.float64(types.Array(types.int64, 1, 'A', readonly=True)))(self.compute_energy) return self.compute_energy - - @property - #@lru_cache(maxsize=None) + + @cached_property def denergy_mutation_function(self): """ Returns the mutation energy change function as a numba dispatcher. """ if self.use_numba: return numba.njit(types.float64(types.Array(types.int64, 1, 'A', readonly=True),types.int64,types.int64))(self.compute_denergy_mutation) return self.compute_denergy_mutation - @property - #@lru_cache(maxsize=None) + @cached_property def denergy_swap_function(self): """ Returns the swap energy change function as a numba dispatcher. """ if self.use_numba: diff --git a/frustratometer/optimization/inner_product.py b/frustratometer/optimization/inner_product.py index f8421890..ebef1116 100644 --- a/frustratometer/optimization/inner_product.py +++ b/frustratometer/optimization/inner_product.py @@ -513,11 +513,14 @@ def compute_all_region_means(indicators1d, indicators2d): def diff_mean_inner_product_2_by_2(r0, r1, repetitions, region_mean): ijkl, iikl, ijil, ijjl, ijki, ijkj, ijkk, iiil, iiki, iikk, ijii, ijij, ijji, ijjj, iiii = range(15) n_elements= len(repetitions) + mean_inner_product = np.zeros(n_elements**4) + # Cannot decrease a count that is already zero + if repetitions[r0] == 0 or r0 == r1: + return mean_inner_product.reshape(n_elements**2, n_elements**2) m=repetitions n=m.copy() n[r0]-=1 n[r1]+=1 - mean_inner_product = np.zeros(n_elements**4) n_elements_2=n_elements**2 n_elements_3=n_elements**3 # dijkx = np.zeros(n_elements) @@ -688,6 +691,9 @@ def diff_mean_inner_product_2_by_2(r0, r1, repetitions, region_mean): def diff_mean_inner_product_1_by_2(r0, r1, repetitions, region_mean): ijk, iik, iji, ijj, iii = range(5) n_elements= len(repetitions) + # Cannot decrease a count that is already zero + if repetitions[r0] == 0 or r0 == r1: + return np.zeros(n_elements**3).reshape(n_elements, n_elements**2) m=repetitions n=m.copy() n[r0]-=1 @@ -728,6 +734,9 @@ def diff_mean_inner_product_1_by_1(r0, r1, repetitions,region_mean): ij, ii = range(2) n_elements= len(repetitions) + # Cannot decrease a count that is already zero + if repetitions[r0] == 0 or r0 == r1: + return np.zeros(n_elements**2).reshape(n_elements, n_elements) m=repetitions n=m.copy() n[r0]-=1 @@ -768,6 +777,10 @@ def diff_mean_inner_product_matrix(r0,r1, repetitions, indicators1d, indicators2 # Create the resulting matrix filled with zeros R = np.zeros((total_size, total_size)) + + # Cannot decrease a count that is already zero + if repetitions[r0] == 0 or r0 == r1: + return R # Compute the starting indices for each matrix #start_indices = np.cumsum([0] + block_sizes[:-1]) diff --git a/frustratometer/pdb/fix.py b/frustratometer/pdb/fix.py index 142656cb..a404a8b4 100644 --- a/frustratometer/pdb/fix.py +++ b/frustratometer/pdb/fix.py @@ -1,10 +1,11 @@ from pathlib import Path +import tempfile import pdbfixer PDBFile = pdbfixer.pdbfixer.app.PDBFile PDBFixer = pdbfixer.PDBFixer -def repair_pdb(pdb_file: str, chain: str, pdb_directory: Path= Path.cwd()) -> PDBFixer: +def repair_pdb(pdb_file: str, chain: str, pdb_directory: Path = None) -> PDBFixer: """ Repairs a pdb or cif file using pdbfixer. Note that a pdb file will be produced, regardless of input file format @@ -22,6 +23,8 @@ def repair_pdb(pdb_file: str, chain: str, pdb_directory: Path= Path.cwd()) -> PD fixer : object Repaired PDB Object """ + if pdb_directory is None: + pdb_directory = Path(tempfile.gettempdir()) pdb_directory=Path(pdb_directory) pdb_file=Path(pdb_file) diff --git a/requirements.txt b/requirements.txt index 30ffba19..4c7e4596 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ -python>=3.7 #pydantic requirement -numpy +numpy; python_version >= "3.10" +numpy<2; python_version < "3.10" #avoids errors when using prody scipy pandas biopython prody -pyparsing#<=3.1.1 #avoids errors when using prody -pydantic>=2 -numba -#importlib-metadata; python_version < '3.9' \ No newline at end of file +pyparsing +pydantic; python_version >= "3.10" +pydantic<=3.1.1; python_version < "3.10" #avoids errors when using prody +numba \ No newline at end of file diff --git a/setup.py b/setup.py index e7954fb7..b202d902 100644 --- a/setup.py +++ b/setup.py @@ -44,16 +44,16 @@ # Allows `setup.py test` to work correctly with pytest setup_requires=[] + pytest_runner, - # Additional entries you may want simply uncomment the lines you want and fill in the data - # url='http://www.my_package.com', # Website - # install_requires=[], # Required packages, pulls from pip if needed; do not use for Conda deployment - # platforms=['Linux', - # 'Mac OS-X', - # 'Unix', - # 'Windows'], # Valid platforms your code works on, adjust to your flavor - # python_requires=">=3.5", # Python version restrictions - - # Manual control if final package is compressible or not, set False to prevent the .egg from being made - # zip_safe=False, + python_requires=">=3.7", + install_requires=[ + 'numpy', + 'scipy', + 'pandas', + 'biopython', + 'prody', + 'pyparsing', + 'pydantic>=2', + 'numba', + ], ) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..021d7c22 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,78 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption( + "--skip-slow", + action="store_true", + default=False, + help="Skip tests marked as slow (brute-force / expensive computations).", + ) + parser.addoption( + "--skip-network", + action="store_true", + default=False, + help="Skip tests that require network access (downloads, remote databases).", + ) + parser.addoption( + "--skip-stochastic", + action="store_true", + default=False, + help="Skip tests marked as stochastic (Monte Carlo / random-sampling results).", + ) + parser.addoption( + "--skip-memory-heavy", + action="store_true", + default=False, + help="Skip tests marked as memory_heavy (peak RSS > 7 GB).", + ) + + +def pytest_configure(config): + config.addinivalue_line( + "markers", + "slow: marks tests as slow (brute-force or expensive computations); " + "runs by default, pass --skip-slow to exclude.", + ) + config.addinivalue_line( + "markers", + "network: marks tests that require network access; " + "runs by default, pass --skip-network to exclude.", + ) + config.addinivalue_line( + "markers", + "stochastic: marks tests whose results depend on random sampling and may " + "occasionally fail; runs by default, pass --skip-stochastic to exclude.", + ) + config.addinivalue_line( + "markers", + "memory_heavy: marks tests that require more than ~7 GB of RAM " + "(e.g. large protein AWSEM models); " + "runs by default, pass --skip-memory-heavy to exclude.", + ) + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--skip-slow"): + skip_slow = pytest.mark.skip(reason="slow test — skipped via --skip-slow") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) + + if config.getoption("--skip-network"): + skip_network = pytest.mark.skip(reason="network test — skipped via --skip-network") + for item in items: + if "network" in item.keywords: + item.add_marker(skip_network) + + if config.getoption("--skip-stochastic"): + skip_stochastic = pytest.mark.skip(reason="stochastic test — skipped via --skip-stochastic") + for item in items: + if "stochastic" in item.keywords: + item.add_marker(skip_stochastic) + + if config.getoption("--skip-memory-heavy"): + skip_mh = pytest.mark.skip(reason="memory-heavy test — skipped via --skip-memory-heavy") + for item in items: + if "memory_heavy" in item.keywords: + item.add_marker(skip_mh) diff --git a/tests/test_awsem_frustratometer.py b/tests/test_awsem_frustratometer.py index b5fda59a..7af4dec9 100644 --- a/tests/test_awsem_frustratometer.py +++ b/tests/test_awsem_frustratometer.py @@ -8,14 +8,60 @@ test_path=Path('tests') test_data_path=Path('tests/data') -# Assuming you have a function to load your tests configurations + tests_config = pd.read_csv(test_path/"test_awsem_config.csv",comment='#') -#tests_config = pd.read_csv(test_path/"test_awsem_config.csv") + +# Build parametrize list, marking 1jge rows as memory_heavy (~7.5 GB peak RSS) +_MEMORY_HEAVY_PDBS = {"1jge"} +_test_params = [] +for _rec in tests_config.to_dict(orient="records"): + if _rec["pdb"] in _MEMORY_HEAVY_PDBS: + _test_params.append(pytest.param(_rec, marks=pytest.mark.memory_heavy)) + else: + _test_params.append(_rec) @pytest.fixture(scope="module") def test_structure(): return {test_data['pdb']: frustratometer.Structure(test_data_path/f"{test_data['pdb']}.pdb") for test_data in tests_config.to_dict(orient="records")} + +@pytest.fixture(scope="module") +def awsem_6u5e(): + """6u5e + AWSEM(k_elec=0, sep=10, cutoff=None) — shared by fields/couplings energy tests.""" + structure = frustratometer.Structure(test_data_path / '6u5e.pdb', "A") + return frustratometer.AWSEM(structure, k_electrostatics=0, + min_sequence_separation_contact=10, + distance_cutoff_contact=None) + + +@pytest.fixture(scope="module") +def awsem_6u5e_density(): + """6u5e + AWSEM(cutoff=9.499, sep=2, k_elec=0) — shared by single-residue energy/decoy tests.""" + structure = frustratometer.Structure(test_data_path / '6u5e.pdb', "A") + return frustratometer.AWSEM(structure, + distance_cutoff_contact=9.499, + min_sequence_separation_contact=2, + k_electrostatics=0) + + +@pytest.fixture(scope="module") +def awsem_1mba(): + """1MBA_A sub/full models (k_elec=0, sep=10, cutoff=10) — shared by subsequence tests.""" + substructure = frustratometer.Structure(test_data_path / '1MBA_A.pdb', "A", + seq_selection="resnum 39to146") + model_sub = frustratometer.AWSEM(substructure, k_electrostatics=0.0, + min_sequence_separation_contact=10, + distance_cutoff_contact=10.0) + model_sub_no_context = frustratometer.AWSEM(substructure, k_electrostatics=0.0, + min_sequence_separation_contact=10, + distance_cutoff_contact=10.0, + burial_in_context=False) + full_structure = frustratometer.Structure(test_data_path / '1MBA_A.pdb', "A") + model_full = frustratometer.AWSEM(full_structure, k_electrostatics=0.0, + min_sequence_separation_contact=10, + distance_cutoff_contact=10.0) + return {"sub": model_sub, "full": model_full, "sub_no_context": model_sub_no_context} + def test_prody_expected_error(): test_data=tests_config.iloc[0] try: @@ -28,13 +74,13 @@ def test_prody_expected_error(): raise -@pytest.mark.parametrize("test_data", tests_config.to_dict(orient="records")) +@pytest.mark.parametrize("test_data", _test_params) def test_density_residues(test_data, test_structure): #structure = frustratometer.Structure(test_data_path/f"{test_data['pdb']}.pdb") structure = test_structure[test_data['pdb']] sequence_separation = 2 if test_data['seqsep'] == 3 else 13 model = frustratometer.AWSEM(structure, distance_cutoff_contact=9.5, min_sequence_separation_rho=sequence_separation, k_electrostatics=0) - data = pd.read_csv(test_data['singleresidue'], delim_whitespace=True) + data = pd.read_csv(test_data['singleresidue'], sep=r'\s+') data['Calculated_density'] = model.rho_r data['Expected_density'] = data['DensityRes'] max_atol = np.max(np.abs(data['Calculated_density'] - data['Expected_density'])) @@ -46,13 +92,13 @@ def test_density_residues(test_data, test_structure): print(f"Assertion failed: Maximum absolute tolerance found was {max_atol}, which exceeds the allowed tolerance.") raise AssertionError(f"Maximum absolute tolerance found was {max_atol}, which exceeds the allowed tolerance of 1E-3.") -@pytest.mark.parametrize("test_data", tests_config.to_dict(orient="records")) +@pytest.mark.parametrize("test_data", _test_params) def test_single_residue_frustration(test_data,test_structure): #structure = frustratometer.Structure(test_data_path/f"{test_data['pdb']}.pdb") structure = test_structure[test_data['pdb']] sequence_separation = 2 if test_data['seqsep'] == 3 else 13 model = frustratometer.AWSEM(structure, distance_cutoff_contact=9.5, min_sequence_separation_rho=sequence_separation, min_sequence_separation_contact=2, k_electrostatics=test_data['k_electrostatics'] * 4.184, min_sequence_separation_electrostatics=1) - data = pd.read_csv(test_data['singleresidue'], delim_whitespace=True) + data = pd.read_csv(test_data['singleresidue'], sep=r'\s+') data['Calculated_frustration'] = model.frustration(kind='singleresidue') data['Expected_frustration'] = data['FrstIndex'] try: @@ -62,7 +108,7 @@ def test_single_residue_frustration(test_data,test_structure): print(f"Assertion failed: Maximum absolute tolerance found was {max_atol}, which exceeds the allowed tolerance.") raise AssertionError(f"Maximum absolute tolerance found was {max_atol}, which exceeds the allowed tolerance of 3E-1.") -@pytest.mark.parametrize("test_data", tests_config.to_dict(orient="records")) +@pytest.mark.parametrize("test_data", _test_params) def test_mutational_frustration(test_data,test_structure): #structure = frustratometer.Structure(test_data_path/f"{test_data['pdb']}.pdb") structure = test_structure[test_data['pdb']] @@ -71,7 +117,7 @@ def test_mutational_frustration(test_data,test_structure): assert True return model = frustratometer.AWSEM(structure, distance_cutoff_contact=9.5, min_sequence_separation_rho=sequence_separation, min_sequence_separation_contact=0, k_electrostatics=test_data['k_electrostatics'] * 4.184, min_sequence_separation_electrostatics=1) - data = pd.read_csv(test_data['mutational'], delim_whitespace=True) + data = pd.read_csv(test_data['mutational'], sep=r'\s+') if test_data['pdb']!="ijge": chains=['A','B','C'] @@ -97,7 +143,9 @@ def test_mutational_frustration(test_data,test_structure): print(f"Assertion failed: Maximum absolute tolerance found was {max_atol}, which exceeds the allowed tolerance.") raise AssertionError(f"Maximum absolute tolerance found was {max_atol}, which exceeds the allowed tolerance of {atol}.") -@pytest.mark.parametrize("test_data", tests_config.to_dict(orient="records")) +@pytest.mark.slow +@pytest.mark.stochastic +@pytest.mark.parametrize("test_data", _test_params) def test_configurational_frustration(test_data,test_structure): #This test may fail due to the randomness of the decoy generation @@ -115,7 +163,7 @@ def test_configurational_frustration(test_data,test_structure): k_electrostatics=test_data['k_electrostatics'] * 4.184, min_sequence_separation_electrostatics=1) - data = pd.read_csv(test_data['configurational'], delim_whitespace=True) + data = pd.read_csv(test_data['configurational'], sep=r'\s+') if test_data['pdb'] != "ijge": chains = ['A', 'B', 'C'] @@ -145,7 +193,7 @@ def test_configurational_frustration(test_data,test_structure): ##### def test_residue_density_calculation(): #Import Lammps AWSEM Frustratometer single residue frustration values - lammps_single_frustration_dataframe=pd.read_csv(test_data_path/"6U5E_A_tertiary_frustration_singleresidue_1E8decoys_AWSEM_Frustratometer_LAMMPS_Carlos.dat",header=0,sep="\s+") + lammps_single_frustration_dataframe=pd.read_csv(test_data_path/"6U5E_A_tertiary_frustration_singleresidue_1E8decoys_AWSEM_Frustratometer_LAMMPS_Carlos.dat",header=0,sep=r"\s+") lammps_single_frustration_dataframe["i"]=lammps_single_frustration_dataframe["i"]-1 expected_rho_values=lammps_single_frustration_dataframe["rho_i"] @@ -161,17 +209,13 @@ def test_AWSEM_native_energy(): print(e) assert np.round(e, 0) == -915 -def test_AWSEM_fields_energy(): - structure=frustratometer.Structure(test_data_path/f'6u5e.pdb',"A") - model=frustratometer.AWSEM(structure,k_electrostatics=0, min_sequence_separation_contact = 10, distance_cutoff_contact = None) - e = model.fields_energy() +def test_AWSEM_fields_energy(awsem_6u5e): + e = awsem_6u5e.fields_energy() print(e) assert np.round(e, 0) == -555 -def test_AWSEM_couplings_energy(): - structure=frustratometer.Structure(test_data_path/f'6u5e.pdb',"A") - model=frustratometer.AWSEM(structure,k_electrostatics=0, min_sequence_separation_contact = 10, distance_cutoff_contact = None) - e = model.couplings_energy() +def test_AWSEM_couplings_energy(awsem_6u5e): + e = awsem_6u5e.couplings_energy() print(e) assert np.round(e, 0) == -362 @@ -180,18 +224,14 @@ def test_fields_couplings_AWSEM_energy(): model = frustratometer.AWSEM(structure) assert model.fields_energy() + model.couplings_energy() - model.native_energy() < 1E-6 -def test_single_residue_AWSEM_energy(): +def test_single_residue_AWSEM_energy(awsem_6u5e_density): _AA = '-ACDEFGHIKLMNPQRSTVWY' #Import Lammps AWSEM Frustratometer single residue frustration values - lammps_single_frustration_dataframe=pd.read_csv(test_data_path/f"6U5E_A_tertiary_frustration_singleresidue_1E8decoys_AWSEM_Frustratometer_LAMMPS_Carlos.dat",header=0,sep="\s+") + lammps_single_frustration_dataframe=pd.read_csv(test_data_path/f"6U5E_A_tertiary_frustration_singleresidue_1E8decoys_AWSEM_Frustratometer_LAMMPS_Carlos.dat",header=0,sep=r"\s+") ### - structure=frustratometer.Structure(test_data_path/f'6u5e.pdb',"A") - model=frustratometer.AWSEM(structure,distance_cutoff_contact=9.499, - min_sequence_separation_contact=2, - k_electrostatics=0) - + model = awsem_6u5e_density #Calculate fields - seq_index = np.array([_AA.find(aa) for aa in structure.sequence]) + seq_index = np.array([_AA.find(aa) for aa in model.sequence]) seq_len = len(seq_index) h = -model.potts_model['h'][range(seq_len), seq_index] @@ -208,7 +248,7 @@ def test_single_residue_AWSEM_energy(): def test_contact_pair_AWSEM_energy(): _AA = '-ACDEFGHIKLMNPQRSTVWY' #Import Lammps AWSEM Frustratometer mutational frustration values - lammps_mutational_frustration_dataframe=pd.read_csv(test_data_path/f"6U5E_A_tertiary_frustration_mutational_1E6decoys_AWSEM_Frustratometer_LAMMPS_Carlos.dat",header=0,sep="\s+") + lammps_mutational_frustration_dataframe=pd.read_csv(test_data_path/f"6U5E_A_tertiary_frustration_mutational_1E6decoys_AWSEM_Frustratometer_LAMMPS_Carlos.dat",header=0,sep=r"\s+") lammps_mutational_frustration_dataframe["i"]=lammps_mutational_frustration_dataframe["i"]-1 lammps_mutational_frustration_dataframe["j"]=lammps_mutational_frustration_dataframe["j"]-1 ### @@ -247,71 +287,53 @@ def test_selected_subsequence_AWSEM_burial_energy_matrix(): #Test Protein Segment Native AWSEM Energy Calculation ##### -def test_selected_subsequence_AWSEM_rho_calculations(): - #Substructure object - substructure=frustratometer.Structure(test_data_path/f'1MBA_A.pdb',"A",seq_selection="resnum 39to146") - model_1=frustratometer.AWSEM(substructure, k_electrostatics=0.0,min_sequence_separation_contact=10,distance_cutoff_contact=10.0) - model_1_init_index=model_1.init_index_shift; model_1_fin_index=model_1.fin_index_shift - - #Full structure object - structure=frustratometer.Structure(test_data_path/f'1MBA_A.pdb',"A") - model_2=frustratometer.AWSEM(structure, k_electrostatics=0.0,min_sequence_separation_contact=10,distance_cutoff_contact=10.0) - +def test_selected_subsequence_AWSEM_rho_calculations(awsem_1mba): + model_1 = awsem_1mba["sub"] + model_2 = awsem_1mba["full"] + model_1_init_index = model_1.init_index_shift + model_1_fin_index = model_1.fin_index_shift #Check if shape and entries of rho matrices are identical assert model_1.rho_r.shape==model_2.rho_r[model_1_init_index:model_1_fin_index].shape assert model_1.rho_r.all()==model_2.rho_r[model_1_init_index:model_1_fin_index].all() -def test_selected_subsequence_AWSEM_burial_energy(): - #Substructure object - substructure=frustratometer.Structure(test_data_path/f'1MBA_A.pdb',"A",seq_selection="resnum 39to146") - model_1=frustratometer.AWSEM(substructure, k_electrostatics=0.0,min_sequence_separation_contact=10,distance_cutoff_contact=10.0) - model_1_init_index=model_1.init_index_shift; model_1_fin_index=model_1.fin_index_shift - - #Full structure object - structure=frustratometer.Structure(test_data_path/f'1MBA_A.pdb',"A") - model_2=frustratometer.AWSEM(structure, k_electrostatics=0.0,min_sequence_separation_contact=10,distance_cutoff_contact=10.0) - +def test_selected_subsequence_AWSEM_burial_energy(awsem_1mba): + model_1 = awsem_1mba["sub"] + model_2 = awsem_1mba["full"] + model_1_init_index = model_1.init_index_shift + model_1_fin_index = model_1.fin_index_shift #Check if burial energies are identical assert model_1.burial_energy.shape==model_2.burial_energy[model_1_init_index:model_1_fin_index].shape assert model_1.burial_energy.all()==model_2.burial_energy[model_1_init_index:model_1_fin_index].all() -def test_selected_subsequence_AWSEM_contact_energy(): - #Substructure object - substructure=frustratometer.Structure(test_data_path/f'1MBA_A.pdb',"A",seq_selection="resnum 39to146") - model_1=frustratometer.AWSEM(substructure, k_electrostatics=0.0,min_sequence_separation_contact=10,distance_cutoff_contact=10.0) - model_1_init_index=model_1.init_index_shift; model_1_fin_index=model_1.fin_index_shift - - #Full structure object - structure=frustratometer.Structure(test_data_path/f'1MBA_A.pdb',"A") - model_2=frustratometer.AWSEM(structure, k_electrostatics=0.0,min_sequence_separation_contact=10,distance_cutoff_contact=10.0) - +def test_selected_subsequence_AWSEM_contact_energy(awsem_1mba): + model_1 = awsem_1mba["sub"] + model_2 = awsem_1mba["full"] + model_1_init_index = model_1.init_index_shift + model_1_fin_index = model_1.fin_index_shift #Check if contact energies are identical assert model_1.contact_energy.shape==model_2.contact_energy[:,model_1_init_index:model_1_fin_index,model_1_init_index:model_1_fin_index,:,:].shape assert model_1.contact_energy.all()==model_2.contact_energy[:,model_1_init_index:model_1_fin_index,model_1_init_index:model_1_fin_index,:,:].all() -def test_selected_subsequence_AWSEM_burial_energy_without_protein_context(): - structure=frustratometer.Structure(test_data_path/f'1MBA_A.pdb',"A",seq_selection="resnum 39to146") - model=frustratometer.AWSEM(structure, k_electrostatics=0.0,min_sequence_separation_contact=10,distance_cutoff_contact=10.0,burial_in_context=False) - selected_region_burial=model.fields_energy() +def test_selected_subsequence_AWSEM_burial_energy_without_protein_context(awsem_1mba): + model = awsem_1mba["sub_no_context"] + selected_region_burial = model.fields_energy() # Energy units are in kJ/mol assert np.round(selected_region_burial, 2) == -377.95 -def test_selected_subsequence_AWSEM_contact_energy_without_protein_context(): - structure=frustratometer.Structure(test_data_path/f'1MBA_A.pdb',"A",seq_selection="resnum 39to146") - model=frustratometer.AWSEM(structure, k_electrostatics=0.0,min_sequence_separation_contact=10,distance_cutoff_contact=10.0,burial_in_context=False) - selected_region_contact=model.couplings_energy() +def test_selected_subsequence_AWSEM_contact_energy_without_protein_context(awsem_1mba): + model = awsem_1mba["sub_no_context"] + selected_region_contact = model.couplings_energy() # Energy units are in kJ/mol assert np.round(selected_region_contact, 2) == -148.92 -def test_single_residue_decoy_AWSEM_energy_statistics(): +def test_single_residue_decoy_AWSEM_energy_statistics(awsem_6u5e_density): _AA = '-ACDEFGHIKLMNPQRSTVWY' #Import Lammps AWSEM Frustratometer single residue frustration values - lammps_single_frustration_dataframe=pd.read_csv(test_data_path/f"6U5E_A_tertiary_frustration_singleresidue_1E8decoys_AWSEM_Frustratometer_LAMMPS_Carlos.dat",header=0,sep="\s+") + lammps_single_frustration_dataframe=pd.read_csv(test_data_path/f"6U5E_A_tertiary_frustration_singleresidue_1E8decoys_AWSEM_Frustratometer_LAMMPS_Carlos.dat",header=0,sep=r"\s+") ### - structure=frustratometer.Structure(test_data_path/f'6u5e.pdb',"A") - model=frustratometer.AWSEM(structure,distance_cutoff_contact=9.499, min_sequence_separation_contact=2, k_electrostatics=0) + model = awsem_6u5e_density #Calculate fields - seq_index = np.array([_AA.find(aa) for aa in structure.sequence]) + seq_index = np.array([_AA.find(aa) for aa in model.sequence]) seq_len = len(seq_index) h = -model.potts_model['h'][range(seq_len), seq_index] @@ -335,7 +357,7 @@ def test_single_residue_decoy_AWSEM_energy_statistics(): def test_contact_pair_decoy_AWSEM_energy_statistics(): _AA = '-ACDEFGHIKLMNPQRSTVWY' #Import Lammps AWSEM Frustratometer mutational frustration values - lammps_mutational_frustration_dataframe=pd.read_csv(test_data_path/f"6U5E_A_tertiary_frustration_mutational_1E6decoys_AWSEM_Frustratometer_LAMMPS_Carlos.dat",header=0,sep="\s+") + lammps_mutational_frustration_dataframe=pd.read_csv(test_data_path/f"6U5E_A_tertiary_frustration_mutational_1E6decoys_AWSEM_Frustratometer_LAMMPS_Carlos.dat",header=0,sep=r"\s+") lammps_mutational_frustration_dataframe["i"]=lammps_mutational_frustration_dataframe["i"]-1 lammps_mutational_frustration_dataframe["j"]=lammps_mutational_frustration_dataframe["j"]-1 ### @@ -372,7 +394,7 @@ def test_contact_pair_decoy_AWSEM_energy_statistics(): assert (abs(np.array(merged_dataframe["std(decoy_energies)"]-merged_dataframe["STD_Decoy_Energy"])) < 1.2E-1).all() -@pytest.fixture +@pytest.fixture(scope="module") def structure(): return frustratometer.Structure(test_data_path/f'1l63.pdb',"A") diff --git a/tests/test_dca_frustratometer.py b/tests/test_dca_frustratometer.py index e02df7bc..05afd1ad 100644 --- a/tests/test_dca_frustratometer.py +++ b/tests/test_dca_frustratometer.py @@ -11,11 +11,14 @@ from frustratometer.utils import _path import Bio.AlignIO import subprocess +import os +import importlib.util data_path = frustratometer.utils.create_directory(_path/'..'/'tests'/'data') -#scratch_path = frustratometer.utils.create_directory(_path/'..'/'tests'/'scratch') +output_path = Path(os.environ.get('FRUSTRATOMETER_TEST_OUTPUT', tempfile.gettempdir())) _AA = '-ACDEFGHIKLMNPQRSTVWY' +_HAS_PYDCA = importlib.util.find_spec("pydca") is not None def test_download_pfam_database(): """Downloads a small database from Pfam and tests that the files are splitted correctly.""" @@ -464,12 +467,13 @@ def test_from_potts_model_file(): assert model.potts_model["J"].shape==(len(filtered_aligned_sequence),len(filtered_aligned_sequence),21,21) assert model.potts_model["h"].shape==(len(filtered_aligned_sequence),21) +@pytest.mark.skipif(not _HAS_PYDCA, reason="pyDCA is not installed") def test_from_pfam_alignment_mfDCA_calculation(): pdb_file = f'{data_path}/6JXX_A.pdb' chain = 'A' distance_matrix_method='CB' - alignment_output_file_name=f"{data_path}/PF11976_test_alignment.sto" - filtered_alignment_output_file_name=f"{data_path}/PF11976_test_filtered_alignment.sto" + alignment_output_file_name=f"{output_path}/PF11976_test_alignment.sto" + filtered_alignment_output_file_name=f"{output_path}/PF11976_test_filtered_alignment.sto" PFAM_ID="PF11976" DCA_format="mfDCA" @@ -481,12 +485,13 @@ def test_from_pfam_alignment_mfDCA_calculation(): assert model.potts_model["J"].shape==(len(filtered_aligned_sequence),len(filtered_aligned_sequence),21,21) assert model.potts_model["h"].shape==(len(filtered_aligned_sequence),21) +@pytest.mark.skipif(not _HAS_PYDCA, reason="pyDCA is not installed") def test_from_pfam_alignment_plmDCA_calculation(): pdb_file = f'{data_path}/6JXX_A.pdb' chain = 'A' distance_matrix_method='CB' - alignment_output_file_name=f"{data_path}/PF11976_test_alignment.sto" - filtered_alignment_output_file_name=f"{data_path}/PF11976_test_filtered_alignment.sto" + alignment_output_file_name=f"{output_path}/PF11976_test_alignment.sto" + filtered_alignment_output_file_name=f"{output_path}/PF11976_test_filtered_alignment.sto" PFAM_ID="PF11976" DCA_format="plmDCA" @@ -498,12 +503,13 @@ def test_from_pfam_alignment_plmDCA_calculation(): assert model.potts_model["J"].shape==(len(filtered_aligned_sequence),len(filtered_aligned_sequence),21,21) assert model.potts_model["h"].shape==(len(filtered_aligned_sequence),21) +@pytest.mark.skipif(not _HAS_PYDCA, reason="pyDCA is not installed") def test_from_hmmer_alignment_plmDCA_calculation(): pdb_file = f'{data_path}/6JXX_A.pdb' chain = 'A' distance_matrix_method='CB' - alignment_output_file_name=f"{data_path}/PF11976_test_alignment.sto" - filtered_alignment_output_file_name=f"{data_path}/PF11976_test_filtered_alignment.sto" + alignment_output_file_name=f"{output_path}/PF11976_test_alignment.sto" + filtered_alignment_output_file_name=f"{output_path}/PF11976_test_filtered_alignment.sto" query_sequence_database_file=f"{data_path}/protein-matching-PF11976.fasta" DCA_format="plmDCA" diff --git a/tests/test_gamma.py b/tests/test_gamma.py index 6bcd2122..ea617fb1 100644 --- a/tests/test_gamma.py +++ b/tests/test_gamma.py @@ -384,20 +384,22 @@ def setUp(self): def test_correlate_with_compatible_instances(self): correlation = self.gamma1.correlate(self.gamma2) self.assertIsNotNone(correlation) - self.assertEqual(correlation, 1.0) + self.assertAlmostEqual(correlation, 1.0, places=10) correlation = self.gamma1.correlate(self.gamma3) - self.assertEqual(correlation, -1.0) + self.assertAlmostEqual(correlation, -1.0, places=10) def test_correlate_segments_with_compatible_instances(self): correlations = self.gamma1.correlate_segments(self.gamma2) expected_correlations = {'Burial': 1.0, 'Direct': 1.0, 'Water': 1.0, 'Protein': 1.0} - self.assertDictEqual(correlations, expected_correlations) + for key in expected_correlations: + self.assertAlmostEqual(correlations[key], expected_correlations[key], places=10) correlations = self.gamma1.correlate_segments(self.gamma3) expected_correlations = {'Burial': -1.0, 'Direct': -1.0, 'Water': -1.0, 'Protein': -1.0} - self.assertDictEqual(correlations, expected_correlations) + for key in expected_correlations: + self.assertAlmostEqual(correlations[key], expected_correlations[key], places=10) def test_correlate_with_incompatible_instances(self): diff --git a/tests/test_optimization.py b/tests/test_optimization.py index 7b939a17..b707cc4a 100644 --- a/tests/test_optimization.py +++ b/tests/test_optimization.py @@ -57,22 +57,22 @@ def setup_2_by_2(length): # Specific test functions def test_compute_region_means_1_by_1(): - template_test_compute_region_means(compute_region_means_1_by_1_numpy, compute_region_means_1_by_1, setup_1_by_1, range(1, 200)) + template_test_compute_region_means(compute_region_means_1_by_1_numpy, compute_region_means_1_by_1, setup_1_by_1, [1, 2, 3, 5, 10, 20, 50, 100, 150, 200]) def test_compute_region_means_1_by_2(): - template_test_compute_region_means(compute_region_means_1_by_2_numpy, compute_region_means_1_by_2, setup_1_by_2, range(1, 100)) + template_test_compute_region_means(compute_region_means_1_by_2_numpy, compute_region_means_1_by_2, setup_1_by_2, [1, 2, 3, 5, 10, 25, 50, 100]) def test_compute_region_means_2_by_2(): - template_test_compute_region_means(compute_region_means_2_by_2_numpy, compute_region_means_2_by_2, setup_2_by_2, range(1, 40)) + template_test_compute_region_means(compute_region_means_2_by_2_numpy, compute_region_means_2_by_2, setup_2_by_2, [1, 2, 5, 10, 20, 40]) def test_compute_region_means_2_by_2_parallel(): - template_test_compute_region_means(compute_region_means_2_by_2, compute_region_means_2_by_2_parallel, setup_2_by_2, range(1, 50)) + template_test_compute_region_means(compute_region_means_2_by_2, compute_region_means_2_by_2_parallel, setup_2_by_2, [1, 2, 5, 10, 25, 50]) def test_compute_region_means(): - template_test_compute_region_means(compute_region_means, compute_region_means_1_by_1, setup_1_by_1, range(1, 400,5)) - template_test_compute_region_means(compute_region_means, compute_region_means_1_by_2, setup_1_by_2, range(1, 400,5)) - template_test_compute_region_means(compute_region_means, compute_region_means_2_by_2_parallel, setup_2_by_2, range(1, 300,5)) + template_test_compute_region_means(compute_region_means, compute_region_means_1_by_1, setup_1_by_1, [1, 2, 5, 10, 25, 50, 100, 200, 400]) + template_test_compute_region_means(compute_region_means, compute_region_means_1_by_2, setup_1_by_2, [1, 2, 5, 10, 25, 50, 100, 200, 400]) + template_test_compute_region_means(compute_region_means, compute_region_means_2_by_2_parallel, setup_2_by_2, [1, 2, 5, 10, 25, 50, 100, 200, 400]) def test_mean_inner_product(): native_sequences=[ @@ -188,16 +188,23 @@ def test_diff_mean_inner_product_2_by_2(n_elements = 10): matrix2d_1 = np.random.rand(n_elements, n_elements) repetitions = np.random.randint(0, 1000, size=n_elements) r0, r1 = np.random.choice(n_elements, 2, replace=False) - + region_mean = compute_region_means_2_by_2(matrix2d_0, matrix2d_1) - + result_adjusted = diff_mean_inner_product_2_by_2(r0, r1, repetitions, region_mean) + + if repetitions[r0] == 0: + # Moving from an empty bin is invalid; the diff must be zero + if not np.allclose(result_adjusted, 0): + failed = True + print(f"rep[r0]==0 case: expected zero diff but got non-zero for r0={r0}, r1={r1}") + continue + # Original function with adjusted repetitions m = repetitions.copy() n = m.copy() n[r0] -= 1 n[r1] += 1 - result_adjusted = diff_mean_inner_product_2_by_2(r0, r1, repetitions, region_mean) - + # Recompute the functions for new and original repetitions directly result_new_reps = mean_inner_product_2_by_2(n, region_mean) result_original_reps = mean_inner_product_2_by_2(repetitions, region_mean) @@ -266,16 +273,23 @@ def test_diff_mean_inner_product_1_by_2(n_elements = 10): matrix2d_1 = np.random.rand(n_elements, n_elements) repetitions = np.random.randint(0, 1000, size=n_elements) r0, r1 = np.random.choice(n_elements, 2, replace=False) - + region_mean = compute_region_means_1_by_2(matrix1d_0, matrix2d_1) - + result_adjusted = diff_mean_inner_product_1_by_2(r0, r1, repetitions, region_mean) + + if repetitions[r0] == 0: + # Moving from an empty bin is invalid; the diff must be zero + if not np.allclose(result_adjusted, 0): + failed = True + print(f"rep[r0]==0 case: expected zero diff but got non-zero for r0={r0}, r1={r1}") + continue + # Original function with adjusted repetitions m = repetitions.copy() n = m.copy() n[r0] -= 1 n[r1] += 1 - result_adjusted = diff_mean_inner_product_1_by_2(r0, r1, repetitions, region_mean) - + # Recompute the functions for new and original repetitions directly result_new_reps = mean_inner_product_1_by_2(n, region_mean) result_original_reps = mean_inner_product_1_by_2(repetitions, region_mean) @@ -305,16 +319,23 @@ def test_diff_mean_inner_product_1_by_1(n_elements = 10): matrix1d_1 = np.random.rand(n_elements) repetitions = np.random.randint(0, 1000, size=n_elements) r0, r1 = np.random.choice(n_elements, 2, replace=False) - + region_mean = compute_region_means_1_by_1(matrix1d_0, matrix1d_1) - + result_adjusted = diff_mean_inner_product_1_by_1(r0, r1, repetitions, region_mean) + + if repetitions[r0] == 0: + # Moving from an empty bin is invalid; the diff must be zero + if not np.allclose(result_adjusted, 0): + failed = True + print(f"rep[r0]==0 case: expected zero diff but got non-zero for r0={r0}, r1={r1}") + continue + # Original function with adjusted repetitions m = repetitions.copy() n = m.copy() n[r0] -= 1 n[r1] += 1 - result_adjusted = diff_mean_inner_product_1_by_1(r0, r1, repetitions, region_mean) - + # Recompute the functions for new and original repetitions directly result_new_reps = mean_inner_product_1_by_1(n, region_mean) result_original_reps = mean_inner_product_1_by_1(repetitions, region_mean) @@ -341,9 +362,11 @@ def test_diff_mean_inner_product_1_by_1(n_elements = 10): ################ _AA = '-ACDEFGHIKLMNPQRSTVWY' +_AB = ''.join([a for a in _AA if a not in ['-','C','P']]) +_AC = ''.join([a for a in _AA if a != '-'] + ['-']) -@pytest.fixture(params=[(10, 2, 0.0), (10, 2, 4.15), (None, 10, 4.15)]) -@pytest.mark.parametrize(["distance_cutoff_contact", "min_sequence_separation_contact", "k_electrostatics"], []) +#"distance_cutoff_contact", "min_sequence_separation_contact", "k_electrostatics" +@pytest.fixture(scope="module", params=[(10, 2, 0.0), (10, 2, 4.15), (None, 10, 4.15)]) def model(request): native_pdb = "tests/data/1bfz.pdb" distance_cutoff_contact, min_sequence_separation_contact, k_electrostatics = request.param @@ -351,24 +374,27 @@ def model(request): model = AWSEM(structure, distance_cutoff_contact=distance_cutoff_contact, min_sequence_separation_contact=min_sequence_separation_contact, expose_indicator_functions=True, k_electrostatics=k_electrostatics) return model -@pytest.mark.parametrize("reduced_alphabet", [_AA,''.join([a for a in _AA if a not in ['-','C','P']]),''.join([a for a in _AA if a != '-'] + ['-'])]) +@pytest.mark.parametrize("reduced_alphabet,use_numba", [ + (_AA, True), (_AA, False), (_AB, False), (_AC, False), +]) @pytest.mark.parametrize("exact", [True, False]) -@pytest.mark.parametrize("use_numba", [True, False]) def test_heterogeneity(reduced_alphabet, exact, use_numba): seq_indices = np.random.randint(0, len(reduced_alphabet), size=(1,5)) het=Heterogeneity(exact=exact,use_numba=use_numba,alphabet=reduced_alphabet) het.test(seq_indices[0]) -@pytest.mark.parametrize("reduced_alphabet", [_AA,''.join([a for a in _AA if a not in ['-','C','P']]),''.join([a for a in _AA if a != '-'] + ['-'])]) -@pytest.mark.parametrize("use_numba", [True, False]) +@pytest.mark.parametrize("reduced_alphabet,use_numba", [ + (_AA, True), (_AA, False), (_AB, False), (_AC, False), +]) def test_awsem_energy(model,reduced_alphabet,use_numba): seq_indices = np.random.randint(0, len(reduced_alphabet), size=(1,len(model.sequence))) awsem_energy = AwsemEnergy(use_numba=use_numba, model=model, alphabet=reduced_alphabet) awsem_energy.test(seq_indices[0]) awsem_energy.regression_test() -@pytest.mark.parametrize("reduced_alphabet", [_AA,''.join([a for a in _AA if a not in ['-','C','P']]),''.join([a for a in _AA if a != '-'] + ['-'])]) -@pytest.mark.parametrize("use_numba", [True, False]) +@pytest.mark.parametrize("reduced_alphabet,use_numba", [ + (_AA, True), (_AA, False), (_AB, False), (_AC, False), +]) def test_awsem_energy_average(model, reduced_alphabet, use_numba): seq_indices = np.random.randint(0, len(reduced_alphabet), size=(1,len(model.sequence))) awsem_de2 = AwsemEnergyAverage(use_numba=use_numba, model=model, alphabet=reduced_alphabet) @@ -376,8 +402,10 @@ def test_awsem_energy_average(model, reduced_alphabet, use_numba): awsem_de2.regression_test(seq_indices[0]) -@pytest.mark.parametrize("reduced_alphabet", [_AA,''.join([a for a in _AA if a not in ['-','C','P']]),''.join([a for a in _AA if a != '-'] + ['-'])]) -@pytest.mark.parametrize("use_numba", [True, False]) +@pytest.mark.slow +@pytest.mark.parametrize("reduced_alphabet,use_numba", [ + (_AA, True), (_AA, False), (_AB, False), (_AC, False), +]) def test_awsem_energy_variance(model, reduced_alphabet, use_numba): seq_indices = np.random.randint(0, len(reduced_alphabet), size=(1,len(model.sequence))) awsem_de2 = AwsemEnergyVariance(use_numba=use_numba, model=model, alphabet=reduced_alphabet)