Skip to content
Merged
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,7 @@ Copyright (c) 2022-2024, Carlos Bueno, Hana Jaafari

## Acknowledgements

This work was supported by National Science Foundation (NSF) Center for Theoretical Biological Physics (NSF PHY-2019745) and NSF grants PHY-1522550.
Project skeleton based on the [Computational Molecular Science Python Cookiecutter] (https://github.com/molssi/cookiecutter-cms) version 1.6.
Carlos Bueno was supported by the MolSSI Software Fellowship, under the mentorship of Jessica Nash. We thank AMD (Advanced Micro Devices, Inc.) for the donation of high-performance computing hardware and HPC resources. This project is also supported by the Center for Theoretical Biological Physics (NSF Grants PHY-2019745 and PHY-1522550), with additional support from the D.R. Bullard Welch Chair at Rice University (Grant No. C-0016 to PGW). Project skeleton based on the [Computational Molecular Science Python Cookiecutter] (https://github.com/molssi/cookiecutter-cms) version 1.6.

## References

Expand Down
29 changes: 14 additions & 15 deletions frustratometer/align/align.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,38 @@
import subprocess
from pathlib import Path
import tempfile
from typing import Union
import typing

def jackhmmer(sequence,
output_file,
database,
log=subprocess.DEVNULL,
def jackhmmer(sequence: str,
output_file: typing.Union[Path,str],
database: typing.Union[Path,str],
log = subprocess.DEVNULL,
dry_run: bool = False,
**kwargs)->Path:
**kwargs) -> Path:
"""
Generates alignment using jackhmmer
Generates alignment in stockholm format using jackhmmer.

Parameters
----------
sequence : str
Protein sequence
output_file : str
If True, will record alignment output messages;
Arguments: True OR False (Default is False)
Path to save the alignment file
database: str
Location of the sequence database used to generation MSA
Location of the sequence database used to generation MSA.
The database should be in fasta format.
Databases can be downloaded from: https://www.uniprot.org/help/downloads
log: File handle
jackhmmer output Default:DEVNULL
Path to write the output of the program. Default:DEVNULL
dry_run: bool (default: False)
Save the temporary fasta_file and print the command instead of running
Creates a fasta file with the sequence and prints the command needed to run jackhmmer
**kwargs:
Other arguments that can be passed to jackhmmer.
More information can be found by executing `jackhmmer -h`
arguments without a value such as --noali should be passed as `noali=True`
Common kwargs:
N: number of iterations
E: E-value threshold


Returns
-------
output_file: Path
Expand Down Expand Up @@ -98,7 +97,7 @@ def extract_sequences_from_alignment(alignment_file,
import Bio.SeqIO

alignment = Bio.AlignIO.read(alignment_file,'stockholm')
database = Bio.SeqIO.parse('/home/cb/Development/DCA_Frustratometer/dca_frustratometer/databases/Uniprot/uniprot_sprot.fasta','fasta')
database = Bio.SeqIO.parse(database,'fasta')
alignment_records=[]
for record in alignment:
alignment_records+=[record.name]
Expand Down
4 changes: 2 additions & 2 deletions frustratometer/classes/AWSEM.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ def compute_configurational_energies(self):

# decoy_data_columns=['decoy_i','i_resno','j_resno','ires_type','jres_type','aa1','aa2','rij','rho_i','rho_j','water_energy','burial_energy_i','burial_energy_j','electrostatic_energy','total_energies']
# decoy_data=[]
configurational_energies=np.zeros((n,n))
configurational_energies=np.full((n,n), np.nan) # masked pairs will be left as nan
for c in range(n_contacts):
n1=indices1[c]
n2=indices2[c]
Expand All @@ -366,4 +366,4 @@ def compute_configurational_energies(self):

def configurational_frustration(self,aa_freq=None, correction=0, n_decoys=4000):
mean_decoy_energy, std_decoy_energy = self.compute_configurational_decoy_statistics(n_decoys=n_decoys,aa_freq=aa_freq)
return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
13 changes: 9 additions & 4 deletions frustratometer/classes/Gamma.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,16 +408,21 @@ def plot_gamma(self, new_order=None):

# Plot setup
f, axes = plt.subplots(2, 2, figsize=(18, 16))
titles = ['Burial Gammas', 'Direct Gammas', 'Water Gammas', 'Protein Gammas']
f.subplots_adjust(hspace=50) # fix overlap between axis ticks of upper subplots and titles of lower subplots
titles = ['Burial Gammas', 'Direct Gammas', 'Protein Gammas', 'Water Gammas']

for i, (title, name) in enumerate(zip(titles, segments)):
ax = axes[i // 2, i % 2]
sns.heatmap(segments[name].reshape(-1, 20), ax=ax, cmap='RdBu_r', center=0)
ax.set_title(title)
ax.set_xticks(np.arange(len(self.alphabet)) + 0.5)
ax.set_xticklabels(self.alphabet)
ax.set_yticks(np.arange(segments[name].shape[0] // 20) + 0.5)
ax.set_yticklabels(range(segments[name].shape[0] // 20))
if i==0: # burial
ax.set_yticks([0.5,1.5,2.5])
ax.set_yticklabels(['low','medium','high'], rotation=45, size=12)
else: # direct, prot, or wat
ax.set_yticks(np.arange(len(self.alphabet)) + 0.5)
ax.set_yticklabels(self.alphabet, rotation=0)

plt.tight_layout()
plt.show()
Expand Down Expand Up @@ -648,4 +653,4 @@ class O():

self.gamma1 = Gamma(np.arange(0,1260,1))
self.gamma2 = Gamma(np.arange(0,1260,1)*5+10)
self.gamma3 = Gamma(np.arange(1260,0,-1)*2-4)
self.gamma3 = Gamma(np.arange(1260,0,-1)*2-4)
37 changes: 27 additions & 10 deletions frustratometer/frustration/frustration.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
"""
Utilities for sequence energies, decoys, and frustration metrics from Potts models.

Conventions
-----------
- Alphabet: `_AA = '-ACDEFGHIKLMNPQRSTVWY'` (gap '-' at index 0)
- Sequence length: L
- Fields and couplings:
* potts_model['h'] has shape (L, Q)
* potts_model['J'] has shape (L, L, Q, Q)
- Energies: A positive value in the potts model or a negative value in the energy is a favorable contribution.
- Masks: boolean array of shape (L, L). True values indicate included residue pairs.

Notes
-----
All functions assume `len(seq) == potts_model['h'].shape[0] == potts_model['J'].shape[0]`.
"""

import prody
import scipy.spatial.distance as sdist
import numpy as np
Expand All @@ -10,25 +28,24 @@ def compute_mask(distance_matrix: np.array,
maximum_contact_distance: Union[float, None] = None,
minimum_sequence_separation: Union[int, None] = None) -> np.array:
"""
Computes a 2D Boolean mask from a given distance matrix based on a distance cutoff and a sequence separation cutoff.
Computes a 2D Boolean mask (L, L) for pairwise interactions from a given distance matrix using a distance cutoff and/or a minimum sequence-separation cutoff.
Both cutoffs are inclusive. True indicates that the residue pair meets the criteria.

Parameters
----------
distance_matrix : np.array
distance_matrix : np.array (L,L)
A 2D array where the element at index [i, j] represents the spatial distance
between residues i and j. This matrix is assumed to be symmetric.
between residues i and j (e.g., Ca-Ca, Cb-Cb). This matrix is assumed to be symmetric.
maximum_contact_distance : float, optional
The maximum distance of a contact. Pairs of residues with distances less than this
threshold are marked as True in the mask. If None, the spatial distance criterion
is ignored and all distances are included. Default is None.
The maximum distance of a contact. Include i,j if distance_matrix[i,j] <= maximum_contact_distance.
If None, no distance filtering is applied. Default is None.
minimum_sequence_separation : int, optional
A minimum sequence distance threshold. Pairs of residues with sequence indices
differing by at least this value are marked as True in the mask. If None,
the sequence distance criterion is ignored. Default is None.
A minimum sequence separation threshold. Include i,j if |i-j| >= minimum_sequence_separation.
If None, no sequence separation is applied . Default is None.

Returns
-------
mask : np.array
mask : np.array (L,L)
A 2D Boolean array of the same dimensions as `distance_matrix`. Elements of the mask
are True where the residue pairs meet the specified `distance_cutoff` and
`sequence_distance_cutoff` criteria.
Expand Down
Loading