From a9c31804116a83869b0a5bb159e8142b76cadb7d Mon Sep 17 00:00:00 2001 From: Carlos Bueno Date: Fri, 2 May 2025 13:58:30 -0500 Subject: [PATCH 1/6] Improves acknowledgements --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 6b15b7e8..8d25b47f 100644 --- a/README.md +++ b/README.md @@ -200,8 +200,7 @@ Copyright (c) 2022-2024, Carlos Bueno, Hana Jaafari ## Acknowledgements -This work was supported by National Science Foundation (NSF) Center for Theoretical Biological Physics (NSF PHY-2019745) and NSF grants PHY-1522550. -Project skeleton based on the [Computational Molecular Science Python Cookiecutter] (https://github.com/molssi/cookiecutter-cms) version 1.6. +Carlos Bueno was supported by the MolSSI Software Fellowship, under the mentorship of Jessica Nash. We thank AMD (Advanced Micro Devices, Inc.) for the donation of high-performance computing hardware and HPC resources. This project is also supported by the Center for Theoretical Biological Physics (NSF Grants PHY-2019745 and PHY-1522550), with additional support from the D.R. Bullard Welch Chair at Rice University (Grant No. C-0016 to PGW). Project skeleton based on the [Computational Molecular Science Python Cookiecutter] (https://github.com/molssi/cookiecutter-cms) version 1.6. ## References From caab92ed1018cb9ccffe5fa8c3ca8dc3a772d799 Mon Sep 17 00:00:00 2001 From: ftclark3 <60708336+ftclark3@users.noreply.github.com> Date: Mon, 15 Sep 2025 12:59:12 -0500 Subject: [PATCH 2/6] makes plot title agree with default segment order in gamma plotting --- frustratometer/classes/Gamma.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frustratometer/classes/Gamma.py b/frustratometer/classes/Gamma.py index 821bffea..9b577648 100644 --- a/frustratometer/classes/Gamma.py +++ b/frustratometer/classes/Gamma.py @@ -408,7 +408,7 @@ def plot_gamma(self, new_order=None): # Plot setup f, axes = plt.subplots(2, 2, figsize=(18, 16)) - titles = ['Burial Gammas', 'Direct Gammas', 'Water Gammas', 'Protein Gammas'] + titles = ['Burial Gammas', 'Direct Gammas', 'Protein Gammas', 'Water Gammas'] for i, (title, name) in enumerate(zip(titles, segments)): ax = axes[i // 2, i % 2] @@ -648,4 +648,4 @@ class O(): self.gamma1 = Gamma(np.arange(0,1260,1)) self.gamma2 = Gamma(np.arange(0,1260,1)*5+10) - self.gamma3 = Gamma(np.arange(1260,0,-1)*2-4) \ No newline at end of file + self.gamma3 = Gamma(np.arange(1260,0,-1)*2-4) From 483be414d0aacd828e52c35c3783e6fdb836c326 Mon Sep 17 00:00:00 2001 From: ftclark3 <60708336+ftclark3@users.noreply.github.com> Date: Sat, 20 Sep 2025 14:43:36 -0500 Subject: [PATCH 3/6] fix overlap between axis ticks of upper subplots and titles of lower subplots fix overlap between axis ticks of upper subplots and titles of lower subplots in Gamma.plot_gamma() --- frustratometer/classes/Gamma.py | 1 + 1 file changed, 1 insertion(+) diff --git a/frustratometer/classes/Gamma.py b/frustratometer/classes/Gamma.py index 9b577648..57540959 100644 --- a/frustratometer/classes/Gamma.py +++ b/frustratometer/classes/Gamma.py @@ -408,6 +408,7 @@ def plot_gamma(self, new_order=None): # Plot setup f, axes = plt.subplots(2, 2, figsize=(18, 16)) + f.subplots_adjust(hspace=50) # fix overlap between axis ticks of upper subplots and titles of lower subplots titles = ['Burial Gammas', 'Direct Gammas', 'Protein Gammas', 'Water Gammas'] for i, (title, name) in enumerate(zip(titles, segments)): From 2efd50443dac3f2ed4c9a931a70c3a2993a149b0 Mon Sep 17 00:00:00 2001 From: ftclark3 <60708336+ftclark3@users.noreply.github.com> Date: Sat, 20 Sep 2025 14:45:34 -0500 Subject: [PATCH 4/6] add y axis labels to plot_gamma function add y axis labels to Gamma.plot_gamma --- frustratometer/classes/Gamma.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/frustratometer/classes/Gamma.py b/frustratometer/classes/Gamma.py index 9b577648..cbb51b9e 100644 --- a/frustratometer/classes/Gamma.py +++ b/frustratometer/classes/Gamma.py @@ -416,8 +416,12 @@ def plot_gamma(self, new_order=None): ax.set_title(title) ax.set_xticks(np.arange(len(self.alphabet)) + 0.5) ax.set_xticklabels(self.alphabet) - ax.set_yticks(np.arange(segments[name].shape[0] // 20) + 0.5) - ax.set_yticklabels(range(segments[name].shape[0] // 20)) + if i==0: # burial + ax.set_yticks([0.5,1.5,2.5]) + ax.set_yticklabels(['low','medium','high'], rotation=45, size=12) + else: # direct, prot, or wat + ax.set_yticks(np.arange(len(self.alphabet)) + 0.5) + ax.set_yticklabels(self.alphabet, rotation=0) plt.tight_layout() plt.show() From e25509ff966879216d5415ef6365f4709b8f8c95 Mon Sep 17 00:00:00 2001 From: ftclark3 <60708336+ftclark3@users.noreply.github.com> Date: Fri, 10 Oct 2025 23:19:06 -0500 Subject: [PATCH 5/6] configurational frustration pair energy matrix initialize values to np.nan instead of 0. For unmasked pairs, values are filled in, while, for masked pairs (inappropriate sequence separation, distance, etc.), values remain nan --- frustratometer/classes/AWSEM.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py index 770f4ad0..a25290c7 100644 --- a/frustratometer/classes/AWSEM.py +++ b/frustratometer/classes/AWSEM.py @@ -341,7 +341,7 @@ def compute_configurational_energies(self): # decoy_data_columns=['decoy_i','i_resno','j_resno','ires_type','jres_type','aa1','aa2','rij','rho_i','rho_j','water_energy','burial_energy_i','burial_energy_j','electrostatic_energy','total_energies'] # decoy_data=[] - configurational_energies=np.zeros((n,n)) + configurational_energies=np.full((n,n), np.nan) # masked pairs will be left as nan for c in range(n_contacts): n1=indices1[c] n2=indices2[c] @@ -366,4 +366,4 @@ def compute_configurational_energies(self): def configurational_frustration(self,aa_freq=None, correction=0, n_decoys=4000): mean_decoy_energy, std_decoy_energy = self.compute_configurational_decoy_statistics(n_decoys=n_decoys,aa_freq=aa_freq) - return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction) \ No newline at end of file + return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction) From 1400358a1decc07d0795fc21675c107bad5a0e51 Mon Sep 17 00:00:00 2001 From: Carlos Bueno Date: Wed, 22 Oct 2025 13:44:22 -0500 Subject: [PATCH 6/6] Minor documentation changes --- frustratometer/align/align.py | 28 ++++++++--------- frustratometer/frustration/frustration.py | 37 +++++++++++++++++------ 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/frustratometer/align/align.py b/frustratometer/align/align.py index f2857434..7f4733b5 100644 --- a/frustratometer/align/align.py +++ b/frustratometer/align/align.py @@ -1,29 +1,31 @@ import subprocess from pathlib import Path import tempfile +import typing -def jackhmmer(sequence, - output_file, - database, - log=subprocess.DEVNULL, +def jackhmmer(sequence: str, + output_file: typing.Union[Path,str], + database: typing.Union[Path,str], + log = subprocess.DEVNULL, dry_run: bool = False, - **kwargs): + **kwargs) -> Path: """ - Generates alignment using jackhmmer + Generates alignment in stockholm format using jackhmmer. Parameters ---------- sequence : str Protein sequence output_file : str - If True, will record alignment output messages; - Arguments: True OR False (Default is False) + Path to save the alignment file database: str - Location of the sequence database used to generation MSA + Location of the sequence database used to generation MSA. + The database should be in fasta format. + Databases can be downloaded from: https://www.uniprot.org/help/downloads log: File handle - jackhmmer output Default:DEVNULL + Path to write the output of the program. Default:DEVNULL dry_run: bool (default: False) - Save the temporary fasta_file and print the command instead of running + Creates a fasta file with the sequence and prints the command needed to run jackhmmer **kwargs: Other arguments that can be passed to jackhmmer. More information can be found by executing `jackhmmer -h` @@ -31,8 +33,6 @@ def jackhmmer(sequence, Common kwargs: N: number of iterations E: E-value threshold - - Returns ------- output_file: Path @@ -97,7 +97,7 @@ def extract_sequences_from_alignment(alignment_file, import Bio.SeqIO alignment = Bio.AlignIO.read(alignment_file,'stockholm') - database = Bio.SeqIO.parse('/home/cb/Development/DCA_Frustratometer/dca_frustratometer/databases/Uniprot/uniprot_sprot.fasta','fasta') + database = Bio.SeqIO.parse(database,'fasta') alignment_records=[] for record in alignment: alignment_records+=[record.name] diff --git a/frustratometer/frustration/frustration.py b/frustratometer/frustration/frustration.py index 5a24caaa..008970cf 100644 --- a/frustratometer/frustration/frustration.py +++ b/frustratometer/frustration/frustration.py @@ -1,3 +1,21 @@ +""" +Utilities for sequence energies, decoys, and frustration metrics from Potts models. + +Conventions +----------- +- Alphabet: `_AA = '-ACDEFGHIKLMNPQRSTVWY'` (gap '-' at index 0) +- Sequence length: L +- Fields and couplings: + * potts_model['h'] has shape (L, Q) + * potts_model['J'] has shape (L, L, Q, Q) +- Energies: A positive value in the potts model or a negative value in the energy is a favorable contribution. +- Masks: boolean array of shape (L, L). True values indicate included residue pairs. + +Notes +----- +All functions assume `len(seq) == potts_model['h'].shape[0] == potts_model['J'].shape[0]`. +""" + import prody import scipy.spatial.distance as sdist import numpy as np @@ -9,25 +27,24 @@ def compute_mask(distance_matrix: np.array, maximum_contact_distance: typing.Union[float, None] = None, minimum_sequence_separation: typing.Union[int, None] = None) -> np.array: """ - Computes a 2D Boolean mask from a given distance matrix based on a distance cutoff and a sequence separation cutoff. + Computes a 2D Boolean mask (L, L) for pairwise interactions from a given distance matrix using a distance cutoff and/or a minimum sequence-separation cutoff. + Both cutoffs are inclusive. True indicates that the residue pair meets the criteria. Parameters ---------- - distance_matrix : np.array + distance_matrix : np.array (L,L) A 2D array where the element at index [i, j] represents the spatial distance - between residues i and j. This matrix is assumed to be symmetric. + between residues i and j (e.g., Ca-Ca, Cb-Cb). This matrix is assumed to be symmetric. maximum_contact_distance : float, optional - The maximum distance of a contact. Pairs of residues with distances less than this - threshold are marked as True in the mask. If None, the spatial distance criterion - is ignored and all distances are included. Default is None. + The maximum distance of a contact. Include i,j if distance_matrix[i,j] <= maximum_contact_distance. + If None, no distance filtering is applied. Default is None. minimum_sequence_separation : int, optional - A minimum sequence distance threshold. Pairs of residues with sequence indices - differing by at least this value are marked as True in the mask. If None, - the sequence distance criterion is ignored. Default is None. + A minimum sequence separation threshold. Include i,j if |i-j| >= minimum_sequence_separation. + If None, no sequence separation is applied . Default is None. Returns ------- - mask : np.array + mask : np.array (L,L) A 2D Boolean array of the same dimensions as `distance_matrix`. Elements of the mask are True where the residue pairs meet the specified `distance_cutoff` and `sequence_distance_cutoff` criteria.