cabb99 · cabb99 · Mar 31, 2026 · May 2, 2025 · Sep 15, 2025 · Sep 16, 2025
diff --git a/README.md b/README.md
@@ -200,8 +200,7 @@ Copyright (c) 2022-2024, Carlos Bueno, Hana Jaafari
 
 ## Acknowledgements
 
-This work was supported by National Science Foundation (NSF) Center for Theoretical Biological Physics (NSF PHY-2019745) and NSF grants PHY-1522550.
-Project skeleton based on the [Computational Molecular Science Python Cookiecutter] (https://github.com/molssi/cookiecutter-cms) version 1.6.
+Carlos Bueno was supported by the MolSSI Software Fellowship, under the mentorship of Jessica Nash. We thank AMD (Advanced Micro Devices, Inc.) for the donation of high-performance computing hardware and HPC resources. This project is also supported by the Center for Theoretical Biological Physics (NSF Grants PHY-2019745 and PHY-1522550), with additional support from the D.R. Bullard Welch Chair at Rice University (Grant No. C-0016 to PGW). Project skeleton based on the [Computational Molecular Science Python Cookiecutter] (https://github.com/molssi/cookiecutter-cms) version 1.6.
 
 ## References
 

diff --git a/frustratometer/align/align.py b/frustratometer/align/align.py
@@ -1,39 +1,38 @@
 import subprocess
 from pathlib import Path
 import tempfile
-from typing import Union
+import typing
 
-def jackhmmer(sequence,
-              output_file,
-              database,
-              log=subprocess.DEVNULL,
+def jackhmmer(sequence: str,
+              output_file: typing.Union[Path,str],
+              database: typing.Union[Path,str],
+              log = subprocess.DEVNULL,
               dry_run: bool = False,
-              **kwargs)->Path:
+              **kwargs) -> Path:
     """
-    Generates alignment using jackhmmer
+    Generates alignment in stockholm format using jackhmmer.
 
     Parameters
     ----------
     sequence :  str
         Protein sequence
     output_file : str
-        If True, will record alignment output messages;
-        Arguments: True OR False (Default is False)
+        Path to save the alignment file
     database: str
-        Location of the sequence database used to generation MSA
+        Location of the sequence database used to generation MSA.
+        The database should be in fasta format.
+        Databases can be downloaded from: https://www.uniprot.org/help/downloads
     log: File handle
-        jackhmmer output Default:DEVNULL
+        Path to write the output of the program. Default:DEVNULL
     dry_run: bool (default: False)
-        Save the temporary fasta_file and print the command instead of running
+        Creates a fasta file with the sequence and prints the command needed to run jackhmmer
     **kwargs: 
         Other arguments that can be passed to jackhmmer.
         More information can be found by executing `jackhmmer -h`
         arguments without a value such as --noali should be passed as `noali=True`
         Common kwargs:
             N: number of iterations
             E: E-value threshold
-
-
     Returns
     -------
     output_file: Path
@@ -98,7 +97,7 @@ def extract_sequences_from_alignment(alignment_file,
     import Bio.SeqIO
 
     alignment = Bio.AlignIO.read(alignment_file,'stockholm')
-    database = Bio.SeqIO.parse('/home/cb/Development/DCA_Frustratometer/dca_frustratometer/databases/Uniprot/uniprot_sprot.fasta','fasta')
+    database = Bio.SeqIO.parse(database,'fasta')
     alignment_records=[]
     for record in alignment:
         alignment_records+=[record.name]

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
@@ -341,7 +341,7 @@ def compute_configurational_energies(self):
 
         # decoy_data_columns=['decoy_i','i_resno','j_resno','ires_type','jres_type','aa1','aa2','rij','rho_i','rho_j','water_energy','burial_energy_i','burial_energy_j','electrostatic_energy','total_energies']
         # decoy_data=[]
-        configurational_energies=np.zeros((n,n))
+        configurational_energies=np.full((n,n), np.nan) # masked pairs will be left as nan
         for c in range(n_contacts):
             n1=indices1[c]
             n2=indices2[c]
@@ -366,4 +366,4 @@ def compute_configurational_energies(self):
 
     def configurational_frustration(self,aa_freq=None, correction=0, n_decoys=4000):
         mean_decoy_energy, std_decoy_energy = self.compute_configurational_decoy_statistics(n_decoys=n_decoys,aa_freq=aa_freq)
-        return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
+        return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
diff --git a/frustratometer/classes/Gamma.py b/frustratometer/classes/Gamma.py
@@ -408,16 +408,21 @@ def plot_gamma(self, new_order=None):
 
         # Plot setup
         f, axes = plt.subplots(2, 2, figsize=(18, 16))
-        titles = ['Burial Gammas', 'Direct Gammas', 'Water Gammas', 'Protein Gammas']
+        f.subplots_adjust(hspace=50) # fix overlap between axis ticks of upper subplots and titles of lower subplots
+        titles = ['Burial Gammas', 'Direct Gammas', 'Protein Gammas', 'Water Gammas']
 
         for i, (title, name) in enumerate(zip(titles, segments)):
             ax = axes[i // 2, i % 2]
             sns.heatmap(segments[name].reshape(-1, 20), ax=ax, cmap='RdBu_r', center=0)
             ax.set_title(title)
             ax.set_xticks(np.arange(len(self.alphabet)) + 0.5)
             ax.set_xticklabels(self.alphabet)
-            ax.set_yticks(np.arange(segments[name].shape[0] // 20) + 0.5)
-            ax.set_yticklabels(range(segments[name].shape[0] // 20))
+            if i==0: # burial
+                ax.set_yticks([0.5,1.5,2.5])
+                ax.set_yticklabels(['low','medium','high'], rotation=45, size=12)
+            else: # direct, prot, or wat
+                ax.set_yticks(np.arange(len(self.alphabet)) + 0.5)
+                ax.set_yticklabels(self.alphabet, rotation=0)
 
         plt.tight_layout()
         plt.show()
@@ -648,4 +653,4 @@ class O():
 
     self.gamma1 = Gamma(np.arange(0,1260,1))
     self.gamma2 = Gamma(np.arange(0,1260,1)*5+10)
-    self.gamma3 = Gamma(np.arange(1260,0,-1)*2-4)
+    self.gamma3 = Gamma(np.arange(1260,0,-1)*2-4)
diff --git a/frustratometer/frustration/frustration.py b/frustratometer/frustration/frustration.py
@@ -1,3 +1,21 @@
+"""
+Utilities for sequence energies, decoys, and frustration metrics from Potts models.
+
+Conventions
+-----------
+- Alphabet: `_AA = '-ACDEFGHIKLMNPQRSTVWY'` (gap '-' at index 0)
+- Sequence length: L
+- Fields and couplings:
+    * potts_model['h'] has shape (L, Q)
+    * potts_model['J'] has shape (L, L, Q, Q)
+- Energies: A positive value in the potts model or a negative value in the energy is a favorable contribution.
+- Masks: boolean array of shape (L, L). True values indicate included residue pairs.
+
+Notes
+-----
+All functions assume `len(seq) == potts_model['h'].shape[0] == potts_model['J'].shape[0]`.
+"""
+
 import prody
 import scipy.spatial.distance as sdist
 import numpy as np
@@ -10,25 +28,24 @@ def compute_mask(distance_matrix: np.array,
                  maximum_contact_distance: Union[float, None] = None,
                  minimum_sequence_separation: Union[int, None] = None) -> np.array:
     """
-    Computes a 2D Boolean mask from a given distance matrix based on a distance cutoff and a sequence separation cutoff.
+    Computes a 2D Boolean mask (L, L) for pairwise interactions from a given distance matrix using a distance cutoff and/or a minimum sequence-separation cutoff.
+    Both cutoffs are inclusive. True indicates that the residue pair meets the criteria.
 
     Parameters
     ----------
-    distance_matrix : np.array
+    distance_matrix : np.array (L,L)
         A 2D array where the element at index [i, j] represents the spatial distance
-        between residues i and j. This matrix is assumed to be symmetric.
+        between residues i and j (e.g., Ca-Ca, Cb-Cb). This matrix is assumed to be symmetric.
     maximum_contact_distance : float, optional
-        The maximum distance of a contact. Pairs of residues with distances less than this
-        threshold are marked as True in the mask. If None, the spatial distance criterion
-        is ignored and all distances are included. Default is None.
+        The maximum distance of a contact. Include i,j if distance_matrix[i,j] <= maximum_contact_distance. 
+        If None, no distance filtering is applied. Default is None.
     minimum_sequence_separation : int, optional
-        A minimum sequence distance threshold. Pairs of residues with sequence indices
-        differing by at least this value are marked as True in the mask. If None,
-        the sequence distance criterion is ignored. Default is None.
+        A minimum sequence separation threshold. Include i,j if |i-j| >= minimum_sequence_separation.
+        If None, no sequence separation is applied . Default is None.
 
     Returns
     -------
-    mask : np.array
+    mask : np.array (L,L)
         A 2D Boolean array of the same dimensions as `distance_matrix`. Elements of the mask
         are True where the residue pairs meet the specified `distance_cutoff` and
         `sequence_distance_cutoff` criteria.