From a9c31804116a83869b0a5bb159e8142b76cadb7d Mon Sep 17 00:00:00 2001
From: Carlos Bueno <carlos.bueno@rice.edu>
Date: Fri, 2 May 2025 13:58:30 -0500
Subject: [PATCH 1/6] Improves acknowledgements

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 6b15b7e8..8d25b47f 100644
--- a/README.md
+++ b/README.md
@@ -200,8 +200,7 @@ Copyright (c) 2022-2024, Carlos Bueno, Hana Jaafari
 
 ## Acknowledgements
  
-This work was supported by National Science Foundation (NSF) Center for Theoretical Biological Physics (NSF PHY-2019745) and NSF grants PHY-1522550.
-Project skeleton based on the [Computational Molecular Science Python Cookiecutter] (https://github.com/molssi/cookiecutter-cms) version 1.6.
+Carlos Bueno was supported by the MolSSI Software Fellowship, under the mentorship of Jessica Nash. We thank AMD (Advanced Micro Devices, Inc.) for the donation of high-performance computing hardware and HPC resources. This project is also supported by the Center for Theoretical Biological Physics (NSF Grants PHY-2019745 and PHY-1522550), with additional support from the D.R. Bullard Welch Chair at Rice University (Grant No. C-0016 to PGW). Project skeleton based on the [Computational Molecular Science Python Cookiecutter] (https://github.com/molssi/cookiecutter-cms) version 1.6.
 
 ## References
 

From caab92ed1018cb9ccffe5fa8c3ca8dc3a772d799 Mon Sep 17 00:00:00 2001
From: ftclark3 <60708336+ftclark3@users.noreply.github.com>
Date: Mon, 15 Sep 2025 12:59:12 -0500
Subject: [PATCH 2/6] makes plot title agree with default segment order in
 gamma plotting

---
 frustratometer/classes/Gamma.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frustratometer/classes/Gamma.py b/frustratometer/classes/Gamma.py
index 821bffea..9b577648 100644
--- a/frustratometer/classes/Gamma.py
+++ b/frustratometer/classes/Gamma.py
@@ -408,7 +408,7 @@ def plot_gamma(self, new_order=None):
         
         # Plot setup
         f, axes = plt.subplots(2, 2, figsize=(18, 16))
-        titles = ['Burial Gammas', 'Direct Gammas', 'Water Gammas', 'Protein Gammas']
+        titles = ['Burial Gammas', 'Direct Gammas', 'Protein Gammas', 'Water Gammas']
 
         for i, (title, name) in enumerate(zip(titles, segments)):
             ax = axes[i // 2, i % 2]
@@ -648,4 +648,4 @@ class O():
 
     self.gamma1 = Gamma(np.arange(0,1260,1))
     self.gamma2 = Gamma(np.arange(0,1260,1)*5+10)
-    self.gamma3 = Gamma(np.arange(1260,0,-1)*2-4)
\ No newline at end of file
+    self.gamma3 = Gamma(np.arange(1260,0,-1)*2-4)

From 483be414d0aacd828e52c35c3783e6fdb836c326 Mon Sep 17 00:00:00 2001
From: ftclark3 <60708336+ftclark3@users.noreply.github.com>
Date: Sat, 20 Sep 2025 14:43:36 -0500
Subject: [PATCH 3/6] fix overlap between axis ticks of upper subplots and
 titles of lower subplots

fix overlap between axis ticks of upper subplots and titles of lower subplots in Gamma.plot_gamma()
---
 frustratometer/classes/Gamma.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/frustratometer/classes/Gamma.py b/frustratometer/classes/Gamma.py
index 9b577648..57540959 100644
--- a/frustratometer/classes/Gamma.py
+++ b/frustratometer/classes/Gamma.py
@@ -408,6 +408,7 @@ def plot_gamma(self, new_order=None):
         
         # Plot setup
         f, axes = plt.subplots(2, 2, figsize=(18, 16))
+        f.subplots_adjust(hspace=50) # fix overlap between axis ticks of upper subplots and titles of lower subplots
         titles = ['Burial Gammas', 'Direct Gammas', 'Protein Gammas', 'Water Gammas']
 
         for i, (title, name) in enumerate(zip(titles, segments)):

From 2efd50443dac3f2ed4c9a931a70c3a2993a149b0 Mon Sep 17 00:00:00 2001
From: ftclark3 <60708336+ftclark3@users.noreply.github.com>
Date: Sat, 20 Sep 2025 14:45:34 -0500
Subject: [PATCH 4/6] add y axis labels to plot_gamma function

add y axis labels to Gamma.plot_gamma
---
 frustratometer/classes/Gamma.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/frustratometer/classes/Gamma.py b/frustratometer/classes/Gamma.py
index 9b577648..cbb51b9e 100644
--- a/frustratometer/classes/Gamma.py
+++ b/frustratometer/classes/Gamma.py
@@ -416,8 +416,12 @@ def plot_gamma(self, new_order=None):
             ax.set_title(title)
             ax.set_xticks(np.arange(len(self.alphabet)) + 0.5)
             ax.set_xticklabels(self.alphabet)
-            ax.set_yticks(np.arange(segments[name].shape[0] // 20) + 0.5)
-            ax.set_yticklabels(range(segments[name].shape[0] // 20))
+            if i==0: # burial
+                ax.set_yticks([0.5,1.5,2.5])
+                ax.set_yticklabels(['low','medium','high'], rotation=45, size=12)
+            else: # direct, prot, or wat
+                ax.set_yticks(np.arange(len(self.alphabet)) + 0.5)
+                ax.set_yticklabels(self.alphabet, rotation=0)
 
         plt.tight_layout()
         plt.show()

From e25509ff966879216d5415ef6365f4709b8f8c95 Mon Sep 17 00:00:00 2001
From: ftclark3 <60708336+ftclark3@users.noreply.github.com>
Date: Fri, 10 Oct 2025 23:19:06 -0500
Subject: [PATCH 5/6] configurational frustration pair energy matrix

initialize values to np.nan instead of 0. For unmasked pairs, values are filled in, while, for masked pairs (inappropriate sequence separation, distance, etc.), values remain nan
---
 frustratometer/classes/AWSEM.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 770f4ad0..a25290c7 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -341,7 +341,7 @@ def compute_configurational_energies(self):
 
         # decoy_data_columns=['decoy_i','i_resno','j_resno','ires_type','jres_type','aa1','aa2','rij','rho_i','rho_j','water_energy','burial_energy_i','burial_energy_j','electrostatic_energy','total_energies']
         # decoy_data=[]
-        configurational_energies=np.zeros((n,n))
+        configurational_energies=np.full((n,n), np.nan) # masked pairs will be left as nan
         for c in range(n_contacts):
             n1=indices1[c]
             n2=indices2[c]
@@ -366,4 +366,4 @@ def compute_configurational_energies(self):
     
     def configurational_frustration(self,aa_freq=None, correction=0, n_decoys=4000):
         mean_decoy_energy, std_decoy_energy = self.compute_configurational_decoy_statistics(n_decoys=n_decoys,aa_freq=aa_freq)
-        return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
\ No newline at end of file
+        return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)

From 1400358a1decc07d0795fc21675c107bad5a0e51 Mon Sep 17 00:00:00 2001
From: Carlos Bueno <carlos.bueno@rice.edu>
Date: Wed, 22 Oct 2025 13:44:22 -0500
Subject: [PATCH 6/6] Minor documentation changes

---
 frustratometer/align/align.py             | 28 ++++++++---------
 frustratometer/frustration/frustration.py | 37 +++++++++++++++++------
 2 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/frustratometer/align/align.py b/frustratometer/align/align.py
index f2857434..7f4733b5 100644
--- a/frustratometer/align/align.py
+++ b/frustratometer/align/align.py
@@ -1,29 +1,31 @@
 import subprocess
 from pathlib import Path
 import tempfile
+import typing
 
-def jackhmmer(sequence,
-              output_file,
-              database,
-              log=subprocess.DEVNULL,
+def jackhmmer(sequence: str,
+              output_file: typing.Union[Path,str],
+              database: typing.Union[Path,str],
+              log = subprocess.DEVNULL,
               dry_run: bool = False,
-              **kwargs):
+              **kwargs) -> Path:
     """
-    Generates alignment using jackhmmer
+    Generates alignment in stockholm format using jackhmmer.
 
     Parameters
     ----------
     sequence :  str
         Protein sequence
     output_file : str
-        If True, will record alignment output messages;
-        Arguments: True OR False (Default is False)
+        Path to save the alignment file
     database: str
-        Location of the sequence database used to generation MSA
+        Location of the sequence database used to generation MSA.
+        The database should be in fasta format.
+        Databases can be downloaded from: https://www.uniprot.org/help/downloads
     log: File handle
-        jackhmmer output Default:DEVNULL
+        Path to write the output of the program. Default:DEVNULL
     dry_run: bool (default: False)
-        Save the temporary fasta_file and print the command instead of running
+        Creates a fasta file with the sequence and prints the command needed to run jackhmmer
     **kwargs: 
         Other arguments that can be passed to jackhmmer.
         More information can be found by executing `jackhmmer -h`
@@ -31,8 +33,6 @@ def jackhmmer(sequence,
         Common kwargs:
             N: number of iterations
             E: E-value threshold
-
-
     Returns
     -------
     output_file: Path
@@ -97,7 +97,7 @@ def extract_sequences_from_alignment(alignment_file,
     import Bio.SeqIO
 
     alignment = Bio.AlignIO.read(alignment_file,'stockholm')
-    database = Bio.SeqIO.parse('/home/cb/Development/DCA_Frustratometer/dca_frustratometer/databases/Uniprot/uniprot_sprot.fasta','fasta')
+    database = Bio.SeqIO.parse(database,'fasta')
     alignment_records=[]
     for record in alignment:
         alignment_records+=[record.name]
diff --git a/frustratometer/frustration/frustration.py b/frustratometer/frustration/frustration.py
index 5a24caaa..008970cf 100644
--- a/frustratometer/frustration/frustration.py
+++ b/frustratometer/frustration/frustration.py
@@ -1,3 +1,21 @@
+"""
+Utilities for sequence energies, decoys, and frustration metrics from Potts models.
+
+Conventions
+-----------
+- Alphabet: `_AA = '-ACDEFGHIKLMNPQRSTVWY'` (gap '-' at index 0)
+- Sequence length: L
+- Fields and couplings:
+    * potts_model['h'] has shape (L, Q)
+    * potts_model['J'] has shape (L, L, Q, Q)
+- Energies: A positive value in the potts model or a negative value in the energy is a favorable contribution.
+- Masks: boolean array of shape (L, L). True values indicate included residue pairs.
+
+Notes
+-----
+All functions assume `len(seq) == potts_model['h'].shape[0] == potts_model['J'].shape[0]`.
+"""
+
 import prody
 import scipy.spatial.distance as sdist
 import numpy as np
@@ -9,25 +27,24 @@ def compute_mask(distance_matrix: np.array,
                  maximum_contact_distance: typing.Union[float, None] = None,
                  minimum_sequence_separation: typing.Union[int, None] = None) -> np.array:
     """
-    Computes a 2D Boolean mask from a given distance matrix based on a distance cutoff and a sequence separation cutoff.
+    Computes a 2D Boolean mask (L, L) for pairwise interactions from a given distance matrix using a distance cutoff and/or a minimum sequence-separation cutoff.
+    Both cutoffs are inclusive. True indicates that the residue pair meets the criteria.
 
     Parameters
     ----------
-    distance_matrix : np.array
+    distance_matrix : np.array (L,L)
         A 2D array where the element at index [i, j] represents the spatial distance
-        between residues i and j. This matrix is assumed to be symmetric.
+        between residues i and j (e.g., Ca-Ca, Cb-Cb). This matrix is assumed to be symmetric.
     maximum_contact_distance : float, optional
-        The maximum distance of a contact. Pairs of residues with distances less than this
-        threshold are marked as True in the mask. If None, the spatial distance criterion
-        is ignored and all distances are included. Default is None.
+        The maximum distance of a contact. Include i,j if distance_matrix[i,j] <= maximum_contact_distance. 
+        If None, no distance filtering is applied. Default is None.
     minimum_sequence_separation : int, optional
-        A minimum sequence distance threshold. Pairs of residues with sequence indices
-        differing by at least this value are marked as True in the mask. If None,
-        the sequence distance criterion is ignored. Default is None.
+        A minimum sequence separation threshold. Include i,j if |i-j| >= minimum_sequence_separation.
+        If None, no sequence separation is applied . Default is None.
 
     Returns
     -------
-    mask : np.array
+    mask : np.array (L,L)
         A 2D Boolean array of the same dimensions as `distance_matrix`. Elements of the mask
         are True where the residue pairs meet the specified `distance_cutoff` and
         `sequence_distance_cutoff` criteria.