Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions calicolipidlibrary/Ceramide.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
# need to fix nomenclature
class Ceramide(SphingoLipid):

# h is one less than nomenclature as the first OH is defined as part of the head group
# so h = 1 is a d-LCB species
chain1_ranges = []
for c in range(14, 23):
for d in range(0, 2):
for h in range(1, 3):
if (h > 1 and d > 0):
for d in range(0, 3):
for h in range(0, 3):
if (h > 1 and d > 1):
continue
chain1_ranges.append([c, d, h])

Expand Down
38 changes: 19 additions & 19 deletions calicolipidlibrary/LysoCL.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,25 +96,25 @@ def theoreticalDigest(self):

return FRAGMENTS

def generateLibrary(self, target=None, mode="pos"):
if target:
handle = open(target, "a+")
if mode == "pos":
adduct_set = self.pos_adduct_set
elif mode == "neg":
adduct_set = self.neg_adduct_set
# parent = self.__bases__[0]
class_name = self.__class__.__name__
for c in self.chain_sets:
for adduct in adduct_set:
self.set_chains_and_adduct(class_name, c[:3], adduct=adduct)
content = self.printNist()
if target:
handle.write(content)
else:
sys.stdout.write(content)
if target:
handle.close()
# def generateLibrary(self, target=None, mode="pos", retention_times):
# if target:
# handle = open(target, "a+")
# if mode == "pos":
# adduct_set = self.pos_adduct_set
# elif mode == "neg":
# adduct_set = self.neg_adduct_set
# # parent = self.__bases__[0]
# class_name = self.__class__.__name__
# for c in self.chain_sets:
# for adduct in adduct_set:
# self.set_chains_and_adduct(class_name, c[:3], adduct=adduct)
# content = self.printNist(retention_times)
# if target:
# handle.write(content)
# else:
# sys.stdout.write(content)
# if target:
# handle.close()


# x = LysoCL("LysoCL",[[16,0,0], [18,1,0], [18,2,0]], adduct="[M+H]+")
Expand Down
4 changes: 2 additions & 2 deletions calicolipidlibrary/SM.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def theoreticalDigest(self):
MW_list(
[
self.chains[0][0],
(2 * self.chains[0][0] - 2 * self.chains[0][1] - 1),
(2 * self.chains[0][0] - 2 * self.chains[0][1] + 1),
1,
self.chains[0][2],
0,
Expand All @@ -45,7 +45,7 @@ def theoreticalDigest(self):
MW_list(
[
self.chains[0][0],
(2 * self.chains[0][0] - 2 * self.chains[0][1] - 1),
(2 * self.chains[0][0] - 2 * self.chains[0][1] + 1),
1,
self.chains[0][2],
0,
Expand Down
56 changes: 26 additions & 30 deletions calicolipidlibrary/lipidRules.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
import re
import sys
from types import NoneType

from create_smiles import *
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors


H2O = 18.01056
NH3 = 17.00274

H2O = 18.010564683
NH3 = 17.026549100
PROTON = 1.00727645229
HYDROGEN = 1.0078250321
OXYGEN = 15.9949146221
ELECTRON = 0.0005489
ELECTRON = 0.00054857990

ADDUCT = {
"[M]": 0.0,
Expand Down Expand Up @@ -559,7 +555,7 @@ def prettyFormula(self):

return formulaStr

def printNist(self):
def printNist(self, retention_times):
bbType = LIPID_BACKBONES[self.lipidclass][0]
Nchains = len(self.chains)
totalC = 0
Expand Down Expand Up @@ -622,6 +618,7 @@ def printNist(self):
ChainStrings = str(self.chains[0][0])
FullName = self.lipidclass + ChainStrings
FullNameWithAdduct = FullName + " " + self.adduct
SumName = FullName

atoms = self.MF()
MASS = MW_list(atoms)
Expand Down Expand Up @@ -730,21 +727,19 @@ def printNist(self):
RECORD.append("FORMULA: " + self.prettyFormula() + "\n")
RECORD.append("SMILES: " + get_lipid_smiles(self.lipidclass, LIPID_BACKBONES[self.lipidclass][0],
LIPID_BACKBONES[self.lipidclass][1], self.chains) + "\n")
RECORD.append("NumPeaks: " + str(len(FRAGMENTS)) + "\n")
#Add retention times if they are available
if retention_times is not None:
this_rt_entry = retention_times[retention_times["name"] == FullName]
if not this_rt_entry.empty:
if "rt" in this_rt_entry.columns.tolist():
RECORD.append(f"RT: {this_rt_entry.iloc[0]["rt"]:.2f}\n")
if "rt_min" in this_rt_entry.columns.tolist():
RECORD.append(f"RT_min: {this_rt_entry.iloc[0]["rt_min"]:.2f}\n")
if "rt_max" in this_rt_entry.columns.tolist():
RECORD.append(f"RT_max: {this_rt_entry.iloc[0]["rt_max"]:.2f}\n")

RECORD.append("NumPeaks: " + str(len(FRAGMENTS)) + "\n")

# smiles = get_lipid_smiles(self.lipidclass, LIPID_BACKBONES[self.lipidclass][0],
# LIPID_BACKBONES[self.lipidclass][1], self.chains)

# Temporary checks to make sure that molecular weight from smiles matches that calculated from headgroup+sn
# mol = Chem.MolFromSmiles(smiles)
# smiles_mw = Chem.Descriptors.ExactMolWt(mol)
# if smiles == "" or smiles_mw == 0.0 or (abs(MASS - smiles_mw) > .0001):
# print(FullName)
# print("SMILES MF: " + str(Chem.rdMolDescriptors.CalcMolFormula(mol)))

#RECORD.append("SMILES MF: " + str(Chem.rdMolDescriptors.CalcMolFormula(mol)) + "\n")
#end of temporary check

for f in FRAGMENTS:
RECORD.append(str(f[0]) + " " + str(f[1]) + " " + str(f[2]) + "\n")
Expand All @@ -770,19 +765,20 @@ def theoreticalDigest(self):
# FRAGMENTS.append( [PREC, 1000, "pre"] )
return FRAGMENTS

def generateLibrary(self, target=None, mode="pos"):
def generateLibrary(self, target=None, mode="pos", retention_times=None):
if target:
handle = open(target, "a+")
if mode == "pos":
adduct_set = self.pos_adduct_set
elif mode == "neg":
adduct_set = self.neg_adduct_set
#self.rt =
# parent = self.__bases__[0]
class_name = self.__class__.__name__
for c in self.chain_sets:
for adduct in adduct_set:
self.set_chains_and_adduct(class_name, c, adduct=adduct)
content = self.printNist()
content = self.printNist(retention_times)
if target and content is not None:
handle.write(content)
elif content is not None:
Expand Down Expand Up @@ -888,10 +884,8 @@ class NAcylGPL(Lipid):
for c1 in chain1_ranges:
for c2 in chain2_ranges:
for c3 in chain3_ranges:
if c2[0] < c1[0]:
continue
# if ((c2[0] == c1[0]) and (c2[1] < c1[0])):
# continue
if c2[0] < c1[0]: continue
if (c2[0] == c1[0] and c2[1] < c1[1]): continue
chain_sets.append([c1, c2, c3])


Expand All @@ -905,7 +899,7 @@ class SphingoLipid(Lipid):

chain1_ranges = []
for c in range(14, 23):
for d in range(0, 2):
for d in range(0, 3):
for h in range(1, 2):
if (h > 1 and d > 0):
continue
Expand Down Expand Up @@ -987,6 +981,8 @@ class AcylSphingoLipid(Lipid):
chain_sets = []
for c1 in chain1_ranges:
for c2 in chain2_ranges:
if c2[0] < c1[0]: continue
if (c2[0] == c1[0] and c2[1] < c1[1]): continue
for c3 in chain3_ranges:
chain_sets.append([c1, c2, c3])

Expand Down Expand Up @@ -1095,7 +1091,7 @@ class CardioLipin(Lipid):
if c3[0] < c2[0]: continue
if (c3[0] == c2[0] and c3[1] < c2[1]): continue
if c4[0] < c3[0]: continue
if (c4[0] == c3[0] and c3[1] < c2[1]): continue
if (c4[0] == c3[0] and c4[1] < c3[1]): continue

# if c2[0] < c1[0]: continue
# if (c2[0] == c1[0] and c2[1] < c1[0]): continue
Expand Down
2 changes: 1 addition & 1 deletion calicolipidlibrary/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def print_spectrum(lipid_class, chains=[], adduct=""):

if lipid_class not in ALL_LIPID_CLASSES:
print("Lipid Class '" + lipid_class + "' is not a supported lipid class.")
print("Suported lipid classes are:")
print("Supported lipid classes are:")
for lipid in ALL_LIPID_CLASSES:
print(lipid)
return
Expand Down
57 changes: 48 additions & 9 deletions generateDB.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import datetime
import argparse
import os
import pandas as pd
from nltk import rte_features

from calicolipidlibrary import *


Expand Down Expand Up @@ -38,6 +41,13 @@ def cli_parser():
default="",
)

parser.add_argument(
"-r",
"--retention-times",
help="retention time file name",
dest="retention_time_file_name",
default="",
)
return parser


Expand All @@ -64,6 +74,13 @@ def cli_parser():
print("AVAILABLE LIPID CLASSES:")
for lipid_class in ALL_LIPID_CLASSES:
print(lipid_class)
print("-r <retention times>:")
print("\tSupply path and filename for a .tsv containing known retention times\n")
#Needs Name RT or Name and RT+rt_min+rt_max or Name and rt_min_rt_max?
print("\tFile must contain a column with \"name\" and \"rt\" and/or\n")
print("\t\"name\" and both of \"rt_max\" and \"rt_min\"\n")
print("\tDEFAULT: No retention times will be associated.\n")

exit(0)

args = cli_parser().parse_args()
Expand All @@ -86,6 +103,30 @@ def cli_parser():

exit(1)



if args.retention_time_file_name != "":
if not os.path.exists(args.retention_time_file_name):
print("Supplied retention time files does not exist. Exiting program.")
exit(1)

retention_times_df = pd.read_csv(args.retention_time_file_name, delimiter = '\t')
retention_times_df.columns = retention_times_df.columns.str.lower() #make column names lower case
rt_col_names = retention_times_df.columns.tolist()

#check that we have Name and RT or both of RT_min and RT_max (or all three)
if ("name" not in rt_col_names) and (("rt" in rt_col_names) or ("rt_max" not in rt_col_names and "rt_min" not in rt_col_names)):
print("Supplied retention time file is improperly formatted.\n")
print("\tSupply path and filename for a .tsv containing known retention times\n")
print("\tFile must contain a column with \"name\" and \"rt\" and/or\n")
print("\tboth of \"rt_max\" and \"rt_min\"\n")
print("\tExiting program.")
exit(1)
else:
retention_times_df = retention_times_df.loc[:, retention_times_df.columns.isin(["name", "rt", "rt_min", "rt_max"])]
else:
retention_times_df = None

lipid_classes = []
if len(args.lipid_classes) == 0:
lipid_classes = ALL_LIPID_CLASSES.copy()
Expand Down Expand Up @@ -128,18 +169,16 @@ def cli_parser():

open(output_msp_file, "w")

for lipid_class in lipid_classes:
lipid_classes_list = list(lipid_classes)
lipid_classes_list.sort() #put lipid classes in alphabetical order
for lipid_class in lipid_classes_list:
msg = "Enumerating spectra for " + lipid_class + " ... "
instantiate_str = lipid_class + " = " + lipid_class + "()"
sys.stdout.write(msg)
eval_str = (
lipid_class
+ ".generateLibrary(target='"
+ output_msp_file
+ "', mode='"
+ ion
+ "')"
)
eval_str = (f"{lipid_class}.generateLibrary(target='{output_msp_file}', mode='{ion}', retention_times=retention_times_df)"
)


exec(instantiate_str)
eval(eval_str)
sys.stdout.write(" DONE\n")
Expand Down