From 34650e9d9d39a830cdca5e5ef339ea6290527228 Mon Sep 17 00:00:00 2001 From: Michael Skinnider Date: Wed, 4 Mar 2026 09:03:47 -0500 Subject: [PATCH 1/2] Revert "log removal of invalid SMILES with examples" This reverts commit 89d48039c17476686aca5bc1a85557c8c3237fab. --- src/clm/commands/write_structural_prior_CV.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/clm/commands/write_structural_prior_CV.py b/src/clm/commands/write_structural_prior_CV.py index e5c1b855..3dc31395 100644 --- a/src/clm/commands/write_structural_prior_CV.py +++ b/src/clm/commands/write_structural_prior_CV.py @@ -260,23 +260,8 @@ def write_structural_prior_CV( lambda s: clean_mol(s, raise_error=False) is None ) ] - gen = gen.drop(invalid_idx) - n_candidates = len(candidates) - n_invalid = len(invalid_idx) - - # log if invalid SMILES were detected and removed - if n_invalid > 0: - examples = gen.loc[invalid_idx, "smiles"].head(5).tolist() - - logger.warning( - f"Removed {n_invalid} invalid SMILES among " - f"{n_candidates} candidates to match a test molecule " - f"(possibly due to a different rdkit version). " - f"Examples: {examples}" - ) - inputs = {"model": gen.assign(source="model")} if pubchem_file: From e53d4dc5cc374c625a35df694ee7fb99772ab4fc Mon Sep 17 00:00:00 2001 From: Seungchan An Date: Wed, 4 Mar 2026 09:44:02 -0500 Subject: [PATCH 2/2] collect examples before drop --- src/clm/commands/write_structural_prior_CV.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/clm/commands/write_structural_prior_CV.py b/src/clm/commands/write_structural_prior_CV.py index 3dc31395..62d7e7e6 100644 --- a/src/clm/commands/write_structural_prior_CV.py +++ b/src/clm/commands/write_structural_prior_CV.py @@ -260,6 +260,21 @@ def write_structural_prior_CV( lambda s: clean_mol(s, raise_error=False) is None ) ] + + n_candidates = len(candidates) + n_invalid = len(invalid_idx) + + # log if invalid SMILES were detected and removed + if n_invalid > 0: + examples = gen.loc[invalid_idx, "smiles"].head(5).tolist() + + logger.warning( + f"Removed {n_invalid} invalid SMILES among " + f"{n_candidates} candidates to match a test molecule " + f"(possibly due to a different rdkit version). " + f"Examples: {examples}" + ) + gen = gen.drop(invalid_idx) inputs = {"model": gen.assign(source="model")}