From bff18463bc7c5f4b958ad2b9811c1d432151051f Mon Sep 17 00:00:00 2001 From: Ya-Fan Chen <20377719+Lexachoc@users.noreply.github.com> Date: Tue, 3 Jun 2025 14:17:02 +0200 Subject: [PATCH 1/3] Allow disabling 3D descriptors while enabling 2D descriptors ### Summary This PR adds support for disabling the calculation of 3D descriptors independently in the `from_smiles` and `from_sdf` functions, while preserving backward compatibility. ### Problem Currently, the `from_smiles` function uses a single `descriptors` flag to control both 2D and 3D descriptor calculation. However, some SMILES strings (e.g., "C") cause a `RuntimeError` when 3D descriptors are enabled, even though they work fine with only 2D descriptors. For example: ```python from padelpy import from_smiles, padeldescriptor # This raises RuntimeError from_smiles("C", descriptors=True) # This works fine padeldescriptor(mol_dir="input.smi", d_file="output.csv", d_2d=True, d_3d=False) ``` ### Solution Add an optional `descriptors_3d` argument (default `None`). If `descriptors_3d` is not provided, it defaults to the value of `descriptors` (preserving current behavior). If `descriptors_3d=False`, 3D descriptor calculation is skipped even when `descriptors=True`. --- padelpy/functions.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/padelpy/functions.py b/padelpy/functions.py index 67063e1..552a4c0 100644 --- a/padelpy/functions.py +++ b/padelpy/functions.py @@ -31,6 +31,7 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, + descriptors_3d: bool = None, fingerprints: bool = False, timeout: int = 60, maxruntime: int = -1, @@ -43,6 +44,7 @@ def from_smiles(smiles, SMILES strings output_csv (str): if supplied, saves descriptors to this CSV file descriptors (bool): if `True`, calculates descriptors + descriptors_3d (bool): if `False`, disable 3-D descriptors fingerprints (bool): if `True`, calculates fingerprints timeout (int): maximum time, in seconds, for conversion maxruntime (int): maximum running time per molecule in seconds. default=-1. 
@@ -77,7 +79,12 @@ def from_smiles(smiles, if output_csv is None: save_csv = False output_csv = "{}.csv".format(timestamp) - + + if descriptors_3d is None: + d_3d = descriptors + else: + d_3d = descriptors_3d + for attempt in range(3): try: padeldescriptor( @@ -86,7 +93,7 @@ def from_smiles(smiles, convert3d=True, retain3d=True, d_2d=descriptors, - d_3d=descriptors, + d_3d=d_3d, fingerprints=fingerprints, sp_timeout=timeout, retainorder=True, @@ -192,6 +199,7 @@ def from_mdl(mdl_file: str, def from_sdf(sdf_file: str, output_csv: str = None, descriptors: bool = True, + descriptors_3d: bool = None, fingerprints: bool = False, timeout: int = 60, maxruntime: int = -1, @@ -204,6 +212,7 @@ def from_sdf(sdf_file: str, sdf_file (str): path to sdf file output_csv (str): if supplied, saves descriptors/fingerprints here descriptors (bool): if `True`, calculates descriptors + descriptors_3d (bool): if `False`, disable 3-D descriptors fingerprints (bool): if `True`, calculates fingerprints timeout (int): maximum time, in seconds, for conversion maxruntime (int): maximum running time per molecule in seconds. default=-1. 
@@ -223,6 +232,7 @@ def from_sdf(sdf_file: str, rows = _from_mdl_lower(mol_file=sdf_file, output_csv=output_csv, descriptors=descriptors, + descriptors_3d=descriptors_3d, fingerprints=fingerprints, timeout=timeout, maxruntime=maxruntime, @@ -234,6 +244,7 @@ def from_sdf(sdf_file: str, def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = True, + descriptors_3d: bool = None, fingerprints: bool = False, timeout: int = 60, maxruntime: int = -1, @@ -250,7 +261,12 @@ def _from_mdl_lower(mol_file: str, output_csv = "{}.csv".format( datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3] ) - + + if descriptors_3d is None: + d_3d = descriptors + else: + d_3d = descriptors_3d + for attempt in range(3): try: padeldescriptor( @@ -261,7 +277,7 @@ def _from_mdl_lower(mol_file: str, retain3d=True, retainorder=True, d_2d=descriptors, - d_3d=descriptors, + d_3d=d_3d, fingerprints=fingerprints, sp_timeout=timeout, threads=threads From 0d39f20864f2783f7aadc81b230222e73623d92b Mon Sep 17 00:00:00 2001 From: Ya-Fan Chen <20377719+Lexachoc@users.noreply.github.com> Date: Tue, 3 Jun 2025 14:57:29 +0200 Subject: [PATCH 2/3] Update functions.py forgot to add the new flag for `from_mdl` --- padelpy/functions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/padelpy/functions.py b/padelpy/functions.py index 552a4c0..a343388 100644 --- a/padelpy/functions.py +++ b/padelpy/functions.py @@ -158,6 +158,7 @@ def from_smiles(smiles, def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True, + descriptors_3d: bool = None, fingerprints: bool = False, timeout: int = 60, maxruntime: int = -1, @@ -170,6 +171,7 @@ def from_mdl(mdl_file: str, mdl_file (str): path to MDL file output_csv (str): if supplied, saves descriptors/fingerprints here descriptors (bool): if `True`, calculates descriptors + descriptors_3d (bool): if `False`, disable 3-D descriptors fingerprints (bool): if `True`, calculates fingerprints timeout (int): maximum time, in seconds, for conversion 
maxruntime (int): maximum running time per molecule in seconds. default=-1. @@ -188,6 +190,7 @@ def from_mdl(mdl_file: str, rows = _from_mdl_lower(mol_file=mdl_file, output_csv=output_csv, descriptors=descriptors, + descriptors_3d=descriptors_3d, fingerprints=fingerprints, timeout=timeout, maxruntime=maxruntime, From 3cb44b4c9ef22f3eaf572ddd46078ea4a45c908a Mon Sep 17 00:00:00 2001 From: Ya-Fan Chen <20377719+Lexachoc@users.noreply.github.com> Date: Tue, 3 Jun 2025 15:04:33 +0200 Subject: [PATCH 3/3] Update README.md add usage examples for the new flag `descriptors_3d` for the "from_mdl", "from_smiles", and "from_sdf" functions --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index a62faa9..26e89e8 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,9 @@ descriptors = from_smiles(['CCC', 'CCCC'], threads = 1) # save descriptors to a CSV file _ = from_smiles('CCC', output_csv='descriptors.csv') + +# only calculate 2-D descriptors (some SMILES strings like 'C' can cause issues with 3-D descriptors) +descriptors = from_smiles('C', descriptors_3d=False) ``` ### MDL MolFile to Descriptors/Fingerprints @@ -77,6 +80,9 @@ desc_fp = from_mdl('mols.mdl', threads=1) # save descriptors to a CSV file _ = from_mdl('mols.mdl', output_csv='descriptors.csv') + +# only calculate 2-D descriptors (some molecules, e.g. methane, can cause issues with 3-D descriptors) +descriptors = from_mdl('mols.mdl', descriptors_3d=False) ``` ### SDF to Descriptors/Fingerprints @@ -102,6 +108,9 @@ desc_fp = from_mdl('mols.sdf', threads=1) # save descriptors to a CSV file _ = from_sdf('mols.sdf', output_csv='descriptors.csv') + +# only calculate 2-D descriptors (some molecules, e.g. methane, can cause issues with 3-D descriptors) +descriptors = from_sdf('mols.sdf', descriptors_3d=False) ``` ### Command Line Wrapper