Source code for drugforge.data.backend.rdkit

from pathlib import Path
from typing import Union

from drugforge.data.schema.schema_base import read_file_directly
from rdkit import Chem


def _set_SD_data(mol: Union[Chem.Mol, Chem.Conformer], data: dict[str, str]):
    """
    Set the SD data on an rdkit molecule or conformer

    Parameters
    ----------
    mol: Union[Chem.Mol, Chem.Conformer]
        rdkit molecule or conformer

    data: dict[str, str]
        Dictionary of SD data to set
    """
    for key, value in data.items():
        mol.SetProp(str(key), str(value))


def set_SD_data(mol: Chem.Mol, data: dict[str, str | list]):
    """
    Set the SD data on an rdkit molecule, overwriting the existing values of
    the given tags.

    If the length of a list is 1, will set that value to all conformers.
    If the length of a list is equal to the number of conformers, will set each
    value to the corresponding conformer.
    Finally, it will set the properties for the whole molecule to be the data
    for the first conformer.
    Otherwise, will raise a ValueError.

    All tags are validated before anything is written, so a ValueError leaves
    the molecule and its conformers untouched.

    Parameters
    ----------
    mol: rdkit.Chem.Mol
        rdkit molecule

    data: dict[str, str | list]
        Dictionary of SD data to set.
        Each key should be a tag name and each value should be either a single
        value or a list of values, one for each conformer.

    Raises
    ------
    ValueError
        If a list has more than one element and its length differs from the
        number of conformers.
    """
    num_confs = mol.GetNumConformers()

    # convert to dict of lists first so scalars and lists share one code path
    data = {k: v if isinstance(v, list) else [v] for k, v in data.items()}

    # Validate every tag up front: raising midway through the writes below
    # would leave some tags set on the conformers and others not.
    for key, value in data.items():
        if len(value) != 1 and len(value) != num_confs:
            raise ValueError(
                f"Length of data for tag '{key}' does not match number of conformers ({num_confs}). "
                f"Expected {num_confs} but got {len(value)} elements."
            )

    for key, value in data.items():
        tag = str(key)
        if len(value) == 1:
            # a single value is broadcast to every conformer
            for conf in mol.GetConformers():
                conf.SetProp(tag, str(value[0]))
        else:
            for i, conf in enumerate(mol.GetConformers()):
                conf.SetProp(tag, str(value[i]))

    # Set the properties for the highest level to be the data for the first conformer
    from drugforge.data.util.data_conversion import get_first_value_of_dict_of_lists

    first_conf_data = get_first_value_of_dict_of_lists(data)
    _set_SD_data(mol, first_conf_data)
def _get_SD_data(mol: Union[Chem.Mol, Chem.Conformer]) -> dict[str, str]: """ Get the SD data from an RDKit molecule or conformer Parameters ---------- mol: Union[Chem.Mol, Chem.Conformer] RDKit molecule or conformer Returns ------- dict Dictionary of SD data """ return mol.GetPropsAsDict()
def get_SD_data(mol: Chem.Mol) -> dict[str, list]:
    """
    Collect the SD data of an RDKit molecule as a dictionary of lists.

    When the molecule has exactly one conformer, the data is read from the
    molecule itself. For any other conformer count the data is read from the
    conformers, so properties saved to mol.Prop will be ignored.

    Parameters
    ----------
    mol: Chem.Mol
        RDKit molecule

    Returns
    -------
    dict
        Dictionary of SD data
    """
    if mol.GetNumConformers() != 1:
        # Imported locally, as in the original; presumably to avoid a circular
        # import with data_conversion -- confirm before hoisting.
        from drugforge.data.util.data_conversion import (
            get_dict_of_lists_from_list_of_dicts,
        )

        per_conformer = [_get_SD_data(conformer) for conformer in mol.GetConformers()]
        return get_dict_of_lists_from_list_of_dicts(per_conformer)

    from drugforge.data.util.data_conversion import get_dict_of_lists_from_dict_of_str

    molecule_level = _get_SD_data(mol)
    return get_dict_of_lists_from_dict_of_str(molecule_level)
def load_sdf(file: Union[str, Path]) -> Chem.Mol:
    """
    Load an SDF file into an RDKit molecule

    Parameters
    ----------
    file: Union[str, Path]
        SDF file to read; passed unchanged to ``read_file_directly``

    Returns
    -------
    Chem.Mol
        Result of converting the file contents with ``sdf_str_to_rdkit_mol``
    """
    return sdf_str_to_rdkit_mol(read_file_directly(file))
def sdf_str_to_rdkit_mol(sdf: str) -> Chem.Mol:
    """
    Convert a SDF string to an RDKit molecule

    The first record becomes the returned molecule. For every following
    record, its SD data is copied onto its conformer and that conformer is
    appended to the returned molecule.

    Parameters
    ----------
    sdf : str
        SDF string

    Returns
    -------
    Chem.Mol
        RDKit molecule

    Raises
    ------
    ValueError
        If the SDF string contains no records, or if a record cannot be
        parsed by RDKit.
    """
    from io import BytesIO

    bio = BytesIO(sdf.encode())
    suppl = Chem.ForwardSDMolSupplier(bio, removeHs=False)

    # Use a default so an empty SDF does not leak a bare StopIteration; the
    # supplier also yields None for a record it cannot parse.
    ref = next(suppl, None)
    if ref is None:
        raise ValueError(
            "Could not read a molecule from the first record of the SDF string"
        )

    # NOTE(review): the first record's SD data stays on the molecule itself and
    # is not copied onto its conformer, unlike the records below -- confirm
    # that readers of per-conformer data expect this.
    for record_number, mol in enumerate(suppl, start=2):
        if mol is None:
            raise ValueError(
                f"Could not parse record {record_number} of the SDF string"
            )
        data = mol.GetPropsAsDict()
        conf = mol.GetConformer()
        _set_SD_data(conf, data)
        ref.AddConformer(conf, assignId=True)
    return ref
def rdkit_mol_to_sdf_str(mol: Chem.Mol) -> str:
    """
    Serialise an RDKit molecule to SDF text

    Parameters
    ----------
    mol : Chem.Mol
        RDKit molecule to write

    Returns
    -------
    str
        Text written by ``Chem.SDWriter`` for the molecule
    """
    from io import StringIO

    buffer = StringIO()
    writer = Chem.SDWriter(buffer)
    writer.write(mol)
    # flush before reading so the written record is present in the buffer
    writer.flush()
    sdf_text = buffer.getvalue()
    return sdf_text
def rdkit_smiles_roundtrip(smi: str) -> str:
    """
    Roundtrip a SMILES string through RDKit to canonicalize it

    Parameters
    ----------
    smi : str
        SMILES string to canonicalize

    Returns
    -------
    str
        Canonicalized SMILES string

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smi``
    """
    mol = Chem.MolFromSmiles(smi)
    # MolFromSmiles signals a parse failure by returning None; fail here with
    # the offending input instead of passing None on to MolToSmiles.
    if mol is None:
        raise ValueError(f"RDKit could not parse SMILES string: {smi!r}")
    return Chem.MolToSmiles(mol)