Source code for drugforge.data.schema.ligand

import json
import logging
import warnings
from enum import Flag, auto
from pathlib import Path
from typing import (  # noqa: F401
    TYPE_CHECKING,
    Any,
    Dict,
    Literal,
    Optional,
    Tuple,
    Union,
)

import numpy as np
from drugforge.data.backend.openeye import (
    clear_SD_data,
    load_openeye_mol2,
    load_openeye_sdf,
    oechem,
    oemol_to_inchi,
    oemol_to_inchikey,
    oemol_to_sdf_string,
    oemol_to_smiles,
    oequacpac,
    sdf_string_to_oemol,
    set_SD_data,
    smiles_to_oemol,
)
from drugforge.data.backend.rdkit import rdkit_mol_to_sdf_str
from drugforge.data.operators.state_expanders.expansion_tag import StateExpansionTag
from drugforge.data.schema.identifiers import (
    BespokeParameters,
    ChargeProvenance,
    LigandIdentifiers,
    LigandProvenance,
)
from drugforge.data.schema.schema_base import DataStorageType
from pydantic import Field, field_validator, model_validator
from pydantic_core import PydanticSerializationError

from .experimental import ExperimentalCompoundData
from .schema_base import (
    DataModelAbstractBase,
    schema_dict_get_val_overload,
    write_file_directly,
)

if TYPE_CHECKING:
    import gufe
    from rdkit import Chem

logger = logging.getLogger(__name__)



[docs]
class InvalidLigandError(ValueError):
    """Raised when ligand input is not valid, e.g. conformers whose chemical structures differ."""




[docs]
class ChemicalRelationship(Flag):
    """
    Bit flags describing how two ligands are chemically related.

    Members can be OR-ed together when more than one relationship applies;
    ``UNKNOWN`` is the empty flag (no bits set).

    NOTE(review): an earlier revision of this docstring stated that conjugate
    acids / bases are not distinguished from tautomers. A dedicated
    ``PROTONATION_STATE_ISOMER`` member exists, so that remark may be stale.
    """

    # explicit single-bit values, one bit per relationship
    DISTINCT = 1 << 0
    IDENTICAL = 1 << 1
    STEREOISOMER = 1 << 2
    TAUTOMER = 1 << 3
    PROTONATION_STATE_ISOMER = 1 << 4
    UNKNOWN = 0



# Ligand Schema

[docs]
class Ligand(DataModelAbstractBase):
    """
    Schema for a Ligand.

    Has first class serialization support for SDF files as well as the typical JSON and dictionary
    serialization.

    Equality comparisons are done on the chemical structure data found in the `data` field (ignoring
    its first two header lines), not on the other fields or the SD tags of the original SDF.
    This means you can change the other fields and still have equality, but changing the chemical
    structure data will change equality.

    You must provide either a `compound_name` or an `ids` field with at least one identifier set,
    otherwise the ligand will be invalid.

    Parameters
    ----------
    compound_name : str, optional
        Name of compound, by default None
    ids : Optional[LigandIdentifiers], optional
        LigandIdentifiers Schema for identifiers associated with this ligand, by default None
    provenance : LigandProvenance
        Identifiers for the input state of the ligand; required and frozen.
    experimental_data : Optional[ExperimentalCompoundData], optional
        ExperimentalCompoundData Schema for experimental data associated with the compound, by default None
    expansion_tag : Optional[StateExpansionTag], optional
        Tag linking this ligand to its parent in a state expansion, by default None
    charge_provenance : Optional[ChargeProvenance], optional
        Provenance information of the local charging method, by default None
    bespoke_parameters : Optional[BespokeParameters], optional
        Bespoke parameters for this ligand organised by interaction type, by default None
    tags : dict[str, str], optional
        Dictionary of SD tags (first conformer when several are present), by default {}
    conf_tags : Optional[dict[str, list]], optional
        Dictionary of SD tags holding one value per conformer, by default {}
    data : str
        SDF file stored as a string to hold the internal data state; required, hidden from repr
    data_format : DataStorageType, optional, const
        Enum describing the data storage method, always DataStorageType.sdf
    """

    compound_name: Optional[str] = Field(None, description="Name of compound")
    ids: Optional[LigandIdentifiers] = Field(
        None,
        description="LigandIdentifiers Schema for identifiers associated with this ligand",
    )
    provenance: LigandProvenance = Field(
        ...,
        description="Identifiers for the input state of the ligand used to ensure the sdf data is correct.",
        frozen=True,
    )
    experimental_data: Optional[ExperimentalCompoundData] = Field(
        None,
        description="ExperimentalCompoundData Schema for experimental data associated with the compound",
    )

    expansion_tag: Optional[StateExpansionTag] = Field(
        None,
        description="Expansion tag linking this ligand to its parent in a state expansion if needed",
    )

    charge_provenance: Optional[ChargeProvenance] = Field(
        None, description="The provenance information of the local charging method."
    )

    bespoke_parameters: Optional[BespokeParameters] = Field(
        None,
        description="The bespoke parameters for this ligand organised by interaction type.",
    )
    # Tag values can arrive with mixed types (r_epik_state_penalty seems to be a float
    # in some cases and a str in others); the "tags" field validator on this class
    # coerces every value to str to resolve this.
    tags: dict[str, str] = Field(
        {},
        description="Dictionary of SD tags. "
        "If multiple conformers are present, these tags represent the first conformer.",
    )

    # per-conformer SD tags: each key maps to a list of values, one per conformer
    conf_tags: Optional[dict[str, list]] = Field(
        {}, description="Dictionary of SD tags for each conformer."
    )

    # internal SDF representation of the structure; excluded from repr
    data: str = Field(
        ...,
        description="SDF file stored as a string to hold internal data state",
        repr=False,
    )
    # the Literal type pins the storage format to SDF
    data_format: Literal[DataStorageType.sdf] = DataStorageType.sdf

    # Runs before pydantic's own type validation so that non-str tag values
    # (floats, ints, ...) are coerced to str instead of being rejected.
    @field_validator("tags", mode="before")
    @classmethod
    def _convert_tags_to_str(cls, v):
        """
        Coerce every value of the incoming ``tags`` mapping to ``str``.

        Raises
        ------
        ValueError
            If ``tags`` is not a dictionary.
        """
        if not isinstance(v, dict):
            raise ValueError("tags must be a dictionary")
        return {name: str(value) for name, value in v.items()}

    @model_validator(mode="before")
    @classmethod
    def _validate_at_least_one_id(cls, values):
        """
        Ensure the ligand can be identified by a ``compound_name`` or by ``ids``.

        Raises
        ------
        ValueError
            If ``compound_name`` is None and ``ids`` is None or has only None values.
        """
        # a "before" validator receives the raw input; anything that is not a
        # mapping has no ``.get`` and is left for pydantic's own validation
        if not isinstance(values, dict):
            return values

        if values.get("compound_name") is not None:
            return values

        ids = values.get("ids")
        # ``ids`` may be a plain dict or an already instantiated schema object;
        # schema_dict_get_val_overload yields the values for either form
        if ids is None or all(
            v is None for v in schema_dict_get_val_overload(ids)
        ):
            raise ValueError(
                "At least one identifier must be provided, or compound_name must be provided"
            )
        return values

    @field_validator("tags")
    @classmethod
    def _validate_tags(cls, v):
        """
        Reject tags whose name collides with a Ligand model field.

        Raises
        ------
        ValueError
            For the first tag name that is also a model field name.
        """
        reserved_names = Ligand.model_fields.keys()
        for tag_name in v:
            if tag_name not in reserved_names:
                continue
            raise ValueError(f"Tag name {tag_name} is a reserved attribute name")
        return v

    def __hash__(self):
        """
        Hash the header-stripped structure data, i.e. exactly what ``__eq__`` compares.

        Hashing the full JSON dump would let two ligands that compare equal
        (same structure data, different name or tags) produce different
        hashes, which breaks the hash/eq contract relied on by sets and dicts.
        """
        return hash("\n".join(self.data.split("\n")[2:]))

    def __eq__(self, other: object) -> bool:
        """
        Compare two ligands on their chemical structure data only.

        Returns ``NotImplemented`` for non-Ligand operands so Python falls back
        to its default comparison instead of raising AttributeError.
        """
        if not isinstance(other, Ligand):
            return NotImplemented
        return self.data_equal(other)

    def data_equal(self, other: "Ligand") -> bool:
        """
        Return True when both ligands hold the same SDF data, ignoring the
        first two lines of the header block.
        """

        def _without_header(sdf: str) -> str:
            # the first two header lines are not important when checking equality
            return "\n".join(sdf.split("\n")[2:])

        return _without_header(self.data) == _without_header(other.data)


[docs]
    @classmethod
    def from_oemol(cls, mol: oechem.OEMol, **kwargs) -> "Ligand":
        """
        Create a Ligand from an OEMol extracting all SD tags into the internal model.

        SD tags found on the molecule are merged into ``kwargs`` (overriding any
        caller-supplied value of the same name). Entries that match a model field
        are passed to the constructor; everything else is stored in ``tags`` /
        ``conf_tags``. Values that are not hashable are skipped with a warning.

        Parameters
        ----------
        mol : oechem.OEMol
            Molecule to convert; it is copied, the input is not modified.
        **kwargs
            Extra model fields or tags. A ``data`` entry is always ignored because
            the SDF data is regenerated from ``mol``.
        """
        from drugforge.data.backend.openeye import get_SD_data
        from drugforge.data.util.data_conversion import get_first_value_of_dict_of_lists

        # work with a copy as we change the state of the molecule
        # working with input_mol also ensures that we're working with an OEMol and not an OEGraphMol
        input_mol = oechem.OEMol(mol)
        oechem.OEClearAromaticFlags(input_mol)
        oechem.OEAssignAromaticFlags(input_mol, oechem.OEAroModel_MDL)
        oechem.OEAssignHybridization(input_mol)
        oechem.OEAddExplicitHydrogens(input_mol)

        conf_tags = get_SD_data(mol)
        sd_tags = get_first_value_of_dict_of_lists(conf_tags)

        for key, value in sd_tags.items():
            try:
                # check to see if we have JSON of a model field
                kwargs[key] = json.loads(value)
            except json.JSONDecodeError:
                kwargs[key] = value

        # Drop "data" only after the SD tags were merged: it is passed explicitly
        # below, so a caller kwarg or an SD tag of that name would otherwise raise
        # a duplicate keyword TypeError.
        kwargs.pop("data", None)

        # every kwarg without a field in the model is treated as a tag
        model_field_names = cls.model_fields.keys()
        keys_to_save = [key for key in kwargs if key not in model_field_names]

        # A dict keeps the tags in a deterministic (insertion) order; values that
        # are not hashable are still skipped with a warning.
        tags = {}
        for key in keys_to_save:
            value = kwargs[key]
            try:
                hash(value)
            except TypeError:
                warnings.warn(
                    f"Tag {key} with value {value} is not hashable and will not be saved"
                )
            else:
                tags[key] = value

        kwargs["tags"] = dict(tags)

        # Do the same thing for the conformer tags, only keeping the ones in 'tags'
        conf_tags_to_save = {
            key: value for key, value in conf_tags.items() if key in keys_to_save
        }

        # if there aren't any, copy the tags to the conformers
        if not conf_tags_to_save:
            n_confs = input_mol.NumConfs()
            conf_tags_to_save = {key: [value] * n_confs for key, value in tags.items()}

        kwargs["conf_tags"] = conf_tags_to_save

        # clean the sdf data for the internal model
        sdf_str = oemol_to_sdf_string(clear_SD_data(input_mol))
        # create a smiles which does not have nitrogen stereo
        smiles = oemol_to_smiles(input_mol)
        # create the internal LigandProvenance model unless one was supplied
        if "provenance" not in kwargs:
            kwargs["provenance"] = LigandProvenance(
                isomeric_smiles=smiles,
                inchi=oemol_to_inchi(input_mol),
                inchi_key=oemol_to_inchikey(input_mol),
                fixed_inchi=oemol_to_inchi(input_mol, fixed_hydrogens=True),
                fixed_inchikey=oemol_to_inchikey(input_mol, fixed_hydrogens=True),
            )
        # check for an openeye title which could be used as a compound name
        if mol.GetTitle() != "" and kwargs.get("compound_name") is None:
            kwargs["compound_name"] = mol.GetTitle()

        return cls(data=sdf_str, **kwargs)



[docs]
    def to_oemol(self) -> oechem.OEMol:
        """
        Build an OEMol from the stored SDF data and attach the model fields,
        the tags and the per-conformer tags as SD data.
        """
        mol = sdf_string_to_oemol(self.data)

        # these fields are handled separately below (or are the SDF itself)
        skipped = ("data", "tags", "conf_tags", "data_format")
        sd_data = {}
        for name in Ligand.model_fields.keys():
            if name in skipped:
                continue
            value = getattr(self, name)
            try:
                # schema objects serialise themselves to JSON
                sd_data[name] = value.model_dump_json()
            except (AttributeError, PydanticSerializationError):
                # plain values fall back to their str form; None is not written
                if value is not None:
                    sd_data[name] = str(getattr(self, name))
        # dump the enum using value to get the str repr
        sd_data["data_format"] = self.data_format.value

        # add partial charges if present, indexed by atom index
        charge_tag = "atom.dprop.PartialCharge"
        if charge_tag in self.tags:
            charges = self.tags[charge_tag].split(" ")
            for oe_atom in mol.GetAtoms():
                oe_atom.SetPartialCharge(float(charges[oe_atom.GetIdx()]))

        # high level tags first, then the conformer tags (which win on key clashes)
        sd_data.update(self.tags)
        sd_data.update(self.conf_tags)

        mol = set_SD_data(mol, sd_data)

        if self.compound_name is not None:
            mol.SetTitle(self.compound_name)

        return mol



[docs]
    @classmethod
    def from_single_conformers(cls, confs: list["Ligand"]) -> "Ligand":
        """
        Create one multi-conformer Ligand from a list of Ligand objects, each
        representing a single conformer.

        The first ligand provides the base molecule; the remaining ligands are
        added as new conformers, and the tags of all inputs are stored as
        per-conformer SD data on the result.

        Raises
        ------
        InvalidLigandError
            If ``confs`` is empty or the conformers are not all chemically equal.
        """
        from drugforge.data.util.data_conversion import (
            get_dict_of_lists_from_list_of_dicts,
        )

        if not confs:
            raise InvalidLigandError("At least one conformer must be provided")

        reference = confs[0]

        # check that all the conformers are the same
        if not all(reference.is_chemically_equal(conf) for conf in confs):
            raise InvalidLigandError(
                "All conformers must have the same chemical structure data"
            )

        oemol = reference.to_oemol()
        for conf in confs[1:]:
            oemol.NewConf(conf.to_oemol())

        # Turn list[dict[k,v]] into dict[k,[v]]
        conf_tags = get_dict_of_lists_from_list_of_dicts([conf.tags for conf in confs])

        # Filter out the keys that are a model attribute
        model_field_names = cls.model_fields.keys()
        conf_tags = {k: v for k, v in conf_tags.items() if k not in model_field_names}

        # create a new Ligand object with the data from the first conformer
        new_lig = cls.from_oemol(oemol)

        new_lig.set_SD_data(conf_tags)
        return new_lig



[docs]
    def to_single_conformers(self) -> list["Ligand"]:
        """
        Return a list with one Ligand object per conformer of this ligand.
        """
        mol = self.to_oemol()
        return [self.from_oemol(conf) for conf in mol.GetConfs()]



[docs]
    def to_rdkit(self) -> "Chem.Mol":
        """
        Build an RDKit molecule from the stored SDF data and attach the model
        fields, the tags and the per-conformer tags as SD data.
        """
        from drugforge.data.backend.rdkit import sdf_str_to_rdkit_mol, set_SD_data
        from rdkit import Chem

        rdkit_mol: Chem.Mol = sdf_str_to_rdkit_mol(self.data)

        skipped = ("data", "tags", "data_format", "conf_tags")
        sd_data = {}
        for name in Ligand.model_fields.keys():
            if name in skipped:
                continue
            value = getattr(self, name)
            try:
                sd_data[name] = value.model_dump_json()
            # with pydantic v2, model_dump_json may raise PydanticSerializationError
            # or AttributeError, so both are caught to do the proper parsing
            except (AttributeError, PydanticSerializationError):
                if value is not None:
                    sd_data[name] = str(getattr(self, name))
        # dump the enum using value to get the str repr
        sd_data["data_format"] = self.data_format.value
        # if we have a compound name set it as the RDKit _Name prop as well
        if self.compound_name is not None:
            sd_data["_Name"] = self.compound_name
        # dump tags as separate items
        if self.tags is not None:
            sd_data.update(self.tags)
        # if we have partial charges set them on the atoms assuming the atom ordering is not changed
        charge_tag = "atom.dprop.PartialCharge"
        if charge_tag in self.tags:
            charges = self.tags[charge_tag].split(" ")
            for atom_index, charge in enumerate(charges):
                rdkit_mol.GetAtomWithIdx(atom_index).SetDoubleProp(
                    "PartialCharge", float(charge)
                )

        # set the SD data that is different for each conformer, converted to str first
        per_conformer = {}
        for tag, values in self.conf_tags.items():
            per_conformer[tag] = [str(v) for v in values]
        sd_data.update(per_conformer)

        set_SD_data(rdkit_mol, sd_data)
        return rdkit_mol



[docs]
    def to_openfe(self) -> "gufe.components.SmallMoleculeComponent":
        """
        Build a gufe SmallMoleculeComponent from the RDKit form of this ligand,
        named after ``compound_name``.
        """
        import gufe

        component_cls = gufe.components.SmallMoleculeComponent
        return component_cls.from_rdkit(self.to_rdkit(), name=self.compound_name)



[docs]
    @classmethod
    def from_openfe(
        cls, mol: "gufe.components.SmallMoleculeComponent", **kwargs
    ) -> "Ligand":
        """
        Create a Ligand from an openfe SmallMoleculeComponent by round-tripping
        its RDKit molecule through an SDF string; the component name becomes
        the compound name.
        """
        sdf_str = rdkit_mol_to_sdf_str(mol.to_rdkit())
        return cls.from_sdf_str(sdf_str, compound_name=mol.name, **kwargs)



[docs]
    @classmethod
    def from_smiles(cls, smiles: str, **kwargs) -> "Ligand":
        """
        Create a Ligand from a SMILES string; a ``data`` keyword is ignored.
        """
        # the SDF data is always derived from the molecule itself
        kwargs.pop("data", None)
        return cls.from_oemol(smiles_to_oemol(smiles), **kwargs)


    @property
    def smiles(self) -> str:
        """
        Isomeric SMILES string of the ligand, generated from its OEMol form.
        """
        return oemol_to_smiles(self.to_oemol(), isomeric=True)

    @property
    def non_iso_smiles(self) -> str:
        """
        Non-isomeric SMILES string of the ligand, generated from its OEMol form.
        """
        return oemol_to_smiles(self.to_oemol(), isomeric=False)


[docs]
    @classmethod
    def from_inchi(cls, inchi: str, **kwargs) -> "Ligand":
        """
        Create a Ligand from an InChI string; a ``data`` keyword is ignored.

        Raises
        ------
        InvalidLigandError
            If OpenEye reports that the InChI string could not be converted.
        """
        kwargs.pop("data", None)
        mol = oechem.OEMol()
        # OEInChIToMol signals failure through its return value; ignoring it
        # would silently build a ligand from an empty molecule
        if not oechem.OEInChIToMol(mol, inchi):
            raise InvalidLigandError(f"Unable to parse InChI string: {inchi}")
        return cls.from_oemol(mol=mol, **kwargs)


    @property
    def inchi(self) -> str:
        """
        InChI string of the ligand (without the fixed hydrogen layer).
        """
        return oemol_to_inchi(mol=self.to_oemol(), fixed_hydrogens=False)

    @property
    def fixed_inchi(self) -> str:
        """
        Returns
        -------
        str
            The fixed hydrogen InChI for the ligand.
        """
        return oemol_to_inchi(mol=self.to_oemol(), fixed_hydrogens=True)

    @property
    def inchikey(self) -> str:
        """
        InChIKey string of the ligand (without the fixed hydrogen layer).
        """
        return oemol_to_inchikey(mol=self.to_oemol(), fixed_hydrogens=False)

    @property
    def fixed_inchikey(self) -> str:
        """
        Returns
        -------
        str
            The fixed hydrogen layer InChIKey for the ligand.
        """
        return oemol_to_inchikey(mol=self.to_oemol(), fixed_hydrogens=True)


[docs]
    @classmethod
    def from_mol2(
        cls,
        mol2_file: Union[str, Path],
        **kwargs,
    ) -> "Ligand":
        """
        Load a ligand from a MOL2 file; the molecule is converted with
        ``from_oemol`` and ``kwargs`` are forwarded to it.

        Parameters
        ----------
        mol2_file : Union[str, Path]
            Path to the MOL2 file
        """
        return cls.from_oemol(load_openeye_mol2(mol2_file), **kwargs)



[docs]
    @classmethod
    def from_sdf(
        cls,
        sdf_file: Union[str, Path],
        **kwargs,
    ) -> "Ligand":
        """
        Load a ligand from an SDF file; the molecule is converted with
        ``from_oemol``, which extracts its SD data into the model.

        Parameters
        ----------
        sdf_file : Union[str, Path]
            Path to the SDF file
        """
        return cls.from_oemol(load_openeye_sdf(sdf_file), **kwargs)



[docs]
    @classmethod
    def from_sdf_str(cls, sdf_str: str, **kwargs) -> "Ligand":
        """
        Create a Ligand from SDF content held in a string; a ``data`` keyword is ignored.
        """
        kwargs.pop("data", None)
        return cls.from_oemol(sdf_string_to_oemol(sdf_str), **kwargs)



[docs]
    def to_sdf(self, filename: Union[str, Path], allow_append=False) -> None:
        """
        Write the ligand to an SDF file, with its attributes stored as SD tags.

        Parameters
        ----------
        filename : Union[str, Path]
            Path to the SDF file
        allow_append : bool, optional
            Open the file in append mode instead of write mode, by default False

        """
        fmode = "a" if allow_append else "w"
        sdf_text = oemol_to_sdf_string(self.to_oemol())
        write_file_directly(filename, sdf_text, mode=fmode)



[docs]
    def set_SD_data(self, data: dict[str, Union[str, list]]) -> None:
        """
        Set the SD data for the ligand, uses an update to overwrite existing data in line with
        OpenEye behaviour.

        Parameters
        ----------
        data : dict[str, Union[str, list]]
            Tag name to value. A single value (or a list of length one) is
            repeated for every conformer; a longer list must contain exactly
            one value per conformer. All values are stored as ``str``.

        Raises
        ------
        ValueError
            If a list's length does not match the number of conformers.
        """
        from drugforge.data.util.data_conversion import get_first_value_of_dict_of_lists

        # num_poses rebuilds the molecule on every access, so read it only once
        n_poses = self.num_poses
        reserved_names = Ligand.model_fields.keys()

        new_data = {}
        for tag, raw in data.items():
            # make sure we don't overwrite any attributes
            if tag in reserved_names:
                warnings.warn(f"Tag name {tag} is a reserved attribute name, skipping")
                continue

            # normalise to a list of str for sdf roundtripping
            values = [str(v) for v in (raw if isinstance(raw, list) else [raw])]

            # a single value applies to every conformer
            if len(values) == 1:
                values = values * n_poses

            if len(values) != n_poses:
                raise ValueError(
                    f"Length of data for tag '{tag}' does not match number of conformers. "
                    f"Expected {n_poses} but got {len(values)} elements."
                )
            new_data[tag] = values

        # update tags and conf_tags!
        self.conf_tags.update(new_data)
        self.tags.update(get_first_value_of_dict_of_lists(new_data))



[docs]
    def to_sdf_str(self) -> str:
        """
        Return the ligand, including its SD data, as an SDF string that can be
        written out to an SDF file.
        """
        return oemol_to_sdf_string(self.to_oemol())



[docs]
    def get_single_conf_SD_data(self, i: int = 0) -> dict[str, str]:
        """
        Collect the SD data of one conformer from ``Ligand.conf_tags``, which
        holds the values of all conformers.

        Parameters
        ----------
        i: int
            Index of the conformer to read. Defaults to the first one (i=0).

        Returns
        -------
        dict[str, str]
            A dictionary of key: value pairs for the SD tags.
        """
        conformer_data = {}
        for tag, values in self.conf_tags.items():
            conformer_data[tag] = values[i]
        return conformer_data



[docs]
    def print_SD_data(self) -> None:
        """
        Print the ``tags`` dictionary of the ligand to stdout.
        """
        sd_tags = self.tags
        print(sd_tags)



[docs]
    def clear_SD_data(self) -> None:
        """
        Reset both ``tags`` and ``conf_tags`` to empty dictionaries.
        """
        self.tags, self.conf_tags = {}, {}



[docs]
    def set_expansion(
        self,
        parent: "Ligand",
        provenance: dict[str, Any],
    ) -> None:
        """
        Record that this ligand was produced by a state expansion by storing a
        StateExpansionTag built from the parent ligand and the expander settings.

        Parameters
        ----------
        parent : Ligand
            The parent ligand from which this child was created.
        provenance : dict[str, Any]
            The provenance dictionary of the state expander used to create this
            ligand, created via `expander.provenance()`, where the keys are fields
            of the expander and the values capture the associated settings.
        """
        tag = StateExpansionTag.from_parent(parent=parent, provenance=provenance)
        self.expansion_tag = tag


    @property
    def flattened(self) -> "Ligand":
        """
        New ligand rebuilt from this ligand's non-isomeric SMILES, i.e. without
        its 3d coordinates and stereochemical information. The result keeps the
        compound name and is tagged with this ligand as its parent.
        """
        flat_smiles = self.non_iso_smiles
        name = self.compound_name
        parent_tag = StateExpansionTag.from_parent(
            parent=self,
            provenance={"oechem": oechem.OEChemGetVersion()},
        )
        return Ligand.from_smiles(
            smiles=flat_smiles,
            compound_name=name,
            expansion_tag=parent_tag,
        )

    @property
    def canonical_tautomer(self) -> "Ligand":
        """
        Canonical tautomer of the ligand as produced by ``oequacpac.OEGetUniqueProtomer``.
        Not necessarily the most physiologically relevant tautomer, but helpful for comparing ligands.

        Raises
        ------
        ValueError
            If OpenEye fails to generate the unique protomer.
        """
        source = self.to_oemol()
        unique_protomer = oechem.OEMol()
        if not oequacpac.OEGetUniqueProtomer(unique_protomer, source):
            raise ValueError("Unable to generate canonical tautomer")

        return Ligand.from_oemol(
            compound_name=self.compound_name,
            mol=unique_protomer,
            expansion_tag=StateExpansionTag.from_parent(
                parent=self,
                provenance={
                    "expander": "oequacpac.OEGetUniqueProtomer",
                    "oechem": oechem.OEChemGetVersion(),
                    "quacpac": oequacpac.OEQuacPacGetVersion(),
                },
            ),
        )

    @property
    def num_poses(self) -> int:
        """
        Number of conformers of the OEMol built from this ligand.
        """
        mol = self.to_oemol()
        return mol.NumConfs()

    @property
    def has_multiple_poses(self) -> bool:
        """
        True when the ligand holds more than one pose.
        """
        pose_count = self.num_poses
        return pose_count > 1

    @property
    def neutralized(self) -> "Ligand":
        """
        Neutralized version of the ligand as produced by ``oequacpac.OESetNeutralpHModel``.

        Raises
        ------
        ValueError
            If OpenEye fails to apply the neutral pH model.
        """
        mol = self.to_oemol()
        if not oequacpac.OESetNeutralpHModel(mol):
            raise ValueError("Unable to generate neutralized ligand")

        return Ligand.from_oemol(
            compound_name=self.compound_name,
            mol=mol,
            expansion_tag=StateExpansionTag.from_parent(
                parent=self,
                provenance={
                    "expander": "oequacpac.OESetNeutralpHModel",
                    "oequacpac": oequacpac.OEQuacPacGetVersion(),
                },
            ),
        )

    @property
    def has_perceived_stereo(self) -> bool:
        """
        Check if the ligand has any stereo bonds or chiral centers.
        Will be true if there are chiral centers even if they are undefined.

        Returns
        -------
        bool
            True if any atom or bond of the ligand is chiral, else False.
        """
        oe_mol = self.to_oemol()
        # atoms are checked first, bonds only when no chiral atom was found
        if any(atom.IsChiral() for atom in oe_mol.GetAtoms()):
            return True
        return any(bond.IsChiral() for bond in oe_mol.GetBonds())

    @property
    def has_defined_stereo(self) -> bool:
        """
        Check if the ligand has defined stereochemistry.

        True when at least one atom or bond is both chiral and has its stereo
        specified. Stereo that is specified on a non-chiral atom or bond
        (an "over-defined" position) does not count.
        """
        mol = self.to_oemol()
        if any(
            atom.IsChiral() and atom.HasStereoSpecified() for atom in mol.GetAtoms()
        ):
            return True
        return any(
            bond.IsChiral() and bond.HasStereoSpecified() for bond in mol.GetBonds()
        )


[docs]
    def is_chemically_equal(self, other: "Ligand") -> bool:
        """
        Check if the ligand is chemically equal to another ligand: both must
        share the same fixed hydrogen InChIKey and agree on whether they have
        defined stereochemistry.
        """
        if self.fixed_inchikey != other.fixed_inchikey:
            return False
        return self.has_defined_stereo == other.has_defined_stereo



[docs]
    def is_stereoisomer(self, other: "Ligand") -> bool:
        """
        Check if the ligand is a possible stereoisomer of another ligand, i.e.
        both share the same non-isomeric SMILES.
        Returns False if the ligands are chemically equal.
        """
        # identical molecules are not reported as stereoisomers
        identical = self.is_chemically_equal(other)
        return (not identical) and self.non_iso_smiles == other.non_iso_smiles



[docs]
    def has_same_charge(self, other: "Ligand") -> bool:
        """
        Check if both ligands have the same net charge (the ligands can be the same).
        """
        own_charge = oechem.OENetCharge(self.to_oemol())
        other_charge = oechem.OENetCharge(other.to_oemol())
        return own_charge == other_charge



[docs]
    def is_protonation_state_isomer(self, other: "Ligand") -> bool:
        """
        Check if the ligand is a conjugate acid or base of another ligand:
        the two are not chemically equal, but their neutralized forms are.
        """
        already_equal = self.is_chemically_equal(other)
        return (not already_equal) and self.neutralized.is_chemically_equal(
            other.neutralized
        )



[docs]
    def is_tautomer(self, other: "Ligand") -> bool:
        """
        Check if the ligand is a tautomer of another ligand, excluding protonation state isomers.

        Returns False if the ligands are chemically equal or differ in net
        charge; otherwise compares their canonical tautomers.
        """
        if self.is_chemically_equal(other):
            return False
        # a different net charge means a protonation state difference, not a tautomer
        if not self.has_same_charge(other):
            return False
        return self.canonical_tautomer.is_chemically_equal(other.canonical_tautomer)



[docs]
    def get_chemical_relationship(self, other: "Ligand") -> ChemicalRelationship:
        """
        Get the chemical relationship between two ligands.

        The simple, mutually exclusive relationships are tested first and
        returned directly. Otherwise the result is a combination of flags
        derived from the flattened / neutralized forms, or ``DISTINCT`` when
        none of them applies.
        """
        # First check the easy, mutually distinct relationships
        if self.is_chemically_equal(other):
            return ChemicalRelationship.IDENTICAL
        if self.is_stereoisomer(other):
            return ChemicalRelationship.STEREOISOMER
        if self.is_protonation_state_isomer(other):
            return ChemicalRelationship.PROTONATION_STATE_ISOMER
        if self.is_tautomer(other):
            return ChemicalRelationship.TAUTOMER

        # now we can worry about the complicated ones
        # Each derived ligand is rebuilt on every property access, so build the
        # derived forms once and reuse both the ligands and the tautomer checks.
        self_neutral_flat = self.neutralized.flattened
        other_neutral_flat = other.neutralized.flattened
        self_flat = self.flattened
        other_flat = other.flattened

        neutral_flat_tautomers = self_neutral_flat.is_tautomer(other_neutral_flat)
        flat_tautomers = self_flat.is_tautomer(other_flat)

        relationship = ChemicalRelationship.UNKNOWN

        if neutral_flat_tautomers or flat_tautomers:
            relationship |= ChemicalRelationship.TAUTOMER

        if self_flat.is_protonation_state_isomer(other_flat):
            relationship |= ChemicalRelationship.PROTONATION_STATE_ISOMER

        # tautomers only after neutralization: they also differ in protonation state
        if neutral_flat_tautomers and not flat_tautomers:
            relationship |= ChemicalRelationship.PROTONATION_STATE_ISOMER

        if self.canonical_tautomer.is_stereoisomer(other.canonical_tautomer):
            relationship |= ChemicalRelationship.STEREOISOMER

        if relationship == ChemicalRelationship.UNKNOWN:
            relationship |= ChemicalRelationship.DISTINCT
        return relationship



[docs]
    def sort_confs_by_sd_tag_value(self, by: str, ascending: bool = True) -> np.ndarray:
        """
        Sort the conformers of the ligand by a particular sd tag.
        Changes the Ligand object IN PLACE and returns the indices of the conformers in the sorted order.

        Tag values are compared numerically when every value of the tag can be
        converted to ``float``; otherwise they are compared as stored.

        Parameters
        ----------
        by: str
            Key value of SD tag to use
        ascending: bool
            Whether to sort the values in ascending order, by default True.

        Returns
        -------
        np.ndarray
            Array of len(num_confs) returned by `np.argsort`.
            Represents the set of indices that sorts the original conformer list into the new order.

        Raises
        ------
        ValueError
            If 'by' tag not found in ligand tags or if unable to sort the conformers
        """
        from drugforge.data.backend.openeye import get_SD_data

        if self.num_poses == 1:
            warnings.warn("Only one conformer present, no sorting will be done")
            return np.array([0])

        if not self.tags.get(by):
            raise ValueError(f"Tag {by} not found in ligand tags: {self.tags.keys()}")

        # Conformer tag values are stored as strings; sorting them as text would
        # order e.g. "10.0" before "9.0", so prefer a numeric comparison.
        raw_values = self.conf_tags[by]
        try:
            sort_keys = np.array([float(value) for value in raw_values])
        except (TypeError, ValueError):
            # not a purely numeric tag, fall back to comparing the raw values
            sort_keys = np.array(raw_values)

        mol = self.to_oemol()
        confs = np.array([conf for conf in mol.GetConfs()])
        sort_array = np.argsort(sort_keys)
        if not ascending:
            sort_array = np.flip(sort_array)

        if not mol.OrderConfs(confs[sort_array]):
            raise ValueError("Unable to sort conformers")

        # re-read the SD data in the new conformer order, then store the reordered molecule
        self.set_SD_data(get_SD_data(mol))
        self.data = oemol_to_sdf_string(mol)
        return sort_array





[docs]
class ReferenceLigand(Ligand):
    """A Ligand that additionally carries an optional ``target_name``."""

    # optional target name; defaults to None when not provided
    target_name: Optional[str] = None




[docs]
def write_ligands_to_multi_sdf(
    sdf_name: Union[str, Path],
    ligands: list["Ligand"],
    overwrite=False,
):
    """
    Write several ligands into one SDF file by appending each ligand to the
    same file in turn.

    An existing file is only replaced when ``overwrite`` is True, and it is
    not deleted until the file name has been validated.

    Parameters
    ----------
    sdf_name : Union[str, Path]
        Path to the SDF file
    ligands : list[Ligand]
        List of ligands to write out
    overwrite : bool, optional
        Overwrite the file if it exists, by default False

    Raises
    ------
    FileExistsError
        If the file exists and overwrite is False
    ValueError
        If the sdf_name does not end in .sdf
    """

    sdf_file = Path(sdf_name)
    already_there = sdf_file.exists()

    if already_there and not overwrite:
        raise FileExistsError(f"{sdf_file} exists and overwrite is False")

    # validate the name BEFORE removing anything, so a bad suffix never
    # destroys an existing file
    if sdf_file.suffix != ".sdf":
        raise ValueError("SDF name must end in .sdf")

    if already_there:
        sdf_file.unlink()

    for ligand in ligands:
        ligand.to_sdf(sdf_file, allow_append=True)