Source code for drugforge.data.schema.target

import logging
from pathlib import Path
from typing import Any, Dict, Optional, Tuple, Union  # noqa: F401

from drugforge.data.backend.openeye import (
    load_openeye_pdb,
    oechem,
    oemol_to_pdb_string,
    pdb_string_to_oemol,
    split_openeye_mol,
)
from drugforge.data.schema.identifiers import TargetIdentifiers
from drugforge.data.schema.schema_base import (
    DataModelAbstractBase,
    DataStorageType,
    MoleculeFilter,
    check_strings_for_equality_with_exclusion,
    schema_dict_get_val_overload,
    write_file_directly,
)
from pydantic import Field, model_validator

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class InvalidTargetError(ValueError):
    """Error type for invalid targets.

    Subclasses ``ValueError``, so handlers written for ``ValueError`` also
    catch this exception.
    """
class Target(DataModelAbstractBase):
    """
    Schema for a Target, wrapper around a PDB file.

    The PDB contents are held as a string in ``data``; the helper methods
    convert that string to and from files and OpenEye molecules.
    """

    target_name: str = Field(None, description="The name of the target")

    ids: Optional[TargetIdentifiers] = Field(
        None,
        description="TargetIdentifiers Schema for identifiers associated with this target",
    )

    data: str = Field(
        "",
        description="PDB file stored as a string to hold internal data state",
        repr=False,
    )
    data_format: DataStorageType = Field(
        DataStorageType.pdb,
        description="Enum describing the data storage method",
        frozen=True,
    )

    @model_validator(mode="before")
    @classmethod
    def _validate_at_least_one_id(cls, v):
        """
        Require either a ``target_name`` or at least one populated identifier.

        Parameters
        ----------
        v
            Raw input handed to the model before field validation.
            NOTE(review): ``.get`` is called on it, so it is assumed to be
            dict-like -- confirm against callers.

        Returns
        -------
        The input ``v``, unchanged.

        Raises
        ------
        ValueError
            If ``target_name`` is missing/None and ``ids`` is None or every
            value obtained from it is falsy.
        """
        # Callers can opt out of the check entirely with this flag.
        if v.get("_skip_validate_ids"):
            return v

        ids = v.get("ids")
        compound_name = v.get("target_name")
        # check if all the identifiers are None, sometimes when this is called from
        # an already instantiated object we need to be able to handle a dict and
        # an instantiated class; schema_dict_get_val_overload is used to get the
        # values in either case.
        if compound_name is None:
            if ids is None or all(
                not id_value for id_value in schema_dict_get_val_overload(ids)
            ):
                raise ValueError(
                    "At least one identifier must be provided, or target_name must be provided"
                )
        return v

    @classmethod
    def from_pdb(
        cls,
        pdb_file: Union[str, Path],
        target_chains=None,
        ligand_chain="",
        **kwargs,
    ) -> "Target":
        """
        Build a Target from a PDB file.

        The file is loaded as a full complex, split using a ``MoleculeFilter``
        built from the given chains, and the ``"prot"`` entry of the split
        result is stored.

        Parameters
        ----------
        pdb_file : Union[str, Path]
            Path to the PDB file to load.
        target_chains : optional
            Passed to ``MoleculeFilter`` as ``protein_chains``. ``None``
            (the default) is treated as an empty list.
        ligand_chain : str
            Passed to ``MoleculeFilter`` as ``ligand_chain``.
        **kwargs
            Forwarded to ``from_oemol``; any ``data`` entry is discarded
            because the data is taken from the file.

        Returns
        -------
        Target
        """
        # A fresh list per call avoids sharing one mutable default object
        # between invocations.
        if target_chains is None:
            target_chains = []

        kwargs.pop("data", None)
        # directly read in data
        # First load full complex molecule
        complex_mol = load_openeye_pdb(pdb_file)

        # Split molecule into parts using given chains
        mol_filter = MoleculeFilter(
            protein_chains=target_chains, ligand_chain=ligand_chain
        )
        split_dict = split_openeye_mol(complex_mol, mol_filter)

        return cls.from_oemol(split_dict["prot"], **kwargs)

    def to_pdb(self, filename: Union[str, Path]) -> None:
        """Write the stored PDB string to ``filename``."""
        # directly write out data
        write_file_directly(filename, self.data)

    @classmethod
    def from_oemol(cls, mol: oechem.OEMol, **kwargs) -> "Target":
        """
        Build a Target from an OpenEye molecule.

        The molecule is converted to a PDB string which becomes ``data``;
        any ``data`` entry in ``kwargs`` is discarded, the remaining keyword
        arguments are passed to the constructor.
        """
        kwargs.pop("data", None)
        pdb_str = oemol_to_pdb_string(mol)
        return cls(data=pdb_str, **kwargs)

    def to_oemol(self) -> oechem.OEMol:
        """Convert the stored PDB string to an OpenEye molecule."""
        return pdb_string_to_oemol(self.data)

    def __eq__(self, other: Any) -> bool:
        """
        Compare two Targets by their PDB data.

        Returns ``NotImplemented`` for non-Target operands so Python can fall
        back to the other operand's comparison.
        """
        if not isinstance(other, Target):
            return NotImplemented

        # check if the data is the same
        # but exclude the MASTER record as this is not always in the SAME PLACE
        # for some strange reason
        return check_strings_for_equality_with_exclusion(
            self.data, other.data, "MASTER"
        )

    def __ne__(self, other: Any) -> bool:
        """Inverse of ``__eq__``, propagating ``NotImplemented`` unchanged."""
        result = self.__eq__(other)
        # Negating NotImplemented would yield False (i.e. "equal") for
        # non-Target operands, and evaluating it as a boolean is deprecated,
        # so hand it back untouched.
        if result is NotImplemented:
            return result
        return not result

    @property
    def hash(self) -> str:
        """Create a hash based on the pdb file contents (SHA-256 hex digest)."""
        import hashlib

        return hashlib.sha256(self.data.encode()).hexdigest()

    @property
    def crystal_symmetry(self):
        """
        Get the crystal symmetry of the target
        """
        return oechem.OEGetCrystalSymmetry(self.to_oemol())