Source code for haddock.modules.analysis.caprieval

"""Calculate CAPRI metrics for the input models.

By default the following metrics are calculated:

- FNAT (fraction of native contacts), namely the fraction of
    intermolecular contacts in the docked complex that are also
    present in the reference complex.
- IRMSD (interface root mean square deviation), namely the RMSD
    of the interface of the docked complex with respect
    to the reference complex.
- LRMSD (ligand root mean square deviation), namely the RMSD of the
    ligand of the docked complex with respect to the
    reference complex upon superposition of the receptor.
- DOCKQ, a measure of the quality of the docked model obtained
    by combining FNAT, IRMSD and LRMSD (see
    Basu and Wallner 2016, PLoS ONE 11 (8), e0161879); a sketch of this
    combination is given after this list.
- ILRMSD (interface ligand root mean square deviation), the RMSD of the
    ligand of the docked complex with respect to the reference complex
    upon superposition of the interface of the receptor.
- GLOBAL_RMSD, the full RMSD between the reference and the model.
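
A minimal sketch of how DOCKQ combines the other three metrics; the
scaling constants (1.5 Å for IRMSD, 8.5 Å for LRMSD) follow Basu and
Wallner 2016, and this illustration is not the code used by this
module::

    def dockq(fnat, irmsd, lrmsd):
        # Scale the two RMSD-based terms to the [0, 1] range
        scaled_irmsd = 1 / (1 + (irmsd / 1.5) ** 2)
        scaled_lrmsd = 1 / (1 + (lrmsd / 8.5) ** 2)
        # DOCKQ is the average of the three scaled terms
        return (fnat + scaled_irmsd + scaled_lrmsd) / 3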

The following files are generated:

- **capri_ss.tsv**: a table with the CAPRI metrics for each model
    (see the loading sketch after this list).
- **capri_clt.tsv**: a table with the CAPRI metrics for each cluster of models (if clustering information is available).
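
Both tables can be loaded for further analysis, for example with pandas
(assuming pandas is available and that the tables are tab-separated, as
the ``.tsv`` extension suggests)::

    import pandas as pd

    # read_table uses a tab separator by default
    ss = pd.read_table("capri_ss.tsv")    # per-model metrics
    clt = pd.read_table("capri_clt.tsv")  # per-cluster metrics
    print(ss.head())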
"""

from pathlib import Path

from haddock.core.defaults import MODULE_DEFAULT_YAML
from haddock.core.typing import FilePath, Union
from haddock.libs.libontology import PDBFile
from haddock.libs.libparallel import Scheduler
from haddock.modules import BaseHaddockModule
from haddock.modules.analysis.caprieval.capri import (
    CAPRI,
    capri_cluster_analysis,
    dump_weights,
    extract_data_from_capri_class,
    )


RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)


class HaddockModule(BaseHaddockModule):
    """HADDOCK3 module to calculate the CAPRI metrics."""

    name = RECIPE_PATH.name

    def __init__(
            self,
            order: int,
            path: Path,
            init_params: FilePath = DEFAULT_CONFIG,
            ) -> None:
        super().__init__(order, path, init_params)

    @classmethod
    def confirm_installation(cls) -> None:
        """Confirm if contact executable is compiled."""
        return

    @staticmethod
    def is_nested(models: list[Union[PDBFile, list[PDBFile]]]) -> bool:
        """Check whether the list of models contains a nested list."""
        for model in models:
            if isinstance(model, list):
                return True
        return False

    def _run(self) -> None:
        """Execute module."""
        # Get the models generated in the previous step
        if type(self.previous_io) == iter:
            _e = "This module cannot come after one that produced an iterable."
            self.finish_with_error(_e)

        models = self.previous_io.retrieve_models(individualize=True)
        if self.is_nested(models):
            raise ValueError(
                "CAPRI module cannot be executed after modules that "
                "produce a nested list of models"
                )

        # Dump previously used weights
        dump_weights(self.order)

        # Sort by score to find the "best" model
        models.sort()
        best_model = models[0]
        assert isinstance(best_model, PDBFile), "Best model is not a PDBFile"
        best_model_fname = best_model.rel_path

        if self.params["reference_fname"]:
            reference = Path(self.params["reference_fname"])
        else:
            self.log(
                "No reference was given. "
                "Using the structure with the lowest score from the previous step"
                )
            reference = best_model_fname

        # Each model is a job; this is not the most efficient way,
        # but by assigning each model to an individual job we can handle
        # scenarios in which the models are heterogeneous,
        # for example during CAPRI scoring
        jobs: list[CAPRI] = []
        for i, model_to_be_evaluated in enumerate(models, start=1):
            if isinstance(model_to_be_evaluated, list):
                # `model_to_be_evaluated` cannot be a list;
                # the `CAPRI` class expects a single model
                raise ValueError(
                    "CAPRI module cannot handle a list of `model_to_be_evaluated`"
                    )
            jobs.append(
                CAPRI(
                    identificator=i,
                    model=model_to_be_evaluated,
                    path=Path("."),
                    reference=reference,
                    params=self.params,
                    )
                )

        engine = Scheduler(
            tasks=jobs,
            ncores=self.params["ncores"],
            max_cpus=self.params["max_cpus"],
            )
        engine.run()

        # Restore the submission order before writing the output tables
        jobs = engine.results
        jobs = sorted(jobs, key=lambda capri: capri.identificator)

        extract_data_from_capri_class(
            capri_objects=jobs,
            output_fname=Path(".", "capri_ss.tsv"),
            sort_key=self.params["sortby"],
            sort_ascending=self.params["sort_ascending"],
            )

        capri_cluster_analysis(
            capri_list=jobs,
            model_list=models,  # type: ignore  # not nested, checked above
            output_fname="capri_clt.tsv",
            clt_threshold=self.params["clt_threshold"],
            # output_count=len(capri_jobs),
            sort_key=self.params["sortby"],
            sort_ascending=self.params["sort_ascending"],
            path=Path("."),
            )

        # Send the models to the next step; no operation is done on them
        self.output_models = models  # type: ignore  # not nested, checked above
        self.export_io_models()