# Source code for haddock.modules.analysis.filter

"""Filter models based on their score.

This module filters the input models based on their score using a threshold
value. Models having a score higher than the threshold value are filtered out.

The number of models to be selected is not known in advance: it is the set of
models that have a score at or below the defined threshold.
For this module to be functional, a score must be first computed. This can be
performed by running a CNS module or a scoring module. If scores are not
accessible, the workflow will terminate with an error message.

If the threshold value is too stringent, resulting in no models passed to the
next module, the workflow will stop with an error message.
"""

from pathlib import Path

from haddock.core.defaults import MODULE_DEFAULT_YAML
from haddock.core.typing import Any, FilePath
from haddock.libs.libontology import Format, PDBFile
from haddock.modules import BaseHaddockModule


RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)



# (Sphinx viewcode "[docs]" link marker -- HTML extraction artifact)
class HaddockModule(BaseHaddockModule):
    """HADDOCK3 module to filter models using a score threshold.

    Models whose ``score`` attribute is lower than or equal to the
    ``threshold`` parameter are kept and exported; all other models
    (including those without an accessible score) are discarded.
    """

    name = RECIPE_PATH.name

    def __init__(self,
                 order: int,
                 path: Path,
                 *ignore: Any,
                 init_params: FilePath = DEFAULT_CONFIG,
                 **everything: Any) -> None:
        """Initialise the module.

        Parameters
        ----------
        order : int
            Forwarded to the base class constructor.
        path : Path
            Forwarded to the base class constructor.
        *ignore : Any
            Extra positional arguments; accepted but not used here.
        init_params : FilePath
            Default parameters file; forwarded to the base class
            constructor.
        **everything : Any
            Extra keyword arguments; accepted but not used here.
        """
        super().__init__(order, path, init_params)

    @classmethod
    def confirm_installation(cls) -> None:
        """Confirm if module is installed."""
        return

    def _run(self) -> None:
        """Execute module.

        Collect the PDB models from the previous step, keep those whose
        score is lower than or equal to ``self.params["threshold"]`` and
        export them. The module finishes with an error when no PDB model
        is available, when none of the models has a score, or when the
        threshold filters out every model.
        """
        # Make sure we have access to complexes
        # NOTE(review): `iter` is a builtin function, not a type, so this
        # comparison looks like it can never be true. It is kept unchanged
        # because the intended check cannot be determined from this file
        # -- confirm against the other modules.
        if type(self.previous_io) == iter:
            self.finish_with_error(
                "[filter] This module cannot come after one"
                " that produced an iterable."
                )
        # Get the models generated in previous step
        models: list[PDBFile] = [
            p
            for p in self.previous_io.output
            if p.file_type == Format.PDB
            ]

        # Without any input model there is nothing to filter, and the
        # ratios computed below would divide by zero.
        if not models:
            self.finish_with_error(
                "[filter] No PDB models were found in the output of the "
                "previous module."
                )
            # Defensive: never reach the divisions below with no models,
            # even if `finish_with_error` were to return.
            return

        # Get the filter by parameter
        filter_by = "score"
        threshold = self.params["threshold"]

        # Make sure we can access this attribute on models
        models_with_attributes: list[PDBFile] = [
            m for m in models
            if getattr(m, filter_by, None) is not None
            ]

        # Check how many of them are available
        ratio_models_with_attr = len(models_with_attributes) / len(models)
        self.log(
            f"{100 * ratio_models_with_attr:6.2f} % "
            "of the input models have accessible scores."
            )
        if not models_with_attributes:
            self.finish_with_error(
                "Input models do not have scores. "
                "Please consider running a scoring module before!"
                )

        # Process to the actual filtering step
        filtered_models: list[PDBFile] = [
            m for m in models_with_attributes
            if getattr(m, filter_by) <= threshold
            ]

        # Final evaluation of the outcome of the filtering.
        # The percentage is relative to ALL input models, so models without
        # an accessible score count as filtered out.
        percent_filtered = (1 - (len(filtered_models) / len(models))) * 100
        if not filtered_models:
            self.finish_with_error(
                f"With the currently set 'threshold' value of {threshold}, "
                "ALL models were filtered out."
                )
        else:
            self.log(
                f"With currently set 'threshold' value of {threshold}, "
                f"{percent_filtered:6.2f}% of the models were filtered out."
                )

        # select the models based on the parameter
        self.output_models = filtered_models
        self.export_io_models()