Source code for haddock.modules.analysis.filter

"""Filter models based on their score.

This module filters the input models based on their score using a threshold
value. Models with a score higher than the threshold value are filtered out.

The number of models to be selected is not known in advance: it is the set of
models that have a score lower than or equal to the defined threshold.
For this module to be functional, a score must be first computed. This can be
performed by running a CNS module or a scoring module. If scores are not
accessible, the workflow will terminate with an error message.

If the threshold value is too stringent, resulting in no models passed to the
next module, the workflow will stop with an error message.
"""

from pathlib import Path

from haddock.core.defaults import MODULE_DEFAULT_YAML
from haddock.core.typing import Any, FilePath
from haddock.libs.libontology import Format, PDBFile
from haddock.modules import BaseHaddockModule


RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)


class HaddockModule(BaseHaddockModule):
    """HADDOCK3 module to filter models based on a score threshold.

    Models retrieved from the previous step are kept only when their
    ``score`` attribute is lower than or equal to the ``threshold``
    parameter.
    """

    name = RECIPE_PATH.name

    def __init__(
            self,
            order: int,
            path: Path,
            *ignore: Any,
            init_params: FilePath = DEFAULT_CONFIG,
            **everything: Any,
            ) -> None:
        """Initialise the module.

        Parameters
        ----------
        order : int
            Forwarded to the base class constructor.
        path : Path
            Forwarded to the base class constructor.
        *ignore : Any
            Extra positional arguments; accepted and discarded.
        init_params : FilePath
            Forwarded to the base class constructor. Defaults to the
            default YAML file located next to this module.
        **everything : Any
            Extra keyword arguments; accepted and discarded.
        """
        super().__init__(order, path, init_params)

    @classmethod
    def confirm_installation(cls) -> None:
        """Confirm if module is installed.

        Nothing is checked here: the method returns immediately.
        """
        return

    def _run(self) -> None:
        """Execute module.

        Steps:

        1. collect the PDB models from ``self.previous_io.output``;
        2. keep those exposing a non-``None`` ``score`` attribute;
        3. keep those whose score is ``<=`` ``self.params["threshold"]``;
        4. store the survivors in ``self.output_models`` and export them.

        ``self.finish_with_error`` is called when no PDB model is
        available, when no model has a score, or when every model is
        filtered out.
        """
        # Make sure we have access to complexes
        # NOTE(review): ``iter`` is a builtin function, not a class, so this
        # comparison can never be true and the guard never triggers. It is
        # kept unchanged because the type it was meant to detect is not
        # visible from this file -- confirm and replace with `isinstance`.
        if type(self.previous_io) == iter:
            self.finish_with_error(
                "[filter] This module cannot come after one"
                " that produced an iterable."
                )

        # Get the models generated in previous step
        models: list[PDBFile] = [
            p
            for p in self.previous_io.output
            if p.file_type == Format.PDB
            ]

        # Without any model the ratios below would divide by zero; stop with
        # an explicit message instead of a ZeroDivisionError.
        if not models:
            self.finish_with_error(
                "[filter] No PDB models were found in the output of the "
                "previous module."
                )
            # `finish_with_error` is presumably aborting the run; the
            # explicit return keeps the guard safe even if it does not.
            return

        # Get the filter by parameter
        filter_by = "score"
        threshold = self.params["threshold"]

        # Make sure we can access this attribute on models
        models_with_attributes: list[PDBFile] = [
            m
            for m in models
            if getattr(m, filter_by, None) is not None
            ]

        # Report the fraction of models that DO have a score
        ratio_models_with_attr = len(models_with_attributes) / len(models)
        self.log(
            f"{100 * ratio_models_with_attr:6.2f} % "
            "of the input models have accessible scores."
            )
        if not models_with_attributes:
            self.finish_with_error(
                "Input models do not have scores. "
                "Please consider running a scoring module before!"
                )
            return

        # Process to the actual filtering step
        filtered_models: list[PDBFile] = [
            m
            for m in models_with_attributes
            if getattr(m, filter_by) <= threshold
            ]

        # Final evaluation of the outcome of the filtering.
        # The percentage is relative to ALL input models, so models lacking
        # a score are counted as filtered out as well.
        percent_filtered = (1 - (len(filtered_models) / len(models))) * 100
        if not filtered_models:
            self.finish_with_error(
                f"With the currently set 'threshold' value of {threshold}, "
                "ALL models were filtered out."
                )
            return

        self.log(
            f"With currently set 'threshold' value of {threshold}, "
            f"{percent_filtered:6.2f}% of the models were filtered out."
            )

        # select the models based on the parameter
        self.output_models = filtered_models
        self.export_io_models()