# Source code for haddock.modules.analysis.seletopclusts

"""Select models from the top clusters.

This module selects a number of models from a number of clusters. The
selection is based on the score of the models within the clusters.

In the standard HADDOCK analysis, the top 4 models of the top 10 clusters
are shown. In case seletopclusts is run after a sampling module, we can
keep a few models from all the clusters to have more diversity at the
refinement stage(s).
"""

from pathlib import Path

from haddock import log
from haddock.core.defaults import MODULE_DEFAULT_YAML
from haddock.core.typing import Any, FilePath
from haddock.modules import BaseHaddockModule
from haddock.modules.analysis.seletopclusts.seletopclusts import (
    select_top_clusts_models,
    write_selected_models,
    )


# Directory that contains this module's source file; its name is reused
# as the module name by the class below.
RECIPE_PATH = Path(__file__).resolve().parents[0]
# Default parameter file, looked up next to this source file.
DEFAULT_CONFIG = RECIPE_PATH / MODULE_DEFAULT_YAML


class HaddockModule(BaseHaddockModule):
    """Haddock Module for 'seletopclusts'.

    Selects models from the top-ranked clusters of the previous step and
    exposes the selection as this module's output models.
    """

    # The module name is the name of the directory holding this file.
    name = RECIPE_PATH.name

    def __init__(self,
                 order: int,
                 path: Path,
                 *ignore: Any,
                 init_params: FilePath = DEFAULT_CONFIG,
                 **everything: Any) -> None:
        """Initialise the module.

        Parameters
        ----------
        order : int
            Forwarded unchanged to the base class initialiser.
        path : Path
            Forwarded unchanged to the base class initialiser; also the
            location handed to ``write_selected_models`` via ``self.path``
            (presumably set by the base class -- confirm there).
        *ignore : Any
            Extra positional arguments; accepted and discarded.
        init_params : FilePath
            Parameter file forwarded to the base class. Defaults to the
            YAML file shipped next to this module.
        **everything : Any
            Extra keyword arguments; accepted and discarded.
        """
        super().__init__(order, path, init_params)

    @classmethod
    def confirm_installation(cls) -> None:
        """Confirm if module is installed.

        No check is performed here; the method returns immediately.
        """
        return

    def _run(self) -> None:
        """Execute the module's protocol.

        Steps, in order: validate ``top_models`` and ``top_cluster``,
        retrieve the models of the previous step, make sure every model
        carries a cluster rank, select the models, log the selection
        notes, write the selected models to disk and export them as this
        module's output.
        """
        # Check parameters validity.
        # A NaN value compares False against 0 and therefore passes this
        # check, which is what the error message advertises.
        if self.params["top_models"] <= 0:
            _msg = "top_models must be either > 0 or nan."
            self.finish_with_error(_msg)

        if not isinstance(self.params["top_cluster"], int):
            _msg = "top_cluster must be an integer."
            self.finish_with_error(_msg)

        # Retrieve list of previous models
        models_to_select = self.previous_io.retrieve_models()

        # Check if cluster info is accessible. A generator lets `any`
        # stop at the first model without a cluster rank instead of
        # building an intermediate list.
        if any(mdl.clt_rank is None for mdl in models_to_select):
            _msg = (
                "Impossible to obtain cluster information. Please consider "
                "running a clustering method prior to this module."
                )
            self.finish_with_error(_msg)

        # Make model selection
        selected_models, _notes = select_top_clusts_models(
            self.params["sortby"],
            models_to_select,
            self.params["top_cluster"],
            self.params["top_models"],
            )

        # Log notes
        for note in _notes:
            log.info(note)

        # Dump the models to disk and change their attributes
        renamed_models = write_selected_models(
            "seletopclusts.txt",
            selected_models,
            self.path,
            )

        # Make these new models the output of this module
        self.output_models = renamed_models

        # Export outputs
        self.export_io_models()