# Source code for haddock.libs.libcns

"""CNS scripts util functions."""

import itertools
import math
from functools import partial
from os import linesep
from pathlib import Path

from haddock import EmptyPath, log
from haddock.core import cns_paths
from haddock.core.typing import Any, FilePath, FilePathT, Optional, Union
from haddock.libs import libpdb
from haddock.libs.libfunc import false, true
from haddock.libs.libmath import RandomNumberGenerator
from haddock.libs.libontology import PDBFile
from haddock.libs.libutil import transform_to_list


# Module-level random number generator. The input-preparation functions in
# this module call ``RND.randint(100, 99999)`` to draw a CNS seed.
RND = RandomNumberGenerator()


def generate_default_header(
    path: Optional[FilePath] = None,
) -> tuple[str, str, str, str, str, str]:
    """
    Build the default CNS header sections.

    Parameters
    ----------
    path : FilePath, optional
        Base location handed to the ``cns_paths.get_*`` helpers and joined
        with the link and scatter file names. When ``None``, the
        module-level defaults stored in ``cns_paths`` are used instead.

    Returns
    -------
    tuple of str
        Six header sections, in this order: link, translation vectors,
        tensors, scatter library, axis, water box.
    """
    # TODO: Remove the `type: ignore` comments
    if path is None:
        # No custom location: rely on the defaults shipped in `cns_paths`.
        axis = load_axis(**cns_paths.axis)  # type: ignore
        link = load_link(cns_paths.link_file)
        scatter = load_scatter(cns_paths.scatter_lib)
        tensor = load_tensor(**cns_paths.tensors)  # type: ignore
        trans_vec = load_trans_vectors(
            **cns_paths.translation_vectors  # type: ignore
        )
        water_box = load_boxtyp20(cns_paths.water_box["boxtyp20"])
        return link, trans_vec, tensor, scatter, axis, water_box

    # Custom location: resolve every support file relative to `path`.
    axis = load_axis(**cns_paths.get_axis(path))  # type: ignore
    link = load_link(Path(path, cns_paths.LINK_FILE))
    scatter = load_scatter(Path(path, cns_paths.SCATTER_LIB))
    tensor = load_tensor(**cns_paths.get_tensors(path))  # type: ignore
    trans_vec = load_trans_vectors(
        **cns_paths.get_translation_vectors(path)  # type: ignore
    )  # noqa: E501
    water_box = load_boxtyp20(cns_paths.get_water_box(path)["boxtyp20"])
    return link, trans_vec, tensor, scatter, axis, water_box
def _is_nan(x: Any) -> bool: """Inspect if is nan.""" try: return math.isnan(x) except (ValueError, TypeError): return False
def filter_empty_vars(v: Any) -> bool:
    """
    Decide whether a variable should be written to CNS.

    See: https://github.com/haddocking/haddock3/issues/162

    Parameters
    ----------
    v : Any
        The value to classify.

    Returns
    -------
    bool
        `True` for values that are not empty and `False` for empty ones
        (NaN, empty string, `None`). `False` therefore marks the
        variables that should not be written in CNS.

    Raises
    ------
    TypeError
        If the type of `v` is not supported by CNS.
    """
    # NaN is tested first so that a float NaN is never accepted as a number.
    if _is_nan(v):
        return false(v)

    # Strings are kept only when they are non-empty.
    if isinstance(v, str):
        if v:
            return true(v)
        return false(v)

    # Booleans (including `False`), paths and plain numbers are always kept.
    # The exact-type test on int/float deliberately excludes subclasses.
    if isinstance(v, (bool, EmptyPath, Path)) or type(v) in (int, float):
        return true(v)

    if v is None:
        return false(v)

    emsg = f"Value {v!r} has a unknown type for CNS: {type(v)}."
    log.error(emsg)
    raise TypeError(emsg)
def load_workflow_params(
    param_header: str = f"{linesep}! Parameters{linesep}",
    **params: Any,
) -> str:
    """
    Render a CNS header section from key/value parameters.

    "Empty variables" are skipped. These are defined according to
    :func:`filter_empty_vars`.

    Parameters
    ----------
    param_header : str
        Text placed before the parameter lines.

    params : dict
        Key:value pairs for the parameters to be written to CNS, one
        eval line per pair. Values cannot be of dictionary type.

    Returns
    -------
    str
        `param_header` followed by the CNS eval lines of every
        non-empty parameter, in the order they were given.
    """
    for name, value in params.items():
        # Empty values (see `filter_empty_vars`) produce no CNS line.
        if not filter_empty_vars(value):
            continue
        param_header = param_header + write_eval_line(name, value)

    assert isinstance(param_header, str)
    return param_header
def write_eval_line(param: Any, value: Any, eval_line: str = "eval (${}={})") -> str:
    """
    Format one CNS eval line for `param`, according to the type of `value`.

    Parameters
    ----------
    param : Any
        Name of the CNS variable.

    value : Any
        Value to assign. Booleans are written lowercase, strings and
        paths are wrapped in double quotes, an `EmptyPath` is written as
        an empty quoted string, and ints/floats are written as they are.

    eval_line : str
        Template with two ``{}`` placeholders (name, then value).

    Returns
    -------
    str
        The formatted line, terminated by the OS line separator.

    Raises
    ------
    TypeError
        If `value` is of none of the supported types.
    """
    template = eval_line + linesep

    # `bool` must be tested before the numeric case: it is a subclass of int.
    if isinstance(value, bool):
        rendered: Any = str(value).lower()
    elif isinstance(value, str):
        rendered = '"' + value + '"'
    elif isinstance(value, Path):
        rendered = f'"{value!s}"'
    elif isinstance(value, EmptyPath):
        rendered = '""'
    elif isinstance(value, (int, float)):
        rendered = value
    else:
        emsg = f"Unexpected type when writing CNS header: {type(value)}"
        log.error(emsg)
        raise TypeError(emsg)

    return template.format(param, rendered)
load_axis = partial( load_workflow_params, param_header=f"{linesep}! Axis{linesep}" ) # noqa: E501 load_tensor = partial( load_workflow_params, param_header=f"{linesep}! Tensors{linesep}" ) # noqa: E501 prepare_output = partial( load_workflow_params, param_header=f"{linesep}! Output structure{linesep}" ) # noqa: E501 load_trans_vectors = partial( load_workflow_params, param_header=f"{linesep}! Translation vectors{linesep}" ) # noqa: E501 load_ambig = partial(write_eval_line, "ambig_fname") load_unambig = partial(write_eval_line, "unambig_fname") load_hbond = partial(write_eval_line, "hbond_fname") load_dihe = partial(write_eval_line, "dihe_f") load_tensor_tbl = partial(write_eval_line, "tensor_tbl")
def load_scatter(scatter_lib: Path) -> str:
    """
    Build the "Scatter lib" header section.

    Parameters
    ----------
    scatter_lib : Path
        Value written for the ``scatter_lib`` CNS variable.

    Returns
    -------
    str
        The section header followed by the ``scatter_lib`` eval line.
    """
    section_title = f"{linesep}! Scatter lib{linesep}"
    return load_workflow_params(param_header=section_title, scatter_lib=scatter_lib)
def load_boxtyp20(waterbox_param: Path) -> str:
    """
    Build the "Water box" header section.

    Parameters
    ----------
    waterbox_param : Path
        Value written for the ``boxtyp20`` CNS variable.

    Returns
    -------
    str
        The section header followed by the ``boxtyp20`` eval line.
    """
    section_title = f"{linesep}! Water box{linesep}"
    return load_workflow_params(param_header=section_title, boxtyp20=waterbox_param)
# This is used by docking
def prepare_multiple_input(pdb_input_list: list[str], psf_input_list: list[str]) -> str:
    """
    Build the "Input structure" section for several PDB/PSF files.

    Parameters
    ----------
    pdb_input_list : list of str
        Coordinate files. Each one gets a ``coor`` statement and an
        ``input_pdb_filename_<n>`` variable, numbered from 1.

    psf_input_list : list of str
        Topology files. Each one gets its own ``structure ... end`` block.

    Returns
    -------
    str
        The section text, ending with the ``ncomponents`` variable: the
        number of distinct identifiers reported by
        ``libpdb.identify_chainseg`` over all PDB files.
    """
    pieces = [f"{linesep}! Input structure{linesep}"]

    for psf in psf_input_list:
        pieces.extend(
            (f"structure{linesep}", f" @@{psf}{linesep}", f"end{linesep}")
        )

    for index, pdb in enumerate(pdb_input_list, start=1):
        pieces.append(f"coor @@{pdb}{linesep}")
        pieces.append(write_eval_line(f"input_pdb_filename_{index}", pdb))

    # check how many chains there are across all the PDBs
    id_groups: list[list[str]] = [
        group
        for pdb in pdb_input_list
        for group in libpdb.identify_chainseg(pdb)
    ]
    unique_ids = set(itertools.chain.from_iterable(id_groups))
    pieces.append(write_eval_line("ncomponents", len(unique_ids)))

    return "".join(pieces)
# This is used by Topology and Scoring
def prepare_single_input(
    pdb_input: FilePath, psf_input: Union[None, FilePath, list[FilePathT]] = None
) -> str:
    """Input of the CNS file.

    This section will be written for any recipe even if some CNS variables
    are not used, it should not be an issue.

    Parameters
    ----------
    pdb_input : FilePath
        Coordinate file. Written to the ``file`` variable and inspected
        with ``libpdb.identify_chainseg`` to list its chain/seg IDs.

    psf_input : FilePath or list of FilePath, optional
        Topology file(s). When given (truthy), ``structure`` and ``coor``
        statements are written before the variables.

    Returns
    -------
    str
        The "Input structure" section: optional structure/coordinate
        statements, then the ``file``, ``ncomponents``, ``prot_segid_<n>``
        and ``seed`` variables.
    """
    input_str = f"{linesep}! Input structure{linesep}"

    if psf_input:
        # if isinstance(psf_input, str):
        # NOTE(review): this first block interpolates `psf_input` as-is, so a
        # list argument is written using its list repr before the per-file
        # block below - presumably only meant for a single path; confirm.
        input_str += f"structure{linesep}"
        input_str += f" @@{psf_input}{linesep}"
        input_str += f"end{linesep}"
        input_str += f"coor @@{pdb_input}{linesep}"
        if isinstance(psf_input, list):
            # One `structure` block listing every topology file.
            input_str += f"structure{linesep}"
            for psf in psf_input:
                input_str += f" @@{psf}{linesep}"
            input_str += f"end{linesep}"
    # $file variable is still used by some CNS recipes, need refactoring!
    input_str += write_eval_line("file", pdb_input)
    segids, chains = libpdb.identify_chainseg(pdb_input)
    # Union of segment and chain IDs, sorted so the numbering is stable.
    chainsegs = sorted(list(set(segids) | set(chains)))

    ncomponents = len(chainsegs)
    input_str += write_eval_line("ncomponents", ncomponents)

    # prot_segid_1 .. prot_segid_N, following the sorted order above.
    for i, segid in enumerate(chainsegs, start=1):
        input_str += write_eval_line(f"prot_segid_{i}", segid)

    # A fresh seed is drawn on every call; there is no way to supply one here.
    seed = RND.randint(100, 99999)
    input_str += write_eval_line("seed", seed)

    return input_str
def prepare_cns_input(
    model_number: int,
    input_element: Union[PDBFile, list[PDBFile]],
    step_path: FilePath,
    recipe_str: str,
    defaults: Any,
    identifier: str,
    ambig_fname: FilePath = "",
    native_segid: bool = False,
    default_params_path: Optional[Path] = None,
    debug: Optional[bool] = False,
    seed: Optional[int] = None,
) -> Union[Path, str]:
    """
    Generate the .inp file needed by the CNS engine.

    The input is assembled in this order: default parameters (including
    ``ambig_fname``), input structures, seed, output section, chain/seg
    IDs, and finally the recipe.

    Parameters
    ----------
    model_number : int
        The number of the model. Will be used as file name suffix and is
        written to the ``count`` variable.

    input_element : `libs.libontology.PDBFile`, list of those
        Model(s) to use as input. Each must carry a topology (a single
        object or a list/tuple of them, each exposing ``rel_path``).

    step_path : FilePath
        Not referenced in the body of this function.

    recipe_str : str
        CNS recipe text, appended last.

    defaults : Any
        Mapping unpacked as keyword arguments into
        :func:`load_workflow_params`.

    identifier : str
        Prefix of the output PDB name (``<identifier>_<model_number>.pdb``)
        and of the ``.inp`` file written in debug mode. If it ends with
        ``"scoring"``, the chain/seg ID uniqueness check is skipped.

    ambig_fname : FilePath
        Value written to the ``ambig_fname`` variable.

    native_segid : bool
        When true, ``prot_segid_<n>`` variables are written from the
        chain/seg IDs found in the input PDB file(s).

    default_params_path : Path, optional
        Not referenced in the body of this function.

    debug : bool, optional
        When true, the input is written to ``<identifier>_<model_number>.inp``
        and that path is returned instead of the text.

    seed : int, optional
        Random seed. Drawn with ``RND.randint(100, 99999)`` when ``None``.

    Returns
    -------
    str or Path
        The CNS input text, or the (relative) path of the written file
        when `debug` is true.

    Raises
    ------
    ValueError
        If an input model has no topology, or if `native_segid` is set,
        several models are given, and they share chain/seg IDs (unless
        `identifier` ends with ``"scoring"``).
    """
    # TODO: Refactor this function into smaller functions or classes
    # read the default parameters
    default_params = load_workflow_params(**defaults)
    default_params += write_eval_line("ambig_fname", ambig_fname)

    # write the PDBs
    pdb_list = [pdb.rel_path for pdb in transform_to_list(input_element)]

    # write the PSFs
    # Three input shapes are handled: several models, one model with several
    # topologies, and one model with a single topology.
    psf_list: list[Path] = []
    if isinstance(input_element, (list, tuple)):
        for pdb in input_element:
            if isinstance(pdb.topology, (list, tuple)):
                for psf in pdb.topology:
                    psf_fname = psf.rel_path
                    psf_list.append(psf_fname)
            else:
                if pdb.topology is None:
                    raise ValueError(f"Topology not found for pdb {pdb.rel_path}.")
                psf_fname = pdb.topology.rel_path
                psf_list.append(psf_fname)

    elif isinstance(input_element.topology, (list, tuple)):
        pdb = input_element  # for clarity
        # This check cannot trigger here: the topology was just found to be
        # a list/tuple. A `None` topology is caught in the branch below.
        if pdb.topology is None:
            raise ValueError(f"Topology not found for pdb {pdb.rel_path}.")
        for psf in pdb.topology:
            psf_fname = psf.rel_path
            psf_list.append(psf_fname)
    else:
        pdb = input_element  # for clarity
        if pdb.topology is None:
            raise ValueError(f"Topology not found for pdb {pdb.rel_path}.")
        psf_fname = pdb.topology.rel_path
        psf_list.append(psf_fname)

    input_str = prepare_multiple_input(
        pdb_input_list=[str(p) for p in pdb_list],
        psf_input_list=[str(p) for p in psf_list],
    )

    output_pdb_filename = f"{identifier}_{model_number}.pdb"

    output = f"{linesep}! Output structure{linesep}"
    output += write_eval_line("output_pdb_filename", output_pdb_filename)

    # prepare chain/seg IDs
    segid_str = ""
    if native_segid:
        chainid_list: list[str] = []
        if isinstance(input_element, (list, tuple)):
            for pdb in input_element:
                segids, chains = libpdb.identify_chainseg(pdb.rel_path, sort=False)
                chainsegs = sorted(list(set(segids) | set(chains)))
                # check if any of chainsegs is already in chainid_list
                if not identifier.endswith("scoring"):
                    if any(chainseg in chainid_list for chainseg in chainsegs):
                        raise ValueError(
                            f"Chain/seg IDs are not unique for pdbs {input_element}."
                        )
                chainid_list.extend(chainsegs)
            # Numbering runs over the IDs of all models, in input order.
            for i, _chainseg in enumerate(chainid_list, start=1):
                segid_str += write_eval_line(f"prot_segid_{i}", _chainseg)
        else:
            segids, chains = libpdb.identify_chainseg(
                input_element.rel_path, sort=False
            )
            chainsegs = sorted(list(set(segids) | set(chains)))
            for i, _chainseg in enumerate(chainsegs, start=1):
                segid_str += write_eval_line(f"prot_segid_{i}", _chainseg)

    output += write_eval_line("count", model_number)

    # Only draw a seed when the caller did not provide one.
    if seed is None:
        seed = RND.randint(100, 99999)
    seed_str = write_eval_line("seed", seed)

    inp = default_params + input_str + seed_str + output + segid_str + recipe_str

    if not debug:
        return inp
    else:
        # Debug mode: keep the generated input on disk for inspection.
        inp_file = Path(f"{identifier}_{model_number}.inp")
        inp_file.write_text(inp)
        return inp_file
def prepare_expected_pdb(
    model_obj: Union[PDBFile, tuple[PDBFile, ...]],
    model_nb: int,
    path: FilePath,
    identifier: str,
) -> PDBFile:
    """
    Create the `PDBFile` describing an expected output model.

    Parameters
    ----------
    model_obj : PDBFile or tuple of PDBFile
        Input model(s) the output derives from. Their topology is copied
        to the new object; the seed is copied only for a single model.

    model_nb : int
        Model number, used as file name suffix.

    path : FilePath
        Location of the expected file.

    identifier : str
        File name prefix.

    Returns
    -------
    PDBFile
        Object pointing at ``<path>/<identifier>_<model_nb>.pdb``.
    """
    expected = PDBFile(Path(path, f"{identifier}_{model_nb}.pdb"), path=path)

    if not isinstance(model_obj, tuple):
        # Single parent model: inherit both its topology and its seed.
        expected.topology = model_obj.topology
        expected.seed = model_obj.seed
        return expected

    # Several parent models: gather their topologies; no seed is set.
    expected.topology = [parent.topology for parent in model_obj]
    return expected