Source code for haddock.clis.restraints.z_surface_restraints

"""haddock3-restraints z-surface-restraints subcommand.

Generate both z-restraints and corresponding z-surfaces based on
input pdb structure and residue selection.

Usage:
    haddock3-restraints z-surface-restraints
        --pdb       <path/to/the/structure.pdb>
        --residues  <list of coma separated residue index>
        --output    <base/path/where/to/output/data>
        --spacing   <spacing_between_two_beads>
        --x-size    <size_in_x_dim>
        --y-size    <size_in_y_dim>
        --z-padding <additional_z_padding_between_two_extrema_plans>

e.g:
haddock3-restraints z-surface-restraints
  --pdb mystructure.pdb
  --residues 1,2,3 7,8,9
  --spacing 20
  --x-size 200
  --y-size 200
  --z-padding 5
  --output myZrestaints
"""

import logging
import os
from pathlib import Path

from haddock.core.typing import Generator, Optional, Union
from haddock.libs.librestraints import read_structure, calc_euclidean


# As this script is a subcommand of the `haddock3-restraints` cli,
# it requires its own options and arguments that are managed here.
[docs] def add_z_surf_restraints_arguments(z_surf_restraints_subcommand): """Add arguments to the z_plan subcommand.""" z_surf_restraints_subcommand.add_argument( "--pdb", "-p", help="Path to a pdb file.", required=True, default='', type=str, ) z_surf_restraints_subcommand.add_argument( "--residues", "-r", help=( "List of comma separated residues (can be multiple selections). " "Example 1,2,3 7,8,9 for two selections." ), required=False, default=[], nargs='+', type=str, ) z_surf_restraints_subcommand.add_argument( "--output", "-o", help=( "Base output path. This script will generate two files, " "therefore no extention needed here" ), required=False, default=None, type=str, ) z_surf_restraints_subcommand.add_argument( "--spacing", "-s", type=float, help="Spacing between two beads (A)", required=False, default=20, ) z_surf_restraints_subcommand.add_argument( "--x-size", "-x", help="Size of the plan in X dimension (A)", required=False, default=100, type=float, ) z_surf_restraints_subcommand.add_argument( "--y-size", "-y", help="Size of the plan in Y dimension", required=False, default=100, type=float, ) z_surf_restraints_subcommand.add_argument( "--z-padding", "-z", help="Additional padding between two external plans.", required=False, default=5.0, type=float, ) z_surf_restraints_subcommand.add_argument( "--log_level", default='INFO', choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'), help="Logging level", required=False, ) return z_surf_restraints_subcommand
[docs] def setup_logging(log_level: str = "INFO") -> None: """Set log level and format.""" logging.basicConfig( level=log_level, format='%(asctime)s L%(lineno)d %(levelname)s - %(message)s', datefmt='%d/%m/%Y %H:%M:%S', )
############################ # SET OF USEFULL FUNCTIONS # ############################
[docs] def load_selected_resiudes_coords( pdb_fpath: Union[str, Path], selections: dict[str, list[int]], ) -> tuple[dict[str, list[tuple[float, float, float]]], list[str], list[str]]: """Load coordinates of selected residues. Parameters ---------- pdb_fpath : Union[str, Path] Path to the PDB file to be parsed. selections : dict[str, list[int]] Dictionary holding the various residues indices for each selection. Returns ------- selection_coords: dict[str, list[tuple[float, float, float]]] Dictionary holding the various Calpha coordinates for each selection. """ # Load structures atom coordinates pdb_atoms = read_structure(pdb_fpath) # Set paring variables selection_coords: dict[str, list[tuple[float, float, float]]] = {} selected_chains: list[str] = [] selected_atoms: list[str] = [] # Loop over atoms for chain, resi, atname, coords in pdb_atoms: # Simplify the probleme to Calpha/Phosphates/BackBone atoms only # FIXME: maybe change P to C1 or C9 ? if not atname in ("CA", "P", "BB", ): continue # Loop over selections for selection, sele_resis in selections.items(): # Check if residue of interest if resi in sele_resis: # Make sure the key is created selection_coords.setdefault(selection, []) # Hold this coordinates selection_coords[selection].append(coords) # Also add chain and atom type selected_chains.append(chain) selected_atoms.append(atname) set_selected_chains = list(set(selected_chains)) set_selected_atoms = list(set(selected_atoms)) return selection_coords, set_selected_chains, set_selected_atoms
[docs] def compute_barycenter( resi_coords: list[tuple[float, float, float]], ) -> tuple[float, float, float]: """Compute center of mass of multiple resiudes coordinates. Parameters ---------- resi_coords : list[tuple[float, float, float]] List of Calpha coordinates. Returns ------- barycenter : tuple[float, float, float] Corrdinates of the center of mass. """ # Combine coordinates xs, ys, zs = [], [], [] for coords in resi_coords: xs.append(coords[0]) ys.append(coords[1]) zs.append(coords[2]) # Compute average x_avg = sum(xs) / len(xs) y_avg = sum(ys) / len(ys) z_avg = sum(zs) / len(zs) # Return barycenter barycenter = (x_avg, y_avg, z_avg) return barycenter
[docs] def load_selections(residues_lists: list[str]) -> dict[str, list[int]]: """Split and cast residues from an initial string to list. Parameters ---------- residues_lists : list[str] List of strings containing coma separated resiudes indices. Returns ------- selections: dict[str, list[int]] Dictionary of resiudes indices. """ selections: dict[str, list[int]] = {} for listid, str_resiudes in enumerate(residues_lists, start=1): resid_indices: list[int] = [] selection_key = f"selection_{listid}" for strresid in str_resiudes.split(','): try: resid = int(strresid) except Exception as _e: msg = f"Could not cast residue {strresid} from {selection_key}" logging.warning(msg) else: resid_indices.append(resid) if resid_indices == []: err_msg = f"Not considering {selection_key} as it is empty !" logging.error(err_msg) else: selections[selection_key] = resid_indices return selections
[docs] def get_z_coords( select_coords: dict[str, list[tuple[float, float, float]]], padding: float = 5.0, ) -> dict[str, float]: """Generate z-coordinates from selection of residues. Here the idea is to find the most distant points between selections, and project it on a Z axis to be able to later orient the protein. Parameters ---------- selection_coords: dict[str, list[tuple[float, float, float]]] Dictionary holding the various Calpha coordinates for each selection. padding : float, optional Extra padding (in Angstrom) of z-coordinate, by default 10 Returns ------- selection_z : dict[str, float] Z coodrinate for each selection. """ # Default when less than 1 selection was made if select_coords == {}: return {"z_1": 0} elif len(select_coords.keys()) == 1: return {s: 0 for s in select_coords.keys()} # Compute geometrical center select_centers = { select: compute_barycenter(select_resids_coords) for select, select_resids_coords in select_coords.items() } # Compute distances dists: dict[str, dict[str, float]] = {s: {} for s in select_centers.keys()} max_dist: float = -1 max_dist_keys: list[str] = [] for select, center in select_centers.items(): for select2, center2 in select_centers.items(): if select == select2: continue # Compute dist dist = calc_euclidean(center, center2) # Hold data dists[select][select2] = dist dists[select2][select] = dist # Define max if max_dist < dist: max_dist = dist max_dist_keys = [select, select2] # Initiate z-coords boundaries max_z = (padding + max_dist) / 2 # Compute Z coords for external surfaces selection_z = { max_dist_keys[0]: max_z, max_dist_keys[1]: -max_z, } # Compute Z coords for internal surfaces (when nb. selections >= 3) for select, dist in dists.items(): # If the selection is part of the external surfaces if select in max_dist_keys: continue # Compute location of z coordinate dist_to_upper = max_z - dist[max_dist_keys[0]] dist_to_lower = dist[max_dist_keys[1]] - max_z select_z_coord = (dist_to_upper + dist_to_lower) / 2 # Hold data selection_z[select] = select_z_coord return selection_z
[docs] def gen_z_restraints( res_select: dict[str, list[int]], selection_z: dict[str, float], rest_dist: float = 7.5, segids: list[str] = ["A"], atome_types: list[str] = ["CA"], ) -> str: """Generate set of z ambiguous restraints according to residue selections. Parameters ---------- res_select : dict[str, list[int]] Dictionary holding the various residues indices for each selection. selection_z : dict[str, float] Z coodrinate for each selection. rest_dist : float, optional Upper boundary (in Angstrom) of satisfied restraints, by default 7.5 Returns ------- all_restraints : str A string containing the AIR restraints. """ # Gather all coordinates all_z_coords = [zcoord for zcoord in selection_z.values()] # Point min and max values minz = min(all_z_coords) maxz = max(all_z_coords) # Initiate restraints holder restraints: list[str] = [] # z_padding variable is used as a padding for the Z beads selection. # This is meant for the CNS selection method using lt(lower than) # and gt (geater than) rather than equal to. z_padding: float = 0.1 # Compile chain selection chain_selection_string = _compile_multiple_cns_selections( "segid", segids, ) # Compile atome selection atom_selection_string = _compile_multiple_cns_selections( "name", atome_types, ) # Loop over selections for select in res_select.keys(): # Point data residues = res_select[select] z_coord = selection_z[select] # Add comment list_residues = ",".join([str(r) for r in residues]) restraints.append(f"! z restraints for {select}: {list_residues}") # Loop over residues selection for resid in residues: # Compute lower/greater than z-coord lt_coord = z_coord + z_padding gt_coord = z_coord - z_padding # If the lower Z coordinate plan if z_coord == minz: rest = ( f"assign (resid {resid:>7d} and " f"{atom_selection_string} and {chain_selection_string}) " f"(name SHA and attr z lt {lt_coord:>-8.2f}) " f"{rest_dist:.1f} {rest_dist:.1f} 0.0" ) # If the upper Z coordinate plan elif z_coord == maxz: rest = ( f"assign (resid {resid:>7d} and " f"{atom_selection_string} and {chain_selection_string}) " f"(name SHA and attr z gt {gt_coord:>-8.2f}) " f"{rest_dist:.1f} {rest_dist:.1f} 0.0" ) # If in between lower and upper plans (when nb. plans >= 3) else: rest = ( f"assign (resid {resid:>7d} and " f"{atom_selection_string} and {chain_selection_string}) " f"(name SHA and attr z lt {lt_coord:>-8.2f} " f"and attr z gt {gt_coord:>-8.2f}) " f"{rest_dist:.1f} {rest_dist:.1f} 0.0" ) restraints.append(rest) all_restraints = os.linesep.join(restraints) return all_restraints
def _compile_multiple_cns_selections( selection_key: str, selections: list[str], logical_operator: str = 'OR', ) -> str: """Generate a selection from multiple ones using logical operator. Parameters ---------- selection_key : str Name of the selection key. (e.g.: segid, name, resid, ...) selections : list[str] List of selections. Returns ------- combined_selection : str The combined selection. """ assert len(selections) >= 1 if len(selections) == 1: return f"{selection_key} {selections[0]}" compiled_selections = [ f"{selection_key} {select}" for select in selections ] joined_selections = f" {logical_operator} ".join(compiled_selections) combined_selection = f"({joined_selections})" return combined_selection
[docs] def output_data( restraints: str, plans: str, output: Optional[Union[str, Path]] = None, ) -> tuple[str, str]: """Write output files. Parameters ---------- restraints : str String containing the ambiguous restraints. plans : str String containing shape beads coordinates as PDB file. output : Optional[Union[str, Path]], optional Base output path, by default None Returns ------- restraints_fpath: str Path to the generated AIRs. beadplans_fpath : str Path to the generated PDB file containing beads. """ # Define base output path if not given if not output: output = 'Zrestraints' # Write restraints restraints_fpath = f"{output}.tbl" with open(restraints_fpath, 'w') as filout: filout.write(restraints) # Write restraints beadplans_fpath = f"{output}_beads.pdb" with open(beadplans_fpath, 'w') as filout: filout.write(plans) # Return filepaths return restraints_fpath, beadplans_fpath
[docs] def gen_bead_plans( spacing: float = 40, x_size: float = 200, y_size: float = 200, z_coords: Optional[list[float]] = None, ) -> str: """Generate multiple bead plans. Parameters ---------- spacing : float, optional Spacing (in Angstrom) between beads in same dimension, by default 40 x_size : float, optional Width (in Angstrom) of the plan, by default 200 y_size : float, optional Height (in Angstrom) of the plan, by default 200 z_coords : Optional[list[float]], optional List of z-coordinates where to generate plans, by default None Returns ------- bead_plans : str A PDB file containing multiple plans. """ # Presets bead_plans: str = '' resindex: int = 0 if not z_coords: z_coords = [0] # Loop over z-coords for z in z_coords: plan, resindex = bead_plan( spacing=spacing, x_size=x_size, y_size=y_size, z_coord=z, resindex=resindex, ) bead_plans += plan return bead_plans
[docs] def bead_plan( spacing: float = 40, x_size: float = 200, y_size: float = 200, z_coord: float = 0, resindex: int = 0, ) -> tuple[str, int]: """Generate a PDB plan made of beads. Parameters ---------- spacing : float, optional Spacing (in Angstrom) between beads in same dimension, by default 40 x_size : float, optional Width (in Angstrom) of the plan, by default 200 y_size : float, optional Height (in Angstrom) of the plan, by default 200 z_coord : float, optional Z-coordinate where to generate the plan, by default 0 resindex : int, optional From which resiude to start the , by default 0 Returns ------- plan : str The PDB plan made of beads. resindex : int Index of the last residue index added. """ plan_beads: list[str] = [] # Loop over x coords for x_coord in step_coords(x_size, spacing): # Loop over y coords for y_coord in step_coords(y_size, spacing): resindex += 1 # Generate new bead bead = shape_bead(x_coord, y_coord, z_coord, resindex) plan_beads.append(bead) # Finalize plan plan = ''.join(plan_beads) return plan, resindex
[docs] def step_coords(_size: float, _spacing: float) -> Generator[float, None, None]: """Generate set of evenly spaced coordinates between of defined size. Parameters ---------- size : float Size (in Angstrom) to be sampled spacing : float Spacing between each coordinate Return ------ Generator[float, None, None] 1D coodinate of current position. """ # Convert to absolute value size = abs(_size) spacing = abs(_spacing) # Check if size is indeed greater than spacing if spacing > size: logging.error("Size must be greater than spacing!") raise ValueError return _step_coords(size, spacing)
def _step_coords(size: float, spacing: float) -> Generator[float, None, None]: """Generate set of evenly spaced coordinates between of defined size. Parameters ---------- size : float Size (in Angstrom) to be sampled spacing : float Spacing between each coordinate Yields ------ float 1D coodinate of current position. """ # Define initial position coord = - (size / 2) # Define oversized position oversized = (size + spacing) / 2 # Loop until oversized while coord <= oversized: yield coord coord += spacing
[docs] def shape_bead( x: float, y: float, z: float, resindex: int, chain: str = "S", atindex: int = 1, bfactor: float = 1.00, ) -> str: """Generate a PDB shape bead. Parameters ---------- x : float x coordinate of the bead y : float y coordinate of the bead z : float z coordinate of the bead resindex : int Residue index chain : str, optional Chain id, by default "S" atindex : int, optional Atome index, by default 1 bfactor : float, optional B-factor of the bead, by default 1.00 Returns ------- bead : str A valid PDB shape bead. """ bead = f"ATOM {atindex:>5d} SHA SHA {chain}{resindex:>4d} {x:-8.3f}{y:-8.3f}{z:-8.3f} 1.00{bfactor:6.2f} SHA {os.linesep}" # noqa : E501 return bead
def _get_ideal_restraint_dist(spacing: float) -> float: """Computes ideal restraint distance based on spacing. Basically, want to return (spacing / 2) - 2 Parameters ---------- spacing : int Spacing between beads. Returns ------- float Effective distance restraint. """ # Compute de distance effective_distance_restraint: float = (spacing / 2) - 2 # Make sure it is not out of allowed boundaries bounded_distance_restraint = max(effective_distance_restraint, 2.0) return bounded_distance_restraint
[docs] def main( pdb: Union[str, Path], residues: Optional[list[str]] = None, output: Optional[str] = None, spacing: float = 40, x_size: float = 200, y_size: float = 200, z_padding: float = 5.0, log_level: str = "INFO", ) -> tuple[str, str]: """Generate both z-restraints and z-surface beads from residue selection. Parameters ---------- pdb : Union[str, Path] Path to the PDB file to be parsed. residues : list[str] List of strings containing coma separated resids, by default None spacing : float, optional Spacing (in Angstrom) between beads in same dimension, by default 40 x_size : float, optional Width (in Angstrom) of the plan, by default 200 y_size : float, optional Height (in Angstrom) of the plan, by default 200 Returns ------- tuple[Union[str, Path], Union[str, Path]] Paths to the Z_restraints.tbl and Z_surface.pdb """ # Load residue selection res_select = load_selections(residues) # Load corresponding coordinates select_coords, chainids, atomtypes = load_selected_resiudes_coords( pdb, res_select ) # Compute z-coordinates for each selection selection_z = get_z_coords(select_coords, padding=z_padding) # Generate corresponding z-surfaces plans = gen_bead_plans( spacing=spacing, x_size=x_size, y_size=y_size, z_coords=[zcoord for zcoord in selection_z.values()], ) # Compute ideal restraint distance based on spacing restraint_distance = _get_ideal_restraint_dist(spacing) # Generate corresponding z-surface restraints restraints = gen_z_restraints( res_select, selection_z, rest_dist=restraint_distance, segids=chainids, atome_types=atomtypes, ) # Output data restraints_tbl, plans_pdb = output_data(restraints, plans, output=output) return restraints_tbl, plans_pdb
gen_z_surface_restraints = main ############################ # COMMAND LINE ENTRY POINT # ############################ if __name__ == "__main__": import argparse # Command line interface parser ap = argparse.ArgumentParser( prog="haddock3-restraints", description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) add_z_surf_restraints_arguments(ap) args = vars(ap.parse_args()) setup_logging(log_level=args['log_level']) # Launch main restraints_fpath, plan_s_fpath = gen_z_surface_restraints( args['residues'], residues=args['residues'], output=args['output'], spacing=args['spacing'], x_size=args['x_size'], y_size=args['y_size'], z_padding=args['z_padding'], log_level=args["log_level"], ) logging.info(restraints_fpath) logging.info(plan_s_fpath)