# Source code for haddock.clis.cli_pp

"""
HADDOCK3 PDB preprocessing client.

Process PDB files for agreement with HADDOCK3 requirements. Follows the
logic implemented in the :py:mod:`haddock.gear.preprocessing`. See
documentation pages for more details.

You can use the `--dry` option to report on the performed changes
without actually performing the changes.

Corrected PDBs are saved to new files named after the `--suffix` option.
Original PDBs are never overwritten, unless `--suffix` is given an empty
string.

You can pass multiple PDB files to the command-line.

Usage::

    haddock-pp file1.pdb file2.pdb
    haddock-pp file1.pdb file2.pdb --suffix _new
    haddock-pp file1.pdb file2.pdb --dry
"""
import argparse
import sys
from pathlib import Path

from haddock import log
from haddock.core.typing import (
    ArgumentParser,
    Callable,
    FilePath,
    Namespace,
    Optional,
)
from haddock.gear.preprocessing import process_pdbs, read_additional_residues
from haddock.libs.libcli import add_output_dir_arg
from haddock.libs.libio import add_suffix_to_files, save_lines_to_files


# Suffix used for the output file names when `--suffix` is not given.
SUFFIX_DEFAULT = "_processed"

# Module-level parser. The module docstring doubles as the `--help`
# description; the raw formatter prints it without re-wrapping.
ap = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=__doc__,
)

# Positional argument: one or more input PDB files.
ap.add_argument("pdb_files", nargs="+", help="Input PDB files.")

ap.add_argument(
    "-d",
    "--dry",
    action="store_true",
    help="Perform a dry run. Informs changes without modifying files.",
)

# NOTE(review): `nargs="*"` makes argparse store a list here, while
# `main` annotates `topfile` as a single optional path - confirm that
# `read_additional_residues` accepts what the command-line provides.
ap.add_argument("-t", "--topfile", nargs="*", help="Additional .top files.")

ap.add_argument(
    "-s",
    "--suffix",
    default=SUFFIX_DEFAULT,
    help=f"Suffix to output files. Defaults to {SUFFIX_DEFAULT!r}",
)

# Presumably registers the option that reaches `main` as
# `output_directory`; see `haddock.libs.libcli` to confirm.
add_output_dir_arg(ap)


# client helper functions
def _ap() -> ArgumentParser:
    """Return the module-level argument parser of this client."""
    return ap


def load_args(ap: ArgumentParser) -> Namespace:
    """
    Load argument parser args.

    Parameters
    ----------
    ap : ArgumentParser
        The parser used to read the command-line arguments.

    Returns
    -------
    Namespace
        The result of ``ap.parse_args()``.
    """
    return ap.parse_args()


def cli(ap: ArgumentParser, main: Callable[..., None]) -> None:
    """
    Command-line interface entry point.

    Parameters
    ----------
    ap : ArgumentParser
        The parser used to read the command-line arguments.
    main : callable
        The function to execute. It receives the values of ``pdb_files``
        as positional arguments and every other parsed option as a
        keyword argument.
    """
    cmd = vars(load_args(ap))

    # I use this `pop` structure to maintain the unpacking argument in
    # the `main` function because I foresee the `main` to be used from
    # other parts of the software(s)
    main(*cmd.pop("pdb_files"), **cmd)


def maincli() -> None:
    """Execute main client."""
    # Wire the module-level parser to this module's `main` function.
    cli(ap, main)


def main(
    *pdb_files: FilePath,
    dry: bool = False,
    output_directory: Optional[FilePath] = None,
    suffix: str = SUFFIX_DEFAULT,
    topfile: Optional[FilePath] = None,
) -> None:
    """
    Process PDB files.

    Parameters
    ----------
    pdb_files : str or ``pathlib.Path``
        The PDB files to process.

    dry : bool
        Whether to perform a dry test only.

    output_directory : str or ``pathlib.Path``
        The directory where to save the output. Defaults to the current
        working directory.

    suffix : str
        The suffix to append to the new files. Will be added before the
        file extension. Original extension will be kept.

    topfile : str or ``pathlib.Path``
        The path to an additional HADDOCK3 topology file.

    Raises
    ------
    SystemExit
        With exit code 0, once the processing is done, if `dry` is true.
    """
    log.info("Starting processing PDB files.")
    log.info(f"Total number of PDB files: {len(pdb_files)}")

    if dry:
        log.info(
            "You selected the `--dry` option. No new files will be created. "
            "A report of the changes that would be performed in the PDB files "
            "will be printed."
        )

    # Extra residues are read only when a topology file was provided.
    new_residues = read_additional_residues(topfile) if topfile else None

    log.info(
        "Processing the PDB files... "
        "This may take a bit if there are many."
    )

    processed_pdbs = process_pdbs(
        *pdb_files,
        dry=dry,
        user_supported_residues=new_residues,
    )

    if dry:
        # A dry run stops here, before anything is written to disk.
        # Note that this terminates the interpreter also when `main` is
        # called from other Python code.
        log.info("Everything done. Exiting...")
        sys.exit(0)

    log.info("Finished processing PDBs. Saving to disk...")

    if output_directory is None:
        output_directory = Path.cwd()
    else:
        output_directory = Path(output_directory)

    log.info("Output dir: {!r}".format(str(output_directory)))
    output_directory.mkdir(parents=True, exist_ok=True)

    # Output files keep the input base names; only the folder and the
    # suffix change.
    pdb_names = (Path(output_directory, Path(pdb).name) for pdb in pdb_files)
    out_files = add_suffix_to_files(pdb_names, suffix)
    save_lines_to_files(out_files, processed_pdbs)
    log.info("Everything done. Exiting...")
    return


# Run the client when this file is executed as a script.
if __name__ == "__main__":
    sys.exit(maincli())  # type: ignore