Source code for haddock.gear.known_cns_errors

"""Detect known/common CNS errors.

Inspired from:
https://github.com/haddocking/haddock25/blob/main/tools/check-error-messages.sh
"""

import gzip
from io import BufferedReader
from pathlib import Path

from haddock.core.exceptions import KnownCNSError
from haddock.core.typing import FilePath, Optional, Union

# Registry of known CNS failure signatures.
#   key:   text to look for in a CNS error file (`cns.cnserr`).  The lookup
#          in this module is a plain, case-sensitive substring test on each
#          line, so a key must reproduce the CNS output exactly -- including
#          capitalisation and any doubled spaces.
#   value: message shown to the user, hinting at how to solve the problem.
# When a single line matches several keys, the first key in this dictionary
# (insertion order) is the one reported.
KNOWN_ERRORS = {
    # --- Symmetry / NCS restraints ---
    "CHAIN LENGTH FOR SYMMETRY RESTRAINTS DOES NOT MATCH": (
        "Mismatch between chain length for symmetry restraints. "
        "Check your input molecules and symmetry restraints."
        ),
    "NCS-restraints error encountered: Improperly defined non-crystallographic symmetry": (  # noqa : E501
        "Improperly defined non-crystallographic symmetry (NCS). "
        "Check your symmetry restraints definition."
        ),
    "error in SYMMETRY potential, check NOE table": (
        "Check your symmetry restraints definition."
        ),
    # --- Distance (NOE) restraints ---
    "exceeded allocation for NOE-restraints": (
        "Too many distance restraints defined. "
        "Try to reduce this number by checking your definition of active "
        "and passive residues. "
        "Make sure to filter those for solvent accessibility. "
        "Or alternatively increase the nres parameter in the noe statements"
        " in the relevant CNS scripts."
        ),
    # --- Parsing of restraint / input files ---
    "SELRPN error encountered: parsing error": (
        "Check your restraint files."
        ),
    "PARSER error encountered: Encountered too many parsing errors": (
        "Encountered too many parsing errors. "
        "Check your input molecules and symmetry restraints."
        ),
    # --- EM map (note the two spaces after "encountered:" in the key) ---
    "XMREAD error encountered:  sectioning of map incompatible with resolution": (  # noqa : E501
        "Check your EM map resolution and sectioning."
        ),
    # --- Memory ---
    "ALLHP error encountered: not enough memory available": (
        "Too many distance restraints defined. "
        "Try to reduce this number by checking your definition of active and "
        "passive residues. "
        "Make sure to filter those for solvent accessibility. "
        "Try to decrease the size of your system where possible."
        ),
    # --- Cryo-EM restraints ---
    "error encountered: missing SCATter definition for SELEcted atoms": (
        "Unsupported atoms/molecules for cryo-EM restraints."
        ),
    # --- Rotations ---
    "ROTMAT error encountered: rotation vector has zero length": (
        "Check your input parameters and restraints. "
        "Possibly try turning off the sampling of 180 degrees rotation."
        )
    }


def find_cns_errors(cns_out_fpath: FilePath) -> Optional[KnownCNSError]:
    """Detect if a known CNS error is in a cns.cnserr file.

    The file is scanned backwards (from its end) for any of the signatures
    listed in `KNOWN_ERRORS`.

    Parameters
    ----------
    cns_out_fpath : FilePath -> Union[str, Path]
        Path to the cns.cnserr file to check.
        A path with a `.gz` suffix is opened with `gzip`, any other path is
        opened as a regular binary file.

    Returns
    -------
    Optional[KnownCNSError]
        An exception for known CNS errors, with its hint on how to solve it!
        When no known error is found in the file, a generic `KnownCNSError`
        pointing the user to the file is returned instead, so with the
        current implementation the returned value is never `None`.
    """
    # Check for file extension to open it the appropriate way
    opener = gzip.open if Path(cns_out_fpath).suffix == ".gz" else open
    # Use a context manager so the handle is always closed, whether a known
    # error is detected, nothing is found, or reading the file fails.
    with opener(cns_out_fpath, "rb") as file_handle:
        try:
            _find_cns_errors(
                file_handle,
                KNOWN_ERRORS,
                filepath=cns_out_fpath,
                )
        except KnownCNSError as err:
            # A known error was detected: hand it back to the caller
            return err
    # No known signature found: return a generic error pointing to the file.
    # NOTE: these messages are kept verbatim (typos included) because the
    # first one ends up as the `cns_error` value used to group errors.
    return KnownCNSError(
        "An unfortunate CNS error occured at exection time...",
        f"Manually check the file `{cns_out_fpath}` to understand why!",
        cns_out_fpath,
        )


def _find_cns_errors(
        file_handle: Union[gzip.GzipFile, BufferedReader],
        known_errors: dict[str, str],
        chunk_size: int = 4096,
        filepath: FilePath = "",
        ) -> None:
    """Backward reading and detect first known CNS error in file.

    The file is read chunk by chunk starting from its end, and complete
    lines are checked from the last one to the first one.  The search stops
    at the first line containing a known error, i.e. the known error located
    closest to the end of the file is the one reported.

    Parameters
    ----------
    file_handle: Union[gzip.GzipFile, BufferedReader]
        An opened file in read bytes mode.
    known_errors : dict[str, str]
        Dict of known errors and their hints
    chunk_size : int, optional
        Chunk size (in bytes) to read the file backwards, by default 4096
    filepath : FilePath -> Union[str, Path]
        Path to the cns.cnserr file currently checked.

    Raises
    ------
    KnownCNSError
        An exception for known CNS errors, with its hint on how to solve it!
    ValueError
        If `chunk_size` is lower than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive number of bytes")
    # Find file size
    file_handle.seek(0, 2)
    position = file_handle.tell()
    # Beginning of a line that was cut by a chunk boundary: it is held back
    # until the preceding chunk provides the rest of that line, so that
    # every line is checked exactly once and always in full.
    pending = b""
    while position > 0:
        # Go to the start of the next chunk (walking towards file start)
        start = max(position - chunk_size, 0)
        file_handle.seek(start)
        # Read exactly the bytes between `start` and the previous position,
        # so no byte is skipped nor read twice
        chunk = file_handle.read(position - start)
        position = start
        # Complete the chunk with the line fragment held back previously
        lines = (chunk + pending).split(b"\n")
        if position > 0:
            # The first line may continue in the preceding chunk:
            # keep it for the next iteration instead of checking a fragment
            pending = lines.pop(0)
        # Read lines, last one first
        for line in reversed(lines):
            decoded_line = line.decode("utf-8", errors="replace")
            # Loop over known errors
            for error_string, hint in known_errors.items():
                # Check if this error is known
                if error_string in decoded_line:
                    # Report the cause
                    raise KnownCNSError(
                        error_string,
                        hint,
                        filepath,
                        )


def find_all_cns_errors(
        directory_path: FilePath,
        ) -> dict[str, dict[str, Union[list[FilePath], KnownCNSError]]]:
    """Find all errors in a directory.

    Every `*.cnserr.gz` and `*.cnserr` file located directly in the
    directory (no recursion) is checked with `find_cns_errors`, and the
    detected errors are grouped by their `cns_error` attribute.

    Parameters
    ----------
    directory_path : FilePath
        Path to the directory to be checked.

    Returns
    -------
    all_errors : dict[str, dict[str, Union[list[FilePath], KnownCNSError]]]
        Dictionary containing all errors found in this directory.
        Keys are the `cns_error` of the detected errors; each value is a
        dictionary holding:

        - ``"files"``: list of the `filepath` of every detected error of
          that kind,
        - ``"error"``: the first `KnownCNSError` detected for that kind.
    """
    all_errors: dict[str, dict[str, Union[list[FilePath], KnownCNSError]]] = {}
    directory = Path(directory_path)
    # Gather list of all `.cnserr` and `.cnserr.gz` files present in directory
    all_cns_out_files = list(directory.glob("*.cnserr.gz"))
    all_cns_out_files += list(directory.glob("*.cnserr"))
    # Loop over all .cnserr files
    for fpath in all_cns_out_files:
        # Try to detect an error
        if (detected_error := find_cns_errors(fpath)):
            # Hold data if an error is present in that file;
            # the entry is created the first time this error is seen
            error_type = all_errors.setdefault(
                detected_error.cns_error,
                {"files": [], "error": detected_error},
                )
            error_type["files"].append(detected_error.filepath)
    return all_errors