"""haddock3-restraints calc_accessibility subcommand.
Given a pdb file, it will calculate the relative accessibility of
the side chains and return a list of surface exposed residues.
Nucleic acids bases are considered to be always accessible.
Usage:
haddock3-restraints calc_accessibility
<input_pdb_file> Path to a PDB file to analyse
[-c <cutoff>] Relative side-chain accessibility cutoff
[--log_level <log_level>] DEBUG, INFO, WARNING, ERROR, or CRITICAL
[--export_to_actpass] Flag to export accessible resiudes
[--probe_radius <probe_radius>] Probe radius for the accessibility calculation
"""
import logging
import os
from pathlib import Path
from freesasa import Structure
from haddock.core.typing import Union
from haddock.libs.librestraints import DEFAULT_PROBE_RADIUS, REL_ASA
# As this script is a subcommand of the `haddock3-restraints` cli,
# it requires its own options and arguments that are managed here.
[docs]
def add_calc_accessibility_arguments(calc_accessibility_subcommand):
"""Add arguments to the calc_accessibility subcommand."""
calc_accessibility_subcommand.add_argument(
"input_pdb_file",
type=str,
help="input PDB structure.",
)
calc_accessibility_subcommand.add_argument(
"-c",
"--cutoff",
help="Relative cutoff for sidechain accessibility",
required=False,
default=0.4,
type=float,
)
calc_accessibility_subcommand.add_argument(
"--log_level",
default='INFO',
choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
help="Logging level",
required=False,
)
calc_accessibility_subcommand.add_argument(
"--export_to_actpass",
default=False,
action="store_true",
help="Export the exposed residues as passive to an actpass file",
required=False,
)
calc_accessibility_subcommand.add_argument(
"--probe_radius",
default=DEFAULT_PROBE_RADIUS,
type=float,
help="Probe radius for the accessibility calculation",
required=False,
)
return calc_accessibility_subcommand
##########################################################
# Define set of resiudes / bases handeled by this script #
##########################################################
DEFAULT_BASES = ["DA", "DC", "DG", "DT", "A", "C", "G", "U"]
MODIFIED_BASES = ["DJ"]
STANDARD_AA = [
'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL',
]
MODIFIED_AA = [
'ACE', 'ALY', 'ASH', 'CFE', 'CHX', 'CSP', 'CTN', 'CYC', 'CYF', 'CYM',
'DDZ', 'DUM', 'GLH', 'HLY', 'HYP', 'M3L', 'MLY', 'MLZ', 'MSE', 'NEP',
'PNS', 'PTR', 'SEP', 'TOP', 'TYP', 'TYS',
] # , 'THP'='TOP'?
VALID_AA = STANDARD_AA + MODIFIED_AA
VALID_BASES = DEFAULT_BASES + MODIFIED_BASES
VALID_IONS = [
"LI", "F", "NA", "MG", "AL", "CL", "K", "CA", "V", "CR", "MN", "FE", "NI",
"CO", "CU", "ZN", "BR", "KR", "SR", "MO", "AG", "CD", "I", "CS", "HO",
"YB", "OS", "IR", "PT", "AU", "HG", "PB",
]
[docs]
def get_accessibility(
pdb_f: Union[Path, str],
probe_radius: float = DEFAULT_PROBE_RADIUS,
) -> dict[str, dict[int, dict[str, float]]]:
"""Compute per-residue accessibility values.
Calls `FreeSASA <https://freesasa.github.io/>`_ using its Python API
and returns per-residue accessibility values.
Parameters
----------
pdb_f : Union[Path, str]
Path to the PDB file of interest.
Return
------
resid_access : dict[str, dict[int, dict[str, float]]]
Dictionary containing a list of accessible residues for each chain(s).
"""
naccess_unsupported_aa = ['HEC', 'TIP', 'ACE', 'THP', 'HEB', 'CTN']
logging.info("Calculate accessibility...")
try:
from freesasa import Classifier, calc, Parameters
except ImportError as err:
logging.error("calc_accessibility requires the 'freesasa' Python API")
raise ImportError(err)
# Initiate data holders
asa_data: dict[tuple[str, str, int, str], float] = {}
rsa_data: dict[tuple[str, str, int], float] = {}
rel_main_chain: dict[tuple[str, str, int], float] = {}
rel_side_chain: dict[tuple[str, str, int], float] = {}
# Point relative asa data
_rsa = REL_ASA['total']
_rsa_bb = REL_ASA['bb']
_rsa_sc = REL_ASA['sc']
# Workaround to get relative accessibility values for Nucleic Acids
# -> will always be accessible !
for na in VALID_BASES:
_rsa[na] = 1
_rsa_bb[na] = 1
_rsa_sc[na] = 1
_script_path = '/'.join(os.path.realpath(__file__).split('/')[:-1])
config_path = _script_path + '/naccess.config'
classifier = Classifier(config_path)
struct = Structure(pdb_f, classifier, options={})
struct.setRadiiWithClassifier(classifier)
# if the probe_radius is different from the default value
# we need to redefine the parameters
if probe_radius != DEFAULT_PROBE_RADIUS:
new_parameters = Parameters(
{
'algorithm': 'LeeRichards',
'probe-radius': probe_radius,
'n-points': Parameters.defaultParameters['n-points'],
'n-slices': Parameters.defaultParameters['n-slices'],
'n-threads': Parameters.defaultParameters['n-threads'],
}
)
result = calc(struct, parameters=new_parameters)
else:
result = calc(struct)
# iterate over all atoms to get SASA and residue name
for idx in range(struct.nAtoms()):
atname = struct.atomName(idx).strip()
resname = struct.residueName(idx).strip()
resid = int(struct.residueNumber(idx))
chain = struct.chainLabel(idx)
at_uid = (chain, resname, resid, atname)
res_uid = (chain, resname, resid)
asa = result.atomArea(idx)
asa_data[at_uid] = asa
# add asa to residue
rsa_data[res_uid] = rsa_data.get(res_uid, 0) + asa
# If residue name is unknown, this is probably an ion,
# set a relative accessibility of -1
if resname not in _rsa and (
resname not in VALID_AA + VALID_BASES
or resname[:2] in VALID_IONS
or resname in naccess_unsupported_aa
):
logging.warning(f"UPDATED RSA for {resname}")
_rsa[resname] = -1
_rsa_bb[resname] = -1
_rsa_sc[resname] = -1
# 3 cases: Regular amino-acid, regular nucleic acid, ion
if (resname not in VALID_BASES and atname in ('C', 'N', 'O')) or \
(resname in VALID_BASES and atname in ('P', 'C1', 'C9')):
rel_main_chain[res_uid] = rel_main_chain.get(res_uid, 0) + asa
elif all([
resname not in VALID_AA + VALID_BASES,
resname[:2] in VALID_IONS,
]):
rel_main_chain[res_uid] = rel_main_chain.get(res_uid, 0) + asa
rel_side_chain[res_uid] = rel_side_chain.get(res_uid, 0) + asa
else:
rel_side_chain[res_uid] = rel_side_chain.get(res_uid, 0) + asa
# convert total asa to relative asa
rsa_data.update(
(res_uid, asa / _rsa[res_uid[1]])
for res_uid, asa in rsa_data.items()
)
rel_main_chain.update(
(res_uid, asa / _rsa_bb[res_uid[1]] * 100)
for res_uid, asa in rel_main_chain.items()
)
rel_side_chain.update(
(res_uid, asa / _rsa_sc[res_uid[1]] * 100)
for res_uid, asa in rel_side_chain.items()
)
# We format to fit the pipeline
resid_access: dict[str, dict[int, dict[str, float]]] = {}
for res_uid, access in rel_main_chain.items():
chain = res_uid[0]
# resname = res_uid[1]
resnum = res_uid[2]
if chain not in resid_access:
resid_access[chain] = {}
resid_access[chain][resnum] = {
'side_chain_rel': rel_side_chain.get(res_uid, 0),
'main_chain_rel': access,
}
# Display accessible residues
for chain in resid_access:
logging.info(f"Chain: {chain} - {len(resid_access[chain])} residues")
return resid_access
[docs]
def apply_cutoff(
access_data: dict[str, dict[int, dict[str, float]]],
cutoff: float,
) -> dict[str, list[int]]:
"""Apply a cutoff to the sidechain relative accessibility."""
logging.info(f'Applying cutoff to side_chain_rel - {cutoff}')
# saving the results in a dictionary
result_dict: dict[str, list[int]] = {}
for chain in access_data:
result_list: list[int] = []
for res in access_data[chain]:
sc_rel_accessibility = access_data[chain][res]['side_chain_rel']
if sc_rel_accessibility >= cutoff * 100:
result_list.append(res)
result_list = list(set(result_list))
result_list.sort()
result_str = ','.join(map(str, result_list))
logging.info(f'Chain {chain} - {result_str}')
# putting the data in the dictionary
result_dict[chain] = result_list
return result_dict
[docs]
def export_passive(
result_dict: dict[str, list[int]],
prefix: str = '',
) -> None:
"""Export the exposed residues as passive to an actpass file."""
for chain in result_dict:
filename = Path(f"{prefix}passive_{chain}.actpass")
logging.info(f"Writing exposed residues as passive in: {filename}")
if filename.exists():
logging.error(
f"File {filename} already exists, nothing performed!"
)
else:
with open(filename, "w") as f:
f.write(os.linesep) # Add empty line as no active residues
f.write(f"{' '.join(map(str, result_dict[chain]))}")
[docs]
def calc_accessibility(
input_pdb_file: Union[Path, str],
cutoff: float = 0.4,
log_level: str = "INFO",
export_to_actpass: bool = False,
probe_radius: float = DEFAULT_PROBE_RADIUS,
) -> None:
"""Calculate the accessibility of the side chains and apply a cutoff.
Parameters
----------
input_pdb_file : str
Path to the PDB file.
cutoff : float
Relative cutoff for sidechain accessibility, default = 0.4
log_level : str
Logging level.
export_to_actpass : bool
Export the exposed residues as passive to an actpass file.
probe_radius : float
Probe radius for the accessibility calculation.
"""
logging.basicConfig(
level=log_level,
format='%(asctime)s L%(lineno)d %(levelname)s - %(message)s',
datefmt='%d/%m/%Y %H:%M:%S',
)
# Compute per-residues accessibilities
access_dic = get_accessibility(input_pdb_file, probe_radius)
# Filter residues based on accessibility cutoff
result_dict = apply_cutoff(access_dic, cutoff)
# export residues as passive to an actpass file
if export_to_actpass:
export_passive(
result_dict,
prefix=f'{Path(input_pdb_file).stem}_',
)