Source code for haddock.libs.libfcc

"""FCC related functions

NOTE: These functions were ported directly from `https://github.com/haddocking/fcc`!
"""



[docs]
class Element:
    """Defines a 'clusterable' Element.

    Attributes (declared through ``__slots__``):

    * ``name``: identifier given at construction time.
    * ``cluster``: identifier of the cluster the element belongs to;
      ``0`` means the element is unclustered.
    * ``neighbors``: set of the elements registered as neighbors of this one.
    """

    __slots__ = ["name", "cluster", "neighbors"]

    def __init__(self, name):
        self.name = name
        self.cluster = 0  # 0 == not assigned to any cluster yet
        self.neighbors = set()

    def add_neighbor(self, neighbor):
        """Adds another element to the neighbor set."""
        self.neighbors.add(neighbor)

    def assign_cluster(self, clust_id):
        """Assigns the Element to the cluster `clust_id`. 0 if unclustered."""
        self.cluster = clust_id





[docs]
class Cluster:
    """Defines a Cluster.

    A Cluster is created with a name and a center. The center (and every
    member) must expose ``cluster``, ``neighbors`` and ``assign_cluster()``,
    as the Element class does.

    Attributes (declared through ``__slots__``):

    * ``name``: identifier of the cluster; it is the value written to the
      ``cluster`` attribute of the center and of every member.
    * ``center``: the element the cluster is built around.
    * ``members``: list of the elements in the cluster, center excluded.
    """

    __slots__ = ["name", "center", "members"]

    def __init__(self, name, center):
        self.name = name
        self.center = center
        self.members = []

        # A new cluster immediately claims the free neighbors of its center.
        self.populate()

    def __len__(self):
        return len(self.members) + 1  # +1 for the center

    def populate(self):
        """
        Populates the Cluster member list through the
        neighbors of its center.

        The center is assigned to this cluster first; then every neighbor
        of the center that is still unclustered (falsy ``cluster``) is
        appended to the member list and assigned to this cluster.
        """
        center = self.center
        center.assign_cluster(self.name)

        for neighbor in center.neighbors:
            if neighbor.cluster:
                # Already owned by a cluster (possibly this one): skip it.
                continue
            self.members.append(neighbor)
            neighbor.assign_cluster(self.name)

    def add_member(self, element):
        """
        Adds one single element to the cluster and assigns it to it.
        """
        self.members.append(element)
        element.assign_cluster(self.name)





[docs]
def cluster_elements(e_pool, threshold):
    """
    Groups Elements within a given threshold
    together in the same cluster.

    `e_pool` is a dictionary mapping keys to elements (objects exposing
    ``cluster`` and ``neighbors``). `threshold` is the minimum cluster
    size, center included.

    Clusters are created greedily: at every round the unclustered element
    with the most unclustered neighbors becomes the center of a new
    cluster (ties are resolved in favor of the largest key). The loop
    stops when every element is clustered or when the best candidate has
    too few free neighbors to reach `threshold`.

    Returns a tuple ``(e_pool, cluster_list)``; the elements of `e_pool`
    are modified in place, and clusters are numbered from 1 in creation
    order.
    """

    cluster_list = []
    min_neighbors = threshold - 1  # Account for center
    cluster_number = 1
    while True:
        # Keys of the elements that are still clusterable
        candidates = [key for key in e_pool if not e_pool[key].cluster]
        if not candidates:  # No more elements to cluster
            break

        # Select the cluster center: the element with the largest number of
        # unclustered neighbors. Only the maximum is needed, so there is no
        # reason to sort the whole candidate list.
        free_neighbors, center_key = max(
            (sum(1 for nb in e_pool[key].neighbors if not nb.cluster), key)
            for key in candidates
        )

        # Stop as soon as the best remaining candidate is below threshold
        if free_neighbors < min_neighbors:
            break

        # Create the cluster; its constructor claims the center's neighbors
        cluster_list.append(Cluster(cluster_number, e_pool[center_key]))
        cluster_number += 1

    return e_pool, cluster_list




[docs]
def output_clusters(handle, cluster):
    """Writes one line per cluster to `handle`.

    Each line holds the cluster name, the name of its center and the names
    of its members in ascending name order, every name followed by a
    single space, and is terminated by a newline.
    """

    emit = handle.write

    for clust in cluster:
        emit(f"Cluster {clust.name!s} -> {clust.center.name!s} ")
        ordered = list(clust.members)
        ordered.sort(key=lambda member: member.name)
        for member in ordered:
            emit(f"{member.name!s} ")
        emit("\n")




[docs]
def read_matrix(path, cutoff_param, strictness):
    """
    Reads in a four column matrix (1 2 0.123 0.456\n)
    and creates a dictionary of Elements.

    Every line holds two integer identifiers (reference and mobile)
    followed by two floats: the reference->mobile value and the
    mobile->reference value. Blank lines are skipped.

    The strictness factor is a <float> that multiplies by the cutoff
    to produce a new cutoff for the second half of the matrix. Used to
    allow some variability while keeping very small interfaces from clustering
    with anything remotely similar.

    Returns a dictionary mapping each integer identifier found in the file
    to its Element. An Element is created for every identifier, whether or
    not it ends up with neighbors.

    Raises ValueError if a non-blank line does not hold exactly four
    columns or if a column cannot be converted to a number.
    """

    cutoff = float(cutoff_param)
    partner_cutoff = cutoff * float(strictness)

    elements = {}

    # The context manager closes the file even if a line fails to parse.
    with open(path, "r") as handle:
        for line in handle:
            if not line.strip():
                # Tolerate empty lines (e.g. a trailing newline).
                continue

            ref, mobi, d_rm, d_mr = line.split()
            ref = int(ref)
            mobi = int(mobi)
            d_rm = float(d_rm)
            d_mr = float(d_mr)

            # Create or retrieve Elements
            r = elements.get(ref)
            if r is None:
                r = elements[ref] = Element(ref)

            m = elements.get(mobi)
            if m is None:
                m = elements[mobi] = Element(mobi)

            # Assign neighbors. The relation is directional: an element gains
            # a neighbor when its own value reaches the cutoff and the
            # partner's value reaches the (strictness-scaled) partner cutoff.
            if d_rm >= cutoff and d_mr >= partner_cutoff:
                r.add_neighbor(m)
            if d_mr >= cutoff and d_rm >= partner_cutoff:
                m.add_neighbor(r)

    return elements




[docs]
def parse_contact_file(f_list, ignore_chain):
    """Parses a list of contact files.

    `f_list` is an iterable of file paths given as strings; entries that
    are empty or whitespace-only are skipped. Every line of a contact file
    is converted to one integer.

    If `ignore_chain` is false, each file yields the <set> of the integers
    read from its lines. If it is true, the character at index 5 and the
    last character of every line are dropped before the conversion, and
    each file yields a <list> (duplicates and file order are kept).

    Returns a list with one entry per parsed file, in `f_list` order.
    """

    contacts = []
    for con_f in f_list:
        if not con_f.strip():
            continue

        # Open each file in a context manager so the handle is released as
        # soon as the file is parsed instead of leaking until collection.
        with open(con_f) as handle:
            if ignore_chain:
                # NOTE(review): index 5 presumably holds the chain identifier
                # and the dropped last character is the trailing newline; a
                # final line without a newline would lose its last digit.
                # Confirm against the contact file format.
                parsed = [int(line[0:5] + line[6:-1]) for line in handle]
            else:
                parsed = {int(line) for line in handle}

        contacts.append(parsed)

    return contacts




[docs]
def calculate_fcc(list_a, list_b):
    """
    Calculates the number of common elements between two sets.

    Elements are compared as they are, so whatever chain information is
    encoded in them is taken into account.

    `list_a` must be a set (or offer a compatible ``intersection`` method).

    Returns the count twice, as a tuple ``(cc, cc_v)``, one value per
    direction of the comparison. The intersection is symmetric, so it is
    computed only once. Turning the counts into fractions is left to the
    caller.
    """

    ncommon = len(list_a.intersection(list_b))

    return ncommon, ncommon




[docs]
def calculate_fcc_nc(list_a, list_b):
    """
    Calculates the number of common elements between two lists
    not taking into account chain IDs.

    The elements of the shorter list that also occur in the longer one are
    counted (`list_a` is used as the shorter one when both have the same
    length). Duplicates in the shorter list are counted every time.

    Returns the count twice, as a tuple ``(ncommon, ncommon)``. Turning
    the counts into fractions is left to the caller.
    """

    # sorted() is ascending and stable: shorter list first, list_a on ties.
    shorter, longer = sorted([list_a, list_b], key=len)

    # Membership tests against a set are O(1) instead of a scan of the
    # whole list; fall back to the list itself for unhashable elements.
    try:
        lookup = set(longer)
    except TypeError:
        lookup = longer

    ncommon = sum(1 for ele in shorter if ele in lookup)
    return ncommon, ncommon




[docs]
def calculate_pairwise_matrix(contacts, ignore_chain):
    """Calculates a matrix of pairwise fraction of common contacts (FCC).
    Outputs numeric indexes.

    contacts: list_of_unique_pairs_of_residues [set/list]
    ignore_chain: if true the counts come from `calculate_fcc_nc`,
    otherwise from `calculate_fcc`.

    This is a generator. For every pair of complexes ``i < k`` it yields a
    tuple ``(i + 1, k + 1, fcc, fcc_v)`` with 1-based indexes, where:

    * ``fcc`` is the common count divided by the size of ``contacts[i]``;
    * ``fcc_v`` is the reverse common count divided by the size of
      ``contacts[k]``.

    A complex without contacts gets a fraction of 0.
    """

    # Store reciprocal sizes so that each fraction is a single multiplication;
    # an empty contact collection uses a factor of 0 instead of dividing by
    # zero.
    inv_sizes = [1.0 / len(con) if len(con) else 0 for con in contacts]

    calc_fcc = calculate_fcc_nc if ignore_chain else calculate_fcc

    n_contacts = len(contacts)
    for i, con_i in enumerate(contacts):
        inv_i = inv_sizes[i]
        for k in range(i + 1, n_contacts):
            cc, cc_v = calc_fcc(con_i, contacts[k])
            # The reverse fraction is built from the reverse count returned
            # by the helper, rather than silently discarding it.
            yield i + 1, k + 1, cc * inv_i, cc_v * inv_sizes[k]