Source code for conkit.core.contactmap
# coding=utf-8
#
# BSD 3-Clause License
#
# Copyright (c) 2016-21, University of Liverpool
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""ContactMap container used throughout ConKit"""
from __future__ import division
__author__ = "Felix Simkovic"
__date__ = "03 Aug 2016"
__version__ = "0.13.3"
import collections
import numpy as np
import operator
import sys
from conkit.core.entity import Entity
from conkit.core.struct import Gap, Residue
from conkit.core.mappings import AminoAcidMapping, ContactMatchState
from conkit.core.sequence import Sequence
from conkit.misc import normalize
[docs]class ContactMap(Entity):
"""A contact map object representing a single prediction
The :obj:`~conkit.core.contactmap.ContactMap` class represents a data structure to hold a single
contact map prediction in one place. It contains functions to store, manipulate and organise
:obj:`~conkit.core.contact.Contact` instances.
Examples
--------
>>> from conkit.core import Contact, ContactMap
>>> contact_map = ContactMap("example")
>>> contact_map.add(Contact(1, 10, 0.333))
>>> contact_map.add(Contact(5, 30, 0.667))
>>> print(contact_map)
ContactMap(id="example" ncontacts=2)
Attributes
----------
coverage : float
The sequence coverage score
id : str
A unique identifier
ncontacts : int
The number of :obj:`~conkit.core.contact.Contact` instances in the :obj:`~conkit.core.contactmap.ContactMap`
precision : float
The precision (Positive Predictive Value) score
repr_sequence : :obj:`~conkit.core.sequence.Sequence`
The representative :obj:`~conkit.core.sequence.Sequence` associated with the :obj:`~conkit.core.contactmap.ContactMap`
repr_sequence_altloc : :obj:`~conkit.core.sequence.Sequence`
The representative altloc :obj:`~conkit.core.sequence.Sequence` associated with the :obj:`~conkit.core.contactmap.ContactMap`
sequence : :obj:`~conkit.core.sequence.Sequence`
The :obj:`~conkit.core.sequence.Sequence` associated with the :obj:`~conkit.core.contactmap.ContactMap`
top_contact : :obj:`~conkit.core.contact.Contact`
The first :obj:`~conkit.core.contact.Contact` entry
"""
__slots__ = ["_sequence"]
def __init__(self, id):
"""Initialise a new contact map"""
self._sequence = None
super(ContactMap, self).__init__(id)
def __repr__(self):
return '{}(id="{}", ncontacts={})'.format(self.__class__.__name__, self.id, self.ncontacts)
@property
def coverage(self):
"""The sequence coverage score
The coverage score is calculated by dividing the number of residues
covered by the predicted contact pairs :math:`x_{cov}` by the number
of residues in the sequence :math:`L`.
.. math::
Coverage=\\frac{x_{cov}}{L}
Returns
-------
float
The calculated coverage score
See Also
--------
precision
"""
seq = np.array(self.repr_sequence.seq_encoded, dtype=np.int64)
cov = seq != AminoAcidMapping["X"].value
return np.sum(cov) / float(seq.shape[0])
@property
def empty(self):
"""Empty contact map"""
return len(self) < 1
@property
def ncontacts(self):
"""The number of :obj:`~conkit.core.contact.Contact` instances
Returns
-------
int
The number of contacts in the :obj:`~conkit.core.contactmap.ContactMap`
"""
return len(self)
@property
def short_range(self):
"""The short range contacts found :obj:`~conkit.core.contactmap.ContactMap`
Short range contacts are defined as 6 <= x <= 11 residues apart
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
A copy of the :obj:`~conkit.core.contactmap.ContactMap` with short-range contacts only
See Also
--------
medium_range, long_range
"""
return self.remove_neighbors(min_distance=6, max_distance=11)
@property
def medium_range(self):
"""The medium range contacts found :obj:`~conkit.core.contactmap.ContactMap`
Medium range contacts are defined as 12 <= x <= 23 residues apart
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
A copy of the :obj:`~conkit.core.contactmap.ContactMap` with medium-range contacts only
See Also
--------
short_range, long_range
"""
return self.remove_neighbors(min_distance=12, max_distance=23)
@property
def long_range(self):
"""The long range contacts found :obj:`~conkit.core.contactmap.ContactMap`
Long range contacts are defined as 24 <= x residues apart
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
A copy of the :obj:`~conkit.core.contactmap.ContactMap` with long-range contacts only
See Also
--------
short_range, medium_range
"""
return self.remove_neighbors(min_distance=24)
@property
def precision(self):
"""The precision (Positive Predictive Value) score
The precision value is calculated by analysing the true and false
postive contacts.
.. math::
Precision=\\frac{TruePositives}{TruePositives + FalsePositives}
The status of each contact, i.e true or false positive status, can be
determined by running the :func:`match` function providing a reference
structure.
Returns
-------
float
The calculated precision score
See Also
--------
coverage, recall
"""
if self.empty:
return 0.0
import warnings
statuses = np.array([c.status for c in self])
tp = (statuses == ContactMatchState.true_positive.value).sum()
fp = (statuses == ContactMatchState.false_positive.value).sum()
unk = (statuses == ContactMatchState.unknown.value).sum()
if tp + fp == 0:
warnings.warn("No true positive or false positive found in your contact map. Match two ContactMaps first.")
return 0.0
elif unk > 0:
warnings.warn(
"Some contacts between the ContactMaps are unmatched due to non-identical sequences. "
"The precision value might be inaccurate."
)
return tp / float(tp + fp)
@property
def recall(self):
"""The Recall (Sensitivity) score
The recall value is calculated by analysing the true positive and
false negative contacts.
.. math::
Recall=\\frac{TruePositives}{TruePositives + FalseNegatives}
The status of each contact, i.e true positive and false negative status, can be
determined by running the :meth:`~conkit.core.contactmap.ContactMap.match` function
providing a reference structure.
Note
----
To determine and **save** the false negatives, please use the `add_false_negatives` keyword when
running the :meth:`~conkit.core.contactmap.ContactMap.match` function.
You may wish to run :meth:`~conkit.core.contactmap.ContactMap.remove_false_negatives`
afterwards.
Returns
-------
float
The calculated recall score
See Also
--------
coverage, precision
"""
if self.empty:
return 0.0
import warnings
statuses = np.array([c.status for c in self])
tp = (statuses == ContactMatchState.true_positive.value).sum()
fn = (statuses == ContactMatchState.false_negative.value).sum()
unk = (statuses == ContactMatchState.unknown.value).sum()
if tp + fn == 0:
warnings.warn(
"No true positive or false negative contacts found in your contact map. " "Match two ContactMaps first."
)
return 0.0
elif unk > 0:
warnings.warn(
"Some contacts between the ContactMaps are unmatched due to non-identical sequences. "
"The recall value might be inaccurate."
)
return tp / float(tp + fn)
@property
def repr_sequence(self):
"""The representative :obj:`~conkit.core.sequence.Sequence` associated
with the :obj:`~conkit.core.contactmap.ContactMap`
The peptide sequence constructed from the available
contacts using the normal res_seq positions
Returns
-------
:obj:`~conkit.core.sequence.Sequence`
Raises
------
:exc:`TypeError`
Sequence undefined
See Also
--------
repr_sequence_altloc, sequence
"""
if isinstance(self.sequence, Sequence):
res_seqs = np.unique(np.array(self.as_list()).flatten()).tolist()
return self._construct_repr_sequence(res_seqs)
else:
raise TypeError("Define the sequence as Sequence() instance")
@property
def repr_sequence_altloc(self):
"""The representative altloc :obj:`~conkit.core.sequence.Sequence` associated
with the :obj:`~conkit.core.contactmap.ContactMap`
The peptide sequence constructed from the available
contacts using the :attr:`~conkit.core.contact.Contact.res_altseq` positions
Returns
-------
:obj:`~conkit.core.sequence.Sequence`
Raises
------
:exc:`TypeError`
Sequence undefined
See Also
--------
repr_sequence, sequence
"""
if isinstance(self.sequence, Sequence):
res_seqs = np.unique(np.array(self.as_list(altloc=True)).flatten()).tolist()
return self._construct_repr_sequence(res_seqs)
else:
raise TypeError("Define the sequence as Sequence() instance")
@property
def singletons(self):
"""Singleton contact pairs in the current :obj:`~conkit.core.contactmap.ContactMap`
Contacts are identified by a distance-based grouping analysis. A :obj:`~conkit.core.contact.Contact` is
classified as singleton if not other contacts are found within 2 residues.
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
"""
from conkit.core.ext.c_contactmap import c_singletons
X = np.array(self.as_list(), dtype=np.int64)
throwables = np.full(X.shape[0], False, dtype=np.bool)
c_singletons(X, 2, throwables)
singletons = self.deepcopy()
for i, contact in enumerate(self):
if throwables[i]:
singletons.remove(contact.id)
return singletons
@property
def highest_residue_number(self):
"""The highest residue sequence number among contacts in the :obj:`~conkit.core.contactmap.ContactMap`
Returns
-------
int
Highest residue sequence number in the contact map
"""
if len(self) == 0:
return None
else:
return max([max(contact.id) for contact in self])
@property
def sequence(self):
"""The :obj:`~conkit.core.sequence.Sequence` associated with the :obj:`~conkit.core.contactmap.ContactMap`
Returns
-------
:obj:`~conkit.core.sequence.Sequence`
A :obj:`~conkit.core.sequence.Sequence` object
See Also
--------
repr_sequence, repr_sequence_altloc
"""
return self._sequence
@sequence.setter
def sequence(self, sequence):
"""Associate a :obj:`~conkit.core.sequence.Sequence` instance with the :obj:`~conkit.core.contactmap.ContactMap`
Parameters
----------
sequence : :obj:`~conkit.core.sequence.Sequence`
Raises
------
:exc:`TypeError`
Incorrect hierarchy instance provided
"""
if isinstance(sequence, Sequence):
self._sequence = sequence
else:
raise TypeError("Instance of Sequence() required: {}".format(sequence))
@property
def top_contact(self):
"""The first :obj:`~conkit.core.contact.Contact` entry
Returns
-------
:obj:`~conkit.core.contact.Contact`
The first :obj:`~conkit.core.contact.Contact` entry in :obj:`~conkit.core.contactfile.ContactFile`
"""
return self.top
def _construct_repr_sequence(self, res_seqs):
"""Construct the representative sequence"""
representative_sequence = ""
for i in np.arange(1, self.sequence.seq_len + 1):
if i in res_seqs:
representative_sequence += self.sequence.seq[i - 1]
else:
representative_sequence += "-"
return Sequence(self.sequence.id + "_repr", representative_sequence)
[docs] def as_dict(self, altloc=False):
"""The :obj:`~conkit.core.contactmap.ContactMap` as a dictionary where each key corresponds with the residue
number and the values are sets of tuples with the :attr:`~conkit.core.contact.Contact.id`
Parameters
----------
altloc : bool
Use the :attr:`~conkit.core.contact.Contact.res_altloc` positions [default: False]
Returns
-------
dict
A dictionary represnetation of the :obj:`~conkit.core.contactmap.ContactMap` instance
"""
if self.sequence is None:
seq_len = self.highest_residue_number
else:
seq_len = len(self.sequence)
result = {}
if altloc:
for resn in range(1, seq_len + 1):
result[resn] = {(c.res2_altseq, c.res2_altseq) for c in self if resn in (c.res1_altseq, c.res2_altseq)}
else:
for resn in range(1, seq_len + 1):
result[resn] = {(c.res1_seq, c.res2_seq) for c in self if resn in (c.res1_seq, c.res2_seq)}
return result
[docs] def as_list(self, altloc=False):
"""The :obj:`~conkit.core.contactmap.ContactMap` as a 2D-list containing contact-pair residue indexes
Parameters
----------
altloc : bool
Use the :attr:`~conkit.core.contact.Contact.res_altloc` positions [default: False]
"""
if altloc:
return [[c.res1_altseq, c.res2_altseq] for c in self]
else:
return [[c.res1_seq, c.res2_seq] for c in self]
[docs] def as_set(self, altloc=False):
"""The :obj:`~conkit.core.contactmap.ContactMap` as a 2D-set containing contact-pair residue indexes
Parameters
----------
altloc : bool
Use the :attr:`~conkit.core.contact.Contact.res_altloc` positions [default: False]
"""
if altloc:
return {(c.res1_altseq, c.res2_altseq) for c in self}
else:
return {c.id for c in self}
[docs] def set_sequence_register(self, altloc=False):
"""Assign the amino acids from :obj:`~conkit.core.sequence.Sequence` to all :obj:`~conkit.core.contact.Contact` instances
Parameters
----------
altloc : bool
Use the :attr:`~conkit.core.contact.Contact.res_altloc` positions [default: False]
Raises
------
:exc:`ValueError`
Undefined sequence
"""
if self.sequence is None:
raise ValueError("No sequence defined")
seq_len = len(self.sequence)
for c in self:
if altloc:
res1_index = c.res1_altseq
res2_index = c.res2_altseq
else:
res1_index = c.res1_seq
res2_index = c.res2_seq
if res1_index <= seq_len and res2_index <= seq_len:
c.res1 = self.sequence.seq[res1_index - 1]
c.res2 = self.sequence.seq[res2_index - 1]
else:
raise ValueError('Contact {} is out of sequence bounds'.format(c.id))
[docs] def get_jaccard_index(self, other):
"""Calculate the Jaccard index between two :obj:`~conkit.core.contactmap.ContactMap` instances
This score analyzes the difference of the predicted contacts from two maps,
.. math::
J_{x,y}=\\frac{\\left|x \\cap y\\right|}{\\left|x \\cup y\\right|}
where :math:`x` and :math:`y` are the sets of predicted contacts from two
different predictors, :math:`\\left|x \\cap y\\right|` is the number of
elements in the intersection of :math:`x` and :math:`y`, and the
:math:`\\left|x \\cup y\\right|` represents the number of elements in the
union of :math:`x` and :math:`y`.
The J-score has values in the range of :math:`[0, 1]`, with a value of :math:`1`
corresponding to identical contact maps and :math:`0` to dissimilar ones.
Parameters
----------
other : :obj:`~conkit.core.contactmap.ContactMap`
A ConKit :obj:`~conkit.core.contactmap.ContactMap`
Returns
-------
float
The Jaccard index
See Also
--------
match, precision
Warning
-------
The Jaccard distance ranges from :math:`[0, 1]`, where :math:`1` means
the maps contain identical contacts pairs.
Note
----
The Jaccard index is different from the Jaccard distance mentioned in [#]_. The
Jaccard distance corresponds to :math:`1-Jaccard_{index}`.
.. [#] Q. Wuyun, W. Zheng, Z. Peng, J. Yang (2016). A large-scale comparative assessment
of methods for residue-residue contact prediction. *Briefings in Bioinformatics*,
[doi: 10.1093/bib/bbw106].
"""
union = len(self) + np.sum([1 for contact in other if contact.id not in self])
if union == 0:
return 1.0
intersection = np.sum([1 for contact in self if contact.id in other])
return float(intersection) / union
[docs] def get_contact_density(self, bw_method="amise"):
"""Calculate the contact density in the contact map using Gaussian kernels
Various algorithms can be used to estimate the bandwidth. To calculate the
bandwidth for an 1D data array ``X`` with ``n`` data points and ``d`` dimensions,
the listed algorithms have been implemented. Please note, in rules 2 and 3, the
value of :math:`\\sigma` is the smaller of the standard deviation of ``X`` or
the normalized interquartile range.
Parameters
----------
bw_method : str, optional
The bandwidth estimator to use [default: amise]
Returns
-------
list
The list of per-residue density estimates
Raises
------
:exc:`ImportError`
Cannot find scikit-learn package
:exc:`ValueError`
Undefined bandwidth method
:exc:`ValueError`
:obj:`~conkit.core.contactmap.ContactMap` is empty
"""
try:
import sklearn.neighbors
except ImportError as e:
raise ImportError(e)
if self.empty:
raise ValueError("ContactMap is empty")
x = np.array([i for c in self for i in np.arange(c.res1_seq, c.res2_seq + 1)], dtype=np.int64)[:, np.newaxis]
x_fit = np.arange(x.min(), x.max() + 1)[:, np.newaxis]
from conkit.misc.bandwidth import bandwidth_factory
bandwidth = bandwidth_factory(bw_method)(x).bw
kde = sklearn.neighbors.KernelDensity(bandwidth=bandwidth).fit(x)
return np.exp(kde.score_samples(x_fit)).tolist()
[docs] def set_scalar_score(self):
"""Calculate and set the :attr:`~conkit.core.contact.Contact.scalar_score` for the
:obj:`~conkit.core.contactmap.ContactMap`
This score is a scaled score for all raw scores in a contact
map. It is defined by the formula
.. math::
{x}'=\\frac{x}{\\overline{d}}
where :math:`x` corresponds to the raw score of each predicted
contact and :math:`\\overline{d}` to the mean of all raw scores.
This score is described in more detail in [#]_.
.. [#] S. Ovchinnikov, L. Kinch, H. Park, Y. Liao, J. Pei, D.E. Kim,
H. Kamisetty, N.V. Grishin, D. Baker (2015). Large-scale determination
of previously unsolved protein structures using evolutionary information.
*Elife* **4**, e09248.
"""
raw_scores = np.array([c.raw_score for c in self])
sca_scores = raw_scores / np.mean(raw_scores)
for contact, sca_score in zip(self, sca_scores):
contact.scalar_score = sca_score
[docs] def find(self, register, altloc=False, strict=False, inverse=False):
"""Find all contacts with one or both residues in ``register``
Parameters
----------
register : int, list, tuple
A list of residue register to find
altloc : bool
Use the :attr:`~conkit.core.contact.Contact.res_altloc` positions [default: False]
strict : bool
Both residues of :obj:`~conkit.core.contact.Contact` in register [default: False]
inverse : bool
Select non-matching residues [default: False]
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
A modified version of the :obj:`~conkit.core.contactmap.ContactMap` containing
the found contacts
"""
if isinstance(register, int):
register = [register]
register = set(register)
def discard(*args, **kwargs):
output = (operator.and_ if strict else operator.or_)(*args, **kwargs)
return output if inverse else not output
contact_map = self.deepcopy()
for contactid in self.as_list(altloc=altloc):
if discard(contactid[0] in register, contactid[1] in register):
contact_map.remove(tuple(contactid))
return contact_map
[docs] def match_naive(self, other, add_false_negatives=False, match_other=False, inplace=False):
"""Modify both hierarchies so residue numbers match one another
This function performs a naive match. It assumes the numbering in both
contact maps is equivalent and it will not attempt to perform a sequence
alignment.
Parameters
----------
other : :obj:`~conkit.core.contactmap.ContactMap`
A ConKit :obj:`~conkit.core.contactmap.ContactMap`
add_false_negatives : bool
Add false negatives to the `self`, which are contacts in `other` but not in `self`
Required for :meth:`~conkit.core.contactmap.ContactMap.recall` and can be undone
with :meth:`~conkit.core.contactmap.ContactMap.remove_false_negatives`
match_other: bool, optional
Match `other` to `self` [default: False]
inplace : bool, optional
Replace the original contacts [default: False]
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
:obj:`~conkit.core.contactmap.ContactMap` instance, regardless of inplace
"""
contact_map1 = self._inplace(inplace)
if match_other:
contact_map2 = other._inplace(inplace)
else:
contact_map2 = other._inplace(False)
contact_map1_set = contact_map1.as_set()
contact_map2_set = contact_map2.as_set()
for contact in contact_map1:
if contact.id in contact_map2_set:
contact.true_positive = True
else:
contact.false_positive = True
if not add_false_negatives:
return contact_map1
for contact in contact_map2:
if contact.id not in contact_map1_set:
contact.false_negative = True
contact_map1.add(contact)
return contact_map1
[docs] def match(self, other, add_false_negatives=False, match_other=False, remove_unmatched=False, renumber=False,
inplace=False):
"""Modify both hierarchies so residue numbers match one another.
This function is key when plotting contact maps or visualising
contact maps in 3-dimensional space. In particular, when residue
numbers in the structure do not start at count 0 or when peptide
chain breaks are present.
Parameters
----------
add_false_negatives : bool
Add false negatives to the `self`, which are contacts in `other` but not in `self`
Required for :meth:`~conkit.core.contactmap.ContactMap.recall` and can be undone
with :meth:`~conkit.core.contactmap.ContactMap.remove_false_negatives`
other : :obj:`~conkit.core.contactmap.ContactMap`
A ConKit :obj:`~conkit.core.contactmap.ContactMap`
match_other: bool, optional
Match `other` to `self` [default: False]
remove_unmatched : bool, optional
Remove all unmatched contacts [default: False]
renumber : bool, optional
Renumber the :attr:`~conkit.core.contact.Contact.res_seq` entries [default: False]
If ``True``, :attr:`~conkit.core.contact.Contact.res1_seq` and
:attr:`~conkit.core.contact.Contact.res2_seq` changes
but :attr:`~conkit.core.contact.Contact.id` remains the same
inplace : bool, optional
Replace the saved order of contacts [default: False]
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
:obj:`~conkit.core.contactmap.ContactMap` instance, regardless of inplace
Raises
------
:exc:`ValueError`
At least one of the input maps :obj:`~conkit.core.contactmap.ContactMap` is empty
:exc:`ValueError`
Error creating reliable keymap matching the sequence in :obj:`~conkit.core.contactmap.ContactMap`
:exc:`RuntimeError`
Error matching the contacts in :obj:`~conkit.core.contactmap.ContactMap`
"""
contact_map1 = self._inplace(inplace)
if match_other:
contact_map2 = other._inplace(inplace)
else:
contact_map2 = other._inplace(False)
if contact_map1.empty and add_false_negatives:
for contact in contact_map2:
contact.false_negative = True
if contact_map2.empty:
for contact in contact_map1:
contact.false_positive = True
if contact_map2.empty or contact_map1.empty:
return contact_map1
# ================================================================
# 1. Align all sequences
# ================================================================
config = dict(id_chars=2, nonid_chars=1, gap_open_pen=-0.5, gap_ext_pen=-0.1, inplace=False)
contact_map1_full_sequence, contact_map2_full_sequence = contact_map1.sequence.align_local(
contact_map2.sequence, **config
)
config["inplace"] = True
config["gap_ext_pen"] = -0.2
_, contact_map1_repr_sequence = contact_map1_full_sequence.align_local(contact_map1.repr_sequence, **config)
_, contact_map2_repr_sequence = contact_map2_full_sequence.align_local(
contact_map2.repr_sequence_altloc, **config
)
config["gap_open_pen"] = -1.0
config["gap_ext_pen"] = -0.5
contact_map1_repr_sequence, contact_map2_repr_sequence = contact_map1_repr_sequence.align_local(
contact_map2_repr_sequence, **config
)
# ================================================================
# 2. Identify TPs in other, map them, and match them to self
# ================================================================
encoded_repr = np.asarray([contact_map1_repr_sequence.seq_ascii, contact_map2_repr_sequence.seq_ascii])
contact_map1_keymap = ContactMap._create_keymap(contact_map1)
contact_map2_keymap = ContactMap._create_keymap(contact_map2, altloc=True)
msg = "Error creating reliable keymap matching the sequence in ContactMap: {}"
if len(contact_map1_keymap) != (encoded_repr[0] != ord("-")).sum():
raise ValueError(msg.format(contact_map1.id))
elif len(contact_map2_keymap) != (encoded_repr[1] != ord("-")).sum():
raise ValueError(msg.format(contact_map2.id))
contact_map1_keymap = ContactMap._insert_states(encoded_repr[0], contact_map1_keymap)
contact_map2_keymap = ContactMap._insert_states(encoded_repr[1], contact_map2_keymap)
contact_map1_keymap = ContactMap._reindex_by_keymap(contact_map1_keymap)
contact_map2_keymap = ContactMap._reindex_by_keymap(contact_map2_keymap)
contact_map2 = ContactMap._adjust(contact_map2, contact_map2_keymap)
residues_map2 = np.flatnonzero(np.asarray(contact_map2_full_sequence.seq_ascii) != ord("-")) + 1
for contact in contact_map1:
_id = (contact.res1_seq, contact.res2_seq)
_id_alt = tuple(r.res_seq for r in contact_map2_keymap for i in _id if i == r.res_altseq)
if any(i == Gap.IDENTIFIER for i in _id_alt) and any(j not in residues_map2 for j in _id):
contact_map1[_id].status = ContactMatchState.unknown
elif all(i in residues_map2 for i in _id):
if _id_alt in contact_map2:
contact_map1[_id].status = ContactMatchState.true_positive
else:
contact_map1[_id].status = ContactMatchState.false_positive
else:
raise RuntimeError("Error matching two contact maps - please report this bug")
# ================================================================
# 3. Add false negatives
# ================================================================
if add_false_negatives:
for contactid in contact_map2.as_list():
contactid = tuple(contactid)
if contactid not in contact_map1:
contact = contact_map2[contactid].copy()
contact.false_negative = True
contact_map1.add(contact)
# ================================================================
# 4. Remove unmatched contacts
# ================================================================
if remove_unmatched:
for contactid in contact_map1.as_list():
contactid = tuple(contactid)
if contact_map1[contactid].status_unknown:
contact_map1.remove(contactid)
# ================================================================
# 5. Renumber the contact map 1 based on contact map 2
# ================================================================
if renumber:
contact_map1 = ContactMap._renumber(contact_map1, contact_map1_keymap, contact_map2_keymap)
return contact_map1
[docs] def reindex(self, index, altloc=False, inplace=False):
"""Re-index the :obj:`~conkit.core.contactmap.ContactMap`
Parameters
----------
index : int
The new starting index [assigned to the lowest existing index in the contact map]
altloc : bool
Use the res_altloc positions [default: False]
inplace : bool
Replace the saved order of contacts [default: False]
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
The reference to the :obj:`~conkit.core.contactmap.ContactMap`, regardless of inplace
Raises
------
:exc:`ValueError`
Index must be positive
"""
if index < 0:
raise ValueError("Index must be positive!")
contact_map = self._inplace(inplace)
if contact_map.empty:
return contact_map
res1s, res2s = zip(*contact_map.as_list(altloc=altloc))
offset = min(res1s) - index
for contact in contact_map:
if altloc:
contact.res1_altseq -= offset
contact.res2_altseq -= offset
else:
contact.res1_seq -= offset
contact.res2_seq -= offset
for contact in contact_map:
contact.id = (contact.res1_seq, contact.res2_seq)
return contact_map
[docs] def remove_false_negatives(self, inplace=False):
"""Remove false negatives from the contact map
Parameters
----------
min_distance : int, optional
The minimum number of residues between contacts [default: 5]
max_distance : int, optional
The maximum number of residues between contacts [default: :const:`sys.maxsize`]
inplace : bool, optional
Replace the saved order of contacts [default: False]
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
The reference to the :obj:`~conkit.core.contactmap.ContactMap`, regardless of inplace
"""
contact_map = self._inplace(inplace)
for contactid in contact_map.as_list():
if contact_map[tuple(contactid)].false_negative:
contact_map.remove(tuple(contactid))
return contact_map
[docs] def remove_neighbors(self, min_distance=5, max_distance=sys.maxsize, inplace=False):
"""Remove contacts between neighboring residues
The algorithm works by keeping contact pairs that satisfy
``min_distance`` <= ``x`` <= ``max_distance``
Parameters
----------
min_distance : int, optional
The minimum number of residues between contacts [default: 5]
max_distance : int, optional
The maximum number of residues between contacts [default: :const:`sys.maxsize`]
inplace : bool, optional
Replace the saved order of contacts [default: False]
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
The reference to the :obj:`~conkit.core.contactmap.ContactMap`, regardless of inplace
"""
contact_map = self._inplace(inplace)
for contactid in contact_map.as_list():
if min_distance <= abs(contactid[1] - contactid[0]) <= max_distance:
continue
else:
contact_map.remove(tuple(contactid))
return contact_map
[docs] def slice_map(self, l_factor, seq_len=None, inplace=False):
"""Slice the contact map using a L * factor
Parameters
----------
l_factor : float
L/N factor to be applied
seq_len : int
Sequence length.
inplace : bool, optional
Replace the saved order of contacts [default: False]
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
The reference to the :obj:`~conkit.core.contactmap.ContactMap`, regardless of inplace
Raises
------
:exc:`ValueError`
Either seq_len must be provided or :attr:`~conkit.core.contactmap.ContactMap.sequence` defined
"""
if seq_len is None:
if self.sequence is None:
raise ValueError('Need to define a sequence or provide seq_len')
seq_len = self.sequence.seq_len
contact_map = self._inplace(inplace)
ncontacts = int(seq_len * l_factor)
contact_map.sort('raw_score', reverse=True, inplace=True)
new_contacts = {c.id: c for c in contact_map[:ncontacts]}
contact_map.child_list = list(new_contacts.values())
contact_map.child_dict = new_contacts
return contact_map
[docs] def filter(self, threshold, filter_by='raw_score', inplace=False):
"""Filter out contacts below selected threshold
Parameters
----------
threshold : int
Threshold to be applied in the filter
filter_by : str
Contact attribute to be used in the filter.
inplace : bool, optional
Replace the saved order of contacts [default: False]
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
The reference to the :obj:`~conkit.core.contactmap.ContactMap`, regardless of inplace
Raises
------
:exc:`TypeError`
threshold must be int or float
"""
if not isinstance(threshold, (int, float)):
raise TypeError("Score threshold must be an int or float!")
contact_map = self._inplace(inplace)
for contactid in contact_map.as_list():
contactid = tuple(contactid)
if float(getattr(contact_map[contactid], filter_by)) < threshold:
contact_map.remove(contactid)
return contact_map
[docs] def rescale(self, inplace=False):
"""Rescale the raw scores in :obj:`~conkit.core.contactmap.ContactMap`
Parameters
----------
inplace : bool, optional
Replace the saved order of contacts [default: False]
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
The reference to the :obj:`~conkit.core.contactmap.ContactMap`, regardless of inplace
"""
contact_map = self._inplace(inplace)
raw_scores = np.array([c.raw_score for c in contact_map])
norm_raw_scores = normalize(raw_scores)
if np.isnan(norm_raw_scores).all():
norm_raw_scores = np.where(norm_raw_scores == np.isnan, 0, 1)
for contact, norm_raw_score in zip(contact_map, norm_raw_scores):
contact.raw_score = norm_raw_score
return contact_map
[docs] def sort(self, kword, reverse=False, inplace=False):
"""Sort the :obj:`~conkit.core.contactmap.ContactMap`
Parameters
----------
kword : str
The dictionary key to sort contacts by
reverse : bool, optional
Sort the contact pairs in descending order [default: False]
inplace : bool, optional
Replace the saved order of contacts [default: False]
Returns
-------
:obj:`~conkit.core.contactmap.ContactMap`
The reference to the :obj:`~conkit.core.contactmap.ContactMap`, regardless of inplace
Raises
------
:exc:`ValueError`
``kword`` not in :obj:`~conkit.core.contactmap.ContactMap`
"""
contact_map = self._inplace(inplace)
contact_map._sort(kword, reverse)
return contact_map
[docs] def to_string(self):
"""Return the :obj:`ContactMap <conkit.core.contactmap.ContactMap>` as :obj:`str`"""
content = ["%d\t%d\t%.5f" % (c.res1_seq, c.res2_seq, c.raw_score) for c in self]
return "\n".join(content)
@staticmethod
def _adjust(contact_map, keymap):
"""Adjust res_altseq entries to insertions and deletions"""
encoder = dict((x.res_seq, x.res_altseq) for x in keymap if isinstance(x, Residue))
for contact in contact_map:
if contact.res1_seq in encoder:
contact.res1_altseq = encoder[contact.res1_seq]
if contact.res2_seq in encoder:
contact.res2_altseq = encoder[contact.res2_seq]
return contact_map
@staticmethod
def _create_keymap(contact_map, altloc=False):
"""Create a simple keymap
Parameters
----------
altloc : bool
Use the res_altloc positions [default: False]
Returns
-------
list
A list of residue mappings
"""
contact_map_keymap = collections.OrderedDict()
for contact in contact_map:
pos1 = Residue(contact.res1_seq, contact.res1_altseq, contact.res1, contact.res1_chain)
pos2 = Residue(contact.res2_seq, contact.res2_altseq, contact.res2, contact.res2_chain)
if altloc:
res1_index, res2_index = contact.res1_altseq, contact.res2_altseq
else:
res1_index, res2_index = contact.res1_seq, contact.res2_seq
contact_map_keymap[res1_index] = pos1
contact_map_keymap[res2_index] = pos2
contact_map_keymap_sorted = sorted(list(contact_map_keymap.items()), key=lambda x: int(x[0]))
return list(zip(*contact_map_keymap_sorted))[1]
@staticmethod
def _find_single(contact_map, index):
"""Find all contacts associated with ``index`` based on id property"""
for c in contact_map:
if c.id[0] == index or c.id[1] == index:
yield c
@staticmethod
def _insert_states(sequence, keymap):
"""Create a sequence matching keymap including deletions and insertions"""
it = iter(keymap)
keymap_ = []
for amino_acid in sequence:
if amino_acid == ord("-"):
keymap_.append(Gap())
else:
keymap_.append(next(it))
return keymap_
@staticmethod
def _reindex_by_keymap(keymap):
"""Reindex a key map"""
return [residue._replace(res_altseq=i + 1) for i, residue in enumerate(keymap)]
@staticmethod
def _renumber(contact_map, self_keymap, other_keymap):
"""Renumber the contact map based on the mapping of self and other keymaps"""
for self_residue, other_residue in zip(self_keymap, other_keymap):
if isinstance(self_residue, Gap):
continue
for contact in ContactMap._find_single(contact_map, self_residue.res_seq):
# Make sure we check with the ID, which doesn't change
if contact.id[0] == self_residue.res_altseq:
contact.res1_seq = other_residue.res_seq
contact.res1_chain = other_residue.res_chain
elif contact.id[1] == self_residue.res_altseq:
contact.res2_seq = other_residue.res_seq
contact.res2_chain = other_residue.res_chain
else:
raise ValueError("Error renumbering contact map - please report this bug")
return contact_map