# Source code for conkit.core.contactmap

# coding=utf-8
#
# BSD 3-Clause License
#
# Copyright (c) 2016-17, University of Liverpool
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""ContactMap container used throughout ConKit"""

from __future__ import division
from __future__ import print_function

__author__ = "Felix Simkovic"
__date__ = "03 Aug 2016"
__version__ = "1.0"

import collections
import numpy as np
import sys

# Python 2 only: shadow the eager built-in ``zip`` (which returns a list) with the
# lazy ``itertools.izip`` so that ``zip`` yields an iterator on both major versions.
if sys.version_info.major < 3:
    from itertools import izip as zip

from conkit.core._entity import _Entity
from conkit.core._struct import _Gap, _Residue
from conkit.core.contact import ContactMatchState
from conkit.core.sequence import Sequence


class ContactMap(_Entity):
    """A contact map object representing a single prediction

    The :obj:`ContactMap <conkit.core.ContactMap>` class represents a data structure to hold a single
    contact map prediction in one place. It contains functions to store, manipulate and organise
    :obj:`Contact <conkit.core.Contact>` instances.

    Attributes
    ----------
    coverage : float
       The sequence coverage score
    id : str
       A unique identifier
    ncontacts : int
       The number of :obj:`Contact <conkit.core.Contact>` instances in the
       :obj:`ContactMap <conkit.core.ContactMap>`
    precision : float
       The precision (Positive Predictive Value) score
    repr_sequence : :obj:`Sequence <conkit.core.Sequence>`
       The representative :obj:`Sequence <conkit.core.Sequence>` associated with the
       :obj:`ContactMap <conkit.core.ContactMap>`
    repr_sequence_altloc : :obj:`Sequence <conkit.core.Sequence>`
       The representative altloc :obj:`Sequence <conkit.core.Sequence>` associated with the
       :obj:`ContactMap <conkit.core.ContactMap>`
    sequence : :obj:`Sequence <conkit.core.Sequence>`
       The :obj:`Sequence <conkit.core.Sequence>` associated with the
       :obj:`ContactMap <conkit.core.ContactMap>`
    top_contact : :obj:`Contact <conkit.core.Contact>`
       The first :obj:`Contact <conkit.core.Contact>` entry in
       :obj:`ContactMap <conkit.core.ContactMap>`

    Examples
    --------
    >>> from conkit.core import Contact, ContactMap
    >>> contact_map = ContactMap("example")
    >>> contact_map.add(Contact(1, 10, 0.333))
    >>> contact_map.add(Contact(5, 30, 0.667))
    >>> print(contact_map)
    ContactMap(id="example", ncontacts=2)

    """
    __slots__ = ['_sequence']

    def __init__(self, id):
        """Initialise a new contact map"""
        self._sequence = None
        super(ContactMap, self).__init__(id)

    def __repr__(self):
        return "{0}(id=\"{1}\", ncontacts={2})".format(self.__class__.__name__, self.id, self.ncontacts)

    @property
    def coverage(self):
        """The sequence coverage score

        The coverage score is calculated by analysing the number of residues
        covered by the predicted contact pairs.

        .. math::

           Coverage=\\frac{x_{cov}}{L}

        The coverage score is calculated by dividing the number of residues that
        take part in at least one contact :math:`x_{cov}` by the number of residues
        in the sequence :math:`L`.

        Returns
        -------
        float
           The calculated coverage score

        See Also
        --------
        precision

        """
        # Every position without a contact is a gap character in the representative sequence
        seq_array = np.array(list(self.repr_sequence.seq_ascii))
        gaps = np.where(seq_array == ord('-'), 1, 0)
        return (seq_array.size - np.sum(gaps, axis=0)) / seq_array.size

    @property
    def empty(self):
        """Empty contact map"""
        return len(self) < 1

    @property
    def ncontacts(self):
        """The number of :obj:`Contact <conkit.core.Contact>` instances in the :obj:`ContactMap <conkit.core.ContactMap>`

        Returns
        -------
        int
           The number of contacts in the :obj:`ContactMap <conkit.core.ContactMap>`

        """
        return len(self)

    @property
    def short_range_contacts(self):
        """The short range contacts found :obj:`ContactMap <conkit.core.ContactMap>`

        Short range contacts are defined as 6 <= x <= 11 residues apart

        Returns
        -------
        obj
           A copy of the :obj:`ContactMap <conkit.core.ContactMap>` with short-range contacts only

        See Also
        --------
        medium_range_contacts, long_range_contacts

        """
        return self.remove_neighbors(min_distance=6, max_distance=11)

    @property
    def medium_range_contacts(self):
        """The medium range contacts found :obj:`ContactMap <conkit.core.ContactMap>`

        Medium range contacts are defined as 12 <= x <= 23 residues apart

        Returns
        -------
        obj
           A copy of the :obj:`ContactMap <conkit.core.ContactMap>` with medium-range contacts only

        See Also
        --------
        short_range_contacts, long_range_contacts

        """
        return self.remove_neighbors(min_distance=12, max_distance=23)

    @property
    def long_range_contacts(self):
        """The long range contacts found :obj:`ContactMap <conkit.core.ContactMap>`

        Long range contacts are defined as 24 <= x residues apart

        Returns
        -------
        obj
           A copy of the :obj:`ContactMap <conkit.core.ContactMap>` with long-range contacts only

        See Also
        --------
        short_range_contacts, medium_range_contacts

        """
        return self.remove_neighbors(min_distance=24)

    @property
    def precision(self):
        """The precision (Positive Predictive Value) score

        The precision value is calculated by analysing the true and false
        positive contacts.

        .. math::

           Precision=\\frac{TruePositives}{TruePositives + FalsePositives}

        The status of each contact, i.e true or false positive status, can be
        determined by running the :func:`match` function providing a reference
        structure.

        Returns
        -------
        float
           The calculated precision score

        See Also
        --------
        coverage

        """
        if self.empty:
            return 0.0

        import warnings

        # Count how many contacts carry each match state in a single pass
        statuses = np.asarray([c.status for c in self])
        states, counts = np.unique(statuses, return_counts=True)
        cdict = dict(zip(states, counts))

        fp_count = cdict.get(ContactMatchState.mismatched.value, 0.0)
        uk_count = cdict.get(ContactMatchState.unknown.value, 0.0)
        tp_count = cdict.get(ContactMatchState.matched.value, 0.0)

        if fp_count == 0.0 and tp_count == 0.0:
            warnings.warn("No matches or mismatches found in your contact map. " "Match two ContactMaps first.")
            return 0.0
        elif uk_count > 0:
            warnings.warn("Some contacts between the ContactMaps are unmatched due to non-identical "
                          "sequences. The precision value might be inaccurate.")

        return tp_count / (tp_count + fp_count)

    @property
    def repr_sequence(self):
        """The representative :obj:`Sequence <conkit.core.Sequence>` associated with the :obj:`ContactMap <conkit.core.ContactMap>`

        The peptide sequence constructed from the available
        contacts using the normal res_seq positions. Positions
        without any contact are written as gaps; an empty contact
        map therefore yields a sequence of gaps only.

        Returns
        -------
        obj
           A :obj:`Sequence <conkit.core.Sequence>` object

        Raises
        ------
        TypeError
           Sequence undefined

        See Also
        --------
        repr_sequence_altloc, sequence

        """
        if isinstance(self.sequence, Sequence):
            # Accumulate into a set rather than unpacking ``zip(*...)``, which fails for an empty map
            res_seqs = set()
            for contact in self:
                res_seqs.update(contact.id)
            return self._construct_repr_sequence(res_seqs)
        else:
            raise TypeError('Define the sequence as Sequence() instance')

    @property
    def repr_sequence_altloc(self):
        """The representative altloc :obj:`Sequence <conkit.core.Sequence>` associated with the :obj:`ContactMap <conkit.core.ContactMap>`

        The peptide sequence constructed from the available
        contacts using the altloc res_seq positions. Positions
        without any contact are written as gaps; an empty contact
        map therefore yields a sequence of gaps only.

        Returns
        -------
        obj
           A :obj:`Sequence <conkit.core.Sequence>` object

        Raises
        ------
        TypeError
           Sequence undefined

        See Also
        --------
        repr_sequence, sequence

        """
        if isinstance(self.sequence, Sequence):
            # Accumulate into a set rather than unpacking ``zip(*...)``, which fails for an empty map
            res_seqs = set()
            for contact in self:
                res_seqs.add(contact.res1_altseq)
                res_seqs.add(contact.res2_altseq)
            return self._construct_repr_sequence(res_seqs)
        else:
            raise TypeError('Define the sequence as Sequence() instance')

    @property
    def sequence(self):
        """The :obj:`Sequence <conkit.core.Sequence>` associated with the :obj:`ContactMap <conkit.core.ContactMap>`

        Returns
        -------
        obj
           A :obj:`Sequence <conkit.core.Sequence>` object

        See Also
        --------
        repr_sequence, repr_sequence_altloc

        """
        return self._sequence

    @sequence.setter
    def sequence(self, sequence):
        """Associate a :obj:`Sequence <conkit.core.Sequence>` instance with the :obj:`ContactMap <conkit.core.ContactMap>`

        Parameters
        ----------
        sequence : :obj:`Sequence <conkit.core.Sequence>`

        Raises
        ------
        TypeError
           Incorrect hierarchy instance provided

        """
        if isinstance(sequence, Sequence):
            self._sequence = sequence
        else:
            msg = "Instance of Sequence() required: {}".format(sequence)
            raise TypeError(msg)

    @property
    def top_contact(self):
        """The first :obj:`Contact <conkit.core.Contact>` entry in :obj:`ContactMap <conkit.core.ContactMap>`

        Returns
        -------
        obj, None
           The first :obj:`Contact <conkit.core.Contact>` entry in :obj:`ContactMap <conkit.core.ContactMap>`

        """
        return self.top

    def _construct_repr_sequence(self, res_seqs):
        """Construct the representative sequence

        Parameters
        ----------
        res_seqs : iterable
           The 1-based residue positions that take part in at least one contact

        Returns
        -------
        obj
           A :obj:`Sequence <conkit.core.Sequence>` in which every position not found
           in ``res_seqs`` is replaced by a gap character

        """
        # Set membership keeps the scan linear in the sequence length
        covered = set(res_seqs)
        full_sequence = self.sequence.seq
        representative_sequence = ''.join(
            full_sequence[i - 1] if i in covered else '-' for i in range(1, self.sequence.seq_len + 1))
        return Sequence(self.sequence.id + '_repr', representative_sequence)

    def assign_sequence_register(self, altloc=False):
        """Assign the amino acids from :obj:`Sequence <conkit.core.Sequence>` to all :obj:`Contact <conkit.core.Contact>` instances

        Parameters
        ----------
        altloc : bool
           Use the res_altloc positions [default: False]

        """
        for c in self:
            if altloc:
                res1_index = c.res1_altseq
                res2_index = c.res2_altseq
            else:
                res1_index = c.res1_seq
                res2_index = c.res2_seq
            # Residue positions are 1-based, the sequence string is 0-based
            c.res1 = self.sequence.seq[res1_index - 1]
            c.res2 = self.sequence.seq[res2_index - 1]

    def calculate_jaccard_index(self, other):
        """Calculate the Jaccard index between two :obj:`ContactMap <conkit.core.ContactMap>` instances

        This score analyzes the difference of the predicted contacts from two maps,

        .. math::

           J_{x,y}=\\frac{\\left|x \\cap y\\right|}{\\left|x \\cup y\\right|}

        where :math:`x` and :math:`y` are the sets of predicted contacts from two
        different predictors, :math:`\\left|x \\cap y\\right|` is the number of
        elements in the intersection of :math:`x` and :math:`y`, and the
        :math:`\\left|x \\cup y\\right|` represents the number of elements in the
        union of :math:`x` and :math:`y`.

        The J-score has values in the range of :math:`[0, 1]`, with a value of :math:`1`
        corresponding to identical contact maps and :math:`0` to dissimilar ones.

        Parameters
        ----------
        other : :obj:`ContactMap <conkit.core.ContactMap>`
           A ConKit :obj:`ContactMap <conkit.core.ContactMap>`

        Returns
        -------
        float
           The Jaccard index

        See Also
        --------
        match, precision

        Warnings
        --------
        The Jaccard index ranges from :math:`[0, 1]`, where :math:`1` means
        the maps contain identical contacts pairs.

        Notes
        -----
        The Jaccard index is different from the Jaccard distance mentioned in [#]_. The
        Jaccard distance corresponds to :math:`1-Jaccard_{index}`.

        .. [#] Q. Wuyun, W. Zheng, Z. Peng, J. Yang (2016). A large-scale comparative assessment
           of methods for residue-residue contact prediction. *Briefings in Bioinformatics*,
           [doi: 10.1093/bib/bbw106].

        """
        intersection = sum(1 for contact in self if contact.id in other)
        union = len(self) + sum(1 for contact in other if contact.id not in self)
        # If self and other are both empty, we define J(x,y) = 1
        if union == 0:
            return 1.0
        return float(intersection) / union

    def calculate_kernel_density(self, *args, **kwargs):
        """Calculate the contact density in the contact map using Gaussian kernels

        Thin wrapper that warns and forwards all arguments to
        :func:`calculate_contact_density`.

        """
        import warnings
        warnings.warn("This function will be deprecated in a future release! Use calculate_contact_density instead!")
        return self.calculate_contact_density(*args, **kwargs)

    def calculate_contact_density(self, bw_method="amise"):
        """Calculate the contact density in the contact map using Gaussian kernels

        Various algorithms can be used to estimate the bandwidth. To calculate the
        bandwidth for an 1D data array ``X`` with ``n`` data points and ``d`` dimensions,
        the listed algorithms have been implemented. Please note, in rules 2 and 3, the
        value of :math:`\\sigma` is the smaller of the standard deviation of ``X`` or
        the normalized interquartile range.

        The estimator is looked up by name through ``conkit.misc.bandwidth.bandwidth_factory``.

        Parameters
        ----------
        bw_method : str, optional
           The bandwidth estimator to use [default: amise]

        Returns
        -------
        list
           The list of per-residue density estimates

        Raises
        ------
        RuntimeError
           Cannot find SciKit package
        ValueError
           ContactMap is empty
        ValueError
           Undefined bandwidth method

        """
        try:
            import sklearn.neighbors
        except ImportError:
            raise RuntimeError("Cannot find SciKit package")

        if self.empty:
            raise ValueError("ContactMap is empty")

        # TODO: Chunan suggested to fix this bug - results are usually marginally better
        # REM: Bug in Sadowski's algorithm, res2 is excluded from list to train KDE
        # REM: Remember to change test cases when corrected implementation benchmarked
        # x = np.asarray([i for c in self for i in np.arange(c.res1_seq, c.res2_seq + 1)])[:, np.newaxis]
        x = np.asarray([i for c in self for i in np.arange(c.res1_seq, c.res2_seq)])[:, np.newaxis]
        # Evaluate the fitted density at every integer position spanned by the training data
        x_fit = np.arange(x.min(), x.max() + 1)[:, np.newaxis]
        from conkit.misc.bandwidth import bandwidth_factory
        bandwidth = bandwidth_factory(bw_method)(x).bw
        kde = sklearn.neighbors.KernelDensity(bandwidth=bandwidth).fit(x)
        # ``score_samples`` returns log-densities, hence the exponentiation
        return np.exp(kde.score_samples(x_fit)).tolist()

    def calculate_scalar_score(self):
        """Calculate a scaled score for the :obj:`ContactMap <conkit.core.ContactMap>`

        This score is a scaled score for all raw scores in a contact
        map. It is defined by the formula

        .. math::

           {x}'=\\frac{x}{\\overline{d}}

        where :math:`x` corresponds to the raw score of each predicted
        contact and :math:`\\overline{d}` to the mean of all raw scores.

        The score is saved in a separate :obj:`Contact <conkit.core.Contact>` attribute called
        ``scalar_score``

        This score is described in more detail in [#]_.

        .. [#] S. Ovchinnikov, L. Kinch, H. Park, Y. Liao, J. Pei, D.E. Kim,
           H. Kamisetty, N.V. Grishin, D. Baker (2015). Large-scale determination
           of previously unsolved protein structures using evolutionary information.
           *Elife* **4**, e09248.

        """
        raw_scores = np.asarray([c.raw_score for c in self])
        sca_scores = raw_scores / np.mean(raw_scores)
        for contact, sca_score in zip(self, sca_scores):
            contact.scalar_score = sca_score

    def find(self, register, altloc=False, strict=False):
        """Find all contacts with one or both residues in ``register``

        Parameters
        ----------
        register : int, list, tuple
           A list of residue register to find
        altloc : bool
           Use the res_altloc positions [default: False]
        strict : bool
           Both residues of :obj:`Contact <conkit.core.contact.Contact>` in register [default: False]

        Returns
        -------
        obj
           A modified version of the :obj:`ContactMap <conkit.core.ContactMap>` containing
           the found contacts

        """
        if isinstance(register, int):
            register = [register]
        register = set(register)
        comparison_operator = _AND if strict else _OR
        contact_map = self.deepcopy()
        for contact in self:
            # Select the numbering scheme once so that ``altloc=True`` never
            # falls back to matching on the res_seq positions
            if altloc:
                res1_index, res2_index = contact.res1_altseq, contact.res2_altseq
            else:
                res1_index, res2_index = contact.res1_seq, contact.res2_seq
            if not comparison_operator(res1_index in register, res2_index in register):
                contact_map.remove(contact.id)
        return contact_map

    def match(self, other, match_other=False, remove_unmatched=False, renumber=False, inplace=False):
        """Modify both hierarchies so residue numbers match one another.

        This function is key when plotting contact maps or visualising
        contact maps in 3-dimensional space. In particular, when residue
        numbers in the structure do not start at count 0 or when peptide
        chain breaks are present.

        Parameters
        ----------
        other : :obj:`ContactMap <conkit.core.ContactMap>`
           A ConKit :obj:`ContactMap <conkit.core.ContactMap>`
        match_other: bool, optional
           Match `other` to `self` [default: False]
        remove_unmatched : bool, optional
           Remove all unmatched contacts [default: False]
        renumber : bool, optional
           Renumber the res_seq entries [default: False]

           If ``True``, ``res1_seq`` and ``res2_seq`` changes
           but ``id`` remains the same
        inplace : bool, optional
           Replace the saved order of contacts [default: False]

        Returns
        -------
        obj
           :obj:`ContactMap <conkit.core.ContactMap>` instance, regardless of inplace

        Raises
        ------
        ValueError
           Error creating reliable keymap matching the sequence in :obj:`ContactMap <conkit.core.ContactMap>`
        RuntimeError
           A contact could be classified neither as unknown nor as (mis)matched

        """
        contact_map1 = self._inplace(inplace)
        if match_other:
            contact_map2 = other._inplace(inplace)
        else:
            contact_map2 = other._inplace(False)

        # ================================================================
        # 1. Align all sequences
        # ================================================================

        # Align both full sequences against each other
        aligned_sequences_full = contact_map1.sequence.align_local(
            contact_map2.sequence, id_chars=2, nonid_chars=1, gap_open_pen=-0.5, gap_ext_pen=-0.1)
        contact_map1_full_sequence, contact_map2_full_sequence = aligned_sequences_full

        # Align contact map 1 full sequences with representative sequence
        aligned_sequences_map1 = contact_map1_full_sequence.align_local(
            contact_map1.repr_sequence, id_chars=2, nonid_chars=1, gap_open_pen=-0.5, gap_ext_pen=-0.2, inplace=True)
        contact_map1_repr_sequence = aligned_sequences_map1[-1]

        # Align contact map 2 full sequences with __ALTLOC__ representative sequence
        aligned_sequences_map2 = contact_map2_full_sequence.align_local(
            contact_map2.repr_sequence_altloc,
            id_chars=2,
            nonid_chars=1,
            gap_open_pen=-0.5,
            gap_ext_pen=-0.2,
            inplace=True)
        contact_map2_repr_sequence = aligned_sequences_map2[-1]

        # Align both aligned representative sequences
        aligned_sequences_repr = contact_map1_repr_sequence.align_local(
            contact_map2_repr_sequence, id_chars=2, nonid_chars=1, gap_open_pen=-1.0, gap_ext_pen=-0.5, inplace=True)
        contact_map1_repr_sequence, contact_map2_repr_sequence = aligned_sequences_repr

        # ================================================================
        # 2. Identify TPs in other, map them, and match them to self
        # ================================================================

        # Encode the sequences to uint8 character arrays for easier and faster handling
        encoded_repr = np.array(
            [list(contact_map1_repr_sequence.seq_ascii),
             list(contact_map2_repr_sequence.seq_ascii)])

        # Create mappings for both contact maps
        contact_map1_keymap = ContactMap._create_keymap(contact_map1)
        contact_map2_keymap = ContactMap._create_keymap(contact_map2, altloc=True)

        # Some checks
        msg = "Error creating reliable keymap matching the sequence in ContactMap: "
        if len(contact_map1_keymap) != np.where(encoded_repr[0] != ord('-'))[0].shape[0]:
            raise ValueError(msg + contact_map1.id)
        elif len(contact_map2_keymap) != np.where(encoded_repr[1] != ord('-'))[0].shape[0]:
            raise ValueError(msg + contact_map2.id)

        # Create a sequence matching keymap including deletions and insertions
        contact_map1_keymap = ContactMap._insert_states(encoded_repr[0], contact_map1_keymap)
        contact_map2_keymap = ContactMap._insert_states(encoded_repr[1], contact_map2_keymap)

        # Reindex the altseq positions to account for insertions/deletions
        contact_map1_keymap = ContactMap._reindex(contact_map1_keymap)
        contact_map2_keymap = ContactMap._reindex(contact_map2_keymap)

        # Adjust the res_altseq based on the insertions and deletions
        contact_map2 = ContactMap._adjust(contact_map2, contact_map2_keymap)

        # Get the residue list for matching UNKNOWNs
        residues_map2 = tuple(i + 1 for i, a in enumerate(aligned_sequences_full[1].seq) if a != '-')

        # Adjust true and false positive statuses
        for contact in contact_map1:
            _id = (contact.res1_seq, contact.res2_seq)
            _id_alt = tuple(r.res_seq for r in contact_map2_keymap for i in _id if i == r.res_altseq)

            if any(i == _Gap.IDENTIFIER for i in _id_alt) and any(j not in residues_map2 for j in _id):
                contact_map1[_id].define_unknown()
            elif all(i in residues_map2 for i in _id):
                if _id_alt in contact_map2:
                    contact_map1[_id].define_match()
                else:
                    contact_map1[_id].define_mismatch()
            else:
                msg = "Error matching two contact maps - this should never happen"
                raise RuntimeError(msg)

        # ================================================================
        # 3. Remove unmatched contacts
        # ================================================================
        if remove_unmatched:
            # Iterate over a copy because contacts are removed from the map itself
            for contact in contact_map1.deepcopy():
                if contact.is_unknown:
                    contact_map1.remove(contact.id)

        # ================================================================
        # 4. Renumber the contact map 1 based on contact map 2
        # ================================================================
        if renumber:
            contact_map1 = ContactMap._renumber(contact_map1, contact_map1_keymap, contact_map2_keymap)

        return contact_map1

    def remove_neighbors(self, min_distance=5, max_distance=sys.maxsize, inplace=False):
        """Remove contacts between neighboring residues

        The algorithm works by keeping contact pairs that satisfy

            ``min_distance`` <= ``x`` <= ``max_distance``

        Parameters
        ----------
        min_distance : int, optional
           The minimum number of residues between contacts  [default: 5]
        max_distance : int, optional
           The maximum number of residues between contacts [default: ``sys.maxsize``]
        inplace : bool, optional
           Replace the saved order of contacts [default: False]

        Returns
        -------
        obj
           The reference to the :obj:`ContactMap <conkit.core.ContactMap>`, regardless of inplace

        """
        contact_map = self._inplace(inplace)
        # Iterate over a copy because contacts are removed from the map itself
        for contact in contact_map.deepcopy():
            if min_distance <= abs(contact.res1_seq - contact.res2_seq) <= max_distance:
                continue
            else:
                contact_map.remove(contact.id)
        return contact_map

    def rescale(self, inplace=False):
        """Rescale the raw scores in :obj:`ContactMap <conkit.core.ContactMap>`

        Rescaling of the data is done to normalize the raw scores
        to be in the range [0, 1]. The formula to rescale the data
        is:

        .. math::

           {x}'=\\frac{x-min(d)}{max(d)-min(d)}

        :math:`x` is the original value and :math:`d` are all values to be
        rescaled.

        If the rescaled values are all undefined, e.g. because every raw score
        is identical and :math:`max(d)-min(d)` is zero, every raw score is set
        to 1. An empty contact map is returned unchanged.

        Parameters
        ----------
        inplace : bool, optional
           Replace the saved order of contacts [default: False]

        Returns
        -------
        obj
           The reference to the :obj:`ContactMap <conkit.core.ContactMap>`, regardless of inplace

        """
        contact_map = self._inplace(inplace)

        raw_scores = np.asarray([c.raw_score for c in contact_map])
        # ``min``/``max`` are undefined for an empty array and there is nothing to rescale
        if raw_scores.size == 0:
            return contact_map

        lowest = raw_scores.min()
        # A zero score range yields 0/0; silence NumPy's warning, the case is handled below
        with np.errstate(divide='ignore', invalid='ignore'):
            norm_raw_scores = (raw_scores - lowest) / (raw_scores.max() - lowest)

        # Important to not end up with raw scores == np.nan
        if np.isnan(norm_raw_scores).all():
            norm_raw_scores = np.ones_like(norm_raw_scores)

        for contact, norm_raw_score in zip(contact_map, norm_raw_scores):
            contact.raw_score = norm_raw_score

        return contact_map

    def sort(self, kword, reverse=False, inplace=False):
        """Sort the :obj:`ContactMap <conkit.core.ContactMap>`

        Parameters
        ----------
        kword : str
           The dictionary key to sort contacts by
        reverse : bool, optional
           Sort the contact pairs in descending order [default: False]
        inplace : bool, optional
           Replace the saved order of contacts [default: False]

        Returns
        -------
        obj
           The reference to the :obj:`ContactMap <conkit.core.ContactMap>`, regardless of inplace

        Raises
        ------
        ValueError
           ``kword`` not in :obj:`ContactMap <conkit.core.ContactMap>`

        """
        contact_map = self._inplace(inplace)
        contact_map._sort(kword, reverse)
        return contact_map

    @staticmethod
    def _adjust(contact_map, keymap):
        """Adjust res_altseq entries to insertions and deletions"""
        # Gap entries carry no residue mapping and are skipped
        encoder = {x.res_seq: x.res_altseq for x in keymap if isinstance(x, _Residue)}
        for contact in contact_map:
            if contact.res1_seq in encoder:
                contact.res1_altseq = encoder[contact.res1_seq]
            if contact.res2_seq in encoder:
                contact.res2_altseq = encoder[contact.res2_seq]
        return contact_map

    @staticmethod
    def _create_keymap(contact_map, altloc=False):
        """Create a simple keymap

        Parameters
        ----------
        contact_map : :obj:`ContactMap <conkit.core.ContactMap>`
           The contact map to collect the residues from
        altloc : bool
           Use the res_altloc positions [default: False]

        Returns
        -------
        tuple
           The residue mappings sorted by residue position; empty
           if ``contact_map`` holds no contacts

        """
        contact_map_keymap = collections.OrderedDict()
        for contact in contact_map:
            pos1 = _Residue(contact.res1_seq, contact.res1_altseq, contact.res1, contact.res1_chain)
            pos2 = _Residue(contact.res2_seq, contact.res2_altseq, contact.res2, contact.res2_chain)
            if altloc:
                res1_index, res2_index = contact.res1_altseq, contact.res2_altseq
            else:
                res1_index, res2_index = contact.res1_seq, contact.res2_seq
            # A residue shared by several contacts is stored once; the last occurrence wins
            contact_map_keymap[res1_index] = pos1
            contact_map_keymap[res2_index] = pos2
        contact_map_keymap_sorted = sorted(contact_map_keymap.items(), key=lambda x: int(x[0]))
        return tuple(residue for _, residue in contact_map_keymap_sorted)

    @staticmethod
    def _find_single(contact_map, index):
        """Find all contacts associated with ``index`` based on id property"""
        for c in contact_map:
            if c.id[0] == index or c.id[1] == index:
                yield c

    @staticmethod
    def _insert_states(sequence, keymap):
        """Create a sequence matching keymap including deletions and insertions"""
        it = iter(keymap)
        keymap_ = []
        for amino_acid in sequence:
            # ``sequence`` holds character codes; a gap consumes no keymap entry
            if amino_acid == ord('-'):
                keymap_.append(_Gap())
            else:
                keymap_.append(next(it))
        return keymap_

    @staticmethod
    def _reindex(keymap):
        """Reindex a key map"""
        for i, residue in enumerate(keymap):
            residue.res_altseq = i + 1
        return keymap

    @staticmethod
    def _renumber(contact_map, self_keymap, other_keymap):
        """Renumber the contact map based on the mapping of self and other keymaps"""
        for self_residue, other_residue in zip(self_keymap, other_keymap):
            if isinstance(self_residue, _Gap):
                continue
            for contact in ContactMap._find_single(contact_map, self_residue.res_seq):
                # Make sure we check with the ID, which doesn't change
                if contact.id[0] == self_residue.res_altseq:
                    contact.res1_seq = other_residue.res_seq
                    contact.res1_chain = other_residue.res_chain
                elif contact.id[1] == self_residue.res_altseq:
                    contact.res2_seq = other_residue.res_seq
                    contact.res2_chain = other_residue.res_chain
                else:
                    raise ValueError('Should never get here')
        return contact_map
def _AND(a, b):
    """Return ``a and b``, i.e. ``a`` if it is falsy and ``b`` otherwise"""
    if a:
        return b
    return a


def _OR(a, b):
    """Return ``a or b``, i.e. ``a`` if it is truthy and ``b`` otherwise"""
    if a:
        return a
    return b