# coding=utf-8
#
# BSD 3-Clause License
#
# Copyright (c) 2016-17, University of Liverpool
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Sequence container used throughout ConKit"""
from __future__ import division
from __future__ import print_function
__author__ = "Felix Simkovic"
__date__ = "03 Aug 2016"
__version__ = "1.0"
from Bio import pairwise2
from conkit.core._entity import _Entity
# One to three amino acid letter code conversions
ONE_TO_THREE = {'A': 'ALA', 'C': 'CYS', 'B': 'ASX', 'E': 'GLU', 'D': 'ASP', 'G': 'GLY', 'F': 'PHE', 'I': 'ILE',
'H': 'HIS', 'K': 'LYS', 'J': 'XLE', 'M': 'MET', 'L': 'LEU', 'O': 'PYL', 'N': 'ASN', 'Q': 'GLN',
'P': 'PRO', 'S': 'SER', 'R': 'ARG', 'U': 'SEC', 'T': 'THR', 'W': 'TRP', 'V': 'VAL', 'Y': 'TYR',
'X': 'XAA', 'Z': 'GLX'}
# Three to one amino acid letter code conversions
THREE_TO_ONE = {'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CME': 'C', 'CYS': 'C', 'GLN': 'Q', 'GLU': 'E',
'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'MSE': 'M', 'PHE': 'F',
'PRO': 'P', 'PYL': 'O', 'SER': 'S', 'SEC': 'U', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V',
'ASX': 'B', 'GLX': 'Z', 'XAA': 'X', 'UNK': 'X', 'XLE': 'J'}
[docs]class Sequence(_Entity):
"""A sequence template to store all associated information
Attributes
----------
id : str
A unique identifier
remark : list
The :obj:`Sequence <conkit.core.Sequence>`-specific remarks
seq : str
The protein sequence as :obj:`str`
seq_len : int
The protein sequence length
Examples
--------
>>> from conkit.core import Sequence
>>> sequence_entry = Sequence("example", "ABCDEF")
>>> print(sequence_entry)
Sequence(id="example" seq="ABCDEF" seqlen=6)
"""
__slots__ = ['_remark', '_seq']
def __init__(self, id, seq):
"""Initialise a generic sequence
Parameters
----------
id : str
A unique sequence identifier
seq : str
The protein sequence
"""
self._remark = []
self._seq = None
self.seq = seq
super(Sequence, self).__init__(id)
def __add__(self, other):
"""Concatenate two sequence instances to a new"""
id = self.id + '_' + other.id
seq = self.seq + other.seq
return Sequence(id, seq)
def __repr__(self):
if self.seq_len > 12:
seq_string = ''.join([self.seq[:5], '...', self.seq[-5:]])
else:
seq_string = self.seq
return "{0}(id=\"{1}\" seq=\"{2}\" seq_len={3})".format(
self.__class__.__name__, self.id, seq_string, self.seq_len
)
@property
def remark(self):
"""The :obj:`Sequence <conkit.core.Sequence>`-specific remarks"""
return self._remark
@remark.setter
def remark(self, remark):
"""Set the :obj:`Sequence <conkit.core.Sequence>` remark
Parameters
----------
remark : str, list
The remark will be added to the list of remarks
"""
if isinstance(remark, list):
self._remark += remark
elif isinstance(remark, tuple):
self._remark += list(remark)
else:
self._remark += [remark]
@property
def seq(self):
"""The protein sequence as :obj:`str`"""
return self._seq
@seq.setter
def seq(self, seq):
"""Set the sequence
Parameters
----------
seq : str
Raises
------
ValueError
One or more amino acids in the sequence are not recognised
"""
if all(c in ONE_TO_THREE for c in seq.upper() if c != '-'):
self._seq = seq
else:
raise ValueError('Unrecognized amino acids in sequence')
@property
def seq_ascii(self):
"""The protein sequence as ASCII-encoded :obj:`str`"""
return bytearray(self._seq, "ascii")
@property
def seq_len(self):
"""The protein sequence length"""
return len(self.seq)
[docs] def align_global(self, other, id_chars=2, nonid_chars=1, gap_open_pen=-0.5, gap_ext_pen=-0.1, inplace=False):
"""Generate a global alignment between two :obj:`Sequence <conkit.core.Sequence>` instances
Parameters
----------
other : :obj:`Sequence <conkit.core.Sequence>`
id_chars : int, optional
nonid_chars : int, optional
gap_open_pen : float, optional
gap_ext_pen : float, optional
inplace : bool, optional
Replace the saved order of residues [default: False]
Returns
-------
obj
The reference to the :obj:`Sequence`, regardless of inplace
obj
The reference to the :obj:`Sequence`, regardless of inplace
"""
sequence1 = self._inplace(inplace)
sequence2 = other._inplace(inplace)
alignment = pairwise2.align.globalms(
sequence1.seq, sequence2.seq, id_chars, nonid_chars, gap_open_pen, gap_ext_pen
)
sequence1.seq = alignment[-1][0]
sequence2.seq = alignment[-1][1]
return sequence1, sequence2
[docs] def align_local(self, other, id_chars=2, nonid_chars=1, gap_open_pen=-0.5, gap_ext_pen=-0.1, inplace=False):
"""Generate a local alignment between two :obj:`Sequence <conkit.core.Sequence>` instances
Parameters
----------
other : :obj:`Sequence <conkit.core.Sequence>`
id_chars : int, optional
nonid_chars : int, optional
gap_open_pen : float, optional
gap_ext_pen : float, optional
inplace : bool, optional
Replace the saved order of residues [default: False]
Returns
-------
obj
The reference to the :obj:`Sequence <conkit.core.Sequence>`, regardless of inplace
obj
The reference to the :obj:`Sequence <conkit.core.Sequence>`, regardless of inplace
"""
sequence1 = self._inplace(inplace)
sequence2 = other._inplace(inplace)
alignment = pairwise2.align.localms(
sequence1.seq, sequence2.seq, id_chars, nonid_chars, gap_open_pen, gap_ext_pen
)
sequence1.seq = alignment[-1][0]
sequence2.seq = alignment[-1][1]
return sequence1, sequence2