#!/usr/bin/env python
#
# BSD 3-Clause License
#
# Copyright (c) 2016-17, University of Liverpool
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""This script provides a command-line interface to ConKit's plotting functionality.
You are provided with a single access point to many different kinds of plots.
For more specific descriptions, call each subcommand's help menu directly.
"""
__author__ = "Felix Simkovic"
__date__ = "08 Feb 2017"
__version__ = "0.1"
import argparse
import inspect
import conkit.command_line
import conkit.io
import conkit.plot
logger = None
# Note:
# New subparsers can be added automatically as long as the convention is followed.
# I.e., To add a new function automatically, call it add_*_args and it will be
# picked up here.
def _add_default_args(parser):
parser.add_argument('-dpi', dest='dpi', default=300, type=int, help="the resolution in DPI [default: 300]")
parser.add_argument(
'-o',
dest='output',
default=None,
type=str,
help="the figure file. Note, the format is determined by the suffix")
def _add_contact_default_args(parser):
parser.add_argument('confile', help="Path to contact file")
parser.add_argument('conformat', help="Format of contact file")
def _add_sequence_default_args(parser):
parser.add_argument('seqfile', help="Path to sequence file")
parser.add_argument('seqformat', help="Format of sequence file")
def _add_msa_default_args(parser):
parser.add_argument("msafile", help="Path to MSA file")
parser.add_argument("msaformat", help="Format of MSA file")
def _add_structure_default_args(parser):
parser.add_argument('pdbfile', help="Path to structure file")
parser.add_argument('pdbformat', help="Format of structure file")
[docs]def add_precision_evaluation_args(subparsers):
description = u"""
This command will plot an evaluation plot illustrating the precision score of
the provided contact prediction compared against a protein structure at different
cutoff thresholds.
"""
subparser = subparsers.add_parser(
'peval',
help="Plot the precision evaluation plot",
description=description,
formatter_class=argparse.RawDescriptionHelpFormatter)
subparser.add_argument(
'-c',
dest='pdbchain',
default=None,
help='PDB chain to use [default: first in file]. Inter-molecular '
'predictions use two letter convention, i.e AD for contacts '
'between A and D.')
subparser.add_argument('-d', dest='dtn', default=5, type=int, help='Minimum sequence separation [default: 5]')
subparser.add_argument(
'-j', dest='cutoff_step', default=0.2, type=float, help='The cutoff step for contact selection [default: 0.2]')
subparser.add_argument(
'-min',
dest='min_cutoff',
default=0.0,
type=float,
help='The minimum factor for contact selection [default: 0.0]')
subparser.add_argument(
'-max',
dest='max_cutoff',
default=100.0,
type=float,
help='The maximum factor for contact selection [default: 100.0]')
subparser.add_argument('--interchain', action="store_true", default=False, help='Plot inter-chain contacts')
_add_default_args(subparser)
_add_structure_default_args(subparser)
_add_sequence_default_args(subparser)
_add_contact_default_args(subparser)
subparser.set_defaults(which='precision_evaluation')
[docs]def add_sequence_coverage_args(subparsers):
description = u"""
This command will plot a coverage plot for every position in your alignment.
"""
subparser = subparsers.add_parser(
'scov',
help="Plot the sequence coverage",
description=description,
formatter_class=argparse.RawDescriptionHelpFormatter)
subparser.add_argument('-id', dest='identity', default=0.7, type=float, help='sequence identity [default: 0.7]')
_add_default_args(subparser)
_add_msa_default_args(subparser)
subparser.set_defaults(which='sequence_coverage')
[docs]def main():
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
subparsers = parser.add_subparsers()
# Add the subparsers
#
# Note:
# New subparsers can be added automatically as long as the convention is followed.
# I.e., To add a new function automatically, call it add_*_args and it will be
# picked up here.
functions = [
k for k in globals()
if k.startswith("add") and k.endswith("args") and (
inspect.ismethod(globals()[k]) or inspect.isfunction(globals()[k]))
]
for f_name in functions:
globals()[f_name](subparsers)
# Parse all arguments
args = parser.parse_args()
# Setup the logger
global logger
logger = conkit.command_line.setup_logging(level='info')
if args.which == 'contact_map':
if args.interchain:
logger.info('This script is experimental for inter-chain contact plotting')
logger.info('Min sequence separation for contacting residues: %d', args.dtn)
logger.info('Contact list cutoff factor: %f * L', args.dfactor)
seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]
con.sequence = seq
con.assign_sequence_register()
con.remove_neighbors(min_distance=args.dtn, inplace=True)
ncontacts = int(seq.seq_len * args.dfactor)
con.sort('raw_score', reverse=True, inplace=True)
con_sliced = con[:ncontacts]
if args.otherfile:
other = conkit.io.read(args.otherfile, args.otherformat)[0]
other.sequence = seq
other.assign_sequence_register()
other.remove_neighbors(min_distance=args.dtn, inplace=True)
other.sort('raw_score', reverse=True, inplace=True)
other_sliced = other[:ncontacts]
else:
other_sliced = None
if args.reffile:
if args.refid:
reference = conkit.io.read(args.reffile, args.refformat)[args.refid]
else:
reference = conkit.io.read(args.reffile, args.refformat)[0]
if args.refformat not in ['pdb', 'mmcif']:
msg = "The provided format {0} is not yet implemented for the reference flag".format(args.refformat)
raise RuntimeError(msg)
con_matched = con_sliced.match(reference, match_other=True, renumber=True, remove_unmatched=True)
if other_sliced:
other_matched = other_sliced.match(reference, renumber=True, remove_unmatched=True)
else:
other_matched = other_sliced
else:
reference = None
con_matched = con_sliced
other_matched = other_sliced
def altloc_remove(cmap):
"""Remove alternative locations"""
altloc = False
for contact in cmap.copy():
# For now we need this args.interchain check to account for gapped residues
# where res chain was not assigned
if contact.res1_chain != contact.res2_chain and args.interchain:
altloc = True
break
if contact.res1_chain == contact.res2_chain and args.interchain:
cmap.remove(contact.id)
elif contact.res1_chain != contact.res2_chain and not args.interchain:
cmap.remove(contact.id)
return altloc
altloc = altloc_remove(con_matched)
if other_matched:
altloc = altloc_remove(other_matched)
outformat = 'png'
outfile = args.output if args.output else args.confile.rsplit('.', 1)[0] + '.' + outformat
plot = conkit.plot.ContactMapFigure(
con_matched,
other=other_matched,
reference=reference,
file_name=outfile,
altloc=altloc,
use_conf=args.confidence,
dpi=args.dpi)
elif args.which == 'contact_map_chord':
logger.info('Min sequence separation for contacting residues: %d', args.dtn)
logger.info('Contact list cutoff factor: %f * L', args.dfactor)
seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]
con.sequence = seq
con.assign_sequence_register()
con.remove_neighbors(min_distance=args.dtn, inplace=True)
con.sort('raw_score', reverse=True, inplace=True)
ncontacts = int(seq.seq_len * args.dfactor)
con_sliced = con[:ncontacts]
outformat = 'png'
outfile = args.output if args.output else args.confile.rsplit('.', 1)[0] + '.' + outformat
plot = conkit.plot.ContactMapChordFigure(con_sliced, use_conf=args.confidence, file_name=outfile, dpi=args.dpi)
elif args.which == 'contact_density':
logger.info('Min sequence separation for contacting residues: %d', args.dtn)
logger.info('Contact list cutoff factor: %f * L', args.dfactor)
logger.info('Bandwidth estimator: %s', args.bw_method)
seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]
con.sequence = seq
con.assign_sequence_register()
con.remove_neighbors(min_distance=args.dtn, inplace=True)
con.sort('raw_score', reverse=True, inplace=True)
ncontacts = int(seq.seq_len * args.dfactor)
con_sliced = con[:ncontacts]
outformat = 'png'
outfile = args.output if args.output else args.confile.rsplit('.', 1)[0] + '.' + outformat
plot = conkit.plot.ContactDensityFigure(con_sliced, bw_method=args.bw_method, file_name=outfile, dpi=args.dpi)
elif args.which == 'precision_evaluation':
if args.interchain:
logger.info('This script is experimental for inter-chain contact plotting')
logger.info('Min sequence separation for contacting residues: %d', args.dtn)
logger.info('Min contact list cutoff factor: %f * L', args.min_cutoff)
logger.info('Max contact list cutoff factor: %f * L', args.max_cutoff)
logger.info('Contact list cutoff factor step: %f', args.cutoff_step)
seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]
con.sequence = seq
con.assign_sequence_register()
con.remove_neighbors(min_distance=args.dtn, inplace=True)
con.sort('raw_score', reverse=True, inplace=True)
if args.pdbchain:
pdb = conkit.io.read(args.pdbfile, 'pdb')[args.pdbchain]
else:
pdb = conkit.io.read(args.pdbfile, 'pdb')[0]
con_matched = con.match(pdb, renumber=True, remove_unmatched=True)
outformat = 'png'
outfile = args.output if args.output else args.confile.rsplit('.', 1)[0] + '.' + outformat
plot = conkit.plot.PrecisionEvaluationFigure(
con_matched,
cutoff_step=args.cutoff_step,
file_name=outfile,
min_cutoff=args.min_cutoff,
max_cutoff=args.max_cutoff,
dpi=args.dpi)
elif args.which == 'sequence_coverage':
hierarchy = conkit.io.read(args.msafile, args.msaformat)
outformat = 'png'
outfile = args.output if args.output else args.msafile.rsplit('.', 1)[0] + '.' + outformat
plot = conkit.plot.SequenceCoverageFigure(hierarchy, file_name=outfile, dpi=args.dpi)
logger.info('Final plot written in %s format to %s', plot.format.upper(), plot.file_name)
return
if __name__ == "__main__":
main()