#!/usr/bin/env python
#
# BSD 3-Clause License
#
# Copyright (c) 2016-21, University of Liverpool
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""This script provides a command-line interface to ConKit's plotting functionality.
You are provided with a single access point to many different kinds of plots.
For more specific descriptions, call each subcommand's help menu directly.
"""
__author__ = "Felix Simkovic"
__date__ = "08 Feb 2017"
__version__ = "0.13.3"
import argparse
from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP
import inspect
import conkit.command_line
import conkit.io
from conkit.io import DISTANCE_FILE_PARSERS
import conkit.plot
import conkit.plot.tools
logger = None
def _add_default_args(parser):
parser.add_argument("-dpi", dest="dpi", default=300, type=int, help="the resolution in DPI")
parser.add_argument("-o", dest="output", default=None, type=str, help="output file")
parser.add_argument(
"--overwrite", dest="overwrite", default=False, action="store_true", help="overwrite output file if exists"
)
def _add_contact_default_args(parser):
parser.add_argument("confile", help="Path to contact file")
parser.add_argument("conformat", help="Format of contact file")
def _add_sequence_default_args(parser):
parser.add_argument("seqfile", help="Path to sequence file")
parser.add_argument("seqformat", help="Format of sequence file")
def _add_msa_default_args(parser):
parser.add_argument("msafile", help="Path to MSA file")
parser.add_argument("msaformat", help="Format of MSA file")
def _add_structure_default_args(parser):
parser.add_argument("pdbfile", help="Path to structure file")
parser.add_argument("pdbformat", help="Format of structure file", choices=['pdb', 'mmcif'])
def _add_distance_default_args(parser):
parser.add_argument("distfile", help="Path to distance prediction file")
parser.add_argument("distformat", help="Format of distance prediction file",
choices=list(DISTANCE_FILE_PARSERS.keys()))
[docs]def add_covariance_validation_args(subparsers):
description = u"""
This command will plot a covariance validation sequence profile using the provided
distance predictions alongside a model.
"""
subparser = subparsers.add_parser(
"coval",
help="Plot a covariance validation profile",
description=description,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
subparser.add_argument("-dssp_exe", dest="dssp", default='mkdssp', help="path to dssp executable",
type=conkit.plot.tools.is_executable)
subparser.add_argument("--map_align_exe", dest="map_align_exe", default=None, help="path to map_align executable",
type=conkit.plot.tools.is_executable)
_add_default_args(subparser)
_add_sequence_default_args(subparser)
_add_distance_default_args(subparser)
_add_structure_default_args(subparser)
subparser.set_defaults(which="covariance_validation")
[docs]def add_distorgam_heatmap_args(subparsers):
description = u"""
This command will plot a distogram heatmap using the provided distance predictions.
"""
subparser = subparsers.add_parser(
"distheat",
help="Plot a distance prediction heatmap",
description=description,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
subparser.add_argument("-e", dest="otherfile", default=None, help="a second contact map to plot for comparison")
subparser.add_argument("-ef", dest="otherformat", default=None, help="the format of the second contact map")
_add_default_args(subparser)
_add_sequence_default_args(subparser)
_add_distance_default_args(subparser)
subparser.set_defaults(which="distogram_heatmap")
[docs]def add_precision_evaluation_args(subparsers):
description = u"""
This command will plot an evaluation plot illustrating the precision score of
the provided contact prediction compared against a protein structure at different
cutoff thresholds.
"""
subparser = subparsers.add_parser(
"peval",
help="Plot the precision evaluation plot",
description=description,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
subparser.add_argument(
"-c",
dest="pdbchain",
default=None,
help="PDB chain to use [default: first in file]. Inter-molecular predictions use two letter convention, i.e AD for contacts between A and D.",
)
subparser.add_argument("-d", dest="dtn", default=5, type=int, help="Minimum sequence separation")
subparser.add_argument(
"-j", dest="cutoff_step", default=0.2, type=float, help="The cutoff step for contact selection"
)
subparser.add_argument(
"-min", dest="min_cutoff", default=0.0, type=float, help="The minimum factor for contact selection"
)
subparser.add_argument(
"-max", dest="max_cutoff", default=100.0, type=float, help="The maximum factor for contact selection"
)
subparser.add_argument("--interchain", action="store_true", default=False, help="Plot inter-chain contacts")
_add_default_args(subparser)
_add_structure_default_args(subparser)
_add_sequence_default_args(subparser)
_add_contact_default_args(subparser)
subparser.set_defaults(which="precision_evaluation")
[docs]def add_sequence_coverage_args(subparsers):
description = u"""
This command will plot a coverage plot for every position in your alignment.
"""
subparser = subparsers.add_parser(
"scov",
help="Plot the sequence coverage",
description=description,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
subparser.add_argument("-id", dest="identity", default=0.7, type=float, help="sequence identity")
_add_default_args(subparser)
_add_msa_default_args(subparser)
subparser.set_defaults(which="sequence_coverage")
[docs]def main(argv=None):
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
subparsers = parser.add_subparsers()
# Add the subparsers
#
# Note:
# New subparsers can be added automatically as long as the convention is followed.
# I.e., To add a new function automatically, call it add_*_args and it will be
# picked up here.
functions = [
k
for k in globals()
if k.startswith("add")
and k.endswith("args")
and (inspect.ismethod(globals()[k]) or inspect.isfunction(globals()[k]))
]
for f_name in functions:
globals()[f_name](subparsers)
args = parser.parse_args(args=argv)
global logger
logger = conkit.command_line.setup_logging(level="info")
if args.which == "contact_map":
if args.interchain:
logger.info("This script is experimental for inter-chain contact plotting")
logger.info("Min sequence separation for contacting residues: %d", args.dtn)
logger.info("Contact list cutoff factor: %f * L", args.dfactor)
seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]
if args.conformat in DISTANCE_FILE_PARSERS.keys():
con = con.as_contactmap()
con.sequence = seq
con.set_sequence_register()
con.remove_neighbors(min_distance=args.dtn, inplace=True)
ncontacts = int(seq.seq_len * args.dfactor)
con.sort("raw_score", reverse=True, inplace=True)
con_sliced = con[:ncontacts]
if args.otherfile:
other = conkit.io.read(args.otherfile, args.otherformat)[0]
if args.otherformat in DISTANCE_FILE_PARSERS.keys():
other = other.as_contactmap()
other.sequence = seq
other.set_sequence_register()
other.remove_neighbors(min_distance=args.dtn, inplace=True)
other.sort("raw_score", reverse=True, inplace=True)
other_sliced = other[:ncontacts]
else:
other_sliced = None
if args.reffile:
if args.refid:
reference = conkit.io.read(args.reffile, args.refformat)[args.refid]
else:
reference = conkit.io.read(args.reffile, args.refformat)[0]
reference = reference.as_contactmap()
reference.sequence = seq
reference.set_sequence_register()
con_matched = con_sliced.match(reference, match_other=True, renumber=True, remove_unmatched=True)
if other_sliced:
other_matched = other_sliced.match(reference, renumber=True, remove_unmatched=True)
else:
other_matched = other_sliced
else:
reference = None
con_matched = con_sliced
other_matched = other_sliced
def altloc_remove(cmap):
"""Remove alternative locations"""
altloc = False
for contact in cmap.copy():
# For now we need this args.interchain check to account for gapped residues
# where res chain was not assigned
if contact.res1_chain != contact.res2_chain and args.interchain:
altloc = True
break
if contact.res1_chain == contact.res2_chain and args.interchain:
cmap.remove(contact.id)
elif contact.res1_chain != contact.res2_chain and not args.interchain:
cmap.remove(contact.id)
return altloc
altloc = altloc_remove(con_matched)
if other_matched:
altloc = altloc_remove(other_matched)
figure = conkit.plot.ContactMapFigure(
con_matched, other=other_matched, reference=reference, altloc=altloc, use_conf=args.confidence, legend=True
)
figure_aspect_ratio = 1.0
elif args.which == "contact_map_chord":
logger.info("Min sequence separation for contacting residues: %d", args.dtn)
logger.info("Contact list cutoff factor: %f * L", args.dfactor)
seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]
if args.conformat in DISTANCE_FILE_PARSERS:
con = con.as_contactmap()
con.sequence = seq
con.set_sequence_register()
con.remove_neighbors(min_distance=args.dtn, inplace=True)
con.sort("raw_score", reverse=True, inplace=True)
ncontacts = int(seq.seq_len * args.dfactor)
con_sliced = con[:ncontacts]
figure = conkit.plot.ContactMapChordFigure(con_sliced, use_conf=args.confidence, legend=True)
figure_aspect_ratio = 1.0
elif args.which == "covariance_validation":
sequence = conkit.io.read(args.seqfile, args.seqformat)[0]
if len(sequence) < 5:
raise ValueError('Cannot validate a model with less than 5 residues')
prediction = conkit.io.read(args.distfile, args.distformat)[0]
model = conkit.io.read(args.pdbfile, args.pdbformat)[0]
p = PDBParser()
structure = p.get_structure('structure', args.pdbfile)[0]
dssp = DSSP(structure, args.pdbfile, dssp=args.dssp, acc_array='Wilke')
figure = conkit.plot.ModelValidationFigure(model, prediction, sequence, dssp, map_align_exe=args.map_align_exe)
figure_aspect_ratio = None
elif args.which == "distogram_heatmap":
sequence = conkit.io.read(args.seqfile, args.seqformat)[0]
distogram = conkit.io.read(args.distfile, args.distformat)[0]
distogram.sequence = sequence
distogram.set_sequence_register()
if args.otherfile:
if args.otherformat not in DISTANCE_FILE_PARSERS.keys():
raise ValueError('Other prediction file format can only be a distance file')
other = conkit.io.read(args.otherfile, args.otherformat)[0]
other.sequence = sequence
other.set_sequence_register()
figure = conkit.plot.DistogramHeatmapFigure(distogram, other=other)
else:
figure = conkit.plot.DistogramHeatmapFigure(distogram)
figure_aspect_ratio = 1.0
elif args.which == "contact_map_matrix":
seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]
if args.conformat in DISTANCE_FILE_PARSERS:
con = con.as_contactmap()
con.sequence = seq
con.set_sequence_register()
if args.otherfile:
other = conkit.io.read(args.otherfile, args.otherformat)[0]
if args.otherformat in DISTANCE_FILE_PARSERS:
other = other.as_contactmap()
other.sequence = seq
other.set_sequence_register()
else:
other = con
figure = conkit.plot.ContactMapMatrixFigure(con, other=other, legend=True)
figure_aspect_ratio = 1.0
elif args.which == "contact_density":
logger.info("Min sequence separation for contacting residues: %d", args.dtn)
logger.info("Contact list cutoff factor: %f * L", args.dfactor)
logger.info("Bandwidth estimator: %s", args.bw_method)
seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]
if args.conformat in DISTANCE_FILE_PARSERS:
con = con.as_contactmap()
con.sequence = seq
con.set_sequence_register()
con.remove_neighbors(min_distance=args.dtn, inplace=True)
con.sort("raw_score", reverse=True, inplace=True)
ncontacts = int(seq.seq_len * args.dfactor)
con_sliced = con[:ncontacts]
figure = conkit.plot.ContactDensityFigure(con_sliced, bw_method=args.bw_method, legend=True)
figure_aspect_ratio = 0.3
elif args.which == "precision_evaluation":
if args.interchain:
logger.info("This script is experimental for inter-chain contact plotting")
logger.info("Min sequence separation for contacting residues: %d", args.dtn)
logger.info("Min contact list cutoff factor: %f * L", args.min_cutoff)
logger.info("Max contact list cutoff factor: %f * L", args.max_cutoff)
logger.info("Contact list cutoff factor step: %f", args.cutoff_step)
seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]
if args.conformat in DISTANCE_FILE_PARSERS:
con = con.as_contactmap()
con.sequence = seq
con.set_sequence_register()
con.remove_neighbors(min_distance=args.dtn, inplace=True)
con.sort("raw_score", reverse=True, inplace=True)
if args.pdbchain:
pdb = conkit.io.read(args.pdbfile, args.pdbformat)[args.pdbchain]
else:
pdb = conkit.io.read(args.pdbfile, args.pdbformat)[0]
pdb.sequence = seq
pdb.set_sequence_register()
pdb = pdb.as_contactmap()
con_matched = con.match(pdb, renumber=True, remove_unmatched=True)
figure = conkit.plot.PrecisionEvaluationFigure(
con_matched,
cutoff_step=args.cutoff_step,
min_cutoff=args.min_cutoff,
max_cutoff=args.max_cutoff,
legend=True,
)
figure_aspect_ratio = 0.3
elif args.which == "sequence_coverage":
hierarchy = conkit.io.read(args.msafile, args.msaformat)
figure = conkit.plot.SequenceCoverageFigure(hierarchy, legend=True)
figure_aspect_ratio = 0.3
if not args.output:
args.output = "conkit.png"
if figure_aspect_ratio is not None:
figure.ax.set_aspect(conkit.plot.tools.get_adjusted_aspect(figure.ax, figure_aspect_ratio))
figure.savefig(args.output, dpi=args.dpi, overwrite=args.overwrite)
logger.info("Final plot written to %s", args.output)
if __name__ == "__main__":
main()