Source code for conkit.command_line.conkit_validate

#!/usr/bin/env python
#
# BSD 3-Clause License
#
# Copyright (c) 2016-21, University of Liverpool
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""This script provides a model validation pipeline. It will
take a sequence, a predicted distogram and a PDB model, then
compare the distances observed in the model and those predicted.
It will then report regions in the model where an outlier was
detected, and use map_align to provide a solution for potential
register errors.

It uses one external program to perform this task:

   map_align for contact map alignment

*** This program needs to be installed separately from https://github.com/sokrypton/map_align***

"""

import argparse
from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP
import os
from prettytable import PrettyTable

import conkit.applications
import conkit.command_line
import conkit.io
import conkit.plot
from conkit.plot.tools import is_executable

logger = None


[docs]def create_argument_parser(): """Create a parser for the command line arguments used in conkit-validate""" parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("seqfile", type=check_file_exists, help="Path to sequence file") parser.add_argument("seqformat", type=str, help="Sequence format") parser.add_argument("distfile", type=check_file_exists, help="Path to distance prediction file") parser.add_argument("distformat", type=str, help="Format of distance prediction file", choices=list(conkit.io.DISTANCE_FILE_PARSERS.keys())) parser.add_argument("pdbfile", type=check_file_exists, help="Path to structure file") parser.add_argument("pdbformat", type=str, help="Format of structure file", choices=['pdb', 'mmcif']) parser.add_argument("-dssp_exe", dest="dssp", default='mkdssp', help="path to dssp executable", type=is_executable) parser.add_argument("-output", dest="output", default="conkit.png", help="path to output figure png file", type=str) parser.add_argument("--overwrite", dest="overwrite", default=False, action="store_true", help="overwrite output figure png file if it already exists") parser.add_argument("--map_align_exe", dest="map_align_exe", default=None, type=is_executable, help="Path to the map_align executable") parser.add_argument("--gap_opening_penalty", dest="gap_opening_penalty", default=-1, type=float, help="Gap opening penalty") parser.add_argument("--gap_extension_penalty", dest="gap_extension_penalty", default=-0.01, type=float, help="Gap extension penalty") parser.add_argument("--seq_separation_cutoff", dest="seq_separation_cutoff", default=3, type=int, help="Sequence separation cutoff"), parser.add_argument("--n_iterations", dest="n_iterations", default=20, type=int, help="Number of iterations") return parser
[docs]def check_file_exists(input_path): """Check if a given path exists Parameters ---------- input_path : str, None Location of the file to be tested Returns ------- abspath : str, None The absolute path of the file if it exists, None if the input is None Raises ------ :exc:`FileNotFoundError` The file doesn't exist """ if input_path is None: return None if os.path.isfile(os.path.abspath(input_path)): return os.path.abspath(input_path) else: raise FileNotFoundError("{} cannot be found".format(input_path))
[docs]def main(): """The main routine for conkit-validate functionality""" parser = create_argument_parser() args = parser.parse_args() global logger logger = conkit.command_line.setup_logging(level="info") if os.path.isfile(args.output) and not args.overwrite: raise FileExistsError('The output file {} already exists!'.format(args.output)) logger.info(os.linesep + "Working directory: %s", os.getcwd()) logger.info("Reading input sequence: %s", args.seqfile) sequence = conkit.io.read(args.seqfile, args.seqformat).top if len(sequence) < 5: raise ValueError('Cannot validate model with less than 5 residues') logger.info("Length of the sequence: %d", len(sequence)) logger.info("Reading input distance prediction: %s", args.distfile) prediction = conkit.io.read(args.distfile, args.distformat).top logger.info("Reading input PDB model: %s", args.pdbfile) model = conkit.io.read(args.pdbfile, args.pdbformat).top p = PDBParser() structure = p.get_structure('structure', args.pdbfile)[0] dssp = DSSP(structure, args.pdbfile, dssp=args.dssp, acc_array='Wilke') logger.info(os.linesep + "Validating model.") if len(sequence) > 500: logger.info("Input model has more than 500 residues, this might take a while...") figure = conkit.plot.ModelValidationFigure(model, prediction, sequence, dssp, map_align_exe=args.map_align_exe) figure.savefig(args.output, overwrite=args.overwrite) logger.info(os.linesep + "Validation plot written to %s", args.output) residue_info = figure.data.loc[:, ['RESNUM', 'SCORE', 'MISALIGNED']] table = PrettyTable() table.field_names = ["Residue", "Predicted score", "Suggested register"] _resnum_template = '{} ({})' _error_score_template = '*** {0:.2f} ***' _correct_score_template = ' {0:.2f} ' _register_template = '*** {} ({}) ***' _empty_register = ' ' for residue in residue_info.values: resnum, score, misalignment = residue current_residue = _resnum_template.format(sequence.seq[resnum - 1], resnum) score = _error_score_template.format(score) if score > 0.5 else _correct_score_template.format(score) if misalignment and resnum in figure.alignment.keys(): register = _register_template.format(sequence.seq[figure.alignment[resnum] - 1], figure.alignment[resnum]) else: register = _empty_register table.add_row([current_residue, score, register]) logger.info(os.linesep) logger.info(table)
if __name__ == "__main__": import sys import traceback try: main() sys.exit(0) except Exception as e: if not isinstance(e, SystemExit): msg = "".join(traceback.format_exception(*sys.exc_info())) logger.critical(msg) sys.exit(1)