Warning
External software is required to execute this script. For further information, refer to the Installation page.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | """
Simple contact prediction pipeline
==================================
This script contains a simple example of how we can create pipelines
using ConKit.
.. warning::
You need to exchange the paths to the executables
"""
import conkit.applications
import conkit.io
import conkit.plot
# Define the input variables
sequence_file = "toxd/toxd.fasta"
sequence_format = "fasta"
# Define the paths to the software we use
hhblits_exe = "path/to/hhblits" # <-- MODIFY THIS
hhblits_database = "path/to/hhblits_database" # <-- MODIFY THIS
ccmpred_exe = "path/to/ccmpred" # <-- MODIFY THIS
# Generate a Multiple Sequence Alignment
print("Generating the Multiple Sequence Alignment")
a3m_file = "toxd/toxd.a3m"
hhblits_cline = conkit.applications.HHblitsCommandLine(
cmd=hhblits_exe, database=hhblits_database, input=sequence_file, oa3m=a3m_file)
hhblits_cline() # Execute HHblits
# Analyse the alignment
msa = conkit.io.read(a3m_file, "a3m")
print("Length of the Target Sequence: %d" % msa.top_sequence.seq_len)
print("Total Number of Sequences: %d" % msa.nseq)
print("Number of Effective Sequences: %d" % msa.neff)
# Plot the amino acid coverage per position in the alignment
fig1 = conkit.plot.SequenceCoverageFigure(msa)
seq_cov_file = "toxd/toxd.freq.png"
fig2.savefig(seq_cov_file)
print("Sequence Coverage Plot: %s" % seq_cov_file)
# Convert the alignment into a CCMpred-readable format
jones_file = "toxd/toxd.jones"
conkit.io.write(jones_file, "jones", msa)
# Predict the contacts
print("Predicting contacts")
mat_file = "toxd/toxd.mat"
ccmpred_cline = conkit.applications.CCMpredCommandLine(alnfile=jones_file, matfile=mat_file)
ccmpred_cline() # Execute CCMpred
# Plot the top-30 contacts
conpred = conkit.io.read(mat_file, "ccmpred").top_map
# Remove contacts of neigbouring residues
conpred.remove_neighbors(inplace=True)
# Sort the list of contacts by their score
conpred.sort("raw_score", reverse=True, inplace=True)
conpred = conpred[:30] # Slice the contact map
fig2 = conkit.plot.ContactMapFigure(conpred, legend=True)
contact_map_file = "toxd/toxd.map.png"
fig2.savefig(contact_map_file)
print("Contact Map Plot: %s" % contact_map_file)
# Convert the contact prediction to a standardised format
casp_file = "toxd/toxd.rr"
conkit.io.convert(mat_file, "ccmpred", casp_file, "casprr")
print("Final Contact Prediction File: %s" % casp_file)
|