Contact PredictionΒΆ

Warning

External software is required to execute this script. For further information, refer to the Installation page.

(Source Code)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""
Simple contact prediction pipeline
==================================

This script contains a simple example of how we can create pipelines
using ConKit.

.. warning::
   You need to exchange the paths to the executables

"""

import conkit.applications
import conkit.io
import conkit.plot

# Define the input variables
sequence_file = "toxd/toxd.fasta"
sequence_format = "fasta"

# Define the paths to the software we use
hhblits_exe = "path/to/hhblits"                 # <-- MODIFY THIS
hhblits_database = "path/to/hhblits_database"   # <-- MODIFY THIS
ccmpred_exe = "path/to/ccmpred"                 # <-- MODIFY THIS

# Generate a Multiple Sequence Alignment
print("Generating the Multiple Sequence Alignment")
a3m_file = "toxd/toxd.a3m"
hhblits_cline = conkit.applications.HHblitsCommandLine(
    cmd=hhblits_exe, database=hhblits_database, input=sequence_file, oa3m=a3m_file
)
hhblits_cline()  # Execute HHblits

# Analyse the alignment
msa = conkit.io.read(a3m_file, "a3m")
print("Length of the Target Sequence:    %d" % msa.top_sequence.seq_len)
print("Total Number of Sequences:        %d" % msa.nseq)
print("Number of Effective Sequences:    %d" % msa.neff)

# Plot the amino acid coverage per position in the alignment
seq_cov_file = "toxd/toxd.freq.png"
conkit.plot.SequenceCoverageFigure(msa, file_name=seq_cov_file)
print("Sequence Coverage Plot:           %s" % seq_cov_file)

# Convert the alignment into a CCMpred-readable format
jones_file = "toxd/toxd.jones"
conkit.io.write(jones_file, "jones", msa)

# Predict the contacts
print("Predicting contacts")
mat_file = "toxd/toxd.mat"
ccmpred_cline = conkit.applications.CCMpredCommandLine(
    alnfile=jones_file, matfile=mat_file
)
ccmpred_cline()  # Execute CCMpred

# Plot the top-30 contacts
conpred = conkit.io.read(mat_file,  "ccmpred").top_map
# Remove contacts of neigbouring residues
conpred.remove_neighbors(inplace=True)
# Sort the list of contacts by their score
conpred.sort("raw_score", reverse=True, inplace=True)
conpred = conpred[:30]                                  # Slice the contact map
contact_map_file = "toxd/toxd.map.png"
conkit.plot.ContactMapFigure(conpred, file_name=contact_map_file)
print("Contact Map Plot:                 %s" % contact_map_file)

# Convert the contact prediction to a standardised format
casp_file = "toxd/toxd.rr"
conkit.io.convert(mat_file, "ccmpred", casp_file, "casprr")
print("Final Contact Prediction File:    %s" % casp_file)