"""Testing facility for conkit.io.GremlinIO"""
__author__ = "Felix Simkovic"
__date__ = "04 Oct 2016"
import os
import unittest
from conkit.core.contact import Contact
from conkit.core.contactfile import ContactFile
from conkit.core.contactmap import ContactMap
from conkit.core.sequence import Sequence
from conkit.io.gremlin import GremlinParser
from conkit.io.tests.helpers import ParserTestCase
[docs]class TestGremlinParser(ParserTestCase):
[docs] def test_read_1(self):
content = """i j i_id j_id r_sco s_sco prob
179 246 179_C 246_L 0.2019 4.740 1.000
262 305 262_G 305_Y 0.1742 4.090 1.000
428 448 428_A 448_N 0.1638 3.846 1.000
214 231 214_F 231_V 0.1342 3.150 1.000
457 488 457_L 488_Y 0.1254 2.945 1.000
220 223 220_A 223_A 0.1187 2.786 0.999
143 209 143_I 209_D 0.1139 2.674 0.999
79 365 79_M 365_I 0.1114 2.615 0.998
215 268 215_V 268_A 0.1109 2.604 0.998
262 266 262_G 266_K 0.1040 2.442 0.997
"""
f_name = self.tempfile(content=content)
with open(f_name, "r") as f_in:
contact_file = GremlinParser().read(f_in)
contact_map1 = contact_file.top_map
self.assertEqual(1, len(contact_file))
self.assertEqual(10, len(contact_map1))
self.assertEqual([179, 262, 428, 214, 457, 220, 143, 79, 215, 262], [c.res1_seq for c in contact_map1])
self.assertEqual(
[0.2019, 0.1742, 0.1638, 0.1342, 0.1254, 0.1187, 0.1139, 0.1114, 0.1109, 0.1040],
[c.raw_score for c in contact_map1],
)
[docs] def test_read_2(self):
content = """# Some comments
# That are here for whatever reason
i j i_id j_id r_sco s_sco prob
179 246 179_C 246_L 0.2019 4.740 1.000
262 305 262_G 305_Y 0.1742 4.090 1.000
428 448 428_A 448_N 0.1638 3.846 1.000
214 231 214_F 231_V 0.1342 3.150 1.000
457 488 457_L 488_Y 0.1254 2.945 1.000
220 223 220_A 223_A 0.1187 2.786 0.999
143 209 143_I 209_D 0.1139 2.674 0.999
79 365 79_M 365_I 0.1114 2.615 0.998
215 268 215_V 268_A 0.1109 2.604 0.998
262 266 262_G 266_K 0.1040 2.442 0.997
"""
f_name = self.tempfile(content=content)
with open(f_name, "r") as f_in:
contact_file = GremlinParser().read(f_in)
contact_map1 = contact_file.top_map
self.assertEqual(1, len(contact_file))
self.assertEqual(10, len(contact_map1))
self.assertEqual([179, 262, 428, 214, 457, 220, 143, 79, 215, 262], [c.res1_seq for c in contact_map1])
self.assertEqual(
[0.2019, 0.1742, 0.1638, 0.1342, 0.1254, 0.1187, 0.1139, 0.1114, 0.1109, 0.1040],
[c.raw_score for c in contact_map1],
)
[docs] def test_read_3(self):
content = """i j gene i_id j_id r_sco s_sco prob I_prob
127 187 A 127_V 187_I 0.183 3.635 1.000 N/A
83 87 A 83_E 87_Q 0.183 3.633 1.000 N/A
108 111 A 108_P 111_P 0.105 2.095 0.989 N/A
431 435 B 241_L 245_L 0.104 2.076 0.988 N/A
63 83 A 63_T 83_E 0.098 1.952 0.980 N/A
23 434 AB 23_T 244_L 0.082 1.624 0.924 0.519
20 438 AB 20_Y 248_T 0.059 1.178 0.647 0.181
265 275 B 75_E 85_V 0.059 1.175 0.644 N/A
263 267 B 73_A 77_G 0.059 1.172 0.641 N/A
19 438 AB 19_L 248_T 0.059 1.17 0.640 0.176
211 215 B 21_D 25_A 0.054 1.069 0.536 N/A
30 65 A 30_A 65_T 0.054 1.065 0.532 N/A
24 434 AB 24_A 244_L 0.054 1.064 0.531 0.123
"""
f_name = self.tempfile(content=content)
with open(f_name, "r") as f_in:
contact_file = GremlinParser().read(f_in)
self.assertEqual(3, len(contact_file))
chain_a_res1seq = [127, 83, 108, 63, 30]
chain_a_rawscore = [0.183, 0.183, 0.105, 0.098, 0.054]
chain_b_res1seq = [431, 265, 263, 211]
chain_b_rawscore = [0.104, 0.059, 0.059, 0.054]
chain_ab_res1seq = [23, 20, 19, 24]
chain_ab_rawscore = [0.082, 0.059, 0.059, 0.054]
for count, res1_seqs, raw_scores, cmap in zip(
[5, 4, 4],
[chain_a_res1seq, chain_ab_res1seq, chain_b_res1seq],
[chain_a_rawscore, chain_ab_rawscore, chain_b_rawscore],
contact_file,
):
self.assertEqual(count, len(cmap))
self.assertEqual(res1_seqs, [c.res1_seq for c in cmap])
self.assertEqual(raw_scores, [c.raw_score for c in cmap])
[docs] def test_write_1(self):
contact_file = ContactFile("test")
contact_map = ContactMap("A")
contact_file.add(contact_map)
for c in [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]:
contact = Contact(c[0], c[1], c[4], distance_bound=(c[2], c[3]))
contact_map.add(contact)
contact_map.sequence = Sequence("1", "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD")
contact_map.set_sequence_register()
f_name = self.tempfile()
with open(f_name, "w") as f_out:
GremlinParser().write(f_out, contact_file)
content = [
"i j i_id j_id r_sco s_sco prob",
"1 9 1_H 9_L 0.7 1.0 1.0",
"1 10 1_H 10_L 0.7 1.0 1.0",
"2 8 2_L 8_I 0.9 1.3 1.0",
"3 12 3_E 12_K 0.4 0.6 1.0",
]
with open(f_name, "r") as f_in:
output = f_in.read().splitlines()
self.assertEqual(content, output)
[docs] def test_write_2(self):
contact_file = ContactFile("TEST")
contact_map = ContactMap("1")
contact_file.add(contact_map)
for c in [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]:
contact = Contact(c[0], c[1], c[4], distance_bound=(c[2], c[3]))
contact_map.add(contact)
f_name = self.tempfile()
with open(f_name, "w") as f_out:
GremlinParser().write(f_out, contact_file)
content = [
"i j i_id j_id r_sco s_sco prob",
"1 9 1_X 9_X 0.7 1.0 1.0",
"1 10 1_X 10_X 0.7 1.0 1.0",
"2 8 2_X 8_X 0.9 1.3 1.0",
"3 12 3_X 12_X 0.4 0.6 1.0",
]
with open(f_name, "r") as f_in:
output = f_in.read().splitlines()
self.assertEqual(content, output)
[docs] def test_write_3(self):
contact_file = ContactFile("TEST")
contact_maps = [ContactMap("A"), ContactMap("AB"), ContactMap("B")]
contacts = [
(Contact(1, 9, 0.7), Contact(1, 10, 0.7), Contact(2, 8, 0.9), Contact(3, 12, 0.4)),
(Contact(1, 9, 0.7), Contact(1, 10, 0.7), Contact(2, 8, 0.9), Contact(3, 12, 0.4)),
(Contact(1, 9, 0.7), Contact(1, 10, 0.7), Contact(2, 8, 0.9), Contact(3, 12, 0.4)),
]
chains = [("A", "A"), ("A", "B"), ("B", "B")]
for contact_map, contacts, chain in zip(contact_maps, contacts, chains):
contact_file.add(contact_map)
for c in contacts:
c.res1_chain = chain[0]
c.res2_chain = chain[1]
contact_map.add(c)
contact_map.sequence = Sequence("1", "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD")
contact_map.set_sequence_register()
f_name = self.tempfile()
with open(f_name, "w") as f_out:
GremlinParser().write(f_out, contact_file)
content = [
"i j gene i_id j_id r_sco s_sco prob I_prob",
"1 9 A 1_H 9_L 0.7 1.0 1.0 N/A",
"1 10 A 1_H 10_L 0.7 1.0 1.0 N/A",
"2 8 A 2_L 8_I 0.9 1.3 1.0 N/A",
"3 12 A 3_E 12_K 0.4 0.6 1.0 N/A",
"1 9 AB 1_H 9_L 0.7 1.0 1.0 N/A",
"1 10 AB 1_H 10_L 0.7 1.0 1.0 N/A",
"2 8 AB 2_L 8_I 0.9 1.3 1.0 N/A",
"3 12 AB 3_E 12_K 0.4 0.6 1.0 N/A",
"1 9 B 1_H 9_L 0.7 1.0 1.0 N/A",
"1 10 B 1_H 10_L 0.7 1.0 1.0 N/A",
"2 8 B 2_L 8_I 0.9 1.3 1.0 N/A",
"3 12 B 3_E 12_K 0.4 0.6 1.0 N/A",
]
with open(f_name, "r") as f_in:
output = f_in.read().splitlines()
self.assertEqual(content, output)
if __name__ == "__main__":
unittest.main(verbosity=2)