Source code for conkit.io.tests.test_clustal

"""Testing facility for conkit.io.FastaIO"""

__author__ = "Felix Simkovic"
__date__ = "09 Sep 2016"

import os
import unittest

from conkit.io.clustal import ClustalParser
from conkit.io.tests.helpers import ParserTestCase


class TestClustalParser(ParserTestCase):
    """Read and write tests for ``ClustalParser``.

    NOTE(review): the multi-line CLUSTAL fixtures below were re-laid-out from a
    whitespace-collapsed view of this file. Line breaks and column padding
    follow the usual CLUSTAL block layout (one record per line, blank line
    between blocks) -- confirm against the original test data.
    """

    # (id, sequence) pairs expected from the three-sequence alignments used by
    # test_read_2 and test_read_3: each sequence is the first block's chunk
    # followed by the second block's chunk.
    _EXPECTED_MSA = (
        ("seq_0", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
        ("seq_1", "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"),
        ("seq_2", "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"),
    )

    def _read_clustal(self, content):
        """Write *content* to a temporary file and return what the parser reads.

        Uses ``self.tempfile(content=...)`` to create the file and a fresh
        ``ClustalParser`` to read it back.
        """
        f_name = self.tempfile(content=content)
        parser = ClustalParser()
        with open(f_name, "r") as f_in:
            return parser.read(f_in)

    def _assert_leading_entries(self, sequence_file, expected):
        """Compare entries of *sequence_file* with ``(id, seq)`` pairs, in order.

        Only positions present on both sides are compared; as in the original
        index-based loop, the number of entries itself is not asserted.
        NOTE(review): a parser returning fewer entries than expected would
        therefore still pass -- consider asserting the count once confirmed.
        """
        for (ref_id, ref_seq), sequence_entry in zip(expected, sequence_file):
            self.assertEqual(ref_id, sequence_entry.id)
            self.assertEqual(ref_seq, sequence_entry.seq)

    def test_read_1(self):
        # A single sequence split over two blocks must be joined into one entry.
        seq = """CLUSTAL W

seq_0           MLDLEVVPE-RSLGNEQW-------E-F-TLG-MPLAQAV-AILQKHC--

seq_0           -RIIKNVQV
"""
        sequence_file = self._read_clustal(seq)
        sequence_entry = sequence_file.top_sequence
        ref_id = "seq_0"
        self.assertEqual(ref_id, sequence_entry.id)
        ref_seq = "MLDLEVVPE-RSLGNEQW-------E-F-TLG-MPLAQAV-AILQKHC---RIIKNVQV"
        self.assertEqual(ref_seq, sequence_entry.seq)

    def test_read_2(self):
        # Three sequences over two blocks, each block followed by a line of
        # asterisks that must not end up in any sequence.
        msa = """CLUSTAL W

seq_0           AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
seq_1           BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
seq_2           CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
                **************************************************

seq_0           AAAAAAAAA
seq_1           BBBBBBBBB
seq_2           CCCCCCCCC
                *********
"""
        sequence_file = self._read_clustal(msa)
        self._assert_leading_entries(sequence_file, self._EXPECTED_MSA)

    def test_read_3(self):
        # Same alignment as test_read_2, but with a different header line and
        # without the asterisk lines.
        msa = """CLUSTAL FORMAT for

seq_0           AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
seq_1           BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
seq_2           CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC

seq_0           AAAAAAAAA
seq_1           BBBBBBBBB
seq_2           CCCCCCCCC
"""
        sequence_file = self._read_clustal(msa)
        self._assert_leading_entries(sequence_file, self._EXPECTED_MSA)

    def test_write_1(self):
        # Round trip: read a CLUSTAL file, write it back with the same parser,
        # and expect the written lines to equal the input lines.
        seq = [
            "CLUSTAL FORMAT written with ConKit",
            "",
            "seq_0\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
            "seq_1\tBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB",
            # NOTE(review): this element used to be written as
            # `"" "seq_0\t..."`, an implicit concatenation that evaluates to
            # exactly the string below (no blank list element). The stray empty
            # literal was dropped to keep the value and remove the ambiguity.
            # If a blank separator line between blocks was intended, confirm
            # what the writer emits before adding one.
            "seq_0\tAAAAAAAAAAAAAAAAAAAAAA",
            "seq_1\tBBBBBBBBBBBBBBBBBBBBBB",
        ]
        joinedseq = "\n".join(seq)
        f_name_in = self.tempfile(content=joinedseq)
        f_name_out = self.tempfile()
        parser = ClustalParser()
        with open(f_name_in, "r") as f_in, open(f_name_out, "w") as f_out:
            sequence_file = parser.read(f_in)
            parser.write(f_out, sequence_file)
        with open(f_name_out, "r") as f_in:
            output = f_in.read().splitlines()
        self.assertEqual(seq, output)
# Run the tests with per-test (verbose) reporting when executed as a script.
if __name__ == "__main__":
    unittest.main(verbosity=2)