# Source code for conkit.io.tests.test_stockholm

"""Testing facility for conkit.io.StockholmIO"""

__author__ = "Felix Simkovic"
__date__ = "12 Sep 2016"

import os
import unittest

from conkit.io.stockholm import StockholmParser
from conkit.io.tests.helpers import ParserTestCase


class TestStockholmParser(ParserTestCase):
    """Read and write tests for :class:`StockholmParser`.

    Both tests use the same two-block Stockholm alignment (one reference
    sequence plus five UniRef sub-sequences). The fixture is kept as one
    record per element and joined with newlines before it is written to a
    temporary file.
    """

    # Description carried by every ``#=GS ... DE`` line of the fixture.
    _REMARK = (
        "[subseq from] Stromal membrane-associated GTPase-activating protein 2 n=1 "
        "Tax=Capsaspora owczarzaki (strain ATCC 30864) RepID=A0A0D2WIY8_CAPO3"
    )

    # Per-sequence description lines; identical in the input and in the written output.
    _GS_LINES = (
        "#=GS UniRef100_A0A0D2WIY8/647-745 DE " + _REMARK,
        "#=GS UniRef100_A0A0D2WIY8/761-857 DE " + _REMARK,
        "#=GS UniRef100_A0A0D2WIY8/1126-1228 DE " + _REMARK,
        "#=GS UniRef100_A0A0D2WIY8/1245-1341 DE " + _REMARK,
        "#=GS UniRef100_A0A0D2WIY8/1752-1857 DE " + _REMARK,
    )

    # Input alignment: header, GS lines, then two blocks in which every sequence
    # row (except the first) is followed by its ``#=GR ... PP`` row and each
    # block is closed by ``#=GC PP_cons`` / ``#=GC RF`` rows.
    _MSA_LINES = (
        (
            "# STOCKHOLM 1.0",
            "#=GF ID 1EAZ:A|PDBID|CHAIN|SEQUENCE-i5",
            "",
        )
        + _GS_LINES
        + (
            "",
            "1EAZ:A|PDBID|CHAIN|SEQUENCE GSMFTPKPPQDSAVIKAG-YC-V------K-Q-------------------------------------------------G-A------------VM------------------------------------------------------------------------------------------------------",
            "UniRef100_A0A0D2WIY8/647-745 ---------------IEG-YL-S------K-Q-------------------------------------------------G-GV----------NNN------------------------------------------------------------------------------------------------------",
            "#=GR UniRef100_A0A0D2WIY8/647-745 PP ...............79*.**.*......*.*.................................................*.99..........9**......................................................................................................",
            "UniRef100_A0A0D2WIY8/761-857 -----------ANPDKEG-WL-K------K-Q-------------------------------------------------G-N-----------SMA------------------------------------------------------------------------------------------------------",
            "#=GR UniRef100_A0A0D2WIY8/761-857 PP ...........5899***.**.*......*.*.................................................*.9...........9**......................................................................................................",
            "UniRef100_A0A0D2WIY8/1126-1228 ------------AVRKLG-FL-Y------K-Q-------------------------------------------------G-G------------SN------------------------------------------------------------------------------------------------------",
            "#=GR UniRef100_A0A0D2WIY8/1126-1228 PP ............68999*.**.*......*.*.................................................*.*............**......................................................................................................",
            "UniRef100_A0A0D2WIY8/1245-1341 ------------NPARQG-WL-C------K-R-------------------------------------------------G-G------------TY------------------------------------------------------------------------------------------------------",
            "#=GR UniRef100_A0A0D2WIY8/1245-1341 PP ............6789**.**.*......*.*.................................................*.*............**......................................................................................................",
            "UniRef100_A0A0D2WIY8/1752-1857 -------QRATPGFKMKG-WL-H------K-E-------------------------------------------------G-G------------SV------------------------------------------------------------------------------------------------------",
            "#=GR UniRef100_A0A0D2WIY8/1752-1857 PP .......556677889**.**.*......*.*.................................................*.*............**......................................................................................................",
            "#=GC PP_cons .......*....*...................................*.....*..*................*.................*..*....*................................................*****999875555677766776.",
            "#=GC RF .......x....x...................................x.....x..x................x.................x..x....x................................................xxxxxxxxxxxxxxxxxxxxxxxx",
            "",
            "1EAZ:A|PDBID|CHAIN|SEQUENCE -------------------K-----------------------NW---------------------------------------------------------------------------------------------K--R-R------------Y----F-QL--------D--E-----------------------",
            "UniRef100_A0A0D2WIY8/647-745 -------------------K-----------------------GW---------------------------------------------------------------------------------------------K--R-R------------Y----C-VL--------E--N-----------------------",
            "#=GR UniRef100_A0A0D2WIY8/647-745 PP ...................*.......................**.............................................................................................*..*.*............*....*.**........*..*.......................",
            "UniRef100_A0A0D2WIY8/761-857 -------------------K-----------------------DW---------------------------------------------------------------------------------------------K--K-R------------Y----I-AI--------K--E-----------------------",
            "#=GR UniRef100_A0A0D2WIY8/761-857 PP ...................*.......................**.............................................................................................*..*.*............*....*.**........*..*.......................",
            "UniRef100_A0A0D2WIY8/1126-1228 -------------------K-----------------------GW---------------------------------------------------------------------------------------------R--K-R------------W----I-VM--------E--H-----------------------",
            "#=GR UniRef100_A0A0D2WIY8/1126-1228 PP ...................*.......................**.............................................................................................*..*.*............*....*.**........*..*.......................",
            "UniRef100_A0A0D2WIY8/1245-1341 -------------------T-----------------------SW---------------------------------------------------------------------------------------------K--K-R------------W----L-VL--------K--G-----------------------",
            "#=GR UniRef100_A0A0D2WIY8/1245-1341 PP ...................*.......................**.............................................................................................*..*.*............*....*.**........*..*.......................",
            "UniRef100_A0A0D2WIY8/1752-1857 -------------------K-----------------------TW---------------------------------------------------------------------------------------------K--R-R------------W----F-ST--------T--P-----------------------",
            "#=GR UniRef100_A0A0D2WIY8/1752-1857 PP ...................*.......................**.............................................................................................*..*.*............*....*.**........*..*.......................",
            "#=GC PP_cons ...................9.......................**.............................................................................................*..*.*............*....*.**........9..9.......................",
            "#=GC RF ...................x.......................xx.............................................................................................x..x.x............x....x.xx........x..x.......................",
            "//",
        )
    )

    def test_read_1(self):
        """Parse the fixture and check id, sequence and remark of each entry.

        Each expected sequence is the first-block row followed by the
        second-block row of the same identifier. The first entry has no
        ``#=GS`` line in the fixture, so its remark is not checked.
        """
        remark = [self._REMARK]
        # (identifier, full sequence, expected remark or None to skip the check)
        expected = [
            (
                "1EAZ:A|PDBID|CHAIN|SEQUENCE",
                "GSMFTPKPPQDSAVIKAG-YC-V------K-Q-------------------------------------------------G-A-"
                "-----------VM------------------------------------------------------------------------"
                "-------------------------------------------------K-----------------------NW----------"
                "-----------------------------------------------------------------------------------K-"
                "-R-R------------Y----F-QL--------D--E-----------------------",
                None,
            ),
            (
                "UniRef100_A0A0D2WIY8/647-745",
                "---------------IEG-YL-S------K-Q-------------------------------------------------G-GV"
                "----------NNN------------------------------------------------------------------------"
                "-------------------------------------------------K-----------------------GW----------"
                "-----------------------------------------------------------------------------------K-"
                "-R-R------------Y----C-VL--------E--N-----------------------",
                remark,
            ),
            (
                "UniRef100_A0A0D2WIY8/761-857",
                "-----------ANPDKEG-WL-K------K-Q-------------------------------------------------G-N-"
                "----------SMA------------------------------------------------------------------------"
                "-------------------------------------------------K-----------------------DW----------"
                "-----------------------------------------------------------------------------------K-"
                "-K-R------------Y----I-AI--------K--E-----------------------",
                remark,
            ),
            (
                "UniRef100_A0A0D2WIY8/1126-1228",
                "------------AVRKLG-FL-Y------K-Q-------------------------------------------------G-G-"
                "-----------SN------------------------------------------------------------------------"
                "-------------------------------------------------K-----------------------GW----------"
                "-----------------------------------------------------------------------------------R-"
                "-K-R------------W----I-VM--------E--H-----------------------",
                remark,
            ),
            (
                "UniRef100_A0A0D2WIY8/1245-1341",
                "------------NPARQG-WL-C------K-R-------------------------------------------------G-G-"
                "-----------TY------------------------------------------------------------------------"
                "-------------------------------------------------T-----------------------SW----------"
                "-----------------------------------------------------------------------------------K-"
                "-K-R------------W----L-VL--------K--G-----------------------",
                remark,
            ),
            (
                "UniRef100_A0A0D2WIY8/1752-1857",
                "-------QRATPGFKMKG-WL-H------K-E-------------------------------------------------G-G-"
                "-----------SV------------------------------------------------------------------------"
                "-------------------------------------------------K-----------------------TW----------"
                "-----------------------------------------------------------------------------------K-"
                "-R-R------------W----F-ST--------T--P-----------------------",
                remark,
            ),
        ]

        # Trailing newline mirrors a file that ends after the "//" terminator line.
        f_name = self.tempfile(content="\n".join(self._MSA_LINES) + "\n")
        parser = StockholmParser()
        with open(f_name, "r") as f_in:
            hierarchy = parser.read(f_in)
            # Materialise while the handle is still open so the checks below do
            # not depend on whether the parser reads eagerly.
            entries = list(hierarchy)

        # Guard against a parse that silently yields too few (or no) entries.
        self.assertEqual(len(expected), len(entries))
        for (seq_id, seq, seq_remark), sequence_entry in zip(expected, entries):
            self.assertEqual(seq_id, sequence_entry.id)
            self.assertEqual(seq, sequence_entry.seq)
            if seq_remark is not None:
                self.assertEqual(seq_remark, sequence_entry.remark)

    def test_write_1(self):
        """Round-trip the fixture through ``read``/``write`` and compare the output.

        The reference output keeps the header, the ``#=GS`` lines and the
        sequence rows of both blocks. It contains no ``#=GR``/``#=GC`` rows,
        and its ``#=GF ID`` value lacks the ``-i5`` suffix of the input.
        """
        parser = StockholmParser()
        f_name_in = self.tempfile(content="\n".join(self._MSA_LINES))
        f_name_out = self.tempfile()
        with open(f_name_in, "r") as f_in, open(f_name_out, "w") as f_out:
            hierarchy = parser.read(f_in)
            parser.write(f_out, hierarchy)

        ref = (
            [
                "# STOCKHOLM 1.0",
                "#=GF ID 1EAZ:A|PDBID|CHAIN|SEQUENCE",
                "",
            ]
            + list(self._GS_LINES)
            + [
                "",
                "1EAZ:A|PDBID|CHAIN|SEQUENCE GSMFTPKPPQDSAVIKAG-YC-V------K-Q-------------------------------------------------G-A------------VM------------------------------------------------------------------------------------------------------",
                "UniRef100_A0A0D2WIY8/647-745 ---------------IEG-YL-S------K-Q-------------------------------------------------G-GV----------NNN------------------------------------------------------------------------------------------------------",
                "UniRef100_A0A0D2WIY8/761-857 -----------ANPDKEG-WL-K------K-Q-------------------------------------------------G-N-----------SMA------------------------------------------------------------------------------------------------------",
                "UniRef100_A0A0D2WIY8/1126-1228 ------------AVRKLG-FL-Y------K-Q-------------------------------------------------G-G------------SN------------------------------------------------------------------------------------------------------",
                "UniRef100_A0A0D2WIY8/1245-1341 ------------NPARQG-WL-C------K-R-------------------------------------------------G-G------------TY------------------------------------------------------------------------------------------------------",
                "UniRef100_A0A0D2WIY8/1752-1857 -------QRATPGFKMKG-WL-H------K-E-------------------------------------------------G-G------------SV------------------------------------------------------------------------------------------------------",
                "",
                "1EAZ:A|PDBID|CHAIN|SEQUENCE -------------------K-----------------------NW---------------------------------------------------------------------------------------------K--R-R------------Y----F-QL--------D--E-----------------------",
                "UniRef100_A0A0D2WIY8/647-745 -------------------K-----------------------GW---------------------------------------------------------------------------------------------K--R-R------------Y----C-VL--------E--N-----------------------",
                "UniRef100_A0A0D2WIY8/761-857 -------------------K-----------------------DW---------------------------------------------------------------------------------------------K--K-R------------Y----I-AI--------K--E-----------------------",
                "UniRef100_A0A0D2WIY8/1126-1228 -------------------K-----------------------GW---------------------------------------------------------------------------------------------R--K-R------------W----I-VM--------E--H-----------------------",
                "UniRef100_A0A0D2WIY8/1245-1341 -------------------T-----------------------SW---------------------------------------------------------------------------------------------K--K-R------------W----L-VL--------K--G-----------------------",
                "UniRef100_A0A0D2WIY8/1752-1857 -------------------K-----------------------TW---------------------------------------------------------------------------------------------K--R-R------------W----F-ST--------T--P-----------------------",
                "//",
            ]
        )
        with open(f_name_out, "r") as f_in:
            output = f_in.read().splitlines()
        self.assertEqual(ref, output)
# Allow running this test module directly; verbosity=2 makes unittest print one result line per test.
if __name__ == "__main__": unittest.main(verbosity=2)