Source code for conkit.io

"""I/O interface for file reading, writing and conversions"""

__author__ = 'Felix Simkovic'
__date__ = '13 Sep 2016'
__version__ = "0.1"

from conkit.io.CaspIO import CaspParser
from conkit.io.CCMpredIO import CCMpredParser
from conkit.io.ComsatIO import ComsatParser
from conkit.io.BbcontactsIO import BbcontactsParser
from conkit.io.BCLContactIO import BCLContactParser
from conkit.io.EPCMapIO import EPCMapParser
from conkit.io.EVfoldIO import EVfoldParser
from conkit.io.FreeContactIO import FreeContactParser
from conkit.io.GremlinIO import GremlinParser
from conkit.io.MemBrainIO import MemBrainParser
from conkit.io.PconsIO import PconsParser
from conkit.io.PdbIO import PdbParser
from conkit.io.PdbIO import MmCifParser
from conkit.io.PlmDCAIO import PlmDCAParser
from conkit.io.PsicovIO import PsicovParser

from conkit.io.A3mIO import A3mParser
from conkit.io.FastaIO import FastaParser
from conkit.io.JonesIO import JonesParser
from conkit.io.StockholmIO import StockholmParser

from conkit.io._iotools import open_f_handle


# Lookup table: contact file format keyword -> parser class.
# Some keywords share a parser class: 'metapsicov' and 'psicov' both map to
# PsicovParser, and all 'pconsc*' keywords map to PconsParser.
CONTACT_FILE_PARSERS = {
    "casprr": CaspParser,
    "ccmpred": CCMpredParser,
    "comsat": ComsatParser,
    "bbcontacts": BbcontactsParser,
    "bclcontact": BCLContactParser,
    "epcmap": EPCMapParser,
    "evfold": EVfoldParser,
    "freecontact": FreeContactParser,
    "gremlin": GremlinParser,
    "membrain": MemBrainParser,
    "metapsicov": PsicovParser,
    "mmcif": MmCifParser,
    "pconsc": PconsParser,
    "pconsc2": PconsParser,
    "pconsc3": PconsParser,
    "pdb": PdbParser,
    "plmdca": PlmDCAParser,
    "psicov": PsicovParser,
}

# Lookup table: sequence file format keyword -> parser class.
# 'a3m' and 'a3m-inserts' share A3mParser; the functions below use the keyword
# itself to decide whether insert states are kept when reading.
SEQUENCE_FILE_PARSERS = {
    "a3m": A3mParser,
    "a3m-inserts": A3mParser,
    "fasta": FastaParser,
    "jones": JonesParser,
    "stockholm": StockholmParser,
}


def convert(fname_in, format_in, fname_out, format_out):
    """Convert a file in format x to file in format y

    Parameters
    ----------
    fname_in : filehandle, filename
       Input file to read
    format_in : str
       File format of ``fname_in``
    fname_out : filehandle, filename
       Output file to write
    format_out : str
       File format of ``fname_out``

    Raises
    ------
    ValueError
       Unrecognised input file format
    ValueError
       Unrecognised output file format
    ValueError
       Cannot convert contact file to sequence file
    ValueError
       Cannot convert sequence file to contact file

    Examples
    --------
    1) Convert a sequence file from A3M format to FASTA format:

    >>> from conkit import io
    >>> with open('example.a3m', 'r') as f_in, open('example.fas', 'w') as f_out:
    ...     io.convert(f_in, 'a3m', f_out, 'fasta')

    2) Convert a PconsC3 contact prediction file to the standard Casp RR format:

    >>> from conkit import io
    >>> with open('example.out', 'r') as f_in, open('example.rr', 'w') as f_out:
    ...     io.convert(f_in, 'pconsc3', f_out, 'casprr')

    Notes
    -----
    A3M format comes by default WITHOUT insert states, these are removed.
    To obtain an alignment WITH insert states, use format ``a3m-inserts``.

    """
    # Validate both format keywords before any file is touched
    if format_in not in CONTACT_FILE_PARSERS and format_in not in SEQUENCE_FILE_PARSERS:
        raise ValueError("Unrecognised input file format: '{selected}'".format(selected=format_in))
    if format_out not in CONTACT_FILE_PARSERS and format_out not in SEQUENCE_FILE_PARSERS:
        raise ValueError("Unrecognised output file format: '{selected}'".format(selected=format_out))

    # Contact and sequence data cannot be converted into each other
    if format_in in CONTACT_FILE_PARSERS and format_out in SEQUENCE_FILE_PARSERS:
        raise ValueError("Cannot convert contact file to sequence file")
    if format_in in SEQUENCE_FILE_PARSERS and format_out in CONTACT_FILE_PARSERS:
        raise ValueError("Cannot convert sequence file to contact file")

    # From here on both formats are known to belong to the same table
    read_kwargs = {}
    if format_in in CONTACT_FILE_PARSERS:
        parsers = CONTACT_FILE_PARSERS
    else:
        parsers = SEQUENCE_FILE_PARSERS
        # The 'a3m-inserts' keyword asks the A3M parser to keep insert states
        if format_in == 'a3m-inserts':
            read_kwargs['remove_inserts'] = False

    parser_in = parsers[format_in]()
    parser_out = parsers[format_out]()

    # Parse the input completely before the output is opened, so that a failed
    # parse does not leave behind an output that was already opened for writing
    with open_f_handle(fname_in, 'read') as f_in:
        hierarchy = parser_in.read(f_in, **read_kwargs)

    with open_f_handle(fname_out, 'write') as f_out:
        parser_out.write(f_out, hierarchy)
def read(f_in, format, f_id='conkit'):
    """Parse a file handle to read into structure

    Parameters
    ----------
    f_in
       Open file handle for input file [read-permissions]
    format : str
       File format of handle
    f_id : str
       Identifier for the returned file

    Returns
    -------
    hierarchy
       The hierarchy instance of the requested file

    Raises
    ------
    ValueError
       Unrecognised format

    Examples
    --------
    1) Read a Multiple Sequence Alignment file into a ConKit hierarchy:

    >>> from conkit import io
    >>> with open('example.a3m', 'r') as f_in:
    ...     hierarchy = io.read(f_in, 'a3m')

    2) Read a contact prediction file into a conkit hierarchy:

    >>> from conkit import io
    >>> with open('example.mat', 'r') as f_in:
    ...     hierarchy = io.read(f_in, 'ccmpred')

    Notes
    -----
    A3M format comes by default WITHOUT insert states, these are removed.
    To obtain an alignment WITH insert states, use format ``a3m-inserts``.

    """
    # Look the keyword up in the contact table first, then the sequence table
    if format in CONTACT_FILE_PARSERS:
        parser = CONTACT_FILE_PARSERS[format]()
    elif format in SEQUENCE_FILE_PARSERS:
        parser = SEQUENCE_FILE_PARSERS[format]()
    else:
        raise ValueError("Unrecognised format: '{selected}'".format(selected=format))

    read_kwargs = {'f_id': f_id}
    # Same handling as convert(): the 'a3m-inserts' keyword must keep the
    # insert states, otherwise it behaves exactly like plain 'a3m'
    if format == 'a3m-inserts':
        read_kwargs['remove_inserts'] = False

    with open_f_handle(f_in, 'read') as f_handle:
        hierarchy = parser.read(f_handle, **read_kwargs)

    return hierarchy
def write(fname, format, hierarchy):
    """Write a ConKit hierarchy to a file in the requested format

    Parameters
    ----------
    fname : filehandle, filename
       Output file to write
    format : str
       File format of handle
    hierarchy
       ConKit hierarchy to write

    Raises
    ------
    ValueError
       Unrecognised format

    Examples
    --------
    1) Write a ConKit hierarchy into a Multiple Sequence Alignment file:

    >>> from conkit import io
    >>> with open('example.fas', 'r') as f_in, open('example.a3m', 'w') as f_out:
    ...     hierarchy = io.read(f_in, 'fasta')
    ...     io.write(f_out, 'a3m', hierarchy)

    2) Write a ConKit hierarchy into a contact prediction file:

    >>> from conkit import io
    >>> with open('example.txt', 'r') as f_in, open('example.rr', 'w') as f_out:
    ...     hierarchy = io.read(f_in, 'psicov')
    ...     io.write(f_out, 'casprr', hierarchy)

    """
    # Look the keyword up in the contact table first, then the sequence table
    if format in CONTACT_FILE_PARSERS:
        parser = CONTACT_FILE_PARSERS[format]()
    elif format in SEQUENCE_FILE_PARSERS:
        parser = SEQUENCE_FILE_PARSERS[format]()
    else:
        raise ValueError("Unrecognised format: '{selected}'".format(selected=format))

    with open_f_handle(fname, 'write') as f_out:
        parser.write(f_out, hierarchy)