

""" Protein secondary structure and associated tools and data.

Constants:
    - secstruc_alphabet     
        -- Secondary structure alphabet: 'HGIEBbTSC _-L?X'
        Contains a complete set of secondary structure codes generated by both
        STRIDE and DSSP
    - secstruc_ehl_alphabet 
        -- Standard 3 state secondary structure alphabet: EHLX
        E : Extended strand
        H : Helix
        L : Loop
        X : Unknown
        
There are two common ways of reducing the full secondary structure alphabet to 
the simpler three letter EHL alphabet. The EHL reduction converts 3/10 and pi
helixes to H (helix) and beta-bridges to strands (E), whereas the FA reduction 
converts all non-canonical helixes and strands to L (loop). The FA reduction is
more predictable.
    - fa_reduce_secstruc_to_ehl
    - ehl_reduce_secstruc_to_ehl
    
    
Usage :

>>> from corebio.secstruc import *
>>> record = dssp.DsspRecord( open('test_corebio/data/1crn.dssp') )
>>> record.secondary()
' EE SSHHHHHHHHHHHTTT  HHHHHHHHS EE SSS   GGG  '
>>> fa_reduce_secstruc_to_ehl(record.secondary())
'LEELLLHHHHHHHHHHHLLLLLHHHHHHHHLLEELLLLLLLLLLLL'

""" 

# Names exported by a star-import of this package. The alphabets and the two
# reduction transforms are defined below; 'dssp' and 'stride' are not defined
# in this file (presumably parser submodules of the package -- confirm).
__all__ = [
    'dssp',
    'stride',
    'secstruc_alphabet',
    'secstruc_ehl_alphabet',
    'fa_reduce_secstruc_to_ehl',
    'ehl_reduce_secstruc_to_ehl'
]

from corebio.seq import Alphabet, Seq
from corebio.transform import Transform

# ------------------- SECONDARY STRUCTURE ALPHABETS -------------------
# Every secondary structure code accepted as input. The order is significant:
# each reduction table below is aligned with this string column by column,
# so the code at index i is reduced to the table character at index i.
_SECSTRUC_CODES = "HGIEBbTSC _-L?X"

# Reduction tables, written against _SECSTRUC_CODES:
#                     HGIEBbTSC _-L?X
_FA_REDUCED_CODES = "HLLELLLLLLLLLXX"
_EHL_REDUCED_CODES = "HHHEEELLLLLLLXX"

# Full alphabet of secondary structure codes.
secstruc_alphabet = Alphabet(_SECSTRUC_CODES)

# Three-state alphabet: E (extended strand), H (helix), L (loop), plus
# X (unknown).
secstruc_ehl_alphabet = Alphabet("EHLX")

# FA reduction: only H and E keep their identity; every other structural
# code (G, I, B, b, T, S, C, ' ', '_', '-', L) becomes L. '?' and X become X.
_fa_source = Seq(_SECSTRUC_CODES, secstruc_alphabet)
_fa_target = Seq(_FA_REDUCED_CODES, secstruc_ehl_alphabet)
fa_reduce_secstruc_to_ehl = Transform(_fa_source, _fa_target)

# EHL reduction: H, G and I all become H; E, B and b all become E; the
# remaining structural codes become L. '?' and X become X.
_ehl_source = Seq(_SECSTRUC_CODES, secstruc_alphabet)
_ehl_target = Seq(_EHL_REDUCED_CODES, secstruc_ehl_alphabet)
ehl_reduce_secstruc_to_ehl = Transform(_ehl_source, _ehl_target)
                 
                 
                 
