# -------------------------------------------------------------------------
#     This file is part of mMass - the spectrum analysis tool for MS.
#     Copyright (C) 2005-07 Martin Strohalm <mmass@biographics.cz>

#     This program is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.

#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.

#     Complete text of GNU GPL can be found in the file LICENSE in the
#     main directory of the program
# -------------------------------------------------------------------------

# Function: DNA/RNA/protein sequence converter.

# load libs
import re
import string

# define patterns
# Each pattern accepts zero or more complete units (1-letter residues,
# 3-letter residues, RNA codons) and nothing else.
# The end anchor is \Z, not $: $ also matches just before a trailing
# newline, so a sequence such as 'ACD\n' would otherwise pass validation
# although '\n' is not a valid residue.
patt1 = r'^(A|C|D|E|F|G|H|I|K|L|M|N|P|Q|R|S|T|V|W|Y)*\Z'
patt3 = r'^(Ala|Cys|Asp|Glu|Phe|Gly|His|Ile|Lys|Leu|Met|Asn|Pro|Gln|Arg|Ser|Thr|Val|Trp|Tyr)*\Z'
pattRNA = r'^([AUCG]{3})*\Z'

# 1-letter amino acid code -> 3-letter amino acid code
dic1 = {
    'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu', 'F': 'Phe',
    'G': 'Gly', 'H': 'His', 'I': 'Ile', 'K': 'Lys', 'L': 'Leu',
    'M': 'Met', 'N': 'Asn', 'P': 'Pro', 'Q': 'Gln', 'R': 'Arg',
    'S': 'Ser', 'T': 'Thr', 'V': 'Val', 'W': 'Trp', 'Y': 'Tyr',
}

# 3-letter amino acid code -> 1-letter amino acid code
# (the exact inverse of dic1, so it is derived instead of retyped)
dic3 = dict((three, one) for one, three in dic1.items())

# RNA codon -> 1-letter amino acid code, one residue per row.
# The three codons in the last row map to the empty string, so they add
# nothing to a translated sequence.
dicRNA = {
    'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
    'UGU': 'C', 'UGC': 'C',
    'GAU': 'D', 'GAC': 'D',
    'GAA': 'E', 'GAG': 'E',
    'UUU': 'F', 'UUC': 'F',
    'GGC': 'G', 'GGA': 'G', 'GGG': 'G', 'GGU': 'G',
    'CAU': 'H', 'CAC': 'H',
    'AUU': 'I', 'AUC': 'I', 'AUA': 'I',
    'AAA': 'K', 'AAG': 'K',
    'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L', 'UUA': 'L', 'UUG': 'L',
    'AUG': 'M',
    'AAU': 'N', 'AAC': 'N',
    'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
    'CAA': 'Q', 'CAG': 'Q',
    'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
    'AGU': 'S', 'AGC': 'S', 'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
    'ACU': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
    'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
    'UGG': 'W',
    'UAU': 'Y', 'UAC': 'Y',
    'UAA': '', 'UAG': '', 'UGA': '',
}


# ----
def converter(sourceSeq, switch):
    """ Clean the sequence and run the selected converter on it.

    sourceSeq -- raw sequence text; every character for which isalpha()
        is false (digits, whitespace, punctuation...) is removed first.
    switch -- name of the conversion: '1to1', '3to1', '1to3' or 'NAto1'.

    Returns whatever the selected converter returns (a string, or False
    if that converter rejects the sequence). Also returns False if the
    cleaned sequence is empty or if 'switch' is not one of the names above.
    """

    # remove contaminants
    sourceSeq = ''.join([char for char in sourceSeq if char.isalpha()])
    if not sourceSeq:
        return False

    # uppercase if all lowercase
    # NOTE: this also applies to '3to1', where an all-lowercase input
    # becomes all-uppercase and no longer matches the capitalised
    # 3-letter codes.
    if sourceSeq.islower():
        sourceSeq = sourceSeq.upper()

    # switch converter and convert
    if switch == '1to1':
        return conv1to1(sourceSeq)
    if switch == '3to1':
        return conv3to1(sourceSeq)
    if switch == '1to3':
        return conv1to3(sourceSeq)
    if switch == 'NAto1':
        return convRNAto1(sourceSeq)

    # unknown converter: report failure the same way as an invalid
    # sequence instead of failing on an unassigned result variable
    return False
# ----


# ----
def conv1to1(sequence):
    """ Keep only valid 1-letter amino acid codes of a protein sequence.

    The sequence is uppercased and every character that is not a key of
    dic1 is silently dropped. Returns the filtered string (possibly empty).
    """

    # uppercase, then keep known residues only
    residues = [letter for letter in sequence.upper() if letter in dic1]

    return ''.join(residues)
# ----


# ----
def conv3to1(sequence):
    """ Translate a 3-letter protein sequence into 1-letter codes.

    Returns the translated string, or False if the sequence does not
    match patt3 (a run of capitalised 3-letter codes such as 'AlaCys').
    """

    # reject anything that is not a run of known 3-letter codes
    if re.search(patt3, sequence) is None:
        return False

    # cut the sequence into consecutive complete triplets and look each
    # one up
    triplets = [sequence[pos:pos + 3] for pos in range(0, len(sequence) - 2, 3)]

    return ''.join([dic3[code] for code in triplets])
# ----


# ----
def conv1to3(sequence):
    """ Translate a 1-letter protein sequence into 3-letter codes.

    Returns the translated string, or False if the sequence contains
    anything other than the uppercase 1-letter codes listed in dic1.
    """

    # check sequence
    if not re.search(patt1, sequence):
        return False

    # translate sequence
    # The lookup is guarded like the one in convRNAto1: a '$'-anchored
    # pattern also accepts a trailing newline, which is not a key of dic1
    # and must be reported as an invalid sequence, not raise KeyError.
    try:
        return ''.join([dic1[amino] for amino in sequence])
    except KeyError:
        return False
# ----


# ----
def convRNAto1(sequence):
    """ Translate an RNA (or DNA) sequence into a 1-letter protein sequence.

    Every 'T' is replaced by 'U' first, so DNA input is accepted too.
    Returns the translated string, or False if the sequence does not
    match pattRNA (complete A/U/C/G triplets) or a codon is missing from
    dicRNA. Codons mapped to '' in dicRNA add nothing to the result.
    """

    # treat DNA as RNA
    rna = sequence.replace('T', 'U')

    # only complete codons made of A, U, C and G are accepted
    if re.search(pattRNA, rna) is None:
        return False

    # translate codon by codon; an unknown codon invalidates the sequence
    try:
        return ''.join([dicRNA[rna[pos:pos + 3]] for pos in range(0, len(rna) - 2, 3)])
    except KeyError:
        return False
# ----


# ----
def convRNAto3(sequence):
    """ Translate an RNA (or DNA) sequence into a 3-letter protein sequence.

    Chains convRNAto1 and conv1to3. Returns False if the first step
    fails or yields an empty protein; otherwise returns whatever
    conv1to3 returns.
    """

    # first step: nucleic acid to 1-letter protein
    protein = convRNAto1(sequence)

    # second step: 1-letter to 3-letter protein, skipped if nothing
    # usable came out of the first step
    return conv1to3(protein) if protein else False
# ----
