#!/usr/bin/python

import re

# The "reverse" genetic code:
# this dictionary has one-letter amino acid symbols as keys
# and their degenerate codons as values.

# Codons use IUPAC ambiguity codes: R = A/G, Y = C/U, W = A/U, S = C/G,
# M = A/C (two bases each), H = A/C/U (three bases), N = any of the four.
# NOTE(review): the single patterns for L, S and R are broader than the real
# codon sets (e.g. YUN also matches UUY, which is F), so degeneracies derived
# from them are upper bounds -- confirm this approximation is intended.
reverse_code = dict(
    L='YUN',
    F='UUY',
    I='AUH',
    M='AUG',
    V='GUN',
    S='WSN',
    P='CCN',
    T='ACN',
    A='GCN',
    G='GGN',
    Y='UAY',
    H='CAY',
    Q='CAR',
    N='AAY',
    K='AAR',
    D='GAY',
    E='GAR',
    C='UGY',
    W='UGG',
    R='MGN',
)

# Read the protein sequence from a FASTA file, then slide a fixed-width
# window along it and report, for every window, the back-translated
# degenerate codons and the total degeneracy (number of concrete nucleotide
# sequences the window's codon patterns expand to).

PEPTIDE_FILE = 'brca1.pep'
WINDOW_SIZE = 7

# Number of concrete bases each ambiguity symbol stands for.  Symbols not
# listed here (the plain bases A, C, G, U) contribute a factor of 1.
BASE_DEGENERACY = {
    'R': 2, 'Y': 2, 'W': 2, 'M': 2, 'S': 2, 'K': 2,
    'V': 3, 'H': 3, 'D': 3, 'B': 3,
    'N': 4,
}


def read_peptide(path):
    """Return the sequence stored in the FASTA-style file *path*.

    Every line containing '>' is treated as a header and skipped; the
    remaining lines are stripped of trailing whitespace and concatenated.
    The file is closed as soon as it has been read.
    """
    chunks = []
    with open(path) as handle:
        for line in handle:
            if '>' not in line:
                chunks.append(line.rstrip())
    return ''.join(chunks)


def codon_degeneracy(codon):
    """Return how many concrete codons the degenerate *codon* expands to."""
    degen = 1
    for base in codon:
        degen *= BASE_DEGENERACY.get(base, 1)
    return degen


def window_degeneracies(pep, code, width=WINDOW_SIZE):
    """Yield ``(pos, codons, degen)`` for each *width*-residue window of *pep*.

    *pos* is the 1-based start of the window, *codons* the list of
    degenerate codons looked up in *code* for its residues, and *degen* the
    product of the individual codon degeneracies.  Nothing is yielded when
    *pep* is shorter than *width*.

    Raises KeyError if a residue has no entry in *code*.
    """
    # "+ 1" so that the final window (the one ending on the last residue)
    # is included; stopping at len(pep) - width silently dropped it.
    for start in range(len(pep) - width + 1):
        codons = [code[aa] for aa in pep[start:start + width]]
        degen = 1
        for codon in codons:
            degen *= codon_degeneracy(codon)
        yield start + 1, codons, degen


def format_window(pos, codons, degen):
    """Format one report line: position and codons, then ' \\t' and degeneracy.

    The layout (single spaces between fields, then a space and a tab before
    the degeneracy) matches what the original chain of Python 2 print
    statements emitted.
    """
    return '%s \t%s' % (' '.join([str(pos)] + list(codons)), degen)


def main():
    """Print the degeneracy report for the peptide in PEPTIDE_FILE."""
    pep = read_peptide(PEPTIDE_FILE)
    for pos, codons, degen in window_degeneracies(pep, reverse_code):
        # A single parenthesised argument prints identically whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print(format_window(pos, codons, degen))


if __name__ == '__main__':
    main()