#!/usr/bin/python
import re
# Reverse genetic code: maps each one-letter amino acid symbol to a single
# degenerate RNA codon written with IUPAC nucleotide ambiguity symbols.
# For the six-codon amino acids (L, R, S) the single pattern is a superset
# of the real codon set, e.g. 'YUN' for leucine also matches UUU and UUC.
reverse_code = {
    'A': 'GCN',  # alanine
    'C': 'UGY',  # cysteine
    'D': 'GAY',  # aspartate
    'E': 'GAR',  # glutamate
    'F': 'UUY',  # phenylalanine
    'G': 'GGN',  # glycine
    'H': 'CAY',  # histidine
    'I': 'AUH',  # isoleucine
    'K': 'AAR',  # lysine
    'L': 'YUN',  # leucine
    'M': 'AUG',  # methionine
    'N': 'AAY',  # asparagine
    'P': 'CCN',  # proline
    'Q': 'CAR',  # glutamine
    'R': 'MGN',  # arginine
    'S': 'WSN',  # serine
    'T': 'ACN',  # threonine
    'V': 'GUN',  # valine
    'W': 'UGG',  # tryptophan
    'Y': 'UAY',  # tyrosine
}
# Read the protein sequence from 'brca1.pep'.  Every line containing '>' is
# treated as a FASTA-style header and skipped; the remaining lines have their
# trailing whitespace (including the newline) removed and are concatenated
# into the single string `pep`.
# The `with` block guarantees the file handle is closed, and ''.join avoids
# rebuilding the string once per input line.
with open('brca1.pep') as pep_file:
    pep = ''.join(line.rstrip() for line in pep_file if '>' not in line)
# Number of residues in each sliding window along the peptide.
_WINDOW_SIZE = 7

# How many concrete bases each IUPAC ambiguity symbol stands for.  Symbols
# that are not listed (the plain bases A, C, G and U) count as 1.
_BASE_CHOICES = {
    'R': 2, 'Y': 2, 'W': 2, 'M': 2, 'S': 2, 'K': 2,
    'V': 3, 'H': 3, 'D': 3, 'B': 3,
    'N': 4,
}


def _codon_degeneracy(codon):
    """Return how many concrete codons the degenerate `codon` expands to."""
    choices = 1
    for base in codon:
        choices *= _BASE_CHOICES.get(base, 1)
    return choices


def _window_report(position, window, code):
    """Build the output line for one peptide window.

    position -- 1-based start of the window within the peptide.
    window   -- the residues of the window (one-letter symbols).
    code     -- mapping from residue symbol to degenerate codon.

    The line is the position, the degenerate codon of every residue, and
    the total degeneracy (the product of the per-codon degeneracies).
    Raises KeyError if a residue has no entry in `code`.
    """
    codons = [code[residue] for residue in window]
    degeneracy = 1
    for codon in codons:
        degeneracy *= _codon_degeneracy(codon)
    # The ' \t' separator (space, then tab) reproduces byte-for-byte what
    # the former chain of Python 2 print statements wrote.
    return ' '.join([str(position)] + codons) + ' \t' + str(degeneracy)


# Slide the window over every start position.  The "+ 1" makes the final
# window (the one ending on the last residue) part of the scan; without it
# the last window is silently skipped.  A peptide shorter than the window
# yields an empty range and therefore no output.
for start in range(len(pep) - _WINDOW_SIZE + 1):
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(_window_report(start + 1, pep[start:start + _WINDOW_SIZE],
                         reverse_code))