#!/usr/bin/python
import re
def find_cag_repeat(id, seq):
    """Report the first run of six or more consecutive CAG triplets in *seq*.

    Args:
        id: Label for the record; only its first 20 characters are
            printed.  (The name shadows the ``id`` builtin but is kept
            unchanged so existing callers keep working.)
        seq: Sequence text to scan.  Matching is case-sensitive.

    Returns:
        None.  When a qualifying run is found, one line of the form
        ``<label> <TAB>repeat length <n>`` is printed to standard output,
        where ``n`` is the run length in characters (three per triplet).
        Nothing is printed when no run of at least six triplets exists.
    """
    # re.search returns the leftmost match and ``{6,}`` is greedy, so this
    # is the full extent of the *first* qualifying run, which is not
    # necessarily the longest run in the sequence.
    match = re.search(r'(?:CAG){6,}', seq)
    if match is None:
        return

    length = len(match.group(0))
    label = id[:20]

    # A single pre-formatted string in parentheses is valid both as a
    # Python 2 print statement and as a Python 3 print() call.  The layout
    # (space after the label, none after the tab) mirrors what the Python 2
    # statement ``print id, '\t', 'repeat length', length`` emits.
    print('%s \trepeat length %d' % (label, length))
def _iter_fasta_records(lines):
    """Yield ``(header, sequence)`` pairs from an iterable of FASTA lines.

    A line starting with ``>`` opens a new record; the lines that follow,
    with trailing whitespace removed, are concatenated into that record's
    sequence.  Lines that appear before the first header are ignored.
    The final record is yielded once the input is exhausted.
    """
    header = None
    chunks = []
    for raw in lines:
        text = raw.rstrip()
        if text.startswith('>'):
            if header is not None:
                yield header, ''.join(chunks)
            header = text
            chunks = []
        elif header is not None:
            # Collect pieces and join once per record instead of growing
            # a string with += on every line.
            chunks.append(text)
    # Without this final flush the last record in the file would never be
    # reported, because no further header follows it.
    if header is not None:
        yield header, ''.join(chunks)


# Scan every record of the 'refseq_human' FASTA file for CAG repeats.
# The with-block guarantees the file handle is closed when done.
with open('refseq_human') as fasta:
    for record_id, record_seq in _iter_fasta_records(fasta):
        find_cag_repeat(record_id, record_seq)