#!/usr/bin/python

import re

def find_cag_repeat(id, seq):
    """Report the first run of six or more consecutive CAG codons in *seq*.

    The sequence is searched left to right for the first stretch made of
    at least six back-to-back, upper-case ``CAG`` triplets.  When one is
    found, a single line is printed containing the record identifier
    (truncated to its first 20 characters) and the length of the run in
    nucleotides (i.e. three times the number of CAG units).  Nothing is
    printed when no such run exists.

    Args:
        id: Record identifier, e.g. a FASTA header line.
        seq: Nucleotide sequence to scan.

    Returns:
        None.  The result is written to standard output.
    """
    # A single search is enough: the pattern cannot match unless 'CAG'
    # occurs, so a separate pre-check for 'CAG' only scans the sequence twice.
    match = re.search(r'((CAG){6,})', seq)
    if not match:
        return

    # Group 1 (the outer parentheses) is the whole repeat run; group 2
    # would only hold the last single 'CAG' unit.
    length = len(match.group(1))
    label = id[0:20]

    # A single pre-formatted string keeps the output identical under
    # Python 2 and Python 3 (the Python 2 print statement emits no space
    # after an item ending in a tab).
    print('%s \trepeat length %d' % (label, length))

# Scan the FASTA file 'refseq_human' record by record and report CAG
# repeats for each record via find_cag_repeat().
id = ''         # header line of the record currently being read
seq = ''        # sequence of the most recently completed record
seq_parts = []  # sequence lines collected for the current record

# 'with' guarantees the file handle is closed even if a record fails.
with open('refseq_human') as fasta:
    for line in fasta:
        line = line.rstrip()
        if line.startswith('>'):
            # A new header ends the previous record, so process it now.
            if id != '':
                seq = ''.join(seq_parts)
                find_cag_repeat(id, seq)
            id = line
            seq_parts = []
        else:
            seq_parts.append(line)

# The last record has no following header to trigger processing, so it
# must be flushed explicitly once the file is exhausted.
seq = ''.join(seq_parts)
if id != '':
    find_cag_repeat(id, seq)