#!/usr/bin/python

import re

# Length (in nucleotides) of every candidate window that is examined.
WINDOW_LENGTH = 23

# Matches the low-complexity stretches a candidate must not contain:
# four identical bases in a row, or six consecutive positions that are
# all G or C.
_LOW_COMPLEXITY = re.compile(r'A{4}|T{4}|C{4}|G{4}|[GC]{6}')


def _read_sequence(path):
    """Return the sequence stored in the FASTA file *path* as one string.

    Every line containing '>' is treated as a header and skipped; trailing
    whitespace is stripped from the remaining lines before they are joined.
    """
    parts = []
    with open(path) as handle:
        for line in handle:
            if '>' not in line:
                parts.append(line.rstrip())
    return ''.join(parts)


def _is_candidate(testseq):
    """Return True if *testseq* passes all candidate filters.

    The filters are: the window starts with AA and ends with TT, its
    G+C fraction lies between 0.3 and 0.5 (inclusive), and it contains
    none of the stretches described by ``_LOW_COMPLEXITY``.
    """
    if not (testseq.startswith('AA') and testseq.endswith('TT')):
        return False

    # float() keeps this a true division on Python 2 as well.
    gc_content = (testseq.count('G') + testseq.count('C')) / float(len(testseq))
    if not 0.3 <= gc_content <= 0.5:
        return False

    return _LOW_COMPLEXITY.search(testseq) is None


def find_candidates(seq, window=WINDOW_LENGTH):
    """Yield ``(offset, subsequence)`` for every passing window of *seq*.

    *offset* is the 0-based start of the window.  All
    ``len(seq) - window + 1`` windows are examined, including the one that
    ends on the last base of *seq*; a sequence shorter than *window*
    yields nothing.
    """
    for start in range(len(seq) - window + 1):
        testseq = seq[start:start + window]
        if _is_candidate(testseq):
            yield start, testseq


def main():
    """Scan 'mrna.fa' and print one line per candidate window."""
    seq = _read_sequence('mrna.fa')
    for start, testseq in find_candidates(seq):
        # Same bytes as the Python 2 statement `print i, '\t', testseq`
        # (no space is written after an item that ends in a tab), but this
        # single-argument form also runs unchanged on Python 3.
        print('%d \t%s' % (start, testseq))


if __name__ == '__main__':
    main()