#!/usr/bin/python

import re

# Load the sequences that were used as input to cmsearch from the file
# pattern_search.out.  The file is read as FASTA-like text: a line starting
# with '>' carries the record id (everything after the '>'), and the lines
# that follow carry that record's sequence.

origseq = {}  # record id -> sequence string
myid = ''     # id of the record currently being read
strand = ''   # not used by the visible code; kept in case other code reads it

with open('pattern_search.out') as seqfile:
    for line in seqfile:
        line = line.rstrip()
        if line.startswith('>'):
            myid = line[1:]
            # A repeated id starts a fresh record, so the last one wins.
            origseq.pop(myid, None)
        elif line:
            # Append instead of overwrite: a sequence wrapped over several
            # lines has to be kept whole, and a blank line must not wipe
            # out what was already read for this record.
            origseq[myid] = origseq.get(myid, '') + line

# Read the output from cmsearch (chlorophyta.tab) and, for every SRP_bact
# hit scoring above 20, print a '>'-prefixed header with the hit coordinates
# followed by the matching stretch of the original input sequence.

# Seven whitespace-separated fields following the literal 'SRP_bact'.
# Field 1 is used as the sequence id, fields 2 and 3 as begin/end, and
# field 6 as the score; the remaining fields are matched but not used.
# NOTE(review): presumably this is the cmsearch tabular layout -- confirm
# the column order against the cmsearch version that produced the file.
hit_pattern = re.compile(
    r'SRP_bact +(\S+) +(\S+) +(\S+) +(\S+) +(\S+) +(\S+) +(\S+)')

with open('chlorophyta.tab') as report:
    for line in report:
        match = hit_pattern.search(line)
        if not match:
            continue

        myid = match.group(1)
        beg = int(match.group(2))
        end = int(match.group(3))
        score = float(match.group(6))

        # Written as "not >" so that a NaN score is skipped as well.
        if not score > 20:
            continue

        # beg/end are treated as 1-based, inclusive coordinates.  The lookup
        # is done only for reported hits; an id that is missing from origseq
        # raises KeyError here.
        # NOTE(review): when beg > end the slice is empty -- presumably that
        # is a reverse-strand hit, which this script does not handle.
        ret = origseq[myid][beg - 1:end]

        # Single-argument print(...) gives the same output on Python 2 and 3.
        print('%s BEG: %d END: %d' % ('>' + myid, beg, end))
        print(ret)