#!/usr/bin/python

import re

# Maps a base position (int) to the string "name strand proteinID" of the
# exon that covers it. Populated by main().
genome = {}


def _strip_one_trailing_comma(text):
    """Return *text* without its final ',' (if it ends with one)."""
    if text.endswith(','):
        return text[:-1]
    return text


def load_genome(path):
    """Build a per-position annotation table from a tab-separated exon file.

    Lines starting with '#' are treated as comments. For every remaining
    line that has a non-empty ninth column (the protein identifier), the
    comma-separated exon starts (column 7) and exon ends (column 8) are
    paired up and every position from start to end, both inclusive, is
    mapped to "<column 1> <column 3> <column 9>" (name, strand, protein ID).
    When exons from different lines overlap, the later line wins.

    Blank lines and lines with fewer than nine columns are skipped, since
    they cannot carry a protein identifier.

    Raises ValueError if an exon coordinate is not an integer and
    IndexError if a line lists fewer exon ends than exon starts.
    """
    positions = {}
    with open(path) as handle:
        for line in handle:
            if line.startswith('#'):
                continue

            columns = line.rstrip('\n').split('\t')
            if len(columns) <= 8 or not columns[8]:
                continue  # no protein identifier on this line

            exon_starts = _strip_one_trailing_comma(columns[6]).split(',')
            exon_ends = _strip_one_trailing_comma(columns[7]).split(',')

            # Built once per line; every covered position shares this string.
            annotation = columns[0] + ' ' + columns[2] + ' ' + columns[8]

            for index, start_text in enumerate(exon_starts):
                start = int(start_text)
                stop = int(exon_ends[index])
                # NOTE(review): the end coordinate is treated as inclusive.
                # If the input uses half-open exon ends this covers one
                # extra base -- confirm against the file's format.
                for position in range(start, stop + 1):
                    positions[position] = annotation
    return positions


def annotated_positions(path, annotations):
    """Yield (position, annotation) for each line of *path* found in *annotations*.

    *path* is a tab-separated file whose second column is an integer
    position; *annotations* is a mapping such as the one returned by
    load_genome(). Blank lines are skipped. Results are yielded in file
    order, once per matching line.
    """
    with open(path) as handle:
        for line in handle:
            if not line.strip():
                continue
            position = int(line.split('\t')[1])
            if position in annotations:
                yield position, annotations[position]


def main(annotation_path='chr7.txt', positions_path='bushmen.out'):
    """Print "<position> <name> <strand> <proteinID>" for every annotated position."""
    genome.update(load_genome(annotation_path))
    for position, annotation in annotated_positions(positions_path, genome):
        # A single pre-formatted argument prints identically on Python 2 and 3.
        print('%d %s' % (position, annotation))


if __name__ == '__main__':
    main()