#!/usr/bin/python
import re
# read matrix
i = 0
for line in open('matrix5.txt'):
line = line.rstrip()
if i == 0:
pssm = [[] * 9]
if i > 0:
pssm.append([]) # add one row
col = re.split(' ', line)
for j in range(0, 9): # fill the row with numbers
pssm[i].append(float(col[j]))
i += 1
# read sequence to be analyzed
seq = ''
for line in open('amyloid.fa'):
if not re.search('>', line):
line = line.rstrip()
seq += line
print 'pos\tscore' # print header
seq = seq.upper()
bases = ['A', 'T', 'C', 'G']
# score with the matrix
for k in range(0, len(seq) - 9):
test = seq[k:k + 9]
score = 0
for j in range(0, 9):
base = test[j]
for b in range(0, 4):
if bases[b] == base:
score += pssm[b][j]
score = 2 ** score # convert log2 to real values
# ** is the exponentiation operator
pos = k + 3 # We want to print a position
# next to the exon-intron
# junction
print pos, '\t', score