#!/usr/bin/python
import math
# Build a position-specific scoring matrix (PSSM) from the aligned
# sequences stored one per line in splice5.txt and print it, one row per
# base (in the order given by `bases`) and one column per position.

infile = 'splice5.txt'        # alignment file, one sequence per line
bases = ['A', 'T', 'C', 'G']  # row order of the printed matrix
MOTIF_LENGTH = 9              # number of alignment columns that are scored
PSEUDOCOUNT = 1.0             # added to every cell so that no count is zero


def parse_alignment(lines, width=MOTIF_LENGTH):
    """Return the first *width* characters of every non-blank line.

    Args:
        lines: iterable of strings (an open text file works).
        width: number of leading characters kept from each line.

    Returns:
        A list of strings, each exactly *width* characters long, in input
        order.

    Raises:
        ValueError: if a non-blank line has fewer than *width* characters
            once trailing whitespace is removed.
    """
    msa_matrix = []
    for line_number, line in enumerate(lines, 1):
        line = line.rstrip()
        if not line:
            # Blank lines (typically a trailing newline at end of file)
            # carry no sequence; skip them instead of failing on indexing.
            continue
        if len(line) < width:
            raise ValueError(
                'line %d has %d characters, expected at least %d'
                % (line_number, len(line), width))
        msa_matrix.append(line[:width])
    return msa_matrix


def build_pssm(msa_matrix, alphabet=None, width=MOTIF_LENGTH,
               pseudocount=PSEUDOCOUNT):
    """Compute log2-odds scores for each symbol at each alignment column.

    Every cell starts at *pseudocount* and is incremented once per sequence
    that carries the row's symbol in that column.  The count is divided by
    ``len(msa_matrix) + pseudocount * len(alphabet)`` to obtain a frequency
    and then multiplied by ``len(alphabet)``, i.e. compared against a
    uniform background, before taking the base-2 logarithm.

    Args:
        msa_matrix: sequence of indexable rows, each at least *width* long.
        alphabet: symbols to score, one output row each; defaults to
            the module-level `bases`.
        width: number of columns to score.
        pseudocount: starting value of every count cell.

    Returns:
        A list of ``len(alphabet)`` rows, each a list of *width* floats.
        Symbols that are not in *alphabet* are ignored when counting but
        the sequence still contributes to the total.
    """
    if alphabet is None:
        alphabet = bases
    number_of_sequences = len(msa_matrix)

    counts = [[pseudocount] * width for _ in alphabet]
    row_of = dict((symbol, i) for i, symbol in enumerate(alphabet))
    # Single pass over the alignment rather than one pass per (base, column).
    for sequence in msa_matrix:
        for j in range(width):
            i = row_of.get(sequence[j])
            if i is not None:
                counts[i][j] += 1

    total = number_of_sequences + pseudocount * len(alphabet)
    scale = len(alphabet)  # dividing by a uniform background of 1/len
    return [[math.log(count / total * scale, 2) for count in row]
            for row in counts]


def format_row(row):
    """Return the values of *row* separated by spaces, plus a trailing space.

    The trailing space reproduces the output of the Python 2 statements
    ``print value,`` (per value) followed by ``print ''`` that this script
    originally used, so the printed text stays byte-identical.
    """
    return ' '.join(str(value) for value in row) + ' '


def main():
    """Read the alignment from `infile` and print its PSSM."""
    # The context manager closes the file even if parsing fails.
    with open(infile) as handle:
        msa_matrix = parse_alignment(handle)
    for row in build_pssm(msa_matrix):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(format_row(row))


if __name__ == '__main__':
    main()