#!/usr/bin/python

import math

infile = 'splice5.txt'  # alignment file: one aligned sequence per line

bases = ['A', 'T', 'C', 'G']  # row order of the count matrix and the PSSM

MOTIF_LENGTH = 9   # number of alignment columns that are scored
PSEUDOCOUNT = 1.0  # added to every cell so an unseen base never gives log(0)


def parse_alignment(lines, width=MOTIF_LENGTH):
    """Turn text lines into the multiple-alignment matrix.

    Trailing whitespace is stripped from every line and blank lines are
    skipped.  Only the first *width* characters of a line are kept.

    Args:
        lines: iterable of strings, one aligned sequence per line.
        width: number of columns to keep from each sequence.

    Returns:
        A list with one row per sequence; each row is a list of *width*
        single-character strings.

    Raises:
        ValueError: if a non-blank line has fewer than *width* characters.
    """
    msa_matrix = []
    for line_number, line in enumerate(lines, 1):
        line = line.rstrip()
        if not line:
            # A stray empty line (typically at the end of the file) carries
            # no sequence and must not be counted as one.
            continue
        if len(line) < width:
            raise ValueError(
                'line %d has %d characters, expected at least %d'
                % (line_number, len(line), width))
        msa_matrix.append(list(line[:width]))
    return msa_matrix


def read_alignment(path, width=MOTIF_LENGTH):
    """Read the alignment stored in the file *path*.

    The file is parsed with parse_alignment(); the handle is closed when
    reading finishes or fails.
    """
    with open(path) as handle:
        return parse_alignment(handle, width)


def count_matrix(msa_matrix, width=MOTIF_LENGTH):
    """Count how often each base occurs in each alignment column.

    Every cell starts at PSEUDOCOUNT.  Characters that are not listed in
    ``bases`` are ignored.

    Args:
        msa_matrix: rows of single characters, as built by parse_alignment().
        width: number of columns to count.

    Returns:
        A ``len(bases)`` x *width* list of lists of floats, with rows in the
        order of ``bases``.
    """
    counts = [[PSEUDOCOUNT] * width for _ in bases]
    row_of = {base: index for index, base in enumerate(bases)}
    for sequence in msa_matrix:
        for column, symbol in enumerate(sequence[:width]):
            row = row_of.get(symbol)
            if row is not None:
                counts[row][column] += 1
    return counts


def log_odds(counts, number_of_sequences):
    """Convert pseudocounted counts into base-2 log-odds scores.

    Each count is divided by ``number_of_sequences + PSEUDOCOUNT * len(bases)``
    (the column total including pseudocounts) and then multiplied by
    ``len(bases)``, which is the same as dividing by a uniform background
    frequency of ``1 / len(bases)``.

    Args:
        counts: matrix as returned by count_matrix().
        number_of_sequences: number of sequences the counts were taken from.

    Returns:
        A new matrix with the same shape as *counts* holding the scores.
    """
    total = number_of_sequences + PSEUDOCOUNT * len(bases)
    return [[math.log(cell / total * len(bases)) / math.log(2)
             for cell in row]
            for row in counts]


def format_pssm(pssm):
    """Return one text line per PSSM row, values separated by spaces.

    Each line ends with a single space: that is what the former
    ``print value,`` ... ``print ''`` sequence wrote, so the script output
    stays unchanged.
    """
    return [' '.join(str(value) for value in row) + ' ' for row in pssm]


def main():
    """Read ``infile``, build the PSSM and print it, one row per base."""
    msa_matrix = read_alignment(infile)
    pssm = log_odds(count_matrix(msa_matrix), len(msa_matrix))
    for text in format_pssm(pssm):
        # print() with a single string argument behaves the same under
        # Python 2 and Python 3.
        print(text)


if __name__ == '__main__':
    main()