#!/usr/bin/perl -w

use strict;

# Multiple alignment stored as a two-dimensional array:
# $msa_matrix[$sequence][$position] holds one character of one sequence.
my @msa_matrix;

# Number of sequences read from $infile; doubles as the index of the next
# free row in @msa_matrix while the file is being read.
my $number_of_sequences = 0;

my $infile = 'splice3.txt';

# Three-argument open with a lexical handle: the mode can never be taken
# from the file name, and $! reports why the open failed.
open my $in, '<', $infile or die "Could not open $infile: $!\n";

while ( my $line = <$in> ) {
    chomp $line;

    # Blank (or whitespace-only) lines are not sequences. Counting them
    # would inflate $number_of_sequences, which is used as the denominator
    # when the log-odds values are computed.
    next unless $line =~ /\S/;

    for my $j ( 0 .. 14 ) {    # 15 positions in alignment
        # A line shorter than 15 characters yields '' for the missing
        # positions instead of undef, so later string comparisons never
        # see an undefined value (and never match any base).
        $msa_matrix[$number_of_sequences][$j] =
            $j < length $line ? substr( $line, $j, 1 ) : '';
    }
    $number_of_sequences++;
}

close $in;


# Build the count matrix: $pssm[$base][$position] is the number of sequences
# showing that base at that position, plus a pseudocount of 1.

my @bases = ( 'A', 'T', 'C', 'G' );
my @pssm ;

for my $base_idx ( 0 .. 3 ) {
    my $base = $bases[$base_idx];

    for my $col ( 0 .. 14 ) {
        # grep in scalar context returns how many sequence rows carry
        # $base in this column; the range is empty when no sequences
        # were read, leaving only the pseudocount.
        my $hits = grep { $msa_matrix[$_][$col] eq $base }
            0 .. $number_of_sequences - 1;

        # pseudocount (1) + observed occurrences
        $pssm[$base_idx][$col] = 1 + $hits;
    }
}
	
# Replace every count in @pssm by its log2 odds value and print the matrix,
# one row per base, each value followed by a single space.
for my $base_idx ( 0 .. 3 ) {
    for my $col ( 0 .. 14 ) {
        # Pseudocounted frequency (count / (sequences + 4)) multiplied by 4,
        # i.e. divided by 0.25 -- presumably a uniform background over the
        # four bases.
        my $odds = $pssm[$base_idx][$col] / ( $number_of_sequences + 4 ) * 4;

        # Natural log divided by log(2) gives the base-2 logarithm.
        $pssm[$base_idx][$col] = log($odds) / log(2);

        # Full-precision output; for two decimals one could instead use
        # printf( "%.2f\t", $pssm[$base_idx][$col] );
        print "$pssm[$base_idx][$col] ";
    }
    print "\n";
}
