# 16/make_matrix5.pl


#!/usr/bin/perl -w

use strict;

my @msa_matrix;    # two-dimensional array that stores the multiple
                   # alignment: one row per sequence, one column per position

my $number_of_sequences = 0;    # number of sequences read from $infile

my $infile = 'splice5.txt';

# Three-argument open with a lexical filehandle: the file name can never be
# misread as an open mode, and the handle does not leak into package scope.
# $! is included so the reason for the failure is reported.
open( my $in_fh, '<', $infile )
  or die "Oops, could not open $infile: $!\n";

while ( my $line = <$in_fh> ) {
    chomp $line;

    # Skip empty / whitespace-only lines (for example a trailing blank line).
    # Counting them as sequences would inflate $number_of_sequences, which
    # is used as the denominator when frequencies are computed later on.
    next if $line !~ /\S/;

    # Store the first nine characters of the line, one per matrix column.
    for ( my $j = 0 ; $j < 9 ; $j++ ) {    # nine positions in alignment
        $msa_matrix[$number_of_sequences][$j] = substr( $line, $j, 1 );
    }
    $number_of_sequences++;
}

close $in_fh;

# Build the count matrix: one row per base, one column per alignment
# position. Every cell starts from a pseudocount of 1, to which the number
# of sequences showing that base at that position is added.

my @bases = qw(A T C G);
my @pssm;

for my $base_idx ( 0 .. 3 ) {
    my $base = $bases[$base_idx];

    for my $column ( 0 .. 8 ) {

        # grep in scalar context yields how many sequences match $base
        # in this column
        my $occurrences =
          grep { $msa_matrix[$_][$column] eq $base }
          0 .. $number_of_sequences - 1;

        # pseudocount (1) + observed count
        $pssm[$base_idx][$column] = 1 + $occurrences;
    }
}

# Turn the count matrix into a PSSM in place: each count is divided by
# (number of sequences + 4), multiplied by 4, and replaced by the base-2
# logarithm of the result. Each row is printed on one line, with the
# values separated by a single space.
my $log_of_two  = log(2);
my $denominator = $number_of_sequences + 4;

for my $row ( 0 .. 3 ) {
    for my $column ( 0 .. 8 ) {
        my $odds = $pssm[$row][$column] / $denominator * 4;
        $pssm[$row][$column] = log($odds) / $log_of_two;

        print "$pssm[$row][$column] ";    # print PSSM

        # alternative output with two decimal places:
        # printf( "%.2f\t", $pssm[$row][$column] );
    }
    print "\n";
}