13/pts.pl


#!/usr/bin/perl -w

# Sliding-window amino-acid composition scan.
#
# Reads a protein sequence from the FASTA file named as the first
# command-line argument and prints a tab-separated table to STDOUT: one row
# per window of $wid residues, giving the window's centre position
# (1-based) and the fraction of proline (P), threonine (T) and serine (S)
# residues inside that window.
#
# All records in the file are concatenated into a single sequence; header
# lines (those starting with '>') are skipped.

use strict;

# Basic parameters used
my $wid = 100;    # size of sliding window

# Run the command-line entry point only when executed as a script, so the
# helper subs below can be loaded (e.g. via require) without side effects.
main(@ARGV) unless caller;

# main($file)
#   Command-line entry point: validates the argument, reads the sequence
#   and prints the composition table.
#   Dies if no file name is given or the file cannot be opened.
sub main {
    my ($file) = @_;

    # check if argument to the script is there (undef-safe under -w).
    if ( !defined $file || $file eq '' ) {
        die "File in FASTA sequence format is to be used as argument "
          . "to the script\n";
    }

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as a mode or a pipe, and the handle is scoped to this sub.
    open( my $in, '<', $file )
      or die "Could not open file $file: $!\n";
    my $seq = read_fasta_sequence($in);
    close $in;

    # Now analyze the sequence in $seq
    print "Position\tProline\tThreonine\tSerine\n";

    my @rows = window_composition( $seq, $wid );
    if ( !@rows ) {
        warn 'Sequence of length ' . length($seq)
          . " is shorter than the window size ($wid); no windows to report\n";
    }
    print join( "\t", @{$_} ), "\n" for @rows;

    return;
}

# read_fasta_sequence($fh)
#   Reads FASTA text from the open filehandle $fh and returns the residues
#   of all records as one upper-case string.
sub read_fasta_sequence {
    my ($fh) = @_;

    my $seq = '';
    while ( my $line = <$fh> ) {

        # Identifier lines start with '>'; anchoring the match keeps a
        # stray '>' inside a sequence line from being taken for a header.
        next if $line =~ /^>/;

        # Remove the line ending (including the "\r" of CRLF files) and any
        # embedded blanks so they cannot shift the window positions.
        $line =~ s/\s+//g;

        # Upper-case so that lower-case FASTA files are counted as well.
        $seq .= uc $line;
    }

    return $seq;
}

# window_composition($seq, $wid)
#   Slides a window of $wid residues over $seq one residue at a time.
#   Returns a list of array refs [ position, P, T, S ] where position is
#   the 1-based window centre (start + int($wid / 2)) and P, T, S are the
#   fractions of proline, threonine and serine in the window.
#   Returns an empty list when $seq is shorter than $wid.
#   Dies if $wid is not a positive number.
sub window_composition {
    my ( $seq, $wid ) = @_;

    die "Window size must be a positive number\n"
      unless defined $wid && $wid >= 1;

    my @rows;
    my $last_start = length($seq) - $wid;    # negative => no full window
    for my $i ( 0 .. $last_start ) {
        my $test = substr( $seq, $i, $wid );

        # Count proline, threonine and serine; tr/// with an empty
        # replacement list only counts and leaves $test unchanged.
        my $count_p = ( $test =~ tr/P// ) / $wid;
        my $count_t = ( $test =~ tr/T// ) / $wid;
        my $count_s = ( $test =~ tr/S// ) / $wid;

        # int() keeps the reported position integral for odd window sizes.
        my $pos = $i + 1 + int( $wid / 2 );
        push @rows, [ $pos, $count_p, $count_t, $count_s ];
    }

    return @rows;
}