#!/usr/bin/python

import re
import sys

# Basic parameters used

wid  = 100  # size of sliding window
step = 1    # size of step to move sliding window

# Matches a FASTA identifier line; group 1 is everything after the '>'.
HEADER_RE = re.compile(r'>(.*)')


def read_fasta(path):
    """Read a FASTA file and return an (identifier, sequence) tuple.

    Any line containing '>' is treated as an identifier line and the text
    after the '>' becomes the identifier. Every other line, stripped of
    trailing whitespace, is appended to the sequence. If the file contains
    several identifier lines, the sequences are concatenated and the last
    identifier is the one returned.
    """
    identifier = ''
    chunks = []

    # 'with' closes the file handle even if reading fails part-way.
    with open(path) as handle:
        for line in handle:
            line = line.rstrip()
            match = HEADER_RE.search(line)
            if match:
                identifier = match.group(1)
            else:
                # Collect pieces and join once; repeated string
                # concatenation is quadratic on long sequences.
                chunks.append(line)

    return identifier, ''.join(chunks)


def window_fractions(seq, width=wid, stride=step):
    """Yield (position, proline, threonine, serine) for each window of seq.

    seq    -- the sequence string to scan
    width  -- number of residues per window (must be positive)
    stride -- number of residues the window advances each step

    position is the 1-based start of the window plus half the window width.
    The other three values are the fractions of 'P', 'T' and 'S' characters
    in the window. Nothing is yielded when seq is shorter than width.

    Raises ValueError if width is not positive.
    """
    if width <= 0:
        raise ValueError('window width must be positive')

    half = width // 2      # floor division: same result on Python 2 and 3
    scale = float(width)   # force true division for the fractions

    # The '+ 1' includes the final window, the one that ends exactly at the
    # last residue of the sequence.
    for start in range(0, len(seq) - width + 1, stride):
        window = seq[start:start + width]
        yield (start + 1 + half,
               window.count('P') / scale,
               window.count('T') / scale,
               window.count('S') / scale)


def main(argv):
    """Print a tab-separated table of window fractions for a FASTA file.

    argv -- command-line argument list; argv[1] is the path of the input
            file. Exits with an error message if it is missing.
    """
    # check if argument to the script is there.
    if len(argv) < 2:
        sys.exit('File in FASTA sequence format is to be used as argument '
                 'to the script')

    # read the sequence from the input file (the identifier is not used)
    _, seq = read_fasta(argv[1])

    # Now analyze the sequence
    print('Position\tProline\tThreonine\tSerine')
    for row in window_fractions(seq, wid, step):
        # Rows are tab-separated exactly like the header line.
        print('%d\t%s\t%s\t%s' % row)


if __name__ == '__main__':
    main(sys.argv)