#!/usr/bin/python
import re
import sys
# Basic parameters used
wid = 100  # size of sliding window (number of residues)
step = 1  # size of step to move sliding window


def read_fasta(path):
    """Read a FASTA file and return ``(identifier, sequence)``.

    Lines starting with '>' are identifier lines; everything after the
    '>' is kept as the identifier.  All other lines are concatenated
    (trailing whitespace removed) into one sequence string.  If the file
    holds several records, their sequences are joined together and the
    identifier of the last record is returned.
    """
    seq_id = ''
    chunks = []
    # 'with' guarantees the file is closed even if reading fails.
    with open(path) as handle:
        for line in handle:
            line = line.rstrip()
            if line.startswith('>'):
                seq_id = line[1:]
            else:
                chunks.append(line)
    # Join once at the end instead of growing a string line by line.
    return seq_id, ''.join(chunks)


def window_fractions(seq, window=wid, stride=step):
    """Yield ``(pos, pro, thr, ser)`` for every full window over *seq*.

    *pos* is the 1-based position of the window centre; *pro*, *thr* and
    *ser* are the fractions of 'P', 'T' and 'S' characters in the window.
    Counting is case-sensitive.  Nothing is yielded when *seq* is shorter
    than *window*.
    """
    # '+ 1' so that the last full window (the one ending on the final
    # residue) is included.
    for start in range(0, len(seq) - window + 1, stride):
        chunk = seq[start:start + window]
        # Floor division keeps the position an integer on Python 2 and 3.
        pos = start + 1 + window // 2
        yield (
            pos,
            float(chunk.count('P')) / window,
            float(chunk.count('T')) / window,
            float(chunk.count('S')) / window,
        )


def main(argv):
    """Print a tab-separated P/T/S composition table for a FASTA file.

    *argv* is the full argument vector; ``argv[1]`` must be the path to
    the FASTA file.  Exits with an error message when it is missing.
    """
    # check if argument to the script is there.
    if len(argv) > 1:
        path = argv[1]
    else:
        sys.exit(
            'File in FASTA sequence format is to be used as argument to the script'
        )

    # read the sequence from the input file (the identifier is not used
    # in the report)
    _seq_id, seq = read_fasta(path)

    # Now analyze the sequence
    print('Position\tProline\tThreonine\tSerine')
    for row in window_fractions(seq, wid, step):
        print('\t'.join(str(value) for value in row))


if __name__ == '__main__':
    main(sys.argv)