#!/usr/bin/python
import re
win = 500
step = 10
seq = ''
for line in open('short.fa'): # a shorter sequence
if not re.search('>', line):
line = line.rstrip()
seq = seq + line
print 'pos\tcpg\tcg_ratio\tcg_obs_exp'
for i in range(0, len(seq) - win, step):
testseq = seq[i:i + win]
c = float(testseq.count('C'))
g = float(testseq.count('G'))
cg = float(testseq.count('CG'))
cg_ratio = (c + g) * 100 / len(testseq)
cg_obs_exp = cg * len(testseq) / (c * g)
pos = i + win / 2
if cg_ratio >= 55 and cg_obs_exp >= 0.65:
print pos, '\t', 1, '\t', cg_ratio, '\t', cg_obs_exp
else:
print pos, '\t', 0, '\t', cg_ratio, '\t', cg_obs_exp