15/cpg.pl
#!/usr/bin/perl -w
use strict;
my $win = 500;
my $step = 10;
my $seq = '';
my $infile = 'short.fa'; # a shorter sequence
# my $infile = 'chr4_region.fa'; # alternative sequence
open( IN, $infile ) or die "Could not open $infile\n";
while (<IN>) {
unless (/>/) {
chomp;
$seq .= $_;
}
}
close IN;
print "pos\tcpg\tcg_ratio\tcg_obs_exp\n";
for ( my $i = 0 ; $i < length($seq) - $win + 1; $i += $step ) {
my $testseq = substr( $seq, $i, $win );
my $c = ( $testseq =~ s/C/C/g );
my $g = ( $testseq =~ s/G/G/g );
my $cg = ( $testseq =~ s/CG/CG/g );
my $cg_ratio = ( $c + $g ) * 100 / length($testseq);
my $cg_obs_exp = ( $cg * length($testseq) ) / ( $c * $g );
my $pos = $i + $win / 2;
if ( ( $cg_ratio >= 55 ) && ( $cg_obs_exp >= 0.65 ) ) {
print "$pos\t1\t$cg_ratio\t$cg_obs_exp\n";
}
else { print "$pos\t0\t$cg_ratio\t$cg_obs_exp\n"; }
}