15/cpg.pl


#!/usr/bin/perl -w

use strict;

my $win  = 500;
my $step = 10;
my $seq  = '';

my $infile = 'short.fa';    # a shorter sequence
# my $infile = 'chr4_region.fa'; # alternative sequence
open( IN, $infile ) or die "Could not open $infile\n";
while (<IN>) {
    unless (/>/) {
        chomp;
        $seq .= $_;
    }
}
close IN;

print "pos\tcpg\tcg_ratio\tcg_obs_exp\n";

for ( my $i = 0 ; $i < length($seq) - $win + 1; $i += $step ) {
    my $testseq = substr( $seq, $i, $win );
    my $c  = ( $testseq =~ s/C/C/g );
    my $g  = ( $testseq =~ s/G/G/g );
    my $cg = ( $testseq =~ s/CG/CG/g );
    my $cg_ratio   = ( $c + $g ) * 100 / length($testseq);
    my $cg_obs_exp = ( $cg * length($testseq) ) / ( $c * $g );
    my $pos        = $i + $win / 2;
    if ( ( $cg_ratio >= 55 ) && ( $cg_obs_exp >= 0.65 ) ) {
        print "$pos\t1\t$cg_ratio\t$cg_obs_exp\n";
    }
    else { print "$pos\t0\t$cg_ratio\t$cg_obs_exp\n"; }
}