5/find_cag.pl


#!/usr/bin/perl -w

use strict;

# Input FASTA file. Defaults to 'refseq_human' (all human refseq
# sequences, Dec 2010); a different path may be supplied as the first
# command-line argument.
my $infile = @ARGV ? $ARGV[0] : 'refseq_human';

# Three-argument open with a lexical handle: characters in the file name
# can never be interpreted as an open mode, and $! says why it failed.
open my $in, '<', $infile
    or die "Oops, could not open $infile: $!\n";

# Walk the file record by record. A header line (one starting with '>')
# closes the previous record, so that record is scanned before the new
# header is stored and the sequence buffer is reset.
my $id  = '';
my $seq = '';
while ( my $line = <$in> ) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate CRLF line endings

    if ( $line =~ /^>/ ) {
        find_cag_repeat( $id, $seq ) if $id ne '';
        $id  = $line;
        $seq = '';
    }
    else {
        # Sequence data may be wrapped over many lines; join them so a
        # repeat that spans a line break is still seen as one run.
        $seq .= $line;
    }
}
close $in;

# The last record has no following header to trigger its scan. Skip the
# call when no header was ever read (empty input), matching the guard
# used inside the loop.
find_cag_repeat( $id, $seq ) if $id ne '';

# find_cag_repeat( $id, $seq [, $min_repeats] )
#
# Looks for the first run of consecutive "CAG" triplets in $seq that is
# at least $min_repeats triplets long and, if one exists, prints a line
# of the form "<id>\trepeat length <n>\n" to the currently selected
# output handle.
#
#   $id          - record header; only its first 20 characters are printed
#   $seq         - sequence string to search (matching is case-sensitive)
#   $min_repeats - optional minimum number of consecutive CAG units;
#                  defaults to 6, values below 1 are treated as 1
#
# The reported length is counted in characters (3 per CAG unit), not in
# repeat units. Only the first qualifying run is reported, even if a
# longer one occurs later in the sequence.
#
# Returns the length of the reported run, or nothing when no run
# qualifies.
sub find_cag_repeat {
    my ( $id, $seq, $min_repeats ) = @_;

    $min_repeats = 6 unless defined $min_repeats;
    $min_repeats = 1 if $min_repeats < 1;

    # %d forces the threshold to a plain integer before it becomes part
    # of the pattern. The inner group is non-capturing because only the
    # whole run ($1) is needed.
    my $pattern = sprintf '((?:CAG){%d,})', $min_repeats;

    if ( $seq =~ /$pattern/ ) {
        my $len   = length $1;
        my $label = substr( $id, 0, 20 );    # keep the output columns short
        print "$label\trepeat length $len\n";
        return $len;
    }

    return;
}