4/pcr.pl


#!/usr/bin/perl -w

use strict;

# Reverse translation table: one-letter amino acid symbol => degenerate RNA
# codon, written with IUPAC ambiguity symbols (e.g. Y, R, H, N).
# Every amino acid is covered by a single three-letter pattern. For the
# six-codon amino acids (L, S, R) that single pattern is a covering code,
# so it also matches a few codons belonging to other amino acids.
# There is no entry for stop codons or for ambiguous residues such as X.

my %reverse_code = (
    A => 'GCN',
    C => 'UGY',
    D => 'GAY',
    E => 'GAR',
    F => 'UUY',
    G => 'GGN',
    H => 'CAY',
    I => 'AUH',
    K => 'AAR',
    L => 'YUN',
    M => 'AUG',
    N => 'AAY',
    P => 'CCN',
    Q => 'CAR',
    R => 'MGN',
    S => 'WSN',
    T => 'ACN',
    V => 'GUN',
    W => 'UGG',
    Y => 'UAY',
);

# Load the protein sequence from the FASTA file brca1.pep into $pep.
#
# Any line containing '>' is treated as a header and skipped; all remaining
# lines are concatenated into one string. Every whitespace character is
# removed from sequence lines, not just the trailing newline, so that a
# carriage return from a CRLF file or a stray blank never ends up in $pep
# as a bogus "residue".
#
# Dies with the file name and the system error if the file cannot be opened.

my $pep      = '';
my $pep_file = 'brca1.pep';

# Three-argument open with a lexical handle: the mode can never be taken
# from the file name, and the handle does not leak into the package.
open( my $pep_fh, '<', $pep_file )
    or die "Could not open file '$pep_file': $!\n";

while ( my $line = <$pep_fh> ) {
    next if $line =~ />/;    # FASTA header line
    $line =~ s/\s+//g;       # newline, CR and embedded blanks
    $pep .= $line;
}
close $pep_fh;

# Slide a fixed-size window over $pep and, for every window, print
#
#     <1-based start position> <degenerate primer>\t<degeneracy>\n
#
# The primer is the concatenation of the degenerate codons of the window's
# residues; the degeneracy is the number of concrete sequences the primer
# stands for. A window containing a residue that has no entry in
# %reverse_code cannot be reverse-translated: it is reported on STDERR and
# left out of the output instead of yielding a shortened primer.

# Number of amino acids per primer (7 residues => 21 bases).
my $window_size = 7;

# How many concrete bases each ambiguity symbol stands for. Symbols that
# are not listed (A, C, G, U) stand for exactly one base.
my %choices_for = (
    R => 2, Y => 2, W => 2, M => 2, S => 2, K => 2,
    V => 3, H => 3, D => 3, B => 3,
    N => 4,
);

# Reverse-translate one peptide window.
#   $window    - string of one-letter amino acid symbols
#   $codon_for - hash ref: amino acid symbol => degenerate codon
# Returns ( $primer, $degeneracy ), or an empty list when some residue of
# the window has no codon in the table.
sub primer_for_window {
    my ( $window, $codon_for ) = @_;

    my $primer = '';
    my $degen  = 1;

    for my $aa ( split //, $window ) {
        my $codon = $codon_for->{$aa};
        return unless defined $codon;

        $primer .= $codon;

        # The degeneracy is the product of the choices at every position.
        for my $base ( split //, $codon ) {
            $degen *= $choices_for{$base} if exists $choices_for{$base};
        }
    }

    return ( $primer, $degen );
}

# The range is empty when $pep is shorter than one window.
WINDOW:
for my $start ( 0 .. length($pep) - $window_size ) {
    my $pos    = $start + 1;
    my $window = substr( $pep, $start, $window_size );

    my ( $primer, $degen ) = primer_for_window( $window, \%reverse_code );

    unless ( defined $primer ) {
        warn "Skipping window at position $pos ($window): "
            . "residue without a codon entry\n";
        next WINDOW;
    }

    print "$pos $primer\t$degen\n";
}