4/pcr.pl
#!/usr/bin/perl -w
use strict;
# Reverse genetic code: one-letter amino acid symbol => degenerate RNA codon
# written with IUPAC ambiguity letters.
# Leu, Ser and Arg each get a single pattern (YUN, WSN, MGN) that is broader
# than their real codon sets; e.g. YUN also matches UUY, listed here as Phe.
my %reverse_code = (
    # fully specified codons (one codon each)
    M => 'AUG',
    W => 'UGG',

    # two-fold degenerate third position
    C => 'UGY',
    D => 'GAY',
    E => 'GAR',
    F => 'UUY',
    H => 'CAY',
    K => 'AAR',
    N => 'AAY',
    Q => 'CAR',
    Y => 'UAY',

    # three-fold degenerate third position
    I => 'AUH',

    # four-fold degenerate third position
    A => 'GCN',
    G => 'GGN',
    P => 'CCN',
    T => 'ACN',
    V => 'GUN',

    # six-codon amino acids collapsed into one pattern
    L => 'YUN',
    R => 'MGN',
    S => 'WSN',
);
# Read the protein sequence from the FASTA-style file 'brca1.pep' into $pep.
# Any line containing '>' is treated as a header and skipped. All other lines
# are appended with every whitespace character removed, so CRLF line endings
# or stray blanks never end up in the sequence as bogus residues.
my $pep      = '';
my $pep_file = 'brca1.pep';

# Three-argument open with a lexical handle; report which file failed and why.
open( my $pep_fh, '<', $pep_file )
  or die "Could not open file '$pep_file': $!\n";
while ( my $line = <$pep_fh> ) {
    next if $line =~ />/;    # header line
    $line =~ s/\s+//g;       # drops "\n", "\r" and embedded blanks
    $pep .= $line;
}
close $pep_fh;
# Slide a fixed-size window along $pep. For every window print one line:
#   <1-based start position> <space> <degenerate oligo> <tab> <degeneracy>
# The oligo is the concatenation of the %reverse_code pattern of each residue;
# the degeneracy is the product, over all bases of the oligo, of the number of
# nucleotides each IUPAC letter stands for.
# Sequences shorter than the window produce no output.
my $window = 7;    # residues per candidate primer (21 bases)

# Number of nucleotides represented by each IUPAC ambiguity letter.
# Unambiguous bases are absent from the table and contribute a factor of 1.
my %choices_for = (
    R => 2, Y => 2, W => 2, M => 2, S => 2, K => 2,
    V => 3, H => 3, D => 3, B => 3,
    N => 4,
);

# Residue symbols already reported as unknown, so each is warned about once.
my %warned;

for my $start ( 0 .. length($pep) - $window ) {
    my $oligo = '';
    my $degen = 1;

    for my $aa ( split //, substr( $pep, $start, $window ) ) {

        # Look the residue up case-insensitively. A symbol missing from the
        # table (e.g. X or *) is back-translated as the fully degenerate
        # codon NNN instead of producing an undefined value.
        my $codon = $reverse_code{ uc $aa };
        if ( !defined $codon ) {
            warn "Unknown residue '$aa' treated as NNN\n"
              unless $warned{$aa}++;
            $codon = 'NNN';
        }
        $oligo .= $codon;

        # calculate degeneracy
        for my $base ( split //, $codon ) {
            $degen *= $choices_for{$base} if exists $choices_for{$base};
        }
    }

    my $pos = $start + 1;
    print "$pos $oligo\t$degen\n";
}