3/sirna.pl
# First read the sequence from a file named 'mrna.fa'
$seq = '';
open(IN, 'mrna.fa') or die "Could not open file mrna.fa\n";
while (<IN>) {
unless (/>/) {
chomp;
$seq .= $_;
}
}
close IN;
# Now analyze the sequence read from file
# Step through each position of the sequence
for ( $i = 0 ; $i < length($seq) -22 ; $i++ ) {
$testseq = substr( $seq, $i, 23 );
# check if first two positions are AA and
# last are TT
if ( $testseq =~ /^AA.*TT$/ ) {
# test GC content
# count the number of G's and C's
$gc_content = ( $testseq =~ tr/GC// ) / 23;
# is the GC content within the range 30-50?
if ( ( $gc_content >= 0.3 ) && ( $gc_content <= 0.5 ) ) {
# does the sequence contain stretches of As, Ts, Cs or Gs?
unless ( ( $testseq =~ /A{4}/ )
|| ( $testseq =~ /T{4}/ )
|| ( $testseq =~ /G{4}/ )
|| ( $testseq =~ /C{4}/ )
# avoid also regions of six positions with G or C
|| ( $testseq =~ /[GC]{6}/ ))
{
print "pos $i $testseq\n";
}
}
}
}