17/extract_chloroplast_rnas.pl
#!/usr/bin/perl -w
use strict;
# First read the sequences that were used as input to cmsearch from the
# file pattern_search.out.  The file is FASTA-like: a ">header" line
# followed by the sequence belonging to that header.
my %origseq;    # header text (everything after '>') => sequence
my $id;         # header of the record currently being read; reused below
my $strand;     # declared by the original script; not used in the visible code
my $infile = 'pattern_search.out';
open my $seq_fh, '<', $infile
    or die "Oops, could not open $infile: $!\n";
while ( my $line = <$seq_fh> ) {
    chomp $line;
    if ( $line =~ />(.*)/ ) {
        $id = $1;

        # Start a fresh record so that a repeated header replaces the
        # earlier sequence instead of being appended to it.
        delete $origseq{$id};
    }
    else {
        # A line seen before any header cannot be attributed to a record.
        next unless defined $id;

        # Append rather than assign: a sequence wrapped over several lines
        # is kept whole, and a blank line no longer wipes what was read.
        $origseq{$id} .= $line;
    }
}
close $seq_fh;
# Read the tabular output from cmsearch and print, in FASTA format, the part
# of each input sequence that is covered by a sufficiently high-scoring hit.
my $min_score = 20;    # hits must score strictly above this to be printed
$infile = 'chlorophyta.tab';
open my $tab_fh, '<', $infile
    or die "Oops, could not open $infile: $!\n";
while ( my $line = <$tab_fh> ) {

    # A hit line is the model name followed by seven whitespace-separated
    # fields; the ones used here are target id (1), start (2), stop (3) and
    # score (6).  NOTE(review): the column meanings follow the cmsearch
    # tabfile layout -- confirm against the Infernal version in use.
    next
        unless $line =~ /SRP_bact +(\S+) +(\S+) +(\S+) +\S+ +\S+ +(\S+) +\S+/;
    $id = $1;
    my $beg   = $2;
    my $end   = $3;
    my $score = $4;

    # Filter on score first so no work is done for hits that are dropped.
    next unless $score > $min_score;

    my $seq = $origseq{$id};
    if ( !defined $seq ) {
        warn "No input sequence stored for '$id'; skipping hit\n";
        next;
    }

    # A hit with start > stop lies on the reverse strand.  Computing the
    # length as stop - start + 1 would then be zero or negative, which makes
    # substr() return an unrelated slice, so always extract low..high.
    my ( $from, $to ) = $beg <= $end ? ( $beg, $end ) : ( $end, $beg );
    my $ret = substr( $seq, $from - 1, $to - $from + 1 );

    if ( $beg > $end ) {

        # Report the hit as it reads on the reverse strand: reverse the
        # slice and complement it (IUPAC ambiguity codes included).
        $ret = scalar reverse $ret;
        $ret =~ tr/ACGTUMRWSYKVHDBNacgtumrwsykvhdbn/TGCAAKYWSRMBDHVNtgcaakywsrmbdhvn/;
    }

    print ">$id BEG:$beg END:$end\n$ret\n";
}
close $tab_fh;