17/extract_chloroplast_rnas.pl


#!/usr/bin/perl -w

use strict;

# First read the sequences that were used as input to cmsearch from the
# file pattern_search.out.
#
# The file is read as FASTA-style text: a line starting with '>' names a
# sequence, and the following lines hold its residues.  The result is
# %origseq, mapping sequence identifier => full sequence string.
#
# Details that matter to the lookup done later in this script:
#   * The identifier is the first whitespace-delimited word of the header,
#     because the hit lines are matched with \S+ and so can never contain a
#     longer, space-containing key.
#   * Sequence lines are concatenated, so a record wrapped over several
#     lines is stored completely instead of only its last line.
#   * Whitespace inside sequence lines is dropped so that positions in the
#     stored string correspond to residue positions.

my %origseq;
my $id;
my $strand;    # not used in the visible code; declaration kept as it was
my $infile = 'pattern_search.out';
open my $fasta_fh, '<', $infile
  or die "Oops, could not open $infile: $!\n";
while ( my $line = <$fasta_fh> ) {
    chomp $line;
    if ( $line =~ /^>\s*(\S*)/ ) {

        # New record: start with an empty sequence so that a repeated
        # identifier replaces, rather than extends, the earlier record.
        $id = $1;
        $origseq{$id} = '';
    }
    elsif ( defined $id ) {

        # Lines before the first header have no identifier and are ignored.
        $line =~ s/\s+//g;
        $origseq{$id} .= $line;
    }
}
close $fasta_fh;

# Read the output from cmsearch (chlorophyta.tab) and print, in FASTA
# format on STDOUT, the part of each input sequence covered by an SRP_bact
# hit whose score is greater than $min_score.
#
# From every line matching the SRP_bact pattern the captured fields are used
# as: 1 = sequence identifier, 2 = hit start, 3 = hit end, 6 = score.
# Start and end are treated as 1-based, inclusive positions in the sequence
# stored in %origseq.
#
# A hit whose start is greater than its end is taken to lie on the reverse
# strand: the span end..start is extracted and reverse-complemented.
# NOTE(review): this relies on cmsearch reporting minus-strand hits with
# start > stop -- confirm against the Infernal version that produced the
# file.  Previously such hits produced a negative substr length and a
# meaningless sequence.
#
# Hits that refer to an unknown identifier, or whose coordinates fall
# outside the stored sequence, are reported on STDERR and skipped.

$infile = 'chlorophyta.tab';
my $min_score = 20;
open my $tab_fh, '<', $infile
  or die "Oops, could not open $infile: $!\n";
HIT:
while ( my $line = <$tab_fh> ) {

    # Comment/header lines are never hits.
    next HIT if $line =~ /^\s*#/;
    next HIT
      unless $line =~
      /SRP_bact +(\S+) +(\S+) +(\S+) +(\S+) +(\S+) +(\S+) +(\S+)/;

    $id = $1;    # file-level variable, assigned here as before
    my ( $beg, $end, $score ) = ( $2, $3, $6 );

    # Filter first so that low-scoring hits cost no further work.
    next HIT unless $score > $min_score;

    my $seq = $origseq{$id};
    if ( !defined $seq ) {
        warn "No input sequence found for '$id'; skipping hit $beg-$end\n";
        next HIT;
    }

    # Normalise to low/high coordinates; remember the orientation.
    my $is_reverse = $beg > $end;
    my ( $lo, $hi ) = $is_reverse ? ( $end, $beg ) : ( $beg, $end );
    if ( $lo < 1 || $hi > length($seq) ) {
        warn "Hit $beg-$end lies outside sequence '$id' (length "
          . length($seq)
          . "); skipping\n";
        next HIT;
    }

    my $ret = substr( $seq, $lo - 1, $hi - $lo + 1 );
    if ($is_reverse) {

        # Reverse complement, including IUPAC ambiguity codes.  The output
        # uses the DNA alphabet: A is complemented to T, and U to A.
        $ret = reverse $ret;
        $ret =~ tr/ACGTURYKMBDHVacgturykmbdhv/TGCAAYRMKVHDBtgcaayrmkvhdb/;
    }

    print ">$id BEG:$beg END:$end\n$ret\n";
}

close $tab_fh;