# 19/map2exons.pl
#!/usr/bin/perl -w
use strict;
# Lookup table of exonic base positions: position => "name strand proteinID".
# It is filled from the exon table below and read by the position lookup that
# follows in this script.
my %genome;
my $infile = 'chr7.txt'; # based on NCBI36 assembly

# Three-argument open with a lexical handle: the file name can never be
# mistaken for an open mode, and the failure reason ($!) is reported.
open( my $exons_fh, '<', $infile )
  or die "Could not open $infile: $!\n";

RECORD:
while ( my $line = <$exons_fh> ) {
    next RECORD if $line =~ /^\#/;    # skip comment/header lines
    $line =~ s/\r?\n\z//;             # strip the line ending (LF or CRLF)

    my @columns = split /\t/, $line;

    # Only records with a protein identifier (9th column) are indexed.
    # Test for presence rather than truth so an identifier of "0" is kept.
    next RECORD unless defined $columns[8] && length $columns[8];

    # The 7th and 8th columns hold comma-separated exon start and end
    # coordinates, each list terminated by a trailing ','.
    my $exon_starts_field = $columns[6];
    $exon_starts_field =~ s/,\z//;
    my @exon_starts = split /,/, $exon_starts_field;

    my $exon_ends_field = $columns[7];
    $exon_ends_field =~ s/,\z//;
    my @exon_ends = split /,/, $exon_ends_field;

    # name, strand and proteinID; identical for every base of this record,
    # so build the string once instead of once per base.
    my $annotation = "$columns[0] $columns[2] $columns[8]";

  EXON:
    for my $i ( 0 .. $#exon_starts ) {
        my $start = $exon_starts[$i];
        my $stop  = $exon_ends[$i];

        # A record with fewer ends than starts is malformed; skip the
        # unmatched exon instead of comparing against undef.
        next EXON unless defined $stop;

        # NOTE(review): both coordinates are treated as inclusive here. If
        # the table uses 0-based, half-open intervals (as UCSC-style exon
        # tables do), this range is off by one at the start -- confirm
        # against the source of chr7.txt.
        # Where records overlap, the last one read wins for a position.
        for ( my $j = $start ; $j <= $stop ; $j++ ) {
            $genome{$j} = $annotation;
        }
    }
}
close $exons_fh;
# Second pass: for every line of the positions file, take the 2nd
# tab-separated column as a base position and print it together with its
# exon annotation when that position is present in %genome.
$infile = 'bushmen.out';
open( my $bushmen_fh, '<', $infile )
  or die "Could not open $infile: $!\n";

while ( my $line = <$bushmen_fh> ) {

    # Strip the line ending first: if the position is the last field on the
    # line, a trailing newline would otherwise become part of the hash key
    # and the lookup could never match.
    $line =~ s/\r?\n\z//;

    my @columns = split /\t/, $line;
    my $pos     = $columns[1];

    # Blank or short lines have no position column; skip them rather than
    # indexing the hash with undef.
    next unless defined $pos;

    print "$pos $genome{$pos}\n" if exists $genome{$pos};
}
close $bushmen_fh;