根据bed坐标从bam文件中计算reads数目,相当于bedto

2020-05-29  本文已影响0人  余绕
open BED, "$ARGV[0]";

while(<BED>){
    chomp;
    @temp=split /\t/,$_;
    $chr=$temp[0];
    $hash_bed{$temp[3]}=[$temp[0],$temp[1],$temp[2]];
}

open FA,"$ARGV[1]";

while(<FA>){
    
    chomp;
    @temp=split /\t/,$_;
    @matchM=$temp[5]=~m/(\d*)M/g; #Put  the matched values in an array!
    @matchD=$temp[5]=~m/(\d*)D/g;
    @MATCH=(@matchM,@matchD);
    $match=0;
    foreach(@MATCH){
        $match+=$_;
        }
    $left=$temp[3]-1;
    $right=$left+$match;
    $hash_bam{$temp[2]}{"$left\t$right"}=1;
}
    
foreach(keys %hash_bed){
    my $n=0; #非常重要,每次循环清零,不然连续累加
    my $loc=$_;
    #$array=$hash_bed{$_};
    my($chr,$gene_start,$gene_end)=@{$hash_bed{$_}};
        #print "$chr\t$gene_start\t$gene_end\n";
    foreach(keys %{$hash_bam{$chr}}){
        
        my($peak_start,$peak_end)=split /\t/,$_;
        $peak_len=$peak_end-$peak_start+1;
        #print "$peak_start\t$peak_end\t$peak_len\n";
        my ($a1,$a2,$b1,$b2)=($gene_start,$gene_end,$peak_start,$peak_end); 
        if($a1>=$b1 and $a1<=$b2 and $a2>$b1 and $a2<=$b2){
            $n+=1;
        
        }
        elsif(!($a2<$b1 or $b2<$a1)){
            @see=sort{$a<=>$b}($a1,$a2,$b1,$b2);
            $overlalp=$see[2]-$see[1];
            $ratio=$overlalp/$peak_len;
                if($ratio>0){
                    $n+=1;
                        }
    
    }
    $peak_loc_chain{$chr}{"$gene_start\t$gene_end"}="$loc\t$n";
}

}

foreach $CHR(sort keys %peak_loc_chain){
    foreach(sort keys %{$peak_loc_chain{$CHR}}){
    print "$CHR\t$_\t$peak_loc_chain{$CHR}{$_}\n";
    }
}

code 练习


image.png
上一篇下一篇

猜你喜欢

热点阅读