生信

使用滑动窗口，并计算每一个窗口内的 SNP 个数

2020-08-13  本文已影响0人  余绕
use strict;
use warnings;

# Sliding-window SNP counter.
#
# Usage: perl <this script> <snp_table> <output_table>
#
#   <snp_table>     Tab-separated text; the second column is read as the SNP
#                   position. Other columns are ignored. Rows whose second
#                   column is missing or not a non-negative integer (for
#                   example a header row) are skipped.
#   <output_table>  One tab-separated row per window:
#                   window index (1-based), window start, window end, SNP count.
#
# NOTE(review): the input is assumed to describe a single chromosome/region,
# because the first column is never compared against anything -- confirm
# against the real data before running on multi-chromosome tables.

my $REGION_START = 23532;       # first position of the region (inclusive)
my $REGION_END   = 31236919;    # last position of the region (inclusive)
my $WINDOW_SIZE  = 5000;        # a window spans [start, start + WINDOW_SIZE]
my $STEP_SIZE    = 1000;        # distance between consecutive window starts

# Run only when executed as a script, so the subs below can also be loaded
# (e.g. via require) and exercised without touching any files.
main(@ARGV) unless caller;

# Entry point: builds the windows, reads the SNP positions, counts the SNPs
# per window and writes the result table.
# Dies with a usage message unless exactly two arguments are given, and with
# an explanatory message when a file cannot be opened or closed.
sub main {
    my @args = @_;
    die "Usage: $0 <snp_table> <output_table>\n" unless @args == 2;
    my ($snp_path, $out_path) = @args;

    my @windows =
        build_windows($REGION_START, $REGION_END, $WINDOW_SIZE, $STEP_SIZE);
    my @positions = read_snp_positions($snp_path);
    my @counts =
        count_snps_per_window(\@positions, \@windows, $WINDOW_SIZE, $STEP_SIZE);

    open my $out, '>', $out_path
        or die "Cannot open '$out_path' for writing: $!\n";
    for my $index (0 .. $#windows) {
        my ($start, $end) = @{ $windows[$index] };
        print {$out} join("\t", $index + 1, $start, $end, $counts[$index]), "\n";
    }
    # Buffered write errors only surface at close time, so check it.
    close $out or die "Cannot close '$out_path': $!\n";

    return;
}

# Returns the list of windows covering [$region_start, $region_end] as
# [start, end] array refs, in ascending order of start.
# Window starts advance by $step_size. Each window ends at start + $window_size
# (inclusive, so it covers $window_size + 1 positions), except the last one,
# which is clamped to $region_end. Tiling stops at the first window that
# reaches $region_end. Returns an empty list when the region is empty.
# Dies when $step_size is not positive, since the loop would never terminate.
sub build_windows {
    my ($region_start, $region_end, $window_size, $step_size) = @_;
    die "Step size must be a positive number\n" unless $step_size > 0;

    my @windows;
    my $start = $region_start;
    while ($start <= $region_end) {
        my $end = $start + $window_size;
        if ($end >= $region_end) {
            push @windows, [ $start, $region_end ];
            last;
        }
        push @windows, [ $start, $end ];
        $start += $step_size;
    }
    return @windows;
}

# Reads the tab-separated file at $path and returns the values of its second
# column, one per row, in file order. Every row is kept, even when several
# rows share the same first column. Rows without an integer second column are
# skipped. Dies when the file cannot be opened or closed.
sub read_snp_positions {
    my ($path) = @_;

    open my $in, '<', $path
        or die "Cannot open '$path' for reading: $!\n";

    my @positions;
    while (my $line = <$in>) {
        $line =~ s/\r?\n\z//;    # tolerate both Unix and Windows line endings
        my (undef, $field) = split /\t/, $line;
        next unless defined $field && $field =~ /\A\s*(\d+)\s*\z/;
        push @positions, $1;
    }
    close $in or die "Cannot close '$path': $!\n";

    return @positions;
}

# Returns one SNP count per window, in the same order as @$windows.
#   $positions    array ref of SNP positions
#   $windows      array ref of [start, end] pairs as produced by build_windows
#                 with the same $window_size and $step_size
# A position is counted in every window whose inclusive [start, end] range
# contains it; positions outside the tiled region are ignored.
# Instead of testing every position against every window, the range of
# matching window indices is computed directly from the regular layout:
# window i starts at region_start + i * step_size.
sub count_snps_per_window {
    my ($positions, $windows, $window_size, $step_size) = @_;

    my @counts = (0) x @{$windows};
    return @counts unless @{$windows};

    my $region_start = $windows->[0][0];
    my $region_end   = $windows->[-1][1];
    my $last_index   = $#{$windows};

    for my $position (@{$positions}) {
        next if $position < $region_start || $position > $region_end;

        my $offset = $position - $region_start;

        # Highest window whose start is <= position.
        my $highest = int($offset / $step_size);
        $highest = $last_index if $highest > $last_index;

        # Lowest window whose end (start + window_size) is >= position,
        # i.e. ceil(($offset - $window_size) / $step_size), floored at 0.
        my $overshoot = $offset - $window_size;
        my $lowest = $overshoot > 0
            ? int(($overshoot + $step_size - 1) / $step_size)
            : 0;

        # The range is empty when the position falls in a gap between
        # windows (only possible when step_size exceeds window_size).
        $counts[$_]++ for $lowest .. $highest;
    }

    return @counts;
}

数据类型

image.png

输出结果

image.png
上一篇下一篇

猜你喜欢

热点阅读