单细胞测序

基因组denovo组装计算

2018-12-01  本文已影响103人  Biofantasy

SOAPdenovo 配置example.config

#mal read length
max_rd_len=100
[LIB]
#average
avg_ins=450
#if sequence needs to be reversed
reverse_seq=0
#in which part(s) the reads are used
asm_flags=3
#use only first 100 bps of each read
rd_len_cutoff=100
#in which order the reads are used while scaffolding
rank=1

#cutoff of pair number for a reliable connection (at least 3 for short insert size)
pair_num_cutoff=3
#minimum aligned length to contigs for a reliable read location (at least 32 for short insert size)
map_len=32
#a pair of fastq file, read 1 file should always be followed by read 2 file
q1=newBGIseq500_1.fq1
q2=newBGIseq500_2.fq1

length.pl

#!/bin/perl -w
use strict;
$/ = ">";
open(IN,"output.scafSeq") || die $!;
while (<IN>){
    chomp;
    next if (/^$/);
    my @tmp = split /\n/,$_,2;
    $tmp[1] =~ s/\n//g;
    print length($tmp[1])."\n"
}
close IN;

计算长度累计曲线
perl length.pl |sort -k 1rn |awk 'BEGIN{sum=0}{if(!NF){next};sum+=$1;print sum}' >len.txt
计算contig N50

perl -e 'my ($len,$total)=(0,0);my @x;while(<>){if(/^[\>\@]/){if($len>0){$total+=$len;push@x,$len;};$len=0;}else{s/\s//g;$len+=length($_);}}if ($len>0){$total+=$len;push @x,$len;}@x=sort{$b<=>$a}@x; my ($count,$half)=(0,0);for (my $j=0;$j<@x;$j++){$count+=$x[$j];if(($count>=$total/2)&&($half==0)){print "N50: $x[$j]\n";$half=$x[$j]}elsif($count>=$total*0.9){print "N90: $x[$j]\n";exit;}}' output.scafSeq

R

setwd("/home/u2406/files/")
rm(list=ls())
pdf("length.pdf") 
lens <- read.table("len.txt")
plot(x=seq(1,length(lens$V1)),y=lens$V1,type="l")
dev.off()

chr17染色体
vcftools --vcf chr17.vcf --chr chr17 --to-bp 7687550 --out TP53 --recode --from-bp 7661779
TP53位置信息:
Chromosome 17: 7,661,779-7,687,550
该区域中包含的变异位点的脚本:

#!/usr/bin/perl 
use strict; 
use warnings; 
 my @sample; 
 my %hash;
  open (IN,"chr17.vcf") or die $!; 
  while (<IN>) {    
    chomp;  
    next if (/^##/);    
     my $line=$_;   
     if ($line=~/^#CHROM/) {                            (undef,undef,undef,undef,undef,undef,undef,undef,undef,@sample)=split/\t/,$line;        
   next;    
    }   
   #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  27DMBDM4YT      7XKZJA3JWX      APRDKV0LDS 
     my ($chrom,$pos,$id,$ref,$alt,$qual,$filter,$info,$format,@Other)=split/\t/,$line;     
    if ($pos>=7661779 && $pos <=7687550){       
         for (my $i=0;$i<@sample ;$i++) {           
             my $lis=(split/:/,$Other[$i])[0];          
             my ($aa,$bb)=split/\//,$lis;           
            if ($aa == $bb) {               
                  if (exists $hash{$sample[$i]}{'aa'}) {                    
                     $hash{$sample[$i]}{'aa'} +=1;              
                  }else{                    
                    $hash{$sample[$i]}{'aa'}=1;                 
                  }             
              }else{                
                 if (exists $hash{$sample[$i]}{'ab'}) {                     
                     $hash{$sample[$i]}{'ab'} +=1;              
                  }else{                    
                     $hash{$sample[$i]}{'ab'}=1;                
                  }             
             }      
         }  
      } 
   } 
   close IN; 
   print "#Sample\tHomozygous\tHeterozygote\n"; 
    foreach my $name (keys %hash) {     
     print "$name\t$hash{$name}{'aa'}\t$hash{$name}{'ab'}\n"; 
    }

看到脚本在后台稳稳运行,好舒心

看到脚本在后台稳稳运行,好舒心
上一篇 下一篇

猜你喜欢

热点阅读