bcftools的常用命令

2021-05-08  本文已影响0人  石博士

tabix

tabix -p vcf myvcf.vcf.gz 
#或者用
bcftools index -t --threads 10 myvcf.vcf.gz
tabix -h myvcf.vcf.gz chr1 > chr1.vcf #-h会加上vcf的header
#还可以用文件,列出所有要包含的染色体
tabix -h -R regions.list input.vcf.gz > output.vcf

BCFTOOLS

export BCFTOOLS_PLUGINS=/path/to/bcftools/plugins
bcftools +counts input.vcf.gz
#或者直接用
bcftools index -n input.vcf.gz

过滤

bcftools view -i "F_MISSING <= 0.2" 相当于vcftools的max-missing 0.8
或者用bcftools filter -e "F_MISSING > 0.2" 相当于vcftools的max-missing 0.8
bcftools view input/input.recode.vcf.gz -S 170.ID -i "F_MISSING <= 0.5" 
bcftools filter -e "MAF < 0.05" input.vcf
vcftools --vcf input.vcf --maf 0.05 --recode --out OUTPUT
bcftools filter -e "F_MISSING > 0.3 ||  MAF < 0.05" input.vcf.gz -Oz -o output.vcf.gz
vcftools --gzvcf input.vcf.gz --maf 0.05 --miss 0.7 --recode --out output2
bcftools的subset和F_MISSING过滤,一定要分两步做,不然会出错:
bcftools view -S kee.ID input.vcf | bcftools filter -e "F_MISSING > 0.2" > output.vcf #分两步过滤
#相当于
vcftools --vcf input.vcf --max-missing 0.8 --keep keep.ID --recode --out output
bcftools view -i "MAC>=0"
bcftools view -c 1 -c 1:nonmajor
bcftools filter -e "MAC == 0"

提取、合并、删除

bcftools view --region chr1,chr5,chr8 input.vcf.gz 
bcftools query -f "%CHROM\t%POS\n" 
zcat input.vcf.zg |awk '/^[^#]/{print $1"\t"$2}' > file.pos
bcftools view -R file.pos -Oz  -o output.vcf.gz  input.vcf.gz
bcftools view input/input.SNP.revhet.recode.vcf.gz -R input_Chr01.pos -S input_samples.list -Oz -o output_fixed.vcf.gz
bcftools view -t Chr01:888 input.recode.vcf.gz |bcftools query -f "%CHROM\t%POS\t[\n%GT]"|sort -u #查看一个sites的genotype
bcftools query -l input.vcf  > samples.txt
bcftools reheader --samples <your file> -o <output> <your input file> 
#file里面是
oldname newname
oldname2 newname2
#删除之后需要再过滤
bcftools view -s "^sample101" input.vcf.gz |bcftools view -i "MAC >0"-Oz -o output.vcf.gz
bcftools concat -f concat_vcf.list --threads 10 -Oz -o output/output.SNP.revhet.recode.vcf.gz
#不用file.list,可以这样:
bcftools concat -Oz -o output.vcf.gz *.vcf

python

import gzip

#open file according to suffix
def openfile(filename, mode='r'):
    if filename.endswith('.gz'):
        return gzip.open(filename, mode)
    else:
        return open(filename, mode)


#decode bytes lines if your read lines from gzip file
def decode_line(line):
    if type(line) == bytes:
        line = line.decode('utf-8')
    return line

with openfile("input.vcf.gz") as fh:
    for line in fh:
        line = decode_line(line)
        #do something

ChangeLog:

上一篇 下一篇

猜你喜欢

热点阅读