MMseqs2序列聚类
2024-02-19 本文已影响0人
胡童远
文章
标题:MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets
期刊:Nature Biotechnology
时间:2017
安装
# Install MMseqs2 into a dedicated conda environment named "mmseq2".
source /hwfsxx1/ST_HN/P18Z10200N0423/huty/software/miniconda3_2/etc/profile.d/conda.sh
# `conda activate` fails when the environment does not exist yet, so create
# it on first use and then activate it.
conda activate mmseq2 || { conda create -y -n mmseq2 && conda activate mmseq2; }
conda install bioconda::mmseqs2
# Smoke test: print the help text of the freshly installed binary.
/hwfsxx1/ST_HN/P18Z10200N0423/huty/software/miniconda3_2/envs/mmseq2/bin/mmseqs -h
使用
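注意:不要在以 $ 开头的命令行(如 $route/mmseqs ...)前直接加 # 来注释。行首的 #$ 会被 qsub 当作作业选项解析,脚本会报错 "qsub option invalid"。需要注释时,在 # 后加一个空格即可(如 # $route/mmseqs ...)。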
# merge
# Disabled step (kept commented out): concatenates every sample's
# 02_assembly/<sample>/gene/<sample>_rename.fna into
# 03_geneset/01_gene_merge.fna, printing "<sample> done..." per sample.
# NOTE(review): presumably run once beforehand -- the pipeline below reads
# 03_geneset/01_gene_merge.fna and writes into 03_geneset/, so both must exist.
#mkdir 03_geneset
#touch 03_geneset/01_gene_merge.fna
#for i in `ls 02_assembly/`; do
# cat 02_assembly/$i/gene/${i}_rename.fna >> 03_geneset/01_gene_merge.fna
# echo -e "$i done..."
#done
# mmseqs pipeline
# Clusters the merged gene set with MMseqs2, then exports the cluster
# membership table and the representative sequences.
#   input : 03_geneset/01_gene_merge.fna (must already exist)
#   output: 03_geneset/04_extract_info/cluter_info.tsv
#           03_geneset/05_extract_rep/cluster_rep.fasta
source /hwfsxx1/ST_HN/P18Z10200N0423/huty/software/miniconda3_2/etc/profile.d/conda.sh || exit 1
conda activate mmseq2 || exit 1
# Stop at the first failing step so later steps never run on missing or
# partial results. Enabled only after activation because conda's activation
# scripts are not guaranteed to be `set -u` clean.
set -euo pipefail

route="/hwfsxx1/ST_HN/P18Z10200N0423/huty/software/miniconda3_2/envs/mmseq2/bin"
mmseqs="${route}/mmseqs"
geneset="03_geneset"
merged_fna="${geneset}/01_gene_merge.fna"
seq_db="${geneset}/02_DB/DB"
cluster_db="${geneset}/03_cluster/cluster"
tmp_dir="${geneset}/03_tmp"
# The spelling "cluter" is kept on purpose: renaming the output file could
# break anything downstream that already reads this path.
cluster_tsv="${geneset}/04_extract_info/cluter_info.tsv"
rep_db="${geneset}/05_extract_rep/cluster_rep"

# 1 clean headers: drop everything from the first " #" to the end of each
# line, edited in place.
sed -i 's/ #.*$//' "${merged_fna}"

# 2 DB: build the MMseqs2 sequence database.
# mkdir -p keeps the script re-runnable when the directories already exist.
mkdir -p "${geneset}/02_DB"
"${mmseqs}" createdb "${merged_fna}" "${seq_db}"

# 3 cluster: minimum 90% sequence identity (--min-seq-id 0.9) and 80%
# alignment coverage (-c 0.8) with coverage mode 0, on 24 threads.
mkdir -p "${geneset}/03_cluster" "${tmp_dir}"
"${mmseqs}" cluster \
  "${seq_db}" \
  "${cluster_db}" \
  "${tmp_dir}" \
  --cov-mode 0 -c 0.8 --min-seq-id 0.9 --threads 24
# The scratch directory is only needed while clustering runs.
rm -r -- "${tmp_dir}"

# 4 extract cluster info: the sequence DB is passed as both query and target
# DB so the cluster result is written out as a TSV table.
mkdir -p "${geneset}/04_extract_info"
"${mmseqs}" createtsv \
  "${seq_db}" \
  "${seq_db}" \
  "${cluster_db}" \
  "${cluster_tsv}"

# 5 extract rep fasta: subset the sequence DB by the cluster DB, then
# convert that sub-database to FASTA.
mkdir -p "${geneset}/05_extract_rep"
"${mmseqs}" createsubdb \
  "${cluster_db}" \
  "${seq_db}" \
  "${rep_db}"
"${mmseqs}" convert2fasta \
  "${rep_db}" \
  "${rep_db}.fasta"