比较基因组从入门到放弃(2)
2021-05-23 本文已影响0人
Morriyaty
树的构建
基因树
#对align文件夹下的每一个Ortholog的cds文件用iqtree建树
iqtree.py
#合并所有树
cat align/*/cds.best.fas.treefile > ./all.tree
#修改枝长
sed -i 's/[0-9.]\+/1/g' all.tree
#重新定根
nw_reroot all.tree cattle > all.reroot.tree
#phybase
perl 10.noclock2clock.pl all.reroot.tree
##修改生成脚本 第一行为
library('phybase',lib.loc="/data/00/user/user157/R/x86_64-redhat-linux-gnu-library/4.0");
#运行
Rscript all.reroot.tree.Rscript > all.reroot.clocktre
#Densitree画图展示基因树
并联树
java -jar astral.5.7.7.jar -i all.tree -o out.tree
#查看支持度
java -jar astral.5.7.7.jar -i all.tree -t 16 -o out.tree
串联树
##根据orthofinder结果提取基因列表 Orthogroups_SingleCopyOrthologues.txt Orthogroups.txt 为orthofinder结果
python3 1.AllClusters.txt.single.copygene.txt.py Orthogroups_SingleCopyOrthologues.txt Orthogroups.txt > scg.txt
##准备cds与pep文件夹data 命名为xxx.cds.clean.fa xxx.pep.clean.fa
##获得supergene(cds)
perl 1.splitSeq.pl data scg.txt
perl 2.align.pl > 2.align.pl.sh
sh 2.align.pl.sh #(提交任务)#
perl 3.connect.pl
perl 4.4Dsites.pl 3.connect.pl.connect.cds.fa
perl fasta2phylip.pl 4.4Dsites.pl.connect4Dsites.fa > A-c.phy
##构建cds物种树 与timtree上物种树校对
iqtree -s A-c.phy
mcmctree建议使用氨基酸序列做
######分歧时间估计#####
##注 :得到替换速率用@ 计算时间用>
############ way 1 pep ###############
##准备文件 SpeciesTreeAlignment.fa SpeciesTree_rooted.txt
##将fa格式转换为phy格式
perl /data/01/user152/software/script/genome_compare/script/fasta2phylip.pl SpeciesTreeAlignment.fa > A-p.phy
##修改树文件,添加化石时间信息
#例
#6 1
#(cu,((un,(ru,ca)),(no,mu)'>.183<.234'))'>.76<.88';
##运行mcmctree
/data/01/user152/software/paml4.9i/bin/mcmctree mcmctree.ctl
##修改tmp0001.ctl、tmp0001.trees 并运行codeml
mv tmp0001.ctl codeml.ctl
#getSE = 0
#clock = 1
/data/01/user152/software/paml4.9i/bin/codeml codeml.ctl
#在结果文件tmp0001.out查看替换速率
#Substitution rate is per time unit
# 0.123789
##修改mcmctree.ctl
##运行mcmctree
/home/share/users/yangyongzhi2012/tools/paml/paml4.8/bin/mcmctree mcmctree.ctl
##将out.BV 改为in.BV 修改mcmctree.ctl 创建r01 r02 运行两次
mv out.BV in.BV
mkdir r01 r02
cp mcmctree.ctl in.BV A-p.phy tree.file r01
cp mcmctree.ctl in.BV A-p.phy tree.file r02
#提交任务做
############ way 2 cds ###############
#运行baseml
/home/share/users/yangyongzhi2012/tools/paml/paml4.8/bin/baseml baseml.ctl
#在结果文件mlb查看替换速率
#Substitution rate is per time unit
# 0.416311 +- 0.000609
##其余步骤同pep##