鉴定同源基因对
2023-08-02 本文已影响0人
Felix_xpz
工具一:Blast系列
1. 核酸序列比对 Blastn
mkdir 01.Data 02.MakeDB 03.Blastn
####### step1 Input data #######
for i in `cat Sample |cut -f 1|sort|uniq`;do
cd 01.Data
ln -s ${rawdata_dir}/${i}.cds
cd ../
done
####### step2 Make DataBase #######
module load BLAST+/2.9.0
for i in `cat Sample |cut -f 1|sort|uniq`;do
makeblastdb -in ./01.Data/${i}.cds -dbtype nucl -out ./02.MakeDB/${i}.cds;
done
#### step3 run Blastn #####
module load BLAST+/2.9.0
cat Sample | while read arg; do
i=`echo $arg|awk '{print $1}'`
j=`echo $arg|awk '{print $2}'`
blastn -evalue 1e-5 -outfmt 6 -max_hsps 1 -num_threads 10 -db ./02.MakeDB/${i}.cds -query ./01.Data/${j}.cds -out ./03.Blastn/${j}_vs_${i}_DB.cds
done
2. 蛋白序列比对 Blastp
mkdir 01.Data 02.MakeDB 03.Blastp
####### step1 Input data #######
for i in `cat Sample |cut -f 1|sort|uniq`;do
cd 01.Data
ln -s ${rawdata_dir}/${i}.pep
cd ../
done
####### step2 Make DataBase #######
module load BLAST+/2.9.0
for i in `cat Sample |cut -f 1|sort|uniq`;do
makeblastdb -in ./01.Data/${i}.pep -dbtype prot -out ./02.MakeDB/${i}.pep;
done
#### step3 run Blastp #####
module load BLAST+/2.9.0
cat Sample | while read arg; do
i=`echo $arg|awk '{print $1}'`
j=`echo $arg|awk '{print $2}'`
blastp -evalue 1e-5 -outfmt 6 -max_hsps 1 -num_threads 10 -db ./02.MakeDB/${i}.pep -query ./01.Data/${j}.pep -out ./03.Blastp/${j}_vs_${i}_DB.pep
done
3. 蛋白序列比对 diamond
mkdir 01.Data 02.MakeDB 03.DBlastp
####### step1 Input data #######
for i in `cat Sample |cut -f 1|sort|uniq`;do
cd 01.Data
ln -s ${rawdata_dir}/${i}.pep
cd ../
done
####### step2 Make DataBase #######
module load diamond/v2.0.12
for i in `cat Sample |cut -f 1|sort|uniq`;do
diamond makedb --in ./01.Data/${i}.pep -d ./02.MakeDB/${i}.dmnd;
done
####### step3 run diamond #######
module load diamond/v2.0.12
cat Sample | while read arg; do
i=`echo $arg|awk '{print $1}'`
j=`echo $arg|awk '{print $2}'`
diamond blastp --evalue 1e-7 -p 10 --db ./02.MakeDB/${i}.dmnd -q ./01.Data/${j}.pep --out ./03.DBlastp/${j}_vs_${i}_DB;
done
工具二:Jcvi
1. 安装Jcvi 略
2. 运行Jcvi
输入文件:
- sample1.bed
- sample1.pep
- sample2.bed
- sample2.pep
注意事项1: 样本名称尽量短且不要出现.
注意事项2: pep中的蛋白名称在bed中,且bed/pep的蛋白名称也不要出现.
rawdata_dir = $PWD
####### step1 Input data #######
module load jcvi/v1.0.5
cat Sample | while read arg; do
i=`echo $arg|awk '{print $1}'`
j=`echo $arg|awk '{print $2}'`
mkdir ${i}_vs_${j}
ln -s ${rawdata_dir}/${i}.pep
ln -s ${rawdata_dir}/${i}.bed
ln -s ${rawdata_dir}/${j}.pep
ln -s ${rawdata_dir}/${j}.bed
done
####### step2 run Jcvi #######
module load jcvi/v1.0.5
cat Sample | while read arg; do
i=`echo $arg|awk '{print $1}'`
j=`echo $arg|awk '{print $2}'`
cd ${i}_vs_${j}
python -m jcvi.compara.catalog ortholog --nochpf --dbtype prot --no_strip_names ${j} ${i}
python -m jcvi.compara.synteny screen --minspan=1 --simple ${j}.${i}.anchors ${j}.${i}.anchors.simple.minspan1
cat ${j}.${i}.anchors.simple.minspan1 |grep -v '#'|cut -f 1-2 > ${j}_vs_${i}.homologs
cd ../
done
工具三:SynOrths
1. 安装SynOrths 略
2. 运行SynOrths
输入文件:
- sample1.bed
- sample1.fas
- sample2.bed
- sample2.fas
注意事项1: bed文件是5列,Gene_ID + Chr + Start + End +Strand
rawdata_dir = $PWD
####### step1 Input data #######
cat Sample | while read arg; do
i=`echo $arg|awk '{print $1}'`
j=`echo $arg|awk '{print $2}'`
mkdir ${i}_vs_${j}
cd ${i}_vs_${j}
ln -s ${rawdata_dir}/${i}.pep ${i}.fas
ln -s ${rawdata_dir}/${i}.bed
ln -s ${rawdata_dir}/${j}.pep ${i}.fas
ln -s ${rawdata_dir}/${j}.bed
cd ../
done
####### step2 run SynOrths #######
cat Sample | while read arg; do
i=`echo $arg|awk '{print $1}'`
j=`echo $arg|awk '{print $2}'`
cd ${i}_vs_${j}
module load SynOrths/v1.0
SynOrths -a ${j}.fas -b ${i}.fas -p ${j}.bed -q ${i}.bed -m 20 -n 20 -r 0.2 -o Out_${j}_vs_${i}_r0.2
cd ../
done
工具四:GeneTribe
1. 安装GeneTribe 略
2. 运行GeneTribe
输入文件:
- sample1.bed
- sample1.fa
- sample1.chrlist
- sample2.bed
- sample2.fa
- sample2.chrlist
注意事项1: 因为要调用Jcvi,因此fa中的蛋白名称在bed中,且bed/fa的蛋白名称也不要出现.
注意事项2:因为物种名称不能出现.
,因此我将文件名称替换成s1、s2
注意事项3:将bed文件的染色体排序后进行替换,因此chrlist全部文件内容为N
rawdata_dir = $PWD
mkdir 01.Rawdata 02.ChuliData 03.Run
####### step1 Input Rawdata #######
for i in `cat Sample |cut -f 1|sort|uniq`;do
cd 01.Rawdata
cat ${rawdata_dir}/${i}.bed|sort -k 1,1 -k 2n,2 -k 3n,3 > ${i}.sortbed;
ln -s ${rawdata_dir}/${i}.pep
cd ../
done
####### step2 Input 02.ChuliData #######
for i in `cat Sample |cut -f 1|sort|uniq`;do
cd 02.ChuliData
cat ../01.Rawdata/${i}.sortbed |cut -f 1|uniq > a
cat -n a|tr -d ' ' > ${i}.Mapping
rm a
echo "N" >> ${i}.chrlist
~/anaconda3/bin/python change_chr_ID.py ${i}; ## 脚本替换染色体名称
cat ../01.Rawdata/${i}.pep |tr '.' '#' > ${i}.fa ## ## 将基因名称中的'.'替换为'#'
cat ${i}.bed |tr '.' '#' > ${i}.bed1; mv ${i}.bed1 ${i}.bed ## 将基因名称中的'.'替换为'#'
cd ../
done
####### step3 run GeneTribe #######
cat Sample | while read arg; do
i=`echo $arg|awk '{print $1}'`
j=`echo $arg|awk '{print $2}'`
mkdir ${i}_vs_${j}
cd ${i}_vs_${j}
ln -s ${i}.fa s1.fa
ln -s ${i}.chrlist s1.chrlist
ln -s ${i}.bed s1.bed
ln -s ${j}.fa s2.fa
ln -s ${j}.chrlist s2.chrlist
ln -s ${j}.bed s2.bed
module load jcvi/1.0.6
nohup genetribe core -l s1 -f s2 &
cd ../
done
####### step4 replace name #######
cat Sample | while read arg; do
i=`echo $arg|awk '{print $1}'`
j=`echo $arg|awk '{print $2}'`
cd ${i}_vs_${j}
mv s1.bed ${i}.bed
mv s1.chrlist ${i}.chrlist
mv s1.fa ${i}.fa
mv s1_s2.block_pos ${i}_${j}.block_pos
mv s1_s2.collinearity_info ${i}_${j}.collinearity_info
mv s1_s2.one2many ${i}_${j}.one2many
mv s1_s2.one2one ${i}_${j}.one2one
mv s1_s2.SBH ${i}_${j}.SBH
mv s1_s2.singleton ${i}_${j}.singleton
mv s2.bed ${j}.bed
mv s2.chrlist ${j}.chrlist
mv s2.fa ${j}.fa
mv s2_s1.block_pos ${j}_${i}.block_pos
mv s2_s1.collinearity_info ${j}_${i}.collinearity_info
mv s2_s1.one2many ${j}_${i}.one2many
mv s2_s1.one2one ${j}_${i}.one2one
mv s2_s1.RBH ${j}_${i}.RBH
mv s2_s1.SBH ${j}_${i}.SBH
mv s2_s1.singleton ${j}_${i}.singleton
cd ../
done