RepeatModeler+EDTA+homology-based
2023-08-02 本文已影响0人
多啦A梦的时光机_648d
1. repeatmodeler
$BuildDatabase -name genome genome.fa
$/home/lx_sky6/yt/soft/RepeatModeler-2.0.3/RepeatModeler -database genome -pa 20 -LTRStruct 1>repeatmodeler.log 2>&1
2. EDTA
##整合SINE和LINE
$wget https://sines.eimb.ru/banks/SINEs.bnk
$wget https://sines.eimb.ru/banks/LINEs.bnk
$awk '{print $1}' SINEs.bnk |awk '{if(/>/)print $1"#SINE/"$1;else print $1}'|sed 's/\/>/\//g' >SINEs.fa
$awk '{print $1}' LINEs.bnk |awk '{if(/>/)print $1"#LINE/"$1;else print $1}'|sed 's/\/>/\//g' >LINEs.fa
$cat SINEs.fa LINEs.fa > SINE_LINE.fa
$EDTA.pl --genome genome_renamed.fa --species others --sensitive 0 --anno 1 --threads 40 --curatedlib SINE_LINE.fa
3. 下载homology-based库
##(我的是蔷薇科物种,所以选择rosids)
$/home/lx_sky6/yt/soft/RepeatMasker/famdb.py -i Libraries/RepeatMaskerLib.h5 families -f embl -a -d rosids > rosids_ad.embl
$perl /home/lx_sky6/yt/soft/RepeatMasker/util/buildRMLibFromEMBL.pl rosids_ad.embl >rosids_ad.fasta
4. repeatmasker
$cat ../9-EDTA/genome.fa.mod.EDTA.TElib.fa ../8-Repeatmodeler/genome-families.fa rosids_ad.fasta > repeats.lib.fa ##同时可以加入Repbase数据库
$RepeatMasker -e ncbi -pa 64 -xsmall -lib repeats.lib.fa genome.fa
$/home/lx_sky6/software/RepeatMasker/util/rmOutToGFF3.pl genome.fa.out > repeats.gff
##修改为maker需要的格式
$cat repeats.gff|grep -v -e "Satellite" -e ")n" -e "-rich"|perl -ane '$id; if(!/^\#/){@F = split(/\t/, $_); chomp $F[-1];$id++; $F[-1] .= "\;ID=$id"; $_ = join("\t", @F)."\n"} print $_' >repeats.reformat.gff
$/home/lx_sky6/software/RepeatMasker/util/buildSummary.pl genome.fa.out > repeats.summary