基因家族软件安装
2021-09-04 本文已影响0人
新_世_界
########################## perl 安装 #####################################
# 示例:在 /usr/local/bin 下创建名为 tfastm 的软链接,指向 tfastm36 的安装路径
cd /usr/local/bin
ln -s ../bioinf/fasta/fasta/bin/tfastm36 tfastm
# 创建后用 ls -l 可以看到:tfastm -> ../bioinf/fasta/fasta/bin/tfastm36
注意:../bioinf/fasta/fasta/bin/tfastm36 是源文件
/usr/local/bin/tfastm 是目标文件(相当于快捷方式)
sudo ln -s /home/tangyimiao/biosoft/myBin/bin/perl5.31.6 /usr/local/bin/perl
# 创建软链接
ln -s /home/tangyimiao/biosoft/perl/perl-5.31.6 /usr/local/bin/per5_31
# 删除软链接
rm -rf ./per5_31 # 删除链接
######################### 安装perl-5.31.6 ##################
find / -name perl
#查询perl路径,确认相关
which perl
#解压压缩包
tar -zxvf perl-5.31.6.tar.gz
#进入文件目录
cd perl-5.31.6
#指定编译安装路径
./Configure -des -Dusedevel -Dprefix=/home/tangyimiao/biosoft/myBin
#安装
make
make test
make install
#如果之前有perl安装在电脑上,那可以采用以下设置
#替换掉旧的perl命令
#cd /usr/bin # 这个里面有默认的perl
mv /usr/bin/perl /usr/bin/perl.bak # 备份,改名
#做一个软链接,使用新的perl
sudo ln -s /home/tangyimiao/biosoft/myBin/bin/perl5.31.6 /usr/local/bin/perl
####################### 安装 KaKs_Calculator 1.2 (For Linux/Unix/OS) ##########################
# 计算Ka/Ks
Please go to the [KaKs_Calculator homepage](https://code.google.com/archive/p/kaks-calculator/downloads) and download KaKs_Calculator.
# 1. 下载 download KaKs_Calculator,得到KaKs_Calculator1.2.tar.gz 和 KaKs_Calculator1.2.tar 两个压缩包
wget https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/kaks-calculator/KaKs_Calculator1.2.tar.gz
# 2. 解压 unzip(任选一个压缩包,然后解压)
gzip -d KaKs_Calculator1.2.tar.gz && tar -xf KaKs_Calculator1.2.tar # 先解压缩得到 .tar,再解包;若下载的是 .tar 包,只需执行 tar -xf KaKs_Calculator1.2.tar
# 3. 安装install
cd KaKs_Calculator1.2/src
sudo make
sudo cp KaKs_Calculator /usr/local/bin/ #这句最为重要!
####################### 安装 KaKs_Calculator2.0 (For Linux/Unix/OS) ##########################
# 1. 下载https://sourceforge.net/projects/kakscalculator2/ 压缩包至本地
# 2. 解压KaKs_Calculator2.0.rar ,得KaKs_Calculator2.0 文件夹
# 3. 将KaKs_Calculator2.0 文件夹上传至 /home/manager/biosoft 目录下
cd /home/manager/biosoft/KaKs_Calculator2.0/src
sudo make
sudo cp KaKs_Calculator /usr/local/bin/
######################### meme软件 #######################################
# 1. 下载 Download the software from http://meme-suite.org/doc/download.html/
# 2. 解压 Type the following commands
tar zxf meme-5.1.0.tar.gz
# 3. 安装install
cd meme-5.1.0
./configure --prefix=$HOME/meme --with-url=http://meme-suite.org/ --enable-build-libxml2 --enable-build-libxslt
make
make test
make install
# 4. Edit your shell configuration file to add $HOME/meme/bin and $HOME/meme/libexec/meme-5.1.0 to your shell's path. This can often be done by editing the file named .profile to add the following line
export PATH=$HOME/meme/bin:$HOME/meme/libexec/meme-5.1.0:$PATH
备注:在线使用网址:http://meme-suite.org/tools/meme
######################### MCScanx软件 #######################################
cd /home/manager/biosoft/mcscanx
wget http://chibba.pgml.uga.edu/mcscan2/MCScanX.zip
unzip MCScanX.zip
cd MCScanX
make
如果make报错:
g++ struct.cc mcscan.cc read_data.cc out_utils.cc dagchainer.cc msa.cc permutation.cc -o MCScanX
msa.cc: In function ‘void msa_main(const char*)’:
msa.cc:289:22: error: ‘chdir’ was not declared in this scope
if (chdir(html_fn)<0)
^
make: *** [mcscanx] Error 1
解决方案:
这个错误的原因是,MCScanX 不支持64位系统。如果要在 64位上运行,需要修改下源代码。
只需要在 MCScanX 目录下的 msa.h、dissect_multiple_alignment.h 和 detect_collinear_tandem_arrays.h 这三个文件的最前面添加 #include <unistd.h>
echo 'PATH=$PATH:~/biosoft/mcscanx/MCScanX/ ' >> ~/.bashrc
source ~/.bashrc
MCScanX
[Usage] MCScanX prefix_fn [options]
-k MATCH_SCORE, final score=MATCH_SCORE+NUM_GAPS*GAP_PENALTY
(default: 50)
-g GAP_PENALTY, gap penalty (default: -1)
-s MATCH_SIZE, number of genes required to call a collinear block
(default: 5)
-e E_VALUE, alignment significance (default: 1e-05)
-m MAX_GAPS, maximum gaps allowed (default: 25)
-w OVERLAP_WINDOW, maximum distance (# of genes) to collapse BLAST matches (default: 5)
-a only builds the pairwise blocks (.collinearity file)
-b patterns of collinear blocks. 0:intra- and inter-species (default); 1:intra-species; 2:inter-species
-h print this help page
# sp# gene starting_position ending_position (共四列)
# 其中"sp#"的sp意味着你要用2个字母代表物种(多个字母好像也不影响结果),#则表示是哪条染色体。而"gene"则要是你蛋白序列的基因名。
gff3 文件第九列用 = 连接键和值,利用 awk 指定多个分隔符就能提取出基因名:
awk -F "[= \t]" '$3 == "gene" {print $1"\t"$11"\t"$4"\t"$5}' Arabidopsis_thaliana.TAIR10.41.gff3|head -n 10
awk -F "[= \t]" '$3 == "gene" {print $1"\t"$11"\t"$4"\t"$5}' Arabidopsis_thaliana.TAIR10.41.gff3|head -n 10
sudo java ~/biosoft/mcscanx/MCScanX/downstream_analyses/family_circle_plotter -g at.gff -s .at.collinearity -c family.ctl -f MADS_box_family.txt -o MADS.circle.PNG
######################################## Linux Anaconda3 安装 ##############################
参考: https://zhuanlan.zhihu.com/p/32925500 有修改
# 第一步
wget https://repo.anaconda.com/archive/Anaconda3-2019.10-Linux-x86_64.sh
# 进入安装文件目录后,校验安装包的 MD5 值
md5sum /home/manager/biosoft/PythonModule/Anaconda3/Anaconda3-2019.10-Linux-x86_64.sh
# 第二步
bash Anaconda3-2019.10-Linux-x86_64.sh
# 第三步
# 安装过程中,conda 的初始化代码会自动写入 .bashrc 文件;我们需要将以下内容粘贴到 .zshrc 中,等价于添加环境变量
# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
__conda_setup="$('/home/manager/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
eval "$__conda_setup"
else
if [ -f "/home/manager/anaconda3/etc/profile.d/conda.sh" ]; then
. "/home/manager/anaconda3/etc/profile.d/conda.sh"
else
export PATH="/home/manager/anaconda3/bin:$PATH"
fi
fi
unset __conda_setup
# <<< conda initialize <<<
# 第四步
# 更新.zshrc以立即生效
source ~/.zshrc
# 第五步
# 用conda list 验证是否安装好
# 第六步 设置国内镜像
由于 Anaconda.org 的服务器在国外,在国内使用 conda 下载速度通常很慢,因此一般改用清华 TUNA 等国内镜像源。
# 第七步 添加Anaconda的TUNA镜像
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/
conda config --add channels https://mirrors.ustc.edu.cn/anaconda/pkgs/main/
# 设置搜索时显示通道地址
conda config --set show_channel_urls yes
ok!
https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/
main
######################################### blastall 参数详细说明 ######################
参考:https://www.omicsclass.com/article/504
##
cp /home/manager/gene_family/my_gene_family/Arabidopsis_thaliana.TAIR10.41.gff3 .
awk -F "[=;\t]" '$3 == "gene" {print$1"\t"$10"\t"$4"\t"$5}' Arabidopsis_thaliana.TAIR10.41.gff3|head -n 10
结果如下:
Chr1 AT1G01010 3631 5899
Chr1 AT1G01020 5928 8737
Chr1 AT1G01030 11649 13714
Chr1 AT1G01040 23146 31227
Chr1 AT1G01050 31170 33153
Chr1 AT1G01060 33379 37871
Chr1 AT1G01070 38752 40944
Chr1 AT1G01073 44677 44787
Chr1 AT1G01080 45296 47019
Chr1 AT1G01090 47485 49286
Circos 软件安装
## cpanm是安装Perl模块的最方便的方法。自动下载安装依赖包。使用CPAN shell或下载源码包安装模块,遇到大量依赖关系,非常头痛。
1. 安装cpanm
cpanm其实是一个可执行文件而已。将它下载到bin目录,然后添加执行权限就可以了。
wget http://xrl.us/cpanm --no-check-certificate -O /usr/bin/cpanm
chmod 700 /usr/bin/cpanm
2. 使用cpanm安装模块
cpanm -h
3. 安装Circos 必要perl模块(这一步可能非常繁琐)
sudo cpanm Clone Config::General Font::TTF::Font GD GD::Polyline Math::Bezier Math::Round Math::VecStat Params::Validate Readonly Regexp::Common SVG Set::IntSpan Statistics::Basic Text::Format
4. 所有模块安装完后再检查一遍:
./circos -modules
# 全部为ok即可
5. 切换目录到circos
cd /home/manager/biosoft/circos/
6. 解压
tar xvf circos-0.69-9.tgz
ll
总用量 29M
drwxrwxr-x 10 manager manager 4.0K 11月 26 16:27 circos-0.69-9
-rw-rw-r-- 1 manager manager 29M 11月 25 11:27 circos-0.69-9.tgz
7. 生成软链接
cd /home/manager/biosoft/circos/
ln -s circos-0.69-9 current # 生成软链接
ll
总用量 29M
drwxrwxr-x 10 manager manager 4.0K 11月 26 16:27 circos-0.69-9
-rw-rw-r-- 1 manager manager 29M 11月 25 11:27 circos-0.69-9.tgz
lrwxrwxrwx 1 manager manager 13 11月 26 17:23 current -> circos-0.69-9
8. 进入.zshrc 文件,在文件最后一行加入:
echo 'export PATH=/home/manager/biosoft/circos/current/bin:$PATH' >>~/.zshrc
source ~/.zshrc
9. 检查是否安装好
circos -v
########################################################################
# mkdir cir && cd cir
# ll
-rw-rw-r-- 1 manager manager 204 11月 25 10:33 chr.info
-rw-rw-r-- 1 manager manager 288 11月 25 10:33 cir.sh
-rw-rw-r-- 1 manager manager 2.5K 11月 25 10:33 config1.txt
-rw-rw-r-- 1 manager manager 2.9K 11月 25 10:33 config2.txt
-rw-rw-r-- 1 manager manager 1.7K 11月 25 10:33 genome.txt
-rw-rw-r-- 1 manager manager 200 11月 25 10:33 link.txt
drwxrwxr-x 2 manager manager 4.0K 11月 25 10:33 result
-rw-rw-r-- 1 manager manager 320 11月 25 10:33 text.txt
################
cd /home/manager/gene_family/my_gene_family/circos_plot
circos -conf /home/manager/gene_family/my_gene_family/circos_plot/conf/config2.txt
配置文件
# 1. 染色体文件
# chr.info
第一列 第二列 第三列 第四列 第五列 第六列 第七列
chr - chr01 Chr1 0 302956453 chr1
chr - chr02 Chr2 0 241307389 chr2
chr - chr03 Chr3 0 232953155 chr3
chr - chr04 Chr4 0 242006640 chr4
chr - chr05 Chr5 0 215148664 chr5
chr - chr06 Chr6 0 165010417 chr6
# 前两列固定,
# 第三列:基因组文件中染色体的编号;
# 第四列:实际希望在图中展示的染色体编号;
# 第五、六列为染色体起始和终止位置(长度);
# 第七列:颜色设置(默认有对应颜色,但可以自己修改)
# 2. 共线性文件
# link.txt
1 chr01 230647852 230649724 chr05 23322345 23324725
2 chr01 249064852 249066345 chr05 20077643 20079139
3 chr02 43381422 43385332 chr05 172468991 172471289
4 chr04 239656815 239657190 chr05 73785793 73787640
备注:组内的文件如上;
# 3. 共线性区块文件
1 chr04 235985192 238520393 chr05 89127420 77919313
2 chr01 177745754 179756069 chr01 263221976 261689189
3 chr02 226660231 227489681 chr04 21034937 23267356
4 chr01 257237981 259662579 chr05 16839069 18254076
5 chr02 7313083 10264901 chr04 29042531 25887059
6 chr01 72781187 78190879 chr02 213991895 211892225
7 chr03 10161437 12538552 chr06 126316653 127947519
8 chr01 13199428 16659014 chr05 33280286 36117488
9 chr02 11039996 22253136 chr05 197970559 185883292
10 chr01 247942338 252515573 chr05 21179147 18361206
11 chr03 9637008 38688558 chr08 18392025 3044143
12 chr04 235545789 235928643 chr05 89236231 93996019
13 chr01 263545596 301474757 chr05 13038255 960992
14 chr02 224426294 224827815 chr04 16011093 15055409
15 chr04 235076669 235495146 chr05 101090117 95339508
16 chr01 100393815 104124989 chr05 43593097 44784264
17 chr05 40695082 42005127 chr06 89614367 90338798
18 chr01 79166192 82359578 chr02 15670012 13962160
19 chr01 3635462 4144525 chr01 78813994 79779467
20 chr02 191431054 193577343 chr05 207321134 206112159
21 chr01 3131661 3450838 chr01 228088856 226688602
22 chr01 225786615 242895913 chr05 20236510 33124260
23 chr02 236303593 238275076 chr05 3518540 4544483
24 chr05 57879365 64457951 chr06 85758193 80992737
# 4. 文本文件(text.txt)
# 前三列:基因具体的位置信息; 第四列: 展示的基因名称
1 chr01 230647852 230649724 Zm00008a004052
2 chr05 23322345 23324725 Zm00008a019932
3 chr01 249064852 249066345 Zm00008a004460
4 chr05 20077643 20079139 Zm00008a019854
5 chr02 43381422 43385332 Zm00008a007262
6 chr05 172468991 172471289 Zm00008a022078
7 chr04 239656815 239657190 Zm00008a018843
8 chr05 73785793 73787640 Zm00008a020843
# 5. 主配置文件(config1.txt)
chromosomes_units=1000000 # 针对整个染色体长度的配置,单位为 Mb
chromosomes_reverse=/chr[01]/ #
<ideogram> # 对染色体的细节设置
fill=yes #颜色填充
label_font=default
label_parallel=yes
label_radius=dims(image,radius)-60p
label_size=45
radius=0.90r # 离圆心的位置
show_label=yes
<spacing>
default=0.005r # 染色体间隙
</spacing>
stroke_color=dgrey # 染色体轮廓的颜色设置
stroke_thickness=2p # 染色体轮廓的宽度设置
thickness=0.03r
</ideogram>
karyotype=/home/manager/share/cir/chr.info # 设置文件路径
<links>
bezier_radius=0r # 连线的弯曲度
bezier_radius_purity=0.75 # 两个基因连线之间的弯曲度
color=black
crest=0.5
<link> # 对连线细节的设置
bezier_radius=0r
bezier_radius_purity=0.75
color=set2-8-qual-1
crest=0.5
file=/home/manager/share/cir/link.txt # 设置link文件路径
radius=0.88r
<rules> # 连线规则
<rule>
color=red
condition=var(intrachr)
</rule>
<rule>
color=green
condition=var(interchr) # 染色体之间的连线颜色
</rule>
</rules>
thickness=6 # 染色体之间的连线厚度
z=20 # 图层位置,越大越上
</link>
<link>
bezier_radius=0r
bezier_radius_purity=0.75
color=230,230,230,0.2 # rgb 颜色格式,关于透明的设置
crest=0.5
ribbon=yes # 大区块条带设置
file=/home/manager/share/cir/genome.txt
radius=0.88r
<rules>
<rule>
condition=var(intrachr)
</rule>
<rule>
condition=var(interchr)
</rule>
</rules>
thickness=1
z=15
</link>
radius=0.40r
thickness=1
</links>
<plots>
<plot>
color=set2-8-qual-2
file=/home/manager/share/cir/text.txt #txt 文件路径
label_font=light
link_color=black
link_dims=0p,2p,5p,2p,2p
link_thickness=2p
r0=0.88r # 起始位置
r1=0.99r # 终止位置
rpadding=5p
show_links=no
type=text
</plot>
type=histogram
</plots>
show_tick_labels=yes
show_ticks=yes
spacing=10u
<ticks> ## 对刻度的设置
color=black
format=%d
multiplier=1e-6
radius=1r
thickness=2p
<tick>
size=10p
spacing=5u
</tick>
<tick>
color=black
format=%d
label_offset=10p
label_size=25p
show_label=yes
size=15p
spacing=25u
thickness=4p
</tick>
</ticks>
<colors>
<<include etc/colors.conf>>
<<include etc/brewer.conf>>
#<<include etc/colors_fonts_patterns.conf>>
#<<include colors.ucsc.conf>>
#<<include colors.hsv.conf>>
</colors>
<fonts>
<<include etc/fonts.conf>>
</fonts>
<image>
<<include etc/image.conf>>
</image>
<<include etc/housekeeping.conf>>
to be continued ....