【sed】笔记
2019-01-22 本文已影响9人
caokai001
Day2 生信宝典sed
一.给序列起名字
ct@ehbio:~/SXBD$ cat seq
ACDGTFGGCATGCDTGD
ACDGAGCDTAGCDGTA
CAGDTAGDCTADTG
ct@ehbio:~/SXBD$ sed = seq
1
ACDGTFGGCATGCDTGD
2
ACDGAGCDTAGCDGTA
3
CAGDTAGDCTADTG
# N 把下一行追加到模式空间(同时缓冲两行);s/^/>/ 中的 ^ 只匹配模式空间的开头,所以只在第一行(序号行)行首加上 >
ct@ehbio:~/SXBD$ sed = seq | sed 'N;s/^/>/;'
>1
ACDGTFGGCATGCDTGD
>2
ACDGAGCDTAGCDGTA
>3
CAGDTAGDCTADTG
二.记忆匹配
\(\) 启动记忆匹配(sed 默认的基本正则中括号需要转义);\1 为第一个匹配项,\2 为第二个匹配项;匹配项的编号由左括号出现的先后顺序决定,第一个左括号括起来的内容为 \1。
ct@ehbio:~/SXBD$ echo "hah ehbio hah" | sed 's/ \(.*\) /\t\1\t\1\t/'
hah ehbio ehbio hah
奇偶数行处理
ct@ehbio:~/SXBD$ echo -e "odd\neven\nodd\neven"
odd
even
odd
even
# 奇偶数行合并
ct@ehbio:~/SXBD$ echo -e "odd\neven\nodd\neven" | sed 'N;s/\n/\t/'
odd even
odd even
# 取出偶数行,比较简单
# 注意 n (小写)会读入下一行并覆盖模式空间;配合 -n 时被覆盖的奇数行不会输出,所以 p 打印的只有偶数行
ct@ehbio:~/SXBD$ echo -e "odd\neven\nodd\neven" | sed -n 'n;p'
even
even
# 取出奇数行
# 用 N 把相邻两行都读进模式空间,然后把换行符及其后的偶数行替换为空,再输出
# 注意:若总行数为奇数,最后一行执行 N 时没有下一行可读,GNU sed 在 -n 下会直接退出而不输出该行;更简单的写法是 sed -n 'p;n'
ct@ehbio:~/SXBD$ echo -e "odd\neven\nodd\neven" | sed -n 'N;s/\n.*//;p'
odd
odd
Day1 fastq转成fasta
### cat test1.fq
@SRR400264.2496 HWI-ST216_0180:3:1101:11339:2337 length=36
CTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCC
+SRR400264.2496 HWI-ST216_0180:3:1101:11339:2337 length=36
HHHHHHHHHHHHHHHHHHGHHHHGHHGHHGHHFEHH
@SRR400264.2497 HWI-ST216_0180:3:1101:11298:2341 length=35
AGCTTTTTTTTTTCTTTTTCTTTTTTGAGATGGCA
+SRR400264.2497 HWI-ST216_0180:3:1101:11298:2341 length=35
HHHHHHHHHHHHHGHHHHHHHHHHHHDFHAF=GGF
@SRR400264.2498 HWI-ST216_0180:3:1101:11256:2344 length=33
CTGTCTTCTTCCCCAGTTCATCTGCATCTCGTT
+SRR400264.2498 HWI-ST216_0180:3:1101:11256:2344 length=33
HHHHHHHHHHHHHHHHHGGHGHHGHGHHFHHHH
@SRR400264.2499 HWI-ST216_0180:3:1101:11384:2357 length=30
ATCGATAACTAAACTTCCTTTCCATATGAC
+SRR400264.2499 HWI-ST216_0180:3:1101:11384:2357 length=30
HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
@SRR400264.2500 HWI-ST216_0180:3:1101:11352:2366 length=35
CCGTAACTCTTATAAGCTAGCTTATATAAGAGCTT
+SRR400264.2500 HWI-ST216_0180:3:1101:11352:2366 length=35
HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHG
### $ cat test1.fq |sed -n '1~4s/^@/>/p ;2~4p' |sed 's/ .*//'
# -n :禁止 sed 默认输出,只输出被 p 显式打印的行
# 1~4 表示从第 1 行起每隔 4 行(GNU sed 扩展),即序列名行,将行首 @ 替换为 > 后输出;2~4 为序列行,直接输出
>SRR400264.2496
CTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCC
>SRR400264.2497
AGCTTTTTTTTTTCTTTTTCTTTTTTGAGATGGCA
>SRR400264.2498
CTGTCTTCTTCCCCAGTTCATCTGCATCTCGTT
>SRR400264.2499
ATCGATAACTAAACTTCCTTTCCATATGAC
>SRR400264.2500
CCGTAACTCTTATAAGCTAGCTTATATAAGAGCTT