根据seqid提取fastq序列
工具:seqtk
head40.fq a.list从fasta/fastq文件中提取子集
seqtk subseq head40.fq a.list
提取fq时需要其文件开头用>: sed -i 's/@/>@/g' head40.fq
工具:seqkit
seqkit grep -f a.list head40.fq [输出格式没有楼上好]
samtools view WT-1.validpairs.bam | head
提取valid_seqid:
samtools view WT-1.validpairs.bam | awk '{print "@"$1}' | sort |uniq > WT-1.validpairs.seqid
提取原始fq_seqid:
zcat WT-1_R1.fq.gz | grep "@" | awk '{print $1}' | sort |uniq > WT-1.all.seqid
提取补集:
cat WT-1.all.seqid WT-1.validpairs.seqid WT-1.validpairs.seqid | sort | uniq -u > WT-1.discard.seqid
直接grep:
提取validpairs:
for i in `head -n 10 WT-1.validpairs.seqid `
do
echo "$i" grep validpairs ......
zcat ../02_rawfq/raw_fqgz_files/WT-1_R1.fq.gz | grep "$i" -A 3 >> WT-1.validpairs.R1.10.fq
zcat ../02_rawfq/raw_fqgz_files/WT-1_R2.fq.gz | grep "$i" -A 3 >> WT-1.validpairs.R2.10.fq
done
提取discard:
for i in `head -n 10 WT-1.discard.seqid `
do
echo "$i" grep discard ......
zcat ../02_rawfq/raw_fqgz_files/WT-1_R1.fq.gz | grep "$i" -A 3 >> WT-1. discard.R1.10.fq
zcat ../02_rawfq/raw_fqgz_files/WT-1_R2.fq.gz | grep "$i" -A 3 >> WT-1. discard.R2.10.fq
done