[Bioconductor] learning1
2019-05-16 本文已影响0人
happyxhz
# [study]Bioconductor
# [2018-05-R与Bioconductor的入门课](https://www.bilibili.com/video/av24355734/?p=7)
# 2019-03-25 hesy
#
rm(list=ls())
# set packages: eg. GenomicRanges
source("https://bioconductor.org/biocLite.R")
biocLite('Biostrings')
biocLite('GenomicRanges')
#browse the manual
browseVignettes(package = "GenomicRanges")
#easy to go
library(GenomicRanges)
gr = GRanges(seq = "chr1", ranges = IRanges(start =1, end=10))
gr
gr1 = GRanges(seq = c("chr1",'chr2','chr3'),
ranges = IRanges(start =c(10,100,150), end=c(20,130,200)),
strand = c("*","+","-"),
bbq =1:3,
score = 6:8,
GC = seq(0,1,length.out = 3))
gr1$bbq
# Rle
gr3 = GRanges(seq = Rle(c("chr1","chr2"),c(2,4)),
ranges = IRanges(start = 16:21,width=5))
gr3
seqnames(gr3)
as.vector(seqnames(gr3))
end(gr3)
#meta cols 取其他列
mcols(gr1)[,1]
length(gr3)
names(gr3) <-1:6
gr3
sort(gr3) # sort by coordinary
gr1[order(gr1$GC,decreasing = T)]
#过滤
gr1[gr1$bbq<3 & gr1$GC<0.3]
# merge GRange obj
gr4 =c(gr3,gr3)
gr4
# flank shift reduce disjoin
flank(gr3,width=5) # upstream 5 bp
flank(gr3,width = 10,start=F ) # downstream
shift(gr3,shift=-10)
#reduce disjoin
#gtf -> GRange obj -> reduce -> total(exon)
#gtf -> disjoin -> remove overlap region -> specific region
#-> alternative splicing
findOverlaps(gr6,gr7)
gr6[gr6 %over% gr7]
# 统计基因组exon长度
library(EnsDb.Hsapiens.V75)
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("EnsDb.Hsapiens.v75", version = "3.8")
# PART.2 BioStrings
library(Biostrings)
str1 = DNAString(x='ACTGACTGAATTCCGTA')
reverse(str1)
reverseComplement(str1)
translate(str1)
alphabetFrequency(str1)
IUPAC_CODE_MAP
latterFrequency(str1,'GC',as.prob =T)
# PART.3 BSgenome 提供标准的注释信息
library(BSgenome)
available.genomes()
library(BSgenome.Hsapiens.UCSC.hg19)
hg19.genome = BSgenome.Hsapiens.UCSC.hg19
hg19.genome