[Bioconductor] learning1

2019-05-16  本文已影响0人  happyxhz
# [study]Bioconductor
# [2018-05-R与Bioconductor的入门课](https://www.bilibili.com/video/av24355734/?p=7)
# 2019-03-25 hesy 
#

rm(list=ls())

# set packages: eg. GenomicRanges
source("https://bioconductor.org/biocLite.R")
biocLite('Biostrings')
biocLite('GenomicRanges')


#browse the manual
browseVignettes(package = "GenomicRanges")

#easy to go
library(GenomicRanges)
gr = GRanges(seq = "chr1", ranges = IRanges(start =1, end=10))
gr
gr1 = GRanges(seq = c("chr1",'chr2','chr3'),
             ranges = IRanges(start =c(10,100,150), end=c(20,130,200)),
             strand = c("*","+","-"),
             bbq =1:3,
             score = 6:8,
             GC = seq(0,1,length.out = 3))
gr1$bbq

# Rle
gr3 = GRanges(seq = Rle(c("chr1","chr2"),c(2,4)),
              ranges = IRanges(start = 16:21,width=5))
gr3
seqnames(gr3)
as.vector(seqnames(gr3))              

end(gr3)

#meta cols 取其他列
mcols(gr1)[,1]

length(gr3)
names(gr3) <-1:6
gr3
sort(gr3) # sort by coordinary
gr1[order(gr1$GC,decreasing = T)]
#过滤
gr1[gr1$bbq<3 & gr1$GC<0.3]
# merge GRange obj
gr4 =c(gr3,gr3)
gr4

# flank shift reduce disjoin
flank(gr3,width=5)  # upstream 5 bp 
flank(gr3,width = 10,start=F ) # downstream 

shift(gr3,shift=-10)

#reduce disjoin

#gtf -> GRange obj -> reduce -> total(exon)
#gtf -> disjoin -> remove overlap region -> specific region 
#-> alternative splicing

findOverlaps(gr6,gr7)

gr6[gr6 %over% gr7]

# 统计基因组exon长度
library(EnsDb.Hsapiens.V75)

if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install("EnsDb.Hsapiens.v75", version = "3.8")


# PART.2 BioStrings

library(Biostrings)
str1 = DNAString(x='ACTGACTGAATTCCGTA')
reverse(str1)
reverseComplement(str1)
translate(str1)
alphabetFrequency(str1)
IUPAC_CODE_MAP
latterFrequency(str1,'GC',as.prob =T)

# PART.3 BSgenome 提供标准的注释信息

library(BSgenome)
available.genomes()

library(BSgenome.Hsapiens.UCSC.hg19)
hg19.genome = BSgenome.Hsapiens.UCSC.hg19
hg19.genome

上一篇 下一篇

猜你喜欢

热点阅读