一般统计分析与假设检验
2020-02-28 本文已影响0人
挽山
步骤:
- 检验正态分布
- 检验方差齐性
- 检验统计学差异
# Load the data set (sampleTait) through an interactive file picker.
data.all <- read.csv(
  file.choose(),
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE,
  fileEncoding = "utf-8"
)
names(data.all)

# Data selection: age
library(plyr)

# Pull column 5 for each diagnosis group. `%in%` is used instead of `==` so
# that rows with a missing Diagnosis are excluded; with `==` they would show
# up as extra NA entries in both groups.
# NOTE(review): column 5 is presumably the age column - confirm against the
# names(data.all) output above.
age.ASD <- data.all[data.all$Diagnosis %in% "ASD", 5]
age.CTL <- data.all[data.all$Diagnosis %in% "CTL", 5]

# Merge the two groups even though their lengths differ: each group becomes a
# one-row data frame, rbind.fill() pads the shorter row with NA, and t() turns
# the two rows into two columns.
list_Age <- list(
  data.frame(t(age.ASD)),
  data.frame(t(age.CTL))
)
Age <- t(rbind.fill(list_Age))
colnames(Age) <- c("ASD", "CTL")

# Descriptive statistics
summary(Age)
sd(Age[, 1], na.rm = TRUE) # standard deviation of the first (ASD) column
# Box plot for a visual check of the two age columns.
# The x-axis ticks are suppressed here and redrawn below with group labels.
boxplot(
  Age,
  xaxt = "n",
  ylim = c(1.0, 70),
  ylab = 'Age(年/月)',
  main = '两组年龄分布比较'
)
# Redraw the horizontal axis (side 1) with one horizontal label per group.
axis(
  side = 1,
  at = c(1, 2),
  labels = c("ASD组", "对照组"),
  las = 1
)
# Normality tests, run separately for each group.
# Reference: https://www.it610.com/article/2580278.htm
# SAS convention:
#   n <= 2000 -> rely on Shapiro-Wilk (shapiro.test)
#   n >  2000 -> rely on Kolmogorov-Smirnov (lillie.test, R package nortest)
# SPSS uses 5000 as the threshold instead.
data.1 <- Age
head(data.1)

# shapiro.test() requires between 3 and 5000 non-missing values.
norm.ASD <- shapiro.test(data.1[, 1])
norm.ASD

norm.cTL <- shapiro.test(data.1[, 2])
norm.cTL
# Reshape the wide two-column matrix into long format: a single Age column
# plus a group label for every row.
# Missing values are dropped by value rather than by position, so an NA in
# the middle of a column cannot push a real observation out of the result.
asd.age <- as.data.frame(data.1[!is.na(data.1[, 1]), 1])
dim(asd.age)
colnames(asd.age) <- "Age"

ctl.age <- as.data.frame(data.1[!is.na(data.1[, 2]), 2])
dim(ctl.age)
colnames(ctl.age) <- "Age"

# From here on `Age` is the stacked long data frame, no longer the matrix.
Age <- rbind(asd.age, ctl.age)
head(Age)
dim(Age)

# One label per retained observation, in the same order as the stacked rows.
group <- c(
  rep("ASD", nrow(asd.age)),
  rep("CTL", nrow(ctl.age))
)
Age.2 <- cbind(Age, group)
head(Age.2)
# Homogeneity-of-variance (Levene) test, with `group` as the factor.
# Reference: https://www.jianshu.com/p/dc8896fcd505
library(car)
group <- factor(group, levels = c("ASD", "CTL"))
# Deviations are centred on the group medians.
levene.test <- leveneTest(
  y = Age.2[["Age"]],
  group = group,
  center = median
)
levene.test
# Non-parametric test ---------------------------------
# Mann-Whitney U (Wilcoxon rank-sum) test
library(ggpubr) # plotting reference: https://blog.csdn.net/weixin_40561293/article/details/85208567
# `paired` is deliberately not passed: the two-sample formula method is
# unpaired by default, and recent R releases (4.4.0 onward) stop with an
# error when `paired` is supplied to the formula method.
# The result is stored under the same name as the function; later calls to
# wilcox.test() still work because R skips non-function objects when it
# looks up a name in call position.
wilcox.test <- wilcox.test(Age ~ group, data = Age.2)
wilcox.test

# Parametric test: unpaired two-sample t-test, CTL as x and ASD as y.
# The result is stored but not printed here.
t.test <- t.test(
  Age.2[Age.2$group == "CTL", 1],
  Age.2[Age.2$group == "ASD", 1],
  paired = FALSE
)
# Data selection: sex
# Load the data set (sampleTait) again through the file picker.
data.all <- read.csv(
  file.choose(),
  header = TRUE,
  sep = ",",
  fileEncoding = "utf-8"
)
names(data.all)

# Columns are taken by position (4 and 6).
Diagnosis <- data.all[[4]]
Sex <- data.all[[6]]

# Diagnosis-by-sex cross-tabulation
x <- table(Diagnosis, Sex)
x

# Chi-square test: show the expected cell counts, then store the test result.
chisq.test(x)$expected
chisq.test <- chisq.test(x)
# Fisher's exact test on the same table.
fisher.test <- fisher.test(x)
# Save the test results to a .txt file.
# print() is called explicitly so the output is also written when the script
# is run through source(), where bare top-level expressions are not printed.
# The sink is closed in `finally` so that an error while printing does not
# leave console output redirected to the file.
sink("1-demo.sig_test.txt")
tryCatch(
  {
    print(norm.ASD)
    print(norm.cTL) # the control-group Shapiro-Wilk result is named norm.cTL
    print(wilcox.test)
    print(levene.test)
    print(t.test)
    print(chisq.test)
    print(fisher.test)
  },
  finally = sink()
)
- 求均值和标准差
# NOTE(review): this clears every object in the workspace, including all
# results computed earlier in the session.
rm(list = ls())
data <- read.table(file.choose(), header = TRUE, sep = "\t")
head(data)

# Expand the frequency table into raw observations: each value in the first
# column is repeated as many times as the count in the last column.
# rep() is vectorised over rows, so no loop or incremental c() is needed and a
# zero-row table yields an empty vector instead of an error. rev() reproduces
# the last-row-first ordering of the previous loop-based version.
results <- rev(rep(data[[1]], times = data[[ncol(data)]]))

# number <- apply(data[c('N')], 2, sum)
summary(results) # author's recorded value (8-1): mean = 82.94
sd(results) # author's recorded value (8-1): sd = 57.18782
功效分析
# References:
#   https://www.jianshu.com/p/ed20760ef243
#   https://blog.csdn.net/cyydjt/article/details/81711612
# install.packages('pwr')
library(pwr)

# 1. t-test
# Power is given (n is left unspecified).
pwr.t.test(
  d = 0.8,
  power = 0.8,
  sig.level = 0.05,
  type = "two.sample"
)

# Sample size is given (n is the size of one group; power is left unspecified).
pwr.t.test(
  n = 15,
  d = 0.8,
  sig.level = 0.05,
  type = "two.sample"
)