R描述性统计分析与绘图
# Load the auto-loan data set and compute the mean FICO score.
# NOTE(review): setwd() points at a machine-specific directory; adjust the
# path to wherever accepts.csv lives on your machine.
setwd("D:/《用商业案例学R语言数据挖掘》教材代码及数据/data")
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
dat0 <- read.csv("accepts.csv", header = TRUE)
View(dat0) # inspect the loaded data frame in the data viewer
fs <- dat0$fico_score
mean(fs, na.rm = TRUE) # mean of fico_score, ignoring missing values (NA)
[1] 693.5287
# Quartiles (25%, 50%, 75%) of fs, ignoring missing values.
# TRUE is spelled out because `T` is a reassignable variable, not a keyword.
quantile(fs, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
25% 50% 75%
653.0 693.0 735.5
# Histogram of fs with roughly 15 bins (a scalar `breaks` is a suggestion
# that hist() passes to pretty(), exactly as the `nclass` alias does).
hist(x = fs, breaks = 15)
直方图.png
# Show the structure of dat0: the type of each variable plus a preview
# of its first values.
str(object = dat0)
'data.frame': 5845 obs. of 24 variables:
 $ account_number: int 11613 13449 14323 15359 15812 16979 17842 19715 23924 24866 ...
 $ vehicle_year : int 1998 2000 1998 1997 2000 2000 2000 1994 1994 1999 ...
 $ bankruptcy_ind: Factor w/ 3 levels "","N","Y": 2 2 2 2 2 3 2 2 2 3 ...
 $ tot_tr : int 9 21 10 10 10 15 13 2 13 20 ...
 $ tot_open_tr : int 2 11 NA 5 2 4 4 NA 3 NA ...
 $ tot_rev_debt : int 506 34605 NA 4019 0 3651 2094 146 2602 1815 ...
 $ rev_util : int 101 60 0 68 0 64 10 55 47 87 ...
 $ purch_price : num 17200 19589 13595 12999 26328 ...
 $ down_pyt : num 0 684 0 3099 0 ...
 $ loan_amt : num 17200 19589 10500 10800 26328 ...
 $ tot_income : num 6550 4667 2000 1500 4144 ...
 $ used_ind : int 1 0 1 1 0 0 0 1 1 1 ...
# Frequency counts of bad_ind: defaulted versus normal customers.
table(dat0$bad_ind)
0 1
4648 1197
# Bar chart of the bad_ind frequency counts.
barplot(height = table(dat0$bad_ind))

# Histogram of fs demonstrating the common arguments: counts on the y axis
# (freq = TRUE), a main title, a subtitle, axis labels and a suggested
# number of bins.
hist(
  x = fs,
  breaks = 20,
  freq = TRUE,
  main = "fico_score",
  sub = "source:汽车贷款数据",
  xlab = "fico_score 打分",
  ylab = "频数"
)
Rplot01.png
箱线图的绘制