# 2019290183姜朱玹
#
# 2020-10-11
# Word count: 727 | Reading time ≈ 3 min
install.packages("vcd")
install.packages("psych")
install.packages("data.table") #执行命令library(gclus)
# latest development version:data.table::update.dev.pkg()
install.packages("e1071")
library(e1071) 
library(vcd)
library(data.table)

# 3.1 ----
# Exercise 3.1: 50 observed online purchase amounts.
Q3_1 <- c(
  1004, 602, 1540, 522, 878, 916, 1166, 1062, 344, 1200,
  921, 990, 1309, 528, 838, 1492, 928, 1299, 1107, 981,
  928, 1135, 789, 1018, 905, 935, 939, 729, 1802, 645,
  1148, 877, 2270, 957, 840, 576, 1110, 570, 1253, 1133,
  1416, 1380, 513, 1423, 1224, 289, 1247, 657, 1816, 1481
)

# (1) Mean, standard deviation, range, and interquartile range.
mean(Q3_1)
sd(Q3_1)
diff(range(Q3_1)) # range = max - min
quantile(Q3_1, probs = c(0.25, 0.75)) # the two quartiles
# The exercise asks for the interquartile range itself (Q3 - Q1); listing
# quantiles alone never produced that number.
Q3_1_iqr <- IQR(Q3_1)
Q3_1_iqr

# (2) 10%, 25%, 50%, 75%, and 90% quantiles. The 50% point (median) is part
# of the requested set and must be included in `probs`.
Q3_1_quantiles <- quantile(Q3_1, probs = c(0.10, 0.25, 0.50, 0.75, 0.90))
Q3_1_quantiles

# (3) Standard scores (z-scores) and outlier detection.
scale(Q3_1)
plot(density(Q3_1)) # kernel density estimate of the amounts
boxplot(Q3_1) # points drawn beyond the whiskers are outlier candidates
# Extract the flagged points. For this data the only one is the maximum
# (2270), i.e. the outlier lies above the upper whisker.
boxplot.stats(Q3_1)$out
# (4) Skewness and kurtosis coefficients, used to describe the shape of the
# purchase-amount distribution.
e1071::skewness(x = Q3_1) # author's reading: slightly skewed to the right
e1071::kurtosis(x = Q3_1)
# Author's note: the kurtosis of a normal distribution is the constant 3 and
# that of a uniform distribution is 1.8; by the mean inequality the
# coefficient is bounded below by 1 and above by the number of observations
# (the author marked this last point as uncertain).
# NOTE(review): e1071::kurtosis() is documented as returning *excess*
# kurtosis (normal = 0, not 3) - confirm before comparing against 3.


# 3.2 ----
# Exercise 3.2: 15 measurements for each of three methods (方法1 to 方法3).
方法1 <- c(164, 178, 168, 165, 170, 165, 164, 168, 164, 162, 163, 166, 167, 166, 165)
方法2 <- c(129, 130, 129, 130, 131, 130, 129, 127, 128, 128, 127, 128, 128, 125, 132)
方法3 <- c(125, 126, 126, 127, 126, 128, 127, 126, 127, 127, 125, 126, 116, 126, 125)
Q3_2 <- data.table(方法1, 方法2, 方法3)

# Show the combined table. A reference to `exercise4_1` used to sit here; that
# object is never defined in this script and stopped the run with an error.
Q3_2

# Spread of each method (each statistic is printed once).
sd(方法1)
sd(方法2)
sd(方法3)

# Index plot of the first method's raw values.
plot(方法1)

# psych is installed by this script but never attached with library(), so
# describe() is called through its namespace.
psych::describe(方法1)
psych::describe(方法2)
psych::describe(方法3)

# Histograms side by side; put the previous plot layout back afterwards so
# later figures are not squeezed into a 1 x 3 grid.
old_par <- par(mfrow = c(1, 3))
hist(方法1)
hist(方法2)
hist(方法3)
par(old_par)

# Skewness of each method, shown twice: once via skewness() and once by hand
# as the mean of the cubed standard scores (standardised with sd()).
cubed_z_mean <- function(v) {
  z <- (v - mean(v)) / sd(v)
  mean(z^3)
}

skewness(方法1)
cubed_z_mean(方法1)
skewness(方法2)
cubed_z_mean(方法2)
skewness(方法3)
cubed_z_mean(方法3)
# Author's conclusion: the skewness values rank method 3 < method 2 <
# method 1, which the author reads as running from "unevenly distributed"
# (method 3) to "evenly distributed" (method 1).
# NOTE(review): skewness is signed, so the smallest value is not
# automatically the least symmetric sample - confirm this interpretation.


# USArrests ----
# Built-in R dataset USArrests: four indicators for each of the 50 US states.
# (1) Use suitable statistics and plots to describe how each of the four
#     indicators is distributed.
attributes(USArrests)
summary(USArrests) # min / quartiles / mean / max per indicator

# Standardise into a separate object. Assigning the result back to
# `USArrests` would mask the built-in data frame with a matrix for the rest
# of the session.
arrests_scaled <- scale(USArrests)
boxplot(arrests_scaled, main = "Standardised USArrests indicators")

# Pairwise correlations (identical for raw and standardised columns).
cor(USArrests)
round(cor(USArrests), 2)

# Heatmap of the three crime-rate columns only; UrbanPop (the urban
# population percentage) is left out. heatmap() needs a numeric matrix.
crime_cols <- c("Murder", "Assault", "Rape")
x <- arrests_scaled[, crime_cols]
result <- heatmap(x,
  scale = "column", Colv = NA, cexCol = 1,
  main = "Violent Crime Rates by US State (1973)"
)

# (2) For each crime indicator, list the 5 safest and the 5 least safe states.
# The heatmap's `rowInd` is a dendrogram (clustering) order, not a ranking
# on any single indicator, so each column is sorted explicitly instead.
rank_states <- function(col, n = 5L, decreasing = FALSE) {
  idx <- order(USArrests[[col]], decreasing = decreasing)
  rownames(USArrests)[head(idx, n)]
}
safest5 <- setNames(lapply(crime_cols, rank_states), crime_cols)
most_dangerous5 <- setNames(
  lapply(crime_cols, rank_states, decreasing = TRUE),
  crime_cols
)
safest5 # lowest rate first
most_dangerous5 # highest rate first
# Donate
# Copyright: Copyright is owned by the author. For commercial reprints, please contact the author for authorization. For non-commercial reprints, please indicate the source.