# Population ----
# Simulate a finite population of IQ scores.
# Population mean.
miu <- 100
# Population standard deviation.
delta <- 15
# Number of individuals in the population, defined once so the score vector
# and the ID vector always have the same length.
population_size <- 10000
# Fix the random seed so the simulated scores are reproducible.
set.seed(20240906)
# Draw one normally distributed IQ score per individual
# (mean = miu, standard deviation = delta).
IQ <- rnorm(population_size, mean = miu, sd = delta)
# Participant IDs 1..population_size. seq_len() replaces the redundant
# seq(1:10000) wrapper and yields the same integer vector.
ID <- seq_len(population_size)
# Store IDs and scores together in one data frame (row i holds ID i).
Population10000 <- data.frame(ID, IQ)
# Open the whole table in the data viewer (interactive use only).
# View(Population10000)
# Show the first 6 rows of the table.
head(Population10000)
## ID IQ
## 1 1 100.5975
## 2 2 121.5878
## 3 3 106.6454
## 4 4 111.8736
## 5 5 114.5555
## 6 6 77.9921
# Draw one random sample (without replacement) from the population.
# The sample size is defined once so that the draw and the denominators of
# the standard-deviation formulas below cannot drift out of sync.
sample1_n <- 100
# Randomly pick the IDs of the sampled cases.
sample1ID <- sample(Population10000$ID, sample1_n)
# Select the rows of sample1; the drawn IDs are used directly as row indices.
sample1 <- Population10000[sample1ID, ]
# Mean IQ of sample1.
sample1_mean <- mean(sample1$IQ)
sample1_mean
## [1] 99.16138
# Standard deviation of sample1 itself (sum of squares divided by n).
sample1_sd <- sqrt(sum((sample1$IQ - sample1_mean)^2) / sample1_n)
sample1_sd
## [1] 14.07578
# Estimate of the population standard deviation based on sample1
# (sum of squares divided by n - 1).
sample1_sd_unbiased <- sqrt(
  sum((sample1$IQ - sample1_mean)^2) / (sample1_n - 1)
)
sample1_sd_unbiased
## [1] 14.14669
# sd() uses the n - 1 denominator as well, so it reproduces the estimate
# of the population standard deviation computed by hand above.
sd(sample1$IQ)
## [1] 14.14669
# Repeated sampling: draw 1000 samples of size 100 from the population of
# 10000 cases and store them in the list `samples`.
set.seed(20240906)
# lapply() performs the draws one after another, so the random-number stream
# is consumed in the same order as an explicit loop over 1:1000, without
# growing the list element by element.
samples <- lapply(seq_len(1000), function(index) {
  Population10000[sample(Population10000$ID, 100), ]
})
# Mean IQ of each sample: one mean per sample (1000 in total), returned
# directly as a numeric vector instead of a list that must be unlisted.
samples_means <- vapply(
  samples,
  function(one_sample) mean(one_sample$IQ),
  numeric(1)
)
head(samples_means)
## [1] 99.48403 99.52555 99.94199 98.66905 101.19954 101.14274
# Compare the x-axes and y-axes of the three distributions.
# Population distribution: the distribution of the 10000 individual cases.
hist(x = Population10000$IQ, xlim = c(40, 160), breaks = 10)