library(Keng)
data("depress")
# List all variable names
# names(depress)
# Average the three depression scores and use the mean as the true score of
# trait depression over this period.
# Note: the score at each time point has its own true score (T1, T2, T3) plus
# error; a true score is the participant's actual depression level at that
# time point.
# The vector is stored as `true_score` rather than `T` so that R's built-in
# `T` (shorthand for TRUE) is not masked in the global environment. The data
# frame column is still called `T`, so `dat$T` and `dm1 ~ T` work as before.
score_cols <- c("dm1", "dm2", "dm3")
true_score <- rowMeans(depress[score_cols])
# Combine the data; cbind() performs a column bind
dat <- cbind(depress[score_cols], T = true_score)
# Inspect the data: reliability is a property of a group of participants, so
# every reliability computation below works within columns
head(dat)
## dm1 dm2 dm3 T
## 1 1.55 1.80 2.00 1.783333
## 2 1.55 1.85 2.25 1.883333
## 3 1.90 1.65 1.65 1.733333
## 4 1.35 1.65 1.60 1.533333
## 5 1.30 2.25 2.10 1.883333
## 6 1.95 1.60 1.65 1.733333
# Reliability of the depression scale
# Equivalent definition 1 of reliability: var(true score) / var(observed score)
## Reliability of dm1, i.e. of the depression scale at the first time point
var(dat$T) / var(dat$dm1)
## [1] 1.012509
## Reliability at all three time points; vapply() guarantees one numeric
## value per column
vapply(
  X = dat[score_cols],
  FUN = function(col) var(dat$T) / var(col),
  FUN.VALUE = numeric(1)
)
## dm1 dm2 dm3
## 1.0125092 0.7252350 0.7701847
# Equivalent definition 2 of reliability: squared correlation between the
# observed score and the true score
## Reliability at Time 1
cor(dat$dm1, dat$T)^2
## [1] 0.698917
## Extension: estimate reliability with the coefficient of determination of a
## regression.
## Seen through regression, definitions 1 and 2 in fact estimate the true
## score in different ways, so the estimated true scores differ.
fit <- lm(dm1 ~ T, dat)
summary(fit)
##
## Call:
## lm(formula = dm1 ~ T, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.61427 -0.13115 -0.01188 0.11331 0.48423
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.26331 0.11459 2.298 0.0238 *
## T 0.83083 0.05685 14.614 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2111 on 92 degrees of freedom
## Multiple R-squared: 0.6989, Adjusted R-squared: 0.6956
## F-statistic: 213.6 on 1 and 92 DF, p-value: < 2.2e-16
## Reliability at all three time points under definition 2
vapply(
  X = dat[score_cols],
  FUN = function(col) cor(dat$T, col)^2,
  FUN.VALUE = numeric(1)
)
## dm1 dm2 dm3
## 0.6989170 0.8917877 0.8658211
# Equivalent definition 3 of reliability
# depress数据中没有平行测验,这里不再演示
第4章
Chapter 4
1 基于信度的三种等价定义的信度计算
注意查看数据,理解信度是一组被试的信度,信度的计算涉及列内的计算。相应的R代码如下:
2 重测信度
以抑郁量表为例。
# Test-retest reliability: pairwise correlations among dm1, dm2 and dm3
cor(dat[, c("dm1", "dm2", "dm3")])
## dm1 dm2 dm3
## dm1 1.0000000 0.6727909 0.6355674
## dm2 0.6727909 1.0000000 0.8689690
## dm3 0.6355674 0.8689690 1.0000000
# psych::corr.test() reports the same correlations together with the sample
# size and p-values
library(psych)
corr.test(dat[c("dm1","dm2","dm3")])
## Call:corr.test(x = dat[c("dm1", "dm2", "dm3")])
## Correlation matrix
## dm1 dm2 dm3
## dm1 1.00 0.67 0.64
## dm2 0.67 1.00 0.87
## dm3 0.64 0.87 1.00
## Sample Size
## [1] 94
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## dm1 dm2 dm3
## dm1 0 0 0
## dm2 0 0 0
## dm3 0 0 0
##
## To see confidence intervals of the correlations, print with the short=FALSE option
3 分半信度
以抑郁量表为例。
3.1 手动分半计算信度
# Manual split-half reliability
# Item variable names: "depr1i1" through "depr1i20"
deprItems <- paste0("depr1i", 1:20)
# Randomly draw half of the items; the seed is fixed before sampling
set.seed(20250917)
half1 <- sample(deprItems, size = 10)
# setdiff() takes the complement, i.e. the items not drawn into half1
half2 <- setdiff(deprItems, half1)
# rowSums() gives each participant's score on the two halves and on the
# full scale
X_half1 <- rowSums(depress[half1])
X_half2 <- rowSums(depress[half2])
X <- rowSums(depress[deprItems])
# Correlation between the scores on the two halves
r_half <- cor(X_half1, X_half2)
# Correct the half-test correlation to obtain the split-half reliability
## Spearman-Brown correction
(2 * r_half) / (1 + r_half)
## [1] 0.7183504
## Flanagan correction
half_var_sum <- var(X_half1) + var(X_half2)
2 * (1 - half_var_sum / var(X))
## [1] 0.7040296
## Rulon correction
half_diff <- X_half1 - X_half2
1 - var(half_diff) / var(X)
## [1] 0.7040296
3.2 使用psych程序包中的splitHalf()计算分半信度
library(psych)
# Split-half reliabilities of the 20 items with brute = TRUE; per the psych
# documentation this evaluates all possible splits, and raw = TRUE keeps the
# individual split-half values in the result
out <- splitHalf(depress[deprItems], raw = TRUE, brute = TRUE)
print(out)
## Split half reliabilities
## Call: splitHalf(r = depress[deprItems], raw = TRUE, brute = TRUE)
##
## Maximum split half reliability (lambda 4) = 0.9
## Guttman lambda 6 = 0.82
## Average split half reliability = 0.77
## Guttman lambda 3 (alpha) = 0.77
## Guttman lambda 2 = 0.78
## Minimum split half reliability (beta) = 0.52
## Average interitem r = 0.14 with median = 0.14
## 2.5% 50% 97.5%
## Quantiles of split half reliability = 0.68 0.77 0.83
# Same analysis with n.sample = 100 instead of brute = TRUE (per the psych
# documentation, a sample of 100 splits)
splitHalf(depress[deprItems], raw = TRUE, n.sample = 100)
## Split half reliabilities
## Call: splitHalf(r = depress[deprItems], raw = TRUE, n.sample = 100)
##
## Maximum split half reliability (lambda 4) = 0.85
## Guttman lambda 6 = 0.82
## Average split half reliability = 0.77
## Guttman lambda 3 (alpha) = 0.77
## Guttman lambda 2 = 0.78
## Minimum split half reliability (beta) = 0.64
## Average interitem r = 0.14 with median = 0.14
## 2.5% 50% 97.5%
## Quantiles of split half reliability = 0.69 0.77 0.83
注意,splitHalf()的计算结果中,Guttman lambda 6、Guttman lambda 3 (alpha)、Guttman lambda 2、Average interitem r、median r 不会随分半方案的变化而变化。查看psych的Reference manual可知,lambda 2、3、6是基于所有题项得分的相关计算得到的。Maximum split half reliability (lambda 4)、Average split half reliability、Minimum split half reliability (beta)会变化。
# List every component of the `splitHalf()` result
names(out)
## [1] "maxrb" "minrb" "maxAB" "minAB" "meanr" "av.r" "med.r"
## [8] "alpha" "lambda2" "lambda6" "raw" "ci" "covar" "Call"
# Item splits that produced the maximum and the minimum split-half reliability
out[["maxAB"]]
## $A
## [1] "depr1i1" "depr1i2" "depr1i7" "depr1i8" "depr1i11" "depr1i12"
## [7] "depr1i15" "depr1i16" "depr1i17" "depr1i19"
##
## $B
## [1] "depr1i3" "depr1i4" "depr1i5" "depr1i6" "depr1i9" "depr1i10"
## [7] "depr1i13" "depr1i14" "depr1i18" "depr1i20"
out[["minAB"]]
## $A
## [1] "depr1i1" "depr1i3" "depr1i4" "depr1i5" "depr1i7" "depr1i8"
## [7] "depr1i9" "depr1i10" "depr1i13" "depr1i19"
##
## $B
## [1] "depr1i2" "depr1i6" "depr1i11" "depr1i12" "depr1i14" "depr1i15"
## [7] "depr1i16" "depr1i17" "depr1i18" "depr1i20"
查看splitHalf()的代码,确认Average split half reliability(meanr)的计算方法。可得Average split half reliability(meanr)就是所有分半相关系数的算术平均值。然而,统计学家建议使用Fisher Z Transformation计算平均相关系数。