第4章

Chapter 4

发布于

2025年9月18日

1 基于信度的三种等价定义的信度计算

注意查看数据,理解信度是一组被试的信度,信度的计算涉及列内的计算。相应的R代码如下:

library(Keng)
data("depress")
# List all variable names
# names(depress)
# Use the mean of the three depression scores as the true score of trait depression over this period
#     Note: the depression score at each time point has its own true score (T1, T2, T3) plus error; that true score represents the participant's actual depression level at that time point
# NOTE(review): assigning to `T` masks R's built-in `T` shorthand for TRUE in the global environment; any later code in this session should spell out TRUE
T <- rowMeans(depress[c("dm1","dm2","dm3")])
# Combine the data; cbind() performs a column bind
dat <- cbind(depress[c("dm1","dm2","dm3")], T)
# Inspect the data: reliability is a property of a group of participants, so it is computed within columns
head(dat)
##    dm1  dm2  dm3        T
## 1 1.55 1.80 2.00 1.783333
## 2 1.55 1.85 2.25 1.883333
## 3 1.90 1.65 1.65 1.733333
## 4 1.35 1.65 1.60 1.533333
## 5 1.30 2.25 2.10 1.883333
## 6 1.95 1.60 1.65 1.733333

# Reliability of the depression scale
# Equivalent definition 1 of reliability: ratio of true-score variance to observed-score variance
## Reliability of dm1, i.e. of the depression scale at the first time point
## NOTE(review): the printed ratio for dm1 exceeds 1; T here is the mean over three occasions, not dm1's own true score
var(dat$T)/var(dat$dm1)
## [1] 1.012509
## Use sapply() to compute the reliability at all three time points
sapply(X = dat[c("dm1","dm2","dm3")], FUN = function(col) var(dat$T)/var(col))
##       dm1       dm2       dm3 
## 1.0125092 0.7252350 0.7701847
# Equivalent definition 2 of reliability: squared correlation between observed score and true score
## Reliability at Time 1
cor(dat$dm1, dat$T)^2
## [1] 0.698917
## Extension: estimate reliability with the coefficient of determination from a regression
##       Seen through regression, definitions 1 and 2 actually estimate the true score differently, so the estimated true scores differ.
##       The Multiple R-squared printed below equals the squared correlation computed above.
fit <- lm(dm1~T, dat)
summary(fit)
## 
## Call:
## lm(formula = dm1 ~ T, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.61427 -0.13115 -0.01188  0.11331  0.48423 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.26331    0.11459   2.298   0.0238 *  
## T            0.83083    0.05685  14.614   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2111 on 92 degrees of freedom
## Multiple R-squared:  0.6989, Adjusted R-squared:  0.6956 
## F-statistic: 213.6 on 1 and 92 DF,  p-value: < 2.2e-16
## Use sapply() to compute the reliability at all three time points
sapply(X = dat[c("dm1","dm2","dm3")], FUN = function(col) cor(dat$T, col)^2)
##       dm1       dm2       dm3 
## 0.6989170 0.8917877 0.8658211
# Equivalent definition 3 of reliability
# The depress data contain no parallel test, so this definition is not demonstrated here

2 重测信度

以抑郁量表为例。

# Compute the test-retest reliability of dm1, dm2 and dm3
# cor() returns the pairwise correlation matrix of the three measurement occasions
cor(dat[c("dm1","dm2","dm3")])
##           dm1       dm2       dm3
## dm1 1.0000000 0.6727909 0.6355674
## dm2 0.6727909 1.0000000 0.8689690
## dm3 0.6355674 0.8689690 1.0000000
library(psych)
# corr.test() reports the same correlations (rounded) together with the sample size and p values
corr.test(dat[c("dm1","dm2","dm3")])
## Call:corr.test(x = dat[c("dm1", "dm2", "dm3")])
## Correlation matrix 
##      dm1  dm2  dm3
## dm1 1.00 0.67 0.64
## dm2 0.67 1.00 0.87
## dm3 0.64 0.87 1.00
## Sample Size 
## [1] 94
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##     dm1 dm2 dm3
## dm1   0   0   0
## dm2   0   0   0
## dm3   0   0   0
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option

3 分半信度

以抑郁量表为例。

3.1 手动分半计算信度

# Manual split-half
# Item variable names depr1i1 ... depr1i20, generated rather than typed out by hand
deprItems <- paste0("depr1i", seq_len(20))
# Randomly draw half of the items; the seed is fixed so the split is reproducible,
# and the half size is derived from the item count instead of being hard-coded
set.seed(20250917)
half1 <- sample(x = deprItems, size = length(deprItems) %/% 2)
# setdiff() takes the complement, i.e. the remaining items
half2 <- setdiff(deprItems, half1)
# rowSums() gives every participant's sum score on the full scale and on the two halves
X <- rowSums(depress[deprItems])
X_half1 <- rowSums(depress[half1])
X_half2 <- rowSums(depress[half2])
# Correlation between the scores on the two halves
r_half <- cor(X_half1, X_half2)
# Correct the correlation to obtain the split-half reliability
## Spearman-Brown correction
2 * r_half / (1 + r_half)
## [1] 0.7183504
## Flanagan correction
2 * (1 - (var(X_half1) + var(X_half2)) / var(X))
## [1] 0.7040296
## Rulon correction (based on the variance of the difference between the halves)
1 - (var(X_half1 - X_half2) / var(X))
## [1] 0.7040296

3.2 使用psych程序包中的splitHalf()计算分半信度

library(psych)
# Split-half reliabilities of the 20-item scale via psych::splitHalf().
# NOTE(review): per the psych documentation, brute = TRUE tries all possible splits and raw = TRUE
# returns the individual split-half values — confirm against the installed psych version
out <- splitHalf(depress[deprItems], raw = TRUE, brute = TRUE)
out
## Split half reliabilities  
## Call: splitHalf(r = depress[deprItems], raw = TRUE, brute = TRUE)
## 
## Maximum split half reliability (lambda 4) =  0.9
## Guttman lambda 6                          =  0.82
## Average split half reliability            =  0.77
## Guttman lambda 3 (alpha)                  =  0.77
## Guttman lambda 2                          =  0.78
## Minimum split half reliability  (beta)    =  0.52
## Average interitem r =  0.14  with median =  0.14
##                                              2.5% 50% 97.5%
##  Quantiles of split half reliability      =  0.68 0.77 0.83
# Same analysis with n.sample = 100 instead of brute force.
# NOTE(review): presumably 100 randomly sampled splits; no seed is set before this call,
# so the maximum/minimum values below may differ from run to run
splitHalf(depress[deprItems], raw = TRUE, n.sample = 100)
## Split half reliabilities  
## Call: splitHalf(r = depress[deprItems], raw = TRUE, n.sample = 100)
## 
## Maximum split half reliability (lambda 4) =  0.85
## Guttman lambda 6                          =  0.82
## Average split half reliability            =  0.77
## Guttman lambda 3 (alpha)                  =  0.77
## Guttman lambda 2                          =  0.78
## Minimum split half reliability  (beta)    =  0.64
## Average interitem r =  0.14  with median =  0.14
##                                              2.5% 50% 97.5%
##  Quantiles of split half reliability      =  0.69 0.77 0.83

注意,splitHalf()的计算结果中,Guttman lambda 6、Guttman lambda 3 (alpha)、Guttman lambda 2、Average interitem r、median r不会随分半方案的变化而变化。查看psych的Reference manual可知,lambda 2、3、6是基于所有题项得分的相关计算得到的。Maximum split half reliability (lambda 4)、Average split half reliability、Minimum split half reliability (beta)会变化。

# List every element returned by `splitHalf()`
names(out)
##  [1] "maxrb"   "minrb"   "maxAB"   "minAB"   "meanr"   "av.r"    "med.r"  
##  [8] "alpha"   "lambda2" "lambda6" "raw"     "ci"      "covar"   "Call"
# Show the item splits (halves A and B) that produced the maximum and the minimum split-half reliability
out$maxAB
## $A
##  [1] "depr1i1"  "depr1i2"  "depr1i7"  "depr1i8"  "depr1i11" "depr1i12"
##  [7] "depr1i15" "depr1i16" "depr1i17" "depr1i19"
## 
## $B
##  [1] "depr1i3"  "depr1i4"  "depr1i5"  "depr1i6"  "depr1i9"  "depr1i10"
##  [7] "depr1i13" "depr1i14" "depr1i18" "depr1i20"
out$minAB
## $A
##  [1] "depr1i1"  "depr1i3"  "depr1i4"  "depr1i5"  "depr1i7"  "depr1i8" 
##  [7] "depr1i9"  "depr1i10" "depr1i13" "depr1i19"
## 
## $B
##  [1] "depr1i2"  "depr1i6"  "depr1i11" "depr1i12" "depr1i14" "depr1i15"
##  [7] "depr1i16" "depr1i17" "depr1i18" "depr1i20"

查看splitHalf()的代码,确认Average split half reliability(meanr)的计算方法。可得Average split half reliability(meanr)就是所有分半相关系数的算术平均值。然而,统计学家建议使用Fisher Z Transformation计算平均相关系数。

# Print the source code of splitHalf() to check how its results are computed
getAnywhere(splitHalf)

4 同质性信度