February 2010 Archives

Multilevel Rasch: Estimation with R


My last post described how test development designs could be thought of as multistage sampling designs and how multilevel Rasch could be used to estimate parameters under that assumption. I decided to use Rlogo.jpg to compare estimates from the usual Rasch model, the generalized linear model (GLM) with a logit link, and generalized linear mixed-effects regression (GLMER), treating items as fixed then random effects. The GLMER model with item fixed effects exhibited the best fit of the Law School Admission Test (LSAT) data provided with the ltm package. Moreover, intraclass correlation has reduced the effective sample size relative to simple random sampling, lending further support to the multilevel Rasch approach.

#Load libraries.

#Prepare data for GLM.
LSAT.long <- reshape(LSAT, times=1:5, timevar="Item", varying=list(1:5), direction="long")
names(LSAT.long) <- c("Item", "Score", "ID")
LSAT.long$Item <- as.factor(LSAT.long$Item)
LSAT.long$ID <- as.factor(LSAT.long$ID)

#Compute Rasch, GLM, and GLMER estimates and compare fit.
out.rasch <- rasch(LSAT, constraint=cbind(ncol(LSAT)+1, 1))
print(xtable(summary(out.rasch)$coefficients, digits=3), type="html")

value std.err z.vals
Dffclt.Item1 -2.872 0.129 -22.307
Dffclt.Item2 -1.063 0.082 -12.946
Dffclt.Item3 -0.258 0.077 -3.363
Dffclt.Item4 -1.388 0.086 -16.048
Dffclt.Item5 -2.219 0.105 -21.166
Dscrmn 1.000

out.glm <- glm(Score ~ Item, LSAT.long, family="binomial")
print(xtable(summary(out.glm)$coefficients, digits=3), type="html")

Estimate Std. Error z value Pr(> |z|)
(Intercept) 2.498 0.119 20.933 0.000
Item2 -1.607 0.138 -11.635 0.000
Item3 -2.285 0.135 -16.899 0.000
Item4 -1.329 0.141 -9.450 0.000
Item5 -0.597 0.152 -3.930 0.000

out.glmer <- glmer(Score ~ Item + (1|ID), LSAT.long, family="binomial")
print(xtable(summary(out.glmer)@coefs, digits=3, caption="Fixed effects"), type="html", caption.placement="top")
print(xtable(summary(out.glmer)@REmat, digits=3, caption="Random effects"), type="html", caption.placement="top", include.rownames=F)

Fixed effects
Estimate Std. Error z value Pr(> |z|)
(Intercept) 2.705 0.129 21.029 0.000
Item2 -1.711 0.145 -11.774 0.000
Item3 -2.467 0.142 -17.353 0.000
Item4 -1.406 0.148 -9.498 0.000
Item5 -0.623 0.160 -3.880 0.000

Random effects
Groups Name Variance Std.Dev.
ID (Intercept) 0.502 0.70852

out.glmer.re <- glmer(Score ~ (1|Item) + (1|ID), LSAT.long, family="binomial")
print(xtable(summary(out.glmer.re)@coefs, digits=3, caption="Fixed effects"), type="html", caption.placement="top")
print(xtable(summary(out.glmer.re)@REmat[,-6], digits=3, caption="Random effects"), type="html", caption.placement="top", include.rownames=F)

Fixed effects
Estimate Std. Error z value Pr(> |z|)
(Intercept) 1.448 0.379 3.818 0.000

Random effects
Groups Name Variance Std.Dev.
ID (Intercept) 0.45193 0.67226
Item (Intercept) 0.70968 0.84243

print(xtable(AICs.Rasch.GLM <- data.frame(Rasch=summary(out.rasch)$AIC, GLM=summary(out.glm)$aic, GLMER.fe=summary(out.glmer)@AICtab$AIC, GLMER.re=summary(out.glmer.re)@AICtab$AIC, row.names="AIC"), caption="Akaike's information criteria (AICs)"), type="html", caption.placement="top")

Akaike's information criteria (AICs)
AIC 4956.11 4996.87 4950.80 4977.25

#Easiness estimates.
easiness <- data.frame(Rasch=out.rasch$coefficients[,1],
GLM=c(out.glm$coefficients[1], out.glm$coefficients[1]+out.glm$coefficients[-1]),
GLMER.fe=c(fixef(out.glmer)[1], fixef(out.glmer)[1]+fixef(out.glmer)[-1]),
print(xtable(easiness, digits=3), type="html")

Item 1 2.872 2.498 2.705 2.527
Item 2 1.063 0.891 0.994 0.917
Item 3 0.258 0.213 0.237 0.224
Item 4 1.388 1.169 1.299 1.201
Item 5 2.219 1.901 2.082 1.938

#Estimated probabilities of a correct response.
pr.correct <- sapply(easiness, plogis)
row.names(pr.correct) <- row.names(easiness)
print(xtable(pr.correct, digits=3), type="html")

Item 1 0.946 0.924 0.937 0.926
Item 2 0.743 0.709 0.730 0.714
Item 3 0.564 0.553 0.559 0.556
Item 4 0.800 0.763 0.786 0.769
Item 5 0.902 0.870 0.889 0.874

#Difficulty estimates.
difficulties <- easiness*-1
print(xtable(difficulties, digits=3), type="html")

Item 1 -2.872 -2.498 -2.705 -2.527
Item 2 -1.063 -0.891 -0.994 -0.917
Item 3 -0.258 -0.213 -0.237 -0.224
Item 4 -1.388 -1.169 -1.299 -1.201
Item 5 -2.219 -1.901 -2.082 -1.938

#Calculate design effects and effective samples sizes from intraclass correlation coefficients and sampling unit sizes.
multistage.consequences <- function(ICC, N) {
M <- nrow(LSAT.long)
n <- M/N #number of responses per item
deff <- 1+(n-1)*ICC
M.effective <- trunc(M/deff)
return(data.frame(ICC, M, N, n, deff, M.effective))
model.ICC.Item <- glmer(Score ~ 1 + (1|Item), family=binomial, data=LSAT.long)
ICC.Item <- as.numeric(VarCorr(model.ICC.Item)$Item)/(as.numeric(VarCorr(model.ICC.Item)$Item)+pi^2/3)
multistage.Item <- multistage.consequences(ICC.Item, 5)
model.ICC.ID <- glmer(Score ~ 1 + (1|ID), family=binomial, data=LSAT.long)
ICC.ID <- as.numeric(VarCorr(model.ICC.ID)$ID)/(as.numeric(VarCorr(model.ICC.ID)$ID)+pi^2/3)
multistage.ID <- multistage.consequences(ICC.ID, 1000)
multistage <- data.frame(cbind(t(multistage.Item), t(multistage.ID)))
names(multistage) <- c("Item", "ID")
print(xtable(multistage, digits=3), type="html")

Item ID
ICC 0.159 0.070
M 5000.000 5000.000
N 5.000 1000.000
n 1000.000 5.000
deff 160.295 1.281
M.effective 31.000 3903.000

#Plot test characteristic curves
plot.tcc <- function(difficulties, add, col, lty) {
thetas <- seq(-3.8,3.8,length=100)
temp <- matrix(nrow=length(difficulties), ncol=length(thetas))
for(i in 1:length(thetas)) {for(theta in thetas) temp[,which(thetas==theta)] <- plogis(theta-difficulties)}
if(missing(add)) {plot(thetas, colSums(temp), ylim=c(0,5), col=NULL, ylab=expression(tau), xlab=expression(theta), main="Test characteristic curves")}
lines(thetas, colSums(temp), col=col, lty=lty)
plot.tcc(difficulties=difficulties[,1], col="black", lty=1)
plot.tcc(difficulties=difficulties[,2], add=T, col="blue", lty=2)
plot.tcc(difficulties=difficulties[,3], add=T, col="red", lty=3)
plot.tcc(difficulties=difficulties[,4], add=T, col="green", lty=4)
legend("bottomright", c("Rasch", "GLM", "GLMER, fixed items", "GLMER, random items "), col=c("black", "blue", "red", "green"), lty=1:4)


Multilevel Rasch


After years of studying and applying quantitative social science research methods, it has become easier to catch glimpses of larger connections between seemingly independent methodological approaches. I have learned, for example, that survey development has a lot in common with test development. I have also learned that longitudinal data analysis is like spatial/geographic data analysis in that both attend to reference-based dependencies (i.e., one dimensional time and two-or-more dimensional space).

Many methods seem independent because they are taught in isolation as topics courses (e.g., survey methods) or only within disciplines where they are traditionally applied (e.g., social network analysis within sociology). I credit my professors with teaching me to see the larger connections. I also credit the University of Minnesota with offering interdisciplinary courses, such as Latent Variable Measurement Models and Path Analysis taught by Melanie Wall in Biostatistics--a course traditionally taught by psychologists.

Item response theory (IRT) scaling and multilevel modeling are two other quantitative methods that have more in common than they seem at first glance. The Rasch model is usually expressed as


where represents a test taker's latent ability, and represents the difficulty of an item. The Rasch model can also be expressed as a generalized linear model (GLM) with a logit link:


where is a dummy variable (1 if a correct response, 0 otherwise) and is the easiness of an item.

Test development designs can be thought of as multistage sampling designs in which test takers represent primary sampling units and item responses represent secondary sampling units nested within test takers. Items can also be thought of as primary sampling units in which responses are nested. If item responses exhibit sizable intraclass correlation, then mixed-effects modeling may be appropriate to account for loss of statistical power relative to simple random sampling. Mixed-effects models may also help with vertical scaling and identifying differential item functioning (DIF) by considering fixed effects for test taker age and group membership. Some authors treat items as fixed effects; while others treat them as random effects. In the latter case, the Rasch model can be expressed as a two-level mixed model with crossed random effects:


Regression discontinuity gallery: R code for nonparametric estimation


Choosing a functional form for the relationship between the assignment and outcome variable from a regression discontinuity design is an important step in estimating the local average treatment effect. Plotting nonparametric smoothers can suggest a functional form, and nonparametric inference may be more appropriate when a curvilinear relationship defies classification. Some researchers exclusively prefer locally-weighted regression because they feel it objectively and conservatively excludes distal observations, although the choice of bandwidth can be subjective.

The following example shows a way to obtain and plot nonparametric estimates with Rlogo.jpg. The approach yields an effect size estimate of about 2.1 standard deviations, which is over twice as large as the true effect (1 σ) and much worse than the parametric estimates (1.1 SD). Increasing the default span from 0.75 to 1 (not shown) improved the estimate slightly (1.3 SD), as did reducing it to 0.5 (1.6 SD). It would be interesting to conduct a simulation study to compare parametric and nonparametric effect size recovery over many random samples while systematically varying regression functions, bandwidths, and cutoff locations.

#Simulate and plot the data as described in the first part of the previous entry.

#Use the full sample to fit a locally-weighted regression line.
lo <- loess(y ~ x, surface="direct")
x.lo <- min(x):max(x)
pred <- predict(lo, data.frame(x=x.lo))
lines(x.lo, pred, col="red")

#Use treatment observations to fit a locally-weighted regression line.
lo.tx <- loess(y ~ x, data.frame(y, x)[which(x<0),], surface="direct")
x.lo.tx <- 0:min(x)
pred.tx <- predict(lo.tx, data.frame(x=x.lo.tx))
lines(x.lo.tx, pred.tx, col="blue")

#Use control observations to fit a locally-weighted regression line.
lo.c <- loess(y ~ x, data.frame(y, x)[which(x>=0),], surface="direct")
x.lo.c <- 0:max(x)
pred.c <- predict(lo.c, data.frame(x=x.lo.c))
lines(x.lo.c, pred.c, col="blue")

#Add legend.
legend("bottomright", bg="white", cex=.75, pt.cex=.75, c("Treatment observation", "Control observation", "Cutoff", "Population regression line ", "Piecewise local smoother", "Full local smoother"), lty=c(NA, NA, 2, 1, 1, 1), lwd=c(NA, NA, 2, 2, 1, 1), col=c("darkgrey", "darkgrey", "darkgrey", "black", "blue", "red"), pch=c(4, 1, NA, NA, NA, NA))

#Nonparametric estimate [95% confidence interval] of local average treatement effect.
yhat.tx <- predict(lo.tx, 0, se=T)
yhat.c <- predict(lo.c, 0, se=T)
yhat.tx$fit - yhat.c$fit
c((yhat.tx$fit - qt(0.975, yhat.tx$df)*yhat.tx$se.fit) - (yhat.c$fit + qt(0.975, yhat.c$df)*yhat.c$se.fit), (yhat.tx$fit + qt(0.975, yhat.tx$df)*yhat.tx$se.fit) - (yhat.c$fit - qt(0.975, yhat.c$df)*yhat.c$se.fit))

Yields 21.28786 [7.453552, 35.122162].