Code
library(lavaan)
library(semTools)
library(dplyr)
set.seed(1234)
By the end of this lab you will be able to:
Two-step mindset: (1) Measurement invariance over time → (2) Growth model.
If the measurement shifts, “growth” can be an artifact.
We simulate:
N <- 600
pop <- '
# ----------------------------
# Exogenous observed predictor
# ----------------------------
x ~ 0*1
x ~~ 1*x
# ----------------------------
# Latent growth factors (unmeasured: declared via variances/means/regressions)
# ----------------------------
i ~~ 0.80*i
s ~~ 0.20*s
q ~~ 0.03*q
i ~~ -0.15*s
i ~~ 0*q
s ~~ 0*q
# Means + effects of x (true generating values)
i ~ 0.00*1 + 0.50*x
s ~ 0.25*1 + 0.25*x
q ~ 0.05*1
# ----------------------------
# Wave-specific latent states (measured by items)
# Quadratic time scores: 0,1,4,9
# ----------------------------
F1 ~ 1*i + 0*s + 0*q
F2 ~ 1*i + 1*s + 1*q
F3 ~ 1*i + 2*s + 4*q
F4 ~ 1*i + 3*s + 9*q
# Residual (occasion-specific) variance of latent states
F1 ~~ 0.30*F1
F2 ~~ 0.30*F2
F3 ~~ 0.30*F3
F4 ~~ 0.30*F4
# ----------------------------
# Measurement model (4 items per wave)
# Metric non-invariance planted: loading of y4_4 is larger
# ----------------------------
F1 =~ 1*y1_1 + 0.80*y2_1 + 0.90*y3_1 + 0.70*y4_1
F2 =~ 1*y1_2 + 0.80*y2_2 + 0.90*y3_2 + 0.70*y4_2
F3 =~ 1*y1_3 + 0.80*y2_3 + 0.90*y3_3 + 0.70*y4_3
F4 =~ 1*y1_4 + 0.80*y2_4 + 0.90*y3_4 + 0.95*y4_4 # <-- metric violation (item 4 at wave 4)
# Item residual variances (constant across waves)
y1_1 ~~ 0.50*y1_1; y2_1 ~~ 0.60*y2_1; y3_1 ~~ 0.50*y3_1; y4_1 ~~ 0.70*y4_1
y1_2 ~~ 0.50*y1_2; y2_2 ~~ 0.60*y2_2; y3_2 ~~ 0.50*y3_2; y4_2 ~~ 0.70*y4_2
y1_3 ~~ 0.50*y1_3; y2_3 ~~ 0.60*y2_3; y3_3 ~~ 0.50*y3_3; y4_3 ~~ 0.70*y4_3
y1_4 ~~ 0.50*y1_4; y2_4 ~~ 0.60*y2_4; y3_4 ~~ 0.50*y3_4; y4_4 ~~ 0.70*y4_4
# ----------------------------
# Item intercepts (scalar non-invariance planted: y2_3 intercept is higher)
# ----------------------------
y1_1 ~ 0*1; y2_1 ~ 0*1; y3_1 ~ 0*1; y4_1 ~ 0*1
y1_2 ~ 0*1; y2_2 ~ 0*1; y3_2 ~ 0*1; y4_2 ~ 0*1
y1_3 ~ 0*1; y2_3 ~ 0.40*1; y3_3 ~ 0*1; y4_3 ~ 0*1 # <-- scalar violation (item 2 at wave 3)
y1_4 ~ 0*1; y2_4 ~ 0*1; y3_4 ~ 0*1; y4_4 ~ 0*1
# ----------------------------
# Correlated residuals across adjacent waves (same indicator carryover)
# ----------------------------
y1_1 ~~ 0.25*y1_2
y1_2 ~~ 0.25*y1_3
y1_3 ~~ 0.25*y1_4
y2_1 ~~ 0.20*y2_2
y2_2 ~~ 0.20*y2_3
y2_3 ~~ 0.20*y2_4
'
dat <- simulateData(pop, sample.nobs = N, meanstructure = TRUE)
dat <- subset(dat, select = -c(i,s,q)) # remove growth factors
If you want a more realistic workflow, introduce missing data and use FIML.
Fit the configural model: same factor structure each wave, with correlated residuals for the same indicator across adjacent waves.
mod_config <- '
F1 =~ y1_1 + y2_1 + y3_1 + y4_1
F2 =~ y1_2 + y2_2 + y3_2 + y4_2
F3 =~ y1_3 + y2_3 + y3_3 + y4_3
F4 =~ y1_4 + y2_4 + y3_4 + y4_4
# correlated residuals across time for same indicator (adjacent waves)
y1_1 ~~ y1_2
y1_2 ~~ y1_3
y1_3 ~~ y1_4
y2_1 ~~ y2_2
y2_2 ~~ y2_3
y2_3 ~~ y2_4
'
fit_config <- cfa(mod_config, data = dat, meanstructure = TRUE, missing = "fiml")
fitMeasures(fit_config, c("chisq","df","cfi","tli","rmsea","srmr"))
  chisq     df    cfi    tli  rmsea   srmr
68.453 92.000 1.000 1.003 0.000 0.015
Impose equality constraints on loadings using labels (l1, l2, …). Keep the residual correlations.
mod_metric <- '
F1 =~ l1*y1_1 + l2*y2_1 + l3*y3_1 + l4*y4_1
F2 =~ l1*y1_2 + l2*y2_2 + l3*y3_2 + l4*y4_2
F3 =~ l1*y1_3 + l2*y2_3 + l3*y3_3 + l4*y4_3
F4 =~ l1*y1_4 + l2*y2_4 + l3*y3_4 + l4*y4_4
y1_1 ~~ y1_2
y1_2 ~~ y1_3
y1_3 ~~ y1_4
y2_1 ~~ y2_2
y2_2 ~~ y2_3
y2_3 ~~ y2_4
'
fit_metric <- cfa(mod_metric, data = dat, meanstructure = TRUE, missing = "fiml")
fitMeasures(fit_metric, c("chisq","df","cfi","tli","rmsea","srmr"))
  chisq     df    cfi    tli  rmsea   srmr
187.694 101.000 0.990 0.989 0.038 0.054
Compare configural vs metric:
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
fit_config 92 24794 25058 68.453
fit_metric 101 24896 25120 187.694 119.24 0.14288 9 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cfi rmsea srmr
config 1.0000000 0.00000000 0.01522844
metric 0.9904561 0.03782326 0.05360879
Add equality constraints on intercepts as well (i1, i2, …). This is the gateway to interpreting latent mean change.
mod_scalar <- '
F1 =~ l1*y1_1 + l2*y2_1 + l3*y3_1 + l4*y4_1
F2 =~ l1*y1_2 + l2*y2_2 + l3*y3_2 + l4*y4_2
F3 =~ l1*y1_3 + l2*y2_3 + l3*y3_3 + l4*y4_3
F4 =~ l1*y1_4 + l2*y2_4 + l3*y3_4 + l4*y4_4
# equal intercepts across time
y1_1 ~ i1*1; y1_2 ~ i1*1; y1_3 ~ i1*1; y1_4 ~ i1*1
y2_1 ~ i2*1; y2_2 ~ i2*1; y2_3 ~ i2*1; y2_4 ~ i2*1
y3_1 ~ i3*1; y3_2 ~ i3*1; y3_3 ~ i3*1; y3_4 ~ i3*1
y4_1 ~ i4*1; y4_2 ~ i4*1; y4_3 ~ i4*1; y4_4 ~ i4*1
y1_1 ~~ y1_2
y1_2 ~~ y1_3
y1_3 ~~ y1_4
y2_1 ~~ y2_2
y2_2 ~~ y2_3
y2_3 ~~ y2_4
#
F1 ~ 0*1
F2 ~ 1
F3 ~ 1
F4 ~ 1
'
fit_scalar <- cfa(mod_scalar, data = dat, meanstructure = TRUE, missing = "fiml")
fitMeasures(fit_scalar, c("chisq","df","cfi","tli","rmsea","srmr"))
  chisq     df    cfi    tli  rmsea   srmr
342.180 110.000 0.974 0.972 0.059 0.057
Compare the three models:
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
fit_config 92 24794 25058 68.453
fit_metric 101 24896 25120 187.694 119.24 0.14288 9 < 2.2e-16 ***
fit_scalar 110 25032 25217 342.180 154.49 0.16414 9 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# A tibble: 3 × 4
model cfi rmsea srmr
<chr> <lvn.vctr> <lvn.vctr> <lvn.vctr>
1 config 1.0000000 0.00000000 0.01522844
2 metric 0.9904561 0.03782326 0.05360879
3 scalar 0.9744401 0.05931165 0.05691011
Inspect modification indices (MIs) for the scalar model:
univariate score tests:
lhs op rhs X2 df p.value
1 .p21. == .p23. 125.339 1 0.000
2 .p4. == .p16. 122.072 1 0.000
3 .p4. == .p12. 45.242 1 0.000
4 .p21. == .p24. 39.036 1 0.000
5 .p2. == .p10. 33.044 1 0.000
6 .p17. == .p19. 31.833 1 0.000
7 .p2. == .p14. 21.811 1 0.000
8 .p21. == .p22. 21.571 1 0.000
9 .p17. == .p18. 19.486 1 0.000
10 .p4. == .p8. 17.777 1 0.000
11 .p29. == .p32. 15.301 1 0.000
12 .p3. == .p15. 12.089 1 0.001
13 .p29. == .p31. 6.382 1 0.012
14 .p17. == .p20. 5.888 1 0.015
15 .p3. == .p11. 3.980 1 0.046
Partial invariance is acceptable when:
Hint (how to free one intercept constraint):
If you labeled all intercepts equal (e.g., y3_1 ~ i3*1; y3_2 ~ i3*1; ...), you can break equality for one wave by giving it a different label:
Create your modified model below.
To keep growth interpretation simple, we create a wave score as the mean of the four indicators.
Fit a linear LGM with time scores (0, 1, 2, 3).
lgm_linear <- '
i =~ 1*y_t1 + 1*y_t2 + 1*y_t3 + 1*y_t4
s =~ 0*y_t1 + 1*y_t2 + 2*y_t3 + 3*y_t4
# optional residual autocorrelation (often helpful)
y_t1 ~~ y_t2
y_t2 ~~ y_t3
y_t3 ~~ y_t4
'
fit_lgm <- growth(lgm_linear, data = dat, missing = "fiml")
summary(fit_lgm, fit.measures = TRUE, standardized = TRUE)
lavaan 0.6-19 ended normally after 29 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 12
Number of observations 600
Number of missing patterns 2
Model Test User Model:
Test statistic 15.740
Degrees of freedom 2
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 1387.600
Degrees of freedom 6
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.990
Tucker-Lewis Index (TLI) 0.970
Robust Comparative Fit Index (CFI) 0.991
Robust Tucker-Lewis Index (TLI) 0.973
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -3289.535
Loglikelihood unrestricted model (H1) -3281.665
Akaike (AIC) 6603.070
Bayesian (BIC) 6655.834
Sample-size adjusted Bayesian (SABIC) 6617.737
Root Mean Square Error of Approximation:
RMSEA 0.107
90 Percent confidence interval - lower 0.062
90 Percent confidence interval - upper 0.159
P-value H_0: RMSEA <= 0.050 0.020
P-value H_0: RMSEA >= 0.080 0.852
Robust RMSEA 0.107
90 Percent confidence interval - lower 0.059
90 Percent confidence interval - upper 0.162
P-value H_0: Robust RMSEA <= 0.050 0.027
P-value H_0: Robust RMSEA >= 0.080 0.838
Standardized Root Mean Square Residual:
SRMR 0.022
Parameter Estimates:
Standard errors Standard
Information Observed
Observed information based on Hessian
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
i =~
y_t1 1.000 0.715 0.674
y_t2 1.000 0.715 0.632
y_t3 1.000 0.715 0.481
y_t4 1.000 0.715 0.316
s =~
y_t1 0.000 0.000 0.000
y_t2 1.000 0.434 0.384
y_t3 2.000 0.868 0.584
y_t4 3.000 1.302 0.576
Covariances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.y_t1 ~~
.y_t2 0.114 0.070 1.619 0.105 0.114 0.262
.y_t2 ~~
.y_t3 -0.126 0.033 -3.879 0.000 -0.126 -0.359
.y_t3 ~~
.y_t4 0.563 0.147 3.832 0.000 0.563 0.615
i ~~
s 0.135 0.060 2.254 0.024 0.436 0.436
Intercepts:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
i 0.015 0.041 0.359 0.720 0.021 0.021
s 0.337 0.027 12.574 0.000 0.776 0.776
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.y_t1 0.613 0.119 5.146 0.000 0.613 0.545
.y_t2 0.308 0.060 5.152 0.000 0.308 0.241
.y_t3 0.400 0.109 3.657 0.000 0.400 0.181
.y_t4 2.094 0.255 8.229 0.000 2.094 0.410
i 0.511 0.118 4.343 0.000 1.000 1.000
s 0.188 0.041 4.623 0.000 1.000 1.000
Extract and interpret the growth factor means/variances:
lhs op rhs est se z pvalue ci.lower ci.upper
1 i ~~ i 0.511 0.118 4.343 0.000 0.281 0.742
2 s ~~ s 0.188 0.041 4.623 0.000 0.109 0.268
3 i ~~ s 0.135 0.060 2.254 0.024 0.018 0.253
4 i ~1 0.015 0.041 0.359 0.720 -0.066 0.096
5 s ~1 0.337 0.027 12.574 0.000 0.284 0.390
Change the slope loadings to make the intercept represent wave 2.
Example time scores: (-1, 0, 1, 2).
lhs op rhs est se z pvalue ci.lower ci.upper
1 i ~~ i 0.970 0.071 13.600 0 0.831 1.110
2 s ~~ s 0.188 0.041 4.623 0 0.109 0.268
3 i ~~ s 0.324 0.045 7.130 0 0.235 0.413
4 i ~1 0.352 0.043 8.157 0 0.267 0.436
5 s ~1 0.337 0.027 12.574 0 0.284 0.390
lgm_quad <- '
i =~ 1*y_t1 + 1*y_t2 + 1*y_t3 + 1*y_t4
s =~ 0*y_t1 + 1*y_t2 + 2*y_t3 + 3*y_t4
q =~ 0*y_t1 + 1*y_t2 + 4*y_t3 + 9*y_t4
y_t1 ~~ y_t2
y_t2 ~~ y_t3
y_t3 ~~ y_t4
'
fit_lgm_q <- growth(lgm_quad, data = dat, missing = "fiml")
# anova(fit_lgm, fit_lgm_q)
fitMeasures(fit_lgm_q, c("cfi","rmsea","srmr"))
  cfi rmsea  srmr
NA 0.00 0.02
Create a predictor x (in real applications: trait, condition, demographic variable, baseline stress, etc.).
Fit conditional growth:
lavaan 0.6-19 ended normally after 32 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 14
Number of observations 600
Number of missing patterns 2
Model Test User Model:
Test statistic 19.734
Degrees of freedom 4
P-value (Chi-square) 0.001
Model Test Baseline Model:
Test statistic 1391.891
Degrees of freedom 10
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.989
Tucker-Lewis Index (TLI) 0.972
Robust Comparative Fit Index (CFI) 0.990
Robust Tucker-Lewis Index (TLI) 0.974
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -3289.387
Loglikelihood unrestricted model (H1) -3279.520
Akaike (AIC) 6606.774
Bayesian (BIC) 6668.331
Sample-size adjusted Bayesian (SABIC) 6623.884
Root Mean Square Error of Approximation:
RMSEA 0.081
90 Percent confidence interval - lower 0.048
90 Percent confidence interval - upper 0.118
P-value H_0: RMSEA <= 0.050 0.062
P-value H_0: RMSEA >= 0.080 0.564
Robust RMSEA 0.082
90 Percent confidence interval - lower 0.046
90 Percent confidence interval - upper 0.121
P-value H_0: Robust RMSEA <= 0.050 0.068
P-value H_0: Robust RMSEA >= 0.080 0.578
Standardized Root Mean Square Residual:
SRMR 0.021
Parameter Estimates:
Standard errors Standard
Information Observed
Observed information based on Hessian
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
i =~
y_t1 1.000 0.713 0.672
y_t2 1.000 0.713 0.630
y_t3 1.000 0.713 0.480
y_t4 1.000 0.713 0.315
s =~
y_t1 0.000 0.000 0.000
y_t2 1.000 0.434 0.384
y_t3 2.000 0.868 0.584
y_t4 3.000 1.302 0.575
Regressions:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
i ~
x 0.018 0.042 0.440 0.660 0.026 0.025
s ~
x 0.005 0.027 0.200 0.841 0.012 0.012
Covariances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.y_t1 ~~
.y_t2 0.116 0.071 1.647 0.100 0.116 0.266
.y_t2 ~~
.y_t3 -0.127 0.033 -3.895 0.000 -0.127 -0.362
.y_t3 ~~
.y_t4 0.560 0.147 3.813 0.000 0.560 0.615
.i ~~
.s 0.137 0.060 2.273 0.023 0.442 0.442
Intercepts:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.i 0.014 0.041 0.333 0.739 0.019 0.019
.s 0.337 0.027 12.543 0.000 0.776 0.776
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.y_t1 0.617 0.119 5.162 0.000 0.617 0.548
.y_t2 0.310 0.060 5.169 0.000 0.310 0.242
.y_t3 0.397 0.110 3.617 0.000 0.397 0.180
.y_t4 2.094 0.255 8.226 0.000 2.094 0.409
.i 0.508 0.118 4.308 0.000 0.999 0.999
.s 0.188 0.041 4.614 0.000 1.000 1.000
What does x → i mean? What does x → s mean?
If x → s is positive, how would you explain it in words?
The group= workflow (and semTools::measurementInvariance()) becomes natural if you reshape data to long format: one row per person–wave, and a wave grouping variable.
This is optional and meant to connect the longitudinal invariance logic to what you did in MG-CFA.
# OPTIONAL: reshape to long format for invariance automation
# Requires tidyr
library(tidyr)
dat_long <- dat |>
mutate(id = row_number()) |>
pivot_longer(
cols = matches("^y[1-4]_[1-4]$"),
names_to = c(".value","wave"),
names_pattern = "^(y\\d)_(\\d)$"
) |>
mutate(wave = factor(wave))
head(dat_long)
# A tibble: 6 × 11
x y_t1 y_t2 y_t3 y_t4 id wave y1 y2 y3 y4
<dbl> <dbl> <dbl> <dbl> <dbl> <int> <fct> <dbl> <dbl> <dbl> <dbl>
1 -0.550 0.713 0.951 1.61 4.35 1 1 1.05 0.0293 1.29 0.485
2 -0.550 0.713 0.951 1.61 4.35 1 2 0.970 0.939 0.365 1.53
3 -0.550 0.713 0.951 1.61 4.35 1 3 1.23 2.56 1.40 1.24
4 -0.550 0.713 0.951 1.61 4.35 1 4 4.29 4.41 4.15 4.57
5 1.09 -1.59 0.450 0.922 NaN 2 1 -0.574 -1.88 -2.53 -1.39
6 1.09 -1.59 0.450 0.922 NaN 2 2 0.725 -0.0941 0.757 0.414
Run invariance:
Keep it “report-ready”: always report χ²(df), CFI/TLI, RMSEA (+ CI if possible), SRMR + the key local decision you made (e.g., which intercept you freed).
---
title: "Lab 09: Longitudinal SEM — Invariance Over Time & Latent Growth"
author: "Tommaso Feraco"
format:
html:
toc: true
toc-depth: 3
number-sections: true
code-fold: true
code-tools: true
execute:
echo: true
warning: false
message: false
bibliography: ../refs/references.bib
csl: ../refs/apa7.csl
---
## What you will do
By the end of this lab you will be able to:
1. Fit a **longitudinal CFA** and evaluate **configural → metric → scalar** invariance across waves.
2. Use **global** and **local** diagnostics to motivate *disciplined* partial invariance.
3. Fit **latent growth curve models (LGM)**: linear, (optional) quadratic, and interpret growth parameters.
4. Extend LGM with a predictor (**conditional growth**).
::: {.callout-note}
**Two-step mindset:** (1) Measurement invariance over time → (2) Growth model.
If the measurement shifts, “growth” can be an artifact.
:::
---
## Setup
```{r}
library(lavaan)
library(semTools)
library(dplyr)
set.seed(1234)
```
---
## Data (transparent simulation)
We simulate:
- 4 waves, 4 indicators per wave
- **Quadratic growth** in the latent state (so quadratic LGM should improve over linear)
- A covariate **x** that truly affects both **intercept** and **slope**
- Two planned invariance violations:
- **Metric violation:** item 4 loading is larger at wave 4
- **Scalar violation:** item 2 intercept is higher at wave 3
- Correlated residuals across adjacent waves for items 1 and 2
```{r}
# Sample size for the simulated longitudinal data set.
N <- 600
pop <- '
# ----------------------------
# Exogenous observed predictor
# ----------------------------
x ~ 0*1
x ~~ 1*x
# ----------------------------
# Latent growth factors (unmeasured: declared via variances/means/regressions)
# ----------------------------
i ~~ 0.80*i
s ~~ 0.20*s
q ~~ 0.03*q
i ~~ -0.15*s
i ~~ 0*q
s ~~ 0*q
# Means + effects of x (true generating values)
i ~ 0.00*1 + 0.50*x
s ~ 0.25*1 + 0.25*x
q ~ 0.05*1
# ----------------------------
# Wave-specific latent states (measured by items)
# Quadratic time scores: 0,1,4,9
# ----------------------------
F1 ~ 1*i + 0*s + 0*q
F2 ~ 1*i + 1*s + 1*q
F3 ~ 1*i + 2*s + 4*q
F4 ~ 1*i + 3*s + 9*q
# Residual (occasion-specific) variance of latent states
F1 ~~ 0.30*F1
F2 ~~ 0.30*F2
F3 ~~ 0.30*F3
F4 ~~ 0.30*F4
# ----------------------------
# Measurement model (4 items per wave)
# Metric non-invariance planted: loading of y4_4 is larger
# ----------------------------
F1 =~ 1*y1_1 + 0.80*y2_1 + 0.90*y3_1 + 0.70*y4_1
F2 =~ 1*y1_2 + 0.80*y2_2 + 0.90*y3_2 + 0.70*y4_2
F3 =~ 1*y1_3 + 0.80*y2_3 + 0.90*y3_3 + 0.70*y4_3
F4 =~ 1*y1_4 + 0.80*y2_4 + 0.90*y3_4 + 0.95*y4_4 # <-- metric violation (item 4 at wave 4)
# Item residual variances (constant across waves)
y1_1 ~~ 0.50*y1_1; y2_1 ~~ 0.60*y2_1; y3_1 ~~ 0.50*y3_1; y4_1 ~~ 0.70*y4_1
y1_2 ~~ 0.50*y1_2; y2_2 ~~ 0.60*y2_2; y3_2 ~~ 0.50*y3_2; y4_2 ~~ 0.70*y4_2
y1_3 ~~ 0.50*y1_3; y2_3 ~~ 0.60*y2_3; y3_3 ~~ 0.50*y3_3; y4_3 ~~ 0.70*y4_3
y1_4 ~~ 0.50*y1_4; y2_4 ~~ 0.60*y2_4; y3_4 ~~ 0.50*y3_4; y4_4 ~~ 0.70*y4_4
# ----------------------------
# Item intercepts (scalar non-invariance planted: y2_3 intercept is higher)
# ----------------------------
y1_1 ~ 0*1; y2_1 ~ 0*1; y3_1 ~ 0*1; y4_1 ~ 0*1
y1_2 ~ 0*1; y2_2 ~ 0*1; y3_2 ~ 0*1; y4_2 ~ 0*1
y1_3 ~ 0*1; y2_3 ~ 0.40*1; y3_3 ~ 0*1; y4_3 ~ 0*1 # <-- scalar violation (item 2 at wave 3)
y1_4 ~ 0*1; y2_4 ~ 0*1; y3_4 ~ 0*1; y4_4 ~ 0*1
# ----------------------------
# Correlated residuals across adjacent waves (same indicator carryover)
# ----------------------------
y1_1 ~~ 0.25*y1_2
y1_2 ~~ 0.25*y1_3
y1_3 ~~ 0.25*y1_4
y2_1 ~~ 0.20*y2_2
y2_2 ~~ 0.20*y2_3
y2_3 ~~ 0.20*y2_4
'
# Draw N observations implied by the population model (means included).
dat <- simulateData(pop, sample.nobs = N, meanstructure = TRUE)
# i, s, q are declared without =~ indicators, so simulateData() emits them as
# data columns; drop them so only the items and x remain observed.
dat <- subset(dat, select = -c(i,s,q)) # remove growth factors
```
### Optional: add missingness (realistic in longitudinal studies)
If you want a more realistic workflow, introduce missing data and use FIML.
```{r}
# OPTIONAL: comment out if you want complete data
# Fixed seed so the missingness pattern is reproducible.
set.seed(2026)
# MCAR dropout: a random 20% of participants lose the entire wave-4 assessment.
miss_id <- sample(seq_len(nrow(dat)), size = round(0.20*nrow(dat)))
dat[miss_id, c("y1_4","y2_4","y3_4","y4_4")] <- NA
```
---
# Part A — Longitudinal CFA invariance ladder
## A1. Configural invariance
Fit the configural model: same factor structure each wave, with correlated residuals for the *same* indicator across adjacent waves.
```{r}
mod_config <- '
F1 =~ y1_1 + y2_1 + y3_1 + y4_1
F2 =~ y1_2 + y2_2 + y3_2 + y4_2
F3 =~ y1_3 + y2_3 + y3_3 + y4_3
F4 =~ y1_4 + y2_4 + y3_4 + y4_4
# correlated residuals across time for same indicator (adjacent waves)
y1_1 ~~ y1_2
y1_2 ~~ y1_3
y1_3 ~~ y1_4
y2_1 ~~ y2_2
y2_2 ~~ y2_3
y2_3 ~~ y2_4
'
# Configural model: same loading pattern each wave, all parameters free.
# meanstructure = TRUE adds intercepts/means; missing = "fiml" keeps rows
# with missing wave-4 items via full-information ML.
fit_config <- cfa(mod_config, data = dat, meanstructure = TRUE, missing = "fiml")
fitMeasures(fit_config, c("chisq","df","cfi","tli","rmsea","srmr"))
```
### Questions
- Is the overall fit acceptable for a baseline model?
- Do the correlated residuals feel substantively justified here? Why might they be needed in repeated-measures measurement models?
---
## A2. Metric invariance (equal loadings across waves)
Impose equality constraints on loadings using labels (`l1`, `l2`, ...). Keep the residual correlations.
```{r}
# Metric (weak) invariance: the same label (l1..l4) on an item's loading at
# every wave constrains that loading to be equal across time.
mod_metric <- '
F1 =~ l1*y1_1 + l2*y2_1 + l3*y3_1 + l4*y4_1
F2 =~ l1*y1_2 + l2*y2_2 + l3*y3_2 + l4*y4_2
F3 =~ l1*y1_3 + l2*y2_3 + l3*y3_3 + l4*y4_3
F4 =~ l1*y1_4 + l2*y2_4 + l3*y3_4 + l4*y4_4
y1_1 ~~ y1_2
y1_2 ~~ y1_3
y1_3 ~~ y1_4
y2_1 ~~ y2_2
y2_2 ~~ y2_3
y2_3 ~~ y2_4
'
fit_metric <- cfa(mod_metric, data = dat, meanstructure = TRUE, missing = "fiml")
fitMeasures(fit_metric, c("chisq","df","cfi","tli","rmsea","srmr"))
```
Compare configural vs metric:
```{r}
# Likelihood-ratio test: metric is nested in configural (equality constraints
# on the loadings), so the chi-square difference tests metric invariance.
lavTestLRT(fit_config, fit_metric)
```
```{r}
# Side-by-side approximate fit indices for the two models.
rbind(
config = fitMeasures(fit_config, c("cfi","rmsea","srmr")),
metric = fitMeasures(fit_metric, c("cfi","rmsea","srmr"))
)
```
### Questions
- Does metric invariance appear plausible (based on fit changes)?
- If you see a meaningful fit drop, where might non-invariance be coming from?
---
## A3. Scalar invariance (equal intercepts across waves)
Add equality constraints on intercepts as well (`i1`, `i2`, ...). This is the gateway to interpreting **latent mean change**.
```{r}
# Scalar (strong) invariance: equal loadings AND equal item intercepts across
# waves. With intercepts constrained, latent means at waves 2-4 are estimable
# relative to wave 1 (whose mean is fixed to 0 below as the reference).
mod_scalar <- '
F1 =~ l1*y1_1 + l2*y2_1 + l3*y3_1 + l4*y4_1
F2 =~ l1*y1_2 + l2*y2_2 + l3*y3_2 + l4*y4_2
F3 =~ l1*y1_3 + l2*y2_3 + l3*y3_3 + l4*y4_3
F4 =~ l1*y1_4 + l2*y2_4 + l3*y3_4 + l4*y4_4
# equal intercepts across time
y1_1 ~ i1*1; y1_2 ~ i1*1; y1_3 ~ i1*1; y1_4 ~ i1*1
y2_1 ~ i2*1; y2_2 ~ i2*1; y2_3 ~ i2*1; y2_4 ~ i2*1
y3_1 ~ i3*1; y3_2 ~ i3*1; y3_3 ~ i3*1; y3_4 ~ i3*1
y4_1 ~ i4*1; y4_2 ~ i4*1; y4_3 ~ i4*1; y4_4 ~ i4*1
y1_1 ~~ y1_2
y1_2 ~~ y1_3
y1_3 ~~ y1_4
y2_1 ~~ y2_2
y2_2 ~~ y2_3
y2_3 ~~ y2_4
#
F1 ~ 0*1
F2 ~ 1
F3 ~ 1
F4 ~ 1
'
fit_scalar <- cfa(mod_scalar, data = dat, meanstructure = TRUE, missing = "fiml")
fitMeasures(fit_scalar, c("chisq","df","cfi","tli","rmsea","srmr"))
```
Compare the three models:
```{r}
# Sequential LRTs across the invariance ladder: configural -> metric -> scalar.
lavTestLRT(fit_config, fit_metric, fit_scalar)
```
```{r}
# Approximate fit indices for all three models; .id keeps the model label
# in a `model` column of the resulting tibble.
bind_rows(
config = fitMeasures(fit_config, c("cfi","rmsea","srmr")),
metric = fitMeasures(fit_metric, c("cfi","rmsea","srmr")),
scalar = fitMeasures(fit_scalar, c("cfi","rmsea","srmr")),
.id = "model"
)
```
---
## A4. Local diagnostics + partial scalar invariance (disciplined)
Inspect modification indices (MIs) for the scalar model:
```{r}
# Univariate score (LM) tests for each equality constraint: a large X2 means
# freeing that constraint would improve fit. The .pXX. labels are lavaan
# parameter-table row numbers; map them to items/waves via parTable(fit_scalar).
mi <- lavTestScore(fit_scalar)$uni |>
arrange(desc(X2))
mi |> head(15)
```
### Your task
1. Identify **one** intercept constraint that is strongly suggested to be freed (large MI) *and* is at least somewhat plausible substantively (e.g., an item that could drift over time).
2. Fit a **partial scalar** model freeing that single intercept equality.
3. Re-evaluate fit vs full scalar.
::: {.callout-warning}
Partial invariance is acceptable when:
- you free the *minimum* needed,
- you justify it,
- you report it transparently,
- you check your substantive conclusions don’t hinge on it.
:::
**Hint (how to free one intercept constraint):**
If you labeled all intercepts equal (e.g., `y3_1 ~ i3*1; y3_2 ~ i3*1; ...`), you can break equality for one wave by giving it a different label:
```{r}
# Example (DO NOT run blindly): make y3_4 intercept free from the others
# y3_1 ~ i3*1; y3_2 ~ i3*1; y3_3 ~ i3*1; y3_4 ~ i3_4*1
```
Create your modified model below.
```{r}
# TODO: write mod_partial_scalar
# Start from the full scalar model and change ONE intercept label so that
# single equality is released (e.g., y2_3 ~ i2_3*1, given the planted scalar
# violation at wave 3); placeholder below keeps the document knitting.
mod_partial_scalar <- mod_scalar
# fit_partial_scalar <- cfa(mod_partial_scalar, data = dat, meanstructure = TRUE, missing = "fiml")
# fitMeasures(fit_partial_scalar, c("cfi","rmsea","srmr"))
```
---
# Part B — Latent Growth Curve Models (LGM)
## B1. Build wave-level observed scores (teaching-friendly)
To keep growth interpretation simple, we create a wave score as the mean of the four indicators.
```{r}
# Wave composite = row mean of the four items at that wave.
# NOTE(review): if the optional wave-4 missingness was added, all four wave-4
# items are NA for those rows, so rowMeans(..., na.rm = TRUE) yields NaN;
# is.na(NaN) is TRUE in R, so FIML in the growth models still treats these
# as missing — but confirm this is the intended handling.
dat <- dat |>
mutate(
y_t1 = rowMeans(across(c(y1_1,y2_1,y3_1,y4_1)), na.rm = TRUE),
y_t2 = rowMeans(across(c(y1_2,y2_2,y3_2,y4_2)), na.rm = TRUE),
y_t3 = rowMeans(across(c(y1_3,y2_3,y3_3,y4_3)), na.rm = TRUE),
y_t4 = rowMeans(across(c(y1_4,y2_4,y3_4,y4_4)), na.rm = TRUE)
)
```
---
## B2. Linear growth model
Fit a linear LGM with time scores (0, 1, 2, 3).
```{r}
# Linear LGM on the wave composites. Time scores 0,1,2,3 make the intercept
# factor the expected score at wave 1 and the slope the per-wave change.
lgm_linear <- '
i =~ 1*y_t1 + 1*y_t2 + 1*y_t3 + 1*y_t4
s =~ 0*y_t1 + 1*y_t2 + 2*y_t3 + 3*y_t4
# optional residual autocorrelation (often helpful)
y_t1 ~~ y_t2
y_t2 ~~ y_t3
y_t3 ~~ y_t4
'
# growth() applies LGM defaults: observed intercepts fixed to 0, latent
# means of i and s freely estimated.
fit_lgm <- growth(lgm_linear, data = dat, missing = "fiml")
summary(fit_lgm, fit.measures = TRUE, standardized = TRUE)
```
Extract and interpret the growth factor means/variances:
```{r}
# Keep only the growth-factor means (op "~1") and (co)variances (op "~~").
parameterEstimates(fit_lgm) |>
filter(op %in% c("~1","~~") & lhs %in% c("i","s"))
```
### Questions
- What is the interpretation of **Mean(i)** and **Mean(s)** with this time coding?
- Are there individual differences in baseline and change (variances of i and s)?
- What does **Cov(i, s)** suggest about baseline–change association?
---
## B3. Re-centering time (interpretation exercise)
Change the slope loadings to make the intercept represent wave 2.
Example time scores: (-1, 0, 1, 2).
```{r}
# Same linear LGM, but time scores (-1, 0, 1, 2) center time at wave 2:
# Mean(i) now represents the expected score at wave 2 (slope unchanged).
lgm_center_w2 <- '
i =~ 1*y_t1 + 1*y_t2 + 1*y_t3 + 1*y_t4
s =~ -1*y_t1 + 0*y_t2 + 1*y_t3 + 2*y_t4
y_t1 ~~ y_t2
y_t2 ~~ y_t3
y_t3 ~~ y_t4
'
fit_lgm_w2 <- growth(lgm_center_w2, data = dat, missing = "fiml")
parameterEstimates(fit_lgm_w2) |>
filter(op %in% c("~1","~~") & lhs %in% c("i","s"))
```
### Question
- How did the **Mean(i)** change relative to the previous model, and why?
---
## B4. If linear growth is not enough: quadratic growth (optional but recommended)
```{r}
# Quadratic LGM: q carries squared time scores (0, 1, 4, 9).
lgm_quad <- '
i =~ 1*y_t1 + 1*y_t2 + 1*y_t3 + 1*y_t4
s =~ 0*y_t1 + 1*y_t2 + 2*y_t3 + 3*y_t4
q =~ 0*y_t1 + 1*y_t2 + 4*y_t3 + 9*y_t4
y_t1 ~~ y_t2
y_t2 ~~ y_t3
y_t3 ~~ y_t4
'
fit_lgm_q <- growth(lgm_quad, data = dat, missing = "fiml")
# anova(fit_lgm, fit_lgm_q)
# NOTE(review): with only 4 waves, i + s + q plus three residual covariances
# can exhaust the available degrees of freedom (CFI may print NA) — check
# the model df before interpreting these indices.
fitMeasures(fit_lgm_q, c("cfi","rmsea","srmr"))
```
### Questions
- Does quadratic growth improve fit meaningfully?
- Is the quadratic factor mean/variance interpretable and substantively plausible?
---
# Part C — Conditional growth (predictors of intercept and slope)
## C1. Add a predictor
Create a predictor `x` (in real applications: trait, condition, demographic variable, baseline stress, etc.).
```{r}
# BUG FIX: `dat` already contains the simulated covariate x, which truly
# affects the intercept (0.50) and slope (0.25) in the generating model.
# Unconditionally overwriting it with fresh noise erases those effects and
# makes the conditional growth estimates ~0 and non-significant, which
# contradicts the lab's stated design. Only create x if it is absent
# (e.g., when adapting this chunk to a real data set without a predictor).
if (!"x" %in% names(dat)) {
  set.seed(77)
  dat$x <- rnorm(nrow(dat))
}
```
Fit conditional growth:
```{r}
# Conditional LGM: regress both growth factors on the covariate x.
# i ~ x: association between x and the baseline (wave-1) level.
# s ~ x: association between x and the per-wave rate of change.
lgm_cond <- '
i =~ 1*y_t1 + 1*y_t2 + 1*y_t3 + 1*y_t4
s =~ 0*y_t1 + 1*y_t2 + 2*y_t3 + 3*y_t4
i ~ x
s ~ x
y_t1 ~~ y_t2
y_t2 ~~ y_t3
y_t3 ~~ y_t4
'
fit_lgm_c <- growth(lgm_cond, data = dat, missing = "fiml")
summary(fit_lgm_c, fit.measures = TRUE, standardized = TRUE)
```
### Questions
- What does `x → i` mean? What does `x → s` mean?
- If `x → s` is positive, how would you explain it in words?
---
# Optional extension — “Classic MG-CFA” style via long format
The `group=` workflow (and `semTools::measurementInvariance()`) becomes natural if you reshape data to **long format**:
one row per person–wave, and a `wave` grouping variable.
This is **optional** and meant to connect the longitudinal invariance logic to what you did in MG-CFA.
```{r}
# OPTIONAL: reshape to long format for invariance automation
# Requires tidyr
library(tidyr)
# Wide -> long: one row per person-wave. names_pattern splits e.g. "y3_2"
# into the item stem ("y3", kept as a column via ".value") and the wave
# number ("2"); the regex deliberately excludes the y_t* composites.
dat_long <- dat |>
mutate(id = row_number()) |>
pivot_longer(
cols = matches("^y[1-4]_[1-4]$"),
names_to = c(".value","wave"),
names_pattern = "^(y\\d)_(\\d)$"
) |>
mutate(wave = factor(wave))
head(dat_long)
```
Run invariance:
```{r}
# OPTIONAL: classic style invariance test in long data
# NOTE(review): semTools::measurementInvariance() is deprecated; prefer
# semTools::measEq.syntax() or comparing cfa() fits with group.equal=.
# Caution: grouping by wave treats waves as independent samples, ignoring
# within-person dependence — acceptable only as a teaching bridge to MG-CFA.
# model_long <- 'F =~ y1 + y2 + y3 + y4'
# inv <- measurementInvariance(model = model_long, data = dat_long, group = "wave", strict = FALSE)
# inv$fit.measures
```
---
## What to hand in (suggested)
1. A small table with fit indices for configural, metric, scalar (and partial scalar if used).
2. A brief justification (2–6 sentences) of your invariance decision.
3. Linear LGM results: interpret mean(i), mean(s), var(s), cov(i,s).
4. One centering exercise: explain how time coding changes intercept meaning.
5. (Optional) Quadratic growth comparison + interpretation.
6. Conditional growth: interpret the regression(s) of i and s on x.
::: {.callout-tip}
Keep it “report-ready”: always report χ²(df), CFI/TLI, RMSEA (+ CI if possible), SRMR + the key local decision you made (e.g., which intercept you freed).
:::