# Load all packages:
library("psychonetrics")
library("dplyr")
library("qgraph")
library("ggplot2")
library("bootnet")
library("Rmpfr")
library("semPlot")
First we can load the data and make a vector of the observed variables:
# Load the data:
data("StarWars")
# Observed variables:
obsvars <- c("Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9", "Q10")
Next, we can form our first model: a GGM model with all edges included:
# Form GGM model:
ggm1 <- ggm(StarWars, vars = obsvars)
# Run model:
ggm1 <- ggm1 %>% runmodel
## Estimating baseline model...
## Estimating model...
## Computing Fisher information...
## Computing modification indices...
If we are interested in the parameter values and significance of edges, we can use the following command:
# Check parameters:
ggm1 %>% parameters
Next, we can automatically prune all edges:
# Prune non-significant:
ggm1 <- ggm1 %>%
prune(adjust = "fdr", alpha = 0.01)
We can check the modification indices to see wich edges we can add now to improve fit:
# Check modification indices:
ggm1 %>% MIs
##
## Top 10 modification indices:
##
## var1 op var2 est mi pmi epc matrix row col group group_id
## Q1 ~1 <NA> 2.760148 mu 1 1 fullsample 1
## Q2 ~1 <NA> 4.188192 mu 2 1 fullsample 1
## Q3 ~1 <NA> 2.819188 mu 3 1 fullsample 1
## Q4 ~1 <NA> 3.214022 mu 4 1 fullsample 1
## Q5 ~1 <NA> 2.435424 mu 5 1 fullsample 1
## Q6 ~1 <NA> 2.686347 mu 6 1 fullsample 1
## Q7 ~1 <NA> 2.996310 mu 7 1 fullsample 1
## Q8 ~1 <NA> 2.383764 mu 8 1 fullsample 1
## Q9 ~1 <NA> 3.247232 mu 9 1 fullsample 1
## Q10 ~1 <NA> 2.118081 mu 10 1 fullsample 1
To automatically add edges at \(\alpha = 0.05\) until BIC can no longer be improved, use the following function:
# Stepup estimation:
ggm1 <- ggm1 %>% stepup(criterion = "bic", alpha = 0.05)
This process can be simplified with a single Dplyr pipe!
### Slide 33 ###
# Form GGM model:
ggm1 <- ggm(StarWars, vars = obsvars)
# Run model and estimate structure:
ggm1 <- ggm1 %>% runmodel %>%
prune(adjust = "fdr", alpha = 0.01) %>%
stepup(criterion = "bic", alpha = 0.05)
We can look at fit indices (although they will likely be good):
# Model fit:
ggm1 %>% fit
## Measure Value
## logl -4131.26
## unrestricted.logl -4109.06
## baseline.logl -4322.24
## nvar 10
## nobs 65
## npar 33
## df 32
## objective 12.11
## chisq 44.39
## pvalue 0.071
## baseline.chisq 426.35
## baseline.df 45
## baseline.pvalue ~0
## nfi 0.90
## pnfi 0.64
## tli 0.95
## nnfi 0.95
## rfi 0.85
## ifi 0.97
## rni 0.97
## cfi 0.97
## rmsea 0.038
## rmsea.ci.lower ~0
## rmsea.ci.upper 0.063
## rmsea.pvalue 0.77
## aic.ll 8328.52
## aic.ll2 8337.98
## aic.x -19.61
## aic.x2 110.39
## bic 8447.39
## bic2 8342.75
## ebic.25 8523.37
## ebic.5 8599.36
## ebic.75 8660.14
## ebic1 8751.33
Now, we can extract the network and plot it:
# Obtain network:
net1 <- getmatrix(ggm1, "omega")
# Plot:
qgraph(net1,
layout = "spring",
theme = "colorblind",
labels = obsvars)
We can also use bootnet for the model estimation:
# Estimate model using bootnet:
mod <- estimateNetwork(StarWars[,obsvars], default = "ggmModSelect")
structure <- 1*(mod$graph!=0)
# Form second GGM model with fixed structure:
ggm2 <- ggm(StarWars, omega = structure, vars = obsvars)
# ERun the model:
ggm2 <- ggm2 %>% runmodel
# Obtain network:
net2 <- getmatrix(ggm2, "omega")
This will likely be slightly different from the network estimated by psychonetrics. We can plot both networks next to each other:
# Obtain average layout (function from qgraph)
Layout <- averageLayout(net1,net2)
# plot both networks:
layout(t(1:2))
qgraph(net1, layout = Layout, theme = "colorblind", title = "psychonetrics",
labels = obsvars)
qgraph(net2, layout = Layout, theme = "colorblind", title = "ggmModSelect",
labels = obsvars)
We can even statistically compare these networks:
### Slide 36 ###
compare(psychonetrics = ggm1, ggmModSelect = ggm2)
## model DF AIC BIC RMSEA Chisq Chisq_diff DF_diff
## 1 ggmModSelect 31 8327.476 8449.948 0.03510471 41.35289 NA NA
## 2 psychonetrics 32 8328.515 8447.385 0.03780140 44.39182 3.038923 1
## p_value
## 1 NA
## 2 0.08128984
To perform a confirmatory factor analysis, we first need to specify the factor structure:
# Factor loadings matrix:
Lambda <- matrix(0, 10, 3)
Lambda[1:4,1] <- 1
Lambda[c(1,5:7),2] <- 1
Lambda[c(1,8:10),3] <- 1
print(Lambda)
## [,1] [,2] [,3]
## [1,] 1 1 1
## [2,] 1 0 0
## [3,] 1 0 0
## [4,] 1 0 0
## [5,] 0 1 0
## [6,] 0 1 0
## [7,] 0 1 0
## [8,] 0 0 1
## [9,] 0 0 1
## [10,] 0 0 1
Next, we can make a vector with names for the latent variables:
# Latents:
latents <- c("Prequels","Original","Sequels")
Now, we can form the CFA model and run it. We use the lvm
model function (latent variable model) for this:
# Make CFA model:
cfamod <- lvm(StarWars, lambda = Lambda, vars = obsvars,
identification = "variance", latents = latents)
# Run model:
cfamod <- cfamod %>% runmodel
## Estimating baseline model...
## Estimating model...
## Computing Fisher information...
## Computing modification indices...
We can look at some fit indices of this model:
cfamod@fitmeasures[c("chisq","pvalue","rmsea","cfi")]
## $chisq
## [1] 34.57911
##
## $pvalue
## [1] 0.2582765
##
## $rmsea
## [1] 0.02373261
##
## $cfi
## [1] 0.9879924
These perfectly reproduce the famous lavaan package!
library("lavaan")
## This is lavaan 0.6-6.1487
## lavaan is BETA software! Please report any bugs.
##
## Attaching package: 'lavaan'
## The following object is masked from 'package:psychonetrics':
##
## duplicationMatrix
lavmod <- '
Prequels =~ Q1 + Q2 + Q3 + Q4
Original =~ Q1 + Q5 + Q6 + Q7
Sequels =~ Q1 + Q8 + Q9 + Q10
'
lavfit <- cfa(lavmod,StarWars,std.lv=TRUE)
fitMeasures(lavfit)[c("chisq","pvalue","rmsea","cfi")]
## chisq pvalue rmsea cfi
## 34.57911327 0.25827655 0.02373261 0.98799236
Next, we can form an LNM model in the exact same way, using the lnm
function (alternatively, we can use lvm(..., latent = "ggm")
):
# Latent network model:
lnmmod <- lnm(StarWars, lambda = Lambda, vars = obsvars,
identification = "variance", latents = latents)
We can run this model and estimate a latent network structure with the same algorithm we used before:
# Run model and estimate latent structure:
lnmmod <- lnmmod %>% runmodel %>%
prune(adjust = "fdr", alpha = 0.01) %>%
stepup(criterion = "bic", alpha = 0.05)
Finally, we can compare all models:
# Compare all models:
comparison <- compare(
ggm_psychonetrics = ggm1,
ggm_ggmModSelect = ggm2,
CFA = cfamod,
LNM = lnmmod)
print(comparison)
## model DF AIC BIC RMSEA Chisq Chisq_diff DF_diff
## 1 CFA 30 8322.703 8448.777 0.02373261 34.57911 NA NA
## 2 ggm_ggmModSelect 31 8327.476 8449.948 0.03510471 41.35289 6.773781 1
## 3 LNM 31 8321.016 8443.488 0.02152480 34.89233 6.460568 0
## 4 ggm_psychonetrics 32 8328.515 8447.385 0.03780140 44.39182 9.499491 1
## p_value
## 1 NA
## 2 0.009250668
## 3 0.000000000
## 4 0.002055289
The BICs are close, so one thing we could do is transform these to posterior model probabilities:
# To posterior model probability:
BICs <- mpfr(comparison$BIC, 100)
BICtrans <- exp(-0.5 * BICs)
sumBICtrans <- sum(BICtrans)
comparison$modelProbability <- as.numeric(BICtrans / sumBICtrans)
# Make a plot:
g <- ggplot(comparison, aes(x=model, y = modelProbability)) +
geom_bar(stat="identity") + xlab("") +
ylab("Posterior model probability") + theme_bw() +
ylim(0,1)
print(g)
The latent network model performs best in this case!
Finally, we can make a path diagram. This is not very trivial… but can be done with the semPlot package:
# LISREL Model: LY = Lambda (lambda-y), TE = Theta (theta-epsilon), PS = Psi
mod_lisrel <- lisrelModel(LY = getmatrix(lnmmod, "lambda"),
PS = getmatrix(lnmmod, "sigma_zeta"),
TE = getmatrix(lnmmod, "sigma_epsilon"),
latNamesEndo = latents,
manNamesEndo = obsvars)
# Change latent to pcors:
omega <- getmatrix(lnmmod, "omega_zeta")
mod_lisrel@Pars$std[23:28] <- mod_lisrel@Pars$est[23:28] <- omega[lower.tri(omega,diag=TRUE)]
# We can make this nicer (set whatLabels = "none" to hide labels):
graph <- semPaths(mod_lisrel,
what = "std", # this argument controls what the color of edges represent. In this case, standardized parameters
whatLabels = "omit", # This argument controls what the edge labels represent. In this case, parameter estimates
# as.expression = c("nodes","edges"), # This argument draws the node and edge labels as mathematical exprssions
style = "lisrel", # This will plot residuals as arrows, closer to what we use in class
residScale = 10, # This makes the residuals larger
theme = "colorblind", # qgraph colorblind friendly theme
layout = "tree2", # tree layout options are "tree", "tree2", and "tree3"
cardinal = "lat cov", # This makes the latent covariances connet at a cardinal center point
curvePivot = TRUE, # Changes curve into rounded straight lines
sizeMan = 4, # Size of manifest variables
sizeLat = 10, # Size of latent varibales
edge.label.cex = 1,
mar = c(6,1,8,1), # Sets the margins
reorder = FALSE, # Prevents re-ordering of ovbserved variables
# filetype = "pdf", # Store to PDF
# filename = "semPlotExample1", # Set the name of the file
width = 8, # Width of the plot
height = 5, # Height of plot
groups = "latents", # Colors according to latents,
pastel = TRUE, # Pastel colors
borders = FALSE,
nCharNodes = 0, # Disable borders
DoNotPlot = TRUE # Prevents plotting for now
)
graph$Edgelist$directed[23:26] <- FALSE # We need to make the latent structure undirected not bidirectional
plot(graph) # Plot the results
We can load the dataset into R with:
# Load dataset:
data("Jonas")
Next, we can define the variables to use, and reorder the dataset to prevent some numerical issues lateron:
# Variables to use:
vars <- names(Jonas)[1:10]
# Arranged groups to put unfamiliar group first (beta constrained to 1):
Jonas <- Jonas[order(Jonas$group),]
Now, we can form the first model: a two-group Ising model with all edges included and different networks and thresholds per group. The \(\beta\) parameter is identified to \(1\) in both groups:
# Form saturated model:
model1 <- Ising(Jonas, vars = vars, groups = "group")
# Run model:
model1 <- model1 %>% runmodel
We may also be interested in finding a sparse model with fewer parameters, as a dense model in two groups will cost a lot of parameters, and we can expect BIC to prefer equality constraint models then simply to reduce the number of parameters. Here, we will use a liberal search algorithm, removing all edges at \(\alpha = 0.05\) and adding edges at the same significance level until BIC is optimized:
# Prune-stepup to find a sparse model:
model1b <- model1 %>% prune(alpha = 0.05) %>% stepup(alpha = 0.05)
Next, we can constrain networks to be equal across groups. This automatically allows \(\beta\) to be freely estimated in the second group:
# Equal networks:
model2 <- model1 %>% groupequal("omega") %>% runmodel
# Prune-stepup to find a sparse model:
model2b <- model2 %>% prune(alpha = 0.05) %>% stepup(mi = "mi_equal", alpha = 0.05)
We can also constrain the threshold parameters equal across groups:
# Equal thresholds:
model3 <- model2 %>% groupequal("tau") %>% runmodel
# Prune-stepup to find a sparse model:
model3b <- model3 %>% prune(alpha = 0.05) %>% stepup(mi = "mi_equal", alpha = 0.05)
Finally, we can constrain \(\beta\) to be equal across groups. This sets \(\beta\) to \(1\) in the second group again, but this time it is a constraint, not an identification:
# Equal beta:
model4 <- model3 %>% groupequal("beta") %>% runmodel
# Prune-stepup to find a sparse model:
model4b <- model4 %>% prune(alpha = 0.05) %>% stepup(mi = "mi_equal", alpha = 0.05)
After running these models, we can compare all models on AIC and BIC:
# Compare all models:
comparison <- compare(
`1. all parameters free (dense)` = model1,
`2. all parameters free (sparse)` = model1b,
`3. equal networks (dense)` = model2,
`4. equal networks (sparse)` = model2b,
`5. equal networks and thresholds (dense)` = model3,
`6. equal networks and thresholds (sparse)` = model3b,
`7. all parameters equal (dense)` = model4,
`8. all parameters equal (sparse)` = model4b
) %>% arrange(BIC) %>% select(model, DF, BIC)
# Print results:
print(comparison)
## model DF BIC
## 1 6. equal networks and thresholds (sparse) 89 2114.029
## 2 8. all parameters equal (sparse) 92 2136.647
## 3 4. equal networks (sparse) 80 2138.304
## 4 5. equal networks and thresholds (dense) 54 2224.369
## 5 2. all parameters free (sparse) 54 2239.699
## 6 7. all parameters equal (dense) 55 2245.442
## 7 3. equal networks (dense) 44 2251.331
## 8 1. all parameters free (dense) 0 2443.210
We can again transform this BIC to a posterior model probability:
# To posterior model probability:
BICs <- mpfr(comparison$BIC, 100)
BICtrans <- exp(-0.5 * BICs)
sumBICtrans <- sum(BICtrans)
comparison$modelProbability <- as.numeric(BICtrans / sumBICtrans)
# Numeric model:
comparison$mod <- as.numeric(substr(comparison$model,1,1))
# Make a plot:
g <- ggplot(comparison, aes(x=mod, y = modelProbability)) +
geom_bar(stat="identity") + xlab("Model") +
ylab("Posterior model probability") + theme_bw() +
scale_x_continuous(breaks=seq_len(nrow(comparison))) +
ylim(0,1)
print(g)
This shows that the sparse model with equal networks and thresholds, but not equal temperature, fits best! We can plot the temperature (inverse of \(\beta\)):
# Extract beta:
beta <- unlist(getmatrix(model3b, "beta"))
# Standard errors::
SEs <- model3b@parameters$se[model3b@parameters$matrix == "beta"]
# Make a data frame:
df <- data.frame(
temperature = 1/beta,
group = names(beta),
lower = 1 / (beta-qnorm(0.975) * SEs),
upper = 1 / (beta+qnorm(0.975) * SEs),
stringsAsFactors = FALSE
)
# Some extra values:
df$fixed <- is.na(df$lower)
df$group <- factor(df$group, levels = c("Doesn't know Jonas", "Knows Jonas"))
# Create the plot:
library("ggplot2")
g <- ggplot(df,aes(x=as.numeric(group), y = temperature, ymin = lower, ymax = upper)) +
geom_line() +
geom_errorbar(width = 0.05) +
geom_point(cex = 5, colour = "black") +
geom_point(aes(colour = fixed), cex = 4) + theme_bw() +
xlab("") + ylab(expression(paste("Temperature (",1/beta,")"))) +
scale_x_continuous(breaks = 1:2, labels = levels(df$group), expand = c(0.1,0.1)) +
scale_y_continuous(expand = c(0,.1), limits = c(0,1)) +
theme( panel.grid.major.x = element_blank(),panel.grid.minor.x = element_blank())+
ggtitle(expression(paste("Model 6: 2 groups; ",bold(Omega)," sparse & equal; ",bold(tau)," equal; ",beta," free"))) +
scale_colour_manual(values = c("black","white")) +
theme(legend.position = "none")
# Plot:
print(g)
This shows that in line with predictions, temperature is lower (\(\beta\) is higher) in the group familliar with the attitude object! Finally, we can plot the network model:
# Make labels:
labels <- c("good scientist",
"wears beautiful\njeans",
"cares about\npeople like you",
"solve economics",
"hardworking",
"honest",
"in touch with\nordinary people",
"knowledgeable",
"can't make up\nhis mind",
"gets things done")
# Extract network structure and thresholds:
network <- getmatrix(model3b, "omega")[[1]]
thresholds <- getmatrix(model3b, "tau")[[1]]
# Scale thresholds for colors:
scaledthresh <- as.vector(thresholds / (2*max(abs(thresholds))))
# Make colors:
cols <- ifelse(scaledthresh < 0, "red", "darkblue")
cols[scaledthresh>0] <- qgraph:::Fade(cols[scaledthresh>0],alpha = scaledthresh[scaledthresh>0], "white")
cols[scaledthresh<0] <- qgraph:::Fade(cols[scaledthresh<0],alpha = abs(scaledthresh)[scaledthresh<0], "white")
# Plot network and save to file:
qgraph(network, layout = "spring", labels = labels,
shape = "rectangle", vsize = 15, vsize2 = 8,
theme = "colorblind", color = cols,
cut = 0.5, repulsion = 0.9)
Download the SHARE data summary statistics from Dropbox and load these into R:
# Load SHARE data:
load("SHARE.RData")
First, we need to form a design matrix. Each row indicates a variable and each column a wave of data. Values can be missing if a certain variable has not been measured at a certain wave of measurement:
# Form design matrix:
design <- matrix(colnames(SHAREcovs),nrow=5,ncol=3)
print(design)
## [,1] [,2] [,3]
## [1,] "4_bmi" "5_bmi" "6_bmi"
## [2,] "4_casp" "5_casp" "6_casp"
## [3,] "4_eurod" "5_eurod" "6_eurod"
## [4,] "4_maxgrip" "5_maxgrip" "6_maxgrip"
## [5,] "4_sphus" "5_sphus" "6_sphus"
Next, we can form amd run the model:
# Form model:
model <- panelgvar(covs=SHAREcovs, means = SHAREmeans, nobs = SHAREnobs,
vars = design, covtype = "UB")
# Estimate saturated model:
model <- model %>% runmodel
## Estimating baseline model...
## Estimating model...
## Computing Fisher information...
## Computing modification indices...
Following, we can perform model search strategies to find a sparse model:
# Model search:
model2 <- model %>% prune(alpha = 0.01, recursive = FALSE) %>% modelsearch(verbose=FALSE)
The model fits very well!
# Fit measures:
model2 %>% fit
## Measure Value
## logl -323506.51
## unrestricted.logl -322148.38
## baseline.logl -432331.46
## nvar 15
## nobs 135
## npar 43
## df 92
## objective 2.98
## chisq 2716.26
## pvalue ~0
## baseline.chisq 220366.17
## baseline.df 105
## baseline.pvalue ~0
## nfi 0.99
## pnfi 0.87
## tli 0.99
## nnfi 0.99
## rfi 0.99
## ifi 0.99
## rni 0.99
## cfi 0.99
## rmsea 0.037
## rmsea.ci.lower 0.036
## rmsea.ci.upper 0.038
## rmsea.pvalue 1
## aic.ll 647099.02
## aic.ll2 647099.20
## aic.x 2532.26
## aic.x2 2802.26
## bic 647441.34
## bic2 647304.69
## ebic.25 647557.79
## ebic.5 647674.23
## ebic.75 647767.39
## ebic1 647907.13
Finally, we can make a plot of these networks:
# Extract networks:
temporal <- getmatrix(model2, "PDC")
contemporaneous <- getmatrix(model2, "omega_zeta_within")
between <- getmatrix(model2, "omega_zeta_between")
# Labels:
labels <- c("BMI","quality of life", "depression", "grip strength","perceived health")
# Plot networks:
layout(t(1:3))
qgraph(temporal, layout = "circle", labels = labels,
title = "Temporal", shape = "rectangle",
vsize = 30, vsize2 = 20,
mar = rep(8,4), asize = 7,
theme = "colorblind")
box("figure")
qgraph(contemporaneous, layout = "circle", labels = labels,
title = "Contemporaneous",shape = "rectangle",
vsize = 30, vsize2 = 20,
mar = rep(8,4),
theme = "colorblind")
box("figure")
qgraph(between, layout = "circle", labels = labels,
title = "Between-persons",shape = "rectangle",
vsize = 30, vsize2 = 20,
mar = rep(8,4),
theme = "colorblind")
box("figure")
For more advanced panel data models, see the pre-print and supplementary materials.
Download the datafile selfEsteem.csv
from Dropbox. We can load this file in R as follows:
data <- read.csv("selfEsteem.csv", sep = "\t")
This is a large dataset obtained from openpsychometrics.org containing measures of the Rosenberg Self-Esteem Scale. The items are the following:
Item label | Item description |
---|---|
Q1 | I feel that I am a person of worth, at least on an equal plane with others. |
Q2 | I feel that I have a number of good qualities. |
Q3 | All in all, I am inclined to feel that I am a failure. |
Q4 | I am able to do things as well as most other people. |
Q5 | I feel I do not have much to be proud of. |
Q6 | I take a positive attitude toward myself. |
Q7 | On the whole, I am satisfied with myself. |
Q8 | I wish I could have more respect for myself. |
Q9 | I certainly feel useless at times. |
Q10 | At times I think I am no good at all. |
In addition, the dataset contains information about gender, age, datasource and country. We are only interested in the items:
items <- paste0("Q",1:10)
Because the data is so large, we can split the sample in two datasets. We use all odd cases (\(1, 3, 5, \ldots\)) to train models and all the even cases (\(2, 4, 6, \ldots\)) to test models:
trainData <- data[c(TRUE,FALSE),]
testData <- data[c(FALSE,TRUE),]
Let’s first estimate a GGM on the training data, using full information maximum likelihood (FIML) estimation:
# Form saturated model:
GGM_train <- ggm(trainData, vars = items, estimator = "FIML")
##
|
| | 0%
|
|======================================================================| 100%
# Perform model selection steps:
GGM_train <- GGM_train %>% runmodel %>%
prune(alpha = 0.01, recursive = FALSE) %>%
modelsearch(verbose = FALSE)
Here, we used the newer modelsearch
function to find the best model. We can plot this network:
# Obtain the network:
net_train <- getmatrix(GGM_train, "omega")
qgraph(net_train, theme = "colorblind", layout = "spring")
Next, we can fit this model to the test data:
# Obtain structure:
structure <- 1*(net_train != 0)
# Form test model:
GGM_test <- ggm(testData, vars = items, estimator = "FIML",
omega = structure)
## Computing missingness patterns...
##
|
| | 0%
|
|======================================================================| 100%
# Fit model:
GGM_test <- GGM_test %>% runmodel
## Estimating baseline model...
## Estimating saturated model...
## Estimating model...
## Computing Fisher information...
## Computing modification indices...
# Obtain the network:
net_test <- getmatrix(GGM_test, "omega")
qgraph(net_test, theme = "colorblind", layout = "spring")
We can now investigate the fit of this model:
GGM_test %>% fit
## Measure Value
## logl -262354.71
## unrestricted.logl -262337.41
## baseline.logl -325682.47
## nvar 10
## nobs 65
## npar 59
## df 6
## objective 3.50
## chisq 34.60
## pvalue ~0
## baseline.chisq 126690.11
## baseline.df 45
## baseline.pvalue ~0
## nfi 1.0
## pnfi 0.13
## tli 1.0
## nnfi 1.0
## rfi 1.0
## ifi 1.0
## rni 1.0
## cfi 1.0
## rmsea 0.014
## rmsea.ci.lower 0.0098
## rmsea.ci.upper 0.019
## rmsea.pvalue 1
## aic.ll 524827.42
## aic.ll2 524827.71
## aic.x 22.60
## aic.x2 152.60
## bic 525304.45
## bic2 525116.95
## ebic.25 525440.30
## ebic.5 525576.15
## ebic.75 525684.83
## ebic1 525847.86
If you are familiar with SEM fit indices, you may see that this model fits very well.
To control for a potential common self-esteem factor, we can estimate a Residual Network Model (RNM). First, we will start with a saturated CFA model:
# Factor loadings matrix (one factor):
Lambda <- matrix(1,10,1)
# CFA model:
CFA_train <- rnm(trainData, vars = items, estimator = "FIML",
lambda = Lambda) %>% runmodel
##
|
| | 0%
|
|======================================================================| 100%
CFA_test <- rnm(testData, vars = items, estimator = "FIML",
lambda = Lambda) %>% runmodel
##
|
| | 0%
|
|======================================================================| 100%
This model does not fit great in both datasets:
rbind(
train = CFA_train@fitmeasures,
test = CFA_test@fitmeasures)[,c("rmsea","cfi","tli","rni")]
## rmsea cfi tli rni
## train 0.1375208 0.875883 0.840421 0.875883
## test 0.14104 0.8681317 0.8304551 0.8681317
Next, we can train an RNM model using stepup model selection using modification indices, followed by modelsearch
for more finetuned search:
# Estimate RNM:
RNM_train <- CFA_train %>%
stepup(criterion = "bic",verbose = FALSE) %>%
modelsearch(verbose = FALSE)
# Obtain residual network:
residnet_train <- getmatrix(RNM_train, "omega_epsilon")
Finally, we can test this RNM in the test data:
structure <- 1*(residnet_train!=0)
# Form RNM on test data:
RNM_test <- rnm(testData, vars = items, estimator = "FIML",
lambda = Lambda, omega_epsilon = structure)
## Computing missingness patterns...
##
|
| | 0%
|
|======================================================================| 100%
# Run model:
RNM_test <- RNM_test %>% runmodel
## Estimating baseline model...
## Estimating saturated model...
## Estimating model...
## Computing Fisher information...
## Computing modification indices...
This model fits very well in the test data:
RNM_test %>% fit
## Measure Value
## logl -262387.72
## unrestricted.logl -262337.41
## baseline.logl -325682.47
## nvar 10
## nobs 65
## npar 53
## df 12
## objective 3.50
## chisq 100.63
## pvalue ~0
## baseline.chisq 126690.11
## baseline.df 45
## baseline.pvalue ~0
## nfi 1.0
## pnfi 0.27
## tli 1.0
## nnfi 1.0
## rfi 1.0
## ifi 1.0
## rni 1.0
## cfi 1.0
## rmsea 0.018
## rmsea.ci.lower 0.014
## rmsea.ci.upper 0.021
## rmsea.pvalue 1
## aic.ll 524881.45
## aic.ll2 524881.69
## aic.x 76.63
## aic.x2 206.63
## bic 525309.97
## bic2 525141.54
## ebic.25 525432.01
## ebic.5 525554.04
## ebic.75 525651.67
## ebic1 525798.12
We can plot the factor loadings and residual network as follows:
# Obtain residual network:
residnet_test <- getmatrix(RNM_test, "omega_epsilon")
# Obtain factor loadings:
factorloadings <- getmatrix(RNM_test, "lambda")
# Plot:
layout(t(1:2))
qgraph(residnet_test, theme = "colorblind", layout = "spring",
title = "Residual network", vsize = 8)
qgraph.loadings(factorloadings, theme = "colorblind", model = "reflective",
title = "Factor loadings", vsize = c(8,13), asize = 5)
Finally, we can compare the RNM to the earlier estimated GGM:
# Comparison in training data:
compare(
GGM = GGM_train,
RNM = RNM_train
)
## model DF AIC BIC RMSEA Chisq Chisq_diff DF_diff p_value
## 1 GGM 6 521870.8 522347.8 0.01049712 21.85869 NA NA NA
## 2 RNM 12 521878.4 522306.9 0.01011165 41.43076 19.57207 6 0.003299056
# Comparison in test data:
compare(
GGM = GGM_test,
RNM = RNM_test
)
## model DF AIC BIC RMSEA Chisq Chisq_diff DF_diff
## 1 GGM 6 524827.4 525304.4 0.01409590 34.59649 NA NA
## 2 RNM 12 524881.4 525310.0 0.01754744 100.63077 66.03427 6
## p_value
## 1 NA
## 2 2.65204e-12
We obtain a very similar fit, with BIC prefering the RNM in the training data but not in the test data. The RNM also contains less parameters (higher degrees of freedom) than the GGM, and fits well in the test data. At this point, we would likely use theoretical reasoning to choose which of these competing models we would prefer.
From the figure above, we can see that the residual network distincly forms clusters for positively and negatively frased items. To this end, we could also explore a two-factor and a bi-factor model. The code below shows how to estimate a RNM from a two-factor model:
# Factor loadings matrix (two factors):
Lambda2 <- matrix(0,10,2)
Lambda2[c(1,2,4,6,7),1] <- 1 # Positive items
Lambda2[c(3,5,8,9,10),2] <- 1 # Negative items
# Fit 2-factor models:
twofac_train <- rnm(trainData, vars = items, estimator = "FIML",
lambda = Lambda2) %>% runmodel
##
|
| | 0%
|
|======================================================================| 100%
twofac_test <- rnm(testData, vars = items, estimator = "FIML",
lambda = Lambda2) %>% runmodel
##
|
| | 0%
|
|======================================================================| 100%
# Fit 2-factor RNM to train data:
RNM_twofac_train <- twofac_train %>%
stepup(criterion = "bic",verbose = FALSE) %>%
modelsearch(verbose = FALSE)
# Fit 2-factor RNM to test data:
structure <- 1*(getmatrix(RNM_twofac_train,"omega_epsilon")!=0)
RNM_twofac_test <- rnm(testData, vars = items, estimator = "FIML",
lambda = Lambda2, omega_epsilon = structure) %>%
runmodel
##
|
| | 0%
|
|======================================================================| 100%
Subsequently, the code below shows how to estimate a RNM from a bi-factor model:
# Factor loadings matrix (bifactor):
Lambda3 <- matrix(0,10,3)
Lambda3[,1] <- 1 # Self esteem
Lambda3[c(1,2,4,6,7),2] <- 1 # Positive items
Lambda3[c(3,5,8,9,10),3] <- 1 # Negative items
# Fit bifactor:
bi_train <- rnm(trainData, vars = items, estimator = "FIML",
lambda = Lambda3, sigma_zeta = "empty") %>% runmodel
##
|
| | 0%
|
|======================================================================| 100%
bi_test <- rnm(testData, vars = items, estimator = "FIML",
lambda = Lambda3, sigma_zeta = "empty") %>% runmodel
##
|
| | 0%
|
|======================================================================| 100%
# Fit bifactor RNM to train data:
RNM_bi_train <- bi_train %>%
stepup(criterion = "bic",verbose = FALSE) %>%
modelsearch(verbose = FALSE)
# Fit bifactor RNM to test data:
structure <- 1*(getmatrix(RNM_bi_train,"omega_epsilon")!=0)
RNM_bi_test <- rnm(testData, vars = items, estimator = "FIML",
lambda = Lambda3, omega_epsilon = structure) %>%
runmodel
##
|
| | 0%
|
|======================================================================| 100%
We can now compare all these models:
comparison <- compare(
GGM = GGM_test,
RNM = RNM_test,
twofactor = twofac_test,
twofactor_RNM = RNM_twofac_test,
bifactor = bi_test,
bifactor_RNM = RNM_bi_test
)
print(comparison)
## model DF AIC BIC RMSEA Chisq Chisq_diff DF_diff
## 1 GGM 6 524827.4 525304.4 0.01409590 34.59649 NA NA
## 2 bifactor_RNM 11 524954.6 525391.2 0.02468536 171.78570 137.18920 5
## 3 RNM 12 524881.4 525310.0 0.01754744 100.63077 71.15493 1
## 4 twofactor_RNM 13 524839.3 525259.7 0.01234011 60.48512 40.14564 1
## 5 bifactor 25 526445.6 526769.0 0.05270428 1690.74196 1630.25683 12
## 6 twofactor 34 533001.5 533252.1 0.10045926 8264.66314 6573.92119 9
## p_value
## 1 NA
## 2 7.079513e-28
## 3 3.302466e-17
## 4 2.357165e-10
## 5 0.000000e+00
## 6 0.000000e+00
Translating to posterior model probabilities:
# To posterior model probability:
BICs <- mpfr(comparison$BIC, 100)
BICtrans <- exp(-0.5 * BICs)
sumBICtrans <- sum(BICtrans)
comparison$modelProbability <- as.numeric(BICtrans / sumBICtrans)
# Make a plot:
g <- ggplot(comparison, aes(x=model, y = modelProbability)) +
geom_bar(stat="identity") + xlab("") +
ylab("Posterior model probability") + theme_bw() +
ylim(0,1)
print(g)
Which shows that the two-factor RNM also fits well. This model looks like this:
# Two-factor RNM:
# Obtain residual network:
residnet_test <- getmatrix(RNM_twofac_test, "omega_epsilon")
# Obtain factor loadings:
factorloadings <- getmatrix(RNM_twofac_test, "lambda")
# Obtain correlations:
factorCors <- getmatrix(RNM_twofac_test, "sigma_zeta")
# Plot:
layout(t(1:2))
qgraph(residnet_test, theme = "colorblind", layout = "spring",
title = "Residual network", vsize = 8)
qgraph.loadings(factorloadings, theme = "colorblind", model = "reflective",
title = "Factor loadings", vsize = c(8,13), asize = 5,
factorCors = factorCors)