# Load all packages:
library("psychonetrics")
library("dplyr")
library("qgraph")
library("ggplot2")
library("bootnet")
library("Rmpfr")
library("semPlot")

Star Wars Analysis

Data Preperation

First we can load the data and make a vector of the observed variables:

# Load the data:
data("StarWars")

# Observed variables:
obsvars <- c("Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9", "Q10")

GGM Analysis

Next, we can form our first model: a GGM model with all edges included:

# Form GGM model:
ggm1 <- ggm(StarWars, vars = obsvars)

# Run model:
ggm1 <- ggm1 %>% runmodel
## Estimating baseline model...
## Estimating model...
## Computing Fisher information...
## Computing modification indices...

If we are interested in the parameter values and significance of edges, we can use the following command:

# Check parameters:
ggm1 %>% parameters

Next, we can automatically prune all edges:

# Prune non-significant:
ggm1 <- ggm1 %>% 
  prune(adjust = "fdr", alpha = 0.01)

We can check the modification indices to see wich edges we can add now to improve fit:

# Check modification indices:
ggm1 %>% MIs
## 
## Top 10 modification indices:
## 
##  var1 op var2      est mi pmi epc matrix row col      group group_id
##    Q1 ~1 <NA> 2.760148                mu   1   1 fullsample        1
##    Q2 ~1 <NA> 4.188192                mu   2   1 fullsample        1
##    Q3 ~1 <NA> 2.819188                mu   3   1 fullsample        1
##    Q4 ~1 <NA> 3.214022                mu   4   1 fullsample        1
##    Q5 ~1 <NA> 2.435424                mu   5   1 fullsample        1
##    Q6 ~1 <NA> 2.686347                mu   6   1 fullsample        1
##    Q7 ~1 <NA> 2.996310                mu   7   1 fullsample        1
##    Q8 ~1 <NA> 2.383764                mu   8   1 fullsample        1
##    Q9 ~1 <NA> 3.247232                mu   9   1 fullsample        1
##   Q10 ~1 <NA> 2.118081                mu  10   1 fullsample        1

To automatically add edges at \(\alpha = 0.05\) until BIC can no longer be improved, use the following function:

# Stepup estimation:
ggm1 <- ggm1 %>% stepup(criterion = "bic", alpha = 0.05)

This process can be simplified with a single Dplyr pipe!

### Slide 33 ###
# Form GGM model:
ggm1 <- ggm(StarWars, vars = obsvars)

# Run model and estimate structure:
ggm1 <- ggm1 %>% runmodel %>% 
  prune(adjust = "fdr", alpha = 0.01) %>% 
  stepup(criterion = "bic", alpha = 0.05)

We can look at fit indices (although they will likely be good):

# Model fit:
ggm1 %>% fit
##            Measure    Value
##               logl -4131.26
##  unrestricted.logl -4109.06
##      baseline.logl -4322.24
##               nvar       10
##               nobs       65
##               npar       33
##                 df       32
##          objective    12.11
##              chisq    44.39
##             pvalue    0.071
##     baseline.chisq   426.35
##        baseline.df       45
##    baseline.pvalue       ~0
##                nfi     0.90
##               pnfi     0.64
##                tli     0.95
##               nnfi     0.95
##                rfi     0.85
##                ifi     0.97
##                rni     0.97
##                cfi     0.97
##              rmsea    0.038
##     rmsea.ci.lower       ~0
##     rmsea.ci.upper    0.063
##       rmsea.pvalue     0.77
##             aic.ll  8328.52
##            aic.ll2  8337.98
##              aic.x   -19.61
##             aic.x2   110.39
##                bic  8447.39
##               bic2  8342.75
##            ebic.25  8523.37
##             ebic.5  8599.36
##            ebic.75  8660.14
##              ebic1  8751.33

Now, we can extract the network and plot it:

# Obtain network:
net1 <- getmatrix(ggm1, "omega")

# Plot:
qgraph(net1, 
       layout = "spring", 
       theme = "colorblind",
       labels = obsvars)

Network from bootnet

We can also use bootnet for the model estimation:

# Estimate model using bootnet:
mod <- estimateNetwork(StarWars[,obsvars], default = "ggmModSelect")
structure <- 1*(mod$graph!=0)

# Form second GGM model with fixed structure:
ggm2 <- ggm(StarWars, omega = structure, vars = obsvars)

# ERun the model:
ggm2 <- ggm2 %>% runmodel

# Obtain network:
net2 <- getmatrix(ggm2, "omega")

This will likely be slightly different from the network estimated by psychonetrics. We can plot both networks next to each other:

# Obtain average layout (function from qgraph)
Layout <- averageLayout(net1,net2)

# plot both networks:
layout(t(1:2))
qgraph(net1, layout = Layout, theme = "colorblind", title = "psychonetrics", 
       labels = obsvars)
qgraph(net2, layout = Layout, theme = "colorblind", title = "ggmModSelect",
       labels = obsvars)

We can even statistically compare these networks:

### Slide 36 ###
compare(psychonetrics = ggm1, ggmModSelect = ggm2)
##           model DF      AIC      BIC      RMSEA    Chisq Chisq_diff DF_diff
## 1  ggmModSelect 31 8327.476 8449.948 0.03510471 41.35289         NA      NA
## 2 psychonetrics 32 8328.515 8447.385 0.03780140 44.39182   3.038923       1
##      p_value
## 1         NA
## 2 0.08128984

Confirmatory Factor Analysis

To perform a confirmatory factor analysis, we first need to specify the factor structure:

# Factor loadings matrix:
Lambda <- matrix(0, 10, 3)
Lambda[1:4,1] <- 1
Lambda[c(1,5:7),2] <- 1
Lambda[c(1,8:10),3] <- 1
print(Lambda)
##       [,1] [,2] [,3]
##  [1,]    1    1    1
##  [2,]    1    0    0
##  [3,]    1    0    0
##  [4,]    1    0    0
##  [5,]    0    1    0
##  [6,]    0    1    0
##  [7,]    0    1    0
##  [8,]    0    0    1
##  [9,]    0    0    1
## [10,]    0    0    1

Next, we can make a vector with names for the latent variables:

# Latents:
latents <- c("Prequels","Original","Sequels")

Now, we can form the CFA model and run it. We use the lvm model function (latent variable model) for this:

# Make CFA model:
cfamod <- lvm(StarWars, lambda = Lambda, vars = obsvars, 
              identification = "variance", latents = latents)

# Run model:
cfamod <- cfamod %>% runmodel
## Estimating baseline model...
## Estimating model...
## Computing Fisher information...
## Computing modification indices...

We can look at some fit indices of this model:

cfamod@fitmeasures[c("chisq","pvalue","rmsea","cfi")]
## $chisq
## [1] 34.57911
## 
## $pvalue
## [1] 0.2582765
## 
## $rmsea
## [1] 0.02373261
## 
## $cfi
## [1] 0.9879924

These perfectly reproduce the famous lavaan package!

library("lavaan")
## This is lavaan 0.6-6.1487
## lavaan is BETA software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following object is masked from 'package:psychonetrics':
## 
##     duplicationMatrix
lavmod <- '
Prequels =~ Q1 + Q2 + Q3 + Q4
Original =~ Q1 + Q5 + Q6 + Q7
Sequels =~ Q1 + Q8 + Q9 + Q10
'
lavfit <- cfa(lavmod,StarWars,std.lv=TRUE)
fitMeasures(lavfit)[c("chisq","pvalue","rmsea","cfi")]
##       chisq      pvalue       rmsea         cfi 
## 34.57911327  0.25827655  0.02373261  0.98799236

Latent Network Model

Next, we can form an LNM model in the exact same way, using the lnm function (alternatively, we can use lvm(..., latent = "ggm")):

# Latent network model:
lnmmod <- lnm(StarWars, lambda = Lambda, vars = obsvars, 
              identification = "variance", latents = latents)

We can run this model and estimate a latent network structure with the same algorithm we used before:

# Run model and estimate latent structure:
lnmmod <- lnmmod %>% runmodel %>%
  prune(adjust = "fdr", alpha = 0.01) %>% 
  stepup(criterion = "bic", alpha = 0.05)

Finally, we can compare all models:

# Compare all models:
comparison <- compare(
  ggm_psychonetrics = ggm1, 
  ggm_ggmModSelect = ggm2,
  CFA = cfamod, 
  LNM = lnmmod)

print(comparison)
##               model DF      AIC      BIC      RMSEA    Chisq Chisq_diff DF_diff
## 1               CFA 30 8322.703 8448.777 0.02373261 34.57911         NA      NA
## 2  ggm_ggmModSelect 31 8327.476 8449.948 0.03510471 41.35289   6.773781       1
## 3               LNM 31 8321.016 8443.488 0.02152480 34.89233   6.460568       0
## 4 ggm_psychonetrics 32 8328.515 8447.385 0.03780140 44.39182   9.499491       1
##       p_value
## 1          NA
## 2 0.009250668
## 3 0.000000000
## 4 0.002055289

The BICs are close, so one thing we could do is transform these to posterior model probabilities:

# To posterior model probability:
BICs <- mpfr(comparison$BIC, 100)
BICtrans <- exp(-0.5 * BICs)
sumBICtrans <- sum(BICtrans)
comparison$modelProbability <- as.numeric(BICtrans / sumBICtrans)

# Make a plot:
g <- ggplot(comparison, aes(x=model, y = modelProbability)) + 
  geom_bar(stat="identity") + xlab("") + 
  ylab("Posterior model probability") + theme_bw() + 
  ylim(0,1)
print(g)

The latent network model performs best in this case!

Finally, we can make a path diagram. This is not very trivial… but can be done with the semPlot package:

# LISREL Model: LY = Lambda (lambda-y), TE = Theta (theta-epsilon), PS = Psi
mod_lisrel <- lisrelModel(LY = getmatrix(lnmmod, "lambda"), 
                   PS = getmatrix(lnmmod, "sigma_zeta"), 
                   TE = getmatrix(lnmmod, "sigma_epsilon"),
                   latNamesEndo = latents,
                   manNamesEndo = obsvars)
# Change latent to pcors:
omega <- getmatrix(lnmmod, "omega_zeta")
mod_lisrel@Pars$std[23:28] <- mod_lisrel@Pars$est[23:28] <- omega[lower.tri(omega,diag=TRUE)]


# We can make this nicer (set whatLabels = "none" to hide labels):
graph <- semPaths(mod_lisrel,
         what = "std", # this argument controls what the color of edges represent. In this case, standardized parameters
         whatLabels = "omit", # This argument controls what the edge labels represent. In this case, parameter estimates
         # as.expression = c("nodes","edges"), # This argument draws the node and edge labels as mathematical exprssions
         style = "lisrel", # This will plot residuals as arrows, closer to what we use in class
         residScale = 10, # This makes the residuals larger
         theme = "colorblind", # qgraph colorblind friendly theme
         layout = "tree2", # tree layout options are "tree", "tree2", and "tree3"
         cardinal = "lat cov", # This makes the latent covariances connet at a cardinal center point
         curvePivot = TRUE, # Changes curve into rounded straight lines
         sizeMan = 4, # Size of manifest variables
         sizeLat = 10, # Size of latent varibales
         edge.label.cex = 1,
         mar = c(6,1,8,1), # Sets the margins
         reorder = FALSE, # Prevents re-ordering of ovbserved variables
         # filetype = "pdf", # Store to PDF
         # filename = "semPlotExample1", # Set the name of the file
         width = 8, # Width of the plot
         height = 5, # Height of plot
         groups = "latents", # Colors according to latents,
         pastel = TRUE, # Pastel colors
         borders = FALSE,
         nCharNodes = 0, # Disable borders
         DoNotPlot = TRUE # Prevents plotting for now
)
graph$Edgelist$directed[23:26] <- FALSE # We need to make the latent structure undirected not bidirectional
plot(graph) # Plot the results

Multigroup Ising model

Data preperation

We can load the dataset into R with:

# Load dataset:
data("Jonas")

Next, we can define the variables to use, and reorder the dataset to prevent some numerical issues lateron:

# Variables to use:
vars <- names(Jonas)[1:10]

# Arranged groups to put unfamiliar group first (beta constrained to 1):
Jonas <- Jonas[order(Jonas$group),]

Now, we can form the first model: a two-group Ising model with all edges included and different networks and thresholds per group. The \(\beta\) parameter is identified to \(1\) in both groups:

# Form saturated model:
model1 <- Ising(Jonas, vars = vars, groups = "group")

# Run model:
model1 <- model1 %>% runmodel

We may also be interested in finding a sparse model with fewer parameters, as a dense model in two groups will cost a lot of parameters, and we can expect BIC to prefer equality constraint models then simply to reduce the number of parameters. Here, we will use a liberal search algorithm, removing all edges at \(\alpha = 0.05\) and adding edges at the same significance level until BIC is optimized:

# Prune-stepup to find a sparse model: 
model1b <- model1 %>% prune(alpha = 0.05) %>% stepup(alpha = 0.05)

Next, we can constrain networks to be equal across groups. This automatically allows \(\beta\) to be freely estimated in the second group:

# Equal networks:
model2 <- model1 %>% groupequal("omega") %>% runmodel

# Prune-stepup to find a sparse model:
model2b <- model2 %>% prune(alpha = 0.05) %>% stepup(mi = "mi_equal", alpha = 0.05)

We can also constrain the threshold parameters equal across groups:

# Equal thresholds:
model3 <- model2 %>% groupequal("tau") %>% runmodel

# Prune-stepup to find a sparse model:
model3b <- model3 %>% prune(alpha = 0.05) %>% stepup(mi = "mi_equal", alpha = 0.05)

Finally, we can constrain \(\beta\) to be equal across groups. This sets \(\beta\) to \(1\) in the second group again, but this time it is a constraint, not an identification:

# Equal beta:
model4 <- model3 %>% groupequal("beta") %>% runmodel

# Prune-stepup to find a sparse model:
model4b <- model4 %>% prune(alpha = 0.05) %>% stepup(mi = "mi_equal", alpha = 0.05)

Comparing all models

After running these models, we can compare all models on AIC and BIC:

# Compare all models:
comparison <- compare(
  `1. all parameters free (dense)` = model1,
  `2. all parameters free (sparse)` = model1b,
  `3. equal networks (dense)` = model2,
  `4. equal networks (sparse)` = model2b,
  `5. equal networks and thresholds (dense)` = model3,
  `6. equal networks and thresholds (sparse)` = model3b,
  `7. all parameters equal (dense)` = model4,
  `8. all parameters equal (sparse)` = model4b
) %>% arrange(BIC) %>% select(model, DF, BIC)

# Print results:
print(comparison)
##                                       model DF      BIC
## 1 6. equal networks and thresholds (sparse) 89 2114.029
## 2          8. all parameters equal (sparse) 92 2136.647
## 3                4. equal networks (sparse) 80 2138.304
## 4  5. equal networks and thresholds (dense) 54 2224.369
## 5           2. all parameters free (sparse) 54 2239.699
## 6           7. all parameters equal (dense) 55 2245.442
## 7                 3. equal networks (dense) 44 2251.331
## 8            1. all parameters free (dense)  0 2443.210

We can again transform this BIC to a posterior model probability:

# To posterior model probability:
BICs <- mpfr(comparison$BIC, 100)
BICtrans <- exp(-0.5 * BICs)
sumBICtrans <- sum(BICtrans)
comparison$modelProbability <- as.numeric(BICtrans / sumBICtrans)

# Numeric model:
comparison$mod <- as.numeric(substr(comparison$model,1,1))

# Make a plot:
g <- ggplot(comparison, aes(x=mod, y = modelProbability)) + 
  geom_bar(stat="identity") + xlab("Model") + 
  ylab("Posterior model probability") + theme_bw() + 
  scale_x_continuous(breaks=seq_len(nrow(comparison))) + 
  ylim(0,1)
print(g)

This shows that the sparse model with equal networks and thresholds, but not equal temperature, fits best! We can plot the temperature (inverse of \(\beta\)):

# Extract beta:
beta <-  unlist(getmatrix(model3b, "beta"))

# Standard errors::
SEs <- model3b@parameters$se[model3b@parameters$matrix == "beta"]

# Make a data frame:
df <- data.frame(
  temperature = 1/beta,
  group = names(beta),
  lower = 1 / (beta-qnorm(0.975) * SEs),
  upper = 1 / (beta+qnorm(0.975) * SEs),
  stringsAsFactors = FALSE
)

# Some extra values:
df$fixed <- is.na(df$lower)
df$group <- factor(df$group, levels = c("Doesn't know Jonas", "Knows Jonas"))

# Create the plot:
library("ggplot2")
g <- ggplot(df,aes(x=as.numeric(group), y = temperature, ymin = lower, ymax = upper)) + 
  geom_line() + 
  geom_errorbar(width = 0.05) + 
  geom_point(cex = 5, colour = "black") +
  geom_point(aes(colour = fixed), cex = 4) +  theme_bw()  +
  xlab("") + ylab(expression(paste("Temperature (",1/beta,")"))) + 
  scale_x_continuous(breaks =  1:2, labels = levels(df$group), expand = c(0.1,0.1)) + 
  scale_y_continuous(expand = c(0,.1), limits = c(0,1)) + 
  theme( panel.grid.major.x = element_blank(),panel.grid.minor.x = element_blank())+ 
  ggtitle(expression(paste("Model 6: 2 groups; ",bold(Omega)," sparse & equal; ",bold(tau)," equal; ",beta," free"))) + 
  scale_colour_manual(values = c("black","white")) + 
  theme(legend.position = "none")

# Plot:
print(g)

This shows that in line with predictions, temperature is lower (\(\beta\) is higher) in the group familliar with the attitude object! Finally, we can plot the network model:

# Make labels:
labels <- c("good scientist",
            "wears beautiful\njeans",
            "cares about\npeople like you",
            "solve economics",
            "hardworking",
            "honest",
            "in touch with\nordinary people",
            "knowledgeable",
            "can't make up\nhis mind",
            "gets things done")

# Extract network structure and thresholds:
network <- getmatrix(model3b, "omega")[[1]]
thresholds <- getmatrix(model3b, "tau")[[1]]

# Scale thresholds for colors:
scaledthresh <- as.vector(thresholds / (2*max(abs(thresholds))))

# Make colors:
cols <- ifelse(scaledthresh < 0, "red", "darkblue")
cols[scaledthresh>0] <- qgraph:::Fade(cols[scaledthresh>0],alpha = scaledthresh[scaledthresh>0], "white")
cols[scaledthresh<0] <- qgraph:::Fade(cols[scaledthresh<0],alpha = abs(scaledthresh)[scaledthresh<0], "white")

# Plot network and save to file:
qgraph(network, layout = "spring", labels = labels,
            shape = "rectangle", vsize = 15, vsize2 = 8,
            theme = "colorblind", color = cols,
            cut = 0.5, repulsion = 0.9)

Panel data GVAR

Data setup

Download the SHARE data summary statistics from Dropbox and load these into R:

# Load SHARE data:
load("SHARE.RData")

First, we need to form a design matrix. Each row indicates a variable and each column a wave of data. Values can be missing if a certain variable has not been measured at a certain wave of measurement:

# Form design matrix:
design <- matrix(colnames(SHAREcovs),nrow=5,ncol=3)
print(design)
##      [,1]        [,2]        [,3]       
## [1,] "4_bmi"     "5_bmi"     "6_bmi"    
## [2,] "4_casp"    "5_casp"    "6_casp"   
## [3,] "4_eurod"   "5_eurod"   "6_eurod"  
## [4,] "4_maxgrip" "5_maxgrip" "6_maxgrip"
## [5,] "4_sphus"   "5_sphus"   "6_sphus"

Model estimation

Next, we can form amd run the model:

# Form model:
model <- panelgvar(covs=SHAREcovs, means = SHAREmeans, nobs = SHAREnobs,
                    vars = design, covtype = "UB")

# Estimate saturated model:
model <- model %>% runmodel
## Estimating baseline model...
## Estimating model...
## Computing Fisher information...
## Computing modification indices...

Following, we can perform model search strategies to find a sparse model:

# Model search:
model2 <- model %>% prune(alpha = 0.01, recursive = FALSE) %>% modelsearch(verbose=FALSE)

The model fits very well!

# Fit measures:
model2 %>% fit
##            Measure      Value
##               logl -323506.51
##  unrestricted.logl -322148.38
##      baseline.logl -432331.46
##               nvar         15
##               nobs        135
##               npar         43
##                 df         92
##          objective       2.98
##              chisq    2716.26
##             pvalue         ~0
##     baseline.chisq  220366.17
##        baseline.df        105
##    baseline.pvalue         ~0
##                nfi       0.99
##               pnfi       0.87
##                tli       0.99
##               nnfi       0.99
##                rfi       0.99
##                ifi       0.99
##                rni       0.99
##                cfi       0.99
##              rmsea      0.037
##     rmsea.ci.lower      0.036
##     rmsea.ci.upper      0.038
##       rmsea.pvalue          1
##             aic.ll  647099.02
##            aic.ll2  647099.20
##              aic.x    2532.26
##             aic.x2    2802.26
##                bic  647441.34
##               bic2  647304.69
##            ebic.25  647557.79
##             ebic.5  647674.23
##            ebic.75  647767.39
##              ebic1  647907.13

Finally, we can make a plot of these networks:

# Extract networks:
temporal <- getmatrix(model2, "PDC")
contemporaneous <- getmatrix(model2, "omega_zeta_within")
between <- getmatrix(model2, "omega_zeta_between")

# Labels:
labels <- c("BMI","quality of life", "depression", "grip strength","perceived health")

# Plot networks:
layout(t(1:3))
qgraph(temporal, layout = "circle", labels = labels,
       title = "Temporal", shape = "rectangle", 
       vsize = 30, vsize2 = 20,
       mar = rep(8,4), asize = 7,
       theme = "colorblind")
box("figure")

qgraph(contemporaneous, layout = "circle", labels = labels,
       title = "Contemporaneous",shape = "rectangle", 
       vsize = 30, vsize2 = 20,
       mar = rep(8,4),
       theme = "colorblind")
box("figure")

qgraph(between, layout = "circle", labels = labels,
       title = "Between-persons",shape = "rectangle", 
       vsize = 30, vsize2 = 20,
       mar = rep(8,4),
       theme = "colorblind")
box("figure")

For more advanced panel data models, see the pre-print and supplementary materials.

Extra: Confirmatory GGM and Residual Network Model

Data preperation

Download the datafile selfEsteem.csv from Dropbox. We can load this file in R as follows:

data <- read.csv("selfEsteem.csv", sep = "\t")

This is a large dataset obtained from openpsychometrics.org containing measures of the Rosenberg Self-Esteem Scale. The items are the following:

Item label Item description
Q1 I feel that I am a person of worth, at least on an equal plane with others.
Q2 I feel that I have a number of good qualities.
Q3 All in all, I am inclined to feel that I am a failure.
Q4 I am able to do things as well as most other people.
Q5 I feel I do not have much to be proud of.
Q6 I take a positive attitude toward myself.
Q7 On the whole, I am satisfied with myself.
Q8 I wish I could have more respect for myself.
Q9 I certainly feel useless at times.
Q10 At times I think I am no good at all.

In addition, the dataset contains information about gender, age, datasource and country. We are only interested in the items:

items <- paste0("Q",1:10)

Confirmatory fit of GGMs

Because the data is so large, we can split the sample in two datasets. We use all odd cases (\(1, 3, 5, \ldots\)) to train models and all the even cases (\(2, 4, 6, \ldots\)) to test models:

trainData <- data[c(TRUE,FALSE),]
testData <- data[c(FALSE,TRUE),]

Let’s first estimate a GGM on the training data, using full information maximum likelihood (FIML) estimation:

# Form saturated model:
GGM_train <- ggm(trainData, vars = items, estimator = "FIML")
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
# Perform model selection steps:
GGM_train <- GGM_train %>% runmodel %>% 
    prune(alpha = 0.01, recursive = FALSE) %>% 
    modelsearch(verbose = FALSE)

Here, we used the newer modelsearch function to find the best model. We can plot this network:

# Obtain the network:
net_train <- getmatrix(GGM_train, "omega")
qgraph(net_train, theme = "colorblind", layout = "spring")

Next, we can fit this model to the test data:

# Obtain structure:
structure <- 1*(net_train != 0)

# Form test model:
GGM_test <- ggm(testData, vars = items, estimator = "FIML",
                omega = structure)
## Computing missingness patterns...
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
# Fit model:
GGM_test <- GGM_test %>% runmodel
## Estimating baseline model...
## Estimating saturated model...
## Estimating model...
## Computing Fisher information...
## Computing modification indices...
# Obtain the network:
net_test <- getmatrix(GGM_test, "omega")
qgraph(net_test, theme = "colorblind", layout = "spring")

We can now investigate the fit of this model:

GGM_test %>% fit
##            Measure      Value
##               logl -262354.71
##  unrestricted.logl -262337.41
##      baseline.logl -325682.47
##               nvar         10
##               nobs         65
##               npar         59
##                 df          6
##          objective       3.50
##              chisq      34.60
##             pvalue         ~0
##     baseline.chisq  126690.11
##        baseline.df         45
##    baseline.pvalue         ~0
##                nfi        1.0
##               pnfi       0.13
##                tli        1.0
##               nnfi        1.0
##                rfi        1.0
##                ifi        1.0
##                rni        1.0
##                cfi        1.0
##              rmsea      0.014
##     rmsea.ci.lower     0.0098
##     rmsea.ci.upper      0.019
##       rmsea.pvalue          1
##             aic.ll  524827.42
##            aic.ll2  524827.71
##              aic.x      22.60
##             aic.x2     152.60
##                bic  525304.45
##               bic2  525116.95
##            ebic.25  525440.30
##             ebic.5  525576.15
##            ebic.75  525684.83
##              ebic1  525847.86

If you are familiar with SEM fit indices, you may see that this model fits very well.

Residual Network Model

To control for a potential common self-esteem factor, we can estimate a Residual Network Model (RNM). First, we will start with a saturated CFA model:

# Factor loadings matrix (one factor):
Lambda <- matrix(1,10,1)

# CFA model:
CFA_train <- rnm(trainData, vars = items, estimator = "FIML",
                 lambda = Lambda) %>% runmodel
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
CFA_test <- rnm(testData, vars = items, estimator = "FIML",
                 lambda = Lambda) %>% runmodel
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

This model does not fit great in both datasets:

rbind(
  train = CFA_train@fitmeasures,
  test = CFA_test@fitmeasures)[,c("rmsea","cfi","tli","rni")]
##       rmsea     cfi       tli       rni      
## train 0.1375208 0.875883  0.840421  0.875883 
## test  0.14104   0.8681317 0.8304551 0.8681317

Next, we can train an RNM model using stepup model selection using modification indices, followed by modelsearch for more finetuned search:

# Estimate RNM:
RNM_train <- CFA_train %>% 
  stepup(criterion = "bic",verbose = FALSE) %>% 
  modelsearch(verbose = FALSE)

# Obtain residual network:
residnet_train <- getmatrix(RNM_train, "omega_epsilon")

Finally, we can test this RNM in the test data:

structure <- 1*(residnet_train!=0)

# Form RNM on test data:
RNM_test <- rnm(testData, vars = items, estimator = "FIML",
                 lambda = Lambda, omega_epsilon = structure)
## Computing missingness patterns...
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
# Run model:
RNM_test <- RNM_test %>% runmodel
## Estimating baseline model...
## Estimating saturated model...
## Estimating model...
## Computing Fisher information...
## Computing modification indices...

This model fits very well in the test data:

RNM_test %>% fit
##            Measure      Value
##               logl -262387.72
##  unrestricted.logl -262337.41
##      baseline.logl -325682.47
##               nvar         10
##               nobs         65
##               npar         53
##                 df         12
##          objective       3.50
##              chisq     100.63
##             pvalue         ~0
##     baseline.chisq  126690.11
##        baseline.df         45
##    baseline.pvalue         ~0
##                nfi        1.0
##               pnfi       0.27
##                tli        1.0
##               nnfi        1.0
##                rfi        1.0
##                ifi        1.0
##                rni        1.0
##                cfi        1.0
##              rmsea      0.018
##     rmsea.ci.lower      0.014
##     rmsea.ci.upper      0.021
##       rmsea.pvalue          1
##             aic.ll  524881.45
##            aic.ll2  524881.69
##              aic.x      76.63
##             aic.x2     206.63
##                bic  525309.97
##               bic2  525141.54
##            ebic.25  525432.01
##             ebic.5  525554.04
##            ebic.75  525651.67
##              ebic1  525798.12

We can plot the factor loadings and residual network as follows:

# Obtain residual network:
residnet_test <- getmatrix(RNM_test, "omega_epsilon")

# Obtain factor loadings:
factorloadings <- getmatrix(RNM_test, "lambda")

# Plot:
layout(t(1:2))
qgraph(residnet_test, theme = "colorblind", layout = "spring",
       title = "Residual network", vsize = 8)
qgraph.loadings(factorloadings, theme = "colorblind", model = "reflective",
                title = "Factor loadings", vsize = c(8,13), asize = 5)

Finally, we can compare the RNM to the earlier estimated GGM:

# Comparison in training data:
compare(
  GGM = GGM_train,
  RNM = RNM_train
)
##   model DF      AIC      BIC      RMSEA    Chisq Chisq_diff DF_diff     p_value
## 1   GGM  6 521870.8 522347.8 0.01049712 21.85869         NA      NA          NA
## 2   RNM 12 521878.4 522306.9 0.01011165 41.43076   19.57207       6 0.003299056
# Comparison in test data:
compare(
  GGM = GGM_test,
  RNM = RNM_test
)
##   model DF      AIC      BIC      RMSEA     Chisq Chisq_diff DF_diff
## 1   GGM  6 524827.4 525304.4 0.01409590  34.59649         NA      NA
## 2   RNM 12 524881.4 525310.0 0.01754744 100.63077   66.03427       6
##       p_value
## 1          NA
## 2 2.65204e-12

We obtain a very similar fit, with BIC prefering the RNM in the training data but not in the test data. The RNM also contains less parameters (higher degrees of freedom) than the GGM, and fits well in the test data. At this point, we would likely use theoretical reasoning to choose which of these competing models we would prefer.

More complex models

From the figure above, we can see that the residual network distincly forms clusters for positively and negatively frased items. To this end, we could also explore a two-factor and a bi-factor model. The code below shows how to estimate a RNM from a two-factor model:

# Factor loadings matrix (two factors):
Lambda2 <- matrix(0,10,2)
Lambda2[c(1,2,4,6,7),1] <- 1 # Positive items
Lambda2[c(3,5,8,9,10),2] <- 1 # Negative items

# Fit 2-factor models:
twofac_train <- rnm(trainData, vars = items, estimator = "FIML",
                     lambda = Lambda2) %>% runmodel
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
twofac_test <- rnm(testData, vars = items, estimator = "FIML",
               lambda = Lambda2) %>% runmodel
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
# Fit 2-factor RNM to train data:
RNM_twofac_train <- twofac_train %>% 
  stepup(criterion = "bic",verbose = FALSE) %>% 
  modelsearch(verbose = FALSE)

# Fit 2-factor  RNM to test data:
structure <- 1*(getmatrix(RNM_twofac_train,"omega_epsilon")!=0)
RNM_twofac_test <- rnm(testData, vars = items, estimator = "FIML",
                       lambda = Lambda2, omega_epsilon = structure) %>% 
  runmodel
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

Subsequently, the code below shows how to estimate a RNM from a bi-factor model:

# Factor loadings matrix (bifactor):
Lambda3 <- matrix(0,10,3)
Lambda3[,1] <- 1 # Self esteem
Lambda3[c(1,2,4,6,7),2] <- 1 # Positive items
Lambda3[c(3,5,8,9,10),3] <- 1 # Negative items

# Fit bifactor:
bi_train <- rnm(trainData, vars = items, estimator = "FIML",
                     lambda = Lambda3, sigma_zeta = "empty") %>% runmodel
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
bi_test <- rnm(testData, vars = items, estimator = "FIML",
               lambda = Lambda3, sigma_zeta = "empty") %>% runmodel
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
# Fit bifactor RNM to train data:
RNM_bi_train <- bi_train %>% 
  stepup(criterion = "bic",verbose = FALSE) %>% 
  modelsearch(verbose = FALSE)


# Fit bifactor RNM to test data:
structure <- 1*(getmatrix(RNM_bi_train,"omega_epsilon")!=0)
RNM_bi_test <- rnm(testData, vars = items, estimator = "FIML",
                       lambda = Lambda3, omega_epsilon = structure) %>% 
  runmodel
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

We can now compare all these models:

comparison <- compare(
  GGM = GGM_test,
  RNM = RNM_test,
  twofactor = twofac_test,
  twofactor_RNM = RNM_twofac_test,
  bifactor = bi_test,
  bifactor_RNM = RNM_bi_test
)  
print(comparison)
##           model DF      AIC      BIC      RMSEA      Chisq Chisq_diff DF_diff
## 1           GGM  6 524827.4 525304.4 0.01409590   34.59649         NA      NA
## 2  bifactor_RNM 11 524954.6 525391.2 0.02468536  171.78570  137.18920       5
## 3           RNM 12 524881.4 525310.0 0.01754744  100.63077   71.15493       1
## 4 twofactor_RNM 13 524839.3 525259.7 0.01234011   60.48512   40.14564       1
## 5      bifactor 25 526445.6 526769.0 0.05270428 1690.74196 1630.25683      12
## 6     twofactor 34 533001.5 533252.1 0.10045926 8264.66314 6573.92119       9
##        p_value
## 1           NA
## 2 7.079513e-28
## 3 3.302466e-17
## 4 2.357165e-10
## 5 0.000000e+00
## 6 0.000000e+00

Translating to posterior model probabilities:

# To posterior model probability:
BICs <- mpfr(comparison$BIC, 100)
BICtrans <- exp(-0.5 * BICs)
sumBICtrans <- sum(BICtrans)
comparison$modelProbability <- as.numeric(BICtrans / sumBICtrans)

# Make a plot:
g <- ggplot(comparison, aes(x=model, y = modelProbability)) + 
  geom_bar(stat="identity") + xlab("") + 
  ylab("Posterior model probability") + theme_bw() + 
  ylim(0,1)
print(g)

Which shows that the two-factor RNM also fits well. This model looks like this:

# Two-factor RNM:
# Obtain residual network:
residnet_test <- getmatrix(RNM_twofac_test, "omega_epsilon")

# Obtain factor loadings:
factorloadings <- getmatrix(RNM_twofac_test, "lambda")

# Obtain correlations:
factorCors <- getmatrix(RNM_twofac_test, "sigma_zeta")

# Plot:
layout(t(1:2))
qgraph(residnet_test, theme = "colorblind", layout = "spring",
       title = "Residual network", vsize = 8)
qgraph.loadings(factorloadings, theme = "colorblind", model = "reflective",
                title = "Factor loadings", vsize = c(8,13), asize = 5,
                factorCors = factorCors)