- Post your topic to Blackboard!
- Some example topics are in this Google Drive folder
- Feel free to add more!
- Bonus if you find your own data
- Please let me know which group you are in before Tuesday, 11:00
The `huge.npn` function in the huge package applies the nonparanormal (npn) transformation. First, simulate some clearly non-normal data:

DF <- data.frame(
  x = ifelse(sample(c(TRUE, FALSE), 1000, TRUE), rnorm(1000, -10, 4), rexp(1000, 20)),
  y = rbeta(1000, 0.5, 0.5)
)
library('ggplot2')
ggplot(DF, aes(x = x)) + geom_density()
ggplot(DF, aes(x = y)) + geom_density()
ggplot(DF, aes(x = x, y = y )) + geom_point()
Transform to normal data:
library('huge')
DFnormal <- as.data.frame(huge.npn(DF))
## Conducting the nonparanormal (npn) transformation via shrunkun ECDF....done.
library('ggplot2')
ggplot(DFnormal, aes(x = x)) + geom_density()
ggplot(DFnormal, aes(x = y)) + geom_density()
ggplot(DFnormal, aes(x = x, y = y )) + geom_point()
I see myself as someone who is talkative…
The `cor_auto()` function in the qgraph package can be used to compute a correlation matrix for variables, some of which are ordinal. Compare this to the base `cor()` function, which computes Pearson correlations:
# True correlation matrix:
Cor <- matrix(c(1, 0.5, 0.25,
                0.5, 1, 0.5,
                0.25, 0.5, 1), 3, 3)
Cor
##      [,1] [,2] [,3]
## [1,] 1.00  0.5 0.25
## [2,] 0.50  1.0 0.50
## [3,] 0.25  0.5 1.00
# Simulate true scores:
library(mvtnorm)
## Warning: package 'mvtnorm' was built under R version 3.1.1
Ylat <- rmvnorm(1000, c(0, 0, 0), Cor)
# Discretize variables 1 and 2
Y <- Ylat
Y[, 1:2] <- as.numeric(cut(Ylat[, 1:2], breaks = c(-Inf, 0, 1, Inf)))
head(Y,10)
##       [,1] [,2]     [,3]
##  [1,]    1    1  0.02917
##  [2,]    2    2  1.73107
##  [3,]    1    1 -1.58392
##  [4,]    1    1 -0.01996
##  [5,]    1    2  0.61200
##  [6,]    2    3 -0.30646
##  [7,]    3    3  0.51575
##  [8,]    3    1 -1.46357
##  [9,]    2    3  0.79041
## [10,]    2    1  1.27870
# True correlation matrix:
Cor
##      [,1] [,2] [,3]
## [1,] 1.00  0.5 0.25
## [2,] 0.50  1.0 0.50
## [3,] 0.25  0.5 1.00
# Pearson correlations:
cor(Y)
##        [,1]   [,2]   [,3]
## [1,] 1.0000 0.3563 0.2276
## [2,] 0.3563 1.0000 0.4682
## [3,] 0.2276 0.4682 1.0000
# True correlation matrix:
Cor
##      [,1] [,2] [,3]
## [1,] 1.00  0.5 0.25
## [2,] 0.50  1.0 0.50
## [3,] 0.25  0.5 1.00
# Polychoric correlations
cor_auto(Y)
## Variables detected as ordinal: V1; V2
##        V1     V2     V3
## V1 1.0000 0.4499 0.2576
## V2 0.4499 1.0000 0.5393
## V3 0.2576 0.5393 1.0000
Because the Ising model is a combination of multiple logistic regression models, and the Gaussian random field is a combination of multiple linear regression models, we can form MRFs in the following way:
\[ \Pr(X_1 = 1) \propto \exp\left( \tau_1 + \beta_{12} x_2 + \beta_{13} x_3 + \beta_{14} x_4 \right) \]
\[ \Pr(X_2 = 1) \propto \exp\left( \tau_2 + \beta_{21} x_1 + \beta_{23} x_3 + \beta_{24} x_4 \right) \]
\[ \Pr(X_3 = 1) \propto \exp\left( \tau_3 + \beta_{31} x_1 + \beta_{32} x_2 + \beta_{34} x_4 \right) \]
\[ \Pr(X_4 = 1) \propto \exp\left( \tau_4 + \beta_{41} x_1 + \beta_{42} x_2 + \beta_{43} x_3 \right) \]
\[ \omega_{ij} = \beta_{ij} = \beta_{ji} \]
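For illustration, each of these conditional distributions can be fit with a logistic regression. The following sketch is not part of the original example: it assumes a hypothetical binary data frame `BinData` made up of four independent coin-flip variables, so all true edge weights are zero and both regression coefficients below should be close to zero.

# Hypothetical example data (not from the notes): four independent 0/1 variables
set.seed(123)
BinData <- as.data.frame(matrix(rbinom(4000, 1, 0.5), ncol = 4,
                                dimnames = list(NULL, c("X1", "X2", "X3", "X4"))))

# Node-wise logistic regressions: regressing each node on all other nodes
# gives its threshold (intercept) and its beta weights to the other nodes
fit1 <- glm(X1 ~ X2 + X3 + X4, data = BinData, family = binomial)
fit2 <- glm(X2 ~ X1 + X3 + X4, data = BinData, family = binomial)

# beta_12 (from fit1) and beta_21 (from fit2) estimate the same edge weight
# omega_12 and should therefore be approximately equal
coef(fit1)["X2"]
coef(fit2)["X1"]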
\[ X_1 = \tau_1 + \beta_{12} x_2 + \beta_{13} x_3 + \beta_{14} x_4 + \varepsilon_1 \]
\[ X_2 = \tau_2 + \beta_{21} x_1 + \beta_{23} x_3 + \beta_{24} x_4 + \varepsilon_2 \]
\[ X_3 = \tau_3 + \beta_{31} x_1 + \beta_{32} x_2 + \beta_{34} x_4 + \varepsilon_3 \]
\[ X_4 = \tau_4 + \beta_{41} x_1 + \beta_{42} x_2 + \beta_{43} x_3 + \varepsilon_4 \]
\[ \rho_{ij} = \beta_{ij} \sqrt{\frac{\mathrm{Var}\left(\varepsilon_j\right)}{\mathrm{Var}\left(\varepsilon_i\right)}} = \beta_{ji} \sqrt{\frac{\mathrm{Var}\left(\varepsilon_i\right)}{\mathrm{Var}\left(\varepsilon_j\right)}} \]
\[ \rho_{ij} = -\frac{ \theta_{ij} }{\sqrt{ \theta_{ii} \theta_{jj} }} \hspace{35pt} \boldsymbol{\Theta} = \boldsymbol{\Sigma}^{-1} \]
In which \(\boldsymbol{\Theta}\) is the precision matrix; the inverse of the variance-covariance matrix.
Data <- structure(list(
  W = c(4, 1, 5, 4, 2, 2, 8, 1, 4, 4),
  C = c(2, 6, 4, 2, 9, 6, 2, 5, 4, 2),
  F = c(3, 2, 4, 8, 1, 2, 4, 4, 5, 5),
  I = c(1, 1, 6, 8, 3, 5, 8, 1, 3, 4)
), .Names = c("W", "C", "F", "I"), row.names = c(NA, -10L), class = "data.frame")
Data
##    W C F I
## 1  4 2 3 1
## 2  1 6 2 1
## 3  5 4 4 6
## 4  4 2 8 8
## 5  2 9 1 3
## 6  2 6 2 5
## 7  8 2 4 8
## 8  1 5 4 1
## 9  4 4 5 3
## 10 4 2 5 4
round(cor(Data),2)
##       W     C     F     I
## W  1.00 -0.67  0.40  0.70
## C -0.67  1.00 -0.73 -0.38
## F  0.40 -0.73  1.00  0.52
## I  0.70 -0.38  0.52  1.00
library("qgraph") qgraph(cor(Data), esize = 20)
fitW <- lm(W ~ C + F + I, data = Data)
fitW
## 
## Call:
## lm(formula = W ~ C + F + I, data = Data)
## 
## Coefficients:
## (Intercept)            C            F            I  
##       6.606       -0.723       -0.564        0.518
fitC <- lm(C ~ W + F + I, data = Data)
fitF <- lm(F ~ W + C + I, data = Data)
fitI <- lm(I ~ W + C + F, data = Data)
\[ \begin{aligned} W &= \beta_{12} C + \beta_{13} F + \beta_{14} I + \varepsilon_W \\ C &= \beta_{21} W + \beta_{23} F + \beta_{24} I + \varepsilon_C \\ F &= \beta_{31} W + \beta_{32} C + \beta_{34} I + \varepsilon_F \\ I &= \beta_{41} W + \beta_{42} C + \beta_{43} F + \varepsilon_I \end{aligned} \]
beta_WC <- coef(fitW)[2]
beta_CW <- coef(fitC)[2]
errorVariance_W <- summary(fitW)$sigma^2
errorVariance_C <- summary(fitC)$sigma^2
beta_WC * errorVariance_C
## C ## -1.163
beta_CW * errorVariance_W
## W ## -1.163
\[ \begin{aligned} W = {\color{red} \beta_{12}} C + {\color{blue} \beta_{13}} F + {\color{green}\beta_{14}} I + \varepsilon_W \\ C = {\color{red}\beta_{21}} W + {\color{purple} \beta_{23} } F + {\color{orange} \beta_{24}} I + \varepsilon_C \\ F = {\color{blue} \beta_{31}} W + {\color{purple} \beta_{32} } C + {\color{brown}\beta_{34}} I + \varepsilon_F \\ I = {\color{green}\beta_{41}} W + {\color{orange} \beta_{42} } C + {\color{brown} \beta_{43}} F + \varepsilon_I \\ \end{aligned} \]
beta_WC * sqrt(errorVariance_C) / sqrt(errorVariance_W)
## C ## -0.7651
beta_CW * sqrt(errorVariance_W) / sqrt(errorVariance_C)
## W ## -0.7651
-cov2cor(solve(cov(Data)))
##         W       C       F       I
## W -1.0000 -0.7651 -0.5889  0.7754
## C -0.7651 -1.0000 -0.7993  0.5871
## F -0.5889 -0.7993 -1.0000  0.6468
## I  0.7754  0.5871  0.6468 -1.0000
These partial correlations give us the concentration graph:
library("qgraph") qgraph(cor(Data), graph = "concentration")
Data
##    W C F I
## 1  4 2 3 1
## 2  1 6 2 1
## 3  5 4 4 6
## 4  4 2 8 8
## 5  2 9 1 3
## 6  2 6 2 5
## 7  8 2 4 8
## 8  1 5 4 1
## 9  4 4 5 3
## 10 4 2 5 4
\[ Y = \beta_1 x_1 + \beta_2 x_2 + \varepsilon_Y \]
[Figures: LASSO estimates for increasing values of the tuning parameter \(\lambda\): 0, 0.1, 0.25, 0.3, 0.4, 0.5, 1, 2, 3, 4]
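The same idea carries over to the concentration graph. As a rough sketch (not part of the original notes; it assumes the glasso package is installed), running `glasso` on the covariance matrix of `Data` for a few penalty values shows how a larger \(\lambda\) (the `rho` argument) forces more partial correlations to exactly zero:

library("glasso")
S <- cov(Data)
for (lambda in c(0, 0.1, 0.5, 1)) {
  fit <- glasso(S, rho = lambda)
  # Count off-diagonal elements of the estimated precision matrix shrunk to zero
  nZero <- sum(fit$wi[upper.tri(fit$wi)] == 0)
  cat("lambda =", lambda, "-> off-diagonal zeroes:", nZero, "\n")
}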
Find a sparse concentration graph using glasso:
qgraph(cor(Data), graph = "glasso", sampleSize = 10)
To estimate a Markov Random Field using EBIC to select the tuning parameter \(\lambda\), use:

- The `qgraph` function in the qgraph package with arguments `graph = "glasso"` and `sampleSize` (for continuous data)
  - The `tuning` argument sets the \(\gamma\) hyper-tuning parameter
- The `IsingFit` function in the IsingFit package (for binary data)
  - The `gamma` argument sets the \(\gamma\) hyper-tuning parameter
  - The `weiadj` element of the resulting list gives the weights matrix
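A minimal usage sketch (assuming the qgraph and IsingFit packages are installed; it reuses the hypothetical `BinData` from the Ising sketch above):

library("qgraph")
library("IsingFit")

# Gaussian MRF via EBIC-glasso; 'tuning' sets the EBIC gamma hyperparameter
qgraph(cor(Data), graph = "glasso", sampleSize = nrow(Data), tuning = 0.5)

# Ising model for binary data; 'gamma' sets the EBIC hyperparameter
fitIsing <- IsingFit(BinData, gamma = 0.25, plot = FALSE)
fitIsing$weiadj  # the weights matrix of the estimated network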