- Post your topic to blackboard!
- Some example topics are in this google drive folder
- Feel free to add more!
- Bonus if you find your own data
- Please let me know what group you are in before Tuesday 11:00
The huge.npn function in the huge package can be used to transform non-normal data to (approximately) normal data. For example, simulate some clearly non-normal variables:
DF <- data.frame(
  x = ifelse(sample(c(TRUE, FALSE), 1000, TRUE), rnorm(1000, -10, 4), rexp(1000, 20)),
  y = rbeta(1000, 0.5, 0.5)
)
library('ggplot2')
ggplot(DF, aes(x = x)) + geom_density()
ggplot(DF, aes(x = y)) + geom_density()
ggplot(DF, aes(x = x, y = y )) + geom_point()
Transform to normal data:
library('huge')
DFnormal <- as.data.frame(huge.npn(DF))
## Conducting the nonparanormal (npn) transformation via shrunkun ECDF....done.
library('ggplot2')
ggplot(DFnormal, aes(x = x)) + geom_density()
ggplot(DFnormal, aes(x = y)) + geom_density()
ggplot(DFnormal, aes(x = x, y = y )) + geom_point()
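To get some intuition for what the transformation does: with the default shrunken-ECDF method, huge.npn essentially maps each column through its empirical CDF onto normal quantiles (followed by a rescaling). A minimal manual sketch, using a made-up helper manual_npn:
# Rough manual version of the shrunken-ECDF nonparanormal transformation
# (illustrative only; huge.npn additionally rescales the result):
manual_npn <- function(x) qnorm(rank(x) / (length(x) + 1))
DFmanual <- as.data.frame(lapply(DF, manual_npn))
# Correlations are unaffected by the rescaling, so this should be
# (practically) equal to 1:
cor(DFmanual$x, DFnormal$x)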
Many questionnaire items are ordinal. For example: "I see myself as someone who is talkative…"
The cor_auto() function in the qgraph package can be used to compute a correlation matrix of variables of which some are ordinal; below it is compared with the standard cor function.
# True correlation matrix:
Cor <- matrix(c(1, 0.5, 0.25,
                0.5, 1, 0.5,
                0.25, 0.5, 1), 3, 3)
Cor
##      [,1] [,2] [,3]
## [1,] 1.00  0.5 0.25
## [2,] 0.50  1.0 0.50
## [3,] 0.25  0.5 1.00
# Simulate true scores:
library(mvtnorm)
## Warning: package 'mvtnorm' was built under R version 3.1.1
Ylat <- rmvnorm(1000, c(0, 0, 0), Cor)
# Discretize variables 1 and 2:
Y <- Ylat
Y[,1:2] <- as.numeric(cut(Ylat[,1:2], breaks = c(-Inf, 0, 1, Inf)))
head(Y,10)
##       [,1] [,2]     [,3]
##  [1,]    1    1  0.02917
##  [2,]    2    2  1.73107
##  [3,]    1    1 -1.58392
##  [4,]    1    1 -0.01996
##  [5,]    1    2  0.61200
##  [6,]    2    3 -0.30646
##  [7,]    3    3  0.51575
##  [8,]    3    1 -1.46357
##  [9,]    2    3  0.79041
## [10,]    2    1  1.27870
# True correlation matrix:
Cor
##      [,1] [,2] [,3]
## [1,] 1.00  0.5 0.25
## [2,] 0.50  1.0 0.50
## [3,] 0.25  0.5 1.00
# Pearson correlations:
cor(Y)
##        [,1]   [,2]   [,3]
## [1,] 1.0000 0.3563 0.2276
## [2,] 0.3563 1.0000 0.4682
## [3,] 0.2276 0.4682 1.0000
# True correlation matrix:
Cor
##      [,1] [,2] [,3]
## [1,] 1.00  0.5 0.25
## [2,] 0.50  1.0 0.50
## [3,] 0.25  0.5 1.00
# Polychoric correlations:
library("qgraph")
cor_auto(Y)
## Variables detected as ordinal: V1; V2
##        V1     V2     V3
## V1 1.0000 0.4499 0.2576
## V2 0.4499 1.0000 0.5393
## V3 0.2576 0.5393 1.0000
Note that the polychoric correlations are considerably closer to the true correlations (0.5, 0.25 and 0.5) than the Pearson correlations are.
Because the Ising model is a combination of multiple logistic regression models, and the Gaussian random field is a combination of multiple linear regression models, we can form MRFs in the following way (a small simulation sketch of the logistic case follows the equations below):
\[ \Pr(X_1 = 1) \propto \exp\left( \tau_1 + \beta_{12} x_2 + \beta_{13} x_3 + \beta_{14} x_4 \right) \]
\[ \Pr(X_2 = 1) \propto \exp\left( \tau_2 + \beta_{21} x_1 + \beta_{23} x_3 + \beta_{24} x_4 \right) \]
\[ \Pr(X_3 = 1) \propto \exp\left( \tau_3 + \beta_{31} x_1 + \beta_{32} x_2 + \beta_{34} x_4 \right) \]
\[ \Pr(X_4 = 1) \propto \exp\left( \tau_4 + \beta_{41} x_1 + \beta_{42} x_2 + \beta_{43} x_3 \right) \]
\[ \omega_{ij} = \beta_{ij} = \beta_{ji} \]
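As a rough illustration of the logistic-regression formulation: each binary node is regressed on all remaining nodes, and the two cross-coefficients \(\beta_{ij}\) and \(\beta_{ji}\) estimate the same edge weight \(\omega_{ij}\). This is a minimal sketch; the variable names X1–X3 and the dichotomized-Gaussian data are arbitrary illustrative choices, not a true Ising model.
# Node-wise logistic regressions on simulated binary data:
library(mvtnorm)
set.seed(123)
Sigma <- matrix(c(1, 0.5, 0.25,
                  0.5, 1, 0.5,
                  0.25, 0.5, 1), 3, 3)
X <- as.data.frame((rmvnorm(1000, sigma = Sigma) > 0) * 1)
names(X) <- c("X1", "X2", "X3")
# Predict each node from all remaining nodes:
fit1 <- glm(X1 ~ X2 + X3, family = binomial, data = X)
fit2 <- glm(X2 ~ X1 + X3, family = binomial, data = X)
# In the Ising model beta_12 and beta_21 are the same parameter (omega_12);
# the two estimates here should therefore be of similar size:
coef(fit1)["X2"]
coef(fit2)["X1"]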
\[ X_1 = \tau_1 + \beta_{12} x_2 + \beta_{13} x_3 + \beta_{14} x_4 + \varepsilon_1 \]
\[ X_2 = \tau_2 + \beta_{21} x_1 + \beta_{23} x_3 + \beta_{24} x_4 + \varepsilon_2 \]
\[ X_3 = \tau_3 + \beta_{31} x_1 + \beta_{32} x_2 + \beta_{34} x_4 + \varepsilon_3 \]
\[ X_4 = \tau_4 + \beta_{41} x_1 + \beta_{42} x_2 + \beta_{43} x_3 + \varepsilon_4 \]
\[ \rho_{ij} = \beta_{ij} \frac{\sqrt{\mathrm{Var}\left(\varepsilon_j\right)}}{\sqrt{\mathrm{Var}\left(\varepsilon_i\right)}} = \beta_{ji} \frac{\sqrt{\mathrm{Var}\left(\varepsilon_i\right)}}{\sqrt{\mathrm{Var}\left(\varepsilon_j\right)}} \]
\[ \rho_{ij} = -\frac{ \theta_{ij} }{\sqrt{ \theta_{ii} \theta_{jj} }} \hspace{35pt} \boldsymbol{\Theta} = \boldsymbol{\Sigma}^{-1} \]
In which \(\boldsymbol{\Theta}\) is the precision matrix; the inverse of the variance-covariance matrix.
Data <- structure(list(
  W = c(4, 1, 5, 4, 2, 2, 8, 1, 4, 4),
  C = c(2, 6, 4, 2, 9, 6, 2, 5, 4, 2),
  F = c(3, 2, 4, 8, 1, 2, 4, 4, 5, 5),
  I = c(1, 1, 6, 8, 3, 5, 8, 1, 3, 4)),
  .Names = c("W", "C", "F", "I"), row.names = c(NA, -10L),
  class = "data.frame")
Data
##    W C F I
## 1  4 2 3 1
## 2  1 6 2 1
## 3  5 4 4 6
## 4  4 2 8 8
## 5  2 9 1 3
## 6  2 6 2 5
## 7  8 2 4 8
## 8  1 5 4 1
## 9  4 4 5 3
## 10 4 2 5 4
round(cor(Data),2)
##       W     C     F     I
## W  1.00 -0.67  0.40  0.70
## C -0.67  1.00 -0.73 -0.38
## F  0.40 -0.73  1.00  0.52
## I  0.70 -0.38  0.52  1.00
library("qgraph") qgraph(cor(Data), esize = 20)
fitW <- lm(W ~ C + F + I, data = Data)
fitW
## 
## Call:
## lm(formula = W ~ C + F + I, data = Data)
## 
## Coefficients:
## (Intercept)            C            F            I  
##       6.606       -0.723       -0.564        0.518
fitC <- lm(C ~ W + F + I, data = Data)
fitF <- lm(F ~ W + C + I, data = Data)
fitI <- lm(I ~ W + C + F, data = Data)
\[ \begin{aligned} W &= \beta_{12} C + \beta_{13} F + \beta_{14} I + \varepsilon_W \\ C &= \beta_{21} W + \beta_{23} F + \beta_{24} I + \varepsilon_C \\ F &= \beta_{31} W + \beta_{32} C + \beta_{34} I + \varepsilon_F \\ I &= \beta_{41} W + \beta_{42} C + \beta_{43} F + \varepsilon_I \end{aligned} \]
beta_WC <- coef(fitW)[2]
beta_CW <- coef(fitC)[2]
errorVariance_W <- summary(fitW)$sigma^2
errorVariance_C <- summary(fitC)$sigma^2
beta_WC * errorVariance_C
##      C 
## -1.163
beta_CW * errorVariance_W
##      W 
## -1.163
\[ \begin{aligned} W &= {\color{red} \beta_{12}} C + {\color{blue} \beta_{13}} F + {\color{green}\beta_{14}} I + \varepsilon_W \\ C &= {\color{red}\beta_{21}} W + {\color{purple} \beta_{23} } F + {\color{orange} \beta_{24}} I + \varepsilon_C \\ F &= {\color{blue} \beta_{31}} W + {\color{purple} \beta_{32} } C + {\color{brown}\beta_{34}} I + \varepsilon_F \\ I &= {\color{green}\beta_{41}} W + {\color{orange} \beta_{42} } C + {\color{brown} \beta_{43}} F + \varepsilon_I \end{aligned} \]
beta_WC * sqrt(errorVariance_C) / sqrt(errorVariance_W)
##       C 
## -0.7651
beta_CW * sqrt(errorVariance_W) / sqrt(errorVariance_C)
##       W 
## -0.7651
-cov2cor(solve(cov(Data)))
##         W       C       F       I
## W -1.0000 -0.7651 -0.5889  0.7754
## C -0.7651 -1.0000 -0.7993  0.5871
## F -0.5889 -0.7993 -1.0000  0.6468
## I  0.7754  0.5871  0.6468 -1.0000
These partial correlations give us the concentration graph:
library("qgraph") qgraph(cor(Data), graph = "concentration")
Data
##    W C F I
## 1  4 2 3 1
## 2  1 6 2 1
## 3  5 4 4 6
## 4  4 2 8 8
## 5  2 9 1 3
## 6  2 6 2 5
## 7  8 2 4 8
## 8  1 5 4 1
## 9  4 4 5 3
## 10 4 2 5 4