SEM-based out-of-sample prediction using node-wise ML

Predict method for ML objects.

# S3 method for class 'ML'
predict(object, newdata, newoutcome = NULL, ncores = 2, verbose = FALSE, ...)

Arguments

object: A fitted model object returned by the SEMml() function.
newdata: A matrix containing new data with rows corresponding to subjects, and columns to variables.
newoutcome: A new character vector (as.factor) of labels for a categorical output (target) (default = NULL).
ncores: Number of CPU cores (default = 2).
verbose: Print predicted out-of-sample MSE values (default = FALSE).
...: Currently ignored.

Value

A list of 3 objects:

"PE", vector of the amse = average MSE over all (sink and mediator) graph nodes; r2 = 1 - amse; and srmr = Standardized Root Mean Square Residual between the out-of-bag correlation matrix and the model correlation matrix.
"mse", vector of the Mean Squared Error (MSE) for each out-of-bag prediction of the sink and mediator graph nodes.
"Yhat", the matrix of continuous predicted values of graph nodes (excluding source nodes) based on out-of-bag samples.

Author

Mario Grassi mario.grassi@unipv.it

Examples


# \donttest{
# Load Amyotrophic Lateral Sclerosis (ALS)
ig<- alsData$graph
data<- alsData$exprs
data<- transformData(data)$data
#> Conducting the nonparanormal transformation via shrunkun ECDF...done.
group<- alsData$group

#...with train-test (0.5-0.5) samples
set.seed(123)
train<- sample(1:nrow(data), 0.5*nrow(data))

start<- Sys.time()
# ... tree
res1<- SEMml(ig, data[train, ], algo="tree")
#> Running SEM model via ML...
#>  done.
#> 
#> TREE solver ended normally after 23 iterations
#> 
#>  logL:-45.080145  srmr:0.201877
mse1<- predict(res1, data[-train, ], verbose=TRUE)
#>      amse        r2      srmr 
#> 0.8208979 0.1791021 0.2269379 

# ... rf
res2<- SEMml(ig, data[train, ], algo="rf")
#> Running SEM model via ML...
#>  done.
#> 
#> RF solver ended normally after 23 iterations
#> 
#>  logL:-33.16687  srmr:0.086188
mse2<- predict(res2, data[-train, ], verbose=TRUE)
#>      amse        r2      srmr 
#> 0.8260251 0.1739749 0.1760094 

# ... xgb
res3<- SEMml(ig, data[train, ], algo="xgb")
#> Running SEM model via ML...
#>  done.
#> 
#> XGB solver ended normally after 23 iterations
#> 
#>  logL:70.10035  srmr:0.001439
mse3<- predict(res3, data[-train, ], verbose=TRUE)
#>      amse        r2      srmr 
#> 0.8005846 0.1994154 0.1543352 

# ... nn
res4<- SEMml(ig, data[train, ], algo="nn")
#> Running SEM model via ML...
#>  done.
#> 
#> NN solver ended normally after 23 iterations
#> 
#>  logL:-33.873642  srmr:0.135954
mse4<- predict(res4, data[-train, ], verbose=TRUE)
#>      amse        r2      srmr 
#>       NaN       NaN 0.1844971 
end<- Sys.time()
print(end-start)
#> Time difference of 2.424637 secs

#...with a categorical (as.factor) outcome
outcome <- factor(ifelse(group == 0, "control", "case")); table(outcome) 
#> outcome
#>    case control 
#>     139      21 

res5 <- SEMml(ig, data[train, ], outcome[train], algo="tree")
#> Running SEM model via ML...
#>  done.
#> 
#> TREE solver ended normally after 25 iterations
#> 
#>  logL:-48.72171  srmr:0.196861
pred <- predict(res5, data[-train, ], outcome[-train], verbose=TRUE)
#>      amse        r2      srmr 
#> 0.7783295 0.2216705 0.2253818 
yhat <- pred$Yhat[ ,levels(outcome)]; head(yhat)
#>         case    control
#> 1  0.4773726 -0.4773726
#> 2  0.4773726 -0.4773726
#> 3  0.4773726 -0.4773726
#> 4  0.4773726 -0.4773726
#> 5  0.4773726 -0.4773726
#> 6 -0.6137648  0.6137648
yobs <- outcome[-train]; head(yobs)
#> [1] case case case case case case
#> Levels: case control
classificationReport(yobs, yhat, verbose=TRUE)$stats
#>          pred
#> yobs      case control
#>   case      57      17
#>   control    2       4
#> 

#>              precision    recall        f1 accuracy      mcc support
#> case         0.9661017 0.7702703 0.8571429   0.7625 0.261562      74
#> control      0.1904762 0.6666667 0.2962963   0.7625 0.261562       6
#> macro avg    0.5782889 0.7184685 0.5767196   0.7625 0.261562      80
#> weighted avg 0.9079298 0.7625000 0.8150794   0.7625 0.261562      80
#>              support_prop
#> case                0.925
#> control             0.075
#> macro avg           1.000
#> weighted avg        1.000
# }