1 TL;DR

suppressMessages(library(netDx))
suppressMessages(library(curatedTCGAData))
suppressMessages(library(MultiAssayExperiment))

# load example predictor output data
inDir <- sprintf("%s/extdata/example_output",
    path.package("netDx"))
all_rngs <- list.dirs(inDir, recursive = FALSE)

predClasses <- c("LumA","notLumA")

# plot model performance
predFiles <- unlist(lapply(all_rngs, function(x) 
        paste(x, "predictionResults.txt", sep = "/")))
predPerf <- plotPerf(inDir, predClasses=predClasses)

# list top-scoring features 
featScores <- getFeatureScores(inDir,predClasses=c("LumA","notLumA"))
featSelNet <- lapply(featScores, function(x) {
    callFeatSel(x, fsCutoff=2, fsPctPass=0.7)
})

# plot network view of top-scoring pathways (EnrichmentMap)
# requires Cytoscape to be open - commented out here
###
### # data setup  - fetch data to limit pathways to genes assayed in the data
### brca <- suppressMessages(curatedTCGAData("BRCA",c("mRNAArray"),FALSE))
### xpr_genes <- rownames(assays(brca)[[1]])
### 
### #setup
###pathwayList <- readPathways(getExamplePathways())
###pathwayList <- lapply(pathwayList, function(x) x[which(x %in% xpr_genes)])
###
###netInfoFile <- sprintf("%s/inputNets.txt",inDir)
###netInfo <- read.delim(netInfoFile,sep="\t",h=TRUE,as.is=TRUE)
###
#### fetch input to create EnrichmentMap
###Emap_res <- getEMapInput_many(featScores,pathwayList,
### maxScore=10,pctPass=0.7,netInfo,verbose=FALSE)
###
# write emap results to file 
###gmtFiles <- list()
###nodeAttrFiles <- list()
###
###for (g in names(Emap_res)) {
### outFile <- sprintf("%s/%s_nodeAttrs.txt",outDir,g)
### write.table(Emap_res[[g]][["nodeAttrs"]],file=outFile,
###     sep="\t",col=TRUE,row=FALSE,quote=FALSE)
### nodeAttrFiles[[g]] <- outFile
###
### outFile <- sprintf("%s/%s.gmt",outDir,g)
### conn <- base::file(outFile,"w")
### tmp <- Emap_res[[g]][["featureSets"]]
### gmtFiles[[g]] <- outFile
###
### for (cur in names(tmp)) {
###     curr <- sprintf("%s\t%s\t%s", cur,cur,
###         paste(tmp[[cur]],collapse="\t"))
###     writeLines(curr,con=conn)
### }
###close(conn)
###}
#### plot EnrichmentMap - needs Cytoscape to be open
###plotEmap(gmtFiles[[1]],nodeAttrFiles[[1]])

2 Setup

suppressMessages(require(netDx))

suppressMessages(library(curatedTCGAData))
suppressMessages(library(MultiAssayExperiment))

3 Prepare data

Load predictor results generated by running buildPredictor(). Here we have saved the results so the example runs quickly and simple load these results.

# load example predictor output data
inDir <- sprintf("%s/extdata/example_output",
    path.package("netDx"))
all_rngs <- list.dirs(inDir, recursive = FALSE)

predClasses <- c("LumA","notLumA")

Plot distribution of AUROC and AUPR, as well as ROC and precision-recall curves showing performance across splits.

# plot model performance
predFiles <- unlist(lapply(all_rngs, function(x) 
        paste(x, "predictionResults.txt", sep = "/")))
predPerf <- plotPerf(inDir, predClasses=predClasses)
## Single directory provided, retrieving prediction files

Get feature scores for all train/test splits

featScores <- getFeatureScores(inDir,predClasses=c("LumA","notLumA"))
## LumA
##  Single directory provided, retrieving CV score files
## Got 3 iterations
## * Computing consensus
## notLumA
##  Single directory provided, retrieving CV score files
## Got 3 iterations
## * Computing consensus
print(names(featScores))
## [1] "LumA"    "notLumA"

Look at scores for the LumA label:

head(featScores$LumA[,1:3])
##                                                  PATHWAY_NAME rng1 rng2
## 1       5HT2_TYPE_RECEPTOR_MEDIATED_SIGNALING_PATHWAY.profile   NA    1
## 2         ACETYLCHOLINE_BINDING_AND_DOWNSTREAM_EVENTS.profile    5    2
## 3 ACTIVATION_OF_ATR_IN_RESPONSE_TO_REPLICATION_STRESS.profile   10    3
## 4 ACTIVATION_OF_BAD_AND_TRANSLOCATION_TO_MITOCHONDRIA.profile    7    1
## 5                     ACTIVATION_OF_BH3-ONLY_PROTEINS.profile    9    5
## 6              ACTIVATION_OF_DNA_FRAGMENTATION_FACTOR.profile   NA    4

NA values indicate a score of zero.

Now get the features that scored at least 2 out of 2 in over 70% of the splits.

featSelNet <- lapply(featScores, function(x) {
    callFeatSel(x, fsCutoff=2, fsPctPass=0.7)
})
print(head(featSelNet$LumA))
## [1] "ACETYLCHOLINE_BINDING_AND_DOWNSTREAM_EVENTS.profile"        
## [2] "ACTIVATION_OF_ATR_IN_RESPONSE_TO_REPLICATION_STRESS.profile"
## [3] "ACTIVATION_OF_BAD_AND_TRANSLOCATION_TO_MITOCHONDRIA.profile"
## [4] "ACTIVATION_OF_BH3-ONLY_PROTEINS.profile"                    
## [5] "ACTIVATION_OF_DNA_FRAGMENTATION_FACTOR.profile"             
## [6] "ACTIVATION_OF_NICOTINIC_ACETYLCHOLINE_RECEPTORS.profile"

4 sessionInfo

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] C/C/C/C/C/en_CA.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] curatedTCGAData_1.6.0       MultiAssayExperiment_1.10.4
##  [3] SummarizedExperiment_1.14.1 DelayedArray_0.10.0        
##  [5] BiocParallel_1.18.1         matrixStats_0.55.0         
##  [7] Biobase_2.44.0              GenomicRanges_1.36.0       
##  [9] GenomeInfoDb_1.20.0         IRanges_2.18.0             
## [11] S4Vectors_0.22.0            BiocGenerics_0.30.0        
## [13] netDx_0.99.8                BiocStyle_2.12.0           
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-6                  bit64_0.9-7                  
##  [3] doParallel_1.0.14             httr_1.4.0                   
##  [5] tools_3.6.0                   backports_1.1.4              
##  [7] R6_2.4.0                      KernSmooth_2.23-15           
##  [9] DBI_1.0.0                     lazyeval_0.2.2               
## [11] colorspace_1.4-1              tidyselect_0.2.5             
## [13] bit_1.1-14                    curl_3.3                     
## [15] compiler_3.6.0                glmnet_2.0-18                
## [17] graph_1.62.0                  bookdown_0.14                
## [19] caTools_1.17.1.2              scales_1.0.0                 
## [21] rappdirs_0.3.1                caroline_0.7.6               
## [23] stringr_1.4.0                 digest_0.6.18                
## [25] rmarkdown_1.12                R.utils_2.8.0                
## [27] XVector_0.24.0                pkgconfig_2.0.2              
## [29] htmltools_0.4.0               fastmap_1.0.1                
## [31] dbplyr_1.4.2                  rlang_0.4.1                  
## [33] RSQLite_2.1.2                 shiny_1.4.0                  
## [35] combinat_0.0-8                gtools_3.8.1                 
## [37] dplyr_0.8.3                   R.oo_1.22.0                  
## [39] RCurl_1.95-4.12               magrittr_1.5                 
## [41] GenomeInfoDbData_1.2.1        Matrix_1.2-17                
## [43] Rcpp_1.0.1                    munsell_0.5.0                
## [45] R.methodsS3_1.7.1             stringi_1.4.3                
## [47] yaml_2.2.0                    RJSONIO_1.3-1.1              
## [49] zlibbioc_1.30.0               AnnotationHub_2.16.1         
## [51] gplots_3.0.1.1                plyr_1.8.4                   
## [53] BiocFileCache_1.8.0           grid_3.6.0                   
## [55] blob_1.2.0                    promises_1.1.0               
## [57] gdata_2.18.0                  ExperimentHub_1.10.0         
## [59] bigmemory.sri_0.1.3           crayon_1.3.4                 
## [61] lattice_0.20-38               zeallot_0.1.0                
## [63] knitr_1.22                    pillar_1.3.1                 
## [65] igraph_1.2.4.1                reshape2_1.4.3               
## [67] codetools_0.2-16              XML_3.98-1.19                
## [69] glue_1.3.1                    evaluate_0.13                
## [71] BiocManager_1.30.4            httpuv_1.5.2                 
## [73] vctrs_0.2.0                   foreach_1.4.4                
## [75] gtable_0.3.0                  purrr_0.3.2                  
## [77] assertthat_0.2.1              ggplot2_3.1.1                
## [79] xfun_0.6                      mime_0.6                     
## [81] xtable_1.8-4                  pracma_2.2.5                 
## [83] later_1.0.0                   RCy3_2.4.6                   
## [85] tibble_2.1.1                  iterators_1.0.10             
## [87] AnnotationDbi_1.46.1          memoise_1.1.0                
## [89] interactiveDisplayBase_1.22.0 bigmemory_4.5.33             
## [91] ROCR_1.0-7