Machine learning

安装所有依赖的包

# Packages required by this analysis. The install step further down installs
# whichever of these are not yet available.
required_packages <- c(
  "Boruta",
  "ROCR",
  "VennDiagram",
  "caret",
  "doParallel",
  "ggplot2",
  "ipred",
  "knitr",
  "pROC",
  "randomForest",
  "rattle",
  "rpart",
  "rpart.plot",
  "ranger",
  "RRF",
  "e1071",
  "bookdown",
  "zoo",
  "plyr",
  "dplyr",
  "ggrepel",
  "verification"
)

# site= "https://mirrors.tuna.tsinghua.edu.cn/CRAN"
# old <- options(BioC_mirror=c(""))
# "https://mirrors.nju.edu.cn/bioconductor/",  ,"https://mirrors.tuna.tsinghua.edu.cn/bioconductor"))


# local({r = getOption("repos")  
# r["CRAN"] = "http://mirrors.tuna.tsinghua.edu.cn/CRAN/"   
# r["BioC_mirror"] = "http://mirrors.ustc.edu.cn/bioc/"
# options(repos=r)}) 


# BiocManager is used as the installer below, so make sure it is present first.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Install every required package that is not available yet.
# system.file(package = ) returns "" when the package cannot be found, which
# avoids re-scanning the whole library with installed.packages() on every
# iteration. The check still runs per package, so anything pulled in as a
# dependency of an earlier install is not installed a second time.
for (pkg in required_packages) {
  if (!nzchar(system.file(package = pkg))) {
    # update = FALSE: do not touch packages that are already installed.
    BiocManager::install(pkg, update = FALSE)
  }
}

Some utility functions

# Generate a sparse, increasing grid of candidate values below a variable count.
#
# Pools (1:10)^p for every power p from 1 to ceiling(log10(num_toal_variable)),
# drops duplicates and keeps only the values strictly smaller than
# num_toal_variable.
#
# num_toal_variable: a single finite number; exclusive upper bound of the grid.
# Returns: a sorted numeric vector; numeric(0) when num_toal_variable <= 1.
#
# Example: generateTestVariableSet(100)
#   -> 1 2 3 4 5 6 7 8 9 10 16 25 36 49 64 81
generateTestVariableSet <- function(num_toal_variable) {
  stopifnot(
    is.numeric(num_toal_variable),
    length(num_toal_variable) == 1L,
    is.finite(num_toal_variable)
  )

  # No candidate (all are >= 1) can be below a bound of 1 or less. Returning
  # early also avoids log10() of zero/negative input and the descending
  # sequence that 1:0 would produce.
  if (num_toal_variable <= 1) {
    return(numeric(0))
  }

  max_power <- ceiling(log10(num_toal_variable))
  candidates <- unique(unlist(
    lapply(seq_len(max_power), function(p) (1:10)^p)
  ))
  sort(candidates[candidates < num_toal_variable])
}
# Generate a list of unique random seeds for repeated k-fold cross validation.
# NOTE(review): the shape looks intended for the `seeds` argument of
# caret::trainControl() -- confirm against the callers.
#
# number_k_fold: the k for k-fold cross validation
# repeat_k_fold: repeat number of k-fold cross validation
# num_parameters: number of potential parameter spaces. Larger is better.
#
# Returns: a named list (V1, V2, ...) with number_k_fold * repeat_k_fold + 1
# elements, each a numeric vector of num_parameters seeds. Taken together the
# seeds are a random permutation of 10, 20, ..., 10 * totalnumber, so no seed
# is repeated. Call set.seed() beforehand for reproducible output.
generateTrainControlSeeds <- function(number_k_fold = 10,
                                      repeat_k_fold = 5,
                                      num_parameters = 100) {
  # One seed vector per resample plus one extra element.
  total_len <- number_k_fold * repeat_k_fold + 1
  totalnumber <- total_len * num_parameters

  # Permute the indices and scale them, instead of sample(1:totalnumber * 10):
  # the results are identical for totalnumber >= 2, but when totalnumber == 1
  # sample() would treat the single pool value 10 as the range 1:10.
  seeds <- sample.int(totalnumber, totalnumber, replace = FALSE) * 10

  # Fill column-wise; each column becomes one list element.
  seed_matrix <- matrix(seeds, nrow = num_parameters, ncol = total_len)
  as.list(as.data.frame(seed_matrix))
}

# generateTrainControlSeeds(3,2,2)