# 机器学习
# 2021-06-22
# Machine learning ----
# 安装所有依赖的包 (Install all required packages) ----
# Packages this tutorial depends on (feature selection, model training,
# evaluation and plotting). Used below to decide what has to be installed.
required_packages <- c(
  "Boruta", "ROCR", "VennDiagram", "caret", "doParallel", "ggplot2",
  "ipred", "knitr", "pROC", "randomForest", "rattle", "rpart", "rpart.plot",
  "ranger", "RRF", "e1071", "bookdown", "zoo", "plyr", "dplyr",
  "ggrepel", "verification"
)
# site= "https://mirrors.tuna.tsinghua.edu.cn/CRAN"
# old <- options(BioC_mirror=c(""))
# "https://mirrors.nju.edu.cn/bioconductor/", ,"https://mirrors.tuna.tsinghua.edu.cn/bioconductor"))
# local({r = getOption("repos")
# r["CRAN"] = "http://mirrors.tuna.tsinghua.edu.cn/CRAN/"
# r["BioC_mirror"] = "http://mirrors.ustc.edu.cn/bioc/"
# options(repos=r)})
# Bootstrap BiocManager first; it is used below to install the packages
# listed in `required_packages`.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Install only what is missing. installed.packages() is slow, so it is
# queried once instead of once per package, and all missing packages are
# handed to a single install call. `update = FALSE` leaves packages that
# are already installed untouched.
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  BiocManager::install(missing_packages, update = FALSE)
}
# Some utility functions ----
# Build an increasing set of candidate variable counts to try.
#
# The candidates are the values (1:10)^p for p = 1 .. ceiling(log10(n)),
# de-duplicated, restricted to values strictly smaller than n, and sorted.
#
# num_toal_variable: total number of variables; a single finite number > 0.
# Returns a sorted numeric vector (empty when no candidate is smaller than
# num_toal_variable, e.g. for num_toal_variable = 1).
generateTestVariableSet <- function(num_toal_variable) {
  stopifnot(
    is.numeric(num_toal_variable),
    length(num_toal_variable) == 1L,
    is.finite(num_toal_variable),
    num_toal_variable > 0
  )

  max_power <- ceiling(log10(num_toal_variable))
  # With no positive power there is no candidate below num_toal_variable.
  if (max_power < 1) {
    return(numeric(0))
  }

  powers <- lapply(seq_len(max_power), function(x) (1:10)^x)
  tmp_subset <- unique(unlist(powers))
  sort(tmp_subset[tmp_subset < num_toal_variable])
}
# Generate a list of random seeds, one vector per resampling iteration plus
# one extra element.
#
# number_k_fold: the k for k-fold cross validation
# repeat_k_fold: repeat number of k-fold cross validation
# num_parameters: number of potential parameter spaces. Larger is better.
#
# Returns a named list (V1, V2, ...) of length
# number_k_fold * repeat_k_fold + 1; every element is a numeric vector of
# num_parameters seeds. No seed is repeated across the whole list.
generateTrainControlSeeds <- function(number_k_fold = 10, repeat_k_fold = 5,
                                      num_parameters = 100) {
  total_len <- number_k_fold * repeat_k_fold + 1
  totalnumber <- total_len * num_parameters

  # `:` binds tighter than `*`, so the candidate pool is 10, 20, ...,
  # 10 * totalnumber; drawing totalnumber values without replacement
  # therefore yields a random permutation of that pool.
  seeds <- sample(seq_len(totalnumber) * 10, totalnumber, replace = FALSE)

  # One column per list element: num_parameters seeds each.
  seed_matrix <- matrix(seeds, nrow = num_parameters, ncol = total_len)
  seedL <- as.list(as.data.frame(seed_matrix))
  seedL
}
# generateTrainControlSeeds(3,2,2)