
library('dplyr')
library('caret')
library(randomForest)

############ training model ############
# NOTE: the former `rm(list = ls())` was removed. Clearing the global
# environment from inside a script deletes the caller's objects and still does
# not give a clean session (attached packages and options are untouched).
# Run this script in a fresh R session instead.

# Read the input table.
md <- read.csv("socd_tp_kgm3.csv")

# Drop rows where `tn` is 0 so the ratio below never divides by zero.
# dplyr::filter() also drops rows where the condition is NA (i.e. `tn` is NA).
md <- filter(md, tn != 0)

# Derived predictor: ratio of the `soc` column to the `tn` column.
md$cn <- md$soc / md$tn


# 30cm: random forest fitted on the rows whose depth label is 10cm, 20cm or 30cm
md30 <- filter(md, depth %in% c("10cm", "20cm", "30cm"))

# set.seed() interprets its argument as an integer, so the fractional part of
# the former literal 20250519.1451 was silently discarded. The seed that was
# actually in effect is written out here; results are unchanged.
# NOTE(review): every depth section of this script ends up with this same
# effective seed -- confirm whether distinct seeds were intended.
set.seed(20250519L)

# 10-fold cross-validation; `number` is passed by name instead of by position.
cv_folds <- trainControl(method = "cv", number = 10, allowParallel = TRUE)

rf <- train(
  socd ~ ndvi + mat + map + midmat + midmap + lgmmat + lgmmap + ph + clay +
    tn + cn + cec + ele + doc + initial_depth + end_depth,
  data = md30,
  method = "rf",
  trControl = cv_folds
)

# Cross-validated performance for each tuning candidate
rf$results

# Variable importance table of the final fitted forest
rf$finalModel$importance

# NOTE: saveRDS() writes a single-object RDS file even though the name ends in
# .RData; read it back with readRDS(), not load(). The file name is kept as-is
# so existing readers keep working.
saveRDS(rf, "tp_socd30_rf.RData")


# 100cm: random forest fitted on the rows whose depth label is 50cm or 100cm
md100 <- filter(md, depth %in% c("50cm", "100cm"))

# set.seed() interprets its argument as an integer, so the fractional part of
# the former literal 20250519.1503 was silently discarded. The seed that was
# actually in effect is written out here; results are unchanged.
# NOTE(review): every depth section of this script ends up with this same
# effective seed -- confirm whether distinct seeds were intended.
set.seed(20250519L)

# 10-fold cross-validation; `number` is passed by name instead of by position.
cv_folds <- trainControl(method = "cv", number = 10, allowParallel = TRUE)

rf <- train(
  socd ~ ndvi + mat + map + midmat + midmap + lgmmat + lgmmap + ph + clay +
    tn + cn + cec + ele + doc + initial_depth + end_depth,
  data = md100,
  method = "rf",
  trControl = cv_folds
)

# Cross-validated performance for each tuning candidate
rf$results

# Variable importance table of the final fitted forest
rf$finalModel$importance

# NOTE: saveRDS() writes a single-object RDS file even though the name ends in
# .RData; read it back with readRDS(), not load(). The file name is kept as-is
# so existing readers keep working.
saveRDS(rf, "tp_socd100_rf.RData")


# 300cm: random forest fitted on the rows whose depth label is 200cm or 300cm
md300 <- filter(md, depth %in% c("200cm", "300cm"))

# set.seed() interprets its argument as an integer, so the fractional part of
# the former literal 20250519.1566 was silently discarded. The seed that was
# actually in effect is written out here; results are unchanged.
# NOTE(review): every depth section of this script ends up with this same
# effective seed -- confirm whether distinct seeds were intended.
set.seed(20250519L)

# 10-fold cross-validation; `number` is passed by name instead of by position.
cv_folds <- trainControl(method = "cv", number = 10, allowParallel = TRUE)

rf <- train(
  socd ~ ndvi + mat + map + midmat + midmap + lgmmat + lgmmap + ph + clay +
    tn + cn + cec + ele + doc + initial_depth + end_depth,
  data = md300,
  method = "rf",
  trControl = cv_folds
)

# Cross-validated performance for each tuning candidate
rf$results

# Variable importance table of the final fitted forest
rf$finalModel$importance

# NOTE: saveRDS() writes a single-object RDS file even though the name ends in
# .RData; read it back with readRDS(), not load(). The file name is kept as-is
# so existing readers keep working.
saveRDS(rf, "tp_socd300_rf.RData")



# 30cm bootstrap: refit the random forest on 100 resamples (drawn with
# replacement) of the 10cm/20cm/30cm subset and keep each final model.
md30 <- filter(md, depth %in% c("10cm", "20cm", "30cm"))

# One slot per bootstrap replicate; the loop below fills every slot.
sample_models <- vector("list", 100)
cv_folds <- trainControl(method = "cv", number = 10, allowParallel = TRUE)

# NOTE(review): no seed is set in this section, so the resamples depend on the
# RNG state left by whatever ran before it. Running the section on its own is
# not reproducible -- consider adding a set.seed() call here.
n_obs <- nrow(md30)

for (boot_id in seq_along(sample_models)) {
  # Same number of rows as the subset, sampled with replacement.
  boot_md <- md30[sample(n_obs, size = n_obs, replace = TRUE), ]

  rf <- train(
    socd ~ ndvi + mat + map + midmat + midmap + lgmmat + lgmmap + ph + clay +
      tn + cn + cec + ele + doc + initial_depth + end_depth,
    data = boot_md,
    method = "rf",
    trControl = cv_folds
  )

  # Keep only the fitted forest, not the whole caret train object.
  sample_models[[boot_id]] <- rf$finalModel
}

# NOTE: saveRDS() writes a single-object RDS file even though the name ends in
# .RData; read it back with readRDS(), not load(). The file name (including
# the "boostrap" spelling) is kept as-is so existing readers keep working.
saveRDS(sample_models, "tp_socd30_rf_boostrap.RData")



# 100cm bootstrap: refit the random forest on 100 resamples (drawn with
# replacement) of the 50cm/100cm subset and keep each final model.
md100 <- filter(md, depth %in% c("50cm", "100cm"))

# One slot per bootstrap replicate; the loop below fills every slot.
sample_models <- vector("list", 100)
cv_folds <- trainControl(method = "cv", number = 10, allowParallel = TRUE)

# NOTE(review): no seed is set in this section, so the resamples depend on the
# RNG state left by whatever ran before it. Running the section on its own is
# not reproducible -- consider adding a set.seed() call here.
n_obs <- nrow(md100)

for (boot_id in seq_along(sample_models)) {
  # Same number of rows as the subset, sampled with replacement.
  boot_md <- md100[sample(n_obs, size = n_obs, replace = TRUE), ]

  rf <- train(
    socd ~ ndvi + mat + map + midmat + midmap + lgmmat + lgmmap + ph + clay +
      tn + cn + cec + ele + doc + initial_depth + end_depth,
    data = boot_md,
    method = "rf",
    trControl = cv_folds
  )

  # Keep only the fitted forest, not the whole caret train object.
  sample_models[[boot_id]] <- rf$finalModel
}

# NOTE: saveRDS() writes a single-object RDS file even though the name ends in
# .RData; read it back with readRDS(), not load(). The file name (including
# the "boostrap" spelling) is kept as-is so existing readers keep working.
saveRDS(sample_models, "tp_socd100_rf_boostrap.RData")



# 300cm bootstrap: refit the random forest on 100 resamples (drawn with
# replacement) of the 200cm/300cm subset and keep each final model.
md300 <- filter(md, depth %in% c("200cm", "300cm"))

# One slot per bootstrap replicate; the loop below fills every slot.
sample_models <- vector("list", 100)
cv_folds <- trainControl(method = "cv", number = 10, allowParallel = TRUE)

# NOTE(review): no seed is set in this section, so the resamples depend on the
# RNG state left by whatever ran before it. Running the section on its own is
# not reproducible -- consider adding a set.seed() call here.
n_obs <- nrow(md300)

for (boot_id in seq_along(sample_models)) {
  # Same number of rows as the subset, sampled with replacement.
  boot_md <- md300[sample(n_obs, size = n_obs, replace = TRUE), ]

  rf <- train(
    socd ~ ndvi + mat + map + midmat + midmap + lgmmat + lgmmap + ph + clay +
      tn + cn + cec + ele + doc + initial_depth + end_depth,
    data = boot_md,
    method = "rf",
    trControl = cv_folds
  )

  # Keep only the fitted forest, not the whole caret train object.
  sample_models[[boot_id]] <- rf$finalModel
}

# NOTE: saveRDS() writes a single-object RDS file even though the name ends in
# .RData; read it back with readRDS(), not load(). The file name (including
# the "boostrap" spelling) is kept as-is so existing readers keep working.
saveRDS(sample_models, "tp_socd300_rf_boostrap.RData")


