BART (Bayesian Additive Regression Trees) Models

library(BayesTree)

# Response columns of the training table; one BART model is fitted per entry.
soil_properties <- c("Ca", "P", "pH", "SOC", "Sand")

# Both tables are read from the current working directory.
trainingdata <- read.csv(file = "training.csv")
testdata <- read.csv(file = "test.csv")

# Column positions labelled as the CO2 bands; print their names so the
# hard-coded range can be checked against the CSV header.
CO2_bands <- 2656:2670
names(trainingdata)[CO2_bands]
##  [1] "m2379.76" "m2377.83" "m2375.9"  "m2373.97" "m2372.04" "m2370.11"
##  [7] "m2368.18" "m2366.26" "m2364.33" "m2362.4"  "m2360.47" "m2358.54"
## [13] "m2356.61" "m2354.68" "m2352.76"

Take the first derivatives of the MIR measurements (differences between adjacent columns) to smooth out the measurement noise.

# First difference across adjacent columns of a block of measurements.
#
# spectra: data frame (or matrix) whose columns are consecutive measurements.
# Returns an object with one column fewer than `spectra`: output column j holds
# spectra[, j + 1] - spectra[, j] and carries the name of spectra[, j + 1].
first_difference <- function(spectra) {
  n_bands <- ncol(spectra)
  # Subtract the block shifted by one column from itself; the first column has
  # no left neighbour and therefore has no derivative.
  spectra[, -1, drop = FALSE] - spectra[, -n_bands, drop = FALSE]
}

# Assemble the predictor table used by the models.
#
# data:            table laid out like training.csv / test.csv.
# extra_cols:      columns copied through unchanged (not differenced).
# spectral_blocks: list of contiguous column ranges; each range is differenced
#                  on its own so no difference is taken across a gap. The
#                  defaults skip columns 2656:2670 (the CO2 bands).
# Returns a data frame: `extra_cols` first, then the differenced blocks in the
# order given.
build_features <- function(data,
                           extra_cols = 3580:3595,
                           spectral_blocks = list(2:2655, 2671:3579)) {
  derivative_blocks <- lapply(
    spectral_blocks,
    function(cols) first_difference(data[, cols, drop = FALSE])
  )
  do.call(cbind, c(list(data[, extra_cols, drop = FALSE]), derivative_blocks))
}

# Build train and test predictors with the same function so the two tables
# cannot drift apart in column selection or ordering.
X_train <- build_features(trainingdata)
X_test <- build_features(testdata)

BART predictions

The models are fitted without cross-validation to calibrate the hyperparameters.

# Start with an all-NA placeholder column; one column of test-set predictions
# is appended to it per soil property.
predictions <- rep(NA, nrow(X_test))
for (soil_property in soil_properties) {
  response <- trainingdata[, soil_property]
  # sigest is given explicitly as the raw standard deviation of the response;
  # ndpost = 10000 posterior draws are kept per model.
  bart_model <- bart(
    x.train = X_train,
    y.train = response,
    x.test = X_test,
    sigest = sd(response),
    ndpost = 10000
  )
  # yhat.test.mean: per-row mean of the posterior draws for X_test.
  predictions <- cbind(predictions, bart_model$yhat.test.mean)
}

Write out the results

# Drop the leading NA placeholder column, leaving one column per soil property.
predictions <- predictions[, -1]
colnames(predictions) <- soil_properties

# The first column of the test table supplies the PIDN identifiers. Binding
# them as character to the numeric matrix yields a character matrix, which is
# written as-is.
submission <- cbind(PIDN = as.character(testdata[, 1]), predictions)
write.csv(submission, "predictions.csv", row.names = FALSE)

# Show the first row of the table that was written out. Binding the character
# PIDN column to the numeric predictions gives a character matrix, so the
# values print as quoted strings.
first_row <- cbind(PIDN = as.character(testdata[, 1]), predictions)[1, ]
first_row
##                 PIDN                   Ca                    P 
##           "09gt9UK5" "-0.461854766821337" "0.0994277699265408" 
##                   pH                  SOC                 Sand 
##  "-1.22708223504059" "-0.424106200152868"   "2.27348830251789"

Reference

If you have any questions, please contact Jiehua Chen (jc3288 AT columbia.edu)

The R code can be downloaded from GitHub (https://gist.github.com/a-f-s-i-s/3913d670cb882341a332)

The paper about BART is available at (http://www-old.newton.ac.uk/preprints/NI09002.pdf).