add additional checks for response for binary regression

add banknote data
merliseclyde · May 3, 2023 · 195a7c0 · 195a7c0
1 parent 4555a90
commit 195a7c0
Show file tree

Hide file tree

Showing 9 changed files with 78 additions and 60 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -19,4 +19,5 @@ run-checks.R
 ^CONTRIBUTING\.md$
 ^SECURITY\.md$
 ^CRAN-SUBMISSION$
-bark-profiling.R
+bark-profiling.R
+
diff --git a/.gitignore b/.gitignore
@@ -9,3 +9,4 @@ src/*.dll
 
 docs
 inst/doc
+tests/testthat/test_new_vs_old_code.R
diff --git a/R/bark.r b/R/bark.r
@@ -199,7 +199,19 @@ bark <- function(formula, data, subset, na.action = na.omit,
   Terms <- attr(m, "terms")
   attr(Terms, "intercept") <- 0
   x.train <- model.matrix(Terms, m)
-  y.train <- model.extract(m, "response")
+  y.train <- model.extract(m, "response") 
+  if (is.character(y.train)) {
+    stop("the response variable should be a double for regression problems 
+         or a factor, integer or double for classification problems")
+  }
+  if (!is.double(y.train)) {
+      if (classification) {
+        y.train = as.double(y.train)
+        if (min(y.train) > 0) y.train = y.train - 1.0 
+      }
+      else stop("response should be a double for regression problems")
+  }
+
   attr(x.train, "na.action") <- attr(y.train, "na.action") <- attr(m, "na.action")
 
   if (!is.logical(classification))
@@ -321,6 +333,7 @@ bark <- function(formula, data, subset, na.action = na.omit,
   }
   # burning the markov chain
   fullXX <- NULL;
+  fullXX <- getfulldesign(x.train, x.train, theta)
   for(i in 1:(keepevery*nburn)){
     cur <- rjmcmcone(y.train, x.train, theta, fixed, tune, classification, type, fullXX);
     theta <- cur$theta;

diff --git a/R/data.R b/R/data.R
@@ -0,0 +1,21 @@
+#' @name banknotes
+#' @title Swiss Bank Notes 
+#' @description This data set contains six measurements on 100 genuine and 
+#' 100 fraudulent old Swiss banknotes
+#' @docType data
+#' @usage data(banknotes)
+#' @format A data frame with the following variables:
+#' \describe{
+#'  \item{Status}{the status of the banknote: genuine or counterfeit}
+#'  \item{Length}{Length of bill (mm)}
+#'  \item{Left}{Width of left edge (mm)}
+#'  \item{Right}{Width of right edge (mm)}
+#'  \item{Bottom}{Bottom margin width (mm)}
+#'  \item{Top}{Top margin width (mm)}
+#'  \item{Diagonal}{Length of diagonal (mm)}
+#' }
+#' @keywords datasets
+#' @source Flury, B. and Riedwyl, H. (1988). Multivariate Statistics: A 
+#' practical approach. London: Chapman & Hall, Tables 1.1 and 1.2, pp. 5-8.
+#' 
+NULL
diff --git a/R/depr_simCircle.R b/R/depr_simCircle.R
@@ -42,9 +42,9 @@ NULL
 ## sim.Circle()
 sim.Circle <- function(n, dim = 5) {
   .Deprecated("sim_circle")
-  if (dim < 2) { # start nocov
+  if (dim < 2) { # nocov start
     stop("number of variables must be >= 2.")  
-    #  end nocov
+    #   nocov end
   }
   x <- matrix(runif(n * dim, min = -1, max = 1), nrow = n)
   r2 <- x[, 1]^2 + x[, 2]^2

diff --git a/data/banknotes.rda b/data/banknotes.rda
diff --git a/man/banknotes.Rd b/man/banknotes.Rd
diff --git a/tests/testthat/test-depr_bark.r b/tests/testthat/test-depr_bark.r
@@ -13,7 +13,7 @@ test_that("old bark", {
 # check main input argument types  
 # y is not a vector
 expect_error(bark_mat( y.train=data.frame(traindata), x.train=traindata$x,
-                    testdata= testdata$x,
+                    x.test = testdata$x,
                     nburn=10, nkeep=100, keepevery=10,
                     classification = FALSE, 
                     printevery=10^10))
@@ -27,7 +27,7 @@ expect_error(bark_mat(x.train=traindata, y.train = traindata$y,
 
 # testdata is a dataframe
  expect_error(bark_mat(x.train=traindata$x, y.train = traindata$y,
-                 testdata=testdata,
+                 x.test = testdata,
                  nburn=10, nkeep=100, keepevery=10,
                  classification = FALSE, 
                  printevery=10^10))    
@@ -50,7 +50,12 @@ expect_error(bark_mat(x.train=traindata, y.train = traindata$y,
                                       nburn=10, nkeep=100, keepevery=10,
                                       classification=5, type="e", 
                                       printevery=500))
-
+
+   expect_error(bark_mat(traindata$x, traindata$y, as.character(testdata$x),
+                            nburn=10, nkeep=10, keepevery=10, 
+                            keeptrain=TRUE,
+                            classification=FALSE, type="sd", printevery=10^10))
+
 
    expect_no_error(bark_mat(traindata$x, traindata$y, testdata$x,
                          nburn=10, nkeep=10, keepevery=10, 

diff --git a/vignettes/bark.Rmd b/vignettes/bark.Rmd
@@ -200,56 +200,3 @@ if (bart.available) {
 ```
 
 
-
-
-## Ionosphere Example
-
-```{r io-data}
-set.seed(42)
-data(ionosphere, package="fdm2id")
-y.loc = ncol(ionosphere)
-ionosphere[, y.loc] = 1L*(ionosphere[, y.loc]  == "g")
-train = sample(nrow(ionosphere), 200, rep=FALSE)
-io.traindata = ionosphere[train,]
-io.testdata =  ionosphere[-train,]
-
-```
-
-### BARK
-
-```{r io-bark}
-if (io.available) {
-set.seed(42)
-io.bark <- bark(V35 ~ ., data= io.traindata,
-                    testdata = io.testdata,
-                    classification=TRUE, 
-                    selection = TRUE,
-                    common_lambdas = FALSE,
-                    nburn = 100,
-                    nkeep = 2500,
-                    keepevery = 100,
-                    printevery = 10^10)
-mean((io.bark$yhat.test.mean > 0) != io.testdata[, y.loc])
-}
-```
-
-
-### BART
-```{r bart-io}
-if (bart.available & io.available) {
-  io.bart = pbart(x.train = as.matrix(io.traindata[, -y.loc]), 
-                            y.train =  io.traindata[, y.loc]);
-  pred.bart =   predict(io.bart, io.testdata[, -y.loc]);
-  mean((pred.bart$prob.test.mean > .5) != io.testdata[, y.loc])
-} 
-```
-
-### SVM
-```{r svm-io}
-if (svm.available & io.available) {
-  io.svm = svm(V35 ~ ., data=io.traindata, type="C")
-  pred.svm = predict(io.svm, io.testdata)
-  mean(pred.svm != io.testdata[, y.loc])
-}
-
-```