fix tabnet_fit in vignette #163

Open. Wants to merge 2 commits into base `main`.
1 change: 1 addition & 0 deletions .github/workflows/R-CMD-check.yaml
```diff
@@ -33,6 +33,7 @@ jobs:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
       TORCH_INSTALL: 1
       TORCH_TEST: 1
+      PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0

     steps:
       - uses: actions/checkout@v3
```
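The new `PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0` entry applies to the macOS runners: setting the ratio to `0.0` removes the MPS (Apple-silicon GPU) memory high-watermark limit that the PyTorch runtime otherwise enforces. A minimal sketch of the same setting from inside R, assuming it runs before torch first allocates on the MPS device (the variable is read as a string by the underlying runtime):

```r
# Sketch: mirror the workflow's job-level env entry in an R session.
# Must be set before torch touches the MPS device; 0.0 disables the
# upper memory bound (macOS / Apple silicon only).
Sys.setenv(PYTORCH_MPS_HIGH_WATERMARK_RATIO = "0.0")
Sys.getenv("PYTORCH_MPS_HIGH_WATERMARK_RATIO")
```

In the workflow the variable is set at job level instead, so every step (including `R CMD check` running the vignettes) inherits it.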
7 changes: 4 additions & 3 deletions NEWS.md
```diff
@@ -2,9 +2,10 @@

 ## Bugfixes

-* improve function documentation consistency before translation
-* fix ".... is not an exported object from 'namespace:dials'" error when using tune() on tabnet parameters. (#160 @cphaarmeyer)
-
+* fix `tabnet_pretrain` wrongly used instead of `tabnet_fit` in the Missing data predictors vignette
+* improve message related to case_weights not being used as predictors.
+* improve function documentation consistency before translation.
+* fix "... is not an exported object from 'namespace:dials'" error when using tune() on tabnet parameters. (#160 @cphaarmeyer)

 # tabnet 0.6.0

```
32 changes: 16 additions & 16 deletions vignettes/Missing_data_predictors.Rmd
````diff
@@ -119,16 +119,16 @@ Now we capture the columns with missings, and create a convenience function to c

 ```{r}
 col_with_missings <- ames_missing %>%
-  summarise_all(~sum(is.na(.))>0) %>%
-  t %>% enframe(name="Variable") %>%
-  rename(has_missing="value")
+  summarise_all(~sum(is.na(.)) > 0) %>%
+  t %>% enframe(name = "Variable") %>%
+  rename(has_missing = "value")

 vip_color <- function(object, col_has_missing) {
   vip_data <- vip::vip(object)$data %>% arrange(Importance)
-  vis_miss_plus <- left_join(vip_data, col_has_missing, by="Variable") %>%
-    mutate(Variable=factor(Variable, levels = vip_data$Variable))
+  vis_miss_plus <- left_join(vip_data, col_has_missing, by = "Variable") %>%
+    mutate(Variable = factor(Variable, levels = vip_data$Variable))
   vis_miss_plus
-  ggplot(vis_miss_plus, aes(x=Variable, y=Importance, fill=has_missing)) +
+  ggplot(vis_miss_plus, aes(x = Variable, y = Importance, fill = has_missing)) +
     geom_col() + coord_flip() + scale_fill_grey()
 }
 vip_color(ames_pretrain, col_with_missings)
````
````diff
@@ -145,12 +145,12 @@ Let's pretrain a new model with the same hyperparameter, but now using the `ames
 In order to compensate the 13% missingness already present in the `ames_missing` dataset, we adjust the `pretraining_ratio` parameter to `0.5 - 0.13 = 0.37`

 ```{r}
-ames_missing_rec <- recipe(Sale_Price ~ ., data=ames_missing) %>%
+ames_missing_rec <- recipe(Sale_Price ~ ., data = ames_missing) %>%
   step_normalize(all_numeric())
-ames_missing_pretrain <- tabnet_pretrain(ames_missing_rec, data=ames_missing, epoch=50,
+ames_missing_pretrain <- tabnet_pretrain(ames_missing_rec, data = ames_missing, epoch = 50,
                                          cat_emb_dim = cat_emb_dim,
-                                         valid_split = 0.2, verbose=TRUE, batch=2930,
-                                         pretraining_ratio=0.37,
+                                         valid_split = 0.2, verbose = TRUE, batch = 2930,
+                                         pretraining_ratio = 0.37,
                                          early_stopping_patience = 3L, early_stopping_tolerance = 1e-4)
 autoplot(ames_missing_pretrain)
 vip_color(ames_missing_pretrain, col_with_missings)
````
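The `0.37` in the hunk above is the vignette's target masking fraction minus the missingness already present in the data; a tiny sketch of that arithmetic (the 13% figure is taken from the vignette's earlier construction of `ames_missing`):

```r
# pretraining_ratio compensation: aim for roughly 50% of cells masked
# overall, of which 13% are already NA in ames_missing.
target_masking  <- 0.50
already_missing <- 0.13
pretraining_ratio <- target_masking - already_missing
pretraining_ratio
#> [1] 0.37
```

This keeps the effective corruption seen by the pretraining objective comparable between the complete and the artificially degraded datasets.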
````diff
@@ -183,9 +183,9 @@ We can see here no variables with high missingness is present in the top 10 impo
 ## Variable importance with raw `ames` dataset

 ```{r}
-ames_fit <- tabnet_pretrain(ames_rec, data=ames, tabnet_model = ames_pretrain,
-                            epoch=50, cat_emb_dim = cat_emb_dim,
-                            valid_split = 0.2, verbose=TRUE, batch=2930,
+ames_fit <- tabnet_fit(ames_rec, data = ames, tabnet_model = ames_pretrain,
+                       epoch = 50, cat_emb_dim = cat_emb_dim,
+                       valid_split = 0.2, verbose = TRUE, batch = 2930,
                             early_stopping_patience = 5L, early_stopping_tolerance = 1e-4)
 autoplot(ames_fit)
 vip_color(ames_fit, col_with_missings)
````
````diff
@@ -201,9 +201,9 @@ Here again, the model uses two predictors `BasmFin_SF_2` and `Garage_Finish` tha
 ## Variable importance with `ames_missing` dataset

 ```{r}
-ames_missing_fit <- tabnet_pretrain(ames_rec, data=ames_missing, tabnet_model = ames_missing_pretrain,
-                                    epoch=50, cat_emb_dim = cat_emb_dim,
-                                    valid_split = 0.2, verbose=TRUE, batch=2930,
+ames_missing_fit <- tabnet_fit(ames_rec, data = ames_missing, tabnet_model = ames_missing_pretrain,
+                               epoch = 50, cat_emb_dim = cat_emb_dim,
+                               valid_split = 0.2, verbose = TRUE, batch = 2930,
                                     early_stopping_patience = 5L, early_stopping_tolerance = 1e-4)
 autoplot(ames_missing_fit)
 vip_color(ames_missing_fit, col_with_missings)
````
Expand Down
Loading