Tidymodels with Guild AI • guildai

Here is an example showing how to use guildai with tidymodels in R.

If this is your first exposure to guildai, we recommend starting with the framework-agnostic “Getting Started” guide. This example assumes familiarity with guild concepts like flags and scalars.

We’ll start with an example R script, named “train-tidymodels.R”, that trains, evaluates, and saves a tidymodels model using the “Ames Housing” dataset. Here is what it looks like:

library(tidymodels)
library(butcher)
library(bestNormalize)
library(sessioninfo)
# Fix the RNG state up front so the data splits below are reproducible.
set.seed(1234)

# Boilerplate options -------------------------------------------------
tidymodels_prefer()
theme_set(theme_bw())
options(pillar.advice = FALSE, pillar.min_title_chars = Inf)

# Setup and partition data --------------------------------------------
data(ames)

# Put the outcome on the log10 scale and drop every column whose name
# contains "_Cond".
ames <- select(
  mutate(ames, Sale_Price = log10(Sale_Price)),
  -contains("_Cond")
)

# First split: hold out a test set, stratified on the outcome.
ames_split <- ames %>% initial_split(strata = Sale_Price)
ames_not_test <- ames_split %>% training()
ames_test <- ames_split %>% testing()

# Second split: carve a validation set out of the non-test data.
# NOTE(review): newer rsample releases deprecate validation_split() in
# favour of initial_validation_split() -- confirm before upgrading.
ames_rs <- ames_not_test %>% validation_split(strata = Sale_Price)
ames_train <- ames_rs$splits[[1]] %>% analysis()
ames_val <- ames_rs$splits[[1]] %>% assessment()

# Flags ---------------------------------------------------------------
# Each assignment below is a flag; the `#|` lines directly above it give
# the flag's description and the constraints on its value.

#| description: spline degrees of freedom for longitude
#| min: 2
#| max: 50
longitude_df <- 40L

#| description: spline degrees of freedom for latitude
#| min: 2
#| max: 50
latitude_df <- 40L

#| description: Total amount of regularization (penalty)
#| min: 0.0
#| max: 1.0
pen_val <- 0.001

#| description: Proportion of L1 penalty in the L1/L2 mixture (1 is pure lasso)
#| min: 0.0
#| max: 1.0
mix_val <- 1.0

# Recipe and Workflow -------------------------------------------------

# Preprocessing recipe, estimated on the training portion only.
ames_rec <-
  recipe(Sale_Price ~ ., data = ames_train) %>%
  step_other(MS_SubClass, MS_Zoning, Neighborhood, threshold = 0.05) %>%
  step_orderNorm(Lot_Area, ends_with("_SF"), Gr_Liv_Area) %>%
  step_dummy(all_nominal_predictors()) %>%
  step_interact(~ starts_with("Central"):Year_Built) %>%
  # NOTE We splice the values in via !! so that the step uses their values
  # and not a reference to global variables (a very bad idea)
  step_spline_natural(Longitude, deg_free = !!longitude_df) %>%
  step_spline_natural(Latitude, deg_free = !!latitude_df)

# Model arguments are captured unevaluated as well, so the flag values are
# spliced in with !! here too. Otherwise the specification (and the saved
# fit) keeps a reference to the globals `pen_val` / `mix_val` instead of
# their values.
glmn_spec <-
  linear_reg(penalty = !!pen_val, mixture = !!mix_val) %>%
  set_engine("glmnet")

# Bundle the model and the recipe so they are fitted together.
ames_wflow <-
  workflow() %>%
  add_model(glmn_spec) %>%
  add_recipe(ames_rec)

ames_fit <- fit(ames_wflow, data = ames_train)
ames_fit

# fit -----------------------------------------------------------------

# Pull the underlying glmnet object out of the fitted workflow.
glmnet_fit <-
  ames_fit %>%
  extract_fit_engine()

# Coefficients at the chosen penalty, as a dense matrix with one row per
# term and one column per requested penalty value.
glmnet_coefs <-
  glmnet_fit %>%
  coef(s = pen_val) %>%
  as.matrix()

# Number of predictors with a non-zero coefficient. The "(Intercept)" row
# is excluded so that the intercept is not counted as a predictor.
glmnet_pred <- colSums(
  glmnet_coefs[rownames(glmnet_coefs) != "(Intercept)", , drop = FALSE] != 0
)

glmnet_fit %>% autoplot(best_penalty = pen_val)


# Validation set results ----------------------------------------------

# Add predictions for the validation set to its original columns.
ames_pred <- augment(ames_fit, ames_val)

# Observed vs. predicted plot with the identity line for reference.
ames_pred %>%
  ggplot(aes(Sale_Price, .pred)) +
  geom_abline(col = "green", lty = 2) +
  geom_point(alpha = 1 / 3) +
  coord_obs_pred() +
  labs(x = "Observed (log-10)", y = "Predicted (log-10)")

val_results <- ames_pred %>% metrics(Sale_Price, .pred)

# Look the estimates up by metric name instead of by row position, so a
# change in the order of the metrics cannot silently swap the reported
# values; `[[` fails loudly if a metric is missing.
val_estimates <- setNames(val_results$.estimate, val_results$.metric)

# Print the scalars as "name: value" lines.
cat("validation_rmse:", val_estimates[["rmse"]], "\n")
cat("validation_R2:", val_estimates[["rsq"]], "\n")
cat("num_predictors:", glmnet_pred, "\n")

# Save object ---------------------------------------------------------

# Butcher the fitted workflow to reduce its size before it is saved.
ames_fit <- ames_fit %>% butcher()

# Write the fitted workflow and the validation metrics to ".RData" in the
# current working directory.
save(
  list = c("ames_fit", "val_results"),
  file = ".RData",
  compress = TRUE
)

# ---------------------------------------------------------------------

# Record the R version and package versions used for this run.
session_info()

A few things to note about the script:

There is no ‘guildai’ specific configuration in the script code. It’s a regular R script that you can safely source() or work with interactively at the REPL.
We declare some constraints on the flag values we’ve defined.
At the end of the run, we save the model. This file artifact will be stored as part of the run, enabling us to restore a trained model.

With our script defined, we can launch a guild run and view it:

library(guildai)
# Launch one guild run of the training script; no flags are passed, so the
# flag values written in the script are used.
guild_run("train-tidymodels.R")
## > library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom        1.0.3     ✔ recipes      1.0.5
## ✔ dials        1.1.0     ✔ rsample      1.1.1
## ✔ dplyr        1.1.0     ✔ tibble       3.1.8
## ✔ ggplot2      3.4.1     ✔ tidyr        1.3.0
## ✔ infer        1.0.4     ✔ tune         1.0.1
## ✔ modeldata    1.1.0     ✔ workflows    1.1.3
## ✔ parsnip      1.0.4     ✔ workflowsets 1.0.0
## ✔ purrr        1.0.1     ✔ yardstick    1.1.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ recipes::step()  masks stats::step()
## • Use tidymodels_prefer() to resolve common conflicts.
## > library(butcher)
## > library(bestNormalize)
## > library(sessioninfo)
## > set.seed(1234)
## > # Boilerplate options -------------------------------------------------
## > tidymodels_prefer()
## > theme_set(theme_bw())
## > options(pillar.advice = FALSE, pillar.min_title_chars = Inf)
## > # Setup and partition data --------------------------------------------
## > data(ames)
## > ames <-
## +   ames %>%
## +   mutate(Sale_Price = log10(Sale_Price)) %>%
## +   select(-contains("_Cond"))
## > ames_split <- initial_split(ames, strata = Sale_Price)
## > ames_not_test <- training(ames_split)
## > ames_test <- testing(ames_split)
## > ames_rs <- validation_split(ames_not_test, strata = Sale_Price)
## > ames_train <- analysis(ames_rs$splits[[1]])
## > ames_val <- assessment(ames_rs$splits[[1]])
## > # Flags ---------------------------------------------------------------
## >
## > #| description: spline degrees of freedom for longitude
## > #| min: 2
## > #| max: 50
## > longitude_df <- 40L
## > #| description: spline degrees of freedom for latitude
## > #| min: 2
## > #| max: 50
## > latitude_df <- 40L
## > #| description: L2 penalty
## > #| min: 0.0
## > #| max: 1.0
## > pen_val <- 0.001
## > #| description: Mixture of L1 and L2 penalty
## > #| min: 0.0
## > #| max: 1.0
## > mix_val <- 1.0
## > # Recipe and Workflow -------------------------------------------------
## >
## > ames_rec <-
## +   recipe(Sale_Price ~ ., data = ames_train) %>%
## +   step_other(MS_SubClass, MS_Zoning, Neighborhood, threshold = 0.05) %>%
## +   step_orderNorm(Lot_Area, ends_with("_SF"), Gr_Liv_Area) %>%
## +   step_dummy(all_nominal_predictors()) %>%
## +   step_interact(~ starts_with("Central"):Year_Built) %>%
## +   # NOTE We splice the values in via !! so that the step uses their values
## +   # and not a reference to global variables (a very bad idea)
## +   step_spline_natural(Longitude, deg_free = !!longitude_df) %>%
## +   step_spline_natural(Latitude, deg_free = !!latitude_df)
## > glmn_spec <-
## +   linear_reg(penalty = pen_val, mixture = mix_val) %>%
## +   set_engine("glmnet")
## > ames_wflow <-
## +   workflow() %>%
## +   add_model(glmn_spec) %>%
## +   add_recipe(ames_rec)
## > ames_fit <- fit(ames_wflow, data = ames_train)
## > ames_fit
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: linear_reg()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 6 Recipe Steps
##
## • step_other()
## • step_orderNorm()
## • step_dummy()
## • step_interact()
## • step_spline_natural()
## • step_spline_natural()
##
## ── Model ───────────────────────────────────────────────────────────────────────
##
## Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian",      alpha = ~mix_val)
##
##      Df  %Dev           Lambda
## 1     0  0.00 0.12961925523300
## 2     1  8.70 0.11810423784300
## 3     2 16.72 0.10761218286200
## 4     4 25.60 0.09805221312850
## 5     4 34.44 0.08934152475740
## 6     5 42.12 0.08140466993350
## 7     5 48.54 0.07417290341730
## 8     5 53.88 0.06758358710680
## 9     5 58.31 0.06157964749640
## 10    5 61.99 0.05610908133340
## 11    6 65.07 0.05112450519090
## 12    6 67.64 0.04658274505480
## 13    7 69.79 0.04244446237150
## 14    7 71.82 0.03867381331630
## 15    8 73.53 0.03523813833070
## 16    8 75.06 0.03210767924180
## 17    9 76.41 0.02925532151050
## 18    9 77.55 0.02665635937850
## 19    9 78.49 0.02428828187930
## 20   10 79.28 0.02213057785850
## 21   12 80.05 0.02016455831600
## 22   14 80.86 0.01837319453100
## 23   15 81.61 0.01674097056750
## 24   17 82.34 0.01525374888240
## 25   18 83.02 0.01389864787270
## 26   21 83.64 0.01266393030200
## 27   21 84.22 0.01153890163720
## 28   23 84.70 0.01051381741830
## 29   26 85.14 0.00957979885618
## 30   29 85.62 0.00872875592883
## 31   30 86.04 0.00795331730957
## 32   30 86.39 0.00724676652005
## 33   35 86.72 0.00660298375533
## 34   38 87.05 0.00601639287709
## 35   39 87.35 0.00548191311577
## 36   42 87.62 0.00499491506335
## 37   50 87.88 0.00455118057568
## 38   51 88.15 0.00414686623692
## 39   54 88.39 0.00377847006967
## 40   60 88.64 0.00344280120258
## 41   66 88.88 0.00313695223250
## 42   69 89.11 0.00285827404197
## 43   77 89.31 0.00260435285381
## 44   81 89.51 0.00237298932418
## 45   85 89.68 0.00216217949285
## 46   91 89.84 0.00197009742592
##
## ...
## and 54 more lines.
## > # fit -----------------------------------------------------------------
## >
## > glmnet_fit <-
## +   ames_fit %>%
## +   extract_fit_engine()
## > glmnet_pred <-
## +   glmnet_fit %>%
## +   coef(s = pen_val) %>%
## +   apply(2, function(x) sum(x != 0))
## > glmnet_fit %>% autoplot(best_penalty = pen_val)
## > # Validation set results ----------------------------------------------
## >
## > ames_pred <- augment(ames_fit, ames_val)
## > ames_pred %>%
## +   ggplot(aes(Sale_Price, .pred)) +
## +   geom_abline(col = "green", lty = 2) +
## +   geom_point(alpha = 1 / 3) +
## +   coord_obs_pred() +
## +   labs(x = "Observed (log-10)", y = "Predicted (log-10)")
## > val_results <- ames_pred %>% metrics(Sale_Price, .pred)
## > cat('validation_rmse:', val_results$.estimate[1], "\n")
## validation_rmse: 0.0548724422689783
## > cat('validation_R2:', val_results$.estimate[2], "\n")
## validation_R2: 0.894286076866259
## > cat('num_predictors:', glmnet_pred, "\n")
## num_predictors: 138
## > # Save object ---------------------------------------------------------
## >
## > # butcher the objects to make their install sizes smaller
## > ames_fit <- butcher(ames_fit)
## > # Save objects in the current working directory
## > save(ames_fit, val_results, file = ".RData",
## +      compress = TRUE)
## > # ---------------------------------------------------------------------
## >
## > session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.2.2 (2022-10-31)
##  os       Ubuntu 20.04.5 LTS
##  system   x86_64, linux-gnu
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       America/New_York
##  date     2023-03-01
##  pandoc   2.5 @ /usr/bin/pandoc
##
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package       * version    date (UTC) lib source
##  backports       1.4.1      2021-12-13 [1] CRAN (R 4.2.2)
##  bestNormalize * 1.8.3      2022-06-13 [1] CRAN (R 4.2.2)
##  broom         * 1.0.3      2023-01-25 [1] CRAN (R 4.2.2)
##  butcher       * 0.3.1      2022-12-14 [1] CRAN (R 4.2.2)
##  cachem          1.0.7      2023-02-24 [1] CRAN (R 4.2.2)
##  class           7.3-21     2023-01-23 [2] CRAN (R 4.2.2)
##  cli             3.6.0      2023-01-09 [1] CRAN (R 4.2.2)
##  codetools       0.2-19     2023-02-01 [2] CRAN (R 4.2.2)
##  colorspace      2.1-0      2023-01-23 [1] CRAN (R 4.2.2)
##  conflicted      1.2.0      2023-02-01 [1] CRAN (R 4.2.2)
##  dials         * 1.1.0      2022-11-04 [1] CRAN (R 4.2.2)
##  DiceDesign      1.9        2021-02-13 [1] CRAN (R 4.2.2)
##  digest          0.6.31     2022-12-11 [1] CRAN (R 4.2.2)
##  doParallel      1.0.17     2022-02-07 [1] CRAN (R 4.2.2)
##  doRNG           1.8.6      2023-01-16 [1] CRAN (R 4.2.2)
##  dplyr         * 1.1.0      2023-01-29 [1] CRAN (R 4.2.2)
##  ellipsis        0.3.2      2021-04-29 [1] CRAN (R 4.2.2)
##  fansi           1.0.4      2023-01-22 [1] CRAN (R 4.2.2)
##  farver          2.1.1      2022-07-06 [1] CRAN (R 4.2.2)
##  fastmap         1.1.1      2023-02-24 [1] CRAN (R 4.2.2)
##  foreach         1.5.2      2022-02-02 [1] CRAN (R 4.2.2)
##  furrr           0.3.1      2022-08-15 [1] CRAN (R 4.2.2)
##  future          1.31.0     2023-02-01 [1] CRAN (R 4.2.2)
##  future.apply    1.10.0     2022-11-05 [1] CRAN (R 4.2.2)
##  generics        0.1.3      2022-07-05 [1] CRAN (R 4.2.2)
##  ggplot2       * 3.4.1      2023-02-10 [1] CRAN (R 4.2.2)
##  ggrepel         0.9.3      2023-02-03 [1] CRAN (R 4.2.2)
##  glmnet          4.1-6      2022-11-27 [1] CRAN (R 4.2.2)
##  globals         0.16.2     2022-11-21 [1] CRAN (R 4.2.2)
##  glue            1.6.2      2022-02-24 [1] CRAN (R 4.2.2)
##  gower           1.0.1      2022-12-22 [1] CRAN (R 4.2.2)
##  GPfit           1.0-8      2019-02-08 [1] CRAN (R 4.2.2)
##  gtable          0.3.1      2022-09-01 [1] CRAN (R 4.2.2)
##  guildai         0.0.0.9001 2023-02-28 [1] Github (guildai/guildai-r@a6c0059)
##  hardhat         1.2.0      2022-06-30 [1] CRAN (R 4.2.2)
##  infer         * 1.0.4      2022-12-02 [1] CRAN (R 4.2.2)
##  ipred           0.9-13     2022-06-02 [1] CRAN (R 4.2.2)
##  iterators       1.0.14     2022-02-05 [1] CRAN (R 4.2.2)
##  jsonlite        1.8.4      2022-12-06 [1] CRAN (R 4.2.2)
##  labeling        0.4.2      2020-10-20 [1] CRAN (R 4.2.2)
##  lattice         0.20-45    2021-09-22 [2] CRAN (R 4.2.2)
##  lava            1.7.2.1    2023-02-27 [1] CRAN (R 4.2.2)
##  lhs             1.1.6      2022-12-17 [1] CRAN (R 4.2.2)
##  lifecycle       1.0.3      2022-10-07 [1] CRAN (R 4.2.2)
##  listenv         0.9.0      2022-12-16 [1] CRAN (R 4.2.2)
##  lubridate       1.9.2      2023-02-10 [1] CRAN (R 4.2.2)
##  magrittr        2.0.3      2022-03-30 [1] CRAN (R 4.2.2)
##  MASS            7.3-58.2   2023-01-23 [2] CRAN (R 4.2.2)
##  Matrix          1.5-3      2022-11-11 [2] CRAN (R 4.2.2)
##  memoise         2.0.1      2021-11-26 [1] CRAN (R 4.2.2)
##  modeldata     * 1.1.0      2023-01-25 [1] CRAN (R 4.2.2)
##  munsell         0.5.0      2018-06-12 [1] CRAN (R 4.2.2)
##  nnet            7.3-18     2022-09-28 [2] CRAN (R 4.2.2)
##  nortest         1.0-4      2015-07-30 [1] CRAN (R 4.2.2)
##  parallelly      1.34.0     2023-01-13 [1] CRAN (R 4.2.2)
##  parsnip       * 1.0.4      2023-02-22 [1] CRAN (R 4.2.2)
##  pillar          1.8.1      2022-08-19 [1] CRAN (R 4.2.2)
##  pkgconfig       2.0.3      2019-09-22 [1] CRAN (R 4.2.2)
##  prodlim         2019.11.13 2019-11-17 [1] CRAN (R 4.2.2)
##  purrr         * 1.0.1      2023-01-10 [1] CRAN (R 4.2.2)
##  R6              2.5.1      2021-08-19 [1] CRAN (R 4.2.2)
##  Rcpp            1.0.10     2023-01-22 [1] CRAN (R 4.2.2)
##  recipes       * 1.0.5      2023-02-20 [1] CRAN (R 4.2.2)
##  rlang           1.0.6      2022-09-24 [1] CRAN (R 4.2.2)
##  rngtools        1.5.2      2021-09-20 [1] CRAN (R 4.2.2)
##  rpart           4.1.19     2022-10-21 [2] CRAN (R 4.2.2)
##  rsample       * 1.1.1      2022-12-07 [1] CRAN (R 4.2.2)
##  rstudioapi      0.14       2022-08-22 [1] CRAN (R 4.2.2)
##  scales        * 1.2.1      2022-08-20 [1] CRAN (R 4.2.2)
##  sessioninfo   * 1.2.2      2021-12-06 [1] CRAN (R 4.2.2)
##  shape           1.4.6      2021-05-19 [1] CRAN (R 4.2.2)
##  splines2        0.4.7      2023-01-14 [1] CRAN (R 4.2.2)
##  survival        3.5-3      2023-02-12 [2] CRAN (R 4.2.2)
##  tibble        * 3.1.8      2022-07-22 [1] CRAN (R 4.2.2)
##  tidymodels    * 1.0.0      2022-07-13 [1] CRAN (R 4.2.2)
##  tidyr         * 1.3.0      2023-01-24 [1] CRAN (R 4.2.2)
##  tidyselect      1.2.0      2022-10-10 [1] CRAN (R 4.2.2)
##  timechange      0.2.0      2023-01-11 [1] CRAN (R 4.2.2)
##  timeDate        4022.108   2023-01-07 [1] CRAN (R 4.2.2)
##  tune          * 1.0.1      2022-10-09 [1] CRAN (R 4.2.2)
##  utf8            1.2.3      2023-01-31 [1] CRAN (R 4.2.2)
##  vctrs           0.5.2      2023-01-23 [1] CRAN (R 4.2.2)
##  withr           2.5.0      2022-03-03 [1] CRAN (R 4.2.2)
##  workflows     * 1.1.3      2023-02-22 [1] CRAN (R 4.2.2)
##  workflowsets  * 1.0.0      2022-07-12 [1] CRAN (R 4.2.2)
##  yaml            2.3.7      2023-01-23 [1] CRAN (R 4.2.2)
##  yardstick     * 1.1.0      2022-09-07 [1] CRAN (R 4.2.2)
##
##  [1] /home/tomasz/opt/R-4.2.2/lib/R/site-library
##  [2] /home/tomasz/opt/R-4.2.2/lib/R/library
##
## ──────────────────────────────────────────────────────────────────────────────

# View the run that was just launched.
guild_view()

We can use the flags and flag constraints to run an optimizer. We could use one of the guildai built-in optimizers, or manually define a grid using tidymodels functions, like this:

# Use existing tidymodels code to generate a space-filling design for
# new hyperparameter combinations
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom        1.0.3     ✔ recipes      1.0.5
## ✔ dials        1.1.0     ✔ rsample      1.1.1
## ✔ dplyr        1.1.0     ✔ tibble       3.1.8
## ✔ ggplot2      3.4.1     ✔ tidyr        1.3.0
## ✔ infer        1.0.4     ✔ tune         1.0.1
## ✔ modeldata    1.1.0     ✔ workflows    1.1.3
## ✔ parsnip      1.0.4     ✔ workflowsets 1.0.0
## ✔ purrr        1.0.1     ✔ yardstick    1.1.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ recipes::step()  masks stats::step()
## • Dig deeper into tidy modeling with R at https://www.tmwr.org
# Parameter ranges, keyed by the flag name each one feeds.
params <-
  list(
    pen_val = penalty(),
    mix_val = mixture(),
    latitude_df = spline_degree(c(2L, 50L)),
    longitude_df = spline_degree(c(2L, 50L))
  ) %>%
  parameters()

# Fixed seed so the same 19 candidate combinations are generated each time.
set.seed(1)
grid <- params %>% grid_latin_hypercube(size = 19)
grid
## # A tibble: 19 × 4
##     pen_val mix_val latitude_df longitude_df
##       <dbl>   <dbl>       <int>        <int>
##  1 1.10e- 8  0.915           21           31
##  2 3.88e- 3  0.289           29            2
##  3 1.11e- 7  0.996           25            5
##  4 1.57e- 4  0.480           37           26
##  5 1.00e- 3  0.568           47           29
##  6 3.10e- 9  0.0644           4           12
##  7 2.54e-10  0.841            6           37
##  8 1.13e- 1  0.401           50           44
##  9 6.15e- 7  0.633           15           38
## 10 1.81e- 5  0.254           41           49
## 11 5.81e- 5  0.363           32            7
## 12 6.53e- 1  0.0346          19           24
## 13 4.48e- 4  0.879           38           42
## 14 4.15e- 2  0.613           14           13
## 15 1.06e- 9  0.457           11           35
## 16 1.83e- 2  0.777            8           21
## 17 2.96e- 6  0.174           42           46
## 18 2.81e- 8  0.690           34           16
## 19 3.40e- 7  0.157           26           20

# Launch a batch of runs, one for each row of flag values in the grid.
guild_run(
  "train-tidymodels.R",
  flags = grid,
  echo = c(FALSE, FALSE) # ignore stdout *and* stderr
)

Find the best run:

# Collect information about every run of the training script.
runs <- runs_info(operation = "train-tidymodels.R")

# Scalars recorded by each run (one row per run).
runs$scalars
## # A tibble: 20 × 3
##    num_predictors validation_R2 validation_rmse
##             <dbl>         <dbl>           <dbl>
##  1            206         0.887          0.0571
##  2            206         0.871          0.0614
##  3            206         0.886          0.0573
##  4             19         0.857          0.0671
##  5            206         0.878          0.0597
##  6             12         0.845          0.0759
##  7            172         0.892          0.0556
##  8             50         0.854          0.0963
##  9            204         0.884          0.0579
## 10            206         0.884          0.0581
## 11            206         0.873          0.0610
## 12             11         0.837          0.0971
## 13            206         0.868          0.0624
## 14            206         0.892          0.0558
## 15            158         0.894          0.0551
## 16            197         0.892          0.0558
## 17            206         0.864          0.0634
## 18            132         0.895          0.0546
## 19            206         0.866          0.0628
## 20            138         0.894          0.0549

# Keep the single run with the lowest validation RMSE. `with_ties = FALSE`
# guarantees exactly one row even when several runs share the minimum
# (e.g. the same flags run twice with the script's fixed seed), so that
# `best_run$dir` is always a single path.
best_run <- runs %>%
  slice_min(scalars$validation_rmse, n = 1, with_ties = FALSE)

Recover the fitted model and validation results from the best run:

# Location of the objects the best run saved in its run directory.
saved_objects_path <- file.path(best_run$dir, ".RData")

# Load them into a dedicated, isolated environment rather than the global
# workspace, so nothing already defined here is overwritten.
best_run_saved_objects <- new.env(parent = emptyenv())
load(saved_objects_path, envir = best_run_saved_objects)

best_run_saved_objects$ames_fit
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: linear_reg()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 6 Recipe Steps
##
## • step_other()
## • step_orderNorm()
## • step_dummy()
## • step_interact()
## • step_spline_natural()
## • step_spline_natural()
##
## ── Model ───────────────────────────────────────────────────────────────────────
##
## Call:  dummy_call()
##
##      Df  %Dev  Lambda
## 1     0  0.00 0.44850
## 2     2  3.53 0.40860
## 3     4  9.53 0.37230
## 4     6 17.29 0.33920
## 5     7 25.01 0.30910
## 6     7 32.02 0.28160
## 7     7 38.24 0.25660
## 8     9 43.79 0.23380
## 9    10 48.82 0.21310
## 10   10 53.42 0.19410
## 11   10 57.41 0.17690
## 12   11 60.91 0.16120
## 13   12 64.09 0.14690
## 14   12 66.86 0.13380
## 15   14 69.28 0.12190
## 16   14 71.37 0.11110
## 17   15 73.20 0.10120
## 18   16 74.79 0.09223
## 19   17 76.20 0.08403
## 20   18 77.44 0.07657
## 21   20 78.62 0.06977
## 22   24 79.68 0.06357
## 23   25 80.61 0.05792
## 24   25 81.46 0.05278
## 25   28 82.31 0.04809
## 26   31 83.08 0.04382
## 27   33 83.76 0.03992
## 28   35 84.35 0.03638
## 29   38 84.89 0.03314
## 30   39 85.40 0.03020
## 31   39 85.87 0.02752
## 32   41 86.28 0.02507
## 33   45 86.67 0.02285
## 34   48 87.03 0.02082
## 35   50 87.37 0.01897
## 36   53 87.68 0.01728
## 37   55 87.96 0.01575
## 38   58 88.21 0.01435
## 39   61 88.44 0.01307
## 40   65 88.66 0.01191
## 41   71 88.88 0.01085
## 42   76 89.10 0.00989
## 43   80 89.30 0.00901
## 44   85 89.49 0.00821
## 45   93 89.66 0.00748
## 46   94 89.82 0.00682
##
## ...
## and 54 more lines.

# Validation metrics that the run saved alongside the fitted workflow.
best_run_saved_objects$val_results
## # A tibble: 3 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard      0.0546
## 2 rsq     standard      0.895
## 3 mae     standard      0.0395

For reproducibility (or when preparing to deploy a model), it can be helpful to know some additional run metadata, like the starting random seed or a log of which R packages were loaded during the R session. This information is all stored under the RUN_DIR/.guild directory.

To recover the seed:

# Read the starting random seed stored in the run's .guild directory.
run <- best_run
seed_path <- file.path(run$dir, ".guild/attrs/random_seed")
seed <- yaml::read_yaml(seed_path)
seed
## [1] 400252956
set.seed(seed) # restore the starting seed.

To access the log of R loaded packages:

# Read the log of loaded R packages stored in the run's .guild directory
# and show the first few entries.
packages_path <- file.path(run$dir, ".guild/attrs/r_packages_loaded")
packages_loaded <- yaml::read_yaml(packages_path)
str(packages_loaded, list.len = 5)
## List of 98
##  $ guildai      :List of 2
##   ..$ path   : chr "/home/tomasz/opt/R-4.2.2/lib/R/site-library/guildai"
##   ..$ version: chr "0.0.0.9001"
##  $ grDevices    :List of 2
##   ..$ path   : chr "/home/tomasz/opt/R-4.2.2/lib/R/library/grDevices"
##   ..$ version: chr "4.2.2"
##  $ fansi        :List of 2
##   ..$ path   : chr "/home/tomasz/opt/R-4.2.2/lib/R/site-library/fansi"
##   ..$ version: chr "1.0.4"
##  $ utf8         :List of 2
##   ..$ path   : chr "/home/tomasz/opt/R-4.2.2/lib/R/site-library/utf8"
##   ..$ version: chr "1.2.3"
##  $ dplyr        :List of 2
##   ..$ path   : chr "/home/tomasz/opt/R-4.2.2/lib/R/site-library/dplyr"
##   ..$ version: chr "1.1.0"
##   [list output truncated]