library(tidymodels)
library(butcher)
library(bestNormalize)
library(sessioninfo)
# Fix the random seed so the random steps later in the script are repeatable
set.seed(1234)
# Boilerplate options -------------------------------------------------
# Resolve common function-name conflicts in favor of tidymodels packages
tidymodels_prefer()
# Use the black-and-white ggplot2 theme for every plot in this script
theme_set(theme_bw())
# Tibble printing preferences, set one option at a time
options(pillar.advice = FALSE)
options(pillar.min_title_chars = Inf)
# Setup and partition data --------------------------------------------
# Load the Ames housing data, put the sale price on the log-10 scale, and
# drop every column whose name contains "_Cond".
data(ames)
ames <- mutate(ames, Sale_Price = log10(Sale_Price))
ames <- select(ames, -contains("_Cond"))

# Hold out a test set, stratifying on the outcome.
ames_split <- initial_split(ames, strata = Sale_Price)
ames_not_test <- training(ames_split)
ames_test <- testing(ames_split)

# Make a single validation split of the non-test rows: the analysis part is
# used to fit the model and the assessment part to evaluate it.
ames_rs <- validation_split(ames_not_test, strata = Sale_Price)
val_split <- ames_rs$splits[[1]]
ames_train <- analysis(val_split)
ames_val <- assessment(val_split)
# Flags ---------------------------------------------------------------
# Each `#|` group annotates the assignment directly below it with a
# description and the constraints (min/max) on that flag's value.
#| description: spline degrees of freedom for longitude
#| min: 2
#| max: 50
longitude_df <- 40L
#| description: spline degrees of freedom for latitude
#| min: 2
#| max: 50
latitude_df <- 40L
# pen_val is passed as `penalty` to linear_reg(); it is the overall amount
# of regularization, not specifically an L2 penalty (mix_val controls the
# L1/L2 blend).
#| description: Total amount of regularization
#| min: 0.0
#| max: 1.0
pen_val <- 0.001
#| description: Mixture of L1 and L2 penalty
#| min: 0.0
#| max: 1.0
mix_val <- 1.0
# Recipe and Workflow -------------------------------------------------
# Preprocessing steps, applied in the order listed: step_other() on three
# factor predictors, step_orderNorm() on the area predictors, dummy
# variables for all nominal predictors, an interaction with Year_Built, and
# natural spline expansions of Longitude and Latitude.
ames_rec <-
  recipe(Sale_Price ~ ., data = ames_train) %>%
  step_other(MS_SubClass, MS_Zoning, Neighborhood, threshold = 0.05) %>%
  step_orderNorm(Lot_Area, ends_with("_SF"), Gr_Liv_Area) %>%
  step_dummy(all_nominal_predictors()) %>%
  step_interact(~ starts_with("Central"):Year_Built) %>%
  # NOTE We splice the values in via !! so that the step uses their values
  # and not a reference to global variables (a very bad idea)
  step_spline_natural(Longitude, deg_free = !!longitude_df) %>%
  step_spline_natural(Latitude, deg_free = !!latitude_df)

# The same rule applies to the model specification: splice the flag values
# in with !! so the saved fit carries the numbers themselves rather than
# references to the globals `pen_val` and `mix_val`, which will not exist
# in the session that later restores the model.
glmn_spec <-
  linear_reg(penalty = !!pen_val, mixture = !!mix_val) %>%
  set_engine("glmnet")

# Bundle the model and the recipe, then fit on the training rows.
ames_wflow <-
  workflow() %>%
  add_model(glmn_spec) %>%
  add_recipe(ames_rec)

ames_fit <- fit(ames_wflow, data = ames_train)
ames_fit
# fit -----------------------------------------------------------------
# Pull the underlying glmnet object out of the fitted workflow.
glmnet_fit <- extract_fit_engine(ames_fit)

# Count the predictors with a nonzero coefficient at the chosen penalty.
# The coefficient matrix returned by coef() has an "(Intercept)" row; the
# intercept is not a predictor, so it is left out of the count.
glmnet_coefs <- as.matrix(coef(glmnet_fit, s = pen_val))
is_predictor <- rownames(glmnet_coefs) != "(Intercept)"
glmnet_pred <- colSums(glmnet_coefs[is_predictor, , drop = FALSE] != 0)

# Plot the glmnet fit, passing the penalty value used for this run.
glmnet_fit %>% autoplot(best_penalty = pen_val)
# Validation set results ----------------------------------------------
# Predict on the validation set and plot observed vs. predicted values.
ames_pred <- augment(ames_fit, ames_val)
ames_pred %>%
  ggplot(aes(Sale_Price, .pred)) +
  geom_abline(col = "green", lty = 2) +
  geom_point(alpha = 1 / 3) +
  coord_obs_pred() +
  labs(x = "Observed (log-10)", y = "Predicted (log-10)")

val_results <- ames_pred %>% metrics(Sale_Price, .pred)

# Look each metric up by its name in the `.metric` column instead of by row
# position, so the reported values stay correct even if the row order of
# `val_results` changes.
val_metric <- function(name) {
  val_results$.estimate[val_results$.metric == name]
}

# Print the results as "key: value" lines.
cat("validation_rmse:", val_metric("rmse"), "\n")
cat("validation_R2:", val_metric("rsq"), "\n")
cat("num_predictors:", glmnet_pred, "\n")
# Save object ---------------------------------------------------------
# Pass the fitted workflow through butcher() to shrink it before saving
ames_fit <- butcher(ames_fit)
# Write the fitted workflow and the validation metrics to ".RData" in the
# current working directory
save(
  list = c("ames_fit", "val_results"),
  file = ".RData",
  compress = TRUE
)
# ---------------------------------------------------------------------
# Print the session details (R version, platform, package versions)
session_info()
Here is an example showing how to use guildai with tidymodels in R.
If this is your first exposure to guildai, we recommend starting with the framework agnostic “Getting Started” guide. This example assumes familiarity with guild concepts like flags and scalars.
We’ll start with an example R script, named “train-tidymodels.R”, that trains, evaluates, and saves a tidymodels model using the “Ames Housing” dataset. Here is what it looks like:
A few things to note about the script:
- There is no ‘guildai’ specific configuration in the script code. It’s a regular R script that you can safely source() or work with interactively at the REPL.
- We declare some constraints on the flag values we’ve defined.
- At the end of the run, we save the model. This file artifact will be stored as part of the run, enabling us to restore a trained model.
With our script defined, we can launch a guild run and view it:
# Attach guildai and launch a run of the training script
library(guildai)
guild_run("train-tidymodels.R")
## > library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom 1.0.3 ✔ recipes 1.0.5
## ✔ dials 1.1.0 ✔ rsample 1.1.1
## ✔ dplyr 1.1.0 ✔ tibble 3.1.8
## ✔ ggplot2 3.4.1 ✔ tidyr 1.3.0
## ✔ infer 1.0.4 ✔ tune 1.0.1
## ✔ modeldata 1.1.0 ✔ workflows 1.1.3
## ✔ parsnip 1.0.4 ✔ workflowsets 1.0.0
## ✔ purrr 1.0.1 ✔ yardstick 1.1.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ recipes::step() masks stats::step()
## • Use tidymodels_prefer() to resolve common conflicts.
## > library(butcher)
## > library(bestNormalize)
## > library(sessioninfo)
## > set.seed(1234)
## > # Boilerplate options -------------------------------------------------
## > tidymodels_prefer()
## > theme_set(theme_bw())
## > options(pillar.advice = FALSE, pillar.min_title_chars = Inf)
## > # Setup and partition data --------------------------------------------
## > data(ames)
## > ames <-
## + ames %>%
## + mutate(Sale_Price = log10(Sale_Price)) %>%
## + select(-contains("_Cond"))
## > ames_split <- initial_split(ames, strata = Sale_Price)
## > ames_not_test <- training(ames_split)
## > ames_test <- testing(ames_split)
## > ames_rs <- validation_split(ames_not_test, strata = Sale_Price)
## > ames_train <- analysis(ames_rs$splits[[1]])
## > ames_val <- assessment(ames_rs$splits[[1]])
## > # Flags ---------------------------------------------------------------
## >
## > #| description: spline degrees of freedom for longitude
## > #| min: 2
## > #| max: 50
## > longitude_df <- 40L
## > #| description: spline degrees of freedom for latitude
## > #| min: 2
## > #| max: 50
## > latitude_df <- 40L
## > #| description: L2 penalty
## > #| min: 0.0
## > #| max: 1.0
## > pen_val <- 0.001
## > #| description: Mixture of L1 and L2 penalty
## > #| min: 0.0
## > #| max: 1.0
## > mix_val <- 1.0
## > # Recipe and Workflow -------------------------------------------------
## >
## > ames_rec <-
## + recipe(Sale_Price ~ ., data = ames_train) %>%
## + step_other(MS_SubClass, MS_Zoning, Neighborhood, threshold = 0.05) %>%
## + step_orderNorm(Lot_Area, ends_with("_SF"), Gr_Liv_Area) %>%
## + step_dummy(all_nominal_predictors()) %>%
## + step_interact(~ starts_with("Central"):Year_Built) %>%
## + # NOTE We splice the values in via !! so that the step uses their values
## + # and not a reference to global variables (a very bad idea)
## + step_spline_natural(Longitude, deg_free = !!longitude_df) %>%
## + step_spline_natural(Latitude, deg_free = !!latitude_df)
## > glmn_spec <-
## + linear_reg(penalty = pen_val, mixture = mix_val) %>%
## + set_engine("glmnet")
## > ames_wflow <-
## + workflow() %>%
## + add_model(glmn_spec) %>%
## + add_recipe(ames_rec)
## > ames_fit <- fit(ames_wflow, data = ames_train)
## > ames_fit
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: linear_reg()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 6 Recipe Steps
##
## • step_other()
## • step_orderNorm()
## • step_dummy()
## • step_interact()
## • step_spline_natural()
## • step_spline_natural()
##
## ── Model ───────────────────────────────────────────────────────────────────────
##
## Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian", alpha = ~mix_val)
##
## Df %Dev Lambda
## 1 0 0.00 0.12961925523300
## 2 1 8.70 0.11810423784300
## 3 2 16.72 0.10761218286200
## 4 4 25.60 0.09805221312850
## 5 4 34.44 0.08934152475740
## 6 5 42.12 0.08140466993350
## 7 5 48.54 0.07417290341730
## 8 5 53.88 0.06758358710680
## 9 5 58.31 0.06157964749640
## 10 5 61.99 0.05610908133340
## 11 6 65.07 0.05112450519090
## 12 6 67.64 0.04658274505480
## 13 7 69.79 0.04244446237150
## 14 7 71.82 0.03867381331630
## 15 8 73.53 0.03523813833070
## 16 8 75.06 0.03210767924180
## 17 9 76.41 0.02925532151050
## 18 9 77.55 0.02665635937850
## 19 9 78.49 0.02428828187930
## 20 10 79.28 0.02213057785850
## 21 12 80.05 0.02016455831600
## 22 14 80.86 0.01837319453100
## 23 15 81.61 0.01674097056750
## 24 17 82.34 0.01525374888240
## 25 18 83.02 0.01389864787270
## 26 21 83.64 0.01266393030200
## 27 21 84.22 0.01153890163720
## 28 23 84.70 0.01051381741830
## 29 26 85.14 0.00957979885618
## 30 29 85.62 0.00872875592883
## 31 30 86.04 0.00795331730957
## 32 30 86.39 0.00724676652005
## 33 35 86.72 0.00660298375533
## 34 38 87.05 0.00601639287709
## 35 39 87.35 0.00548191311577
## 36 42 87.62 0.00499491506335
## 37 50 87.88 0.00455118057568
## 38 51 88.15 0.00414686623692
## 39 54 88.39 0.00377847006967
## 40 60 88.64 0.00344280120258
## 41 66 88.88 0.00313695223250
## 42 69 89.11 0.00285827404197
## 43 77 89.31 0.00260435285381
## 44 81 89.51 0.00237298932418
## 45 85 89.68 0.00216217949285
## 46 91 89.84 0.00197009742592
##
## ...
## and 54 more lines.
## > # fit -----------------------------------------------------------------
## >
## > glmnet_fit <-
## + ames_fit %>%
## + extract_fit_engine()
## > glmnet_pred <-
## + glmnet_fit %>%
## + coef(s = pen_val) %>%
## + apply(2, function(x) sum(x != 0))
## > glmnet_fit %>% autoplot(best_penalty = pen_val)
## > # Validation set results ----------------------------------------------
## >
## > ames_pred <- augment(ames_fit, ames_val)
## > ames_pred %>%
## + ggplot(aes(Sale_Price, .pred)) +
## + geom_abline(col = "green", lty = 2) +
## + geom_point(alpha = 1 / 3) +
## + coord_obs_pred() +
## + labs(x = "Observed (log-10)", y = "Predicted (log-10)")
## > val_results <- ames_pred %>% metrics(Sale_Price, .pred)
## > cat('validation_rmse:', val_results$.estimate[1], "\n")
## validation_rmse: 0.0548724422689783
## > cat('validation_R2:', val_results$.estimate[2], "\n")
## validation_R2: 0.894286076866259
## > cat('num_predictors:', glmnet_pred, "\n")
## num_predictors: 138
## > # Save object ---------------------------------------------------------
## >
## > # butcher the objects to make their install sizes smaller
## > ames_fit <- butcher(ames_fit)
## > # Save objects in the current working directory
## > save(ames_fit, val_results, file = ".RData",
## + compress = TRUE)
## > # ---------------------------------------------------------------------
## >
## > session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.2.2 (2022-10-31)
## os Ubuntu 20.04.5 LTS
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2023-03-01
## pandoc 2.5 @ /usr/bin/pandoc
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.2)
## bestNormalize * 1.8.3 2022-06-13 [1] CRAN (R 4.2.2)
## broom * 1.0.3 2023-01-25 [1] CRAN (R 4.2.2)
## butcher * 0.3.1 2022-12-14 [1] CRAN (R 4.2.2)
## cachem 1.0.7 2023-02-24 [1] CRAN (R 4.2.2)
## class 7.3-21 2023-01-23 [2] CRAN (R 4.2.2)
## cli 3.6.0 2023-01-09 [1] CRAN (R 4.2.2)
## codetools 0.2-19 2023-02-01 [2] CRAN (R 4.2.2)
## colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.2.2)
## conflicted 1.2.0 2023-02-01 [1] CRAN (R 4.2.2)
## dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.2)
## DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.2.2)
## digest 0.6.31 2022-12-11 [1] CRAN (R 4.2.2)
## doParallel 1.0.17 2022-02-07 [1] CRAN (R 4.2.2)
## doRNG 1.8.6 2023-01-16 [1] CRAN (R 4.2.2)
## dplyr * 1.1.0 2023-01-29 [1] CRAN (R 4.2.2)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.2)
## fansi 1.0.4 2023-01-22 [1] CRAN (R 4.2.2)
## farver 2.1.1 2022-07-06 [1] CRAN (R 4.2.2)
## fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.2.2)
## foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.2)
## furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.2)
## future 1.31.0 2023-02-01 [1] CRAN (R 4.2.2)
## future.apply 1.10.0 2022-11-05 [1] CRAN (R 4.2.2)
## generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.2)
## ggplot2 * 3.4.1 2023-02-10 [1] CRAN (R 4.2.2)
## ggrepel 0.9.3 2023-02-03 [1] CRAN (R 4.2.2)
## glmnet 4.1-6 2022-11-27 [1] CRAN (R 4.2.2)
## globals 0.16.2 2022-11-21 [1] CRAN (R 4.2.2)
## glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.2)
## gower 1.0.1 2022-12-22 [1] CRAN (R 4.2.2)
## GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.2.2)
## gtable 0.3.1 2022-09-01 [1] CRAN (R 4.2.2)
## guildai 0.0.0.9001 2023-02-28 [1] Github (guildai/guildai-r@a6c0059)
## hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.2)
## infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.2)
## ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.2)
## iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.2)
## jsonlite 1.8.4 2022-12-06 [1] CRAN (R 4.2.2)
## labeling 0.4.2 2020-10-20 [1] CRAN (R 4.2.2)
## lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.2)
## lava 1.7.2.1 2023-02-27 [1] CRAN (R 4.2.2)
## lhs 1.1.6 2022-12-17 [1] CRAN (R 4.2.2)
## lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.2)
## listenv 0.9.0 2022-12-16 [1] CRAN (R 4.2.2)
## lubridate 1.9.2 2023-02-10 [1] CRAN (R 4.2.2)
## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.2)
## MASS 7.3-58.2 2023-01-23 [2] CRAN (R 4.2.2)
## Matrix 1.5-3 2022-11-11 [2] CRAN (R 4.2.2)
## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.2.2)
## modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.2.2)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.2)
## nnet 7.3-18 2022-09-28 [2] CRAN (R 4.2.2)
## nortest 1.0-4 2015-07-30 [1] CRAN (R 4.2.2)
## parallelly 1.34.0 2023-01-13 [1] CRAN (R 4.2.2)
## parsnip * 1.0.4 2023-02-22 [1] CRAN (R 4.2.2)
## pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.2)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.2)
## prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.2)
## purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.2)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.2)
## Rcpp 1.0.10 2023-01-22 [1] CRAN (R 4.2.2)
## recipes * 1.0.5 2023-02-20 [1] CRAN (R 4.2.2)
## rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.2)
## rngtools 1.5.2 2021-09-20 [1] CRAN (R 4.2.2)
## rpart 4.1.19 2022-10-21 [2] CRAN (R 4.2.2)
## rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.2)
## rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.2)
## scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.2)
## sessioninfo * 1.2.2 2021-12-06 [1] CRAN (R 4.2.2)
## shape 1.4.6 2021-05-19 [1] CRAN (R 4.2.2)
## splines2 0.4.7 2023-01-14 [1] CRAN (R 4.2.2)
## survival 3.5-3 2023-02-12 [2] CRAN (R 4.2.2)
## tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.2)
## tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.2)
## tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.2.2)
## tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.2)
## timechange 0.2.0 2023-01-11 [1] CRAN (R 4.2.2)
## timeDate 4022.108 2023-01-07 [1] CRAN (R 4.2.2)
## tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.2)
## utf8 1.2.3 2023-01-31 [1] CRAN (R 4.2.2)
## vctrs 0.5.2 2023-01-23 [1] CRAN (R 4.2.2)
## withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.2)
## workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.2.2)
## workflowsets * 1.0.0 2022-07-12 [1] CRAN (R 4.2.2)
## yaml 2.3.7 2023-01-23 [1] CRAN (R 4.2.2)
## yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.2)
##
## [1] /home/tomasz/opt/R-4.2.2/lib/R/site-library
## [2] /home/tomasz/opt/R-4.2.2/lib/R/library
##
## ──────────────────────────────────────────────────────────────────────────────
We can use the flags and flag constraints to run an optimizer. We could use one of the guildai built-in optimizers, or manually define a grid using tidymodels functions, like this:
# Use existing tidymodels code to generate a space-filling design for
# new hyperparameter combinations
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom 1.0.3 ✔ recipes 1.0.5
## ✔ dials 1.1.0 ✔ rsample 1.1.1
## ✔ dplyr 1.1.0 ✔ tibble 3.1.8
## ✔ ggplot2 3.4.1 ✔ tidyr 1.3.0
## ✔ infer 1.0.4 ✔ tune 1.0.1
## ✔ modeldata 1.1.0 ✔ workflows 1.1.3
## ✔ parsnip 1.0.4 ✔ workflowsets 1.0.0
## ✔ purrr 1.0.1 ✔ yardstick 1.1.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ recipes::step() masks stats::step()
## • Dig deeper into tidy modeling with R at https://www.tmwr.org
# Parameter set for the four script flags; the names match the flag
# variables defined in "train-tidymodels.R"
spline_df_range <- c(2L, 50L)
params <- parameters(list(
  pen_val = penalty(),
  mix_val = mixture(),
  latitude_df = spline_degree(spline_df_range),
  longitude_df = spline_degree(spline_df_range)
))
# Draw 19 flag combinations with a Latin hypercube design
set.seed(1)
grid <- grid_latin_hypercube(params, size = 19)
grid
## # A tibble: 19 × 4
## pen_val mix_val latitude_df longitude_df
## <dbl> <dbl> <int> <int>
## 1 1.10e- 8 0.915 21 31
## 2 3.88e- 3 0.289 29 2
## 3 1.11e- 7 0.996 25 5
## 4 1.57e- 4 0.480 37 26
## 5 1.00e- 3 0.568 47 29
## 6 3.10e- 9 0.0644 4 12
## 7 2.54e-10 0.841 6 37
## 8 1.13e- 1 0.401 50 44
## 9 6.15e- 7 0.633 15 38
## 10 1.81e- 5 0.254 41 49
## 11 5.81e- 5 0.363 32 7
## 12 6.53e- 1 0.0346 19 24
## 13 4.48e- 4 0.879 38 42
## 14 4.15e- 2 0.613 14 13
## 15 1.06e- 9 0.457 11 35
## 16 1.83e- 2 0.777 8 21
## 17 2.96e- 6 0.174 42 46
## 18 2.81e- 8 0.690 34 16
## 19 3.40e- 7 0.157 26 20
# Launch a batch of runs defined by the flag grid.
guild_run(
"train-tidymodels.R",
flags = grid,
echo = c(FALSE, FALSE) # ignore stdout *and* stderr
)
Find the best run:
runs <- runs_info(operation = "train-tidymodels.R")
# The best run is the one with the lowest validation RMSE
best_run <- runs[which.min(runs$scalars$validation_rmse), ]
runs$scalars
## # A tibble: 20 × 3
## num_predictors validation_R2 validation_rmse
## <dbl> <dbl> <dbl>
## 1 206 0.887 0.0571
## 2 206 0.871 0.0614
## 3 206 0.886 0.0573
## 4 19 0.857 0.0671
## 5 206 0.878 0.0597
## 6 12 0.845 0.0759
## 7 172 0.892 0.0556
## 8 50 0.854 0.0963
## 9 204 0.884 0.0579
## 10 206 0.884 0.0581
## 11 206 0.873 0.0610
## 12 11 0.837 0.0971
## 13 206 0.868 0.0624
## 14 206 0.892 0.0558
## 15 158 0.894 0.0551
## 16 197 0.892 0.0558
## 17 206 0.864 0.0634
## 18 132 0.895 0.0546
## 19 206 0.866 0.0628
## 20 138 0.894 0.0549
Recover the fitted model and validation results from the best run:
# Load the saved objects into a separate environment so they do not
# overwrite objects of the same name in the current session
best_run_saved_objects <- new.env(parent = emptyenv())
saved_objects_path <- file.path(best_run$dir, ".RData")
load(saved_objects_path, envir = best_run_saved_objects)
best_run_saved_objects$ames_fit
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: linear_reg()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 6 Recipe Steps
##
## • step_other()
## • step_orderNorm()
## • step_dummy()
## • step_interact()
## • step_spline_natural()
## • step_spline_natural()
##
## ── Model ───────────────────────────────────────────────────────────────────────
##
## Call: dummy_call()
##
## Df %Dev Lambda
## 1 0 0.00 0.44850
## 2 2 3.53 0.40860
## 3 4 9.53 0.37230
## 4 6 17.29 0.33920
## 5 7 25.01 0.30910
## 6 7 32.02 0.28160
## 7 7 38.24 0.25660
## 8 9 43.79 0.23380
## 9 10 48.82 0.21310
## 10 10 53.42 0.19410
## 11 10 57.41 0.17690
## 12 11 60.91 0.16120
## 13 12 64.09 0.14690
## 14 12 66.86 0.13380
## 15 14 69.28 0.12190
## 16 14 71.37 0.11110
## 17 15 73.20 0.10120
## 18 16 74.79 0.09223
## 19 17 76.20 0.08403
## 20 18 77.44 0.07657
## 21 20 78.62 0.06977
## 22 24 79.68 0.06357
## 23 25 80.61 0.05792
## 24 25 81.46 0.05278
## 25 28 82.31 0.04809
## 26 31 83.08 0.04382
## 27 33 83.76 0.03992
## 28 35 84.35 0.03638
## 29 38 84.89 0.03314
## 30 39 85.40 0.03020
## 31 39 85.87 0.02752
## 32 41 86.28 0.02507
## 33 45 86.67 0.02285
## 34 48 87.03 0.02082
## 35 50 87.37 0.01897
## 36 53 87.68 0.01728
## 37 55 87.96 0.01575
## 38 58 88.21 0.01435
## 39 61 88.44 0.01307
## 40 65 88.66 0.01191
## 41 71 88.88 0.01085
## 42 76 89.10 0.00989
## 43 80 89.30 0.00901
## 44 85 89.49 0.00821
## 45 93 89.66 0.00748
## 46 94 89.82 0.00682
##
## ...
## and 54 more lines.
best_run_saved_objects$val_results
## # A tibble: 3 × 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rmse standard 0.0546
## 2 rsq standard 0.895
## 3 mae standard 0.0395
For reproducibility (or when preparing to deploy a model), it can be
helpful to know some additional run metadata, like the starting random
seed or a log of which R packages were loaded during the R session. This
information is all stored under the RUN_DIR/.guild
directory.
To recover the seed:
yaml::read_yaml(file.path(best_run$dir, ".guild/attrs/random_seed"))
To access the log of R loaded packages:
# Read the log of loaded R packages that is stored with the run. `best_run`
# is the run whose saved objects were loaded earlier in this document.
packages_loaded <- yaml::read_yaml(
  file.path(best_run$dir, ".guild", "attrs", "r_packages_loaded")
)
str(packages_loaded, list.len = 5)
## List of 98
## $ guildai :List of 2
## ..$ path : chr "/home/tomasz/opt/R-4.2.2/lib/R/site-library/guildai"
## ..$ version: chr "0.0.0.9001"
## $ grDevices :List of 2
## ..$ path : chr "/home/tomasz/opt/R-4.2.2/lib/R/library/grDevices"
## ..$ version: chr "4.2.2"
## $ fansi :List of 2
## ..$ path : chr "/home/tomasz/opt/R-4.2.2/lib/R/site-library/fansi"
## ..$ version: chr "1.0.4"
## $ utf8 :List of 2
## ..$ path : chr "/home/tomasz/opt/R-4.2.2/lib/R/site-library/utf8"
## ..$ version: chr "1.2.3"
## $ dplyr :List of 2
## ..$ path : chr "/home/tomasz/opt/R-4.2.2/lib/R/site-library/dplyr"
## ..$ version: chr "1.1.0"
## [list output truncated]