Generally, one should keep pipeline steps as simple as possible, basically following the principle “one step, one task”. This means that usually a lot of pipeline steps are used to calculate intermediate results and only a few steps contain the final results that we are interested in. This vignette shows how to conveniently collect and possibly group the output of those final steps.
Output steps are flagged by setting the keepOut
argument to TRUE when adding a step to the pipeline. In the
following example, we will want to keep the output of the steps
data_summary, model_summary, and
model_plot.
library(pipeflow)
library(ggplot2)
# Example pipeline built from small single-purpose steps. A parameter default
# of the form `~step_name` wires the output of that earlier step into the
# parameter.
pip <- pipe_new(
  "my-pipeline",
  data = airquality
) |>
  # Add a `Temp.Celsius` column computed from `Temp` as (Temp - 32) * 5 / 9.
  pipe_add(
    "data_prep",
    function(data = ~data) {
      replace(data, "Temp.Celsius", (data[, "Temp"] - 32) * 5 / 9)
    }
  ) |>
  # Keep only the two columns that are used for modelling.
  pipe_add(
    "data_summary",
    function(
      data = ~data_prep,
      xVar = "Temp.Celsius",
      yVar = "Ozone"
    ) {
      data[, c(xVar, yVar)]
    }
  ) |>
  # Fit the linear model yVar ~ xVar; lm() coerces the pasted string
  # to a formula.
  pipe_add(
    "model_fit",
    function(
      data = ~data_prep,
      xVar = "Temp.Celsius",
      yVar = "Ozone"
    ) {
      lm(paste(yVar, "~", xVar), data = data)
    }
  ) |>
  # Summarise the fitted model.
  pipe_add(
    "model_summary",
    function(
      fit = ~model_fit
    ) {
      summary(fit)
    }
  ) |>
  # Scatter plot of the data with the fitted regression line on top.
  pipe_add(
    "model_plot",
    function(
      model = ~model_fit,
      data = ~data_prep,
      xVar = "Temp.Celsius",
      yVar = "Ozone",
      title = "Linear model fit"
    ) {
      coeffs <- coefficients(model)
      ggplot(data) +
        # Map y through `yVar` (not a hard-coded column name) so the plot
        # follows the parameter when it is changed.
        geom_point(aes(.data[[xVar]], .data[[yVar]])) +
        geom_abline(intercept = coeffs[1], slope = coeffs[2]) +
        labs(title = title)
    }
  )
Looking at the pipeline, we see that the steps
data_summary, model_summary, and
model_plot have been flagged accordingly (see column
keepOut).
pip
# step depends out group state
# <char> <list> <list> <char> <char>
# 1: data [NULL] data New
# 2: data_prep data [NULL] data_prep New
# 3: data_summary data_prep [NULL] data_summary New
# 4: model_fit data_prep [NULL] model_fit New
# 5: model_summary model_fit [NULL] model_summary New
# 6: model_plot model_fit,data_prep [NULL] model_plot New
Now let’s run and collect the output of the flagged steps using the
collect_out method, which returns a list with the output of
the flagged steps.
# Run the pipeline, then gather the step outputs into a list.
pip$run()
out <- pip$collect_out()
# Show the names of the collected list elements.
names(out)
# [1] "data" "data_prep" "data_summary" "model_fit" "model_summary" "model_plot"
As expected, the output list contains the output of the flagged steps.
str(out, max.level = 1)
# List of 6
# $ data :'data.frame': 153 obs. of 6 variables:
# $ data_prep :'data.frame': 153 obs. of 7 variables:
# $ data_summary :'data.frame': 153 obs. of 2 variables:
# $ model_fit :List of 13
# ..- attr(*, "class")= chr "lm"
# $ model_summary:List of 12
# ..- attr(*, "class")= chr "summary.lm"
# $ model_plot : <ggplot2::ggplot>
# ..@ data :'data.frame': 153 obs. of 7 variables:
# ..@ layers :List of 2
# ..@ scales :Classes 'ScalesList', 'ggproto', 'gg' <ggproto object: Class ScalesList, gg>
# add: function
# add_defaults: function
# add_missing: function
# backtransform_df: function
# clone: function
# find: function
# get_scales: function
# has_scale: function
# input: function
# map_df: function
# n: function
# non_position_scales: function
# scales: list
# set_palettes: function
# train_df: function
# transform_df: function
# super: <ggproto object: Class ScalesList, gg>
# ..@ guides :Classes 'Guides', 'ggproto', 'gg' <ggproto object: Class Guides, gg>
# add: function
# assemble: function
# build: function
# draw: function
# get_custom: function
# get_guide: function
# get_params: function
# get_position: function
# guides: NULL
# merge: function
# missing: <ggproto object: Class GuideNone, Guide, gg>
# add_title: function
# arrange_layout: function
# assemble_drawing: function
# available_aes: any
# build_decor: function
# build_labels: function
# build_ticks: function
# build_title: function
# draw: function
# draw_early_exit: function
# elements: list
# extract_decor: function
# extract_key: function
# extract_params: function
# get_layer_key: function
# hashables: list
# measure_grobs: function
# merge: function
# override_elements: function
# params: list
# process_layers: function
# setup_elements: function
# setup_params: function
# train: function
# transform: function
# super: <ggproto object: Class GuideNone, Guide, gg>
# package_box: function
# print: function
# process_layers: function
# setup: function
# subset_guides: function
# train: function
# update_params: function
# super: <ggproto object: Class Guides, gg>
# ..@ mapping : <ggplot2::mapping> Named list()
# ..@ theme : <theme> Named list()
# .. .. @ complete: logi FALSE
# .. .. @ validate: logi TRUE
# ..@ coordinates:Classes 'CoordCartesian', 'Coord', 'ggproto', 'gg' <ggproto object: Class CoordCartesian, Coord, gg>
# aspect: function
# backtransform_range: function
# clip: on
# default: TRUE
# distance: function
# draw_panel: function
# expand: TRUE
# is_free: function
# is_linear: function
# labels: function
# limits: list
# modify_scales: function
# range: function
# ratio: NULL
# render_axis_h: function
# render_axis_v: function
# render_bg: function
# render_fg: function
# reverse: none
# setup_data: function
# setup_layout: function
# setup_panel_guides: function
# setup_panel_params: function
# setup_params: function
# train_panel_guides: function
# transform: function
# super: <ggproto object: Class CoordCartesian, Coord, gg>
# ..@ facet :Classes 'FacetNull', 'Facet', 'ggproto', 'gg' <ggproto object: Class FacetNull, Facet, gg>
# attach_axes: function
# attach_strips: function
# compute_layout: function
# draw_back: function
# draw_front: function
# draw_labels: function
# draw_panel_content: function
# draw_panels: function
# finish_data: function
# format_strip_labels: function
# init_gtable: function
# init_scales: function
# map_data: function
# params: list
# set_panel_size: function
# setup_data: function
# setup_panel_params: function
# setup_params: function
# shrink: TRUE
# train_scales: function
# vars: function
# super: <ggproto object: Class FacetNull, Facet, gg>
# ..@ layout :Classes 'Layout', 'ggproto', 'gg' <ggproto object: Class Layout, gg>
# coord: NULL
# coord_params: list
# facet: NULL
# facet_params: list
# finish_data: function
# get_scales: function
# layout: NULL
# map_position: function
# panel_params: NULL
# panel_scales_x: NULL
# panel_scales_y: NULL
# render: function
# render_labels: function
# reset_scales: function
# resolve_label: function
# setup: function
# setup_panel_guides: function
# setup_panel_params: function
# train_position: function
# super: <ggproto object: Class Layout, gg>
# ..@ labels : <ggplot2::labels> List of 1
# .. .. $ title: chr "Linear model fit"
# ..@ meta : list()
# ..@ plot_env :<environment: 0x5627a63aa3d8>
Often certain output steps are related and should be grouped
together. This can be achieved conveniently by setting the
group argument when adding a step to the pipeline. Let’s
illustrate this by slightly modifying the previous example.
# Same pipeline as before, but related output steps are assigned to a common
# group through the `group` argument of pipe_add().
pip <- Pipeline$new("my-pipeline", data = airquality) |>
  # Add a `Temp.Celsius` column computed from `Temp` as (Temp - 32) * 5 / 9.
  pipe_add(
    "data_prep",
    function(data = ~data) {
      replace(data, "Temp.Celsius", (data[, "Temp"] - 32) * 5 / 9)
    }
  ) |>
  # Keep only the two columns that are used for modelling.
  pipe_add(
    "used_data",
    function(
      data = ~data_prep,
      xVar = "Temp.Celsius",
      yVar = "Ozone"
    ) {
      data[, c(xVar, yVar)]
    },
    group = "Data" # <- define 'Data' group here
  ) |>
  # Fit the linear model yVar ~ xVar; lm() coerces the pasted string
  # to a formula.
  pipe_add(
    "model_fit",
    function(
      data = ~data_prep,
      xVar = "Temp.Celsius",
      yVar = "Ozone"
    ) {
      lm(paste(yVar, "~", xVar), data = data)
    }
  ) |>
  # Summarise the fitted model.
  pipe_add(
    "model_summary",
    function(
      fit = ~model_fit
    ) {
      summary(fit)
    },
    group = "Model" # <- define 'Model' group here
  ) |>
  # Scatter plot of the data with the fitted regression line on top.
  pipe_add(
    "model_plot",
    function(
      model = ~model_fit,
      data = ~data_prep,
      xVar = "Temp.Celsius",
      yVar = "Ozone",
      title = "Linear model fit"
    ) {
      coeffs <- coefficients(model)
      ggplot(data) +
        # Map y through `yVar` (not a hard-coded column name) so the plot
        # follows the parameter when it is changed.
        geom_point(aes(.data[[xVar]], .data[[yVar]])) +
        geom_abline(intercept = coeffs[1], slope = coeffs[2]) +
        labs(title = title)
    },
    group = "Model" # <- define 'Model' group here
  )
Looking at the pipeline, the defined groups are shown in the
group column.
pip
# step depends out group state
# <char> <list> <list> <char> <char>
# 1: data [NULL] data New
# 2: data_prep data [NULL] data_prep New
# 3: used_data data_prep [NULL] Data New
# 4: model_fit data_prep [NULL] model_fit New
# 5: model_summary model_fit [NULL] Model New
# 6: model_plot model_fit,data_prep [NULL] Model New
As you can see, by default, the group is identical to the step name, that is, each step represents the trivial case of a group of size one. Again, we run the pipeline and collect the output.
pip$run()
out <- pip$collect_out()
As we can see, the output related to the modelling has been grouped
into one sublist named Model.
str(out, max.level = 2)
# List of 5
# $ data :'data.frame': 153 obs. of 6 variables:
# ..$ Ozone : int [1:153] 41 36 12 18 NA 28 23 19 8 NA ...
# ..$ Solar.R: int [1:153] 190 118 149 313 NA NA 299 99 19 194 ...
# ..$ Wind : num [1:153] 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
# ..$ Temp : int [1:153] 67 72 74 62 56 66 65 59 61 69 ...
# ..$ Month : int [1:153] 5 5 5 5 5 5 5 5 5 5 ...
# ..$ Day : int [1:153] 1 2 3 4 5 6 7 8 9 10 ...
# $ data_prep:'data.frame': 153 obs. of 7 variables:
# ..$ Ozone : int [1:153] 41 36 12 18 NA 28 23 19 8 NA ...
# ..$ Solar.R : int [1:153] 190 118 149 313 NA NA 299 99 19 194 ...
# ..$ Wind : num [1:153] 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
# ..$ Temp : int [1:153] 67 72 74 62 56 66 65 59 61 69 ...
# ..$ Month : int [1:153] 5 5 5 5 5 5 5 5 5 5 ...
# ..$ Day : int [1:153] 1 2 3 4 5 6 7 8 9 10 ...
# ..$ Temp.Celsius: num [1:153] 19.4 22.2 23.3 16.7 13.3 ...
# $ Data :'data.frame': 153 obs. of 2 variables:
# ..$ Temp.Celsius: num [1:153] 19.4 22.2 23.3 16.7 13.3 ...
# ..$ Ozone : int [1:153] 41 36 12 18 NA 28 23 19 8 NA ...
# $ model_fit:List of 13
# ..$ coefficients : Named num [1:2] -69.28 4.37
# .. ..- attr(*, "names")= chr [1:2] "(Intercept)" "Temp.Celsius"
# ..$ residuals : Named num [1:116] 25.27 8.13 -20.73 14.42 14.7 ...
# .. ..- attr(*, "names")= chr [1:116] "1" "2" "3" "4" ...
# ..$ effects : Named num [1:116] -453.7 247 -23 11.4 11.9 ...
# .. ..- attr(*, "names")= chr [1:116] "(Intercept)" "Temp.Celsius" "" "" ...
# ..$ rank : int 2
# ..$ fitted.values: Named num [1:116] 15.73 27.87 32.73 3.58 13.3 ...
# .. ..- attr(*, "names")= chr [1:116] "1" "2" "3" "4" ...
# ..$ assign : int [1:2] 0 1
# ..$ qr :List of 5
# .. ..- attr(*, "class")= chr "qr"
# ..$ df.residual : int 114
# ..$ na.action : 'omit' Named int [1:37] 5 10 25 26 27 32 33 34 35 36 ...
# .. ..- attr(*, "names")= chr [1:37] "5" "10" "25" "26" ...
# ..$ xlevels : Named list()
# ..$ call : language lm(formula = paste(yVar, "~", xVar), data = data)
# ..$ terms :Classes 'terms', 'formula' language Ozone ~ Temp.Celsius
# .. .. ..- attr(*, "variables")= language list(Ozone, Temp.Celsius)
# .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
# .. .. .. ..- attr(*, "dimnames")=List of 2
# .. .. ..- attr(*, "term.labels")= chr "Temp.Celsius"
# .. .. ..- attr(*, "order")= int 1
# .. .. ..- attr(*, "intercept")= int 1
# .. .. ..- attr(*, "response")= int 1
# .. .. ..- attr(*, ".Environment")=<environment: 0x5627a579aa20>
# .. .. ..- attr(*, "predvars")= language list(Ozone, Temp.Celsius)
# .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
# .. .. .. ..- attr(*, "names")= chr [1:2] "Ozone" "Temp.Celsius"
# ..$ model :'data.frame': 116 obs. of 2 variables:
# .. ..- attr(*, "terms")=Classes 'terms', 'formula' language Ozone ~ Temp.Celsius
# .. .. .. ..- attr(*, "variables")= language list(Ozone, Temp.Celsius)
# .. .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
# .. .. .. .. ..- attr(*, "dimnames")=List of 2
# .. .. .. ..- attr(*, "term.labels")= chr "Temp.Celsius"
# .. .. .. ..- attr(*, "order")= int 1
# .. .. .. ..- attr(*, "intercept")= int 1
# .. .. .. ..- attr(*, "response")= int 1
# .. .. .. ..- attr(*, ".Environment")=<environment: 0x5627a579aa20>
# .. .. .. ..- attr(*, "predvars")= language list(Ozone, Temp.Celsius)
# .. .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
# .. .. .. .. ..- attr(*, "names")= chr [1:2] "Ozone" "Temp.Celsius"
# .. ..- attr(*, "na.action")= 'omit' Named int [1:37] 5 10 25 26 27 32 33 34 35 36 ...
# .. .. ..- attr(*, "names")= chr [1:37] "5" "10" "25" "26" ...
# ..- attr(*, "class")= chr "lm"
# $ Model :List of 2
# ..$ model_summary:List of 12
# .. ..- attr(*, "class")= chr "summary.lm"
# ..$ model_plot : <ggplot2::ggplot>
# .. ..@ data :'data.frame': 153 obs. of 7 variables:
# .. ..@ layers :List of 2
# .. ..@ scales :Classes 'ScalesList', 'ggproto', 'gg' <ggproto object: Class ScalesList, gg>
# add: function
# add_defaults: function
# add_missing: function
# backtransform_df: function
# clone: function
# find: function
# get_scales: function
# has_scale: function
# input: function
# map_df: function
# n: function
# non_position_scales: function
# scales: list
# set_palettes: function
# train_df: function
# transform_df: function
# super: <ggproto object: Class ScalesList, gg>
# .. ..@ guides :Classes 'Guides', 'ggproto', 'gg' <ggproto object: Class Guides, gg>
# add: function
# assemble: function
# build: function
# draw: function
# get_custom: function
# get_guide: function
# get_params: function
# get_position: function
# guides: NULL
# merge: function
# missing: <ggproto object: Class GuideNone, Guide, gg>
# add_title: function
# arrange_layout: function
# assemble_drawing: function
# available_aes: any
# build_decor: function
# build_labels: function
# build_ticks: function
# build_title: function
# draw: function
# draw_early_exit: function
# elements: list
# extract_decor: function
# extract_key: function
# extract_params: function
# get_layer_key: function
# hashables: list
# measure_grobs: function
# merge: function
# override_elements: function
# params: list
# process_layers: function
# setup_elements: function
# setup_params: function
# train: function
# transform: function
# super: <ggproto object: Class GuideNone, Guide, gg>
# package_box: function
# print: function
# process_layers: function
# setup: function
# subset_guides: function
# train: function
# update_params: function
# super: <ggproto object: Class Guides, gg>
# .. ..@ mapping : <ggplot2::mapping> Named list()
# .. ..@ theme : <theme> Named list()
# .. .. .. @ complete: logi FALSE
# .. .. .. @ validate: logi TRUE
# .. ..@ coordinates:Classes 'CoordCartesian', 'Coord', 'ggproto', 'gg' <ggproto object: Class CoordCartesian, Coord, gg>
# aspect: function
# backtransform_range: function
# clip: on
# default: TRUE
# distance: function
# draw_panel: function
# expand: TRUE
# is_free: function
# is_linear: function
# labels: function
# limits: list
# modify_scales: function
# range: function
# ratio: NULL
# render_axis_h: function
# render_axis_v: function
# render_bg: function
# render_fg: function
# reverse: none
# setup_data: function
# setup_layout: function
# setup_panel_guides: function
# setup_panel_params: function
# setup_params: function
# train_panel_guides: function
# transform: function
# super: <ggproto object: Class CoordCartesian, Coord, gg>
# .. ..@ facet :Classes 'FacetNull', 'Facet', 'ggproto', 'gg' <ggproto object: Class FacetNull, Facet, gg>
# attach_axes: function
# attach_strips: function
# compute_layout: function
# draw_back: function
# draw_front: function
# draw_labels: function
# draw_panel_content: function
# draw_panels: function
# finish_data: function
# format_strip_labels: function
# init_gtable: function
# init_scales: function
# map_data: function
# params: list
# set_panel_size: function
# setup_data: function
# setup_panel_params: function
# setup_params: function
# shrink: TRUE
# train_scales: function
# vars: function
# super: <ggproto object: Class FacetNull, Facet, gg>
# .. ..@ layout :Classes 'Layout', 'ggproto', 'gg' <ggproto object: Class Layout, gg>
# coord: NULL
# coord_params: list
# facet: NULL
# facet_params: list
# finish_data: function
# get_scales: function
# layout: NULL
# map_position: function
# panel_params: NULL
# panel_scales_x: NULL
# panel_scales_y: NULL
# render: function
# render_labels: function
# reset_scales: function
# resolve_label: function
# setup: function
# setup_panel_guides: function
# setup_panel_params: function
# train_position: function
# super: <ggproto object: Class Layout, gg>
# .. ..@ labels : <ggplot2::labels> List of 1
# .. .. .. $ title: chr "Linear model fit"
# .. ..@ meta : list()
# .. ..@ plot_env :<environment: 0x5627a5b7b1a0>