Programming with projectable • projectable

library(projectable)
library(dplyr)

Other packages exist for producing summary tables in R. The main advantages projectable is intended to have over these other packages are flexibility and programmability.

Concretely, what that means is that the projectable user can specify exactly what the columns and rows of the output table should include, and, moreover, that they can do this programmatically.

Thus, taking the mtcars dataset as our example, if we wanted to see transmission type and engine shape broken down by a few different vehicle characteristics, we could write:

# Variables whose values will become the columns of the output tables
col_vars <- c("cyl", "gear")

# Build one summary table per column variable: one output column per distinct
# value of the variable, with transmission and engine shape as the rows.
my_tbls <- lapply(col_vars, function(.cvar) {

  # Distinct values of the column variable, in ascending order
  .cvals <- sort(unique(mtcars[[.cvar]]))
  .csym <- as.name(.cvar)

  # Create column expressions as calls rather than pasting code into a string
  # and re-parsing it: each value is injected as-is, with no round trip
  # through text. `.cvals` is deliberately left as a symbol in the second
  # argument, so it is resolved when the expression is evaluated.
  cols <- lapply(.cvals, function(.cval) {
    bquote(col_freq(.(.csym) %in% .(.cval), .(.csym) %in% .cvals))
  })
  names(cols) <- paste0(.cvar, ": ", .cvals)

  mtcars %>%
    prj_tbl_cols(!!!cols) %>%
    prj_tbl_rows(
      Transmission = am,
      `Engine Shape` = vs
    ) %>%
    prj_tbl_summarise() %>%
    prj_shadow(everything(), .shadow = "{signif(p, 2)} ({n})") %>%
    prj_project()
})

my_tbls
#> [[1]]
#> # A tibble: 4 × 5
#>   row_spanner  rows      `cyl: 4`  `cyl: 6` `cyl: 8` 
#> * <col_row>    <col_row> <glue>    <glue>   <glue>   
#> 1 Transmission 0         0.16 (3)  0.21 (4) 0.63 (12)
#> 2 Transmission 1         0.62 (8)  0.23 (3) 0.15 (2) 
#> 3 Engine Shape 0         0.06 (1)  0.17 (3) 0.78 (14)
#> 4 Engine Shape 1         0.71 (10) 0.29 (4) 0 (0)    
#> 
#> [[2]]
#> # A tibble: 4 × 5
#>   row_spanner  rows      `gear: 3` `gear: 4` `gear: 5`
#> * <col_row>    <col_row> <glue>    <glue>    <glue>   
#> 1 Transmission 0         0.79 (15) 0.21 (4)  0 (0)    
#> 2 Transmission 1         0 (0)     0.62 (8)  0.38 (5) 
#> 3 Engine Shape 0         0.67 (12) 0.11 (2)  0.22 (4) 
#> 4 Engine Shape 1         0.21 (3)  0.71 (10) 0.07 (1)

At the price of being a bit more verbose, we can gain finer-grained control over the shape of the output. We can, for instance, give the rows and columns descriptive labels and omit automatic cars:

# Display labels (names) and underlying values for each column variable
val_labels <- list(
  cyl = c(`Four Cylinders` = 4, `Six Cylinders` = 6, `Eight Cylinders` = 8),
  gear = c(`Three Gears` = 3, `Four Gears` = 4, `Five Gears` = 5)
)

# One table per column variable. The labels are looked up by variable name
# (`val_labels[col_vars]`) so the pairing does not depend on the two objects
# happening to be in the same order. `Map()` always returns a list, named
# after `col_vars`.
my_tbls <- Map(function(.cvar, .cvals) {
  .csym <- as.name(.cvar)

  # Build the column expressions as calls instead of parsing interpolated
  # strings; the values are injected unchanged and the labels become the
  # column names. `.cvals` stays a symbol in the second argument.
  cols <- lapply(unname(.cvals), function(.cval) {
    bquote(col_freq(.(.csym) %in% .(.cval), .(.csym) %in% .cvals))
  })
  names(cols) <- names(.cvals)

  mtcars %>%
    prj_tbl_cols(!!!cols) %>%
    prj_tbl_rows(
      # Only a `Manual` row is defined, so automatic cars get no row
      Transmission = list(Manual = am %in% 1),
      `Engine Shape` = list(
        `V-shaped` = vs %in% 1,
        `Not V-shaped` = vs %in% 0
      )
    ) %>%
    prj_tbl_summarise() %>%
    prj_shadow(everything(), .shadow = "{signif(p, 2)} ({n})") %>%
    prj_project()
}, col_vars, val_labels[col_vars])

my_tbls
#> $cyl
#> # A tibble: 3 × 5
#>   row_spanner  rows         `Four Cylinders` `Six Cylinders` `Eight Cylinders`
#> * <col_row>    <col_row>    <glue>           <glue>          <glue>           
#> 1 Transmission Manual       0.62 (8)         0.23 (3)        0.15 (2)         
#> 2 Engine Shape V-shaped     0.71 (10)        0.29 (4)        0 (0)            
#> 3 Engine Shape Not V-shaped 0.06 (1)         0.17 (3)        0.78 (14)        
#> 
#> $gear
#> # A tibble: 3 × 5
#>   row_spanner  rows         `Three Gears` `Four Gears` `Five Gears`
#> * <col_row>    <col_row>    <glue>        <glue>       <glue>      
#> 1 Transmission Manual       0 (0)         0.62 (8)     0.38 (5)    
#> 2 Engine Shape V-shaped     0.21 (3)      0.71 (10)    0.07 (1)    
#> 3 Engine Shape Not V-shaped 0.67 (12)     0.11 (2)     0.22 (4)

Or we can display only certain columns (this time doing column proportions rather than row proportions):

# Only the listed values get a column; any other values are left out
val_labels <- list(
  cyl = c(`Four Cylinders` = 4, `Six Cylinders` = 6),
  gear = c(`Three Gears` = 3, `Four Gears` = 4)
)

# One table per column variable, pairing each variable with its labels by
# name rather than by position. `Map()` always returns a list, named after
# `col_vars`.
my_tbls <- Map(function(.cvar, .cvals) {
  .csym <- as.name(.cvar)

  # The call `.data$<column>`, used as the denominator below.
  # NOTE(review): presumably `.data` refers to the full dataset rather than
  # the current row's subset, which is what makes these column proportions;
  # confirm against the projectable documentation.
  .full_col <- call("$", quote(.data), .csym)

  # Build the column expressions as calls instead of parsing interpolated
  # strings, so the values are injected unchanged.
  cols <- lapply(unname(.cvals), function(.cval) {
    bquote(col_freq(.(.csym) %in% .(.cval), .(.full_col) %in% .(.cval)))
  })
  names(cols) <- names(.cvals)

  mtcars %>%
    prj_tbl_cols(!!!cols) %>%
    prj_tbl_rows(
      Transmission = am,
      `Engine Shape` = vs
    ) %>%
    prj_tbl_summarise() %>%
    prj_shadow(everything(), .shadow = "{signif(p, 2)} ({n})") %>%
    prj_project()
}, col_vars, val_labels[col_vars])

my_tbls
#> $cyl
#> # A tibble: 4 × 4
#>   row_spanner  rows      `Four Cylinders` `Six Cylinders`
#> * <col_row>    <col_row> <glue>           <glue>         
#> 1 Transmission 0         0.27 (3)         0.57 (4)       
#> 2 Transmission 1         0.73 (8)         0.43 (3)       
#> 3 Engine Shape 0         0.09 (1)         0.43 (3)       
#> 4 Engine Shape 1         0.91 (10)        0.57 (4)       
#> 
#> $gear
#> # A tibble: 4 × 4
#>   row_spanner  rows      `Three Gears` `Four Gears`
#> * <col_row>    <col_row> <glue>        <glue>      
#> 1 Transmission 0         1 (15)        0.33 (4)    
#> 2 Transmission 1         0 (0)         0.67 (8)    
#> 3 Engine Shape 0         0.8 (12)      0.17 (2)    
#> 4 Engine Shape 1         0.2 (3)       0.83 (10)