Exercises + Solutions

Exercises

01-intro.R

# Let's warm-up!

library(dplyr)

# Using dplyr, starting from the ADSL dataset:
#   - Subset to the safety population (SAFFL == "Y")
#   - Calculate the number of unique subjects in each treatment group (TRT01A)
#
# Fill in the empty filter() and count() calls below.

pharmaverseadam::adsl |> 
  filter(   ) |> 
  count(   )

02-SDTM.R

library(sdtm.oak)
library(pharmaverseraw)
library(dplyr)

#AE aCRF - https://github.com/pharmaverse/pharmaverseraw/blob/main/vignettes/articles/aCRFs/AdverseEvent_aCRF.pdf

# Raw adverse-event dataset ----
ae_raw <- pharmaverseraw::ae_raw

# Add the oak id variables to the raw dataset ----
ae_raw <- generate_oak_id_vars(
  ae_raw,
  pat_var = "PATNUM",
  raw_src = "ae_raw"
)

# Controlled terminology, defined inline: codelist C66742 pairs the
# collected values "No"/"Yes" with the term values "N"/"Y"
study_ct <- data.frame(
  codelist_code = rep("C66742", 2),
  term_code = c("C49487", "C49488"),
  term_value = c("N", "Y"),
  collected_value = c("No", "Yes"),
  term_preferred_term = c("No", "Yes"),
  term_synonyms = c("No", "Yes"),
  stringsAsFactors = FALSE
)

# Exercise 1 ------------------------------------------------
# Map AETERM from raw_var=IT.AETERM, tgt_var=AETERM
# Replace every ?? placeholder; the code will not parse until they are filled in.
ae <-
  # Derive topic variable
  # Map AETERM using assign_no_ct
  assign_no_ct(
    raw_dat = ??,
    raw_var = ??,
    tgt_var = ??,
    id_vars = oak_id_vars()
  )

# Exercise 2 ------------------------------------------------
# Map AESER from raw_var=IT.AESER, tgt_var=AESER. Codelist code for AESER is C66742
# The first ?? below stands for the name of the mapping function to call.
  ae <- ae %>%
  # Map AESER using ??
  ??(
    raw_dat = ??,
    raw_var = ??,
    tgt_var = ??,
    ct_spec = ??,
    ct_clst = ??,
    id_vars = oak_id_vars()
  )

# Exercise 3 ------------------------------------------------
# Map AESDTH from raw_var=IT.AESDTH, tgt_var=AESDTH. Annotation text is:
#    If "Yes" then AESDTH = "Y" else Not Submitted. Codelist code for AESDTH is C66742

ae <- ae %>%
# Map AESDTH using condition_add & assign_ct, raw_var=IT.AESDTH, tgt_var=AESDTH
# Only the arguments of condition_add() are left to fill in.
assign_ct(
  raw_dat = condition_add(??),
  raw_var = "IT.AESDTH",
  tgt_var = "AESDTH",
  ct_spec = study_ct,
  ct_clst = "C66742",
  id_vars = oak_id_vars()
)

03-ADaM.R

# Exercise 1
# Update date and time imputation arguments so that any dates or times
# that are imputed are the last month/day of the year and 23:59:59

library(tibble)
library(lubridate)
library(admiral)

# Date-time strings of varying completeness, including an empty one
posit_mh <- tribble(
  ~USUBJID, ~MHSTDTC,
  1,        "2019-07-18T15:25:40",
  1,        "2019-07-18T15:25",
  1,        "2019-07-18",
  2,        "2024-02",
  2,        "2019",
  2,        "2019---07",
  3,        ""
)

# Replace both "????" placeholders with the imputation rule to apply
derive_vars_dtm(
  dataset = posit_mh,
  new_vars_prefix = "AST",
  dtc = MHSTDTC,
  highest_imputation = "M",
  date_imputation = "????",
  time_imputation = "????"
)

# Exercise 2
# Update the parameters argument
# Update set_values_to argument for the formula
# MAP Formula: MAP = (SYSBP + 2*DIABP) / 3


# Diastolic and systolic blood pressure records at BASELINE for two subjects
ADVS <- tribble(
  ~USUBJID,      ~PARAMCD, ~PARAM,                            ~AVALU,  ~AVAL, ~VISIT,
  "01-701-1015", "DIABP",  "Diastolic Blood Pressure (mmHg)", "mmHg",    51, "BASELINE",
  "01-701-1015", "SYSBP",  "Systolic Blood Pressure (mmHg)",  "mmHg",   121, "BASELINE",
  "01-701-1028", "DIABP",  "Diastolic Blood Pressure (mmHg)", "mmHg",    79, "BASELINE",
  "01-701-1028", "SYSBP",  "Systolic Blood Pressure (mmHg)",  "mmHg",   130, "BASELINE",
) 

# Replace the "????" strings and the ?? values; the call will not parse
# until the ?? placeholders in the formula are filled in.
derive_param_computed(
  ADVS,
  by_vars = exprs(USUBJID, VISIT),
  parameters = c("????", "????"),
  set_values_to = exprs(
    AVAL = (AVAL.SYSBP + ?? * AVAL.DIABP) / ??,
    PARAMCD = "MAP",
    PARAM = "Mean Arterial Pressure (mmHg)",
    AVALU = "mmHg",
  )
) 

04-ARD.R

# ARD Exercise: Adverse Events summaries using {cards}


# Setup: run this first! --------------------------------------------------

# Load necessary packages
library(cards) 

# Import & subset data
# Subjects in the safety population
adsl <- pharmaverseadam::adsl |>
  dplyr::filter(SAFFL == "Y")

# Safety-population adverse events, trimmed to a small example:
# the first three SOCs, then the first three preferred terms within each SOC
adae <- pharmaverseadam::adae |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::filter(AESOC %in% unique(AESOC)[1:3]) |>
  dplyr::filter(AEDECOD %in% unique(AEDECOD)[1:3], .by = AESOC)


# Exercise ----------------------------------------------------------------

# A. Calculate the number and percentage of *unique* subjects with at least one AE:
#  - By each SOC (AESOC)
#  - By each Preferred term (AEDECOD) within SOC (AESOC)
#  - Within every treatment group (ARM)
#
# Fill in each empty argument below.

ard_stack_hierarchical(
  data = ,
  variables = ,
  by = , 
  id = ,
  denominator = 
) 

# B. [*BONUS*] Modify the code from part A to include overall number/percentage of
# subjects with at least one AE, regardless of SOC and PT
# (write the modified call below)

05-tables-tfrmt.R

# Table Exercise: AE summary table using {tfrmt}

# For this exercise, we will use the AE ARD from the last section to
# create a {tfrmt} table


# Setup: run this first! --------------------------------------------------

## Load necessary packages
library(cards)
library(dplyr)
library(tidyr)
library(tfrmt)

## Import & subset data
## Subjects in the safety population; passed as the denominator below
adsl <- pharmaverseadam::adsl |>
  dplyr::filter(SAFFL == "Y")

## Safety-population adverse events, trimmed to a small example:
## the first three SOCs, then the first three preferred terms within each SOC
adae <- pharmaverseadam::adae |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::filter(AESOC %in% unique(AESOC)[1:3]) |>
  dplyr::filter(AEDECOD %in% unique(AEDECOD)[1:3], .by = AESOC)

## Create AE Summary using cards
## "n" and "p" statistics by ARM for AESOC and AEDECOD, plus the overall rows
ard_ae <- adae |>
  ard_stack_hierarchical(
    variables = c(AESOC, AEDECOD),
    by = ARM,
    id = USUBJID,
    denominator = adsl,
    statistic = ~ c("n", "p"),
    over_variables = TRUE
  )


# Exercise ----------------------------------------------------------------

# A. Convert `cards` object into a tidy data frame ready for {tfrmt}. 
#    Nothing to do besides run each step & explore the output!

ard_ae_tidy <- ard_ae |> 
  shuffle_card(fill_hierarchical_overall = "ANY EVENT") |> 
  prep_big_n(vars = "ARM") |> 
  prep_hierarchical_fill(vars = c("AESOC","AEDECOD"),
                       fill_from_left = TRUE)|> 
  # drop columns that are not needed for the table
  dplyr::select(-c(context, stat_label, stat_variable)) 


# B. Create a basic tfrmt, filling in the missing variable names
#    (param, value and column are columns of ard_ae_tidy)

ae_tfrmt <- tfrmt(
  group = AESOC,
  label = AEDECOD,
  param = , # fill
  value = , # fill
  column = , # fill
  body_plan = body_plan(
    # default format for every group/label: "n (p%)", with p multiplied by 100
    frmt_structure(group_val = ".default", label_val = ".default", 
                   frmt_combine(
                     "{n} ({p}%)",
                     n = frmt("xx"),
                     p = frmt("xx", transform = ~ . *100)
                   )
    )
  ),
  # big Ns come from the rows whose param value is "bigN"
  big_n = big_n_structure(param_val = "bigN") 
) 

print_to_gt(ae_tfrmt,
            ard_ae_tidy)


# C. Switch the order of the columns so Placebo is last
#    Starting point: Placebo is listed first; rearrange the col_plan() entries

ae_tfrmt <- ae_tfrmt |> 
  tfrmt(
    col_plan = col_plan(
      "Placebo",
      starts_with("Xanomeline")
    )
  )  

print_to_gt(ae_tfrmt, ard_ae_tidy)


# D. Add a title and source note for the table
#    Replace the two empty strings below

ae_tfrmt <- ae_tfrmt |> 
  tfrmt(
    title = "", # fill
    footnote_plan = footnote_plan(
      footnote_structure("") # fill with footnote text
    ) 
  )

print_to_gt(ae_tfrmt, ard_ae_tidy)

06-tables-gtsummary.R

# Table Exercise: Demographic summary table using {gtsummary}

# Create a Demography table split by treatment

# Setup
## Load necessary packages
library(gtsummary)
library(tidyverse)

## Import data
## Safety-population subjects, each joined to their earliest (by ADY)
## non-missing BMI, HEIGHT and WEIGHT value, one column per parameter
df_gtsummary_exercise <- pharmaverseadam::adsl |>
  filter(SAFFL == "Y") |>
  left_join(
    pharmaverseadam::advs |>
      filter(!is.na(AVAL), PARAMCD %in% c("BMI", "HEIGHT", "WEIGHT")) |>
      arrange(ADY) |>
      slice_head(n = 1, by = c(USUBJID, PARAMCD)) |>
      pivot_wider(
        id_cols = USUBJID,
        names_from = PARAMCD,
        values_from = AVAL
      ),
    by = join_by(USUBJID)
  ) |>
  select(
    USUBJID, TRT01A, AGE, AGEGR1, SEX, RACE, ETHNIC,
    BMI, HEIGHT, WEIGHT
  ) |>
  # the pivoted vitals columns get display labels here
  labelled::set_variable_labels(
    BMI = "BMI",
    HEIGHT = "Height, cm",
    WEIGHT = "Weight, kg"
  )

# 1. Use tbl_summary() to summarize AGE, AGEGR1, SEX, RACE, ETHNIC, BMI, HEIGHT, WEIGHT by TRT01A
# 2. For all continuous variables, present the following stats: c("{mean} ({sd})", "{median} ({p25}, {p75})", "{min}, {max}")
# 3. Ensure the AGEGR1 levels are reported in the correct order
# 4. View the ARD saved in the gtsummary table using `gather_ard()` function
# BONUS!
# 5. Add the header "**Active Treatment**" over the 'Xanomeline' treatments using the `modify_spanning_header()` function

# Fill in the empty factor() call, every empty tbl_summary() argument and the
# modify_spanning_header() call below.
tbl <-
  df_gtsummary_exercise |>
  mutate(AGEGR1 = factor()) |>
  tbl_summary(
    by = ,
    include = ,
    type = ,
    statistic = ,
    label =  # add a label for AGEGR1
  ) |>
  # add a header above the 'Xanomeline' treatments. HINT: Use `show_header_names()` to know the column names
  modify_spanning_header()

tbl

# extract the ARD from the table (task 4: use `gather_ard()`)

Solutions

01-intro.R

# Let's warm-up!

library(dplyr)

# Using dplyr, starting from the ADSL dataset:
#   - Keep only the safety population (SAFFL == "Y")
#   - Count the subjects in each treatment group (TRT01A)

pharmaverseadam::adsl |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::count(TRT01A)

02-SDTM.R

library(sdtm.oak)
library(pharmaverseraw)
library(dplyr)

#AE aCRF - https://github.com/pharmaverse/pharmaverseraw/blob/main/vignettes/articles/aCRFs/AdverseEvent_aCRF.pdf

# Raw adverse-event dataset ----
ae_raw <- pharmaverseraw::ae_raw

# Add the oak id variables to the raw dataset ----
ae_raw <- generate_oak_id_vars(
  ae_raw,
  pat_var = "PATNUM",
  raw_src = "ae_raw"
)

# Controlled terminology, defined inline: codelist C66742 pairs the
# collected values "No"/"Yes" with the term values "N"/"Y"
study_ct <- data.frame(
  codelist_code = rep("C66742", 2),
  term_code = c("C49487", "C49488"),
  term_value = c("N", "Y"),
  collected_value = c("No", "Yes"),
  term_preferred_term = c("No", "Yes"),
  term_synonyms = c("No", "Yes"),
  stringsAsFactors = FALSE
)

# Exercise 1 ------------------------------------------------
# Derive the topic variable: map AETERM from raw_var=IT.AETERM,
# tgt_var=AETERM using assign_no_ct
ae <- assign_no_ct(
  raw_dat = ae_raw,
  raw_var = "IT.AETERM",
  tgt_var = "AETERM",
  id_vars = oak_id_vars()
)

# Exercise 2 ------------------------------------------------
# Map AESER from raw_var=IT.AESER, tgt_var=AESER using assign_ct.
# Codelist code for AESER is C66742
ae <- ae %>%
  assign_ct(
    raw_dat = ae_raw,
    raw_var = "IT.AESER",
    tgt_var = "AESER",
    ct_spec = study_ct,
    ct_clst = "C66742",
    id_vars = oak_id_vars()
  )

# Exercise 3 ------------------------------------------------
# Map AESDTH from raw_var=IT.AESDTH, tgt_var=AESDTH using condition_add &
# assign_ct. Annotation text is:
#    If "Yes" then AESDTH = "Y" else Not Submitted.
# Codelist code for AESDTH is C66742
ae <- ae %>%
  assign_ct(
    # only the raw records collected as "Yes" are mapped
    raw_dat = condition_add(ae_raw, IT.AESDTH == "Yes"),
    raw_var = "IT.AESDTH",
    tgt_var = "AESDTH",
    ct_spec = study_ct,
    ct_clst = "C66742",
    id_vars = oak_id_vars()
  )

03-ADaM.R

# Exercise 1
# Update date and time imputation arguments so that any dates or times
# that are imputed are the last month/day of the year and 23:59:59

library(tibble)
library(lubridate)
library(admiral)

# Date-time strings of varying completeness, including an empty one
posit_mh <- tibble(
  USUBJID = c(1, 1, 1, 2, 2, 2, 3),
  MHSTDTC = c(
    "2019-07-18T15:25:40",
    "2019-07-18T15:25",
    "2019-07-18",
    "2024-02",
    "2019",
    "2019---07",
    ""
  )
)

# "last" is the rule that imputes missing date parts to the last month/day
# and missing time parts to 23:59:59
posit_mh |>
  derive_vars_dtm(
    dtc = MHSTDTC,
    new_vars_prefix = "AST",
    highest_imputation = "M",
    date_imputation = "last",
    time_imputation = "last"
  )

# Exercise 2
# Update the parameters argument
# Update set_values_to argument for the formula
# MAP Formula: MAP = (SYSBP + 2*DIABP) / 3

# Diastolic and systolic blood pressure records at BASELINE for two subjects
ADVS <- tribble(
  ~USUBJID, ~PARAMCD, ~PARAM, ~AVALU, ~AVAL, ~VISIT,
  "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", "mmHg", 51, "BASELINE",
  "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", "mmHg", 121, "BASELINE",
  "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", "mmHg", 79, "BASELINE",
  "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", "mmHg", 130, "BASELINE"
)

# Add a MAP record per subject and visit, computed from the SYSBP and DIABP
# values with the formula above
ADVS |>
  derive_param_computed(
    by_vars = exprs(USUBJID, VISIT),
    parameters = c("SYSBP", "DIABP"),
    set_values_to = exprs(
      AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3,
      PARAMCD = "MAP",
      PARAM = "Mean Arterial Pressure (mmHg)",
      AVALU = "mmHg"
    )
  )

04-ARD.R

# ARD Exercise: Adverse Events summaries using {cards}


# Setup: run this first! --------------------------------------------------

# Load necessary packages
library(cards) 

# Import & subset data
# Subjects in the safety population
adsl <- pharmaverseadam::adsl |>
  dplyr::filter(SAFFL == "Y")

# Safety-population adverse events, trimmed to a small example:
# the first three SOCs, then the first three preferred terms within each SOC
adae <- pharmaverseadam::adae |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::filter(AESOC %in% unique(AESOC)[1:3]) |>
  dplyr::filter(AEDECOD %in% unique(AEDECOD)[1:3], .by = AESOC)

  
# Exercise ----------------------------------------------------------------

# A. Calculate the number and percentage of *unique* subjects with at least one AE:
#  - By each SOC (AESOC)
#  - By each Preferred term (AEDECOD) within SOC (AESOC)
#  - Within every treatment group (ARM)

adae |>
  ard_stack_hierarchical(
    variables = c(AESOC, AEDECOD),
    by = ARM,
    id = USUBJID,
    denominator = adsl
  )

# B. [*BONUS*] Modify the code from part A to include overall number/percentage of
# subjects with at least one AE, regardless of SOC and PT

# Same call as part A with over_variables switched on
adae |>
  ard_stack_hierarchical(
    variables = c(AESOC, AEDECOD),
    by = ARM,
    id = USUBJID,
    denominator = adsl,
    over_variables = TRUE
  )

05-tables-tfrmt.R

# Table Exercise: AE summary table using {tfrmt}

# For this exercise, we will use the AE ARD from the last section to
# create a {tfrmt} table


# Setup: run this first! --------------------------------------------------

## Load necessary packages
library(cards)
library(dplyr)
library(tidyr)
library(tfrmt)

## Import & subset data
## Subjects in the safety population; passed as the denominator below
adsl <- pharmaverseadam::adsl |>
  dplyr::filter(SAFFL == "Y")

## Safety-population adverse events, trimmed to a small example:
## the first three SOCs, then the first three preferred terms within each SOC
adae <- pharmaverseadam::adae |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::filter(AESOC %in% unique(AESOC)[1:3]) |>
  dplyr::filter(AEDECOD %in% unique(AEDECOD)[1:3], .by = AESOC)

## Create AE Summary using cards
## "n" and "p" statistics by ARM for AESOC and AEDECOD, plus the overall rows
ard_ae <- adae |>
  ard_stack_hierarchical(
    variables = c(AESOC, AEDECOD),
    by = ARM,
    id = USUBJID,
    denominator = adsl,
    statistic = ~ c("n", "p"),
    over_variables = TRUE
  )


# Exercise ----------------------------------------------------------------

# A. Convert `cards` object into a tidy data frame ready for {tfrmt}.
#    Nothing to do besides run each step & explore the output!

ard_ae_tidy <- ard_ae |>
  shuffle_card(fill_hierarchical_overall = "ANY EVENT") |>
  prep_big_n(vars = "ARM") |>
  prep_hierarchical_fill(
    vars = c("AESOC", "AEDECOD"),
    fill_from_left = TRUE
  ) |>
  # drop columns that are not needed for the table
  dplyr::select(-c(context, stat_label, stat_variable))


# B. Create a basic tfrmt, filling in the appropriate variable names

ae_tfrmt <- tfrmt(
  group = AESOC,
  label = AEDECOD,
  param = stat_name,
  value = stat,
  column = ARM,
  body_plan = body_plan(
    # default format for every group/label: "n (p%)", with p multiplied by 100
    frmt_structure(
      group_val = ".default",
      label_val = ".default",
      frmt_combine(
        "{n} ({p}%)",
        n = frmt("xx"),
        p = frmt("xx", transform = ~ . * 100)
      )
    )
  ),
  # big Ns come from the rows whose param value is "bigN"
  big_n = big_n_structure(param_val = "bigN")
)

ae_tfrmt |>
  print_to_gt(ard_ae_tidy)


# C. Switch the order of the columns so Placebo is last

ae_tfrmt <- ae_tfrmt |>
  tfrmt(
    col_plan = col_plan(
      starts_with("Xanomeline"),
      "Placebo"
    )
  )

ae_tfrmt |>
  print_to_gt(ard_ae_tidy)


# D. Add a title and source note for the table

ae_tfrmt <- ae_tfrmt |>
  tfrmt(
    title = "AE Table",
    footnote_plan = footnote_plan(
      footnote_structure("A typical AE Table")
    )
  )

ae_tfrmt |>
  print_to_gt(ard_ae_tidy)

06-tables-gtsummary.R

# Table Exercise: Demographic summary table using {gtsummary}

# Create a Demography table split by treatment

# Setup
## Load necessary packages
library(gtsummary)
library(tidyverse)

## Import data
## Safety-population subjects, each joined to their earliest (by ADY)
## non-missing BMI, HEIGHT and WEIGHT value, one column per parameter
df_gtsummary_exercise <- pharmaverseadam::adsl |>
  filter(SAFFL == "Y") |>
  left_join(
    pharmaverseadam::advs |>
      filter(!is.na(AVAL), PARAMCD %in% c("BMI", "HEIGHT", "WEIGHT")) |>
      arrange(ADY) |>
      slice_head(n = 1, by = c(USUBJID, PARAMCD)) |>
      pivot_wider(
        id_cols = USUBJID,
        names_from = PARAMCD,
        values_from = AVAL
      ),
    by = join_by(USUBJID)
  ) |>
  select(
    USUBJID, TRT01A, AGE, AGEGR1, SEX, RACE, ETHNIC,
    BMI, HEIGHT, WEIGHT
  ) |>
  # the pivoted vitals columns get display labels here
  labelled::set_variable_labels(
    BMI = "BMI",
    HEIGHT = "Height, cm",
    WEIGHT = "Weight, kg"
  )

# 1. Use tbl_summary() to summarize AGE, AGEGR1, SEX, RACE, ETHNIC, BMI,
#    HEIGHT, WEIGHT by TRT01A
# 2. For all continuous variables, present the following stats:
#    c("{mean} ({sd})", "{median} ({p25}, {p75})", "{min}, {max}")
# 3. Ensure the AGEGR1 levels are reported in the correct order
# 4. View the ARD saved in the gtsummary table using `gather_ard()` function
# BONUS!
# 5. Add the header "**Active Treatment**" over the 'Xanomeline' treatments
#    using the `modify_spanning_header()` function


tbl <-
  df_gtsummary_exercise |>
  # explicit factor levels make the age groups print in the intended order
  mutate(AGEGR1 = factor(AGEGR1, levels = c("18-64", ">64"))) |>
  tbl_summary(
    by = TRT01A,
    include = c(AGE, AGEGR1, SEX, RACE, ETHNIC, BMI, HEIGHT, WEIGHT),
    # all continuous variables should be summarized as multi-row
    type = all_continuous() ~ "continuous2",
    # the statistics shown for all continuous variables
    statistic = all_continuous() ~ c(
      "{mean} ({sd})",
      "{median} ({p25}, {p75})",
      "{min}, {max}"
    ),
    # label for AGEGR1
    label = list(AGEGR1 = "Age Group")
  ) |>
  # header above the 'Xanomeline' treatments; the column names were looked up
  # with `show_header_names()`
  modify_spanning_header(c(stat_2, stat_3) ~ "**Active Treatment**")

tbl

# extract the ARD from the table
tbl |>
  gather_ard()