# Let's warm-up!
library(dplyr)

# Using dplyr, starting from the ADSL dataset:
# - Subset to the safety population (SAFFL == "Y")
# - Calculate the number of unique subjects in each treatment group (TRT01A)
# The filter() and count() calls are left blank on purpose: fill them in.
pharmaverseadam::adsl |>
  filter( ) |>
  count( )

# Exercises + Solutions
# Exercises
# 01-intro.R
# 02-SDTM.R
library(sdtm.oak)
library(pharmaverseraw)
library(dplyr)
#AE aCRF - https://github.com/pharmaverse/pharmaverseraw/blob/main/vignettes/articles/aCRFs/AdverseEvent_aCRF.pdf
# Read in Raw dataset and generate oak_id_vars ----
# The packaged raw AE data is passed straight to generate_oak_id_vars(),
# using PATNUM as the patient variable and "ae_raw" as the raw source name.
ae_raw <- generate_oak_id_vars(
  pharmaverseraw::ae_raw,
  pat_var = "PATNUM",
  raw_src = "ae_raw"
)
# Read in Controlled Terminology
# Two-row No/Yes specification for codelist C66742; every column is character.
study_ct <- local({
  no_yes <- c("No", "Yes")
  data.frame(
    codelist_code = rep("C66742", length(no_yes)),
    term_code = c("C49487", "C49488"),
    term_value = c("N", "Y"),
    collected_value = no_yes,
    term_preferred_term = no_yes,
    term_synonyms = no_yes,
    stringsAsFactors = FALSE
  )
})
# Exercise 1 ------------------------------------------------
# Map AETERM from raw_var=IT.AETERM, tgt_var=AETERM
# Replace each ?? placeholder below before running this block.
ae <-
# Derive topic variable
# Map AETERM using assign_no_ct
assign_no_ct(
raw_dat = ??,
raw_var = ??,
tgt_var = ??,
id_vars = oak_id_vars()
)
# Exercise 2 ------------------------------------------------
# Map AESER from raw_var=IT.AESER, tgt_var=AESER. Codelist code for AESER is C66742
# Replace each ?? placeholder below (including the function name) before running.
ae <- ae %>%
# Map AESER using ??
??(
raw_dat = ??,
raw_var = ??,
tgt_var = ??,
ct_spec = ??,
ct_clst = ??,
id_vars = oak_id_vars()
)
# Exercise 3 ------------------------------------------------
# Map AESDTH from raw_var=IT.AESDTH, tgt_var=AESDTH. Annotation text is:
# If "Yes" then AESDTH = "Y" else Not Submitted. Codelist code for AESDTH is C66742
# Replace the ?? placeholder inside condition_add() before running.
ae <- ae %>%
  # Map AESDTH using condition_add & assign_ct, raw_var=IT.AESDTH, tgt_var=AESDTH
  assign_ct(
    raw_dat = condition_add(??),
    raw_var = "IT.AESDTH",
    tgt_var = "AESDTH",
    ct_spec = study_ct,
    ct_clst = "C66742",
    id_vars = oak_id_vars()
  )

# 03-ADaM.R
# Exercise 1
# Update date and time imputation arguments
library(tibble)
library(lubridate)
library(admiral)
# Example medical-history start dates in varying degrees of completeness
# (full datetime, date only, year-month, year only, missing month, empty).
posit_mh <- tibble(
  USUBJID = c(1, 1, 1, 2, 2, 2, 3),
  MHSTDTC = c(
    "2019-07-18T15:25:40",
    "2019-07-18T15:25",
    "2019-07-18",
    "2024-02",
    "2019",
    "2019---07",
    ""
  )
)

# The two "????" strings are the values to replace in this exercise.
posit_mh |>
  derive_vars_dtm(
    new_vars_prefix = "AST",
    dtc = MHSTDTC,
    highest_imputation = "M",
    date_imputation = "????",
    time_imputation = "????"
  )
# Exercise 2
# Update the parameters argument
# Update set_values_to argument for the formula
# MAP Formula: MAP = (SYSBP + 2*DIABP) / 3
# Replace the "????" strings and the ?? placeholders below before running.
ADVS <- tribble(
  ~USUBJID, ~PARAMCD, ~PARAM, ~AVALU, ~AVAL, ~VISIT,
  "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", "mmHg", 51, "BASELINE",
  "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", "mmHg", 121, "BASELINE",
  "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", "mmHg", 79, "BASELINE",
  "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", "mmHg", 130, "BASELINE"
)
derive_param_computed(
  ADVS,
  by_vars = exprs(USUBJID, VISIT),
  parameters = c("????", "????"),
  set_values_to = exprs(
    AVAL = (AVAL.SYSBP + ?? * AVAL.DIABP) / ??,
    PARAMCD = "MAP",
    PARAM = "Mean Arterial Pressure (mmHg)",
    AVALU = "mmHg"
  )
)

# 04-ARD.R
# ARD Exercise: Adverse Events summaries using {cards}
# Setup: run this first! --------------------------------------------------
# Load necessary packages
library(cards)
# Import & subset data
# ADSL: safety population only.
adsl <- dplyr::filter(pharmaverseadam::adsl, SAFFL == "Y")

# ADAE: safety population, then the first three SOCs that appear, then the
# first three preferred terms that appear within each of those SOCs.
# The filters stay as separate steps so each unique() sees the already
# filtered rows.
adae <- pharmaverseadam::adae |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::filter(AESOC %in% unique(AESOC)[1:3]) |>
  dplyr::filter(AEDECOD %in% unique(AEDECOD)[1:3], .by = AESOC)
# Exercise ----------------------------------------------------------------
# A. Calculate the number and percentage of *unique* subjects with at least one AE:
# - By each SOC (AESOC)
# - By each Preferred term (AEDECOD) within SOC (AESOC)
# - By every combination of treatment group (ARM)
# Every argument below is left blank on purpose: fill each one in.
ard_stack_hierarchical(
data = ,
variables = ,
by = ,
id = ,
denominator =
)
# B. [*BONUS*] Modify the code from part A to include overall number/percentage of
# subjects with at least one AE, regardless of SOC and PT
# 05-tables-tfrmt.R
# Table Exercise: AE summary table using {tfrmt}
# For this exercise, we will use the AE ARD from the last section to
# create a {tfrmt} table
# Setup: run this first! --------------------------------------------------
## Load necessary packages
library(cards)
library(dplyr)
library(tidyr)
library(tfrmt)
## Import & subset data
# ADSL: safety population only.
adsl <- dplyr::filter(pharmaverseadam::adsl, SAFFL == "Y")

# ADAE: safety population, first three SOCs that appear, and the first three
# preferred terms that appear within each SOC. The filters stay as separate
# steps so each unique() sees the already filtered rows.
adae <- pharmaverseadam::adae |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::filter(AESOC %in% unique(AESOC)[1:3]) |>
  dplyr::filter(AEDECOD %in% unique(AEDECOD)[1:3], .by = AESOC)

## Create AE Summary using cards
# Hierarchical summary of AESOC and AEDECOD by ARM, with USUBJID as the id,
# ADSL as the denominator, overall rows requested and "n"/"p" statistics.
ard_ae <- adae |>
  ard_stack_hierarchical(
    variables = c(AESOC, AEDECOD),
    by = ARM,
    id = USUBJID,
    denominator = adsl,
    statistic = ~ c("n", "p"),
    over_variables = TRUE
  )
# Exercise ----------------------------------------------------------------
# A. Convert `cards` object into a tidy data frame ready for {tfrmt}.
# Nothing to do besides run each step & explore the output!
# Steps: shuffle the ARD (overall rows filled with "ANY EVENT"), prepare the
# big N rows for ARM, fill the AESOC/AEDECOD hierarchy from the left, then
# drop the context, stat_label and stat_variable columns.
ard_ae_tidy <- ard_ae |>
  shuffle_card(fill_hierarchical_overall = "ANY EVENT") |>
  prep_big_n(vars = "ARM") |>
  prep_hierarchical_fill(
    vars = c("AESOC", "AEDECOD"),
    fill_from_left = TRUE
  ) |>
  dplyr::select(!c(context, stat_label, stat_variable))
# B. Create a basic tfrmt, filling in the missing variable names
# The param, value and column arguments are left blank on purpose: supply the
# matching column names from ard_ae_tidy before running.
ae_tfrmt <- tfrmt(
group = AESOC,
label = AEDECOD,
param = , # fill
value = , # fill
column = , # fill
body_plan = body_plan(
frmt_structure(group_val = ".default", label_val = ".default",
frmt_combine(
"{n} ({p}%)",
n = frmt("xx"),
p = frmt("xx", transform = ~ . *100)
)
)
),
big_n = big_n_structure(param_val = "bigN")
)
# Render the table from the tfrmt specification and the tidy ARD
print_to_gt(ae_tfrmt,
ard_ae_tidy)
# C. Switch the order of the columns so Placebo is last
# Starting point: the column plan below still lists "Placebo" first.
ae_tfrmt <- tfrmt(
  ae_tfrmt,
  col_plan = col_plan("Placebo", starts_with("Xanomeline"))
)

print_to_gt(ae_tfrmt, ard_ae_tidy)
# D. Add a title and source note for the table
# The two empty strings are the values to fill in.
ae_tfrmt <- ae_tfrmt |>
  tfrmt(
    title = "", # fill
    footnote_plan = footnote_plan(
      footnote_structure("") # fill with footnote text
    )
  )
print_to_gt(ae_tfrmt, ard_ae_tidy)

# 06-tables-gtsummary.R
# Table Exercise: Demographic summary table using {gtsummary}
# Create a Demography table split by treatment
# Setup
## Load necessary packages
library(gtsummary)
library(tidyverse)
## Import data
# One row per safety-population subject: selected ADSL demographics plus the
# first (ordered by ADY) non-missing BMI, HEIGHT and WEIGHT value from ADVS.
df_gtsummary_exercise <- pharmaverseadam::adsl |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::left_join(
    pharmaverseadam::advs |>
      dplyr::filter(PARAMCD %in% c("BMI", "HEIGHT", "WEIGHT") & !is.na(AVAL)) |>
      dplyr::arrange(ADY) |>
      dplyr::slice(1, .by = c(USUBJID, PARAMCD)) |>
      tidyr::pivot_wider(
        id_cols = USUBJID,
        names_from = PARAMCD,
        values_from = AVAL
      ),
    by = "USUBJID"
  ) |>
  dplyr::select(
    USUBJID, TRT01A, AGE, AGEGR1, SEX, RACE, ETHNIC, BMI, HEIGHT, WEIGHT
  ) |>
  labelled::set_variable_labels(
    BMI = "BMI",
    HEIGHT = "Height, cm",
    WEIGHT = "Weight, kg"
  )
# 1. Use tbl_summary() to summarize AGE, AGEGR1, SEX, RACE, ETHNIC, BMI, HEIGHT, WEIGHT by TRT01A
# 2. For all continuous variables, present the following stats: c("{mean} ({sd})", "{median} ({p25}, {p75})", "{min}, {max}")
# 3. Ensure the AGEGR1 levels are reported in the correct order
# 4. View the ARD saved in the gtsummary table using `gather_ard()` function
# BONUS!
# 5. Add the header "**Active Treatment**" over the 'Xanomeline' treatments using the `modify_spanning_header()` function
# The blank pieces below (factor(), by, include, type, statistic, label and
# modify_spanning_header()) are the parts to complete.
tbl <-
df_gtsummary_exercise |>
mutate(AGEGR1 = factor()) |>
tbl_summary(
by = ,
include = ,
type = ,
statistic = ,
label = # add a label for AGEGR1
) |>
# add a header above the 'Xanomeline' treatments. HINT: Use `show_header_names()` to know the column names
modify_spanning_header()
tbl
# extract the ARD from the table
# Solutions
# 01-intro.R
# Let's warm-up!
library(dplyr)

# Using dplyr, starting from the ADSL dataset:
# - Subset to the safety population (SAFFL == "Y")
# - Calculate the number of unique subjects in each treatment group (TRT01A)
pharmaverseadam::adsl |>
  filter(SAFFL == "Y") |>
  count(TRT01A)

# 02-SDTM.R
library(sdtm.oak)
library(pharmaverseraw)
library(dplyr)
#AE aCRF - https://github.com/pharmaverse/pharmaverseraw/blob/main/vignettes/articles/aCRFs/AdverseEvent_aCRF.pdf
# Read in Raw dataset and generate oak_id_vars ----
# The packaged raw AE data is passed straight to generate_oak_id_vars(),
# using PATNUM as the patient variable and "ae_raw" as the raw source name.
ae_raw <- generate_oak_id_vars(
  pharmaverseraw::ae_raw,
  pat_var = "PATNUM",
  raw_src = "ae_raw"
)
# Read in Controlled Terminology
# Two-row No/Yes specification for codelist C66742; every column is character.
study_ct <- local({
  no_yes <- c("No", "Yes")
  data.frame(
    codelist_code = rep("C66742", length(no_yes)),
    term_code = c("C49487", "C49488"),
    term_value = c("N", "Y"),
    collected_value = no_yes,
    term_preferred_term = no_yes,
    term_synonyms = no_yes,
    stringsAsFactors = FALSE
  )
})
# Exercises 1-3 are solved as one pipeline that builds `ae` step by step.
# Exercise 1 ------------------------------------------------
# Map AETERM from raw_var=IT.AETERM, tgt_var=AETERM
ae <-
  # Derive topic variable
  # Map AETERM using assign_no_ct, raw_var=IT.AETERM, tgt_var=AETERM
  assign_no_ct(
    raw_dat = ae_raw,
    raw_var = "IT.AETERM",
    tgt_var = "AETERM",
    id_vars = oak_id_vars()
  ) %>%
  # Exercise 2 ------------------------------------------------
  # Map AESER using assign_ct, raw_var=IT.AESER, tgt_var=AESER.
  # Codelist code for AESER is C66742
  assign_ct(
    raw_dat = ae_raw,
    raw_var = "IT.AESER",
    tgt_var = "AESER",
    ct_spec = study_ct,
    ct_clst = "C66742",
    id_vars = oak_id_vars()
  ) %>%
  # Exercise 3 ------------------------------------------------
  # Map AESDTH from raw_var=IT.AESDTH, tgt_var=AESDTH. Annotation text is:
  # If "Yes" then AESDTH = "Y" else Not Submitted. Codelist code for AESDTH is C66742
  # Map AESDTH using condition_add & assign_ct, raw_var=IT.AESDTH, tgt_var=AESDTH
  assign_ct(
    raw_dat = condition_add(ae_raw, IT.AESDTH == "Yes"),
    raw_var = "IT.AESDTH",
    tgt_var = "AESDTH",
    ct_spec = study_ct,
    ct_clst = "C66742",
    id_vars = oak_id_vars()
  )

# 03-ADaM.R
# Exercise 1
# Update date and time imputation arguments so that any dates or times
# that are imputed are the last month/day of the year and 23:59:59
library(tibble)
library(lubridate)
library(admiral)
# Example medical-history start dates in varying degrees of completeness
# (full datetime, date only, year-month, year only, missing month, empty).
posit_mh <- tibble(
  USUBJID = c(1, 1, 1, 2, 2, 2, 3),
  MHSTDTC = c(
    "2019-07-18T15:25:40",
    "2019-07-18T15:25",
    "2019-07-18",
    "2024-02",
    "2019",
    "2019---07",
    ""
  )
)

# Solution: both date_imputation and time_imputation are set to "last".
posit_mh |>
  derive_vars_dtm(
    new_vars_prefix = "AST",
    dtc = MHSTDTC,
    highest_imputation = "M",
    date_imputation = "last",
    time_imputation = "last"
  )
# Exercise 2
# Update the parameters argument
# Update set_values_to argument for the formula
# MAP Formula: MAP = (SYSBP + 2*DIABP) / 3
ADVS <- tribble(
  ~USUBJID, ~PARAMCD, ~PARAM, ~AVALU, ~AVAL, ~VISIT,
  "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", "mmHg", 51, "BASELINE",
  "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", "mmHg", 121, "BASELINE",
  "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", "mmHg", 79, "BASELINE",
  "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", "mmHg", 130, "BASELINE"
)
# Solution: compute MAP from the SYSBP and DIABP records per subject and visit
derive_param_computed(
  ADVS,
  by_vars = exprs(USUBJID, VISIT),
  parameters = c("SYSBP", "DIABP"),
  set_values_to = exprs(
    AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3,
    PARAMCD = "MAP",
    PARAM = "Mean Arterial Pressure (mmHg)",
    AVALU = "mmHg"
  )
)

# 04-ARD.R
# ARD Exercise: Adverse Events summaries using {cards}
# Setup: run this first! --------------------------------------------------
# Load necessary packages
library(cards)
# Import & subset data
# ADSL: safety population only.
adsl <- dplyr::filter(pharmaverseadam::adsl, SAFFL == "Y")

# ADAE: safety population, then the first three SOCs that appear, then the
# first three preferred terms that appear within each of those SOCs.
# The filters stay as separate steps so each unique() sees the already
# filtered rows.
adae <- pharmaverseadam::adae |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::filter(AESOC %in% unique(AESOC)[1:3]) |>
  dplyr::filter(AEDECOD %in% unique(AEDECOD)[1:3], .by = AESOC)
# Exercise ----------------------------------------------------------------
# A. Calculate the number and percentage of *unique* subjects with at least one AE:
# - By each SOC (AESOC)
# - By each Preferred term (AEDECOD) within SOC (AESOC)
# - By every combination of treatment group (ARM)
ard_stack_hierarchical(
  data = adae,
  variables = c(AESOC, AEDECOD),
  by = ARM,
  id = USUBJID,
  denominator = adsl
)

# B. [*BONUS*] Modify the code from part A to include overall number/percentage of
# subjects with at least one AE, regardless of SOC and PT
# Same call as part A with over_variables = TRUE added.
ard_stack_hierarchical(
  data = adae,
  variables = c(AESOC, AEDECOD),
  by = ARM,
  id = USUBJID,
  denominator = adsl,
  over_variables = TRUE
)

# 05-tables-tfrmt.R
# Table Exercise: AE summary table using {tfrmt}
# For this exercise, we will use the AE ARD from the last section to
# create a {tfrmt} table
# Setup: run this first! --------------------------------------------------
## Load necessary packages
library(cards)
library(dplyr)
library(tidyr)
library(tfrmt)
## Import & subset data
# ADSL: safety population only.
adsl <- dplyr::filter(pharmaverseadam::adsl, SAFFL == "Y")

# ADAE: safety population, first three SOCs that appear, and the first three
# preferred terms that appear within each SOC. The filters stay as separate
# steps so each unique() sees the already filtered rows.
adae <- pharmaverseadam::adae |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::filter(AESOC %in% unique(AESOC)[1:3]) |>
  dplyr::filter(AEDECOD %in% unique(AEDECOD)[1:3], .by = AESOC)

## Create AE Summary using cards
# Hierarchical summary of AESOC and AEDECOD by ARM, with USUBJID as the id,
# ADSL as the denominator, overall rows requested and "n"/"p" statistics.
ard_ae <- adae |>
  ard_stack_hierarchical(
    variables = c(AESOC, AEDECOD),
    by = ARM,
    id = USUBJID,
    denominator = adsl,
    statistic = ~ c("n", "p"),
    over_variables = TRUE
  )
# Exercise ----------------------------------------------------------------
# A. Convert `cards` object into a tidy data frame ready for {tfrmt}.
# Nothing to do besides run each step & explore the output!
# Steps: shuffle the ARD (overall rows filled with "ANY EVENT"), prepare the
# big N rows for ARM, fill the AESOC/AEDECOD hierarchy from the left, then
# drop the context, stat_label and stat_variable columns.
ard_ae_tidy <- ard_ae |>
  shuffle_card(fill_hierarchical_overall = "ANY EVENT") |>
  prep_big_n(vars = "ARM") |>
  prep_hierarchical_fill(
    vars = c("AESOC", "AEDECOD"),
    fill_from_left = TRUE
  ) |>
  dplyr::select(!c(context, stat_label, stat_variable))
# B. Create a basic tfrmt, filling in the appropriate variable names
# Solution: param = stat_name, value = stat, column = ARM.
ae_tfrmt <- tfrmt(
  group = AESOC,
  label = AEDECOD,
  param = stat_name,
  value = stat,
  column = ARM,
  # Every cell is rendered as "n (p%)"; p is multiplied by 100 for display
  body_plan = body_plan(
    frmt_structure(
      group_val = ".default",
      label_val = ".default",
      frmt_combine(
        "{n} ({p}%)",
        n = frmt("xx"),
        p = frmt("xx", transform = ~ . * 100)
      )
    )
  ),
  big_n = big_n_structure(param_val = "bigN")
)

print_to_gt(ae_tfrmt, ard_ae_tidy)
# C. Switch the order of the columns so Placebo is last
# Solution: list the Xanomeline columns first, then "Placebo".
ae_tfrmt <- tfrmt(
  ae_tfrmt,
  col_plan = col_plan(starts_with("Xanomeline"), "Placebo")
)

print_to_gt(ae_tfrmt, ard_ae_tidy)
# D. Add a title and source note for the table
# Layer a title and a single footnote onto the existing ae_tfrmt.
ae_tfrmt <- ae_tfrmt |>
  tfrmt(
    title = "AE Table",
    footnote_plan = footnote_plan(
      footnote_structure("A typical AE Table")
    )
  )
print_to_gt(ae_tfrmt, ard_ae_tidy)

# 06-tables-gtsummary.R
# Table Exercise: Demographic summary table using {gtsummary}
# Create a Demography table split by treatment
# Setup
## Load necessary packages
library(gtsummary)
library(tidyverse)
## Import data
# One row per safety-population subject: selected ADSL demographics plus the
# first (ordered by ADY) non-missing BMI, HEIGHT and WEIGHT value from ADVS.
df_gtsummary_exercise <- pharmaverseadam::adsl |>
  dplyr::filter(SAFFL == "Y") |>
  dplyr::left_join(
    pharmaverseadam::advs |>
      dplyr::filter(PARAMCD %in% c("BMI", "HEIGHT", "WEIGHT") & !is.na(AVAL)) |>
      dplyr::arrange(ADY) |>
      dplyr::slice(1, .by = c(USUBJID, PARAMCD)) |>
      tidyr::pivot_wider(
        id_cols = USUBJID,
        names_from = PARAMCD,
        values_from = AVAL
      ),
    by = "USUBJID"
  ) |>
  dplyr::select(
    USUBJID, TRT01A, AGE, AGEGR1, SEX, RACE, ETHNIC, BMI, HEIGHT, WEIGHT
  ) |>
  labelled::set_variable_labels(
    BMI = "BMI",
    HEIGHT = "Height, cm",
    WEIGHT = "Weight, kg"
  )
# 1. Use tbl_summary() to summarize AGE, AGEGR1, SEX, RACE, ETHNIC, BMI, HEIGHT, WEIGHT by TRT01A
# 2. For all continuous variables, present the following stats: c("{mean} ({sd})", "{median} ({p25}, {p75})", "{min}, {max}")
# 3. Ensure the AGEGR1 levels are reported in the correct order
# 4. View the ARD saved in the gtsummary table using `gather_ard()` function
# BONUS!
# 5. Add the header "**Active Treatment**" over the 'Xanomeline' treatments using the `modify_spanning_header()` function
tbl <- df_gtsummary_exercise |>
  # ensure the age groups print in the correct order
  mutate(AGEGR1 = factor(AGEGR1, levels = c("18-64", ">64"))) |>
  tbl_summary(
    by = TRT01A,
    include = c(AGE, AGEGR1, SEX, RACE, ETHNIC, BMI, HEIGHT, WEIGHT),
    # all continuous variables should be summarized as multi-row
    type = all_continuous() ~ "continuous2",
    # change the statistics for all continuous variables
    statistic = all_continuous() ~ c(
      "{mean} ({sd})",
      "{median} ({p25}, {p75})",
      "{min}, {max}"
    ),
    # add a label for AGEGR1
    label = AGEGR1 ~ "Age Group"
  ) |>
  # add a header above the 'Xanomeline' treatments.
  # We used `show_header_names()` to know the column names
  modify_spanning_header(c(stat_2, stat_3) ~ "**Active Treatment**")

tbl

# extract the ARD from the table
gather_ard(tbl)