# Load the Prolific export, keep only the columns used downstream, and give
# the two capitalised headers short snake_case names.
prolific_export <- read_csv("../data/prolific_export.csv") %>%
  select(
    participant_id,
    entered_code,
    time_taken,
    age,
    `Current Country of Residence`,
    Sex
  ) %>%
  rename(
    country = `Current Country of Residence`,
    sex = Sex
  )

# Load the experiment results: the participant id, the three allocation
# columns (renamed to snake_case), and every `timestamp*` column.
results <- read_csv("../data/results.csv") %>%
  select(
    participant_id,
    fundAllocation_UNHCR,
    fundAllocation_IOM,
    donationAllocation,
    starts_with("timestamp")
  ) %>%
  rename(
    fund_allocation_southeast_asia = fundAllocation_UNHCR,
    fund_allocation_middle_east = fundAllocation_IOM,
    donation_allocation_southeast_asia = donationAllocation
  )

# Load the exclusion flags. The id is forced to character because the column
# is not parsed as character when every value is NA, which would break the
# join on `participant_id` later on.
excluded <- mutate(
  select(
    read_csv("../data/excluded.csv"),
    participant_id,
    failed_attention_check,
    reloaded
  ),
  participant_id = as.character(participant_id)
)

# Load everyone who agreed to take part, together with their condition.
agreed <- select(
  read_csv("../data/all_who_agreed.csv"),
  participant_id,
  condition
)

# Join the four datasets and clean up a bit.
# `agreed` is the base table: exclusion flags and the Prolific export are
# left-joined onto it, while `results` is full-joined, so result rows without
# a matching `agreed` row are still present after this step.
data_all <- agreed %>%
  left_join(excluded, by = "participant_id") %>%
  left_join(prolific_export, by = "participant_id") %>%
  full_join(results, by = "participant_id") %>%
  # `%in%` never yields NA, so this drops exactly the listed participants.
  filter(!participant_id %in% exclude_participant_ids) %>%
  mutate(
    # Replace NAs with zeros. The flags are cast to numeric first: when a flag
    # column is entirely NA, read_csv() guesses logical, and coalescing a
    # logical column with the double `0` is a type error in older dplyr.
    failed_attention_check = coalesce(as.numeric(failed_attention_check), 0),
    reloaded = coalesce(as.numeric(reloaded), 0)
  )

# Clean up possibly serious anomalies in the data and issue warnings if necessary.
# Two anomalies are handled, each reported only when it actually occurs:
#   1. rows without a participant id or a condition are dropped;
#   2. repeated participant ids are reduced to their first row.
n_incomplete <- sum(is.na(data_all$participant_id) | is.na(data_all$condition))
if (n_incomplete > 0) {
  warning(
    n_incomplete,
    " row(s) without participant_id or condition were dropped from data_all.",
    call. = FALSE
  )
}
data_all <- data_all %>%
  drop_na(participant_id, condition)

# Counted after the NA rows are gone so the number matches what distinct() removes.
n_duplicated <- sum(duplicated(data_all$participant_id))
if (n_duplicated > 0) {
  warning(
    n_duplicated,
    " duplicated participant_id row(s) were removed from data_all",
    " (first occurrence kept).",
    call. = FALSE
  )
}
data_all <- data_all %>%
  distinct(participant_id, .keep_all = TRUE)

# Completed submissions only: rows whose entered code equals `completion_code`.
# Rows with a missing `entered_code` are dropped as well, because filter()
# discards rows where the comparison evaluates to NA.
data <- filter(data_all, entered_code == completion_code)

# Page identifiers in presentation order; used as the ordered factor levels
# for `page` when the timestamp data is reshaped.
sequence <- c(
  "important", "consent", "overview", "example", "ready",
  "vis_sea", "unhcr", "unhcr_allocation", "unhcr_justification",
  "vis_me", "iom", "iom_allocation", "iom_justification",
  "donation", "donation_justification",
  "attention", "demographics"
)

# Transforms data to analyze the timestamps.
# For each page, the time spent is the difference between two consecutive
# `timestamp_*` columns. The result is reshaped to long format with one row
# per (participant, page) and the columns `condition`, `page` and `time`.
timestamp_data <- data %>%
  mutate(
    ts_important = timestamp_1 - timestamp_0,
    ts_consent = timestamp_2 - timestamp_1,
    ts_overview = timestamp_3 - timestamp_2,
    ts_example = timestamp_4 - timestamp_3,
    ts_ready = timestamp_5 - timestamp_4,
    ts_vis_sea = timestamp_6 - timestamp_5,
    ts_unhcr = timestamp_7 - timestamp_6,
    ts_unhcr_allocation = timestamp_8 - timestamp_7,
    ts_unhcr_justification = timestamp_9 - timestamp_8,
    ts_vis_me = timestamp_10 - timestamp_9,
    ts_iom = timestamp_11 - timestamp_10,
    ts_iom_allocation = timestamp_12 - timestamp_11,
    ts_iom_justification = timestamp_13 - timestamp_12,
    ts_donation = timestamp_14 - timestamp_13,
    ts_donation_justification = timestamp_15 - timestamp_14,
    ts_attention = timestamp_16 - timestamp_15,
    # NOTE(review): timestamp_17 is skipped here, so this spans two recorded
    # events. Presumably intentional -- confirm against the experiment code.
    ts_demographics = timestamp_18 - timestamp_16
  ) %>%
  # Divide by 1000; presumably milliseconds -> seconds (plots label this "Time (s)").
  mutate_at(vars(starts_with("ts_")), function(d) d / 1000) %>%
  select(starts_with("ts_"), condition) %>%
  melt(id.vars = c("condition")) %>%
  rename(
    page = variable,
    time = value
  ) %>%
  # Strip only the leading "ts_" prefix and order pages as listed in `sequence`.
  mutate(page = factor(str_remove(page, "^ts_"), levels = sequence, ordered = TRUE))
# Plot components shared by all timestamp figures: axis/legend titles, the
# "Accent" colour palette, and slanted grey x-axis tick labels.
basic_elements_layer <- list(
  labs(
    x = "Experiment Pages",
    y = "Time (s)",
    color = "Condition"
  ),
  scale_color_brewer(palette = "Accent"),
  theme(
    axis.text.x = element_text(angle = 30, color = "gray40")
  )
)

# 1 Overview of time spent on every page

# Every individual page time as a jittered point, with the per-page median
# drawn on top as a larger gold point.
ggplot(
  data = timestamp_data %>%
    group_by(page) %>%
    mutate(median_time = median(time)),
  mapping = aes(x = page, y = time)
) +
  geom_jitter(color = "orangered", alpha = .2, width = .1) +
  geom_point(aes(y = median_time), size = 2, color = "gold") +
  basic_elements_layer

# 2 Time distribution between conditions

# Per-page time distribution, one box per condition, zoomed to 0-175 s.
# coord_cartesian() is used for the zoom instead of scale limits: scale limits
# discard observations above 175 s *before* the boxplot statistics are
# computed, which would bias the medians and quartiles shown.
timestamp_data %>%
  ggplot(aes(
    x = page,
    y = time,
    color = condition
  )) +
  # outlier.shape = NA hides the outlier points without removing any data.
  geom_boxplot(outlier.shape = NA) +
  coord_cartesian(ylim = c(0, 175)) +
  basic_elements_layer +
  theme(legend.position = "top")

# 3 Median time on each page by condition

# Median time per page for each condition, drawn as points joined by one line
# per condition. The shared y-axis title is overridden for this figure.
ggplot(
  data = timestamp_data %>%
    group_by(condition, page) %>%
    summarise(median_time = median(time)),
  mapping = aes(x = page, y = median_time, color = condition, group = condition)
) +
  geom_point() +
  geom_line() +
  basic_elements_layer +
  labs(y = "Median Time (s)") +
  theme(legend.position = "top")