# Prolific export: keep the participant id, the entered completion code, the
# time taken and the demographic columns, renaming the two verbose Prolific
# headers to short snake_case names.
prolific_export <- select(
  read_csv("../data/prolific_export.csv"),
  participant_id,
  entered_code,
  time_taken,
  age,
  country = `Current Country of Residence`,
  sex = Sex
)
# Experiment results: keep the participant id, the three allocation answers
# (renamed to snake_case) and every column whose name starts with "timestamp".
results <- select(
  read_csv("../data/results.csv"),
  participant_id,
  fund_allocation_southeast_asia = fundAllocation_UNHCR,
  fund_allocation_middle_east = fundAllocation_IOM,
  donation_allocation_southeast_asia = donationAllocation,
  starts_with("timestamp")
)
# Exclusion flags: participant id plus the failed_attention_check and reloaded
# columns.
excluded <- read_csv("../data/excluded.csv") %>%
  # Cast the id to character up front; necessary if all ids are NAs.
  mutate(participant_id = as.character(participant_id)) %>%
  select(participant_id, failed_attention_check, reloaded)
# Everyone who agreed to take part, with the condition recorded for them.
agreed <- select(
  read_csv("../data/all_who_agreed.csv"),
  participant_id,
  condition
)
# Join the four datasets and clean up a bit.
# `agreed` is the base table. `excluded` and `prolific_export` are left-joined,
# so they only add columns to it; `results` is full-joined, so result rows whose
# participant_id is absent from `agreed` are kept as well (with NA elsewhere).
data_all <- agreed %>%
  left_join(excluded, by = "participant_id") %>%
  left_join(prolific_export, by = "participant_id") %>%
  full_join(results, by = "participant_id") %>%
  # Remove the participants listed in exclude_participant_ids (defined elsewhere).
  filter(!participant_id %in% exclude_participant_ids) %>%
  mutate(
    # Replace NAs with zeros. The flags are coerced to numeric first: a flag
    # column that holds no values at all is not read as numeric, and coalesce()
    # with the numeric 0 can then fail on the type mismatch.
    failed_attention_check = coalesce(as.numeric(failed_attention_check), 0),
    reloaded = coalesce(as.numeric(reloaded), 0)
  )
# Clean up possibly serious anomalies in the data and issue warnings if necessary.
# Two steps, each reporting how many rows it discarded:
#   1. drop rows with a missing participant_id or condition;
#   2. keep only the first row for each participant_id.
# local() keeps the intermediate objects out of the global environment.
data_all <- local({
  complete_rows <- data_all %>%
    drop_na(participant_id, condition)
  n_incomplete <- nrow(data_all) - nrow(complete_rows)
  if (n_incomplete > 0) {
    warning(
      n_incomplete,
      " row(s) with a missing participant_id or condition were dropped.",
      call. = FALSE
    )
  }

  unique_rows <- complete_rows %>%
    distinct(participant_id, .keep_all = TRUE)
  n_duplicated <- nrow(complete_rows) - nrow(unique_rows)
  if (n_duplicated > 0) {
    warning(
      n_duplicated,
      " row(s) with a duplicated participant_id were dropped.",
      call. = FALSE
    )
  }

  unique_rows
})
# Completed submissions only: rows whose entered code equals completion_code
# (defined elsewhere). Rows with a missing entered_code are dropped as well,
# because filter() discards rows where the comparison is NA.
data <- filter(data_all, entered_code == completion_code)
# Experiment pages in the order used for the levels of the `page` factor.
# Note: this variable shares its name with base::sequence().
sequence <- c(
  "important",
  "consent",
  "overview",
  "example",
  "ready",
  "vis_sea",
  "unhcr",
  "unhcr_allocation",
  "unhcr_justification",
  "vis_me",
  "iom",
  "iom_allocation",
  "iom_justification",
  "donation",
  "donation_justification",
  "attention",
  "demographics"
)
# Transforms data to analyze the timestamps.
# Each ts_<page> column is the difference between two timestamp_* columns. The
# result is reshaped to long form with columns condition, page and time, where
# time is the difference divided by 1000 and page is an ordered factor whose
# levels follow `sequence`.
# NOTE(review): presumably the timestamps are in milliseconds, so `time` is in
# seconds (the plots label it "Time (s)") — confirm.
timestamp_data <- data %>%
  mutate(
    ts_important = timestamp_1 - timestamp_0,
    ts_consent = timestamp_2 - timestamp_1,
    ts_overview = timestamp_3 - timestamp_2,
    ts_example = timestamp_4 - timestamp_3,
    ts_ready = timestamp_5 - timestamp_4,
    ts_vis_sea = timestamp_6 - timestamp_5,
    ts_unhcr = timestamp_7 - timestamp_6,
    ts_unhcr_allocation = timestamp_8 - timestamp_7,
    ts_unhcr_justification = timestamp_9 - timestamp_8,
    ts_vis_me = timestamp_10 - timestamp_9,
    ts_iom = timestamp_11 - timestamp_10,
    ts_iom_allocation = timestamp_12 - timestamp_11,
    ts_iom_justification = timestamp_13 - timestamp_12,
    ts_donation = timestamp_14 - timestamp_13,
    ts_donation_justification = timestamp_15 - timestamp_14,
    ts_attention = timestamp_16 - timestamp_15,
    # NOTE(review): this spans timestamp_16 to timestamp_18 and skips
    # timestamp_17, unlike every other page — confirm this is intended.
    ts_demographics = timestamp_18 - timestamp_16
  ) %>%
  # Match on the full "ts_" prefix so only the columns created above are kept.
  select(starts_with("ts_"), condition) %>%
  melt(id.vars = c("condition")) %>%
  rename(page = variable, time = value) %>%
  mutate(
    time = time / 1000,
    # Strip the prefix only at the start of the name; TRUE rather than T,
    # because T is an ordinary variable that can be reassigned.
    page = factor(str_remove(page, "^ts_"), levels = sequence, ordered = TRUE)
  )
# Plot components shared by the timestamp figures: axis and legend titles, the
# "Accent" colour palette, and tilted gray x-axis labels. Add the list to a
# ggplot with `+`.
basic_elements_layer <- list(
  labs(
    x = "Experiment Pages",
    y = "Time (s)",
    color = "Condition"
  ),
  scale_color_brewer(palette = "Accent"),
  theme(
    axis.text.x = element_text(
      angle = 30,
      color = "gray40"
    )
  )
)
Overview of time spent on every page
# Jittered page times with the median of each page overlaid in gold.
timestamp_data %>%
  group_by(page) %>%
  # na.rm = TRUE: a single missing time must not turn the whole page's median
  # into NA (the jitter layer already leaves missing values out).
  mutate(median_time = median(time, na.rm = TRUE)) %>%
  # The grouping was only needed for the median; do not carry it into the plot.
  ungroup() %>%
  ggplot(aes(x = page, y = time)) +
  geom_jitter(alpha = .2, width = .1, color = "orangered") +
  geom_point(
    aes(y = median_time),
    color = "gold",
    size = 2
  ) +
  basic_elements_layer
Time distribution between conditions
# Box plots of the time spent on each page, split by condition, with the
# outlier points hidden.
timestamp_data %>%
  ggplot(aes(x = page, y = time, color = condition)) +
  geom_boxplot(outlier.color = NA) +
  # Zoom with coord_cartesian() rather than scale limits: scale limits discard
  # every observation above 175 before the box statistics are computed, which
  # distorts the quartiles, whereas coord_cartesian() only crops the view.
  coord_cartesian(ylim = c(0, 175)) +
  basic_elements_layer +
  theme(legend.position = "top")
Median time on each page by condition
# Median time per page for each condition, drawn as one connected line per
# condition.
timestamp_data %>%
  group_by(condition, page) %>%
  # na.rm = TRUE: a single missing time must not turn a whole
  # condition/page median into NA and leave a gap in the line.
  summarise(median_time = median(time, na.rm = TRUE)) %>%
  # summarise() leaves the result grouped by condition; drop that grouping.
  ungroup() %>%
  ggplot(aes(
    x = page,
    y = median_time,
    color = condition,
    group = condition
  )) +
  geom_point() +
  geom_line() +
  basic_elements_layer +
  # Overrides the generic "Time (s)" y label from basic_elements_layer.
  ylab("Median Time (s)") +
  theme(legend.position = "top")