Skip to content

Commit 1e4afad

Browse files
committed
merging
1 parent cec33b9 commit 1e4afad

2 files changed

Lines changed: 95 additions & 51 deletions

File tree

code/scenarios/accra/master_script_rj.R

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,19 +80,26 @@ for(i in 1:nDiseases){
8080
#################################################
8181
## Use case 3: sampling:
8282
## sample size, travel patterns, emissions (cleaner fleet)
83-
ithim_object <- run_ithim_setup(NSAMPLES = 32,
83+
set.seed(1)
84+
ithim_object <- run_ithim_setup(NSAMPLES = 1024,
8485
BUS_WALK_TIME = c(log(5), log(1.2)),
86+
MMET_WALKING = c(log(2.5), log(1.2)),
8587
MMET_CYCLING = c(log(5), log(1.2)),
8688
PM_CONC_BASE = c(log(50), log(1.2)),
87-
PM_TRANS_SHARE = c(5, 5),
89+
PM_TRANS_SHARE = c(5,20),
90+
INJURY_REPORTING_RATE = c(8,3),
91+
CHRONIC_DISEASE_SCALAR = c(log(1), log(1.2)),
92+
BACKGROUND_PA_SCALAR = c(log(1), log(1.2)),
8893
MC_TO_CAR_RATIO = c(-1.4,0.4),
8994
PA_DOSE_RESPONSE_QUANTILE = T,
9095
AP_DOSE_RESPONSE_QUANTILE = T)
9196

9297
numcores <- detectCores()
9398
ithim_object$outcomes <- mclapply(1:NSAMPLES, FUN = ithim_uncertainty, ithim_object = ithim_object, mc.cores = ifelse(Sys.info()[['sysname']] == "Windows", 1, numcores))
99+
for(i in 1:NSAMPLES) print(length(ithim_object$outcomes[[i]]))
94100

95101
plot(ithim_object$parameters$MC_TO_CAR_RATIO,sapply(ithim_object$outcomes,function(x)sum(x$hb$deaths[,40])))
102+
plot(ithim_object$parameters$INJURY_REPORTING_RATE,sapply(ithim_object$outcomes,function(x)sum(x$hb$deaths[,40])))
96103

97104
## calculate EVPPI
98105
parameter_names <- names(ithim_object$parameters)[names(ithim_object$parameters)!="DR_AP_LIST"]
@@ -133,11 +140,11 @@ certainty_parameters <- list(uncertain=list(
133140
background_pm = list(now=c(log(50),log(1.2)),safer=c(log(50),log(1.2)),more_chronic_disease=c(log(50),log(1.2)),less_background_AP=30.625, less_background_PA=c(log(50),log(1.2))),
134141
transport_pm = list(now=c(5,20), safer=c(5,20), more_chronic_disease=c(5,20), less_background_AP=0.3673469, less_background_PA=c(5,20)),
135142
background_pa_scalar = list(now=c(0,log(1.2)), safer=c(0,log(1.2)), more_chronic_disease=c(0,log(1.2)), less_background_AP=c(0,log(1.2)),less_background_PA=0.5),
136-
NSAMPLES = 4096,
143+
NSAMPLES = 1024,
137144
BUS_WALK_TIME = c(log(5), log(1.2)),
138145
MMET_CYCLING = c(log(5), log(1.2)),
139146
MMET_WALKING = c(log(2.5), log(1.2)),
140-
MC_TO_CAR_RATIO = 0.2,#c(-1.4,0.4),
147+
MC_TO_CAR_RATIO = c(-1.4,0.4),
141148
PA_DOSE_RESPONSE_QUANTILE = T,
142149
AP_DOSE_RESPONSE_QUANTILE = T
143150
), not_uncertain=list(
@@ -155,6 +162,8 @@ certainty_parameters <- list(uncertain=list(
155162
AP_DOSE_RESPONSE_QUANTILE = F
156163
))
157164

165+
ithim_object_list$uncertain$now <- ithim_object
166+
158167
file_name <- paste0('six_by_one_scenarios_',certainty_parameters$uncertain$NSAMPLES,'.Rds')
159168
if(file.exists(file_name)){
160169
ithim_object_list <- readRDS(file_name)
@@ -220,7 +229,8 @@ if(file.exists(file_name)){
220229
}
221230
}
222231
saveRDS(ithim_object_list,file_name)
223-
}
232+
233+
}
224234

225235
library(RColorBrewer)
226236
library(plotrix)
@@ -232,7 +242,7 @@ evppi <- ithim_object_list$uncertain$now$evppi
232242

233243
x11(width=5); par(mar=c(6,12,3.5,5.5))
234244
parameter_names <- c('walk-to-bus time','cycling mMETs','walking mMETs','background PM2.5','traffic PM2.5 share',#'motorcycle distance',
235-
'non-travel PA','street safety','non-communicable disease burden','all-cause mortality (PA)','IHD (PA)',
245+
'non-travel PA','injury reporting rate','non-communicable disease burden','all-cause mortality (PA)','IHD (PA)',
236246
'cancer (PA)','lung cancer (PA)','stroke (PA)','diabetes (PA)','IHD (AP)','lung cancer (AP)',
237247
'COPD (AP)','stroke (AP)')
238248
labs <- rownames(evppi)
@@ -251,8 +261,12 @@ fullaxis(side=2,las=1,at=(length(labs)-1):0+0.5,labels=parameter_names,line=NA,p
251261
mtext(3,text='By how much (%) could we reduce uncertainty in\n the outcome if we knew this parameter perfectly?',line=1)
252262
color.legend(5.5,0,5.5+0.3,length(labs),col.labels,rev(redCol),gradient="y",cex=1,align="rb")
253263

254-
x11(width=8,height=4); par(mfrow=c(2,4),mar=c(5,2,1,1));
255-
for(i in 1:8) plot(density(ithim_object_list$uncertain$now$parameters[[i]]),col='navyblue',xlab=names(ithim_object_list$uncertain$now$parameters)[i],ylab='',frame=F,main='',lwd=2)
264+
parameter_names <- c('walk-to-bus time','cycling mMETs','walking mMETs','background PM2.5','traffic PM2.5 share','motorcycle distance',
265+
'non-travel PA','injury reporting rate','NCD burden','all-cause mortality (PA)','IHD (PA)',
266+
'cancer (PA)','lung cancer (PA)','stroke (PA)','diabetes (PA)','IHD (AP)','lung cancer (AP)',
267+
'COPD (AP)','stroke (AP)')
268+
x11(width=6,height=6); par(mfrow=c(3,3),mar=c(5,3,1,1));
269+
for(i in 1:9) plot(density(ithim_object_list$uncertain$now$parameters[[i]]),cex.lab=1.5,cex.axis=1.5,col='navyblue',xlab=parameter_names[i],ylab='',frame=F,main='',lwd=2)
256270

257271
outcome <- t(sapply(ithim_object_list$uncertain$now$outcomes, function(x) colSums(x$hb$deaths[,(NSCEN+3):ncol(x$hb$deaths)])))
258272

ithim_r_functions.R

Lines changed: 73 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,8 @@ ithim_setup_parameters <- function(NSAMPLES = 1,
181181
for(age in unique(dr_ap$age_code)){
182182
dr_ap_age <- subset(dr_ap,age_code==age)
183183
#######################################
184-
##RJ I recommend the following as a better approximation to the distribution but it is currently v e r y slow
185-
## so I leave it here commented out until we want to develop it and/or use it
184+
185+
186186
lbeta <- log(dr_ap_age$beta)
187187
lgamma <- log(dr_ap_age$gamma)
188188
gamma_val <- quantile(density(lgamma),quant1)
@@ -228,7 +228,7 @@ ithim_load_data <- function(){
228228
DR_AP <<- read.csv("data/dose_response/AP/dose_response_AP.csv")
229229
INJ_DIST_EXP <<- read_csv('code/injuries/data/sin_coefficients_pairs.csv') ## injury distance exponent
230230
# root of list_of_files matches DISEASE_INVENTORY$pa_acronym
231-
list_of_files <- list.files(path = "data/drpa/extdata/", recursive = TRUE, pattern = "\\.csv$", full.names = TRUE)
231+
list_of_files <- list.files(path = "data/global/drpa/extdata/", recursive = TRUE, pattern = "\\.csv$", full.names = TRUE)
232232
for (i in 1:length(list_of_files)){
233233
assign(stringr::str_sub(basename(list_of_files[[i]]), end = -5),
234234
read_csv(list_of_files[[i]]),
@@ -590,11 +590,10 @@ create_all_scenarios <- function(trip_set){
590590
# Scenario 1
591591
source_modes <- c('Bus', 'Walking')
592592
target_modes <- c('Private Car')
593-
source_percentages <- c(0.16, 0.49)
593+
source_percentages <- round(c(0.16, 0.49)* tt)
594594
rdr <- create_scenario(rdr, scen_name = 'Scenario 1', source_modes = source_modes,
595595
target_modes = target_modes, source_distance_cats = DIST_CAT,
596-
source_trips = c(round(source_percentages[1] * tt),
597-
round(source_percentages[2] * tt)))
596+
source_trips = source_percentages)
598597
rd_list[[2]] <- rdr
599598
###############################################################
600599
# Scenario 2
@@ -609,7 +608,7 @@ create_all_scenarios <- function(trip_set){
609608
long_trips <- sum(total_car_trips$trip_distance_cat!=DIST_CAT[1])
610609
long_car_trips_sample <- create_scenario(total_car_trips, scen_name = 'Scenario 2', source_modes = source_modes, combined_modes = T,
611610
target_modes = target_modes, source_distance_cats = DIST_CAT[2:3],source_trips = long_trips)
612-
short_trips <- target_new_trips - long_trips
611+
short_trips <- min( target_new_trips - long_trips, sum(total_car_trips$trip_distance_cat==DIST_CAT[1]))
613612
if(short_trips>0){
614613
short_car_trips_sample <- create_scenario(total_car_trips, scen_name = 'Scenario 2', source_modes = source_modes, combined_modes = T,
615614
target_modes = target_modes, source_distance_cats = DIST_CAT[1],source_trips = short_trips)
@@ -633,23 +632,27 @@ create_all_scenarios <- function(trip_set){
633632
# x decrease private car
634633
source_modes <- c('Private Car')
635634
target_modes <- c('Motorcycle')
636-
target_new_trips <- round(0.1 * tt) - sum(rdr$trip_mode=='Motorcycle')
635+
target_new_trips <- max(round(0.1 * tt) - sum(rdr$trip_mode=='Motorcycle'),1)
637636
mcycle_trips_sample <- create_scenario(rdr, scen_name = 'Scenario 3', source_modes = source_modes,
638637
combined_modes = T, target_modes = target_modes,
639-
source_distance_cats = DIST_CAT, source_trips = c(round(0.1 * tt) -
640-
nrow(filter(rdr, trip_mode == 'Motorcycle'))))
638+
source_distance_cats = DIST_CAT, source_trips = target_new_trips)
639+
641640
# Update selected rows for mode and duration
642641
rdr$trip_mode[match(mcycle_trips_sample$row_id,rdr$row_id)] <- mcycle_trips_sample$trip_mode
643642
rdr$trip_duration[match(mcycle_trips_sample$row_id,rdr$row_id)] <- mcycle_trips_sample$trip_duration
644643
rdr$scenario <- "Scenario 3"
645644
rd_list[[4]] <- rdr
645+
#return(rd_list)
646646
###############################################################
647647
# Scenario 4
648648
rdr <- rd_list[[2]]
649649
# 3.5 % Cycle
650650
source_modes <- c('Motorcycle', 'Private Car', 'Taxi')
651651
target_modes <- c('Bicycle')
652-
target_new_trips <- c(52, round(0.035 * tt) - sum(rdr$trip_mode == 'Bicycle') - 52)
652+
mtrips <- max(min(52,sum(rdr$trip_mode == 'Motorcycle')),1)
653+
btrips <- sum(rdr$trip_mode == 'Bicycle')
654+
ctrips <- max(min(round(0.035 * tt) - btrips - mtrips, sum(rdr$trip_mode %in% c('Private Car', 'Taxi')&rdr$trip_distance_cat==DIST_CAT[1])),1)
655+
target_new_trips <- c(mtrips, ctrips)
653656
mbike_trips <- create_scenario(rdr, scen_name = 'Scenario 4', source_modes = source_modes[1],combined_modes = T,
654657
target_modes = target_modes,source_distance_cats = DIST_CAT,source_trips = target_new_trips[1])
655658
car_trips <- create_scenario(rdr, scen_name = 'Scenario 4', source_modes = c(source_modes[2], source_modes[3]),combined_modes = T,
@@ -658,7 +661,6 @@ create_all_scenarios <- function(trip_set){
658661
# Update selected rows for mode and duration
659662
rdr$trip_mode[match(car_mbike_trips$row_id,rdr$row_id)] <- car_mbike_trips$trip_mode
660663
rdr$trip_duration[match(car_mbike_trips$row_id,rdr$row_id)] <- car_mbike_trips$trip_duration
661-
#rdr %>% group_by(trip_mode) %>% summarise(c = dplyr::n(), p = dplyr::n() / nrow(rdr) * 100)
662664
rdr$scenario <- "Scenario 4"
663665
rd_list[[5]] <- rdr
664666
###############################################################
@@ -667,9 +669,9 @@ create_all_scenarios <- function(trip_set){
667669
# 3.5 % Cycle
668670
source_modes <- c('Private Car', 'Taxi')
669671
target_modes <- c('Walking')
670-
target_new_trips <- round(0.54 * tt) - sum(rdr$trip_mode == 'Walking')
671-
motorised_trips <- create_scenario(rdr, scen_name = 'Scenario 4', source_modes = c(source_modes[1], source_modes[2]),combined_modes = T,
672-
target_modes = target_modes,source_distance_cats = DIST_CAT[1],source_trips = target_new_trips[1])
672+
target_new_trips <- min(round(0.54 * tt) - sum(rdr$trip_mode == target_modes), sum(rdr$trip_mode%in%source_modes&rdr$trip_distance_cat==DIST_CAT[1]))
673+
motorised_trips <- create_scenario(rdr, scen_name = 'Scenario 4', source_modes = source_modes, combined_modes = T,
674+
target_modes = target_modes,source_distance_cats = DIST_CAT[1],source_trips = target_new_trips)
673675
# Update selected rows for mode and duration
674676
rdr$trip_mode[match(motorised_trips$row_id,rdr$row_id)] <- motorised_trips$trip_mode
675677
rdr$trip_duration[match(motorised_trips$row_id,rdr$row_id)] <- motorised_trips$trip_duration
@@ -682,18 +684,13 @@ create_all_scenarios <- function(trip_set){
682684
create_scenario <- function(rdr, scen_name, source_modes, combined_modes = F, target_modes, source_distance_cats,
683685
source_trips, target_trips){
684686
##!! RJ target_modes must be length 1
685-
local_source_trips <- list()
686-
if (!combined_modes){
687-
for (i in 1:length(source_modes))
688-
local_source_trips[i] <- sum(rdr$trip_mode == source_modes[i]) - source_trips[i]
689-
local_source_trips <- purrr::flatten_dbl(local_source_trips)
690-
}
691687
if (!combined_modes){
692688
for (i in 1:length(source_modes)){
689+
local_source_trips <- sum(rdr$trip_mode == source_modes[i]) - source_trips[i]
693690
candidate_trips <- filter(rdr,trip_mode == source_modes[i] &
694691
trip_distance_cat %in% source_distance_cats)
695-
sample_trips <- candidate_trips[sample(1:nrow(candidate_trips),local_source_trips[i]),]
696-
sample_trips$trip_mode <- target_modes;
692+
sample_trips <- candidate_trips[sample(1:nrow(candidate_trips),local_source_trips),]
693+
sample_trips$trip_mode <- target_modes
697694
sample_trips$trip_duration <- (sample_trips$trip_distance * 60) / VEHICLE_INVENTORY$speed[VEHICLE_INVENTORY$trip_mode == target_modes]
698695
# Update selected rows for mode and duration
699696
rdr$trip_mode[match(sample_trips$row_id,rdr$row_id)] <- sample_trips$trip_mode
@@ -706,7 +703,7 @@ create_scenario <- function(rdr, scen_name, source_modes, combined_modes = F, ta
706703
candidate_trips <- filter(rdr,trip_mode %in% source_modes &
707704
trip_distance_cat %in% source_distance_cats)
708705
sample_trips <- candidate_trips[sample(1:nrow(candidate_trips),source_trips),]
709-
sample_trips$trip_mode <- target_modes;
706+
sample_trips$trip_mode <- target_modes
710707
sample_trips$trip_duration <- (sample_trips$trip_distance * 60) / VEHICLE_INVENTORY$speed[VEHICLE_INVENTORY$trip_mode == target_modes]
711708
sample_trips$scenario <- scen_name
712709

@@ -904,8 +901,46 @@ distances_for_injury_function <- function(trip_scen_sets){
904901
}
905902
}
906903

904+
reg_model <- list()
905+
##TODO write formulae without prior knowledge of column names
906+
##TODO use all ages. ns.
907+
##TODO different formulae for whw and noov
908+
##!! need a catch for when regression fails. E.g., if fail, run simpler model.
909+
for(type in c('whw','noov')){
910+
injuries_list[[1]][[type]]$injury_reporting_rate <- 1
911+
suppressWarnings(reg_model[[type]] <- glm(count~cas_mode+strike_mode+cas_age+cas_gender,data=injuries_list[[1]][[type]],family='poisson',
912+
offset=0.5*log(cas_distance)+0.5*log(strike_distance)-log(injury_reporting_rate),control=glm.control(maxit=100)))
913+
reg_model[[type]] <- trim_glm_object(reg_model[[type]])
914+
}
915+
##
916+
## For predictive uncertainty, we could sample a number from the predicted distribution
917+
for(scen in SCEN)
918+
for(type in c('whw','noov'))
919+
injuries_list[[scen]][[type]] <- subset(injuries_list[[scen]][[type]],year==2016)
907920

908-
list(relative_distances=relative_distances,scen_dist=scen_dist,true_distances=true_distances,injuries_list=injuries_list)
921+
list(relative_distances=relative_distances,scen_dist=scen_dist,true_distances=true_distances,injuries_list=injuries_list,reg_model=reg_model)
922+
}
923+
924+
trim_glm_object <- function(obj){
925+
obj$y <- c()
926+
obj$model <- c()
927+
obj$R <- c()
928+
obj$qr$qr <- c()
929+
obj$residuals <- c()
930+
obj$fitted.values <- c()
931+
obj$effects <- c()
932+
#obj$linear.predictors <- c()
933+
obj$weights <- c()
934+
obj$prior.weights <- c()
935+
obj$data <- c()
936+
obj$family$variance = c()
937+
obj$family$dev.resids = c()
938+
obj$family$aic = c()
939+
obj$family$validmu = c()
940+
obj$family$simulate = c()
941+
#attr(obj$terms,".Environment") = c()
942+
attr(obj$formula,".Environment") = c()
943+
obj
909944
}
910945

911946
######################################################################
@@ -1128,11 +1163,8 @@ PA_dose_response <- function (cause, outcome_type, dose, confidence_intervals =
11281163
stop('Incidence does not exist for all_cause')
11291164
}
11301165
fname <- paste(cause, outcome_type, sep = "_")
1131-
lookup_table <- get(paste0(fname))
1166+
lookup_table <- get(fname)
11321167
lookup_df <- setDT(lookup_table)
1133-
#pert_75 <- stringr::str_sub(basename(list_of_files[[1]]), end = -5)
1134-
##RJ previously:
1135-
## cond <- ifelse(use_75_pert, abs(lookup_table$dose - dose), which.min(abs(lookup_table$dose - dose)))
11361168
rr <- approx(x=lookup_df$dose,y=lookup_df$RR,xout=dose,yleft=1,yright=min(lookup_df$RR))$y
11371169
if (confidence_intervals || PA_DOSE_RESPONSE_QUANTILE==T) {
11381170
lb <-
@@ -1154,7 +1186,7 @@ PA_dose_response <- function (cause, outcome_type, dose, confidence_intervals =
11541186
}
11551187
if (PA_DOSE_RESPONSE_QUANTILE==T){
11561188
#rr <- truncnorm::qtruncnorm(get(paste0('PA_DOSE_RESPONSE_QUANTILE_',cause)), rr, sd=rr-lb,a=0, b=1)
1157-
rr <- qnorm(get(paste0('PA_DOSE_RESPONSE_QUANTILE_',cause)), rr, sd=(ub-lb)/1.96)
1189+
rr <- qnorm(get(paste0('PA_DOSE_RESPONSE_QUANTILE_',cause)), mean=rr, sd=(ub-lb)/1.96)
11581190
rr[rr<0] <- 0
11591191
}
11601192
if (confidence_intervals) {
@@ -1186,22 +1218,14 @@ combined_rr_pa_pa <- function(ind_pa,ind_ap){
11861218
ind_ap_pa
11871219
}
11881220

1189-
injuries_function_2 <- function(true_distances,injuries_list){
1190-
##!! move to distance calculation. store predictions and scale by distance.
1191-
reg_model <- list()
1192-
##TODO write formulae without prior knowledge of column names
1193-
##TODO use all ages. ns.
1194-
##TODO different formulae for whw and noov
1195-
for(type in c('whw','noov'))
1196-
reg_model[[type]] <- glm(count~cas_mode+strike_mode+cas_age+cas_gender,data=injuries_list[[1]][[type]],family='poisson',
1197-
offset=0.5*log(cas_distance)+0.5*log(strike_distance))
1198-
##
1221+
injuries_function_2 <- function(true_distances,injuries_list,reg_model){
1222+
11991223
## For predictive uncertainty, we could sample a number from the predicted distribution
12001224
injuries <- true_distances
12011225
injuries$Bus_driver <- 0
12021226
for(scen in SCEN){
12031227
for(type in c('whw','noov')){
1204-
injuries_list[[scen]][[type]] <- subset(injuries_list[[scen]][[type]],year==2016)
1228+
injuries_list[[scen]][[type]]$injury_reporting_rate <- INJURY_REPORTING_RATE
12051229
injuries_list[[scen]][[type]]$pred <- predict(reg_model[[type]],newdata = injuries_list[[scen]][[type]],type='response')
12061230
}
12071231
for(injured_mode in unique(injuries_list[[1]]$whw$cas_mode))
@@ -1416,6 +1440,12 @@ ithim_uncertainty <- function(ithim_object,seed=1){
14161440

14171441
run_ithim <- function(ithim_object,seed=1){
14181442
############################
1443+
#mmets_pp=NULL
1444+
#scenario_pm=NULL
1445+
#pm_conc_pp=NULL
1446+
injuries=NULL
1447+
ref_injuries=NULL
1448+
hb=NULL
14191449
## (0) SET UP
14201450
set.seed(seed)
14211451
for(i in 1:length(ithim_object))
@@ -1443,8 +1473,8 @@ run_ithim <- function(ithim_object,seed=1){
14431473
# Injuries calculation
14441474
for(i in 1:length(inj_distances))
14451475
assign(names(inj_distances)[i],inj_distances[[i]])
1446-
(injuries <- injuries_function(relative_distances,scen_dist))
1447-
#system.time(injuries <- injuries_function_2(true_distances,injuries_list))
1476+
#(injuries <- injuries_function(relative_distances,scen_dist))
1477+
(injuries <- injuries_function_2(true_distances,injuries_list,reg_model))
14481478
(deaths_yll_injuries <- injury_death_to_yll(injuries))
14491479
ref_injuries <- deaths_yll_injuries$ref_injuries
14501480
############################

0 commit comments

Comments
 (0)