-
Notifications
You must be signed in to change notification settings - Fork 45
Expand file tree
/
Copy path01-group_methods_into_tools.R
More file actions
141 lines (117 loc) · 5.31 KB
/
01-group_methods_into_tools.R
File metadata and controls
141 lines (117 loc) · 5.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#' Grouping methods into tools
#' (1) Grouping of methods into "tools", based on the google spreadsheet
#' (2) Some postprocessing of the dynmethods::methods
library(tidyverse)
library(googlesheets)
library(dynbenchmark)
experiment("03-methods")
# If it's your first time running this script, run this:
# gs_auth()
sheet <- gs_key("1Mug0yz8BebzWt8cmEW306ie645SBh_tDHwjVw4OFhlE")
## ............................................................................
## Methods ####
are_na <- function(x) map_lgl(x, is.na)
methods <- dynmethods::methods %>%
mutate(
method_tool_id = case_when(
!are_na(method_tool_id) ~ method_tool_id,
grepl("^projected_", method_id) ~ gsub("projected_", "", method_id),
TRUE ~ method_id
)
)
methods$wrapper_trajectory_types <- map(methods$wrapper_trajectory_types, function(x) {
x[x == "cyclic"] <- "cycle"
x
})
# add detects_... columns for trajectory types
trajectory_type_ids <- trajectory_types$id
methods_detects <-
methods$wrapper_trajectory_types %>%
map(~as.list(set_names(trajectory_type_ids %in% ., trajectory_type_ids))) %>%
bind_rows() %>%
rename_all(~paste0("detects_", .))
methods <- methods %>% bind_cols(methods_detects)
# add most complex trajectory type (the latest in dynwrap::trajectory_types)
methods$wrapper_most_complex_trajectory_type <- methods$wrapper_trajectory_types %>% map_chr(~ last(trajectory_type_ids[trajectory_type_ids %in% .]))
# add requires_priors column
methods$wrapper_requires_prior <- map_lgl(methods$wrapper_inputs, ~any(dynwrap::priors$prior_id %in% .$required))
methods$wrapper_required_priors <- map(methods$wrapper_inputs, ~intersect(dynwrap::priors$prior_id, .$required))
# TEMPORARY fix for wrapper types, awaiting updated dynmethods
wrapper_type_map <- c(
linear_trajectory = "linear",
cyclic_trajectory = "cyclic",
trajectory = "direct",
cell_graph = "cell_graph",
cluster_graph = "cluster_assignment",
control = NA,
dimred_projection = "orth_proj",
end_state_probabilities = "end_state_prob",
branch_trajectory = "direct"
)
methods$wrapper_type <- wrapper_type_map[methods$wrapper_type]
testthat::expect_true(all((methods$wrapper_type %in% dynwrap::wrapper_types$id) | is.na(methods$wrapper_type)))
# join with google sheet
methods_google <- sheet %>%
gs_read(ws = "methods")
if (length(setdiff(methods$method_id, methods_google$method_id))) {
stop(setdiff(methods$method_id, methods_google$method_id))
}
methods <- left_join(
methods,
methods_google,
"method_id"
)
methods$manuscript_publication_date <- as.Date(methods$manuscript_publication_date)
methods$manuscript_preprint_date <- as.Date(methods$manuscript_preprint_date)
# ____________________________________________________________________________
# Tools ####
tools_google <- sheet %>%
gs_read(ws = "tools")
tools <- methods %>%
filter(method_source == "tool") %>%
rename(tool_id = method_tool_id) %>%
group_by(tool_id) %>%
filter(row_number() == 1) %>%
ungroup()
tools <- tools %>%
full_join(tools_google, "tool_id")
tools_excluded <- sheet %>%
gs_read(ws = "tools_excluded", col_types = cols(.default = "c", method_evaluated = "l")) %>%
filter(!tool_id %in% tools$tool_id) %>%
mutate(wrapper_trajectory_types = map(wrapper_trajectory_types, str_split, ", ", simplify = TRUE)) %>%
mutate(wrapper_most_complex_trajectory_type = map_chr(wrapper_trajectory_types, ~ last(trajectory_type_ids[trajectory_type_ids %in% .]))) %>%
mutate(
manuscript_publication_date = as.Date(manuscript_publication_date),
manuscript_preprint_date = as.Date(manuscript_preprint_date)
)
tools <- bind_rows(tools, tools_excluded)
# Dates ------------------------------
tools$manuscript_date <- tools$manuscript_preprint_date
replace_date <- is.na(tools$manuscript_date) & !is.na(tools$manuscript_publication_date)
tools$manuscript_date[replace_date] <- tools$manuscript_publication_date[replace_date]
# Altmetrics ----------------------------
gsids <- tools$manuscript_google_scholar_cluster_id
cits <- map_int(seq_along(gsids), function(i) {
gsid <- gsids[[i]]
cat("Polling ", i, "/", length(gsids), ": ", gsid, sep = "")
# Sys.sleep(1)
cits <- google_scholar_num_citations(gsid)
cat(", number of citations: ", cits, "\n", sep = "")
as.integer(cits)
})
tools$manuscript_citations <- ifelse(is.na(cits), 0L, cits)
tools_altmetrics <- map(tools$manuscript_doi, function(doi) {
tryCatch(
rAltmetric::altmetrics(doi = doi) %>% rAltmetric::altmetric_data() %>% select(cited_by_posts_count) %>% mutate_all(as.numeric),
error = function(x) tibble(cited_by_posts_count = 0)
)
}) %>% bind_rows()
tools_altmetrics[is.na(tools_altmetrics)] <- 0
colnames(tools_altmetrics) <- paste0("manuscript_", colnames(tools_altmetrics))
tools <- bind_cols(tools, tools_altmetrics)
methods <- methods %>%
left_join(tools %>% select(method_tool_id = tool_id, manuscript_bibtex, manuscript_date, manuscript_citations, manuscript_cited_by_posts_count), by = "method_tool_id")
# ____________________________________________________________________________
# Save output ####
write_rds(methods, result_file("methods.rds"), compress = "xz")
write_rds(tools, result_file("tools.rds"), compress = "xz")