Skip to content

Commit 0c9119d

Browse files
committed
# version 0.0.53:
- Added Build_DILI_Hotgenes.R to data-raw — builds a Hotgenes object from the publicly available DILI discovery proteomics dataset (Federspiel et al., MassIVE MSV000089782).
- Other cleanup
1 parent 1aa419f commit 0c9119d

10 files changed

Lines changed: 282 additions & 286 deletions

File tree

DESCRIPTION

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
Package: Hotgenes
22
Type: Package
3-
Title: Tools to simplify Omics DE analysis
4-
Version: 0.0.52
3+
Title: Tools to simplify Omics DE Analysis
4+
Version: 0.0.53
55
Author: Richard Virgen-Slane <Richard.Virgen-Slane@pfizer.com>
66
Maintainer: Richard Virgen-Slane <Richard.Virgen-Slane@pfizer.com>
77
Description: Converts outputs from DESeq2, limma, or a method of your
88
choice into a Hotgenes object to simplify downstream analysis.
99
Recall all results, specific genes, or identify genes of interest with
1010
hierarchical clustering on principal components.
11-
License: Apache License (>= 2)
11+
License: Apache License (>= 2) | file LICENSE
1212
Encoding: UTF-8
1313
LazyData: true
1414
RoxygenNote: 7.3.3
@@ -31,7 +31,6 @@ Imports:
3131
grid,
3232
GSVA (>= 1.46.0),
3333
janitor,
34-
knitr,
3534
limma,
3635
methods,
3736
msigdbr (>= 7.5.1),
@@ -61,13 +60,15 @@ Suggests:
6160
apeglm,
6261
edgeR,
6362
devtools,
63+
knitr,
6464
roxygen2,
6565
testthat,
6666
org.Hs.eg.db,
6767
org.Mm.eg.db,
6868
plotly,
6969
DelayedArray (>= 0.24.0),
7070
DBI,
71-
rmarkdown
71+
rmarkdown,
72+
vsn
7273
VignetteBuilder: knitr
7374
Depends: R (>= 4.2.0)

NEWS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# version 0.0.53:
2+
- Added Build_DILI_Hotgenes.R to data-raw — builds a Hotgenes object
3+
from the publicly available DILI discovery proteomics dataset
4+
(Federspiel et al., MassIVE MSV000089782).
5+
- Other clean up
6+
17
# version 0.0.52:
28
- Exported make_stat_frame()
39
- Cleaned up documentation in Venn methods.

R/GSVA.R

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
#' @param method string for method to use for GSVA package. Options
2323
#' include: c("ssgsea","gsva", "zscore", "plage").
2424
#' @export
25-
#' @example examples/Hotgeneslimma_Example.R
25+
#' @example examples/GSVA_Example.R
2626

2727
HotgeneSets <- function(Hotgenes = NULL,
2828
ExpressionSlots = NULL,
@@ -58,7 +58,7 @@ geneset_weights <- gsva_outList$geneset_weights
5858

5959
if(use_vooma){
6060

61-
glue::glue("using vooma") %>% message()
61+
cli::cli_inform("using vooma")
6262

6363
# limma method
6464
if(is.null(voomaGroup)){
@@ -87,9 +87,8 @@ vooma_plot <- NULL
8787
}
8888

8989
if(use_weights){
90-
glue::glue("using geneset weights") %>% message()
91-
9290

91+
cli::cli_inform("using geneset weights")
9392
trend <- geneset_weights
9493

9594
} else {
@@ -103,7 +102,8 @@ fit <- limma::lmFit(vm_exp)
103102

104103
# using contrast matrix
105104
if(!is.null(contrast_matrix)){
106-
glue::glue("using contrast matrix") %>% message()
105+
106+
cli::cli_inform("using contrast matrix")
107107

108108
fit_final <- limma::contrasts.fit(fit = fit,
109109
contrasts = contrast_matrix)
@@ -162,17 +162,18 @@ Matched_ExpSel <- match.arg(ExpressionSlots, ExprOptions)
162162

163163
# prepare to remap
164164
if (MapperCol == "Feature") {
165-
print("using Feature col")
166165

166+
cli::cli_inform("using Feature col")
167+
167168
NormalizedData <- Normalized_Data_(
168169
Hotgenes,
169170
slot = Matched_ExpSel
170171
) %>%
171172
as.matrix()
172173
} else if (MapperCol != "Feature") {
173-
paste0("using ", MapperCol, " col") %>%
174-
print()
175174

175+
cli::cli_inform("using {MapperCol} col")
176+
176177
# This converts ids from expression data to
177178
# ids supplied in original mapper slot
178179

@@ -198,7 +199,7 @@ as.matrix()
198199

199200
# build a new mapper for geneset names
200201

201-
print("building mapper")
202+
cli::cli_inform("building mapper")
202203

203204
Featur_s <- rownames(NormalizedData)
204205

README.md

Lines changed: 57 additions & 167 deletions
Large diffs are not rendered by default.

data-raw/Build_DILI_Hotgenes.R

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
# Build DILI Discovery Proteomics Hotgenes Object -------------------------
2+
# Source: Federspiel et al. (2023), J Hepatol
3+
# Data: MassIVE MSV000089782 (public deposit)
4+
#
5+
# Differences from the original analysis:
6+
# The original analysis used two variables from private metadata
7+
# (1219_p1-p5_protein_KEY.xlsx) that are not available in the public
8+
# MassIVE deposit:
9+
#
10+
# (1) subject: patient ID — used with duplicateCorrelation() to control
11+
# for repeated measures (DO and DF samples from the same patient).
12+
# Without subject IDs this step cannot be reproduced, so it is omitted here.
13+
#
14+
# (2) channel: exact TMT channel label (e.g. 126C, 127N) within each
15+
# pool — used with voomaByGroup() to compensate for channel-specific
16+
# variance. Pool (P1-P5) is used as a proxy here, as it captures
17+
# the dominant source of TMT batch variance.
18+
#
19+
# All other analytical steps — VSN normalization, voomaByGroup, robust
20+
# lmFit, and BH-adjusted p-value threshold of 0.1 — reproduce those
21+
# described in Federspiel et al.
22+
23+
24+
# 1. Load data ------------------------------------------------------------
25+
# dili_raw.RDS is a pre-parsed snapshot of the public MassIVE deposit
26+
# (MSV000089782), saved to inst/extdata to avoid a runtime download.
27+
# To refresh it, manually run the body of the `if (FALSE)` block below.
28+
29+
raw_df <- readRDS(
30+
system.file("extdata", "dili_raw.RDS",
31+
package = "Hotgenes",
32+
mustWork = TRUE)
33+
)
34+
35+
# To re-download and refresh dili_raw.RDS from MassIVE:
36+
if (FALSE) {
37+
url <- paste0(
38+
"https://massive.ucsd.edu/ProteoSAFe/DownloadResultFile?",
39+
"file=f.MSV000089782%2Fupdates%2F2023-02-28_jfederspiel_1bc96582",
40+
"%2Fother%2FDILI_discovery_data.xlsx&forceDownload=true"
41+
)
42+
43+
tmp <- tempfile(fileext = ".xlsx")
44+
download.file(url, tmp, mode = "wb")
45+
46+
raw_df <- readxl::read_excel(tmp) |>
47+
janitor::clean_names()
48+
49+
saveRDS(
50+
raw_df,
51+
file = file.path(getwd(), "inst", "extdata", "dili_raw.RDS")
52+
)
53+
}
54+
55+
56+
# 2. Sample columns -------------------------------------------------------
57+
58+
sample_cols <- colnames(raw_df)[
59+
grepl("^(do|df|hv|nafld|ndo|ndf)_", colnames(raw_df))
60+
]
61+
62+
63+
# 3. Filter proteins ------------------------------------------------------
64+
# - remove contaminants
65+
# - keep reviewed UniProt entries (^sp|)
66+
# - keep min_peps > 0
67+
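# - derive unique Feature names from gene symbols via make.names() (required for GSEA; note that make.names() also rewrites characters that are invalid in R names, e.g. "-" becomes ".")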
68+
69+
filtered_exps <- raw_df |>
70+
dplyr::filter(!grepl("contaminant", .data$protein)) |>
71+
dplyr::filter(grepl("^sp[|]", .data$protein)) |>
72+
dplyr::filter(.data$min_peps > 0) |>
73+
dplyr::mutate(Feature = make.names(.data$gene_symbol, unique = TRUE))
74+
75+
76+
# 4. Expression matrix ----------------------------------------------------
77+
78+
expr_matrix <- filtered_exps |>
79+
dplyr::select("Feature", dplyr::any_of(sample_cols)) |>
80+
tibble::column_to_rownames("Feature") |>
81+
as.matrix()
82+
83+
84+
# 5. Protein ID mapper ----------------------------------------------------
85+
86+
mapper_df <- filtered_exps |>
87+
dplyr::select(
88+
"Feature",
89+
"Gene" = "gene_symbol",
90+
"Protein" = "protein",
91+
"Description" = "description"
92+
)
93+
94+
95+
# 6. Sample metadata (coldata) --------------------------------------------
96+
97+
coldata <- data.frame(
98+
Sample = sample_cols,
99+
Condition = toupper(sub("_p[0-9]+_[0-9]+$", "", sample_cols)),
100+
Pool = toupper(sub(".*_(p[0-9]+)_.*", "\\1", sample_cols)),
101+
row.names = sample_cols,
102+
stringsAsFactors = TRUE
103+
)
104+
105+
coldata[["Condition"]] <- factor(
106+
coldata[["Condition"]],
107+
levels = c("HV", "DO", "DF", "NDO", "NDF", "NAFLD")
108+
)
109+
110+
111+
# 7. VSN normalization ----------------------------------------------------
112+
113+
expr_matrix[is.na(expr_matrix)] <- 0
114+
expr_matrix_vsn <- limma::normalizeVSN(expr_matrix)
115+
116+
117+
# 8. Design matrix --------------------------------------------------------
118+
119+
design <- model.matrix(~ 0 + Condition, data = coldata)
120+
colnames(design) <- gsub("Condition", "", colnames(design))
121+
122+
123+
# 9. voomaByGroup ---------------------------------------------------------
124+
125+
vm_exp <- limma::voomaByGroup(
126+
y = expr_matrix_vsn,
127+
group = coldata[["Pool"]],
128+
design = design,
129+
plot = FALSE
130+
)
131+
132+
133+
# 10. Robust lmFit --------------------------------------------------------
134+
# Note: duplicateCorrelation() is omitted — subject IDs required to model
135+
# repeated measures (DO/DF pairing) are not available in the public data.
136+
137+
fit <- limma::lmFit(
138+
vm_exp,
139+
design,
140+
method = "robust"
141+
)
142+
143+
144+
# 11. Contrasts -----------------------------------------------------------
145+
146+
contrasts_mat <- limma::makeContrasts(
147+
DO_vs_HV = DO - HV,
148+
DF_vs_HV = DF - HV,
149+
NDO_vs_HV = NDO - HV,
150+
NDF_vs_HV = NDF - HV,
151+
NAFLD_vs_HV = NAFLD - HV,
152+
DF_vs_DO = DF - DO,
153+
NDO_vs_DO = NDO - DO,
154+
levels = design
155+
)
156+
157+
fit2 <- limma::contrasts.fit(fit, contrasts_mat)
158+
fit2 <- limma::eBayes(fit2)
159+
160+
161+
# 12. Hotgenes object -----------------------------------------------------
162+
163+
dili_hotgenes <- Hotgenes::Hotgeneslimma(
164+
limmafit = fit2,
165+
coldata = coldata,
166+
Expression = vm_exp,
167+
Expression_name = "VSN",
168+
Exps_list = list(log2 = log2(expr_matrix + 1)),
169+
Mapper = mapper_df
170+
)
171+
172+
dili_hotgenes
173+
174+
rm(list = ls())

examples/Hotgeneslimma_Example.R

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ if(interactive()) {
1515

1616
# Hotgeneslimma -----------------------------------------------------------
1717
require(DESeq2)
18-
#require(limma)
19-
#require(edgeR)
2018

2119
dds_con_dir <- system.file("extdata",
2220
"dds_con.Rdata",

inst/extdata/dili_raw.RDS

839 KB
Binary file not shown.

0 commit comments

Comments
 (0)