Generate references from the content

maxim-k · maxim-k · commit a9b6abc012bb · 2022-02-17T21:21:36.000-05:00
diff --git a/appyters/Bulk_RNA_seq/RNA_seq_Analysis_Pipeline.ipynb b/appyters/Bulk_RNA_seq/RNA_seq_Analysis_Pipeline.ipynb
@@ -580,7 +580,7 @@
    "outputs": [],
    "source": [
     "dataset['dataset_metadata'] = meta_df\n",
-    "\n",
+    "ref_counter = ['Clark, N.R. and Ma’ayan, A. (2011) Introduction to statistical methods to analyze large data sets: principal components analysis. Sci. Signal., 4, tr3-tr3.']\n",
     "table_counter, notebook_metadata = display_object(table_counter, \"Raw RNA-seq expression data. The table displays the first 5 rows of the quantified RNA-seq expression dataset. Rows represent genes, columns represent samples, and values show the number of mapped reads.\", notebook_metadata, \"raw_exp.csv\", dataset[current_dataset].head(), istable=True)\n",
     "table_counter, notebook_metadata = display_object(table_counter, \"Metadata. The table displays the metadata associated with the samples in the RNA-seq dataset. Rows represent RNA-seq samples, columns represent metadata categories.\", notebook_metadata, \"metadata.csv\", dataset['dataset_metadata'].head(), istable=True)\n",
     "table_counter, notebook_metadata = display_object(table_counter, \"Sample size for each class. The table displays the number of samples in each class.\", notebook_metadata, \"num_of_samples_in_class.csv\", dataset['dataset_metadata'].reset_index().groupby(meta_class_column_name).count(), istable=True)"
@@ -682,7 +682,7 @@
    "source": [
     "# Run analysis\n",
     "results['clustergrammer'] = run_clustergrammer(dataset=dataset, meta_class_column_name=meta_class_column_name, nr_genes=clustering_topk, normalization=normalization, z_score=True, gene_list=gene_list_for_clustergrammer)\n",
-    "\n",
+    "ref_counter.append('Fernandez, Nicolas F., et al. \"Clustergrammer, a web-based heatmap visualization and analysis tool for high-dimensional biological data.\" Scientific data 4 (2017): 170151.')\n",
     "# Display results\n",
     "plot_clustergrammar(results['clustergrammer'])\n",
     "caption = \"Clustered heatmap plot. The figure contains an interactive heatmap displaying gene expression for each sample in the RNA-seq dataset. Every row of the heatmap represents a gene, every column represents a sample, and every cell displays normalized gene expression values. The heatmap additionally features color bars beside each column which represent prior knowledge of each sample, such as the tissue of origin or experimental treatment.\"\n",
@@ -745,7 +745,7 @@
    "outputs": [],
    "source": [
     "signatures = get_signatures(classes, dataset, normalization, diff_gex_method, meta_class_column_name, filter_genes)\n",
-    "\n",
+    "ref_counter.extend(['Ritchie, Matthew E., et al. limma powers differential expression analyses for RNA-sequencing and microarray studies. Nucleic acids research 43.7 (2015): e47-e47.', 'Clark, Neil R., et al. The characteristic direction: a geometrical approach to identify differentially expressed genes. BMC bioinformatics 15.1 (2014): 79.'])\n",
     "for label, signature in signatures.items():\n",
     "    case_label = label.split(\" vs. \")[1]\n",
     "    table_counter, notebook_metadata = display_object(table_counter, \"Differentially expressed genes between {} using {}. The figure displays a browsable table containing the gene expression signature generated from a differential gene expression analysis. Every row of the table represents a gene; the columns display the estimated measures of differential expression.\".format(label, diff_gex_method), notebook_metadata, \"DEG_results_{}.csv\".format(label), signature, istable=True)\n",
@@ -806,6 +806,7 @@
    "outputs": [],
    "source": [
     "# Loop through signatures\n",
+    "ref_counter.append('Kuleshov, M.V., Jones, M.R., Rouillard, A.D., Fernandez, N.F., Duan, Q., Wang, Z., Koplev, S., Jenkins, S.L., Jagodnik, K.M. and Lachmann, A. (2016) Enrichr: a comprehensive gene set enrichment analysis web server 2016 update. Nucleic acids research, 44, W90-W97.')\n",
     "results = {}\n",
     "results['enrichr']= {}\n",
     "if diff_gex_method == \"characteristic_direction\":\n",
@@ -850,7 +851,7 @@
     "%%appyter markdown\n",
     "{% if \"Gene Ontology\" in enrichr_libraries.value %}\n",
     "# GO Enrichment Analysis\n",
-    "Gene Ontology (GO) (Ashburner et al. 2000) is a major bioinformatics initiative aimed at unifying the representation of gene attributes across all species. It contains a large collection of experimentally validated and predicted associations between genes and biological terms. This information can be leveraged by Enrichr to identify the biological processes, molecular functions and cellular components which are over-represented in the up-regulated and down-regulated genes identified by comparing two groups of samples.\n",
+    "Gene Ontology (GO) [{{ref_counter|length + 1}}] is a major bioinformatics initiative aimed at unifying the representation of gene attributes across all species. It contains a large collection of experimentally validated and predicted associations between genes and biological terms. This information can be leveraged by Enrichr to identify the biological processes, molecular functions and cellular components which are over-represented in the up-regulated and down-regulated genes identified by comparing two groups of samples.\n",
     "{% endif %}"
    ]
   },
@@ -862,7 +863,7 @@
    "source": [
     "%%appyter code_exec\n",
     "{% if \"Gene Ontology\" in enrichr_libraries.value %}\n",
-    "\n",
+    "ref_counter.append('Ashburner, M., Ball, C.A., Blake, J.A., Botstein, D., Butler, H., Cherry, J.M., Davis, A.P., Dolinski, K., Dwight, S.S. and Eppig, J.T. (2000) Gene Ontology: tool for the unification of biology. Nature genetics, 25, 25.')\n",
     "results['go_enrichment'] = {}\n",
     "for label, signature in signatures.items():\n",
     "    # Run analysis\n",
@@ -891,7 +892,7 @@
     "%%appyter markdown\n",
     "{% if \"Pathway\" in enrichr_libraries.value %}\n",
     "# Pathway Enrichment Analysis\n",
-    "Biological pathways are sequences of interactions between biochemical compounds which play a key role in determining cellular behavior. Databases such as KEGG (Kanehisa et al. 2000), Reactome (Croft et al. 2014) and WikiPathways (Kelder et al. 2012) contain a large number of associations between such pathways and genes. This information can be leveraged by Enrichr to identify the biological pathways which are over-represented in the up-regulated and down-regulated genes identified by comparing two groups of samples.\n",
+    "Biological pathways are sequences of interactions between biochemical compounds which play a key role in determining cellular behavior. Databases such as KEGG [{{ref_counter|length + 1}}], Reactome [{{ref_counter|length + 2}}] and WikiPathways [{{ref_counter|length + 3}}] contain a large number of associations between such pathways and genes. This information can be leveraged by Enrichr to identify the biological pathways which are over-represented in the up-regulated and down-regulated genes identified by comparing two groups of samples.\n",
     "{% endif %}"
    ]
   },
@@ -904,6 +905,7 @@
     "%%appyter code_exec\n",
     "{% if \"Pathway\" in enrichr_libraries.value %}\n",
     "# Initialize results\n",
+    "ref_counter.extend(['Kanehisa, M. and Goto, S. (2000) KEGG: kyoto encyclopedia of genes and genomes. Nucleic acids research, 28, 27-30.', 'Croft, David, et al. The Reactome pathway knowledgebase. Nucleic acids research 42.D1 (2014): D472-D477.', 'Kelder, Thomas, et al. WikiPathways: building research communities on biological pathways. Nucleic acids research 40.D1 (2012): D1301-D1307.'])\n",
     "results['pathway_enrichment'] = {}\n",
     "\n",
     "# Loop through results\n",
@@ -935,7 +937,7 @@
     "%%appyter markdown\n",
     "{% if \"Transcription Factor\" in enrichr_libraries.value %}\n",
     "# Transcription Factor Enrichment Analysis\n",
-    "Transcription Factors (TFs) are proteins involved in the transcriptional regulation of gene expression. Databases such as ChEA (Lachmann et al. 2010) and ENCODE (Consortium, 2014) contain a large number of associations between TFs and their transcriptional targets. This information can be leveraged by Enrichr to identify the transcription factors whose targets are over-represented in the up-regulated and down-regulated genes identified by comparing two groups of samples.\n",
+    "Transcription Factors (TFs) are proteins involved in the transcriptional regulation of gene expression. Databases such as ChEA [{{ref_counter|length + 1}}] and ENCODE [{{ref_counter|length + 2}}] contain a large number of associations between TFs and their transcriptional targets. This information can be leveraged by Enrichr to identify the transcription factors whose targets are over-represented in the up-regulated and down-regulated genes identified by comparing two groups of samples.\n",
     "{% endif %}"
    ]
   },
@@ -948,6 +950,7 @@
     "%%appyter code_exec\n",
     "{% if \"Transcription Factor\" in enrichr_libraries.value %}\n",
     "# Initialize results\n",
+    "ref_counter.extend(['Lachmann, A., Xu, H., Krishnan, J., Berger, S.I., Mazloom, A.R. and Ma\\'ayan, A. (2010) ChEA: transcription factor regulation inferred from integrating genome-wide ChIP-X experiments. Bioinformatics, 26, 2438-2444.',  'ENCODE Consortium (2004) The ENCODE (ENCyclopedia of DNA elements) project. Science, 306, 636-640.'])\n",
     "results['tf_enrichment'] = {}\n",
     "\n",
     "# Loop through results\n",
@@ -968,7 +971,7 @@
     "%%appyter markdown\n",
     "{% if \"Kinase\" in enrichr_libraries.value %}\n",
     "# Kinase Enrichment Analysis\n",
-    "Protein kinases are enzymes that modify other proteins by chemically adding phosphate groups. Databases such as KEA (Lachmann et al. 2009) contain a large number of associations between kinases and their substrates. This information can be leveraged by Enrichr to identify the protein kinases whose substrates are over-represented in the up-regulated and down-regulated genes identified by comparing two groups of samples.\n",
+    "Protein kinases are enzymes that modify other proteins by chemically adding phosphate groups. Databases such as KEA [{{ref_counter|length + 1}}] contain a large number of associations between kinases and their substrates. This information can be leveraged by Enrichr to identify the protein kinases whose substrates are over-represented in the up-regulated and down-regulated genes identified by comparing two groups of samples.\n",
     "{% endif %}"
    ]
   },
@@ -981,6 +984,7 @@
     "%%appyter code_exec\n",
     "{% if \"Kinase\" in enrichr_libraries.value %}\n",
     "# Initialize results\n",
+    "ref_counter.append('Lachmann, Alexander, and Avi Ma\\'ayan. \"KEA: kinase enrichment analysis.\" Bioinformatics 25.5 (2009): 684-686.')\n",
     "results['kinase_enrichment'] = {}\n",
     "\n",
     "# Loop through results\n",
@@ -1002,7 +1006,7 @@
     "%%appyter markdown\n",
     "{% if \"miRNA\" in enrichr_libraries.value %}\n",
     "# miRNA Enrichment Analysis\n",
-    "microRNAs (miRNAs) are small non-coding RNA molecules which play a key role in the post-transcriptional regulation of gene expression. Databases such as TargetScan (Agarwal et al. 2015) and MiRTarBase (Chou et al. 2016) contain a large number of associations between miRNAs and their targets. This information can be leveraged by Enrichr to identify the miRNAs whose targets are over-represented in the up-regulated and down-regulated genes identified by comparing two groups of samples.\n",
+    "microRNAs (miRNAs) are small non-coding RNA molecules which play a key role in the post-transcriptional regulation of gene expression. Databases such as TargetScan [{{ref_counter|length + 1}}] and MiRTarBase [{{ref_counter|length + 2}}] contain a large number of associations between miRNAs and their targets. This information can be leveraged by Enrichr to identify the miRNAs whose targets are over-represented in the up-regulated and down-regulated genes identified by comparing two groups of samples.\n",
     "{% endif %}"
    ]
   },
@@ -1014,6 +1018,7 @@
    "source": [
     "%%appyter code_exec\n",
     "{% if \"miRNA\" in enrichr_libraries.value %}\n",
+    "ref_counter.extend(['Agarwal, Vikram, et al. Predicting effective microRNA target sites in mammalian mRNAs. elife 4 (2015): e05005.', 'Chou, Chih-Hung, et al. miRTarBase 2016: updates to the experimentally validated miRNA-target interactions database. Nucleic acids research 44.D1 (2016): D239-D247.'])\n",
     "\n",
     "results['mirna_enrichment'] = {}\n",
     "\n",
@@ -1036,7 +1041,7 @@
     "%%appyter markdown\n",
     "{% if small_molecule_method.value == \"L1000CDS2\" %}\n",
     "# L1000CDS2 Query\n",
-    "L1000CDS2 (Duan et al. 2016) is a web-based tool for querying gene expression signatures against signatures created from human cell lines treated with over 20,000 small molecules and drugs for the LINCS project. It is commonly used to identify small molecules which mimic or reverse the effects of a gene expression signature generated from a differential gene expression analysis.\n",
+    "L1000CDS2 [{{ref_counter|length + 1}}] is a web-based tool for querying gene expression signatures against signatures created from human cell lines treated with over 20,000 small molecules and drugs for the LINCS project. It is commonly used to identify small molecules which mimic or reverse the effects of a gene expression signature generated from a differential gene expression analysis.\n",
     "{% endif %}"
    ]
   },
@@ -1049,6 +1054,7 @@
     "%%appyter code_exec\n",
     "{% if small_molecule_method.value == \"L1000CDS2\" %}\n",
     "# Initialize results\n",
+    "ref_counter.append('Duan, Q., et al. L1000CDS2: LINCS L1000 characteristic direction signatures search engine. NPJ Syst Biol Appl. 2 (2016): 16015.')\n",
     "results['l1000cds2'] = {}\n",
     "\n",
     "# Loop through signatures\n",
@@ -1071,7 +1077,7 @@
     "%%appyter markdown\n",
     "{% if small_molecule_method.value == \"L1000FWD\" %}\n",
     "# L1000FWD Query\n",
-    "L1000FWD (Wang et al. 2018) is a web-based tool for querying gene expression signatures against signatures created from human cell lines treated with over 20,000 small molecules and drugs for the LINCS project.\n",
+    "L1000FWD [{{ref_counter|length + 1}}] is a web-based tool for querying gene expression signatures against signatures created from human cell lines treated with over 20,000 small molecules and drugs for the LINCS project.\n",
     "{% endif %}"
    ]
   },
@@ -1084,6 +1090,7 @@
     "%%appyter code_exec\n",
     "{% if small_molecule_method.value == \"L1000FWD\" %}\n",
     "# Initialize results\n",
+    "ref_counter.append('Wang, Zichen, et al. L1000FWD: fireworks visualization of drug-induced transcriptomic signatures. Bioinformatics 34.12 (2018): 2150-2152.')\n",
     "results['l1000fwd'] = {}\n",
     "\n",
     "# Loop through signatures\n",
@@ -1120,38 +1127,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "1. Clark, N.R. and Ma’ayan, A. (2011) Introduction to statistical methods to analyze large data sets: principal components analysis. Sci. Signal., 4, tr3-tr3.\n",
-    "<br>\n",
-    "2. Fernandez, Nicolas F., et al. \"Clustergrammer, a web-based heatmap visualization and analysis tool for high-dimensional biological data.\" Scientific data 4 (2017): 170151.\n",
-    "<br>\n",
-    "3. Ritchie, Matthew E., et al. \"limma powers differential expression analyses for RNA-sequencing and microarray studies.\" Nucleic acids research 43.7 (2015): e47-e47.\n",
-    "<br>\n",
-    "4. Clark, Neil R., et al. \"The characteristic direction: a geometrical approach to identify differentially expressed genes.\" BMC bioinformatics 15.1 (2014): 79.\n",
-    "<br>\n",
-    "5. Kuleshov, M.V., Jones, M.R., Rouillard, A.D., Fernandez, N.F., Duan, Q., Wang, Z., Koplev, S., Jenkins, S.L., Jagodnik, K.M. and Lachmann, A. (2016) Enrichr: a comprehensive gene set enrichment analysis web server 2016 update. Nucleic acids research, 44, W90-W97.\n",
-    "<br>\n",
+    "%%appyter markdown\n",
     "\n",
-    "Agarwal, Vikram, et al. \"Predicting effective microRNA target sites in mammalian mRNAs.\" elife 4 (2015): e05005.\n",
-    "<br>\n",
-    "Ashburner, M., Ball, C.A., Blake, J.A., Botstein, D., Butler, H., Cherry, J.M., Davis, A.P., Dolinski, K., Dwight, S.S. and Eppig, J.T. (2000) Gene Ontology: tool for the unification of biology. Nature genetics, 25, 25.\n",
-    "<br>\n",
-    "Chou, Chih-Hung, et al. \"miRTarBase 2016: updates to the experimentally validated miRNA-target interactions database.\" Nucleic acids research 44.D1 (2016): D239-D247.\n",
-    "<br>\n",
-    "Consortium, E.P. (2004) The ENCODE (ENCyclopedia of DNA elements) project. Science, 306, 636-640.\n",
-    "<br>\n",
-    "Croft, David, et al. \"The Reactome pathway knowledgebase.\" Nucleic acids research 42.D1 (2014): D472-D477.\n",
-    "<br>\n",
-    "Duan, Q., et al. \"L1000cds2: Lincs l1000 characteristic direction signatures search engine. NPJ Syst Biol Appl. 2016; 2: 16015.\" (2016).\n",
-    "<br>\n",
-    "Kanehisa, M. and Goto, S. (2000) KEGG: kyoto encyclopedia of genes and genomes. Nucleic acids research, 28, 27-30.\n",
-    "<br>\n",
-    "Kelder, Thomas, et al. \"WikiPathways: building research communities on biological pathways.\" Nucleic acids research 40.D1 (2012): D1301-D1307.\n",
-    "<br>\n",
-    "Lachmann, A., Xu, H., Krishnan, J., Berger, S.I., Mazloom, A.R. and Ma'ayan, A. (2010) ChEA: transcription factor regulation inferred from integrating genome-wide ChIP-X experiments. Bioinformatics, 26, 2438-2444.\n",
-    "<br>\n",
-    "Lachmann, Alexander, and Avi Ma'ayan. \"KEA: kinase enrichment analysis.\" Bioinformatics 25.5 (2009): 684-686.\n",
+    "{% for result in ref_counter %}\n",
+    "{{ loop.index }}. {{ result }}\n",
     "<br>\n",
-    "Wang, Zichen, et al. \"L1000FWD: fireworks visualization of drug-induced transcriptomic signatures.\" Bioinformatics 34.12 (2018): 2150-2152."
+    "{% endfor %}"
    ]
   }
  ],