Generate publications list from BibTeX file (#1507)

matt-graham · web-flow · commit 28643ea75509 · 2024-11-14T15:02:52.000Z
* Generate publications list from BibTeX file

* Add help strings to arguments and tidy up interface

* Warn when entry does not have note with valid section name

* Add pybtex to docs requirements

* Add docstrings

* Ignore generated publications list doc file

* Use keywords rather than note field for section

* Add title as additional sort key to break ties

* Allow updating BibTeX file from Zotero at command line

* Add requests to documentation requirements

* Add link to download BibTeX file

* Expandable publication list entries with additional details

* Format publication details as table

* Add tox environment for updating publications

* Revert accidental deletion of sphinx-build command in tox

* Strip trailing whitespace

* Show defaults in script help and normalize arg name

* Adding Healthcare provision section to publication list

* Updating publication data from Zotero

* Add published version of overview paper and remove pre-print from list
diff --git a/.gitignore b/.gitignore
@@ -124,6 +124,7 @@ src/**/_version.py
 
 # Generated TLO docs files
 docs/_*.rst
+docs/_*.html
 docs/hsi_events.csv
 docs/parameters.rst
 docs/reference/modules.rst
diff --git a/docs/publications.bib b/docs/publications.bib
diff --git a/docs/publications.rst b/docs/publications.rst
@@ -1,60 +1,10 @@
-
 =============
 Publications
 =============
 
 These are the publications that have been generated either in the course of the model's development or its application.
 
+:download:`Download a BibTeX file for all publications <./publications.bib>`
 
-Overview of the Model
-======================
-
-* `A Healthcare Service Delivery and Epidemiological Model for Investigating Resource Allocation for Health: The Thanzi La Onse Model <https://www.medrxiv.org/content/10.1101/2024.01.04.24300834v1>`_
-
-
-Analyses Using The Model
-========================
-
-* `The potential impact of declining development assistance for healthcare on population health: projections for Malawi <https://www.medrxiv.org/content/10.1101/2024.10.11.24315287v1>`_
-
-* `Health workforce needs in Malawi: analysis of the Thanzi La Onse integrated epidemiological model of care <https://human-resources-health.biomedcentral.com/articles/10.1186/s12960-024-00949-2>`_
-
-* `A new approach to Health Benefits Package design: an application of the Thanzi La Onse model in Malawi <https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1012462>`_
-
-* `The Changes in Health Service Utilisation in Malawi During the COVID-19 Pandemic <https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0290823>`_
-
-* `Modeling Contraception and Pregnancy in Malawi: A Thanzi La Onse Mathematical Modeling Study <https://onlinelibrary.wiley.com/doi/10.1111/sifp.12255>`_
-
-* `Factors Associated with Consumable Stock-Outs in Malawi: Evidence from a Facility Census <https://www.sciencedirect.com/science/article/pii/S2214109X24000950>`_
-
-* `The Effects of Health System Frailties on the Projected Impact of the HIV and TB Programmes in Malawi <https://www.sciencedirect.com/science/article/pii/S2214109X24002596>`_
-
-* `Estimating the health burden of road traffic injuries in Malawi using an individual-based model <https://injepijournal.biomedcentral.com/articles/10.1186/s40621-022-00386-6>`_
-
-* `The potential impact of intervention strategies on COVID-19 transmission in Malawi: A mathematical modelling study. <https://bmjopen.bmj.com/content/11/7/e045196>`_
-
-* `The potential impact of including pre-school aged children in the praziquantel mass-drug administration programmes on the S.haematobium infections in Malawi: a modelling study <https://www.medrxiv.org/content/10.1101/2020.12.09.20246652v1>`_
-
-* `A Decade of Progress in HIV, Malaria, and Tuberculosis Initiatives in Malawi. <https://www.medrxiv.org/content/10.1101/2024.10.08.24315077v1>`_
-
-
-Healthcare Seeking Behaviour
-============================
-
-* `Socio-demographic factors associated with early antenatal care visits among pregnant women in Malawi: 2004–2016 <https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0263650>`_
-
-* `Factors associated with healthcare seeking behaviour for children in Malawi: 2016. <https://onlinelibrary.wiley.com/doi/abs/10.1111/tmi.13499>`_
-
-* `A cross-sectional study on factors associated with health seeking behaviour of Malawians aged 15+ years in 2016. <https://www.ajol.info/index.php/mmj/article/view/202965>`_
-
-
-
-
-
-
-
-
-
-
-
-
+.. raw:: html
+   :file: _publications_list.html
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,4 +1,6 @@
 sphinx>=1.3
 sphinx-rtd-theme
+pybtex
 pyyaml
+requests
 tabulate
diff --git a/docs/tlo_publications.py b/docs/tlo_publications.py
@@ -0,0 +1,241 @@
+"""Create publications page from BibTeX database file."""
+
+import argparse
+import calendar
+from collections import defaultdict
+from pathlib import Path
+from warnings import warn
+
+import pybtex.database
+import requests
+from pybtex.backends.html import Backend as HTMLBackend
+from pybtex.style.formatting import toplevel
+from pybtex.style.formatting.unsrt import Style as UnsrtStyle
+from pybtex.style.formatting.unsrt import date as publication_date
+from pybtex.style.names import BaseNameStyle, name_part
+from pybtex.style.sorting import BaseSortingStyle
+from pybtex.style.template import (
+    FieldIsMissing,
+    field,
+    first_of,
+    href,
+    join,
+    node,
+    optional,
+    sentence,
+    tag,
+    words,
+)
+
+
+class InlineHTMLBackend(HTMLBackend):
+    """HTML backend writing the bibliography as an unstyled list fragment.
+
+    The prologue, epilogue and entry writers are overridden so that the output
+    consists only of a ``ul`` element containing one ``li`` element per entry,
+    which can be embedded in an existing HTML page.
+    """
+
+    def write_prologue(self):
+        """Write the opening tag of the list, with bullets and left padding removed."""
+        opening_tag = "<ul style='list-style-type: none; padding-left: 0;'>\n"
+        self.output(opening_tag)
+
+    def write_epilogue(self):
+        """Write the closing tag of the list."""
+        closing_tag = "</ul>\n"
+        self.output(closing_tag)
+
+    def write_entry(self, _key, _label, text):
+        """Write one entry as a list item; the entry key and label are not used."""
+        list_item = f"<li style='list-style: none;'>{text}</li>\n"
+        self.output(list_item)
+
+
+class DateSortingStyle(BaseSortingStyle):
+    """Sorting style for bibliography in reverse (newest first) publication date order.
+
+    Entries are ordered by year and then month, both descending, with the title used
+    as a final (ascending) key to break ties. Entries with a missing or unrecognised
+    year or month are treated as having value zero for that part of the key, and so
+    sort after entries with a known value, rather than raising an error.
+    """
+
+    # English month names are spelled out here as ``calendar.month_name`` follows the
+    # current locale and so cannot be relied on to contain the English names
+    _ENGLISH_MONTH_NAMES = (
+        "january",
+        "february",
+        "march",
+        "april",
+        "may",
+        "june",
+        "july",
+        "august",
+        "september",
+        "october",
+        "november",
+        "december",
+    )
+    # Map both full names and three letter abbreviations to month numbers (1 to 12)
+    _ENGLISH_MONTH_NUMBERS = {
+        key: number
+        for number, name in enumerate(_ENGLISH_MONTH_NAMES, start=1)
+        for key in (name, name[:3])
+    }
+
+    @staticmethod
+    def _year_number(year):
+        """Return ``year`` as an integer, or zero if it cannot be parsed as one."""
+        try:
+            return int(str(year).strip())
+        except ValueError:
+            return 0
+
+    @classmethod
+    def _month_number(cls, month):
+        """Return number (1 to 12) of ``month``, or zero if missing or not recognised.
+
+        Accepts, ignoring case, English month names and three letter abbreviations
+        (with or without a trailing full stop), month numbers and month names in the
+        current locale.
+        """
+        normalized = str(month).strip().rstrip(".").lower()
+        if normalized.isdecimal():
+            number = int(normalized)
+            return number if 1 <= number <= 12 else 0
+        if normalized in cls._ENGLISH_MONTH_NUMBERS:
+            return cls._ENGLISH_MONTH_NUMBERS[normalized]
+        # Fall back to names in current locale; index zero of calendar.month_name is
+        # the empty string so a missing month also maps to zero here
+        localized_names = [name.lower() for name in calendar.month_name]
+        if normalized in localized_names:
+            return localized_names.index(normalized)
+        return 0
+
+    def sorting_key(self, entry):
+        """Return tuple of negated year, negated month number and title for ``entry``."""
+        fields = entry.fields
+        return (
+            -self._year_number(fields.get("year", "")),
+            -self._month_number(fields.get("month", "")),
+            fields.get("title", ""),
+        )
+
+
+class LastOnlyNameStyle(BaseNameStyle):
+    """Name style formatting names using only the pre-last, last and lineage name parts."""
+
+    def format(self, person, _abbr=False):
+        """Return template for the name of ``person``; the ``_abbr`` argument is not used."""
+        prelast_part = name_part(tie=True)[person.rich_prelast_names]
+        last_part = name_part[person.rich_last_names]
+        lineage_part = name_part(before=", ")[person.rich_lineage_names]
+        return join[prelast_part, last_part, lineage_part]
+
+
+@node
+def summarized_names(children, context, role, summarize_limit=3, **kwargs):
+    """Return formatted last names of persons in a role, shortened with et al. if too many.
+
+    When the entry has more than ``summarize_limit`` persons in ``role`` only the first
+    is shown, followed by "et al."; otherwise all are shown, joined using the separator
+    keyword arguments passed in ``kwargs``. Raises ``FieldIsMissing`` if the entry has no
+    persons in ``role``.
+    """
+
+    assert not children
+
+    try:
+        persons = context["entry"].persons[role]
+    except KeyError:
+        raise FieldIsMissing(role, context["entry"])
+
+    formatter = LastOnlyNameStyle()
+    if len(persons) <= summarize_limit:
+        template = join(**kwargs)[[formatter.format(person) for person in persons]]
+    else:
+        template = words[formatter.format(persons[0]), "et al."]
+    return template.format_data(context)
+
+
+class SummarizedStyle(UnsrtStyle):
+    """
+    Bibliography style showing summarized names, year, title and journal with expandable details.
+
+    Each entry is formatted as a ``details`` element, with a linked one-line summary
+    as the ``summary`` element and a table of further details as the expandable content.
+    Templates are defined for the ``article`` and ``misc`` entry types only.
+
+    Not suitable for use with LaTeX backend due to use of details tags.
+    """
+
+    default_sorting_style = DateSortingStyle
+
+    def _format_summarized_names(self, role):
+        """Return template for names of persons in ``role``, summarized if there are many."""
+        return summarized_names(role, sep=", ", sep2=" and ", last_sep=", and ")
+
+    def _format_label(self, label):
+        """Return template for ``label`` followed by a colon, formatted with emphasis."""
+        return tag("em")[f"{label}: "]
+
+    def _format_details_as_table(self, details):
+        """Return template for a table with one row per (label, value) item in ``details``."""
+        # Rows are passed as a list rather than by unpacking a generator within the
+        # subscript, as starred expressions in subscripts are a syntax error before
+        # Python 3.11
+        rows = [
+            tag("tr")[toplevel[tag("td")[tag("em")[key]], tag("td")[value]]]
+            for key, value in details.items()
+        ]
+        return tag("table")[toplevel[rows]]
+
+    def _get_summary_template(self, e, type_):
+        """Return template for linked summary line: names, year, title and venue.
+
+        The venue is the ``journal`` field for articles and the ``publisher`` field
+        otherwise.
+        """
+        venue_field = "journal" if type_ == "article" else "publisher"
+        # Link to DOI if available, otherwise entry URL, falling back to a placeholder
+        url = first_of[
+            optional[join["https://doi.org/", field("doi", raw=True)]],
+            optional[field("url", raw=True)],
+            "#",
+        ]
+        return href[
+            url,
+            sentence(sep=". ")[
+                words[
+                    self._format_summarized_names("author"),
+                    optional["(", field("year"), ")"],
+                ],
+                self.format_title(e, "title", as_sentence=False),
+                tag("em")[field(venue_field)],
+            ],
+        ]
+
+    def _get_details_template(self, type_):
+        """Return template for table of details: type, DOI, date, authors and abstract."""
+        bibtex_type_to_label = {"article": "Journal article", "misc": "Pre-print"}
+        return self._format_details_as_table(
+            {
+                "Type": bibtex_type_to_label[type_],
+                "DOI": optional[field("doi")],
+                "Date": publication_date,
+                "Authors": self.format_names("author"),
+                # Optional, as for DOI, so that an entry without an abstract is output
+                # with an empty table cell instead of stopping generation of the list
+                "Abstract": optional[field("abstract")],
+            }
+        )
+
+    def _get_summarized_template(self, e, type_):
+        """Return template combining summary line and details table in a details tag."""
+        summary_template = self._get_summary_template(e, type_)
+        details_template = self._get_details_template(type_)
+        return tag("details")[tag("summary")[summary_template], details_template]
+
+    def get_article_template(self, e):
+        """Return template for entries of type ``article``."""
+        return self._get_summarized_template(e, "article")
+
+    def get_misc_template(self, e):
+        """Return template for entries of type ``misc``."""
+        return self._get_summarized_template(e, "misc")
+
+
+def write_publications_list(stream, bibliography_data, section_names, backend, style):
+    """Write bibliography data with given backend and style to a stream, split into sections.
+
+    An entry is assigned to a section when exactly one of the comma-separated values in
+    its ``keywords`` field is equal to one of ``section_names``. Entries matching no
+    section name, or more than one, are left out of the output and a warning is issued.
+
+    :param stream: Text stream with a ``write`` method to write output to.
+    :param bibliography_data: Bibliography data, with entries in an ``entries`` mapping.
+    :param section_names: Section names, in the order sections should be written.
+    :param backend: Backend used to write each formatted section to ``stream``.
+    :param style: Style used to format the entries in each section.
+    """
+    from html import escape
+
+    # Build set of section names once rather than for every entry
+    valid_section_names = set(section_names)
+    keys_by_section = defaultdict(list)
+    for key, entry in bibliography_data.entries.items():
+        keywords = {k.strip() for k in entry.fields.get("keywords", "").split(",")}
+        section_names_in_keywords = keywords & valid_section_names
+        if len(section_names_in_keywords) == 1:
+            keys_by_section[section_names_in_keywords.pop()].append(key)
+            continue
+        if not section_names_in_keywords:
+            msg = (
+                f"BibTeX entry with key {key} does not have a keyword / tag corresponding to "
+                f"one of section names {section_names} and so will not be included in output."
+            )
+        else:
+            msg = (
+                f"BibTeX entry with key {key} has multiple keywords / tags corresponding to "
+                f"section names {section_names} and so will not be included in output."
+            )
+        warn(msg, stacklevel=2)
+    for section_name in section_names:
+        # Escape heading text so characters such as & and < give valid HTML; quotes
+        # are left as they are since they do not need escaping within element content
+        stream.write(f"<h2>{escape(section_name, quote=False)}</h2>\n")
+        formatted_bibliography = style.format_bibliography(
+            bibliography_data, keys_by_section[section_name]
+        )
+        backend.write_to_stream(formatted_bibliography, stream)
+        stream.write("\n")
+
+
+if __name__ == "__main__":
+    # Command line interface: optionally refresh the BibTeX file from the Zotero group
+    # library, then generate the HTML publications list from the BibTeX file
+    docs_directory = Path(__file__).parent
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        "--bib-file",
+        type=Path,
+        default=docs_directory / "publications.bib",
+        help="BibTeX file containing publication details",
+    )
+    parser.add_argument(
+        "--output-file",
+        type=Path,
+        default=docs_directory / "_publications_list.html",
+        help="File to write publication list to in HTML format",
+    )
+    parser.add_argument(
+        "--update-from-zotero",
+        action="store_true",
+        help="Update BibTeX file at path specified by --bib-file from Zotero group library",
+    )
+    parser.add_argument(
+        "--zotero-group-id",
+        default="5746396",
+        help="Integer identifier for Zotero group library",
+    )
+    args = parser.parse_args()
+    if args.update_from_zotero:
+        endpoint_url = f"https://api.zotero.org/groups/{args.zotero_group_id}/items"
+        # Zotero API requires maximum number of results to return (limit parameter)
+        # to be explicitly specified for export formats such as bibtex and allows a
+        # maximum value of 100 - if we exceed this number of publications will need
+        # to switch to making multiple requests with different start indices
+        max_results = 100
+        response = requests.get(
+            endpoint_url,
+            params={"format": "bibtex", "limit": str(max_results)},
+            # Without a timeout an unresponsive server would block indefinitely
+            timeout=60,
+        )
+        if not response.ok:
+            msg = (
+                f"Request to {endpoint_url} failed with status code "
+                f"{response.status_code} ({response.reason})"
+            )
+            raise RuntimeError(msg)
+        # Warn rather than silently write a truncated file if the library holds more
+        # items than were requested (only checked when the header is present)
+        total_results = response.headers.get("Total-Results", "")
+        if total_results.isdecimal() and int(total_results) > max_results:
+            warn(
+                f"Zotero library contains {total_results} items but only the first "
+                f"{max_results} were retrieved so {args.bib_file} will be incomplete.",
+                stacklevel=1,
+            )
+        # Write as UTF-8 irrespective of platform default encoding, to match the
+        # encoding pybtex is expected to use when parsing the file below
+        with open(args.bib_file, "w", encoding="utf-8") as bib_file:
+            bib_file.write(response.text)
+    with open(args.output_file, "w", encoding="utf-8") as output_file:
+        write_publications_list(
+            stream=output_file,
+            bibliography_data=pybtex.database.parse_file(args.bib_file),
+            section_names=[
+                "Overview of the model",
+                "Analyses using the model",
+                "Healthcare seeking behaviour",
+                "Healthcare provision",
+            ],
+            backend=InlineHTMLBackend(),
+            style=SummarizedStyle(),
+        )
diff --git a/tox.ini b/tox.ini
@@ -73,6 +73,8 @@ commands =
     python docs/tlo_data_sources.py
     ; Generate contributors page
     python docs/tlo_contributors.py
+    ; Generate publications page
+    python docs/tlo_publications.py
     ; Generate resources files page
     python docs/tlo_resources.py
     ; Generate HSI events listing
@@ -139,6 +141,13 @@ commands = python {toxinidir}/src/scripts/automation/update_citation.py
 skip_install = true
 deps = pyyaml
 
+[testenv:update-publications]
+commands = python {toxinidir}/docs/tlo_publications.py --update-from-zotero
+skip_install = true
+deps =
+    pybtex
+    requests
+
 [testenv:requirements]
 commands =
     pip-compile --output-file {toxinidir}/requirements/base.txt