|
| 1 | +#! /usr/bin/env python |
| 2 | +"""Functions to write Markdown (table) files""" |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import pandas as pd |
| 6 | + |
| 7 | +from colrev.constants import Fields |
| 8 | + |
# Columns placed first (in this order) when fields are sorted "standard first".
# Kept as a list because it is concatenated with a list of extra field names.
FIELDS = [
    # identification
    Fields.ID,
    Fields.ENTRYTYPE,
    # core bibliographic data
    Fields.TITLE,
    Fields.AUTHOR,
    Fields.YEAR,
    Fields.JOURNAL,
    Fields.BOOKTITLE,
    Fields.VOLUME,
    Fields.NUMBER,
    Fields.PAGES,
    # locators
    Fields.DOI,
    Fields.URL,
    Fields.FILE,
]
| 24 | + |
| 25 | + |
def to_dataframe(
    *,
    records_dict: dict,
    sort_fields_first: bool = True,
    drop_empty_fields: bool = True,
) -> pd.DataFrame:
    """Convert a records dict to a pandas DataFrame.

    Args:
        records_dict: Mapping of record ID to a dict of field name -> value.
        sort_fields_first: If True, the columns start with the entries of
            ``FIELDS`` (in that order) followed by the remaining field names
            in sorted order; otherwise all field names are sorted.
        drop_empty_fields: If True, drop every column that holds no value in
            any record, i.e. only ``""`` and/or missing values (None/NaN).

    Returns:
        A DataFrame with one row per record, ordered by record ID. Fields a
        record does not have are filled with ``""``.
    """
    all_keys = {key for record in records_dict.values() for key in record}
    if sort_fields_first:
        fields = FIELDS + sorted(all_keys - set(FIELDS))
    else:
        fields = sorted(all_keys)

    data = [
        {field: records_dict[record_id].get(field, "") for field in fields}
        for record_id in sorted(records_dict)
    ]

    # Passing the columns explicitly keeps them when there are no records;
    # a DataFrame built from an empty list alone would have no columns.
    df = pd.DataFrame(data, columns=fields)

    if drop_empty_fields:
        # A value counts as empty when it is missing (None/NaN) or "".
        # Checking both at once also drops columns that mix the two, which
        # a separate dropna() followed by a != "" test would keep.
        keep = [
            column
            for column in df.columns
            if (df[column].notna() & (df[column] != "")).any()
        ]
        df = df[keep]

    return df
| 50 | + |
| 51 | + |
def to_string(
    *,
    records_dict: dict,
    sort_fields_first: bool = True,
    drop_empty_fields: bool = True,
) -> str:
    """Convert a records dict to a markdown string with a table.

    The table has one header row, one delimiter row and one row per record.
    Cell and header text is made table-safe: line breaks become spaces and
    literal ``|`` characters are escaped as ``\\|``.

    Args:
        records_dict: Mapping of record ID to a dict of field name -> value.
        sort_fields_first: Forwarded to ``to_dataframe`` (column ordering).
        drop_empty_fields: Forwarded to ``to_dataframe`` (drop empty columns).

    Returns:
        The markdown table as a single string, lines joined by ``"\\n"``
        (no trailing newline).
    """

    def _cell(value: object) -> str:
        """Render a value as text that cannot break the table layout."""
        text = str(value)
        # Any kind of line break would end the table row prematurely.
        for line_break in ("\r\n", "\r", "\n"):
            text = text.replace(line_break, " ")
        # An unescaped pipe would be read as a column separator.
        return text.replace("|", "\\|")

    data_frame = to_dataframe(
        records_dict=records_dict,
        sort_fields_first=sort_fields_first,
        drop_empty_fields=drop_empty_fields,
    )

    headers = list(data_frame.columns)
    md_lines = [
        "| " + " | ".join(_cell(header) for header in headers) + " |",
        "|" + "|".join(["---"] * len(headers)) + "|",
    ]
    # itertuples yields the values in column order without converting each
    # row to a Series, so values are not upcast to a common row dtype.
    for values in data_frame.itertuples(index=False, name=None):
        md_lines.append("| " + " | ".join(_cell(value) for value in values) + " |")

    return "\n".join(md_lines)
| 75 | + |
| 76 | + |
def write_file(
    *,
    records_dict: dict,
    filename: str,
    sort_fields_first: bool = True,
    drop_empty_fields: bool = True,
) -> None:
    """Render the records as a markdown table and save it to ``filename``.

    The table text is produced by ``to_string`` with the same
    ``sort_fields_first`` / ``drop_empty_fields`` options. The target file is
    opened in write mode with UTF-8 encoding.
    """
    table_text = to_string(
        records_dict=records_dict,
        sort_fields_first=sort_fields_first,
        drop_empty_fields=drop_empty_fields,
    )
    with open(filename, mode="w", encoding="utf-8") as md_file:
        md_file.write(table_text)
0 commit comments