Skip to content

Commit 38e8e45

Browse files
author
Gerit Wagner
committed
update writers and docs
1 parent 581c19f commit 38e8e45

15 files changed

Lines changed: 261 additions & 31 deletions

colrev/loader/load_utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
import colrev.loader.load_utils
77
88
# Files
9-
records = colrev.loader.load_utils.load(filename=filename, logger=logger)
9+
records = colrev.loader.load_utils.load(filename=filename)
1010
1111
# Strings
12-
records = colrev.loader.load_utils.loads(load_str=load_str, logger=logger)
12+
records = colrev.loader.load_utils.loads(load_str=load_str)
1313
1414
returns: records (dict)
1515
@@ -173,6 +173,9 @@ def load( # type: ignore
173173
) -> dict:
174174
"""Load a file and return records as a dictionary"""
175175

176+
if isinstance(filename, str):
177+
filename = Path(filename)
178+
176179
if not filename.is_file():
177180
if empty_if_file_not_exists:
178181
return {}

colrev/writer/csv.py

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,43 @@
2323
]
2424

2525

26-
def to_dataframe(*, records_dict: dict) -> pd.DataFrame:
26+
def to_dataframe(
27+
*,
28+
records_dict: dict,
29+
sort_fields_first: bool = True,
30+
drop_empty_fields: bool = True,
31+
) -> pd.DataFrame:
2732
"""Convert a records dict to a pandas DataFrame"""
33+
all_keys = {k for v in records_dict.values() for k in v.keys()}
34+
additional_fields = sorted(all_keys - set(FIELDS))
35+
fields = FIELDS + additional_fields if sort_fields_first else sorted(all_keys)
36+
2837
data = []
29-
additional_fields = [x for x in list(records_dict) if x not in FIELDS]
3038
for record_id in sorted(records_dict.keys()):
3139
record_dict = records_dict[record_id]
32-
row = {}
33-
for field in FIELDS + additional_fields:
34-
if field in record_dict:
35-
row[field] = record_dict[field]
36-
else:
37-
row[field] = ""
40+
row = {field: record_dict.get(field, "") for field in fields}
3841
data.append(row)
39-
return pd.DataFrame(data)
42+
43+
df = pd.DataFrame(data)
44+
45+
if drop_empty_fields:
46+
df = df.dropna(axis=1, how="all")
47+
df = df.loc[:, (df != "").any(axis=0)]
48+
49+
return df
4050

4151

42-
def write_file(*, records_dict: dict, filename: str) -> None:
43-
"""Write a csv file from a records dict"""
44-
data_frame = to_dataframe(records_dict=records_dict)
45-
data_frame.to_csv(filename, index=False)
52+
def write_file(
53+
*,
54+
records_dict: dict,
55+
filename: str,
56+
sort_fields_first: bool = True,
57+
drop_empty_fields: bool = True,
58+
) -> None:
59+
"""Write a CSV file from a records dict"""
60+
df = to_dataframe(
61+
records_dict=records_dict,
62+
sort_fields_first=sort_fields_first,
63+
drop_empty_fields=drop_empty_fields,
64+
)
65+
df.to_csv(filename, index=False, encoding="utf-8")

colrev/writer/excel.py

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#! /usr/bin/env python
2-
"""Function to write excel files"""
2+
"""Function to write Excel files with flexible field handling"""
33
from __future__ import annotations
44

55
import pandas as pd
@@ -23,25 +23,45 @@
2323
]
2424

2525

26-
def to_dataframe(*, records_dict: dict) -> pd.DataFrame:
26+
def to_dataframe(
27+
*,
28+
records_dict: dict,
29+
sort_fields_first: bool = True,
30+
drop_empty_fields: bool = True,
31+
) -> pd.DataFrame:
2732
"""Convert a records dict to a pandas DataFrame"""
33+
all_keys = {k for v in records_dict.values() for k in v.keys()}
34+
additional_fields = sorted(all_keys - set(FIELDS))
35+
fields = FIELDS + additional_fields if sort_fields_first else sorted(all_keys)
36+
2837
data = []
2938
for record_id in sorted(records_dict.keys()):
3039
record_dict = records_dict[record_id]
31-
row = {}
32-
for field in FIELDS:
33-
if field in record_dict:
34-
row[field] = record_dict[field]
35-
else:
36-
row[field] = ""
40+
row = {field: record_dict.get(field, "") for field in fields}
3741
data.append(row)
38-
return pd.DataFrame(data)
42+
43+
df = pd.DataFrame(data)
44+
45+
if drop_empty_fields:
46+
df = df.dropna(axis=1, how="all")
47+
df = df.loc[:, (df != "").any(axis=0)]
48+
49+
return df
3950

4051

41-
def write_file(*, records_dict: dict, filename: str) -> None:
42-
"""Write an excel file from a records dict"""
43-
data_frame = to_dataframe(records_dict=records_dict)
44-
# pylint: disable=abstract-class-instantiated
52+
def write_file(
53+
*,
54+
records_dict: dict,
55+
filename: str,
56+
sort_fields_first: bool = True,
57+
drop_empty_fields: bool = True,
58+
) -> None:
59+
"""Write an Excel file from a records dict"""
60+
data_frame = to_dataframe(
61+
records_dict=records_dict,
62+
sort_fields_first=sort_fields_first,
63+
drop_empty_fields=drop_empty_fields,
64+
)
4565
writer = pd.ExcelWriter(filename, engine="xlsxwriter")
4666
data_frame.to_excel(writer, index=False)
4767

colrev/writer/markdown.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#! /usr/bin/env python
2+
"""Function to write Markdown (table) files"""
3+
from __future__ import annotations
4+
5+
import pandas as pd
6+
7+
from colrev.constants import Fields
8+
9+
FIELDS = [
10+
Fields.ID,
11+
Fields.ENTRYTYPE,
12+
Fields.TITLE,
13+
Fields.AUTHOR,
14+
Fields.YEAR,
15+
Fields.JOURNAL,
16+
Fields.BOOKTITLE,
17+
Fields.VOLUME,
18+
Fields.NUMBER,
19+
Fields.PAGES,
20+
Fields.DOI,
21+
Fields.URL,
22+
Fields.FILE,
23+
]
24+
25+
26+
def to_dataframe(
27+
*,
28+
records_dict: dict,
29+
sort_fields_first: bool = True,
30+
drop_empty_fields: bool = True,
31+
) -> pd.DataFrame:
32+
"""Convert a records dict to a pandas DataFrame"""
33+
all_keys = {k for v in records_dict.values() for k in v.keys()}
34+
additional_fields = sorted(all_keys - set(FIELDS))
35+
fields = FIELDS + additional_fields if sort_fields_first else sorted(all_keys)
36+
37+
data = []
38+
for record_id in sorted(records_dict.keys()):
39+
record_dict = records_dict[record_id]
40+
row = {field: record_dict.get(field, "") for field in fields}
41+
data.append(row)
42+
43+
df = pd.DataFrame(data)
44+
45+
if drop_empty_fields:
46+
df = df.dropna(axis=1, how="all")
47+
df = df.loc[:, (df != "").any(axis=0)]
48+
49+
return df
50+
51+
52+
def to_string(
53+
*,
54+
records_dict: dict,
55+
sort_fields_first: bool = True,
56+
drop_empty_fields: bool = True,
57+
) -> str:
58+
"""Convert a records dict to a markdown string with a table"""
59+
data_frame = to_dataframe(
60+
records_dict=records_dict,
61+
sort_fields_first=sort_fields_first,
62+
drop_empty_fields=drop_empty_fields,
63+
)
64+
65+
headers = list(data_frame.columns)
66+
md_lines = [
67+
"| " + " | ".join(headers) + " |",
68+
"|" + "|".join(["---"] * len(headers)) + "|",
69+
]
70+
for _, row in data_frame.iterrows():
71+
row_values = [str(row[h]).replace("\n", " ") for h in headers]
72+
md_lines.append("| " + " | ".join(row_values) + " |")
73+
74+
return "\n".join(md_lines)
75+
76+
77+
def write_file(
78+
*,
79+
records_dict: dict,
80+
filename: str,
81+
sort_fields_first: bool = True,
82+
drop_empty_fields: bool = True,
83+
) -> None:
84+
"""Write a markdown file with a table from a records dict"""
85+
md_string = to_string(
86+
records_dict=records_dict,
87+
sort_fields_first=sort_fields_first,
88+
drop_empty_fields=drop_empty_fields,
89+
)
90+
with open(filename, "w", encoding="utf-8") as f:
91+
f.write(md_string)

colrev/writer/write_utils.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@
33
44
Usage::
55
6-
TODO
6+
import colrev.loader.load_utils
7+
import colrev.writer.write_utils
8+
9+
# Load
10+
records = colrev.loader.load_utils.load(filename=filename)
11+
12+
# Write
13+
colrev.writer.write_utils.write_file(records, filename=filename)
714
815
"""
916
from __future__ import annotations
@@ -13,11 +20,20 @@
1320
import colrev.writer.bib
1421
import colrev.writer.csv
1522
import colrev.writer.excel
23+
import colrev.writer.markdown
1624
import colrev.writer.ris
1725

1826

19-
def write_file(records_dict: dict, filename: Path, **kw) -> dict: # type: ignore
20-
"""Write a file (BiBTex, RIS, or other) from a dictionary of records."""
27+
def write_file(records_dict: dict, *, filename: Path, **kw) -> dict: # type: ignore
28+
"""Write a file (BibTeX, RIS, or other) from a dictionary of records.
29+
30+
Note:
31+
For tabular formats (csv, xlsx, md), the following options are supported:
32+
- sort_fields_first: if True, the default fields appear first, followed by the remaining fields in sorted order; if False, all fields are sorted alphabetically
33+
- drop_empty_fields: if True, empty fields will be omitted
34+
"""
35+
if isinstance(filename, str):
36+
filename = Path(filename)
2137
if filename.suffix == ".bib":
2238
writer = colrev.writer.bib.write_file # type: ignore
2339
elif filename.suffix == ".ris":
@@ -26,6 +42,8 @@ def write_file(records_dict: dict, filename: Path, **kw) -> dict: # type: ignor
2642
writer = colrev.writer.csv.write_file # type: ignore
2743
elif filename.suffix == ".xlsx":
2844
writer = colrev.writer.excel.write_file # type: ignore
45+
elif filename.suffix == ".md":
46+
writer = colrev.writer.markdown.write_file # type: ignore
2947
else:
3048
raise NotImplementedError
3149

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
colrev.writer.markdown
2+
======================
3+
4+
.. automodule:: colrev.writer.markdown
5+
6+
7+
8+
9+
10+
11+
12+
.. rubric:: Functions
13+
14+
.. autosummary::
15+
:toctree:
16+
:nosignatures:
17+
18+
to_dataframe
19+
to_string
20+
write_file
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
colrev.writer.markdown.to\_dataframe
2+
====================================
3+
4+
.. currentmodule:: colrev.writer.markdown
5+
6+
.. autofunction:: to_dataframe
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
colrev.writer.markdown.to\_string
2+
=================================
3+
4+
.. currentmodule:: colrev.writer.markdown
5+
6+
.. autofunction:: to_string
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
colrev.writer.markdown.write\_file
2+
==================================
3+
4+
.. currentmodule:: colrev.writer.markdown
5+
6+
.. autofunction:: write_file

docs/source/dev_docs/_autosummary/colrev.writer.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,6 @@ colrev.writer
2929
colrev.writer.bib
3030
colrev.writer.csv
3131
colrev.writer.excel
32+
colrev.writer.markdown
3233
colrev.writer.ris
3334
colrev.writer.write_utils

0 commit comments

Comments
 (0)