-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathconvert.py
More file actions
118 lines (92 loc) · 3.15 KB
/
convert.py
File metadata and controls
118 lines (92 loc) · 3.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import argparse
from textwrap import dedent, indent
from typing import Sequence, NamedTuple
from pathlib import Path
import csv
# Skeleton of one generated abstract page: a front-matter section delimited
# by `---` lines (title, authors, license, PDF export settings) followed by
# the body text and a trailing references section.
# Placeholders filled in via str.format: {title}, {authors}, {text},
# {references}. The title is wrapped in single quotes here, so any single
# quote inside the substituted title has to be escaped by the caller.
# NOTE(review): the keys below `exports:` carry no indentation in this
# literal -- confirm that this matches the intended YAML nesting.
TEMPLATE = dedent(
"""\
---
title: '{title}'
{authors}
license: CC-BY-4.0
exports:
- format: pdf
template: ../../template
---
{text}
{references}
"""
)
# Snippet for a single author entry: a list item holding the author's name
# and one affiliation. Placeholders filled in via str.format: {name},
# {affiliation}. The literal ends without a trailing newline.
# NOTE(review): `affiliations:` and its item carry no indentation relative
# to `- name:` in this literal -- confirm that this matches the intended
# YAML nesting.
author_template = dedent(
"""\
- name: {name}
affiliations:
- {affiliation}"""
)
class Author(NamedTuple):
    """A single author of a submission together with one affiliation."""

    name: str
    affiliation: str

    def to_str(self):
        """Return this author rendered through ``author_template``."""
        # The tuple's field names match the template placeholders one-to-one,
        # so the field mapping can be handed to str.format directly.
        fields = self._asdict()
        return author_template.format(**fields)
def read_paper(row: dict[str, str]) -> str:
    """Render one submission row as the text of its abstract page.

    Args:
        row: One CSV record. The keys read are
            ``"List of authors and affiliation"``,
            ``"Link to open-source software repository (if applicable)"``,
            ``"Title"`` and ``"Abstract"``.

    Returns:
        ``TEMPLATE`` with title, author block, abstract text and the
        optional repository section filled in.
    """
    parsed_authors = []
    for chunk in row["List of authors and affiliation"].split(";"):
        # Only the first comma separates the name from the affiliation;
        # later commas belong to the affiliation itself.
        name_part, comma, affiliation_part = chunk.strip().partition(",")
        if not comma:
            # Entries without a comma are left out of the author list.
            continue
        parsed_authors.append(
            Author(name=name_part.strip(), affiliation=affiliation_part.strip())
        )

    rendered_entries = "\n".join(person.to_str() for person in parsed_authors)
    authors_block = "authors:\n" + indent(rendered_entries, " ")

    repo_section = row["Link to open-source software repository (if applicable)"]
    if len(repo_section) != 0:
        repo_section = f"\n# Repository\n{repo_section}\n"

    # Single quotes are doubled because the template wraps the title in
    # single quotes.
    escaped_title = row["Title"].replace("'", "''")
    return TEMPLATE.format(
        title=escaped_title,
        authors=authors_block,
        text=row["Abstract"],
        references=repo_section,
    )
def main(argv: Sequence[str] | None = None) -> int:
parser = argparse.ArgumentParser(description="")
parser.add_argument("path", type=Path)
parser.add_argument("-o", "--output", type=Path, default=None)
args = vars(parser.parse_args(argv))
outdir = args["output"] or Path("book") / "abstracts"
outdir.mkdir(parents=True, exist_ok=True)
# count the words
if not args["path"].is_file():
print(f"{args['path']} is not a file")
return 1
with open(args["path"], "r") as f:
reader = csv.DictReader(f)
rows = [row for row in reader if any(row.values())]
for row in rows:
# remove all linebreaks that would cause the markdown to break unless it's in the abstract
for key, value in row.items():
if key != "Abstract":
row[key] = value.replace("\n", " ")
# filename is last-name of author + first word of title
last_name = row["List of authors and affiliation"].split(",")[0].split()[0]
first_word_title = row["Title"].replace("-", " ").split()[0]
slug = f"{last_name}-{first_word_title}".lower()
# remove invalid characters
slug = (
slug.replace(" ", "").replace("/", "").replace(":", "").replace(",", "")
)
output = read_paper(row)
if row["Decision"] in ["poster", "oral", "demo"]:
(outdir / f"{slug}.md").write_text(output)
else:
print(f"Skipping {slug} due to decision {row['Decision']}")
return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())