Skip to content

Commit 9112c53

Browse files
authored
Refactor parser to use string entry_types (#9)
Previously, we used the `EntryType` enum to define the possible entry types. This was too restrictive, as parsing a `BibTeXEntry` would fail if its `EntryType` was unknown. To improve the usability of the tool, for example to allow writing rules for entry types that are uncommon in my own field, we now use a plain string for `BibTeXEntry.entry_type`. This change had to be carried out everywhere `EntryType` was used before, so it turned out to be a significant refactor of the code base. Fixes #5
1 parent df43d29 commit 9112c53

5 files changed

Lines changed: 57 additions & 86 deletions

File tree

bibtex_linter/default_rules.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
from typing import List, Set
22

3-
from bibtex_linter.parser import BibTeXEntry, EntryType
3+
from bibtex_linter.parser import BibTeXEntry
44
from bibtex_linter.verification import (
55
linter_rule,
66
check_required_fields,
77
check_omitted_fields,
88
)
99

1010

11-
@linter_rule(entry_type=EntryType.ARTICLE)
11+
@linter_rule(entry_type="article")
1212
def check_article(entry: BibTeXEntry) -> List[str]:
1313
"""
1414
Check that the required fields for `EntryType.ARTICLE` are there and that there are no fields present, that would
@@ -34,7 +34,7 @@ def check_article(entry: BibTeXEntry) -> List[str]:
3434
return invariant_violations
3535

3636

37-
@linter_rule(entry_type=EntryType.CONFERENCE)
37+
@linter_rule(entry_type="conference")
3838
def check_conference(entry: BibTeXEntry) -> List[str]:
3939
"""
4040
Check that the required fields for `EntryType.CONFERENCE` are there and that there are no fields present, that would
@@ -64,7 +64,7 @@ def check_conference(entry: BibTeXEntry) -> List[str]:
6464
return invariant_violations
6565

6666

67-
@linter_rule(entry_type=EntryType.ONLINE)
67+
@linter_rule(entry_type="online")
6868
def check_online(entry: BibTeXEntry) -> List[str]:
6969
"""
7070
Check that the required fields for `EntryType.ONLINE` are there and that there are no fields present, that would
@@ -95,7 +95,7 @@ def check_online(entry: BibTeXEntry) -> List[str]:
9595
return invariant_violations
9696

9797

98-
@linter_rule(entry_type=EntryType.BOOK)
98+
@linter_rule(entry_type="book")
9999
def check_book(entry: BibTeXEntry) -> List[str]:
100100
"""
101101
Check that the required fields for `EntryType.BOOK` are there and that there are no fields present, that would
@@ -123,7 +123,7 @@ def check_book(entry: BibTeXEntry) -> List[str]:
123123
return invariant_violations
124124

125125

126-
@linter_rule(entry_type=EntryType.IN_BOOK)
126+
@linter_rule(entry_type="inbook")
127127
def check_in_book(entry: BibTeXEntry) -> List[str]:
128128
"""
129129
Check that the required fields for `EntryType.IN_BOOK` are there and that there are no fields present, that would
@@ -153,7 +153,7 @@ def check_in_book(entry: BibTeXEntry) -> List[str]:
153153
return invariant_violations
154154

155155

156-
@linter_rule(entry_type=EntryType.IN_COLLECTION)
156+
@linter_rule(entry_type="incollection")
157157
def check_in_collection(entry: BibTeXEntry) -> List[str]:
158158
"""
159159
Check that the required fields for `EntryType.IN_COLLECTION` are there and that there are no fields present, that
@@ -180,7 +180,7 @@ def check_in_collection(entry: BibTeXEntry) -> List[str]:
180180
return invariant_violations
181181

182182

183-
@linter_rule(entry_type=EntryType.STANDARD)
183+
@linter_rule(entry_type="standard")
184184
def check_standard(entry: BibTeXEntry) -> List[str]:
185185
"""
186186
Check that the required fields for `EntryType.STANDARD` are there and that there are no fields present, that would
@@ -212,7 +212,7 @@ def check_standard(entry: BibTeXEntry) -> List[str]:
212212
return invariant_violations
213213

214214

215-
@linter_rule(entry_type=EntryType.TECH_REPORT)
215+
@linter_rule(entry_type="techreport")
216216
def check_tech_report(entry: BibTeXEntry) -> List[str]:
217217
"""
218218
Check that the required fields for `EntryType.TECH_REPORT` are there and that there are no fields present, that
@@ -242,7 +242,7 @@ def check_tech_report(entry: BibTeXEntry) -> List[str]:
242242
return invariant_violations
243243

244244

245-
@linter_rule(entry_type=EntryType.MISC)
245+
@linter_rule(entry_type="misc")
246246
def check_misc(entry: BibTeXEntry) -> List[str]:
247247
"""
248248
Check that the required fields for `EntryType.MISC` are there and that there are no fields present, that would

bibtex_linter/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def main() -> None:
5353
total_number_of_violations += len(violations)
5454
if violations:
5555
had_violations = True
56-
print(f"\nEntry '{entry.name}' of type '{entry.entry_type.name}' failed verification:")
56+
print(f"\nEntry '{entry.name}' of type '{entry.entry_type}' failed verification:")
5757
print(" ❌ Invariant Violations:")
5858
for issue in violations:
5959
print(f" - {issue}")

bibtex_linter/parser.py

Lines changed: 16 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,55 +4,19 @@
44
import re
55

66

7-
class EntryType(enum.Enum):
8-
"""
9-
A collection of entry types that the LaTeX `IEEEtran` offers. Note that these only include what I
10-
need at the moment. The full list can be found at:
11-
https://ctan.net/macros/latex/contrib/IEEEtran/bibtex/IEEEtran_bst_HOWTO.pdf
12-
"""
13-
ARTICLE = "ARTICLE" # A typical journal article
14-
CONFERENCE = "CONFERENCE" # A typical conference paper. Alias to: `IN_PROCEEDINGS`
15-
ONLINE = "ONLINE" # A reference on the internet. Alias to: `ELECTRONIC`
16-
BOOK = "BOOK" # Referencing a whole book
17-
IN_BOOK = "IN_BOOK" # Referencing a part of a book (chapters or pages)
18-
IN_COLLECTION = "IN_COLLECTION" # Referencing a part of a book that has its own name
19-
STANDARD = "STANDARD" # Used for proposed or formally published standards
20-
TECH_REPORT = "TECH_REPORT" # Used for technical reports, or reports about standards. Compare to `STANDARD`!
21-
MISC = "MISC" # Anything else that does not fit the above
22-
23-
@classmethod
24-
def from_string(cls, s: str) -> 'EntryType':
25-
"""
26-
Get the `EntryType` from the string. Can deal with common aliases.
27-
28-
:raises: KeyError, if the given string does not correspond to one of the entry types
29-
"""
30-
s = s.upper()
31-
str_to_entry_type_map: Dict[str, "EntryType"] = {
32-
"ARTICLE": EntryType.ARTICLE,
33-
"CONFERENCE": EntryType.CONFERENCE,
34-
"INPROCEEDINGS": EntryType.CONFERENCE,
35-
36-
"BOOK": EntryType.BOOK,
37-
"INBOOK": EntryType.IN_BOOK,
38-
"INCOLLECTION": EntryType.IN_COLLECTION,
39-
"STANDARD": EntryType.STANDARD,
40-
"TECHREPORT": EntryType.TECH_REPORT,
41-
42-
"ONLINE": EntryType.ONLINE,
43-
"ELECTRONIC": EntryType.ONLINE,
44-
45-
"MISC": EntryType.MISC,
46-
}
47-
return str_to_entry_type_map[s]
7+
RESOLVE_ENTRY_TYPE_ALIAS: Dict[str, str] = {
8+
"inproceedings": "conference",
9+
"electronic": "online",
10+
}
4811

4912

5013
@dataclasses.dataclass
5114
class BibTeXEntry:
5215
"""
5316
An entry in a BibTeX file
5417
55-
:ivar entry_type: Type of the entry (e.g. `@misc`). See `EntryType` for details
18+
:ivar entry_type: Type of the entry (e.g. `@misc`). We always assume that the `entry_type` is in small letters only,
19+
and we transform some common `entry_type` aliases to their "canonical" form (e.g. the name I prefer to use).
5620
:ivar name: Name or ID of the entry. So basically what is here: `@misc{Name_or_ID,`
5721
:ivar fields: Fields of the entry, as a Dict mapping the field key (e.g. `author`) to its cleaned up value.
5822
@@ -72,14 +36,21 @@ class BibTeXEntry:
7236
will be parsed to: `{"note": "This value\nspans multiple\nlines"}`. For the implementation details, check out
7337
the `BibTeXEntry._parse_field_value` static method.
7438
"""
75-
entry_type: EntryType
39+
entry_type: str
7640
name: str
7741
fields: Dict[str, str]
7842

7943
@classmethod
8044
def from_string(cls, entry_string: str) -> "BibTeXEntry":
81-
entry_type_string: str = entry_string.split("{")[0].lstrip("@")
82-
entry_type = EntryType.from_string(entry_type_string)
45+
"""
46+
Parse a `BibTeXEntry` from a string.
47+
"""
48+
# First, we find and canonicalize the `entry_type`
49+
entry_type_string: str = entry_string.split("{")[0].lstrip("@").lower()
50+
if RESOLVE_ENTRY_TYPE_ALIAS.get(entry_type_string):
51+
entry_type: str = RESOLVE_ENTRY_TYPE_ALIAS[entry_type_string]
52+
else:
53+
entry_type = entry_type_string
8354

8455
name: str = entry_string.split("{")[1].split(",")[0]
8556
raw_fields = cls._split_fields(entry_string)

bibtex_linter/verification.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"""
77
from typing import Callable, TypeVar, List, Optional, Set
88

9-
from bibtex_linter.parser import BibTeXEntry, EntryType
9+
from bibtex_linter.parser import BibTeXEntry
1010

1111
# The dynamic list of known rules.
1212
# This list gets updated when a method with the `@linter_rule` decorator gets imported.
@@ -17,7 +17,7 @@
1717
LINTER_RULE_TYPE = TypeVar("LINTER_RULE_TYPE", bound=Callable[[BibTeXEntry], List[str]])
1818

1919

20-
def linter_rule(entry_type: Optional[EntryType] = None) -> Callable[[LINTER_RULE_TYPE], LINTER_RULE_TYPE]:
20+
def linter_rule(entry_type: Optional[str] = None) -> Callable[[LINTER_RULE_TYPE], LINTER_RULE_TYPE]:
2121
"""
2222
Decorator to mark a method defines rules to be checked by the linter for a specific entry type.
2323

test/test_parser.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import os
33
from typing import Dict, List
44

5-
from bibtex_linter.parser import EntryType, BibTeXEntry, split_entries, parse_bibtex_file
5+
from bibtex_linter.parser import BibTeXEntry, split_entries, parse_bibtex_file
66

77

88
class TestBibTeXEntry(unittest.TestCase):
@@ -245,15 +245,15 @@ def test_parse_all_entries(self) -> None:
245245
self.assertEqual(17, len(entries))
246246

247247
expected_types = {
248-
EntryType.ARTICLE: 1,
249-
EntryType.CONFERENCE: 1,
250-
EntryType.ONLINE: 1,
251-
EntryType.TECH_REPORT: 1,
252-
EntryType.BOOK: 1,
253-
EntryType.MISC: 9,
254-
EntryType.STANDARD: 1,
255-
EntryType.IN_BOOK: 1,
256-
EntryType.IN_COLLECTION: 1,
248+
"article": 1,
249+
"conference": 1,
250+
"online": 1,
251+
"techreport": 1,
252+
"book": 1,
253+
"misc": 9,
254+
"standard": 1,
255+
"inbook": 1,
256+
"incollection": 1,
257257
}
258258

259259
for entry_type, expected_count in expected_types.items():
@@ -262,121 +262,121 @@ def test_parse_all_entries(self) -> None:
262262
self.assertEqual(expected_count, actual_count)
263263

264264
def test_entry_fields_and_values(self) -> None:
265-
expected_entries: List[Dict[str, EntryType | Dict[str, str]]] = [
265+
expected_entries: List[Dict[str, str | Dict[str, str]]] = [
266266
{
267-
"type": EntryType.ARTICLE,
267+
"type": "article",
268268
"fields": {
269269
"author": "Tests basic article",
270270
"title": "Standard field format",
271271
"year": "2020"
272272
}
273273
},
274274
{
275-
"type": EntryType.CONFERENCE,
275+
"type": "conference",
276276
"fields": {
277277
"author": "Should map to CONFERENCE",
278278
"title": "Using alias INPROCEEDINGS",
279279
"year": "2021"
280280
}
281281
},
282282
{
283-
"type": EntryType.ONLINE,
283+
"type": "online",
284284
"fields": {
285285
"author": "Should map to ONLINE",
286286
"url": "https://example.com"
287287
}
288288
},
289289
{
290-
"type": EntryType.TECH_REPORT,
290+
"type": "techreport",
291291
"fields": {
292292
"author": "Should map to TECH_REPORT",
293293
"title": "Tech report via alias"
294294
}
295295
},
296296
{
297-
"type": EntryType.BOOK,
297+
"type": "book",
298298
"fields": {
299299
"author": "Extra spaces around field and value",
300300
"title": "Trimmed Title",
301301
"year": "2023"
302302
}
303303
},
304304
{
305-
"type": EntryType.MISC,
305+
"type": "misc",
306306
"fields": {
307307
"note": "This has nested braces inside",
308308
"comment": "But only outermost braces should be stripped"
309309
}
310310
},
311311
{
312-
"type": EntryType.MISC,
312+
"type": "misc",
313313
"fields": {
314314
"author": "Double brace test",
315315
"title": "Another level of nesting"
316316
}
317317
},
318318
{
319-
"type": EntryType.MISC,
319+
"type": "misc",
320320
"fields": {
321321
"author": "Quoted Author",
322322
"title": "Simple quoted title"
323323
}
324324
},
325325
{
326-
"type": EntryType.MISC,
326+
"type": "misc",
327327
"fields": {
328328
"note": "This value\nspans multiple\nlines"
329329
}
330330
},
331331
{
332-
"type": EntryType.MISC,
332+
"type": "misc",
333333
"fields": {
334334
"howpublished": r"\url{https://wrapped-url.com}"
335335
}
336336
},
337337
{
338-
"type": EntryType.MISC,
338+
"type": "misc",
339339
"fields": {
340340
"title": r"\LaTeX command in value"
341341
}
342342
},
343343
{
344-
"type": EntryType.MISC,
344+
"type": "misc",
345345
"fields": {
346346
"author": "Trailing Comma",
347347
"title": "Should be OK"
348348
}
349349
},
350350
{
351-
"type": EntryType.MISC,
351+
"type": "misc",
352352
"fields": {
353353
"author": "No Trailing Comma"
354354
}
355355
},
356356
{
357-
"type": EntryType.MISC,
357+
"type": "misc",
358358
"fields": {
359359
"author": "Newlines and spacing\neverywhere",
360360
"title": "Still valid"
361361
}
362362
},
363363
{
364-
"type": EntryType.STANDARD,
364+
"type": "standard",
365365
"fields": {
366366
"author": "Tests EntryType.STANDARD",
367367
"title": "Formal standard ref"
368368
}
369369
},
370370
{
371-
"type": EntryType.IN_BOOK,
371+
"type": "inbook",
372372
"fields": {
373373
"author": "Part of a book",
374374
"title": "Chapter Title",
375375
"booktitle": "Whole Book Title"
376376
}
377377
},
378378
{
379-
"type": EntryType.IN_COLLECTION,
379+
"type": "incollection",
380380
"fields": {
381381
"author": "Self-contained part of a collection",
382382
"title": "In Collection Title",

0 commit comments

Comments
 (0)