Skip to content

Commit 2a822f6

Browse files
committed
loosens the title regex to allow years, question
marks, apostrophes, and non-breaking spaces in titles.
1 parent 03b7639 commit 2a822f6

1 file changed

Lines changed: 2 additions & 2 deletions

File tree

server/api/views/uploadFile/title.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66

77

88
# regular expression to match common research white paper titles. Created by ChatGPT
9-
# requires at least 3 words, no dates, no version numbers.
9+
# requires at least 3 words, no version numbers.
1010
title_regex = re.compile(
11-
r'^(?=(?:\b\w+\b[\s:,\-\(\)]*){3,})(?!.*\b(?:19|20)\d{2}\b)(?!.*\bv\d+\b)[A-Za-z0-9][\w\s:,\-\(\)]*[A-Za-z\)]$', re.IGNORECASE)
11+
r"^(?=(?:\b\w+\b[^A-Za-z0-9]*){3,})(?!.*\bv\d+\b)[A-Za-z0-9].+[A-Za-z\)?!]$", re.IGNORECASE)
1212

1313

1414
def generate_title(pdf: fitz.Document) -> str | None:

0 commit comments

Comments
 (0)