Skip to content

Commit 61b74b0

Browse files
koppor and claude committed
Split plain-text citation hover into its own files
Pull the text-based citation code out of RefHover/RefHoverDetect into two dedicated translation units, mirroring the existing pure-detect vs. engine-driven layering:

- RefHoverTextDetect.{cpp,h}: pure pattern matchers (DetectCitationInPageText, FindSurnameInPageText, IsNamePrefix). No engine/HWND deps, unit-tested via RefHover_ut.
- RefHoverText.{cpp,h}: engine page-walk, per-document lookup cache, and the multi-word / fragmented-surname fallback (RefHoverTryPlainText). RefHoverDestroy now calls the new public RefHoverFreeLookupCache.

RefHoverDetect keeps only the link/destination detectors; RefHover keeps the popup UI / render machinery. Canvas.cpp and RefHover.cpp include the new RefHoverText.h.

Build files (premake + vcxproj/filters for the app and test_util) updated for the new files.

No behavior change; build and unit tests pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 9a09f80 commit 61b74b0

17 files changed

Lines changed: 744 additions & 648 deletions

premake5.files.lua

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,8 @@ function sumatrapdf_files()
798798
"ProgressUpdateUI.*",
799799
"RefHover.*",
800800
"RefHoverDetect.*",
801+
"RefHoverText.*",
802+
"RefHoverTextDetect.*",
801803
"RegistryInstaller.*",
802804
"RegistryPreview.*",
803805
"RegistrySearchFilter.*",
@@ -1364,6 +1366,7 @@ function test_util_files()
13641366
"DisplayMode.*",
13651367
"Flags.*",
13661368
"RefHoverDetect.*",
1369+
"RefHoverTextDetect.*",
13671370
"SettingsStructs.*",
13681371
--"StressTesting.*",
13691372
"SumatraConfig.*",

src/Canvas.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "Translations.h"
5959

6060
#include "RefHover.h"
61+
#include "RefHoverText.h"
6162

6263
#include "utils/Log.h"
6364

@@ -985,8 +986,8 @@ static void OnMouseMove(MainWindow* win, int x, int y, WPARAM) {
985986
pageScreenRect.y = topLeft.y;
986987
}
987988
int delayMs = gGlobalPrefs->citationHoverDelay;
988-
RefHoverSchedule(win->refHover, win->hwndCanvas, delayMs, screenPt, destPage, destX, destY,
989-
0.f, srcPageNo, RectF{}, pageScreenRect);
989+
RefHoverSchedule(win->refHover, win->hwndCanvas, delayMs, screenPt, destPage, destX, destY, 0.f,
990+
srcPageNo, RectF{}, pageScreenRect);
990991
scheduled = true;
991992
}
992993
}

src/RefHover.cpp

Lines changed: 2 additions & 223 deletions
Original file line numberDiff line numberDiff line change
@@ -68,64 +68,10 @@
6868
#include "EngineBase.h"
6969
#include "RefHover.h"
7070
#include "RefHoverDetect.h"
71+
#include "RefHoverText.h"
7172

7273
#define REF_HOVER_CLASS L"SumatraPDFRefHover"
7374

74-
// === Plain-text citation lookup cache ===
75-
// Keyed by (surname, year, srcPage) so the same citation hovered repeatedly
76-
// is resolved instantly. Negative results (citation not found) are also
77-
// cached to avoid re-scanning the document on each hover.
78-
struct CitationCacheEntry {
79-
char* surname; // owned UTF-8
80-
int year;
81-
int srcPage; // page where the lookup was issued (so cap at srcPage works per-page)
82-
int destPage; // -1 if not found
83-
float destX;
84-
float destY;
85-
};
86-
87-
struct RefLookupCache {
88-
Vec<CitationCacheEntry> entries;
89-
};
90-
91-
static const CitationCacheEntry* CacheLookup(RefLookupCache* c, const char* surname, int year, int srcPage) {
92-
if (!c) {
93-
return nullptr;
94-
}
95-
for (size_t i = 0; i < c->entries.size(); i++) {
96-
const CitationCacheEntry& e = c->entries[i];
97-
if (e.year == year && e.srcPage == srcPage && str::Eq(e.surname, surname)) {
98-
return &e;
99-
}
100-
}
101-
return nullptr;
102-
}
103-
104-
static void CacheInsert(RefLookupCache* c, const char* surname, int year, int srcPage, int destPage, float destX,
105-
float destY) {
106-
if (!c) {
107-
return;
108-
}
109-
CitationCacheEntry e;
110-
e.surname = str::Dup(surname);
111-
e.year = year;
112-
e.srcPage = srcPage;
113-
e.destPage = destPage;
114-
e.destX = destX;
115-
e.destY = destY;
116-
c->entries.Append(e);
117-
}
118-
119-
static void CacheFree(RefLookupCache* c) {
120-
if (!c) {
121-
return;
122-
}
123-
for (size_t i = 0; i < c->entries.size(); i++) {
124-
str::Free(c->entries[i].surname);
125-
}
126-
delete c;
127-
}
128-
12975
// upper bound for the auto-fit base zoom. We render at min(kRenderZoom,
13076
// fit-to-popup-max), then multiply by RefHoverState::Displayed::userZoom
13177
// (the mouse-wheel adjustment).
@@ -267,8 +213,7 @@ void RefHoverDestroy(RefHoverState* s) {
267213
}
268214
delete s->bmp;
269215
s->bmp = nullptr;
270-
CacheFree(s->lookupCache);
271-
s->lookupCache = nullptr;
216+
RefHoverFreeLookupCache(s);
272217
delete s;
273218
}
274219

@@ -1031,169 +976,3 @@ void RefHoverOnTimer(RefHoverState* s, HWND hwndCanvas, EngineBase* engine, floa
1031976
req.destYRaw = s->pending.destY;
1032977
RefHoverRequestRender(s, engine, req);
1033978
}
1034-
1035-
// === Plain-text citation lookup ===
1036-
1037-
// Result of detecting a citation under the cursor.
1038-
struct DetectedCitation {
1039-
char* surname; // owned UTF-8 (caller frees), or nullptr
1040-
int year;
1041-
};
1042-
1043-
static void FreeDetectedCitation(DetectedCitation* c) {
1044-
str::Free(c->surname);
1045-
c->surname = nullptr;
1046-
}
1047-
1048-
// Detect a citation pattern under the cursor on srcPage. On success, returns
1049-
// true and fills *out with a freshly-allocated surname and year. The actual
1050-
// pattern matching is the pure DetectCitationInPageText (RefHoverDetect.cpp).
1051-
static bool DetectCitationAtCursor(EngineBase* engine, int srcPage, Point pagePos, DetectedCitation* out) {
1052-
out->surname = nullptr;
1053-
out->year = 0;
1054-
int textLen = 0;
1055-
Rect* coords = nullptr;
1056-
const WCHAR* text = engine->GetTextForPage(srcPage, &textLen, &coords);
1057-
return DetectCitationInPageText(text, coords, textLen, pagePos, &out->surname, &out->year);
1058-
}
1059-
1060-
// Walk pages from pageCount → srcPage looking for a bibliography entry that
1061-
// matches the surname + year. Returns true on hit.
1062-
static bool FindReferenceLocation(EngineBase* engine, int srcPage, const char* surname, int year, int* destPageOut,
1063-
float* destXOut, float* destYOut) {
1064-
if (!engine || !surname || !*surname) {
1065-
return false;
1066-
}
1067-
int pageCount = engine->PageCount();
1068-
if (pageCount <= 0 || srcPage < 1 || srcPage > pageCount) {
1069-
return false;
1070-
}
1071-
1072-
// Convert surname to wide string for engine text matching.
1073-
WCHAR* surnameW = ToWStr(surname);
1074-
if (!surnameW) {
1075-
return false;
1076-
}
1077-
int surnameLen = (int)str::Len(surnameW);
1078-
if (surnameLen < 2) {
1079-
free(surnameW);
1080-
return false;
1081-
}
1082-
1083-
bool found = false;
1084-
for (int p = pageCount; p >= srcPage; p--) {
1085-
int textLen = 0;
1086-
Rect* coords = nullptr;
1087-
const WCHAR* text = engine->GetTextForPage(p, &textLen, &coords);
1088-
float x = 0, y = 0;
1089-
if (FindSurnameInPageText(text, coords, textLen, surnameW, surnameLen, year, &x, &y)) {
1090-
*destPageOut = p;
1091-
*destXOut = x;
1092-
*destYOut = y;
1093-
found = true;
1094-
break;
1095-
}
1096-
}
1097-
free(surnameW);
1098-
return found;
1099-
}
1100-
1101-
// Look up `surname` in the cache; on miss, do a fresh document scan and
1102-
// insert the result (positive or negative). Returns true on positive hit.
1103-
static bool LookupOrSearch(RefHoverState* s, EngineBase* engine, int srcPage, const char* surname, int year,
1104-
int& destPageOut, float& destXOut, float& destYOut) {
1105-
const CitationCacheEntry* hit = CacheLookup(s->lookupCache, surname, year, srcPage);
1106-
if (hit) {
1107-
if (hit->destPage > 0) {
1108-
destPageOut = hit->destPage;
1109-
destXOut = hit->destX;
1110-
destYOut = hit->destY;
1111-
return true;
1112-
}
1113-
return false;
1114-
}
1115-
int destPage = -1;
1116-
float destX = -1.f, destY = -1.f;
1117-
if (FindReferenceLocation(engine, srcPage, surname, year, &destPage, &destX, &destY)) {
1118-
CacheInsert(s->lookupCache, surname, year, srcPage, destPage, destX, destY);
1119-
destPageOut = destPage;
1120-
destXOut = destX;
1121-
destYOut = destY;
1122-
return true;
1123-
}
1124-
CacheInsert(s->lookupCache, surname, year, srcPage, -1, 0.f, 0.f);
1125-
return false;
1126-
}
1127-
1128-
bool RefHoverTryPlainText(RefHoverState* s, EngineBase* engine, int srcPage, Point pagePos, int& destPageOut,
1129-
float& destXOut, float& destYOut) {
1130-
if (!s || !engine || srcPage <= 0) {
1131-
return false;
1132-
}
1133-
DetectedCitation cite{};
1134-
if (!DetectCitationAtCursor(engine, srcPage, pagePos, &cite)) {
1135-
return false;
1136-
}
1137-
1138-
if (!s->lookupCache) {
1139-
s->lookupCache = new RefLookupCache();
1140-
}
1141-
1142-
bool result = LookupOrSearch(s, engine, srcPage, cite.surname, cite.year, destPageOut, destXOut, destYOut);
1143-
1144-
// Fallback: if surname has multiple space-separated parts and the full
1145-
// form didn't match, try each part as a prefix in descending-length
1146-
// order. Two patterns this covers:
1147-
// 1. Bibliography lists the entry under just the last name
1148-
// ("Vrielink, Oude R. A." vs. detected "Oude Vrielink").
1149-
// 2. PDF text extraction split a single-word surname by dropping a
1150-
// glyph ("Bash b" for "Bashab") — the longest fragment ("Bash")
1151-
// prefix-matches the real surname in the bibliography.
1152-
if (!result && cite.surname && str::FindChar(cite.surname, ' ')) {
1153-
struct Part {
1154-
const char* s;
1155-
int len;
1156-
};
1157-
Part parts[8];
1158-
int nParts = 0;
1159-
const char* p = cite.surname;
1160-
while (*p && nParts < 8) {
1161-
while (*p == ' ') {
1162-
p++;
1163-
}
1164-
if (!*p) {
1165-
break;
1166-
}
1167-
const char* start = p;
1168-
while (*p && *p != ' ') {
1169-
p++;
1170-
}
1171-
int len = (int)(p - start);
1172-
if (len >= 2) {
1173-
parts[nParts].s = start;
1174-
parts[nParts].len = len;
1175-
nParts++;
1176-
}
1177-
}
1178-
// Sort parts by length descending (simple selection sort, n<=8).
1179-
for (int i = 0; i < nParts - 1; i++) {
1180-
for (int j = i + 1; j < nParts; j++) {
1181-
if (parts[j].len > parts[i].len) {
1182-
Part t = parts[i];
1183-
parts[i] = parts[j];
1184-
parts[j] = t;
1185-
}
1186-
}
1187-
}
1188-
for (int i = 0; i < nParts && !result; i++) {
1189-
char buf[64];
1190-
int n = parts[i].len < 63 ? parts[i].len : 63;
1191-
memcpy(buf, parts[i].s, n);
1192-
buf[n] = 0;
1193-
result = LookupOrSearch(s, engine, srcPage, buf, cite.year, destPageOut, destXOut, destYOut);
1194-
}
1195-
}
1196-
1197-
FreeDetectedCitation(&cite);
1198-
return result;
1199-
}

src/RefHover.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,3 @@ bool RefHoverWheelZoom(RefHoverState* s, EngineBase* engine, int wheelDelta);
108108
// (continuous scrolling). Popup window keeps its initial size; only the
109109
// rendered region's Y (and possibly page number) changes.
110110
bool RefHoverWheelScroll(RefHoverState* s, EngineBase* engine, int wheelDelta);
111-
112-
// Plain-text citation hover: when no link element is under the cursor, try
113-
// to detect a "(Surname et al., 2020)" / "Surname (2020)" pattern at pagePos
114-
// on srcPage, find the bibliography entry that matches, and return its
115-
// location. Returns true on success and fills destPage/destX/destY.
116-
// Lookups are cached on s.
117-
bool RefHoverTryPlainText(RefHoverState* s, EngineBase* engine, int srcPage, Point pagePos, int& destPageOut,
118-
float& destXOut, float& destYOut);

0 commit comments

Comments
 (0)