|
71 | 71 |
|
72 | 72 | #define REF_HOVER_CLASS L"SumatraPDFRefHover" |
73 | 73 |
|
// === Plain-text citation lookup cache ===
// Keyed by (surname, year, srcPage) so that hovering the same citation
// repeatedly is resolved without re-scanning the document. Negative results
// (citation not found) are cached as well, recorded with destPage == -1.
//
// Every member has a default so that a default-constructed entry is a
// well-defined "not found" record rather than a bag of indeterminate values.
struct CitationCacheEntry {
    char* surname = nullptr; // owned UTF-8 copy; released by CacheFree()
    int year = 0;
    // Page the lookup was issued from. Part of the key because the document
    // scan only covers pages srcPage..pageCount, so the same surname/year can
    // resolve differently depending on where the hover happened.
    int srcPage = 0;
    int destPage = -1; // page of the match, or -1 if not found
    float destX = 0.f; // position of the match on destPage (only meaningful on a hit)
    float destY = 0.f;
};
| 86 | + |
| 87 | +struct RefLookupCache { |
| 88 | + Vec<CitationCacheEntry> entries; |
| 89 | +}; |
| 90 | + |
| 91 | +static const CitationCacheEntry* CacheLookup(RefLookupCache* c, const char* surname, int year, int srcPage) { |
| 92 | + if (!c) { |
| 93 | + return nullptr; |
| 94 | + } |
| 95 | + for (size_t i = 0; i < c->entries.size(); i++) { |
| 96 | + const CitationCacheEntry& e = c->entries[i]; |
| 97 | + if (e.year == year && e.srcPage == srcPage && str::Eq(e.surname, surname)) { |
| 98 | + return &e; |
| 99 | + } |
| 100 | + } |
| 101 | + return nullptr; |
| 102 | +} |
| 103 | + |
| 104 | +static void CacheInsert(RefLookupCache* c, const char* surname, int year, int srcPage, int destPage, float destX, |
| 105 | + float destY) { |
| 106 | + if (!c) { |
| 107 | + return; |
| 108 | + } |
| 109 | + CitationCacheEntry e; |
| 110 | + e.surname = str::Dup(surname); |
| 111 | + e.year = year; |
| 112 | + e.srcPage = srcPage; |
| 113 | + e.destPage = destPage; |
| 114 | + e.destX = destX; |
| 115 | + e.destY = destY; |
| 116 | + c->entries.Append(e); |
| 117 | +} |
| 118 | + |
| 119 | +static void CacheFree(RefLookupCache* c) { |
| 120 | + if (!c) { |
| 121 | + return; |
| 122 | + } |
| 123 | + for (size_t i = 0; i < c->entries.size(); i++) { |
| 124 | + str::Free(c->entries[i].surname); |
| 125 | + } |
| 126 | + delete c; |
| 127 | +} |
| 128 | + |
74 | 129 | // upper bound for the auto-fit base zoom. We render at min(kRenderZoom, |
75 | 130 | // fit-to-popup-max), then multiply by RefHoverState::Displayed::userZoom |
76 | 131 | // (the mouse-wheel adjustment). |
@@ -212,6 +267,8 @@ void RefHoverDestroy(RefHoverState* s) { |
212 | 267 | } |
213 | 268 | delete s->bmp; |
214 | 269 | s->bmp = nullptr; |
| 270 | + CacheFree(s->lookupCache); |
| 271 | + s->lookupCache = nullptr; |
215 | 272 | delete s; |
216 | 273 | } |
217 | 274 |
|
@@ -974,3 +1031,169 @@ void RefHoverOnTimer(RefHoverState* s, HWND hwndCanvas, EngineBase* engine, floa |
974 | 1031 | req.destYRaw = s->pending.destY; |
975 | 1032 | RefHoverRequestRender(s, engine, req); |
976 | 1033 | } |
| 1034 | + |
| 1035 | +// === Plain-text citation lookup === |
| 1036 | + |
// Result of detecting a citation under the cursor.
// Members are default-initialized so that even a plain `DetectedCitation c;`
// can be handed to FreeDetectedCitation() without freeing a garbage pointer.
struct DetectedCitation {
    char* surname = nullptr; // owned UTF-8 (caller frees), or nullptr
    int year = 0;
};
| 1042 | + |
| 1043 | +static void FreeDetectedCitation(DetectedCitation* c) { |
| 1044 | + str::Free(c->surname); |
| 1045 | + c->surname = nullptr; |
| 1046 | +} |
| 1047 | + |
| 1048 | +// Detect a citation pattern under the cursor on srcPage. On success, returns |
| 1049 | +// true and fills *out with a freshly-allocated surname and year. The actual |
| 1050 | +// pattern matching is the pure DetectCitationInPageText (RefHoverDetect.cpp). |
| 1051 | +static bool DetectCitationAtCursor(EngineBase* engine, int srcPage, Point pagePos, DetectedCitation* out) { |
| 1052 | + out->surname = nullptr; |
| 1053 | + out->year = 0; |
| 1054 | + int textLen = 0; |
| 1055 | + Rect* coords = nullptr; |
| 1056 | + const WCHAR* text = engine->GetTextForPage(srcPage, &textLen, &coords); |
| 1057 | + return DetectCitationInPageText(text, coords, textLen, pagePos, &out->surname, &out->year); |
| 1058 | +} |
| 1059 | + |
| 1060 | +// Walk pages from pageCount → srcPage looking for a bibliography entry that |
| 1061 | +// matches the surname + year. Returns true on hit. |
| 1062 | +static bool FindReferenceLocation(EngineBase* engine, int srcPage, const char* surname, int year, int* destPageOut, |
| 1063 | + float* destXOut, float* destYOut) { |
| 1064 | + if (!engine || !surname || !*surname) { |
| 1065 | + return false; |
| 1066 | + } |
| 1067 | + int pageCount = engine->PageCount(); |
| 1068 | + if (pageCount <= 0 || srcPage < 1 || srcPage > pageCount) { |
| 1069 | + return false; |
| 1070 | + } |
| 1071 | + |
| 1072 | + // Convert surname to wide string for engine text matching. |
| 1073 | + WCHAR* surnameW = ToWStr(surname); |
| 1074 | + if (!surnameW) { |
| 1075 | + return false; |
| 1076 | + } |
| 1077 | + int surnameLen = (int)str::Len(surnameW); |
| 1078 | + if (surnameLen < 2) { |
| 1079 | + free(surnameW); |
| 1080 | + return false; |
| 1081 | + } |
| 1082 | + |
| 1083 | + bool found = false; |
| 1084 | + for (int p = pageCount; p >= srcPage; p--) { |
| 1085 | + int textLen = 0; |
| 1086 | + Rect* coords = nullptr; |
| 1087 | + const WCHAR* text = engine->GetTextForPage(p, &textLen, &coords); |
| 1088 | + float x = 0, y = 0; |
| 1089 | + if (FindSurnameInPageText(text, coords, textLen, surnameW, surnameLen, year, &x, &y)) { |
| 1090 | + *destPageOut = p; |
| 1091 | + *destXOut = x; |
| 1092 | + *destYOut = y; |
| 1093 | + found = true; |
| 1094 | + break; |
| 1095 | + } |
| 1096 | + } |
| 1097 | + free(surnameW); |
| 1098 | + return found; |
| 1099 | +} |
| 1100 | + |
| 1101 | +// Look up `surname` in the cache; on miss, do a fresh document scan and |
| 1102 | +// insert the result (positive or negative). Returns true on positive hit. |
| 1103 | +static bool LookupOrSearch(RefHoverState* s, EngineBase* engine, int srcPage, const char* surname, int year, |
| 1104 | + int& destPageOut, float& destXOut, float& destYOut) { |
| 1105 | + const CitationCacheEntry* hit = CacheLookup(s->lookupCache, surname, year, srcPage); |
| 1106 | + if (hit) { |
| 1107 | + if (hit->destPage > 0) { |
| 1108 | + destPageOut = hit->destPage; |
| 1109 | + destXOut = hit->destX; |
| 1110 | + destYOut = hit->destY; |
| 1111 | + return true; |
| 1112 | + } |
| 1113 | + return false; |
| 1114 | + } |
| 1115 | + int destPage = -1; |
| 1116 | + float destX = -1.f, destY = -1.f; |
| 1117 | + if (FindReferenceLocation(engine, srcPage, surname, year, &destPage, &destX, &destY)) { |
| 1118 | + CacheInsert(s->lookupCache, surname, year, srcPage, destPage, destX, destY); |
| 1119 | + destPageOut = destPage; |
| 1120 | + destXOut = destX; |
| 1121 | + destYOut = destY; |
| 1122 | + return true; |
| 1123 | + } |
| 1124 | + CacheInsert(s->lookupCache, surname, year, srcPage, -1, 0.f, 0.f); |
| 1125 | + return false; |
| 1126 | +} |
| 1127 | + |
| 1128 | +bool RefHoverTryPlainText(RefHoverState* s, EngineBase* engine, int srcPage, Point pagePos, int& destPageOut, |
| 1129 | + float& destXOut, float& destYOut) { |
| 1130 | + if (!s || !engine || srcPage <= 0) { |
| 1131 | + return false; |
| 1132 | + } |
| 1133 | + DetectedCitation cite{}; |
| 1134 | + if (!DetectCitationAtCursor(engine, srcPage, pagePos, &cite)) { |
| 1135 | + return false; |
| 1136 | + } |
| 1137 | + |
| 1138 | + if (!s->lookupCache) { |
| 1139 | + s->lookupCache = new RefLookupCache(); |
| 1140 | + } |
| 1141 | + |
| 1142 | + bool result = LookupOrSearch(s, engine, srcPage, cite.surname, cite.year, destPageOut, destXOut, destYOut); |
| 1143 | + |
| 1144 | + // Fallback: if surname has multiple space-separated parts and the full |
| 1145 | + // form didn't match, try each part as a prefix in descending-length |
| 1146 | + // order. Two patterns this covers: |
| 1147 | + // 1. Bibliography lists the entry under just the last name |
| 1148 | + // ("Vrielink, Oude R. A." vs. detected "Oude Vrielink"). |
| 1149 | + // 2. PDF text extraction split a single-word surname by dropping a |
| 1150 | + // glyph ("Bash b" for "Bashab") — the longest fragment ("Bash") |
| 1151 | + // prefix-matches the real surname in the bibliography. |
| 1152 | + if (!result && cite.surname && str::FindChar(cite.surname, ' ')) { |
| 1153 | + struct Part { |
| 1154 | + const char* s; |
| 1155 | + int len; |
| 1156 | + }; |
| 1157 | + Part parts[8]; |
| 1158 | + int nParts = 0; |
| 1159 | + const char* p = cite.surname; |
| 1160 | + while (*p && nParts < 8) { |
| 1161 | + while (*p == ' ') { |
| 1162 | + p++; |
| 1163 | + } |
| 1164 | + if (!*p) { |
| 1165 | + break; |
| 1166 | + } |
| 1167 | + const char* start = p; |
| 1168 | + while (*p && *p != ' ') { |
| 1169 | + p++; |
| 1170 | + } |
| 1171 | + int len = (int)(p - start); |
| 1172 | + if (len >= 2) { |
| 1173 | + parts[nParts].s = start; |
| 1174 | + parts[nParts].len = len; |
| 1175 | + nParts++; |
| 1176 | + } |
| 1177 | + } |
| 1178 | + // Sort parts by length descending (simple selection sort, n<=8). |
| 1179 | + for (int i = 0; i < nParts - 1; i++) { |
| 1180 | + for (int j = i + 1; j < nParts; j++) { |
| 1181 | + if (parts[j].len > parts[i].len) { |
| 1182 | + Part t = parts[i]; |
| 1183 | + parts[i] = parts[j]; |
| 1184 | + parts[j] = t; |
| 1185 | + } |
| 1186 | + } |
| 1187 | + } |
| 1188 | + for (int i = 0; i < nParts && !result; i++) { |
| 1189 | + char buf[64]; |
| 1190 | + int n = parts[i].len < 63 ? parts[i].len : 63; |
| 1191 | + memcpy(buf, parts[i].s, n); |
| 1192 | + buf[n] = 0; |
| 1193 | + result = LookupOrSearch(s, engine, srcPage, buf, cite.year, destPageOut, destXOut, destYOut); |
| 1194 | + } |
| 1195 | + } |
| 1196 | + |
| 1197 | + FreeDetectedCitation(&cite); |
| 1198 | + return result; |
| 1199 | +} |
0 commit comments