|
68 | 68 | #include "EngineBase.h" |
69 | 69 | #include "RefHover.h" |
70 | 70 | #include "RefHoverDetect.h" |
| 71 | +#include "RefHoverText.h" |
71 | 72 |
|
72 | 73 | #define REF_HOVER_CLASS L"SumatraPDFRefHover" |
73 | 74 |
|
// === Plain-text citation lookup cache ===
// Entries are keyed by (surname, year, srcPage), so hovering the same
// citation again is answered from the cache. Lookups that found nothing are
// stored as well, so a miss does not trigger a new document scan on every
// hover.
struct CitationCacheEntry {
    // key
    char* surname; // UTF-8, owned by the entry
    int year;
    int srcPage; // page the lookup was issued from (the scan is bounded by srcPage)
    // value
    int destPage; // -1 when the citation was not found
    float destX;
    float destY;
};
86 | | - |
87 | | -struct RefLookupCache { |
88 | | - Vec<CitationCacheEntry> entries; |
89 | | -}; |
90 | | - |
91 | | -static const CitationCacheEntry* CacheLookup(RefLookupCache* c, const char* surname, int year, int srcPage) { |
92 | | - if (!c) { |
93 | | - return nullptr; |
94 | | - } |
95 | | - for (size_t i = 0; i < c->entries.size(); i++) { |
96 | | - const CitationCacheEntry& e = c->entries[i]; |
97 | | - if (e.year == year && e.srcPage == srcPage && str::Eq(e.surname, surname)) { |
98 | | - return &e; |
99 | | - } |
100 | | - } |
101 | | - return nullptr; |
102 | | -} |
103 | | - |
104 | | -static void CacheInsert(RefLookupCache* c, const char* surname, int year, int srcPage, int destPage, float destX, |
105 | | - float destY) { |
106 | | - if (!c) { |
107 | | - return; |
108 | | - } |
109 | | - CitationCacheEntry e; |
110 | | - e.surname = str::Dup(surname); |
111 | | - e.year = year; |
112 | | - e.srcPage = srcPage; |
113 | | - e.destPage = destPage; |
114 | | - e.destX = destX; |
115 | | - e.destY = destY; |
116 | | - c->entries.Append(e); |
117 | | -} |
118 | | - |
119 | | -static void CacheFree(RefLookupCache* c) { |
120 | | - if (!c) { |
121 | | - return; |
122 | | - } |
123 | | - for (size_t i = 0; i < c->entries.size(); i++) { |
124 | | - str::Free(c->entries[i].surname); |
125 | | - } |
126 | | - delete c; |
127 | | -} |
128 | | - |
129 | 75 | // upper bound for the auto-fit base zoom. We render at min(kRenderZoom, |
130 | 76 | // fit-to-popup-max), then multiply by RefHoverState::Displayed::userZoom |
131 | 77 | // (the mouse-wheel adjustment). |
@@ -267,8 +213,7 @@ void RefHoverDestroy(RefHoverState* s) { |
267 | 213 | } |
268 | 214 | delete s->bmp; |
269 | 215 | s->bmp = nullptr; |
270 | | - CacheFree(s->lookupCache); |
271 | | - s->lookupCache = nullptr; |
| 216 | + RefHoverFreeLookupCache(s); |
272 | 217 | delete s; |
273 | 218 | } |
274 | 219 |
|
@@ -1031,169 +976,3 @@ void RefHoverOnTimer(RefHoverState* s, HWND hwndCanvas, EngineBase* engine, floa |
1031 | 976 | req.destYRaw = s->pending.destY; |
1032 | 977 | RefHoverRequestRender(s, engine, req); |
1033 | 978 | } |
1034 | | - |
1035 | | -// === Plain-text citation lookup === |
1036 | | - |
// What citation detection found under the cursor: an author surname plus a
// publication year.
struct DetectedCitation {
    char* surname; // UTF-8, owned; nullptr when nothing was detected. Caller frees.
    int year;
};
1042 | | - |
1043 | | -static void FreeDetectedCitation(DetectedCitation* c) { |
1044 | | - str::Free(c->surname); |
1045 | | - c->surname = nullptr; |
1046 | | -} |
1047 | | - |
1048 | | -// Detect a citation pattern under the cursor on srcPage. On success, returns |
1049 | | -// true and fills *out with a freshly-allocated surname and year. The actual |
1050 | | -// pattern matching is the pure DetectCitationInPageText (RefHoverDetect.cpp). |
1051 | | -static bool DetectCitationAtCursor(EngineBase* engine, int srcPage, Point pagePos, DetectedCitation* out) { |
1052 | | - out->surname = nullptr; |
1053 | | - out->year = 0; |
1054 | | - int textLen = 0; |
1055 | | - Rect* coords = nullptr; |
1056 | | - const WCHAR* text = engine->GetTextForPage(srcPage, &textLen, &coords); |
1057 | | - return DetectCitationInPageText(text, coords, textLen, pagePos, &out->surname, &out->year); |
1058 | | -} |
1059 | | - |
1060 | | -// Walk pages from pageCount → srcPage looking for a bibliography entry that |
1061 | | -// matches the surname + year. Returns true on hit. |
1062 | | -static bool FindReferenceLocation(EngineBase* engine, int srcPage, const char* surname, int year, int* destPageOut, |
1063 | | - float* destXOut, float* destYOut) { |
1064 | | - if (!engine || !surname || !*surname) { |
1065 | | - return false; |
1066 | | - } |
1067 | | - int pageCount = engine->PageCount(); |
1068 | | - if (pageCount <= 0 || srcPage < 1 || srcPage > pageCount) { |
1069 | | - return false; |
1070 | | - } |
1071 | | - |
1072 | | - // Convert surname to wide string for engine text matching. |
1073 | | - WCHAR* surnameW = ToWStr(surname); |
1074 | | - if (!surnameW) { |
1075 | | - return false; |
1076 | | - } |
1077 | | - int surnameLen = (int)str::Len(surnameW); |
1078 | | - if (surnameLen < 2) { |
1079 | | - free(surnameW); |
1080 | | - return false; |
1081 | | - } |
1082 | | - |
1083 | | - bool found = false; |
1084 | | - for (int p = pageCount; p >= srcPage; p--) { |
1085 | | - int textLen = 0; |
1086 | | - Rect* coords = nullptr; |
1087 | | - const WCHAR* text = engine->GetTextForPage(p, &textLen, &coords); |
1088 | | - float x = 0, y = 0; |
1089 | | - if (FindSurnameInPageText(text, coords, textLen, surnameW, surnameLen, year, &x, &y)) { |
1090 | | - *destPageOut = p; |
1091 | | - *destXOut = x; |
1092 | | - *destYOut = y; |
1093 | | - found = true; |
1094 | | - break; |
1095 | | - } |
1096 | | - } |
1097 | | - free(surnameW); |
1098 | | - return found; |
1099 | | -} |
1100 | | - |
1101 | | -// Look up `surname` in the cache; on miss, do a fresh document scan and |
1102 | | -// insert the result (positive or negative). Returns true on positive hit. |
1103 | | -static bool LookupOrSearch(RefHoverState* s, EngineBase* engine, int srcPage, const char* surname, int year, |
1104 | | - int& destPageOut, float& destXOut, float& destYOut) { |
1105 | | - const CitationCacheEntry* hit = CacheLookup(s->lookupCache, surname, year, srcPage); |
1106 | | - if (hit) { |
1107 | | - if (hit->destPage > 0) { |
1108 | | - destPageOut = hit->destPage; |
1109 | | - destXOut = hit->destX; |
1110 | | - destYOut = hit->destY; |
1111 | | - return true; |
1112 | | - } |
1113 | | - return false; |
1114 | | - } |
1115 | | - int destPage = -1; |
1116 | | - float destX = -1.f, destY = -1.f; |
1117 | | - if (FindReferenceLocation(engine, srcPage, surname, year, &destPage, &destX, &destY)) { |
1118 | | - CacheInsert(s->lookupCache, surname, year, srcPage, destPage, destX, destY); |
1119 | | - destPageOut = destPage; |
1120 | | - destXOut = destX; |
1121 | | - destYOut = destY; |
1122 | | - return true; |
1123 | | - } |
1124 | | - CacheInsert(s->lookupCache, surname, year, srcPage, -1, 0.f, 0.f); |
1125 | | - return false; |
1126 | | -} |
1127 | | - |
1128 | | -bool RefHoverTryPlainText(RefHoverState* s, EngineBase* engine, int srcPage, Point pagePos, int& destPageOut, |
1129 | | - float& destXOut, float& destYOut) { |
1130 | | - if (!s || !engine || srcPage <= 0) { |
1131 | | - return false; |
1132 | | - } |
1133 | | - DetectedCitation cite{}; |
1134 | | - if (!DetectCitationAtCursor(engine, srcPage, pagePos, &cite)) { |
1135 | | - return false; |
1136 | | - } |
1137 | | - |
1138 | | - if (!s->lookupCache) { |
1139 | | - s->lookupCache = new RefLookupCache(); |
1140 | | - } |
1141 | | - |
1142 | | - bool result = LookupOrSearch(s, engine, srcPage, cite.surname, cite.year, destPageOut, destXOut, destYOut); |
1143 | | - |
1144 | | - // Fallback: if surname has multiple space-separated parts and the full |
1145 | | - // form didn't match, try each part as a prefix in descending-length |
1146 | | - // order. Two patterns this covers: |
1147 | | - // 1. Bibliography lists the entry under just the last name |
1148 | | - // ("Vrielink, Oude R. A." vs. detected "Oude Vrielink"). |
1149 | | - // 2. PDF text extraction split a single-word surname by dropping a |
1150 | | - // glyph ("Bash b" for "Bashab") — the longest fragment ("Bash") |
1151 | | - // prefix-matches the real surname in the bibliography. |
1152 | | - if (!result && cite.surname && str::FindChar(cite.surname, ' ')) { |
1153 | | - struct Part { |
1154 | | - const char* s; |
1155 | | - int len; |
1156 | | - }; |
1157 | | - Part parts[8]; |
1158 | | - int nParts = 0; |
1159 | | - const char* p = cite.surname; |
1160 | | - while (*p && nParts < 8) { |
1161 | | - while (*p == ' ') { |
1162 | | - p++; |
1163 | | - } |
1164 | | - if (!*p) { |
1165 | | - break; |
1166 | | - } |
1167 | | - const char* start = p; |
1168 | | - while (*p && *p != ' ') { |
1169 | | - p++; |
1170 | | - } |
1171 | | - int len = (int)(p - start); |
1172 | | - if (len >= 2) { |
1173 | | - parts[nParts].s = start; |
1174 | | - parts[nParts].len = len; |
1175 | | - nParts++; |
1176 | | - } |
1177 | | - } |
1178 | | - // Sort parts by length descending (simple selection sort, n<=8). |
1179 | | - for (int i = 0; i < nParts - 1; i++) { |
1180 | | - for (int j = i + 1; j < nParts; j++) { |
1181 | | - if (parts[j].len > parts[i].len) { |
1182 | | - Part t = parts[i]; |
1183 | | - parts[i] = parts[j]; |
1184 | | - parts[j] = t; |
1185 | | - } |
1186 | | - } |
1187 | | - } |
1188 | | - for (int i = 0; i < nParts && !result; i++) { |
1189 | | - char buf[64]; |
1190 | | - int n = parts[i].len < 63 ? parts[i].len : 63; |
1191 | | - memcpy(buf, parts[i].s, n); |
1192 | | - buf[n] = 0; |
1193 | | - result = LookupOrSearch(s, engine, srcPage, buf, cite.year, destPageOut, destXOut, destYOut); |
1194 | | - } |
1195 | | - } |
1196 | | - |
1197 | | - FreeDetectedCitation(&cite); |
1198 | | - return result; |
1199 | | -} |
0 commit comments