Skip to content

Commit 5d6ede0

Browse files
committed
Indexer: Fix F95ZONE_UNAVAILABLE for missing threads, improve error reporting
1 parent 70188e5 commit 5d6ede0

4 files changed

Lines changed: 47 additions & 25 deletions

File tree

indexer/cache.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ async def lifespan():
6464
try:
6565
yield
6666
finally:
67-
6867
await redis.aclose()
6968
redis = None
7069

@@ -145,6 +144,8 @@ async def _update_thread_cache(id: int, name: str) -> None:
145144
INDEX_ERROR: result.error_flag,
146145
EXPIRE_TIME: int(now + result.retry_delay),
147146
}
147+
if result.details:
148+
new_fields[INDEX_ERROR] += f": {result.details}"
148149
# Consider new error as a change
149150
if old_fields.get(INDEX_ERROR) != new_fields.get(INDEX_ERROR):
150151
new_fields[LAST_CHANGE] = int(now)

indexer/f95zone.py

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010
import aiohttp
1111
import aiolimiter
1212

13-
from common import meta
13+
from common import (
14+
meta,
15+
parser,
16+
)
1417

1518
RATELIMIT = aiolimiter.AsyncLimiter(max_rate=1, time_period=0.5)
1619
TIMEOUT = aiohttp.ClientTimeout(total=30, connect=30, sock_read=30, sock_connect=30)
@@ -32,7 +35,6 @@
3235
TEMP_ERROR_MESSAGES = (
3336
b'<div id="cf-error-details" class="p-0">',
3437
b"<b>504 - Gateway Timeout .</b>",
35-
b'<body data-template="error">',
3638
b"<title>502 Bad Gateway</title>",
3739
b"<title>Error 502</title>",
3840
b"An unexpected error occurred. Please try again later.",
@@ -44,6 +46,10 @@
4446
b'<script src="https://static.f95zone.to/assets/SamF95/ErrorPage',
4547
b'<div class="blockMessage"><p>Please check back in 10 mins</p></div>',
4648
)
49+
THREAD_MISSING_MESSAGES = (
50+
"The requested thread could not be found.",
51+
"You do not have permission to view this page or perform this action.",
52+
)
4753

4854
logger = logging.getLogger(__name__)
4955
session: aiohttp.ClientSession = None
@@ -68,6 +74,7 @@
6874
class IndexerError:
6975
error_flag: str
7076
retry_delay: int
77+
details: str = ""
7178

7279

7380
ERROR_SESSION_LOGGED_OUT = IndexerError(
@@ -79,6 +86,9 @@ class IndexerError:
7986
ERROR_F95ZONE_UNAVAILABLE = IndexerError(
8087
"F95ZONE_UNAVAILABLE", dt.timedelta(minutes=15).total_seconds()
8188
)
89+
ERROR_F95ZONE_ERROR = IndexerError(
90+
"F95ZONE_ERROR", dt.timedelta(minutes=15).total_seconds()
91+
)
8292
ERROR_THREAD_MISSING = IndexerError(
8393
"THREAD_MISSING", dt.timedelta(days=14).total_seconds()
8494
)
@@ -114,7 +124,6 @@ async def lifespan():
114124
try:
115125
yield
116126
finally:
117-
118127
await session.close()
119128
session = None
120129

@@ -123,10 +132,24 @@ def check_error(
123132
res: bytes | dict | Exception, logger: logging.Logger
124133
) -> IndexerError | None:
125134
if isinstance(res, bytes):
126-
if any((msg in res) for msg in LOGIN_ERROR_MESSAGES):
127-
logger.error("Logged out of F95zone")
128-
# TODO: maybe auto login, but xf_user cookie should be enough for a long time
129-
return ERROR_SESSION_LOGGED_OUT
135+
if b'<body data-template="error">' in res:
136+
try:
137+
html = parser.html(res)
138+
message = (
139+
html.select_one(".p-body-pageContent .blockMessage")
140+
.get_text()
141+
.strip()
142+
)
143+
if message in THREAD_MISSING_MESSAGES:
144+
return ERROR_THREAD_MISSING
145+
else:
146+
logger.error(f"F95zone Forum returned an error: {message}")
147+
return dataclasses.replace(ERROR_F95ZONE_ERROR, details=message)
148+
except Exception:
149+
logger.error(
150+
f"F95zone Forum returned an error that could not be parsed: {res}"
151+
)
152+
return ERROR_UNKNOWN_RESPONSE
130153

131154
if any((msg in res) for msg in RATELIMIT_FORUM_ERRORS):
132155
logger.error("Hit F95zone Forum ratelimit")
@@ -136,15 +159,27 @@ def check_error(
136159
logger.warning("F95zone temporarily unreachable")
137160
return ERROR_F95ZONE_UNAVAILABLE
138161

162+
if any((msg in res) for msg in LOGIN_ERROR_MESSAGES):
163+
logger.error("Logged out of F95zone")
164+
# TODO: maybe auto login, but xf_user cookie should be enough for a long time
165+
return ERROR_SESSION_LOGGED_OUT
166+
139167
elif isinstance(res, dict):
140168
if res.get("status") == "error":
169+
message = res.get("msg")
141170

142-
if any((msg == res.get("msg")) for msg in RATELIMIT_API_ERRORS):
171+
if any((msg == message) for msg in RATELIMIT_API_ERRORS):
143172
logger.error("Hit F95zone API ratelimit")
144173
return ERROR_F95ZONE_RATELIMIT
145174

146-
logger.error(f"F95zone API returned an error: {res}")
147-
return ERROR_UNKNOWN_RESPONSE
175+
if isinstance(message, str):
176+
logger.error(f"F95zone API returned an error: {message}")
177+
return dataclasses.replace(ERROR_F95ZONE_ERROR, details=message)
178+
else:
179+
logger.error(
180+
f"F95zone API returned an error that could not be parsed: {res}"
181+
)
182+
return ERROR_UNKNOWN_RESPONSE
148183

149184
elif isinstance(res, Exception):
150185
if isinstance(res, (asyncio.TimeoutError, aiohttp.ClientConnectionError)):

indexer/scraper.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,6 @@ async def thread(id: int) -> dict[str, str] | f95zone.IndexerError | None:
3838
loop = asyncio.get_event_loop()
3939
ret = await loop.run_in_executor(None, parser.thread, res)
4040
if isinstance(ret, parser.ParserError):
41-
42-
if ret.message == "Thread structure missing" and req.status in (403, 404):
43-
return f95zone.ERROR_THREAD_MISSING
44-
4541
logger.error(f"Thread {id} parsing failed: {ret.message}\n{ret.dump}")
4642
return f95zone.ERROR_PARSING_FAILED
4743

@@ -80,7 +76,6 @@ async def thread(id: int) -> dict[str, str] | f95zone.IndexerError | None:
8076
if version:
8177
query = f95zone.latest_updates_search_sanitize_query(ret.name)
8278
for category in f95zone.LATEST_UPDATES_CATEGORIES:
83-
8479
try:
8580
async with f95zone.session.get(
8681
f95zone.LATEST_UPDATES_SEARCH_URL.format(
@@ -163,13 +158,6 @@ async def thread(id: int) -> dict[str, str] | f95zone.IndexerError | None:
163158
else:
164159
reviews = await loop.run_in_executor(None, parser.reviews, res)
165160
if isinstance(reviews, parser.ParserError):
166-
167-
if reviews.message == "Thread structure missing" and req.status in (
168-
403,
169-
404,
170-
):
171-
return f95zone.ERROR_THREAD_MISSING
172-
173161
logger.error(
174162
f"Thread {id} reviews parsing failed: {reviews.message}\n{reviews.dump}"
175163
)

indexer/watcher.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ async def lifespan():
3030
try:
3131
yield
3232
finally:
33-
3433
updates_task.cancel()
3534
versions_task.cancel()
3635

@@ -179,7 +178,6 @@ async def poll_versions():
179178
invalidate_cache = cache.redis.pipeline()
180179

181180
for names_chunk in chunks(names, WATCH_VERSIONS_CHUNK_SIZE):
182-
183181
cached_data = cache.redis.pipeline()
184182
csv = ""
185183
ids = []

0 commit comments

Comments (0)