Skip to content

Commit 2e0ad4d

Browse files
committed
douban enhance
1 parent 68ce85b commit 2e0ad4d

8 files changed

Lines changed: 73 additions & 210 deletions

File tree

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ COPY --from=pybuilder /root/.local /usr/local
2424
COPY --from=pybuilder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
2525
COPY --from=pybuilder /usr/share/zoneinfo /usr/share/zoneinfo
2626
COPY --from=nodebuilder /build /YYeTsBot/yyetsweb/templates/
27+
RUN playwright install --with-deps chromium
2728

2829
ENV TZ=Asia/Shanghai
2930
WORKDIR /YYeTsBot/yyetsbot

requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ tgbot-ping==1.0.7
55
redis==6.4.0
66
apscheduler==3.11.0
77
pymongo==4.15.1
8-
tornado==6.5.2
8+
tornado==6.5.5
99
captcha==0.7.1
1010
passlib==1.7.4
1111
fakeredis==2.31.3
@@ -21,4 +21,5 @@ jinja2==3.1.6
2121
coloredlogs==15.0.1
2222
meilisearch==0.33.0
2323
pillow==11.3.0
24-
pytz==2025.2
24+
pytz==2025.2
25+
playwright==1.59.0

scripts/install.sh

Lines changed: 0 additions & 98 deletions
This file was deleted.

scripts/migrate_sub.py

Lines changed: 0 additions & 19 deletions
This file was deleted.

yyetsweb/commands/douban_fix.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@
2727

2828
douban = Douban()
2929
session = requests.Session()
30-
ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
30+
ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36"
3131
session.headers.update({"User-Agent": ua})
3232

3333
yyets_data = douban.db["yyets"].find_one({"data.info.id": resource_id})

yyetsweb/commands/share_excel.py

Lines changed: 0 additions & 69 deletions
This file was deleted.

yyetsweb/common/sync.py

Lines changed: 29 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from bs4 import BeautifulSoup
1313
from tqdm import tqdm
1414

15+
from common.utils import ts_date
1516
from databases.base import Mongo
1617
from databases.douban import Douban
1718

@@ -53,7 +54,7 @@ def __init__(self):
5354
self.session = requests.Session()
5455
self.session.headers.update(
5556
{
56-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
57+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36"
5758
}
5859
)
5960

@@ -218,17 +219,25 @@ def insert_data(self, data):
218219

219220

220221
def sync_douban():
222+
MAX_FAILS = 3
221223
douban = Douban()
222-
session = requests.Session()
223-
ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4280.88 Safari/537.36"
224-
session.headers.update({"User-Agent": ua})
225224

225+
failed_ids = douban.db["douban_failed"].distinct("resourceId", {"fail_count": {"$gte": MAX_FAILS}})
226226
yyets_data = douban.db["yyets"].aggregate(
227227
[
228+
{
229+
"$match": {
230+
"data.info.id": {
231+
"$ne": 233,
232+
"$nin": failed_ids,
233+
}
234+
}
235+
},
228236
{"$group": {"_id": None, "ids": {"$push": "$data.info.id"}}},
229237
{"$project": {"_id": 0, "ids": 1}},
230238
]
231239
)
240+
232241
douban_data = douban.db["douban"].aggregate(
233242
[
234243
{"$group": {"_id": None, "ids": {"$push": "$resourceId"}}},
@@ -239,19 +248,28 @@ def sync_douban():
239248
id1 = next(yyets_data)["ids"]
240249
id2 = next(douban_data)["ids"]
241250
rids = list(set(id1).difference(id2))
242-
rids.remove(233)
243251
logging.info("resource id complete %d", len(rids))
252+
# rids = [33439, 26421]
244253
for rid in tqdm(rids):
245-
with contextlib.suppress(Exception):
254+
try:
246255
d = douban.find_douban(rid)
247256
logging.info("Processed %s, length %d", rid, len(d))
257+
# 成功后清掉失败记录,防止以前失败过后来成功了
258+
douban.db["douban_failed"].delete_one({"resourceId": rid})
259+
260+
except Exception:
261+
logging.exception("Failed to process %s", rid)
262+
douban.db["douban_failed"].update_one(
263+
{"resourceId": rid},
264+
{
265+
"$inc": {"fail_count": 1},
266+
"$set": {"last_failed_at": ts_date()},
267+
},
268+
upsert=True,
269+
)
248270

249271
logging.info("ALL FINISH!")
250272

251273

252274
if __name__ == "__main__":
253-
a = Zhuixinfan()
254-
# a.build_data(open("1.html").read(), "https://www.zhuixinfan.com/resource/1.html")
255-
a.run()
256-
# b = YYSub()
257-
# b.run()
275+
sync_douban()

0 commit comments

Comments
 (0)