-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraper_wrapper.py
More file actions
35 lines (28 loc) · 1.05 KB
/
scraper_wrapper.py
File metadata and controls
35 lines (28 loc) · 1.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os
import time
from pathlib import Path
from scrapper import utils
from scrapper.entry import *
# Absolute path of the scraper log: an "out" directory located next to this
# module, resolved from the module's own location rather than the CWD.
log_file = Path(__file__).resolve().parent.joinpath('out/parser_log.log')
def get_logger():
    """Ensure the log directory exists and enable file logging.

    Creates the parent directory of the module-level ``log_file`` when it is
    missing, then hands ``log_file`` to ``utils.enable_logging``.

    Returns:
        None. Despite its name, this function does not return a logger
        object; it only configures logging as a side effect.

    Raises:
        FileExistsError: If the log directory path exists but is not a
            directory.
    """
    # exist_ok avoids the check-then-create race between concurrent runs;
    # parents=True also covers a missing intermediate directory.
    log_file.parent.mkdir(parents=True, exist_ok=True)
    utils.enable_logging(log_file)
def get_posts_data(group_id, cookies_file, pages_to_read, latest_date, max_past_limit):
    """Fetch posts for a group, retrying while temporarily banned.

    Calls ``get_posts`` with comments enabled. Whenever the call raises
    ``exceptions.TemporarilyBanned`` the function sleeps for five minutes and
    tries again, resuming from the last pagination URL reported through the
    request callback. The loop has no retry limit.

    Args:
        group_id: Forwarded to ``get_posts`` as ``group``.
        cookies_file: Forwarded to ``get_posts`` as ``cookies``.
        pages_to_read: Forwarded to ``get_posts`` as ``pages``.
        latest_date: Forwarded unchanged to ``get_posts``.
        max_past_limit: Forwarded unchanged to ``get_posts``.

    Returns:
        Whatever ``get_posts`` returns for the successful call.
    """
    ban_sleep_seconds = 300  # five minutes, matching the log message below

    # Last pagination URL seen by the callback; None means "start from the
    # beginning".
    start_url = None

    def handle_pagination_url(url):
        # nonlocal (not global) so the assignment updates the variable that
        # the retry loop below actually passes back into get_posts.
        nonlocal start_url
        logger.debug(url)
        start_url = url

    # NOTE(review): if get_posts returns a lazy generator, a ban raised while
    # the caller iterates the result would not be caught here - confirm.
    while True:
        try:
            posts = get_posts(
                group=group_id,
                start_url=start_url,
                request_url_callback=handle_pagination_url,
                cookies=cookies_file,
                options={"comments": True},
                pages=pages_to_read,
                latest_date=latest_date,
                max_past_limit=max_past_limit,
            )
            break
        except exceptions.TemporarilyBanned:
            logger.debug("Temporarily banned, sleeping for 5m")
            time.sleep(ban_sleep_seconds)
    return posts