-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathweb_crawl_runner.py
More file actions
31 lines (26 loc) · 938 Bytes
/
web_crawl_runner.py
File metadata and controls
31 lines (26 loc) · 938 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from scrapy.crawler import CrawlerProcess
from webscrapy.webscrapy.spiders.web_spider import WebSpider
from webscrapy.webscrapy.spiders.web_spider_new import WebCrawSpider
from pydispatch import dispatcher
from scrapy import signals
import sys
def spider_closed(spider, reason):
    """Record whether the crawl ended normally when a spider closes.

    Sets the module-level ``crawl_done`` flag to ``True`` when ``reason``
    is ``"finished"`` and to ``False`` for any other reason, printing a
    status line in both cases.

    Args:
        spider: The spider that closed (not used by this handler).
        reason: The close reason string delivered with the signal.
    """
    global crawl_done
    if reason == "finished":
        print("crawl done!")
        crawl_done = True
    else:
        # Any reason other than "finished" is treated as a failed crawl.
        print("crawl failed!")
        crawl_done = False
# Run spider_closed whenever a spider_closed signal is dispatched.
dispatcher.connect(spider_closed, signal=signals.spider_closed)

# Start from "not done" so the check after the crawl cannot raise NameError
# if the spider_closed handler never runs.
crawl_done = False

# Expected invocation: <script> [url ...] <keywordId>
# With no arguments, sys.argv[-1] is the script path itself and would be
# silently used as the keyword id, so stop early with a usage message.
if len(sys.argv) < 2:
    sys.exit("usage: {} [url ...] keywordId".format(sys.argv[0]))

# Every argument except the last is a start URL; the last is the keyword id.
urls = sys.argv[1:-1]
keywordId = sys.argv[-1]
print("-----------------------urls---------------------------")
print(urls)
print("-----------------------urls---------------------------")

# WARNING: process.start() does not return until the crawl has ended, so
# running this inside FastAPI will BLOCK it and may cause reactor errors.
process = CrawlerProcess()
process.crawl(WebCrawSpider, start_urls=urls, keywordId=keywordId)
process.start()

if crawl_done:
    print("It finished!")