-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrapper.py
More file actions
66 lines (43 loc) · 1.97 KB
/
scrapper.py
File metadata and controls
66 lines (43 loc) · 1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from bs4 import BeautifulSoup
import os
import requests
# Start-up banner, printed once when the tool launches.
BANNER = """
_________________
WebScrapper Tool/
made by D3VR4J|‾
‾‾‾‾‾‾‾‾‾‾‾‾‾‾
(FOR COLLEGE PROJECT)
"""

# Visual divider printed between menu rounds and between individual results.
SEPARATOR = "___________________________________________________"

MENU_PROMPT = (
    "Select Your Web Scrapper(1or2)\n"
    "1. Keyword WebScrapper\n"
    "2. Latest News Scrapper: "
)

# Keyword search (site used: https://www.bing.com/)
BING_SEARCH_URL = "https://www.bing.com/search/"
# Latest news articles (site used: https://indiancybertroops.org)
NEWS_URL = "https://indiancybertroops.org/news/current-affairs/"

# Seconds to wait for a server before giving up; without a timeout a stalled
# connection would freeze the menu loop indefinitely.
REQUEST_TIMEOUT = 10


def parse_menu_choice(raw):
    """Convert the text typed at the menu prompt into an integer choice.

    Args:
        raw: The string entered by the user.

    Returns:
        The entered number as an ``int``, or ``None`` when the text is not a
        whole number (so the caller can report bad input instead of crashing).
    """
    try:
        return int(raw)
    except ValueError:
        return None


def fetch_soup(url, params=None):
    """Download *url* and return its HTML parsed with ``html.parser``.

    Args:
        url: Address of the page to fetch.
        params: Optional mapping of query-string parameters.

    Returns:
        A ``BeautifulSoup`` document built from the response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    # Treat 4xx/5xx answers as failures rather than scraping an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def keyword_search(keyword):
    """Search Bing for *keyword* and print title, summary and link per hit.

    Args:
        keyword: The search phrase sent as the ``q`` query parameter.

    Raises:
        requests.RequestException: If the search page cannot be fetched.
    """
    soup = fetch_soup(BING_SEARCH_URL, {"q": keyword})
    results = soup.find("ol", {"id": "b_results"})
    if results is None:
        # The results container is absent (e.g. the page layout differs).
        print("No results found.")
        return

    for item in results.find_all("li", {"class": "b_algo"}):
        anchor = item.find("a")
        if anchor is None:
            continue
        title = anchor.text
        href = anchor.attrs.get("href")
        if not (title and href):
            continue
        # The summary paragraph is looked up from the anchor's grandparent,
        # exactly as before; it may be missing for some result entries.
        summary_tag = anchor.parent.parent.find("p")
        summary = summary_tag.text if summary_tag is not None else "(no summary available)"
        print("Title: ", title)
        print("Summary: ", summary)
        print("Link: ", href)
        print(SEPARATOR)


def latest_news():
    """Print title, description and link of each article on the news page.

    Raises:
        requests.RequestException: If the news page cannot be fetched.
    """
    soup = fetch_soup(NEWS_URL)
    for item in soup.find_all("div", {"class": "twp-archive-content"}):
        anchor = item.find("a")
        if anchor is None:
            # Without a link element there is neither a title nor a URL.
            continue
        description_tag = item.find("p")
        description = description_tag.text if description_tag is not None else ""
        print(
            "Title: ", anchor.text,
            "\nArticle: ", description,
            "\nArticle link:", anchor.attrs.get("href", ""),
        )
        print(SEPARATOR)


def main():
    """Show the banner, then run the interactive menu until Ctrl-C or EOF."""
    print(BANNER)
    while True:
        print(SEPARATOR)
        try:
            select = parse_menu_choice(input(MENU_PROMPT))
            if select == 1:
                keyword_search(input("Enter Your Keyword: "))
            elif select == 2:
                latest_news()
            else:
                print("ERROR: Wrong Input or Network Error")
        except requests.RequestException as exc:
            # Keep the menu alive after a failed request.
            print("ERROR: Network Error:", exc)
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C: leave the loop quietly instead of a traceback.
            print()
            return


if __name__ == "__main__":
    main()