-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBuildingStatusScraper.py
More file actions
37 lines (27 loc) · 1.12 KB
/
BuildingStatusScraper.py
File metadata and controls
37 lines (27 loc) · 1.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
# Scrape the status tooltips out of the "table-combined-base" table on a
# user-supplied page and write them to status_values.xlsx, one spreadsheet
# row per table row.

# Prompt the user to enter a URL. Strip surrounding whitespace so a pasted
# URL with a trailing space or newline does not produce a bad request.
url = input("Enter the URL: ").strip()

# Send a GET request to retrieve the webpage content. Without a timeout,
# requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
response.raise_for_status()
html_content = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")

# Find the table element with the specified ID
table = soup.find("table", id="table-combined-base")
if table is None:
    # soup.find returns None when nothing matches; stop with a clear message
    # rather than an AttributeError on the next line.
    raise SystemExit(
        'No <table id="table-combined-base"> found at ' + url
    )

# Create a new Excel workbook and select the active sheet
workbook = Workbook()
sheet = workbook.active

# Find all rows in the table; the first row is skipped as the header row.
rows = table.find_all("tr")[1:]

# Iterate over each row and extract the status values
for row in rows:
    # Find the div elements in the row that carry a class starting with
    # "status-"
    divs = row.find_all("div", class_=lambda x: x and x.startswith("status-"))
    # The status text is read from each div's "data-tippy-content" attribute;
    # a div without that attribute yields None.
    status_values = [div.get("data-tippy-content") for div in divs]
    # Add the status values to the Excel spreadsheet as one row
    sheet.append(status_values)

# Save the Excel spreadsheet
workbook.save("status_values.xlsx")