# Skyscraper-Scraper-and-Presentation-Maker/BuildingStatusScraper.py at main · dmo7567/Skyscraper-Scraper-and-Presentation-Maker · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook

# Scrape the status tooltips from the "table-combined-base" table of a
# user-supplied web page and write them to an Excel workbook, one spreadsheet
# row per table row.

# Seconds to wait for the server before giving up. Without an explicit
# timeout, requests.get() may block indefinitely on an unresponsive host.
REQUEST_TIMEOUT_SECONDS = 30
# ID of the HTML table that holds the status cells.
TABLE_ID = "table-combined-base"
# Path of the workbook written at the end of the run.
OUTPUT_FILENAME = "status_values.xlsx"

# Prompt the user to enter a URL; drop whitespace that often comes along
# with a pasted address.
url = input("Enter the URL: ").strip()

# Send a GET request to retrieve the webpage content
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
# Stop on 4xx/5xx responses instead of parsing an error page as if it were
# the requested document.
response.raise_for_status()
html_content = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")

# Find the table element with the specified ID
table = soup.find("table", id=TABLE_ID)
if table is None:
    # find() returns None when nothing matches; exit with a readable message
    # rather than failing later with an AttributeError on None.
    raise SystemExit(f'No <table id="{TABLE_ID}"> element found at {url}')

# Create a new Excel workbook and select the active sheet
workbook = Workbook()
sheet = workbook.active

# Find all rows in the table (excluding the header row)
rows = table.find_all("tr")[1:]

# Iterate over each row and extract the status value
for row in rows:
    # Find all div elements in the row that carry a class starting with
    # "status-"; the `x and` guard skips a missing (None) class value.
    divs = row.find_all("div", class_=lambda x: x and x.startswith("status-"))

    # Extract the tooltip text of each status div (None if the attribute
    # is absent)
    status_values = [div.get("data-tippy-content") for div in divs]

    # Add the status values to the Excel spreadsheet. Rows without any
    # status div are still appended so sheet rows stay aligned with table rows.
    sheet.append(status_values)

# Save the Excel spreadsheet
workbook.save(OUTPUT_FILENAME)