-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
101 lines (80 loc) · 2.65 KB
/
Copy pathmain.py
File metadata and controls
101 lines (80 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import requests
import csv
import time
from bs4 import BeautifulSoup
# Site root; used as the base for the hrefs scraped from pages.
BASE_URL = "https://www.ss.com"
# Listing page where the crawl starts.
START_URL = "https://www.ss.com/ru/work/are-required/programmer/"
# HTTP headers sent with every request made by get_soup.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}
def get_soup(url, timeout=10):
    """Download *url* and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection, which would hang the whole scraper.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    # Turn HTTP error statuses into exceptions instead of parsing error pages.
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")
def get_vacancy_links(page_url):
    """Collect the unique vacancy URLs linked from one listing page.

    Every ``<a>`` whose ``href`` contains ``/msg/`` is treated as a vacancy
    link. Each href is resolved against ``BASE_URL`` so root-relative,
    relative and already-absolute hrefs all produce a valid URL.

    Args:
        page_url: URL of the listing page to scan.

    Returns:
        A list of absolute URLs without duplicates, in the order they first
        appear on the page.

    Raises:
        Whatever ``get_soup`` raises when the page cannot be fetched.
    """
    from urllib.parse import urljoin

    soup = get_soup(page_url)
    # A dict is used as an insertion-ordered set: O(1) duplicate checks
    # while keeping the order in which links appear on the page.
    links = {}
    for anchor in soup.select("a[href*='/msg/']"):
        links.setdefault(urljoin(BASE_URL, anchor["href"]))
    return list(links)
def parse_vacancy(url):
    """Fetch a single vacancy page and extract its fields.

    Args:
        url: Absolute URL of the vacancy page.

    Returns:
        A dict with the keys ``Название``, ``Город``, ``Зарплата``,
        ``Описание`` and ``Ссылка``. A field that is not found on the page
        is an empty string; ``Ссылка`` is always *url*.

    Raises:
        Whatever ``get_soup`` raises when the page cannot be fetched.
    """
    page = get_soup(url)

    def first_text(css):
        """Return the stripped text of the first match for *css*, or ""."""
        node = page.select_one(css)
        if not node:
            return ""
        return node.get_text(strip=True)

    # Label -> text of the last table row whose text contains that label.
    found = {"Город": "", "Зарплата": ""}
    for row in page.select("tr"):
        raw_text = row.get_text()
        for label in found:
            if label in raw_text:
                # The whole row text (label included) is stored, stripped.
                found[label] = row.get_text(strip=True)

    return {
        "Название": first_text("h2"),
        "Город": found["Город"],
        "Зарплата": found["Зарплата"],
        "Описание": first_text("div#msg_div_msg"),
        "Ссылка": url,
    }
def get_all_pages(start_url):
    """Return the listing pages reachable from the first listing page.

    Every ``<a>`` on *start_url* whose ``href`` contains ``page`` is treated
    as a pagination link. Each href is resolved against ``BASE_URL`` so
    root-relative, relative and already-absolute hrefs all produce a valid
    URL.

    Args:
        start_url: URL of the first listing page.

    Returns:
        A list of unique page URLs that starts with *start_url*, followed by
        the discovered pages in the order they appear in the document.

    Raises:
        Whatever ``get_soup`` raises when the page cannot be fetched.
    """
    from urllib.parse import urljoin

    soup = get_soup(start_url)
    # A dict is used as an insertion-ordered set: O(1) duplicate checks
    # while keeping start_url first and the rest in document order.
    pages = {start_url: None}
    for anchor in soup.select("a[href*='page']"):
        pages.setdefault(urljoin(BASE_URL, anchor["href"]))
    return list(pages)
def main():
    """Crawl the listing pages and save every vacancy to a CSV file.

    Steps:
        1. Discover the listing pages starting from ``START_URL``.
        2. Collect vacancy links from each page, pausing between requests.
        3. Parse every vacancy; a vacancy that fails is reported and skipped.
        4. Write the collected rows to ``vacancies_programmer_ss.csv``.
    """
    print("🔍 Поиск страниц...")
    pages = get_all_pages(START_URL)

    all_links = []
    for page in pages:
        print(f"📄 Страница: {page}")
        all_links.extend(get_vacancy_links(page))
        time.sleep(1)  # be polite to the server between listing requests

    # dict.fromkeys drops duplicates but keeps first-seen order, so the crawl
    # order and the CSV row order are the same on every run (a set would
    # shuffle them).
    all_links = list(dict.fromkeys(all_links))
    total = len(all_links)
    print(f"🔗 Найдено объявлений: {total}")

    vacancies = []
    for i, link in enumerate(all_links, 1):
        print(f"📌 [{i}/{total}] {link}")
        try:
            vacancies.append(parse_vacancy(link))
        except Exception as e:
            # Best effort: one broken vacancy must not abort the whole run.
            print("Ошибка:", e)
        # Pause after failures too, so a run of errors does not turn into a
        # burst of back-to-back requests.
        time.sleep(1)

    with open("vacancies_programmer_ss.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f,
            fieldnames=["Название", "Город", "Зарплата", "Описание", "Ссылка"],
        )
        writer.writeheader()
        writer.writerows(vacancies)

    print("✅ Данные сохранены в vacancies_programmer_ss.csv")
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()