Skip to content

Commit 1072d42

Browse files
committed
refactor(subfinder): rewrite module using OOP principles and cleaner architecture
- Improved structure by applying object-oriented design patterns
- Enhanced readability and maintainability through modular and clean code practices
- Removed redundant logic and simplified control flow
1 parent cd4cca6 commit 1072d42

4 files changed

Lines changed: 132 additions & 176 deletions

File tree

Lines changed: 19 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,36 @@
1-
from threading import RLock
21
from rich.console import Console
32

4-
console = Console()
53

6-
class Logger:
4+
class SubFinderConsole(Console):
75
def __init__(self):
8-
self._lock = RLock()
6+
super().__init__()
7+
self.total_subdomains = 0
8+
self.domain_stats = {}
9+
self.disable_cursor()
910

10-
def clear_line(self):
11-
with self._lock:
12-
print("\033[2K\r", end='', flush=True)
11+
def disable_cursor(self):
12+
print('\033[?25l', end='', flush=True)
1313

14-
def replace(self, message):
15-
with self._lock:
16-
print(f"{message}", end='', flush=True)
14+
def enable_cursor(self):
15+
print('\033[?25h', end='', flush=True)
1716

18-
class SubFinderConsole:
19-
def __init__(self):
20-
self.total_subdomains = 0
21-
self.domain_stats = {}
22-
self.logger = Logger()
23-
24-
def start_domain_scan(self, domain):
25-
self.logger.clear_line()
26-
console.print(f"[cyan] Processing: {domain}[/cyan]")
17+
def print_domain_start(self, domain):
18+
self.print(f"[cyan]Processing: {domain}[/cyan]")
2719

2820
def update_domain_stats(self, domain, count):
2921
self.domain_stats[domain] = count
3022
self.total_subdomains += count
3123

32-
def print_domain_complete(self, domain, subdomains_count):
33-
self.logger.clear_line()
34-
console.print(f"[green] {domain}: {subdomains_count} subdomains found[/green]")
24+
def print_domain_complete(self, domain, count):
25+
self.print(f"[green]{domain}: {count} subdomains found[/green]")
3526

3627
def print_final_summary(self, output_file):
37-
console.print(f"\n[green] Total: [bold]{self.total_subdomains}[/bold] subdomains found[/green]")
38-
console.print(f"[green] Results saved to {output_file}[/green]")
28+
self.print(f"\n[green]Total: [bold]{self.total_subdomains}[/bold] subdomains found")
29+
self.print(f"[green]Results saved to {output_file}[/green]")
30+
self.enable_cursor()
3931

40-
def show_progress(self, current, total):
41-
progress_message = f" progress: [{current}/{total}]\r"
42-
self.logger.replace(progress_message)
43-
44-
def print(self, message):
45-
self.logger.clear_line()
46-
console.print(message)
32+
def print_progress(self, current, total):
33+
self.print(f"Progress: {current} / {total}", end="\r")
4734

4835
def print_error(self, message):
49-
self.logger.clear_line()
50-
console.print(f"[red] {message}[/red]")
36+
self.print(f"[red]{message}[/red]")

bugscanx/modules/scrapers/subfinder/sources.py

Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,23 @@
1+
from abc import ABC, abstractmethod
12
from bs4 import BeautifulSoup
3+
from .utils import RequestHandler
24

3-
from .utils import make_request
4-
5-
class SubdomainSource:
5+
class SubdomainSource(RequestHandler, ABC):
66
def __init__(self, name):
7+
super().__init__()
78
self.name = name
89
self.subdomains = set()
9-
10-
def fetch(self, domain, session=None):
11-
raise NotImplementedError
10+
11+
@abstractmethod
12+
def fetch(self, domain):
13+
pass
1214

1315
class CrtshSource(SubdomainSource):
1416
def __init__(self):
1517
super().__init__("Crt.sh")
16-
17-
def fetch(self, domain, session=None):
18-
response = make_request(f"https://crt.sh/?q=%25.{domain}&output=json", session)
18+
19+
def fetch(self, domain):
20+
response = self.get(f"https://crt.sh/?q=%25.{domain}&output=json")
1921
if response and response.headers.get('Content-Type') == 'application/json':
2022
for entry in response.json():
2123
self.subdomains.update(entry['name_value'].splitlines())
@@ -24,19 +26,21 @@ def fetch(self, domain, session=None):
2426
class HackertargetSource(SubdomainSource):
2527
def __init__(self):
2628
super().__init__("Hackertarget")
27-
28-
def fetch(self, domain, session=None):
29-
response = make_request(f"https://api.hackertarget.com/hostsearch/?q={domain}", session)
29+
30+
def fetch(self, domain):
31+
response = self.get(f"https://api.hackertarget.com/hostsearch/?q={domain}")
3032
if response and 'text' in response.headers.get('Content-Type', ''):
31-
self.subdomains.update([line.split(",")[0] for line in response.text.splitlines()])
33+
self.subdomains.update(
34+
[line.split(",")[0] for line in response.text.splitlines()]
35+
)
3236
return self.subdomains
3337

3438
class RapidDnsSource(SubdomainSource):
3539
def __init__(self):
3640
super().__init__("RapidDNS")
37-
38-
def fetch(self, domain, session=None):
39-
response = make_request(f"https://rapiddns.io/subdomain/{domain}?full=1", session)
41+
42+
def fetch(self, domain):
43+
response = self.get(f"https://rapiddns.io/subdomain/{domain}?full=1")
4044
if response:
4145
soup = BeautifulSoup(response.text, 'html.parser')
4246
for link in soup.find_all('td'):
@@ -48,19 +52,19 @@ def fetch(self, domain, session=None):
4852
class AnubisDbSource(SubdomainSource):
4953
def __init__(self):
5054
super().__init__("AnubisDB")
51-
52-
def fetch(self, domain, session=None):
53-
response = make_request(f"https://jldc.me/anubis/subdomains/{domain}", session)
55+
56+
def fetch(self, domain):
57+
response = self.get(f"https://jldc.me/anubis/subdomains/{domain}")
5458
if response:
5559
self.subdomains.update(response.json())
5660
return self.subdomains
5761

5862
class AlienVaultSource(SubdomainSource):
5963
def __init__(self):
6064
super().__init__("AlienVault")
61-
62-
def fetch(self, domain, session=None):
63-
response = make_request(f"https://otx.alienvault.com/api/v1/indicators/domain/{domain}/passive_dns", session)
65+
66+
def fetch(self, domain):
67+
response = self.get(f"https://otx.alienvault.com/api/v1/indicators/domain/{domain}/passive_dns")
6468
if response:
6569
for entry in response.json().get("passive_dns", []):
6670
hostname = entry.get("hostname")
@@ -71,25 +75,15 @@ def fetch(self, domain, session=None):
7175
class CertSpotterSource(SubdomainSource):
7276
def __init__(self):
7377
super().__init__("CertSpotter")
74-
75-
def fetch(self, domain, session=None):
76-
response = make_request(f"https://api.certspotter.com/v1/issuances?domain={domain}&include_subdomains=true&expand=dns_names", session)
78+
79+
def fetch(self, domain):
80+
response = self.get(f"https://api.certspotter.com/v1/issuances?domain={domain}&include_subdomains=true&expand=dns_names")
7781
if response:
7882
for cert in response.json():
7983
self.subdomains.update(cert.get('dns_names', []))
8084
return self.subdomains
8185

82-
def get_all_sources():
83-
return [
84-
CrtshSource(),
85-
HackertargetSource(),
86-
RapidDnsSource(),
87-
AnubisDbSource(),
88-
AlienVaultSource(),
89-
CertSpotterSource(),
90-
]
91-
92-
def get_bulk_sources():
86+
def get_sources():
9387
return [
9488
CrtshSource(),
9589
HackertargetSource(),
Lines changed: 46 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,51 @@
11
import os
2-
import requests
3-
import threading
4-
from rich import print
52
from concurrent.futures import ThreadPoolExecutor, as_completed
3+
64
from bugscanx.utils.common import get_input
75
from .logger import SubFinderConsole
8-
from .sources import get_all_sources, get_bulk_sources
6+
from .sources import get_sources
97
from .utils import is_valid_domain, filter_valid_subdomains
108

119
class SubFinder:
1210
def __init__(self):
1311
self.console = SubFinderConsole()
14-
15-
def process_domain(self, domain, output_file, sources, total, completed_counter):
12+
self.completed = 0
13+
14+
def _fetch_from_source(self, source, domain):
15+
try:
16+
found = source.fetch(domain)
17+
return filter_valid_subdomains(found, domain)
18+
except Exception:
19+
return set()
20+
21+
def _save_subdomains(self, subdomains, output_file):
22+
if subdomains:
23+
with open(output_file, "a", encoding="utf-8") as f:
24+
f.write("\n".join(sorted(subdomains)) + "\n")
25+
26+
def process_domain(self, domain, output_file, sources, total):
1627
if not is_valid_domain(domain):
17-
with completed_counter.get_lock():
18-
completed_counter.value += 1
28+
self.completed += 1
1929
return set()
2030

21-
self.console.start_domain_scan(domain)
22-
self.console.show_progress(completed_counter.value, total)
31+
self.console.print_domain_start(domain)
32+
self.console.print_progress(self.completed, total)
2333

24-
with requests.Session() as session:
25-
results = []
26-
with ThreadPoolExecutor(max_workers=6) as executor:
27-
future_to_source = {
28-
executor.submit(source.fetch, domain, session): source.name
29-
for source in sources
30-
}
31-
32-
for future in as_completed(future_to_source):
33-
try:
34-
found = future.result()
35-
filtered = filter_valid_subdomains(found, domain)
36-
results.append(filtered)
37-
except Exception:
38-
results.append(set())
39-
40-
subdomains = set().union(*results) if results else set()
34+
with ThreadPoolExecutor(max_workers=6) as executor:
35+
futures = [
36+
executor.submit(self._fetch_from_source, source, domain)
37+
for source in sources
38+
]
39+
results = [f.result() for f in as_completed(futures)]
40+
41+
subdomains = set().union(*results) if results else set()
4142

4243
self.console.update_domain_stats(domain, len(subdomains))
4344
self.console.print_domain_complete(domain, len(subdomains))
45+
self._save_subdomains(subdomains, output_file)
4446

45-
if subdomains:
46-
with open(output_file, "a", encoding="utf-8") as f:
47-
f.write("\n".join(sorted(subdomains)) + "\n")
48-
49-
with completed_counter.get_lock():
50-
completed_counter.value += 1
51-
self.console.show_progress(completed_counter.value, total)
52-
47+
self.completed += 1
48+
self.console.print_progress(self.completed, total)
5349
return subdomains
5450

5551
def run(self, domains, output_file, sources):
@@ -58,55 +54,42 @@ def run(self, domains, output_file, sources):
5854
return
5955

6056
os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
61-
completed_counter = threading.Value('i', 0)
57+
self.completed = 0
6258
all_subdomains = set()
63-
59+
total = len(domains)
60+
6461
with ThreadPoolExecutor(max_workers=3) as executor:
65-
future_to_domain = {
66-
executor.submit(
67-
self.process_domain,
68-
domain,
69-
output_file,
70-
sources,
71-
len(domains),
72-
completed_counter
73-
): domain for domain in domains
74-
}
75-
76-
for future in as_completed(future_to_domain):
77-
domain = future_to_domain[future]
62+
futures = [
63+
executor.submit(self.process_domain, domain, output_file, sources, total)
64+
for domain in domains
65+
]
66+
for future in as_completed(futures):
7867
try:
7968
result = future.result()
80-
if result:
81-
all_subdomains.update(result)
69+
all_subdomains.update(result)
8270
except Exception as e:
83-
self.console.print(f"Error processing {domain}: {str(e)}")
71+
self.console.print(f"Error processing domain: {str(e)}")
8472

8573
self.console.print_final_summary(output_file)
8674
return all_subdomains
8775

76+
8877
def main():
8978
domains = []
90-
input_type = get_input("Select input mode", "choice",
91-
choices=["Manual", "File"])
92-
79+
sources = get_sources()
80+
input_type = get_input("Select input mode", "choice",
81+
choices=["Manual", "File"])
82+
9383
if input_type == "Manual":
9484
domain_input = get_input("Enter domain(s)")
9585
domains = [d.strip() for d in domain_input.split(',') if is_valid_domain(d.strip())]
96-
sources = get_all_sources()
9786
default_output = f"{domains[0]}_subdomains.txt"
98-
9987
else:
10088
file_path = get_input("Enter filename", "file")
10189
with open(file_path, 'r') as f:
10290
domains = [d.strip() for d in f if is_valid_domain(d.strip())]
103-
sources = get_bulk_sources()
10491
default_output = f"{file_path.rsplit('.', 1)[0]}_subdomains.txt"
10592

106-
if not domains:
107-
print("[bold red] No valid domains provided")
108-
return
109-
11093
output_file = get_input("Enter output filename", default=default_output)
11194
subfinder = SubFinder()
11295
subfinder.run(domains, output_file, sources)

0 commit comments

Comments
 (0)