Skip to content

Commit b170678

Browse files
authored
Merge pull request #360 from ChopinF/feature/fetch_html
html feature: display or save fetched html from the url
2 parents 9a51c49 + 74f1ca7 commit b170678

File tree

2 files changed

+36
-5
lines changed

2 files changed

+36
-5
lines changed

main.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from torbot.modules.api import get_ip
1111
from torbot.modules.color import color
1212
from torbot.modules.updater import check_version
13-
from torbot.modules.info import execute_all
13+
from torbot.modules.info import execute_all, fetch_html
1414
from torbot.modules.linktree import LinkTree
1515

1616

@@ -35,9 +35,7 @@ def print_header(version: str) -> None:
3535
/ __/ / / / /_/ / __ \/ __ \/ /
3636
/ /_/ /_/ / _, _/ /_/ / /_/ / /
3737
\__/\____/_/ |_/_____/\____/_/ v{VERSION}
38-
""".format(
39-
VERSION=version
40-
)
38+
""".format(VERSION=version)
4139
banner = color(banner, "red")
4240

4341
title = r"""
@@ -101,6 +99,11 @@ def run(arg_parser: argparse.ArgumentParser, version: str) -> None:
10199
elif args.save == "json":
102100
tree.saveJSON()
103101

102+
if args.html == "display":
103+
fetch_html(client, args.url, tree)
104+
elif args.html == "save":
105+
fetch_html(client, args.url, tree, save_html=True)
106+
104107
# always print something, table is the default
105108
if args.visualize == "table" or not args.visualize:
106109
tree.showTable()
@@ -158,14 +161,21 @@ def set_arguments() -> argparse.ArgumentParser:
158161
action="store_true",
159162
help="Executes HTTP requests without using SOCKS5 proxy",
160163
)
164+
parser.add_argument(
165+
"--html",
166+
choices=["save", "display"],
167+
help="Saves / Displays the html of the onion link",
168+
)
161169

162170
return parser
163171

164172

165173
if __name__ == "__main__":
166174
try:
167175
arg_parser = set_arguments()
168-
config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pyproject.toml")
176+
config_file_path = os.path.join(
177+
os.path.dirname(os.path.realpath(__file__)), "pyproject.toml"
178+
)
169179
try:
170180
with open(config_file_path, "r") as f:
171181
data = toml.load(f)

src/torbot/modules/info.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Module that contains methods for collecting all relevant data from links,
33
and saving data to file.
44
"""
5+
56
import re
67
import httpx
78
import logging
@@ -10,6 +11,8 @@
1011
from bs4 import BeautifulSoup
1112
from termcolor import cprint
1213

14+
from torbot.modules.linktree import LinkTree
15+
1316

1417
keys = set() # high entropy strings, prolly secret keys
1518
files = set() # pdf, css, png etc.
@@ -85,6 +88,24 @@ def execute_all(
8588
# display_headers(response)
8689

8790

91+
def fetch_html(
    client: httpx.Client, link: str, tree: LinkTree, save_html: bool = False
) -> None:
    """Fetch the page at ``link`` and either print its HTML or save it to a file.

    Args:
        client: HTTP client used to issue the GET request.
        link: URL of the page to fetch.
        tree: Link tree whose ``_get_tree_file_name()`` supplies the base name
            of the output file when saving.
        save_html: When false (the default) the parsed HTML is printed to
            stdout; when true it is written to ``<file_name>.html``.
    """
    resp = client.get(url=link)
    soup = BeautifulSoup(resp.text, "html.parser")

    if not save_html:
        print(f"\nHTML file\n{soup}\n")
        return

    file_name = tree._get_tree_file_name()
    # Write-only mode is enough (the file is never read back), and an explicit
    # encoding keeps the output independent of the platform default, which
    # could otherwise fail on non-ASCII page content.
    with open(f"{file_name}.html", "w", encoding="utf-8") as f:
        f.write(str(soup))
    # Report success only after the file has actually been written.
    print(f"SAVED to {file_name}.html\n\n")
107+
108+
88109
def display_headers(response):
89110
"""Print all headers in response object.
90111

0 commit comments

Comments
 (0)