Skip to content

Commit 4840cf9

Browse files
committed
Version 1.1.0_dev
1 parent ea74df1 commit 4840cf9

5 files changed

Lines changed: 166 additions & 64 deletions

File tree

README.md

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,29 +65,33 @@ the following steps:
6565
8. After all URLs are processed, return the most relevant page.
6666

6767
### Features
68-
1. Crawls Tor links (.onion) only.
69-
2. Returns Page title and address.
70-
3. Cache links so that there won't be duplicate links.
68+
1. Crawls Tor links (.onion).(Partially Completed)
69+
2. Returns Page title and address with a short description about the site.(Partially Completed)
70+
3. Save links to database.(Not Started)
71+
4. Get emails from site.(Completed)
72+
5. Save crawl info to file.(Completed)
73+
6. Crawl custom domains.(Completed)
74+
7. Check if the link is live.(Completed)
75+
8. Built-in Updater.(Completed)
7176
...(will be updated)
7277

7378
## Contribute
7479
Contributions to this project are always welcome.
75-
To add a new feature fork this repository and give a pull request when your new feature is tested and complete.
80+
To add a new feature fork the dev branch and give a pull request when your new feature is tested and complete.
7681
If its a new module, it should be put inside the modules directory and imported to the main file.
7782
The branch name should be your new feature name in the format <Feature_featurename_version(optional)>. For example, <i>Feature_FasterCrawl_1.0</i>.
7883
Contributor name will be updated to the below list. :D
7984

8085
## Dependencies
8186
1. Tor
82-
2. Python 3.x (Make sure pip3 is there)
87+
2. Python 3.x (Make sure pip3 is installed)
8388
3. Python Stem Module
8489
4. urllib
8590
5. Beautiful Soup 4
8691
6. Socket
8792
7. Sock
8893
8. Argparse
89-
9. Stem module
90-
10. Git
94+
9. Git
9195

9296
## Basic setup
9397
Before you run the torBot make sure the following things are done properly:
@@ -105,17 +109,23 @@ with Controller.from_port(port = 9051) as controller:
105109
controller.signal(Signal.NEWNYM)`
106110

107111
`python3 torBot.py`
108-
`usage: torBot.py [-h] [-q] [-u URL] [-m] [-e EXTENSION] [-l]
112+
`usage: torBot.py [-h] [-v] [--update] [-q] [-u URL] [-s] [-m] [-e EXTENSION]
113+
[-l] [-i]
109114

110115
optional arguments:
111116
-h, --help show this help message and exit
117+
-v, --version Show current version of TorBot.
118+
--update Update TorBot to the latest stable version
112119
-q, --quiet
113120
-u URL, --url URL Specifiy a website link to crawl
121+
-s, --save Save results in a file
114122
-m, --mail Get e-mail addresses from the crawled sites
115123
-e EXTENSION, --extension EXTENSION
116124
Specifiy additional website extensions to the
117125
list(.com or .org etc)
118-
-l, --live Check if websites are live or not (slow)`
126+
-l, --live Check if websites are live or not (slow)
127+
-i, --info Info displays basic info of the scanned site (very
128+
slow)`
119129

120130
Read more about torrc here : [Torrc](https://github.com/DedSecInside/TorBoT/blob/master/Tor.md)
121131

modules/getweblinks.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414

1515
def link_status(web,out_queue,index):
1616
link_live = False
17+
live_links = 0
1718
out_queue[index] = web + " is_live = False "
1819
try:
1920
urllib.request.urlopen(web)
2021
link_live = True
2122
out_queue[index] = web + " is_live = True "
23+
live_links += 1
2224
print(web)
2325
except urllib.error.HTTPError as e:
2426
print(Bcolors.On_Red+web+Bcolors.ENDC)
@@ -74,6 +76,8 @@ def getLinks(soup,ext,live=0,save=0):
7476
print(web)
7577
if save:
7678
saveJson("Onion-Links",websites)
77-
return websites
79+
return websites
80+
#print (Bcolors.OKGREEN+'Live Links:'+live_links+' Dead Links:'+(str(len(websites))-live_links))
81+
#print ('-------------------------------')
7882
else:
7983
raise('Method parameter is not of instance bs4.BeautifulSoup')

modules/info.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import requests
2+
from urllib.parse import urlsplit
3+
from termcolor.termcolor import colored, cprint
4+
5+
__all__ = ['info']
6+
7+
8+
class info():
    """Probe a target site for commonly exposed files and folders.

    Each check fetches a well-known path relative to the site root
    (robots.txt, .git/, .svn/entries, .htaccess) and reports whether it
    is publicly reachable.  Results are colour-coded via termcolor's
    ``cprint``.
    """

    def __init__(self):
        pass

    def scan(self, target):
        """Entry point: run every information check against *target* (a URL)."""
        # Bug fix: the original called execute_all_func(target) without
        # `self.`, which raised NameError the moment scan() was invoked.
        self.execute_all_func(target)

    def execute_all_func(self, target):
        """Run all checks, reporting (but not propagating) failures."""
        # `except Exception` rather than a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            self.get_robots_txt(target)
        except Exception:
            cprint("No robots.txt file Found!", "blue")
        try:
            self.get_dot_git(target)
        except Exception:
            cprint("Error !", "red")
        try:
            self.get_dot_svn(target)
        except Exception:
            cprint("Error", "red")
        try:
            self.get_dot_htaccess(target)
        except Exception:
            cprint("Error", "red")

    def _site_root(self, url):
        """Return the ``scheme://netloc/`` root of *url* (trailing slash included)."""
        return "{0.scheme}://{0.netloc}/".format(urlsplit(url))

    def get_robots_txt(self, target):
        """Fetch and display the site's robots.txt, if any."""
        cprint("[*]Checking for Robots.txt", 'yellow')
        # Root already ends with '/', so append the bare path (the
        # original produced a double slash, e.g. host//robots.txt).
        req = requests.get(self._site_root(target) + "robots.txt")
        cprint(req.text, 'blue')

    def get_dot_git(self, target):
        """Warn if the .git folder is reachable over HTTP."""
        cprint("[*]Checking for .git folder", 'yellow')
        req = requests.get(self._site_root(target) + ".git/")
        if req.status_code == 200:
            cprint("Alert!", 'red')
            cprint(".git folder exposed publicly", 'red')
        else:
            # Bug fix: the original used print("...", 'blue'), which
            # printed the literal word 'blue' instead of colouring.
            cprint("NO .git folder found", 'blue')

    def get_dot_svn(self, target):
        """Warn if Subversion metadata (.svn/entries) is reachable over HTTP."""
        cprint("[*]Checking for .svn folder", 'yellow')
        req = requests.get(self._site_root(target) + ".svn/entries")
        if req.status_code == 200:
            cprint("Alert!", 'red')
            cprint(".SVN folder exposed publicly", 'red')
        else:
            cprint("NO .SVN folder found", 'blue')

    def get_dot_htaccess(self, target):
        """Report on the accessibility of the site's .htaccess file."""
        cprint("[*]Checking for .htaccess", 'yellow')
        req = requests.get(self._site_root(target) + ".htaccess")
        statcode = req.status_code
        if statcode == 403:
            # 403 is the expected, healthy response for .htaccess.
            cprint("403 Forbidden", 'blue')
        elif statcode == 200:
            cprint("Alert!!", 'blue')
            cprint(".htaccess file found!", 'blue')
        else:
            cprint("Status code", 'blue')
            cprint(statcode, 'blue')

modules/updater.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,23 @@
44

55
def updateTor():
    """Update TorBot to the latest stable version from GitHub master.

    If the working directory is already a git checkout on the master
    branch, pull from origin; otherwise initialise a repository, add the
    upstream remote and pull.  Returns 0 on completion.
    """

    def _pull_and_report():
        # Shared pull step (was duplicated verbatim in both branches).
        update = subprocess.Popen(["git", "pull", "origin", "master"],
                                  stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        update_out = update.stdout.read().decode("utf-8", errors="replace")
        # Bug fix: the original tested a hard-coded byte slice
        # (update_out[90:109]) for git's up-to-date message, which broke
        # whenever the preamble length changed.  Substring search is
        # offset-independent.
        if "Already up-to-date" in update_out:
            print("TorBot is already up-to-date.")
        else:
            print("TorBot has succesfully updated to latest stable version.")

    print("Checking for latest stable release")
    isGit = subprocess.Popen(["git", "branch"],
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    output = isGit.stdout.read().decode("utf-8", errors="replace")
    # Bug fix: the original compared output[2:8] (6 characters) against
    # '* master' (8 characters), which could never be equal, so the
    # "already a checkout" path was unreachable.  `git branch` marks the
    # current branch with a leading '* '.
    on_master = any(line.strip() == "* master" for line in output.splitlines())
    if on_master:
        _pull_and_report()
    else:
        subprocess.Popen(["git", "init"],
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        subprocess.Popen(["git", "remote", "add", "origin",
                          "https://github.com/DedSecInside/TorBoT.git"],
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        _pull_and_report()
    return 0

torBot.py

Lines changed: 56 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
1-
#from modules.getemails import getMails
2-
#from modules.getweblinks import getLinks
3-
#from modules.pagereader import readPage
4-
#from modules.bcolors import bcolors
5-
#from modules.updater import updateTor
6-
71
from modules import *
82

93
import socket
@@ -14,23 +8,30 @@
148
from stem.control import Controller
159

1610
with Controller.from_port(port = 9051) as controller:
17-
controller.authenticate("16:3BEA46EB6C489B90608A65120BD7CF0C7BA709513AB8ACF212B9537183")
18-
controller.signal(Signal.NEWNYM)
11+
controller.authenticate("16:3BEA46EB6C489B90608A65120BD7CF0C7BA709513AB8ACF212B9537183")
12+
controller.signal(Signal.NEWNYM)
1913
#TorBot VERSION
20-
_VERSION_ = "1.0.1"
14+
_VERSION_ = "1.1.0_dev"
2115
#TOR SETUP GLOBAL Vars
2216
SOCKS_PORT = 9050 # TOR proxy port that is default from torrc, change to whatever torrc is configured to
2317
socks.set_default_proxy(socks.SOCKS5, "127.0.0.1",SOCKS_PORT)
2418
socket.socket = socks.socksocket
19+
20+
21+
22+
2523
# Perform DNS resolution through the socket
2624
def getaddrinfo(*args):
    """Return a single getaddrinfo-style record without touching the
    system resolver, so name lookups go through the SOCKS-wrapped
    socket (args[0] is the host, args[1] the port)."""
    host, port = args[0], args[1]
    record = (socket.AF_INET, socket.SOCK_STREAM, 6, '', (host, port))
    return [record]
2826

2927
socket.getaddrinfo = getaddrinfo
3028

29+
30+
31+
3132
def header():
3233
""" Display the header of TorBot """
33-
print("#######################################################")
34+
print( "######################################################")
3435
print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWMMMMMMMMMMMMM")
3536
print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWMMMMMMMMMMMMMM")
3637
print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNXNWWWWWMMMMMMMMMM")
@@ -72,49 +73,57 @@ def header():
7273
print("#######################################################")
7374
print("# TorBot - A python Tor Crawler #")
7475
print("# GitHub : https://github.com/DedsecInside/TorBot #")
76+
print("# Help : use -h for help text #")
7577
print("#######################################################")
7678
print(Bcolors.FAIL + "LICENSE: GNU Public License" + Bcolors.ENDC)
7779
print()
7880

7981

82+
83+
8084
def main():
    """Parse command-line arguments and drive one crawl run.

    Handles --version/--update short-circuits, then reads the start page
    (the supplied --url or the default onion directory) and dispatches the
    optional mail, info and link-listing stages.  Returns 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--version", action="store_true",
                        help="Show current version of TorBot.")
    parser.add_argument("--update", action="store_true",
                        help="Update TorBot to the latest stable version")
    parser.add_argument("-q", "--quiet", action="store_true")
    parser.add_argument("-u", "--url", help="Specifiy a website link to crawl")
    parser.add_argument("-s", "--save", action="store_true",
                        help="Save results in a file")
    parser.add_argument("-m", "--mail", action="store_true",
                        help="Get e-mail addresses from the crawled sites")
    parser.add_argument("-e", "--extension", action='append', dest='extension',
                        default=[],
                        help="Specifiy additional website extensions to the list(.com or .org etc)")
    parser.add_argument("-l", "--live", action="store_true",
                        help="Check if websites are live or not (slow)")
    parser.add_argument("-i", "--info", action="store_true",
                        help="Info displays basic info of the scanned site (very slow)")

    args = parser.parse_args()
    if args.version:
        print("TorBot Version:" + _VERSION_)
        exit()
    if args.update:
        updateTor()
        exit()

    if args.quiet == 0:
        header()
    print("Tor Ip Address :")
    link = args.url
    # Dead initialisations removed: ext/live/save were set to 0 and then
    # immediately overwritten from args.
    live = args.live
    ext = args.extension
    save = args.save
    # Show the exit IP first, then load the start page.
    a = readPage("https://check.torproject.org/", 1)
    if link:
        b = readPage(link)
    else:
        b = readPage("http://torlinkbgs6aabns.onion/", 0)
    if args.mail:
        getMails(b, save)
    if args.info:
        inf = info()
        # Bug fix: the original called the bare name scan(link), which is
        # undefined at module level — the method lives on the instance.
        inf.scan(link)
    getLinks(b, ext, live, save)
    print("\n\n")
    return 0
118127

119128
if __name__ == '__main__':
120129

0 commit comments

Comments
 (0)