Skip to content

Commit 02dc602

Browse files
committed
Cross linked script
1 parent 6f86819 commit 02dc602

5 files changed

Lines changed: 298 additions & 1 deletion

File tree

CrossLinked/Readme.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# CrossLinked
2+
3+
CrossLinked is a LinkedIn enumeration tool that uses search engine scraping to collect valid employee names from an organization. This technique provides accurate results without the use of API keys, credentials, or accessing LinkedIn directly!
4+

CrossLinked/logger.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import os
2+
import sys
3+
import logging
4+
5+
# ANSI text-attribute codes, selectable by name.
STYLE = {'None': '0', 'bold': '1'}

# ANSI foreground colour codes, selectable by name. Each value already carries
# its leading ';' so it can be appended straight after a STYLE code; 'None'
# adds no colour at all.
FG = {
    'None': '',
    'gray': ';30',
    'red': ';31',
    'green': ';32',
    'yellow': ';33',
    'blue': ';34',
    'purple': ';35',
    'cyan': ';36',
}
18+
19+
20+
class Log:
    """Quick log class for CLI output.

    Every method prints ``msg`` to stdout behind a short status tag that is
    passed through ``highlight``.
    """

    @staticmethod
    def info(msg):
        """Print an informational message behind a blue ``[*]`` tag."""
        print(' '.join([highlight('[*]', 'bold', 'blue'), msg]))

    @staticmethod
    def success(msg):
        """Print a success message behind a green ``[+]`` tag."""
        print(' '.join([highlight('[+]', 'bold', 'green'), msg]))

    @staticmethod
    def warn(msg):
        """Print a warning message behind a yellow ``[*]`` tag."""
        print(' '.join([highlight('[*]', 'bold', 'yellow'), msg]))

    @staticmethod
    def fail(msg):
        """Print a failure message behind a red ``[!]`` tag.

        Callers of this class use ``Log.fail`` for error reporting, so it must
        exist alongside the other levels.
        """
        print(' '.join([highlight('[!]', 'bold', 'red'), msg]))
33+
34+
35+
def code_gen(data, style, color, windows=False):
    """Wrap ``data`` in an ANSI escape sequence built from ``style`` and ``color``.

    When ``windows`` is true the input is returned untouched and the lookup
    tables are not consulted.
    """
    if windows:
        return data
    opening = '\033[0{}{}m'.format(STYLE[style], FG[color])
    return '{}{}\033[0m'.format(opening, data)
37+
38+
39+
def highlight(data, style='bold', fg='blue'):
    """Return ``data`` styled via ``code_gen``, left plain when ``os.name`` is ``'nt'``."""
    on_windows = os.name == 'nt'
    return code_gen(data, style, fg, windows=on_windows)
41+
42+
43+
def debug_args(args):
    """Log every attribute stored on ``args`` as a DEBUG record, one per line."""
    for name, value in args.__dict__.items():
        logging.debug('{:20} => {}'.format(name, value))
46+
47+
48+
def setup_debug_logger():
    """Configure the root logger to print DEBUG records to stdout.

    Each record is rendered as a highlighted ``DEBUG`` tag followed by the
    message. Returns the root logger.
    """
    tag = highlight('DEBUG', fg='purple')
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter("{} %(message)s".format(tag)))

    root = logging.getLogger()
    root.propagate = False
    root.setLevel(logging.DEBUG)
    root.addHandler(console)
    return root
59+
60+
61+
def setup_file_logger(file_name, log_name='cLinked_file', file_mode='w'):
    """Create a logger that writes bare messages to ``file_name``.

    Args:
        file_name: Path of the output file.
        log_name: Name of the logger to configure.
        file_mode: Mode handed to ``logging.FileHandler``.

    Returns:
        The configured logger. The CSV heading row is written first whenever
        the output file starts out empty.
    """
    file_handler = logging.FileHandler(file_name, file_mode)
    file_handler.setFormatter(logging.Formatter('%(message)s'))

    logger = logging.getLogger(log_name)
    logger.propagate = False
    logger.addHandler(file_handler)
    logger.setLevel(logging.INFO)

    # FileHandler opens (and therefore creates) the file as soon as it is
    # constructed, so checking for the file's existence afterwards can never
    # detect a first run. An empty file is the reliable sign that the heading
    # row is still missing (new file, or truncated by mode 'w').
    if os.path.getsize(file_handler.baseFilename) == 0:
        first_run(logger)
    return logger


def first_run(logger):
    """Write the CSV column headings through ``logger``."""
    logger.info('Datetime, Search, Name, Title, URL, rawText')
78+
79+
80+
def setup_cli_logger(log_level=logging.INFO, logger_name='cLinked'):
    """Return the logger ``logger_name`` set up to print bare messages to stdout.

    The logger does not propagate to its parents and is set to ``log_level``.
    """
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter('%(message)s'))

    cli_logger = logging.getLogger(logger_name)
    cli_logger.propagate = False
    cli_logger.setLevel(log_level)
    cli_logger.addHandler(console)
    return cli_logger

CrossLinked/search.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
import logging
2+
import requests
3+
import threading
4+
from time import sleep
5+
from random import choice
6+
from bs4 import BeautifulSoup
7+
from unidecode import unidecode
8+
from urllib.parse import urlparse
9+
from crosslinked.logger import Log
10+
from datetime import datetime, timedelta
11+
from urllib3 import disable_warnings, exceptions
12+
13+
# Logger that receives the CSV-formatted result rows.
csv = logging.getLogger('cLinked_csv')

# Requests in this module are sent with verify=False, so silence the
# InsecureRequestWarning urllib3 would otherwise emit for each of them.
disable_warnings(exceptions.InsecureRequestWarning)

# Only let WARNING and above through from urllib3's own logger.
logging.getLogger("urllib3").setLevel(logging.WARNING)
16+
17+
18+
class Timer(threading.Thread):
    """Background thread that clears ``running`` once ``timeout`` seconds pass.

    Callers poll ``running`` to learn whether the time budget is still open
    and may end it early with ``stop()``.
    """

    def __init__(self, timeout):
        """Store ``timeout`` (seconds); the countdown begins on ``start()``."""
        threading.Thread.__init__(self)
        self.start_time = None
        self.running = None
        self.timeout = timeout

    def start(self):
        """Mark the timer as running, then launch the thread.

        The flag is set here rather than in ``run()`` because ``start()``
        can return before ``run()`` has executed; a caller that checks
        ``running`` straight away would otherwise still see ``None``.
        """
        self.running = True
        self.start_time = datetime.now()
        threading.Thread.start(self)

    def run(self):
        """Poll the clock every 50 ms until the timeout passes or ``stop()`` is called."""
        if self.start_time is None:
            # run() was invoked directly instead of through start().
            self.running = True
            self.start_time = datetime.now()
        logging.debug("Thread Timer: Started")

        limit = timedelta(seconds=self.timeout)
        while self.running:
            if (datetime.now() - self.start_time) > limit:
                self.stop()
            sleep(0.05)

    def stop(self):
        """Clear ``running`` so the polling loop ends."""
        logging.debug("Thread Timer: Stopped")
        self.running = False
38+
39+
40+
class CrossLinked:
    """Collect LinkedIn profile links from search-engine result pages.

    Results accumulate in ``self.results`` as dicts with the keys ``url``,
    ``text``, ``title`` and ``name``. Each new result is also written as a
    quoted CSV row to the module-level ``csv`` logger.
    """

    def __init__(self, search_engine, target, timeout, conn_timeout=3, proxies=None, jitter=0):
        """Store the search configuration.

        Args:
            search_engine: Key into ``self.url`` (``'google'`` or ``'bing'``).
            target: Text placed inside the quoted part of the search query.
            timeout: Seconds the whole search may run before it is stopped.
            conn_timeout: Per-request timeout handed to ``web_request``.
            proxies: Optional list of proxies handed to ``web_request``.
                Defaults to a fresh empty list for every instance.
            jitter: Seconds to sleep between result pages.
        """
        self.results = []
        self.url = {'google': 'https://www.google.com/search?q=site:linkedin.com/in+"{}"&num=100&start={}',
                    'bing': 'http://www.bing.com/search?q="{}"+site:linkedin.com/in&first={}'}

        self.runtime = datetime.now().strftime('%m-%d-%Y %H:%M:%S')
        self.search_engine = search_engine
        self.conn_timeout = conn_timeout
        self.timeout = timeout
        # A shared mutable default would leak proxies between instances.
        self.proxies = [] if proxies is None else proxies
        self.target = target
        self.jitter = jitter

    def search(self):
        """Request result pages until the timer expires or a page fails.

        The page offset in each URL is the number of results gathered so far.

        Returns:
            The list of result dicts collected.
        """
        search_timer = Timer(self.timeout)
        search_timer.start()

        try:
            while search_timer.running:
                try:
                    url = self.url[self.search_engine].format(self.target, len(self.results))
                    resp = web_request(url, self.conn_timeout, self.proxies)
                    http_code = get_statuscode(resp)

                    if http_code != 200:
                        Log.info("{:<3} {} ({})".format(len(self.results), url, http_code))
                        Log.warn('None 200 response, exiting search ({})'.format(http_code))
                        break

                    self.page_parser(resp)
                    Log.info("{:<3} {} ({})".format(len(self.results), url, http_code))

                    sleep(self.jitter)
                except KeyboardInterrupt:
                    Log.warn("Key event detected, exiting search...")
                    break
        finally:
            # Always stop the timer thread, even if an unexpected error
            # escapes the loop, so it does not keep polling until timeout.
            search_timer.stop()
        return self.results

    def page_parser(self, resp):
        """Feed every link in ``resp`` to ``results_handler``, warning on failures."""
        for link in extract_links(resp):
            try:
                self.results_handler(link)
            except Exception as e:
                Log.warn('Failed Parsing: {}- {}'.format(link.get('href'), e))

    def link_parser(self, url, link):
        """Build the result dict (``url``, ``text``, ``title``, ``name``) for one link."""
        u = {'url': url}
        u['text'] = unidecode(link.text.split("|")[0].split("...")[0])  # Capture link text before trailing chars
        u['title'] = self.parse_linkedin_title(u['text'])               # Extract job title
        u['name'] = self.parse_linkedin_name(u['text'])                 # Extract whole name
        return u

    def parse_linkedin_title(self, data):
        """Return the text after the first ``-`` in ``data``, or ``'N/A'`` if absent."""
        try:
            title = data.split("-")[1].split('https:')[0]
            return title.split("...")[0].split("|")[0].strip()
        except Exception:
            # Not a bare except: KeyboardInterrupt must still reach search().
            return 'N/A'

    def parse_linkedin_name(self, data):
        """Return the text before the first ``-`` in ``data``, or ``False`` on error."""
        try:
            name = data.split("-")[0].strip()
            return unidecode(name)
        except Exception:
            return False

    def results_handler(self, link):
        """Record ``link`` if it points at a ``linkedin.com/in`` profile URL.

        Returns ``False`` for links that are filtered out, otherwise ``None``.
        """
        url = str(link.get('href')).lower()

        if not extract_subdomain(url).endswith('linkedin.com'):
            return False
        elif 'linkedin.com/in' not in url:
            return False

        data = self.link_parser(url, link)
        if data['name']:
            self.log_results(data)

    def log_results(self, d):
        """Store result ``d`` and write it to the debug and CSV loggers."""
        # Prevent Duplicates & non-standard responses (i.e: "<span>linkedin.com</span></a>")
        if d in self.results:
            return
        elif 'linkedin.com' in d['name']:
            return

        self.results.append(d)
        # Search results are logged to names.csv but names.txt is not generated until end to prevent duplicates
        logging.debug('name: {:25} RawTxt: {}'.format(d['name'], d['text']))

        fields = (self.runtime, self.search_engine, d['name'], d['title'], d['url'], d['text'])
        # Double any embedded quote so a value containing '"' cannot break
        # out of its quoted CSV field. The trailing comma is kept as before.
        row = ','.join('"{}"'.format(str(field).replace('"', '""')) for field in fields)
        csv.info(row + ',')
131+
132+
133+
def get_statuscode(resp):
    """Return ``resp.status_code``, or ``0`` when ``resp`` has no such attribute.

    A failed request is represented by a non-response value such as
    ``False``; this maps it to a code that never equals 200.
    """
    # getattr's default covers only the missing-attribute case, unlike a bare
    # except which would also hide KeyboardInterrupt and SystemExit.
    return getattr(resp, 'status_code', 0)
138+
139+
140+
def get_proxy(proxies):
    """Pick one entry of ``proxies`` at random and return it as a proxy mapping.

    The chosen value is used for both ``http`` and ``https``. An empty dict is
    returned when ``proxies`` is empty or the chosen entry is falsy.
    """
    if not proxies:
        return {}
    picked = choice(proxies)
    if not picked:
        return {}
    return {"http": picked, "https": picked}
143+
144+
145+
def get_agent():
146+
return choice([
147+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0'
148+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 12.5; rv:104.0) Gecko/20100101 Firefox/104.0',
149+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
150+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
151+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
152+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
153+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
154+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15',
155+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15'
156+
])
157+
158+
159+
def web_request(url, timeout=3, proxies=None, **kwargs):
    """Send a GET request to ``url`` and return the response, or ``False`` on failure.

    Args:
        url: Address to fetch.
        timeout: Timeout handed to ``Session.send``.
        proxies: Optional list of proxies; one is picked per call by
            ``get_proxy``.
        **kwargs: Extra keyword arguments forwarded to ``requests.Request``.

    Returns:
        The ``requests`` response object, or ``False`` if the request raised.
    """
    try:
        request = requests.Request('GET', url, headers={'User-Agent': get_agent()},
                                   cookies={'CONSENT': 'YES'}, **kwargs)
        # The context manager releases the session's connections once the
        # response has been received.
        with requests.Session() as session:
            return session.send(request.prepare(), timeout=timeout, verify=False,
                                proxies=get_proxy(proxies))
    except requests.exceptions.TooManyRedirects as e:
        # Log has no fail() method; calling it here would raise inside the
        # handler and crash the search instead of reporting the error.
        Log.warn('Proxy Error: {}'.format(e))
    except Exception as e:
        # Best effort: report the failure as False, but let KeyboardInterrupt
        # and SystemExit propagate so the caller can react to Ctrl-C.
        logging.debug('Request failed: {} ({})'.format(url, e))
    return False
170+
171+
172+
def extract_links(resp):
    """Parse ``resp.content`` with the lxml parser and return all ``<a>`` tags as a list."""
    document = BeautifulSoup(resp.content, 'lxml')
    return list(document.find_all('a'))
178+
179+
180+
def extract_subdomain(url):
    """Return the network-location (host) part of ``url``."""
    parsed = urlparse(url)
    return parsed.netloc

CrossLinked/utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from os import path
2+
from crosslinked.logger import Log
3+
4+
5+
def delimiter2list(value, delim=","):
    """Split ``value`` on ``delim``; a falsy ``value`` yields an empty list."""
    if not value:
        return []
    return value.split(delim)
7+
8+
9+
def delimiter2dict(value, delim_one=";", delim_two=":"):
    """Parse ``value`` into a dict of stripped key/value strings.

    Entries are separated by ``delim_one``. Within an entry the key is the
    text before the first ``delim_two`` and the value is everything after it.
    Empty entries are skipped.
    """
    parsed = {}
    for entry in value.split(delim_one):
        if not entry:
            continue
        key, _, remainder = entry.partition(delim_two)
        parsed[key.strip()] = remainder.strip()
    return parsed
16+
17+
18+
def file_exists(filename, contents=True):
    """Return the stripped lines of ``filename``, or the path itself.

    Args:
        filename: Path of the input file.
        contents: When true, return the file's lines with surrounding
            whitespace stripped; otherwise return ``filename`` unchanged.

    Raises:
        SystemExit: With status 1, after a warning, if the file is missing.
    """
    if path.exists(filename):
        if not contents:
            return filename
        # Open the path that was passed in (not the literal string
        # 'filename') and close the handle once the lines are read.
        with open(filename) as handle:
            return [line.strip() for line in handle]
    Log.warn("Input file not found: {}".format(filename))
    raise SystemExit(1)

SCRIPTS.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,5 @@
7474
| [Take me](./dotdotslash/)
7575
| 50\. | HackerEnv | hackerEnv is an automation tool that quickly and easily sweep IPs and scan ports, vulnerabilities and exploit them. Then, it hands you an interactive shell for further testing. Also, it generates HTML and docx reports. It uses other tools such as nmap, nikto, metasploit and hydra. Works in kali linux and Parrot OS.
7676
| [Take me](./hackerenv/)
77-
77+
| 51\. | CrossLinked | CrossLinked is a LinkedIn enumeration tool that uses search engine scraping to collect valid employee names from an organization. This technique provides accurate results without the use of API keys, credentials, or accessing LinkedIn directly!
78+
| [Take me](./CrossLinked/)

0 commit comments

Comments
 (0)