Merge pull request #438 from shraddha761/cross

avinashkranjan · web-flow · commit 85a6de234d13 · 2023-07-28T01:58:32.000+05:30
Cross linked script
diff --git a/CrossLinked/Readme.md b/CrossLinked/Readme.md
@@ -0,0 +1,4 @@
+# CrossLinked
+
+CrossLinked is a LinkedIn enumeration tool that uses search engine scraping to collect valid employee names from an organization. This technique provides accurate results without the use of API keys, credentials, or accessing LinkedIn directly!
+
diff --git a/CrossLinked/logger.py b/CrossLinked/logger.py
@@ -0,0 +1,89 @@
+import os
+import sys
+import logging
+
+# ANSI SGR style codes keyed by style name; code_gen() looks the style up here.
+# Note the "no style" key is the string 'None', not the None singleton.
+STYLE = {'None': '0',
+         'bold': '1'
+         }
+
+# ANSI SGR foreground color fragments keyed by color name. Each value already
+# carries its leading ';' so code_gen() can append it directly after the style.
+# NOTE(review): SGR code 30 is conventionally black; it is labeled 'gray' here.
+FG = {'None': '',
+      'gray': ';30',
+      'red': ';31',
+      'green': ';32',
+      'yellow': ';33',
+      'blue': ';34',
+      'purple': ';35',
+      'cyan': ';36'
+      }
+
+
+class Log:
+    # Quick log class for CLI output
+    @staticmethod
+    def info(msg):
+        print(' '.join([highlight('[*]', 'bold', 'blue'), msg]))
+
+    @staticmethod
+    def success(msg):
+        print(' '.join([highlight('[+]', 'bold', 'green'), msg]))
+
+    @staticmethod
+    def warn(msg):
+        print(' '.join([highlight('[*]', 'bold', 'yellow'), msg]))
+
+
+def code_gen(data, style, color, windows=False):
+    return data if windows else '\033[0{}{}m{}\033[0m'.format(STYLE[style], FG[color], data)
+
+
+def highlight(data, style='bold', fg='blue'):
+    return code_gen(data, style, fg, windows=True if os.name == 'nt' else False)
+
+
+def debug_args(args):
+    for k in args.__dict__:
+        logging.debug('{:20} => {}'.format(k, args.__dict__[k]))
+
+
+def setup_debug_logger():
+    debug_output_string = "{} %(message)s".format(highlight('DEBUG', fg='purple'))
+    formatter = logging.Formatter(debug_output_string)
+    streamHandler = logging.StreamHandler(sys.stdout)
+    streamHandler.setFormatter(formatter)
+
+    root_logger = logging.getLogger()
+    root_logger.propagate = False
+    root_logger.addHandler(streamHandler)
+    root_logger.setLevel(logging.DEBUG)
+    return root_logger
+
+
+def setup_file_logger(file_name, log_name='cLinked_file', file_mode='w'):
+    formatter = logging.Formatter('%(message)s')
+    fileHandler = logging.FileHandler(file_name, file_mode)
+    fileHandler.setFormatter(formatter)
+
+    logger = logging.getLogger(log_name)
+    logger.propagate = False
+    logger.addHandler(fileHandler)
+    logger.setLevel(logging.INFO)
+
+    first_run(logger) if not os.path.exists(file_name) else False
+    return logger
+
+
+def first_run(logger):
+    # init headings in CSV log file
+    logger.info('Datetime, Search, Name, Title, URL, rawText')
+
+
+def setup_cli_logger(log_level=logging.INFO, logger_name='cLinked'):
+    formatter = logging.Formatter('%(message)s')
+    StreamHandler = logging.StreamHandler(sys.stdout)
+    StreamHandler.setFormatter(formatter)
+
+    logger = logging.getLogger(logger_name)
+    logger.propagate = False
+    logger.addHandler(StreamHandler)
+    logger.setLevel(log_level)
+    return logger
diff --git a/CrossLinked/search.py b/CrossLinked/search.py
@@ -0,0 +1,181 @@
+import logging
+import requests
+import threading
+from time import sleep
+from random import choice
+from bs4 import BeautifulSoup
+from unidecode import unidecode
+from urllib.parse import urlparse
+from crosslinked.logger import Log
+from datetime import datetime, timedelta
+from urllib3 import disable_warnings, exceptions
+
+# web_request() sends requests with verify=False, so silence the
+# InsecureRequestWarning that urllib3 would otherwise emit.
+disable_warnings(exceptions.InsecureRequestWarning)
+# Only let urllib3 log records at WARNING level and above through.
+logging.getLogger("urllib3").setLevel(logging.WARNING)
+# Logger that CrossLinked.log_results() writes its CSV rows to.
+# NOTE(review): no handler is attached to 'cLinked_csv' in this module;
+# presumably the caller configures it - confirm against the entry point.
+csv = logging.getLogger('cLinked_csv')
+
+
+class Timer(threading.Thread):
+    def __init__(self, timeout):
+        threading.Thread.__init__(self)
+        self.start_time = None
+        self.running = None
+        self.timeout = timeout
+
+    def run(self):
+        self.running = True
+        self.start_time = datetime.now()
+        logging.debug("Thread Timer: Started")
+
+        while self.running:
+            if (datetime.now() - self.start_time) > timedelta(seconds=self.timeout):
+                self.stop()
+            sleep(0.05)
+
+    def stop(self):
+        logging.debug("Thread Timer: Stopped")
+        self.running = False
+
+
+class CrossLinked:
+    def __init__(self, search_engine, target, timeout, conn_timeout=3, proxies=[], jitter=0):
+        self.results = []
+        self.url = {'google': 'https://www.google.com/search?q=site:linkedin.com/in+"{}"&num=100&start={}',
+                    'bing': 'http://www.bing.com/search?q="{}"+site:linkedin.com/in&first={}'}
+
+        self.runtime = datetime.now().strftime('%m-%d-%Y %H:%M:%S')
+        self.search_engine = search_engine
+        self.conn_timeout = conn_timeout
+        self.timeout = timeout
+        self.proxies = proxies
+        self.target = target
+        self.jitter = jitter
+
+    def search(self):
+        search_timer = Timer(self.timeout)
+        search_timer.start()
+
+        while search_timer.running:
+            try:
+                url = self.url[self.search_engine].format(self.target, len(self.results))
+                resp = web_request(url, self.conn_timeout, self.proxies)
+                http_code = get_statuscode(resp)
+
+                if http_code != 200:
+                    Log.info("{:<3} {} ({})".format(len(self.results), url, http_code))
+                    Log.warn('None 200 response, exiting search ({})'.format(http_code))
+                    break
+
+                self.page_parser(resp)
+                Log.info("{:<3} {} ({})".format(len(self.results), url, http_code))
+
+                sleep(self.jitter)
+            except KeyboardInterrupt:
+                Log.warn("Key event detected, exiting search...")
+                break
+
+        search_timer.stop()
+        return self.results
+
+    def page_parser(self, resp):
+        for link in extract_links(resp):
+            try:
+                self.results_handler(link)
+            except Exception as e:
+                Log.warn('Failed Parsing: {}- {}'.format(link.get('href'), e))
+
+    def link_parser(self, url, link):
+        u = {'url': url}
+        u['text'] = unidecode(link.text.split("|")[0].split("...")[0])  # Capture link text before trailing chars
+        u['title'] = self.parse_linkedin_title(u['text'])               # Extract job title
+        u['name'] = self.parse_linkedin_name(u['text'])                 # Extract whole name
+        return u
+
+    def parse_linkedin_title(self, data):
+        try:
+            title = data.split("-")[1].split('https:')[0]
+            return title.split("...")[0].split("|")[0].strip()
+        except:
+            return 'N/A'
+
+    def parse_linkedin_name(self, data):
+        try:
+            name = data.split("-")[0].strip()
+            return unidecode(name)
+        except:
+            return False
+
+    def results_handler(self, link):
+        url = str(link.get('href')).lower()
+
+        if not extract_subdomain(url).endswith('linkedin.com'):
+            return False
+        elif 'linkedin.com/in' not in url:
+            return False
+
+        data = self.link_parser(url, link)
+        self.log_results(data) if data['name'] else False
+
+
+    def log_results(self, d):
+        # Prevent Duplicates & non-standard responses (i.e: "<span>linkedin.com</span></a>")
+        if d in self.results:
+            return
+        elif 'linkedin.com' in d['name']:
+            return
+
+        self.results.append(d)
+        # Search results are logged to names.csv but names.txt is not generated until end to prevent duplicates
+        logging.debug('name: {:25} RawTxt: {}'.format(d['name'], d['text']))
+        csv.info('"{}","{}","{}","{}","{}","{}",'.format(self.runtime, self.search_engine, d['name'], d['title'], d['url'], d['text']))
+
+
+def get_statuscode(resp):
+    try:
+        return resp.status_code
+    except:
+        return 0
+
+
+def get_proxy(proxies):
+    tmp = choice(proxies) if proxies else False
+    return {"http": tmp, "https": tmp} if tmp else {}
+
+
+def get_agent():
+    return choice([
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0'
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 12.5; rv:104.0) Gecko/20100101 Firefox/104.0',
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15'
+    ])
+
+
+def web_request(url, timeout=3, proxies=[], **kwargs):
+    try:
+        s = requests.Session()
+        r = requests.Request('GET', url, headers={'User-Agent': get_agent()}, cookies = {'CONSENT' : 'YES'}, **kwargs)
+        p = r.prepare()
+        return s.send(p, timeout=timeout, verify=False, proxies=get_proxy(proxies))
+    except requests.exceptions.TooManyRedirects as e:
+        Log.fail('Proxy Error: {}'.format(e))
+    except:
+        pass
+    return False
+
+
+def extract_links(resp):
+    links = []
+    soup = BeautifulSoup(resp.content, 'lxml')
+    for link in soup.findAll('a'):
+        links.append(link)
+    return links
+
+
+def extract_subdomain(url):
+    return urlparse(url).netloc
diff --git a/CrossLinked/utils.py b/CrossLinked/utils.py
@@ -0,0 +1,22 @@
+from os import path
+from crosslinked.logger import Log
+
+
+def delimiter2list(value, delim=","):
+    return value.split(delim) if value else []
+
+
+def delimiter2dict(value, delim_one=";", delim_two=":"):
+    x = {}
+    for item in value.split(delim_one):
+        if item:
+            sp = item.split(delim_two)
+            x[sp[0].strip()] = delim_two.join(sp[1:]).strip()
+    return x
+
+
+def file_exists(filename, contents=True):
+    if path.exists(filename):
+        return [line.strip() for line in open('filename')] if contents else filename
+    Log.warn("Input file not found: {}".format(filename))
+    exit(1)
diff --git a/SCRIPTS.md b/SCRIPTS.md
@@ -55,6 +55,27 @@
 | 44\.     | Controls open & close apps | This python script can add the functionality of open and close apps of the system. | [Take me](./Open-Close-Apps_Automation)
 | 45\.     | DNS Dump | This script is a simple Python tool that allows you to retrieve and save a DNS dump image for a specified domain. It utilizes the DNSDumpster service to generate the DNS map image. | [Take me](./DNS_Dump/)
 | 46\.  | Network Analysis| This Python script is designed to analyze network traffic using the Scapy library. It captures network packets, extracts relevant information, and provides insights into various aspects of the network. | [Take me](./Network Traffic Analyzer/)
+| 46\.  |Flooder_Sniffer_Spoofer | These scripts utilize various libraries such as subprocess, socket, struct, scapy, and argparse to implement their respective functionalities. | [Take me](./Flooder_Sniffer_Spoofer/)
+| 46\.     | Github Automation | This python script will automate your github account just input the github access token into this program, then enjoy the program ! | [Take me](./Automate_Github)
+| 46\.     | Wayback Machine | The script prompts the user to enter a website URL and a limit number, then fetches the archived URLs for the given website from the WayBack Machine and displays them. | [Take me](./Wayback_Machine)
+| 47\.     | Expose File Server | This script is a Python-based file server that exposes the contents of a specified directory over HTTP. It utilizes the Bottle framework to handle HTTP requests and serve static files.| [Take me](./File_Server/)
+| 48\.     | IP Location | An Excellent OSINT tool to get information of any ip address. | [Take me](./IP_Location/)
+| 47\.     | Expose File Server | This script is a Python-based file server that exposes the contents of a specified directory over HTTP. It utilizes the Bottle framework to handle HTTP requests and serve static files.| [Take me](./File_Server/)
+| 49\.     | SpiderFoot Parser | The sf_parser.py script is a Python script that parses JSON files containing SpiderFoot's output. It takes a JSON file as input and displays the parsed data in a table format. The script uses the argparse, huepy, and terminaltables libraries.| [Take me](./SF_Parser/)
+ | 50\.     | Enumerate Forms | This Python script, named "enum_forms," is a command-line utility designed to enumerate and extract form data from a specified URL. The script is useful for analyzing web pages to identify forms, their input elements, and related details.| [Take me](./ENUM_Forms/)
+ | 51\.     | Dumpster Fire | The DumpsterFire Toolset is a Python script that allows users to create, save, load, and ignite custom DumpsterFires, which are collections of "Fires" executed sequentially with optional delays..| [Take me](./Dumpster_Fire/)
+| 47\.     | Expose File Server | This script is a Python-based file server that exposes the contents of a specified directory over HTTP. It utilizes the Bottle framework to handle HTTP requests and serve static files. | [Take me](./File_Server/)
+| 50\.     | Enumerate Forms | This Python script, named "enum_forms," is a command-line utility designed to enumerate and extract form data from a specified URL. The script is useful for analyzing web pages to identify forms, their input elements, and related details. | [Take me](./ENUM_Forms/)
+| 49\.     | SpiderFoot Parser | The sf_parser.py script is a Python script that parses JSON files containing SpiderFoot's output. It takes a JSON file as input and displays the parsed data in a table format. The script uses the argparse, huepy, and terminaltables libraries. | [Take me](./SF_Parser/)
+ | 50\.     | Dot dot Slash | Python script designed to automate the testing of Path Traversal vulnerabilities in web applications. It is intended for use by security researchers, ethical hackers, and developers to identify and fix potential security issues related to directory traversal. | [Take me](./dotdotslash/)
+ | 50\.     | HackerEnv | hackerEnv is an automation tool that quickly and easily sweeps IPs, scans ports and vulnerabilities, and exploits them. Then, it hands you an interactive shell for further testing. It also generates HTML and docx reports. It uses other tools such as nmap, nikto, metasploit and hydra. Works in Kali Linux and Parrot OS. | [Take me](./hackerenv/)
+ | 51\.     | CrossLinked | CrossLinked is a LinkedIn enumeration tool that uses search engine scraping to collect valid employee names from an organization. This technique provides accurate results without the use of API keys, credentials, or accessing LinkedIn directly! | [Take me](./CrossLinked/)
 | 47\.  |Flooder_Sniffer_Spoofer | These scripts utilize various libraries such as subprocess, socket, struct, scapy, and argparse to implement their respective functionalities. | [Take me](./Flooder_Sniffer_Spoofer/)
 | 48\.     | Github Automation | This python script will automate your github account just input the github access token into this program, then enjoy the program ! | [Take me](./Automate_Github)
 | 49\.     | Wayback Machine | The script prompts the user to enter a website URL and a limit number, then fetches the archived URLs for the given website from the WayBack Machine and displays them. | [Take me](./Wayback_Machine)

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +# CrossLinked
++
 +CrossLinked is a LinkedIn enumeration tool that uses search engine scraping to collect valid employee names from an organization. This technique provides accurate results without the use of API keys, credentials, or accessing LinkedIn directly!
++