|
| 1 | +from webdriver_manager.chrome import ChromeDriverManager |
| 2 | +from utils.threading_controller import FastSearchAlgo |
| 3 | +from argparse import ArgumentParser |
| 4 | +from os.path import isfile |
| 5 | +import sys |
| 6 | + |
| 7 | + |
class GMapsScraper:
    """Command-line front end for the Google Maps scraper.

    Parses the command-line options, validates them, resolves a Chrome driver
    path and hands the loaded queries to ``FastSearchAlgo``.
    """

    def __init__(self):
        # Parsed argparse namespace; populated by arg_parser().
        self._args = None

    def arg_parser(self, argv=None):
        """Parse the command-line options and store them on the instance.

        Args:
            argv: Optional list of argument strings to parse. When ``None``
                (the default) argparse falls back to ``sys.argv[1:]``, which
                is the behaviour callers passing no argument get.
        """
        parser = ArgumentParser(description='Command Line Google Map Scraper by Abdul Moez')

        # Input options
        parser.add_argument('-q', '--query-file', help='Path to query file (default: ./queries.txt)', type=str,
                            default="./queries.txt")
        parser.add_argument('-w', '--threads', help='Number of threads to use (default: 1)', type=int, default=1)
        parser.add_argument('-l', '--limit', help='Number of results to scrape (-1 for all results, default: -1)',
                            type=int, default=-1)
        parser.add_argument('-u', '--unavailable-text',
                            help='Replacement text for unavailable information (default: "Not Available")', type=str,
                            default="Not Available")
        parser.add_argument('-bw', '--browser-wait', help='Browser waiting time in seconds (default: 15)', type=int,
                            default=15)
        parser.add_argument('-se', '--suggested-ext',
                            help='Suggested URL extensions to try (can be specified multiple times)', action='append',
                            default=[])
        # NOTE: store_false means the stored value is True unless -wb is given;
        # scrape_maps_data() passes that value on as the ``headless`` setting.
        parser.add_argument('-wb', '--windowed-browser', help='Disable headless mode', action='store_false',
                            default=True)
        parser.add_argument('-v', '--verbose', help='Enable verbose mode', action='store_true')
        parser.add_argument('-o', '--output-folder', help='Output folder to store CSV details (default: ./CSV_FILES)',
                            type=str, default='./CSV_FILES')
        parser.add_argument('-d', '--driver-path',
                            help='Path to Chrome driver (if not provided, it will be downloaded)', type=str,
                            default='')

        # Custom commands for additional help
        parser.add_argument('--help-query-file', action='store_true', help='Get help for specifying the query file')
        parser.add_argument('--help-limit', action='store_true', help='Get help for specifying the result limit')
        parser.add_argument('--help-driver-path', action='store_true', help='Get help for specifying the driver path')

        self._args = parser.parse_args(argv)

    @staticmethod
    def print_query_file_help():
        """Print the query-file format help and exit with status 0."""
        print("The query file should contain a list of search queries, each query on a separate line.")
        print("For example:")
        print("Pizza restaurants")
        print("Coffee shops")
        print("...")
        sys.exit(0)

    @staticmethod
    def print_limit_help():
        """Print the result-limit help and exit with status 0."""
        print("Use this option to specify the maximum number of results to scrape.")
        print("Use '-1' to scrape all results.")
        sys.exit(0)

    @staticmethod
    def print_driver_path_help():
        """Print the driver-path help and exit with status 0."""
        print("If you have a specific Chrome driver path, you can provide it using this option.")
        print("If not provided, the script will attempt to download the driver automatically.")
        print("You can download a compatible driver from https://chromedriver.chromium.org/downloads.")
        sys.exit(0)

    def check_args(self):
        """Exit with status 1 when the configured query file does not exist."""
        q = self._args.query_file
        if not isfile(q):
            print(f"[-] File not found at path: {q}")
            sys.exit(1)

    def scrape_maps_data(self):
        """Run the scraper using the options stored by ``arg_parser``.

        The ``--help-*`` flags are handled first so that they work even when
        the query file is missing; each of them prints its text and exits.
        Otherwise the query file is validated and loaded, a Chrome driver path
        is resolved (downloaded when ``-d`` was not given) and the queries are
        passed to ``FastSearchAlgo``.

        Raises:
            SystemExit: status 0 after printing a help topic; status 1 when
                the query file is missing or the driver download fails with
                ``ValueError``.
        """
        args = self._args

        # Help topics must not depend on the query file being present.
        if args.help_query_file:
            self.print_query_file_help()

        if args.help_limit:
            self.print_limit_help()

        if args.help_driver_path:
            self.print_driver_path_help()

        self.check_args()

        queries_list = FastSearchAlgo.load_query_file(file_name=args.query_file)
        # Never use more workers than queries, and never fewer than one
        # (guards against -w 0 / negative values and an empty query list;
        # presumably FastSearchAlgo needs a positive worker count - confirm).
        threads_limit = max(1, min(args.threads, len(queries_list)))
        # -1 is the CLI spelling of "no limit".
        limit_results = None if args.limit == -1 else args.limit

        driver_path = args.driver_path
        if not driver_path:
            try:
                driver_path = ChromeDriverManager().install()
            except ValueError:
                print("[-] Not able to download the driver which is capable with your browser.")
                print("[INFO] Head to this site (https://chromedriver.chromium.org/downloads)"
                      " and find your version driver and pass it with argument -d.")
                # Report the failure through the exit status.
                sys.exit(1)

        algo_obj = FastSearchAlgo(
            unavailable_text=args.unavailable_text,
            headless=args.windowed_browser,
            wait_time=args.browser_wait,
            suggested_ext=args.suggested_ext,
            output_path=args.output_folder,
            workers=threads_limit,
            result_range=limit_results,
            verbose=args.verbose,
            driver_path=driver_path
        )

        algo_obj.fast_search_algorithm(queries_list)
| 112 | + |
| 113 | + |
if __name__ == '__main__':
    # Script entry point: parse the command line, then run the scraper.
    scraper = GMapsScraper()
    scraper.arg_parser()
    scraper.scrape_maps_data()
0 commit comments