diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..314f02b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.txt \ No newline at end of file diff --git a/chromedriver_win32/chromedriver.exe b/chromedriver_win32/chromedriver.exe index 4850c8d..bab4b5f 100644 Binary files a/chromedriver_win32/chromedriver.exe and b/chromedriver_win32/chromedriver.exe differ diff --git a/menu_scraper.py b/menu_scraper.py index d75460b..abdddf2 100644 --- a/menu_scraper.py +++ b/menu_scraper.py @@ -12,24 +12,26 @@ def scrape_menu(url): num = '' try: - detail = driver.find_element_by_xpath("/html/body/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[1]").text + detail = driver.find_element_by_xpath("/html/body/div/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[1]").text except: detail = '' try: rating = driver.find_element_by_xpath( - "/html/body/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[2]/div[1]").text + "/html/body/div/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[2]/div[1]").text except: rating = 'N/A' try: num = driver.find_element_by_xpath( - "/html/body/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[2]/div[3]").text + "/html/body/div/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[2]/div[3]").text except: num = '(0)' + + # after this still not working false xpath restaurant = { - 'title': driver.find_element_by_xpath("/html/body/div/div/main/div[1]/div/div/div[2]/div/div[2]/h1").text, + 'title': driver.find_element_by_xpath("/html/body/div/div/div/main/div[1]/div/div/div[2]/div/div[2]/h1").text, 'detail': detail, 'rating': rating, 'num_reviews': num, @@ -37,8 +39,8 @@ def scrape_menu(url): } # ===== Menu ===== - list_item_element = driver.find_element_by_xpath("/html/body/div/div/main/div[2]/ul").find_element_by_tag_name("li") - menu = driver.find_element_by_xpath("/html/body/div/div/main/div[2]/ul").find_elements_by_class_name( + list_item_element = 
driver.find_element_by_xpath("/html/body/div/div/div/main/div[2]/ul").find_element_by_tag_name("li") + menu = driver.find_element_by_xpath("/html/body/div/div/div/main/div[2]/ul").find_elements_by_class_name( list_item_element.get_attribute("class")) name = '' @@ -48,19 +50,19 @@ def scrape_menu(url): img_url = '' for x in range(len(menu) - 1): - category = driver.find_element_by_xpath("/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/h2").text + category = driver.find_element_by_xpath("/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/h2").text restaurant['menu'].append({ category: [] }) section = driver.find_element_by_xpath( - "/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul").find_elements_by_tag_name("li") + "/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul").find_elements_by_tag_name("li") for y in range(len(section)): # Get Product Name try: name = str(driver.find_element_by_xpath( - "/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str( + "/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str( y + 1) + "]/a/div/div[1]/h4").text) except: name = '' @@ -68,7 +70,7 @@ def scrape_menu(url): # Get Product Description try: description = str(driver.find_element_by_xpath( - "/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str( + "/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str( y + 1) + "]/a/div/div[1]/div[1]").text) except: description = '' @@ -76,7 +78,7 @@ def scrape_menu(url): # Get Product Price try: price = str(driver.find_element_by_xpath( - "/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str( + "/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str( y + 1) + "]/a/div/div[1]/div[2]").text) if price == description: @@ -99,7 +101,7 @@ def scrape_menu(url): # Get Image URL try: img_url = str(driver.find_element_by_xpath( - "/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + 
str( + "/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str( y + 1) + "]/a/div/div[2]/img").get_attribute("src")) except: img_url = '' diff --git a/restaurant_scraper.py b/restaurant_scraper.py index 6ec1bec..d5aedd8 100644 --- a/restaurant_scraper.py +++ b/restaurant_scraper.py @@ -1,18 +1,27 @@ from selenium import webdriver import os +from selenium.common.exceptions import NoSuchElementException def scrape_restaurants(base_url, location): driver = webdriver.Chrome(executable_path="chromedriver_win32/chromedriver.exe") driver.get(base_url + location) - categories = driver.find_element_by_xpath("/html/body/div/div/main/div[2]/div[3]"). \ - text.replace(" ", "-").lower().splitlines() + try: + categories = driver.find_element_by_xpath("/html/body/div/div/main/div[2]/div[3]"). \ + text.replace(" ", "-").lower().splitlines()[:14] + except NoSuchElementException: + # I noticed there is an extra div on the Uber Eats website, so I had to write these lines + categories = driver.find_element_by_xpath("/html/body/div/div/div/main/div[2]/div[3]"). 
\ + text.replace(" ", "-").lower().splitlines()[:14] for cat in categories: try: driver.get(base_url + location + "/" + cat) - temp_urls = driver.find_element_by_xpath("/html/body/div/div/main/div[5]").find_elements_by_tag_name("a") + try: + temp_urls = driver.find_element_by_xpath("/html/body/div/div/main/div[5]").find_elements_by_tag_name("a") + except NoSuchElementException: + temp_urls = driver.find_element_by_xpath("/html/body/div/div/div/main/div[5]").find_elements_by_tag_name("a") for url in temp_urls: out_file = open("temp_urls.txt", "a") out_file.write(str(url.get_attribute("href")) + "\n") diff --git a/ubereats.py b/ubereats.py index 3000ea5..b020d0f 100644 --- a/ubereats.py +++ b/ubereats.py @@ -3,8 +3,8 @@ import restaurant_scraper -base_url = "https://www.ubereats.com/ca/category/" -city_list = ["toronto", "kingston", "hamilton"] +base_url = "https://www.ubereats.com/category/" +city_list = ["new-york-city", "toronto", "kingston", "hamilton", "brooklyn"] restaurant_data = { 'cities': [] }