Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.txt
Binary file modified chromedriver_win32/chromedriver.exe
Binary file not shown.
26 changes: 14 additions & 12 deletions menu_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,33 +12,35 @@ def scrape_menu(url):
num = ''

try:
detail = driver.find_element_by_xpath("/html/body/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[1]").text
detail = driver.find_element_by_xpath("/html/body/div/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[1]").text
except:
detail = ''

try:
rating = driver.find_element_by_xpath(
"/html/body/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[2]/div[1]").text
"/html/body/div/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[2]/div[1]").text
except:
rating = 'N/A'

try:
num = driver.find_element_by_xpath(
"/html/body/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[2]/div[3]").text
"/html/body/div/div/div/main/div[1]/div/div/div[2]/div/div[2]/div[2]/div[3]").text
except:
num = '(0)'

    # NOTE: elements below this point may still fail to load due to an incorrect xpath

restaurant = {
'title': driver.find_element_by_xpath("/html/body/div/div/main/div[1]/div/div/div[2]/div/div[2]/h1").text,
'title': driver.find_element_by_xpath("/html/body/div/div/div/main/div[1]/div/div/div[2]/div/div[2]/h1").text,
'detail': detail,
'rating': rating,
'num_reviews': num,
'menu': []
}

# ===== Menu =====
list_item_element = driver.find_element_by_xpath("/html/body/div/div/main/div[2]/ul").find_element_by_tag_name("li")
menu = driver.find_element_by_xpath("/html/body/div/div/main/div[2]/ul").find_elements_by_class_name(
list_item_element = driver.find_element_by_xpath("/html/body/div/div/div/main/div[2]/ul").find_element_by_tag_name("li")
menu = driver.find_element_by_xpath("/html/body/div/div/div/main/div[2]/ul").find_elements_by_class_name(
list_item_element.get_attribute("class"))

name = ''
Expand All @@ -48,35 +50,35 @@ def scrape_menu(url):
img_url = ''

for x in range(len(menu) - 1):
category = driver.find_element_by_xpath("/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/h2").text
category = driver.find_element_by_xpath("/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/h2").text
restaurant['menu'].append({
category: []
})
section = driver.find_element_by_xpath(
"/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul").find_elements_by_tag_name("li")
"/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul").find_elements_by_tag_name("li")

for y in range(len(section)):

# Get Product Name
try:
name = str(driver.find_element_by_xpath(
"/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str(
"/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str(
y + 1) + "]/a/div/div[1]/h4").text)
except:
name = ''

# Get Product Description
try:
description = str(driver.find_element_by_xpath(
"/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str(
"/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str(
y + 1) + "]/a/div/div[1]/div[1]").text)
except:
description = ''

# Get Product Price
try:
price = str(driver.find_element_by_xpath(
"/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str(
"/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str(
y + 1) + "]/a/div/div[1]/div[2]").text)

if price == description:
Expand All @@ -99,7 +101,7 @@ def scrape_menu(url):
# Get Image URL
try:
img_url = str(driver.find_element_by_xpath(
"/html/body/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str(
"/html/body/div/div/div/main/div[2]/ul/li[" + str(x + 1) + "]/ul/li[" + str(
y + 1) + "]/a/div/div[2]/img").get_attribute("src"))
except:
img_url = ''
Expand Down
15 changes: 12 additions & 3 deletions restaurant_scraper.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
from selenium import webdriver
import os
from selenium.common.exceptions import NoSuchElementException


def scrape_restaurants(base_url, location):
driver = webdriver.Chrome(executable_path="chromedriver_win32/chromedriver.exe")
driver.get(base_url + location)

categories = driver.find_element_by_xpath("/html/body/div/div/main/div[2]/div[3]"). \
text.replace(" ", "-").lower().splitlines()
try:
categories = driver.find_element_by_xpath("/html/body/div/div/main/div[2]/div[3]"). \
text.replace(" ", "-").lower().splitlines()[:14]
except NoSuchElementException:
# I noticed there is an extra div on the UberEats website, so I had to add these lines
categories = driver.find_element_by_xpath("/html/body/div/div/div/main/div[2]/div[3]"). \
text.replace(" ", "-").lower().splitlines()[:14]

for cat in categories:
try:
driver.get(base_url + location + "/" + cat)
temp_urls = driver.find_element_by_xpath("/html/body/div/div/main/div[5]").find_elements_by_tag_name("a")
try:
temp_urls = driver.find_element_by_xpath("/html/body/div/div/main/div[5]").find_elements_by_tag_name("a")
except NoSuchElementException:
temp_urls = driver.find_element_by_xpath("/html/body/div/div/div/main/div[5]").find_elements_by_tag_name("a")
for url in temp_urls:
out_file = open("temp_urls.txt", "a")
out_file.write(str(url.get_attribute("href")) + "\n")
Expand Down
4 changes: 2 additions & 2 deletions ubereats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import restaurant_scraper


base_url = "https://www.ubereats.com/ca/category/"
city_list = ["toronto", "kingston", "hamilton"]
base_url = "https://www.ubereats.com/category/"
city_list = ["new-york-city", "toronto", "kingston", "hamilton", "brooklyn"]
restaurant_data = {
'cities': []
}
Expand Down