Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mini_copilot/commands/search_provider.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Search provider names (presumably the values accepted by the
# search-provider command below -- confirm against its handler).
# NOTE: the second assignment rebinds the name, so the list that includes
# "bing" is the one in effect.
SEARCH_PROVIDERS = ["duckduckgo", "startpage"]
SEARCH_PROVIDERS = ["duckduckgo", "startpage", "bing"]


def handle_search_provider_command(search_provider):
Expand Down
37 changes: 37 additions & 0 deletions mini_copilot/web_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,41 @@ def search_startpage(query, num_results=20):
return results[:num_results]


def search_bing(query, num_results=20):
    """Search Bing and return up to ``num_results`` organic results.

    The query is passed via ``params`` so that requests percent-encodes it;
    interpolating it straight into the URL breaks on characters such as
    ``&``, ``#``, ``+`` or spaces.

    Args:
        query: Search terms to send to Bing.
        num_results: Maximum number of results to return.

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts. An empty list is
        returned when the request fails (the error is printed, not raised).
    """
    # Market/language parameters are set explicitly to bypass Bing's
    # country redirection.
    params = {
        "q": query,
        "setmkt": "en-US",
        "setlang": "en-US",
        "cc": "US",
    }

    session = None
    try:
        # Use a session so the cookies below are sent with the request.
        session = requests.Session()
        # Set cookies to force US market and English language.
        session.cookies.set("SRCHHPGUSR", "SRCHLANG=EN&WLS=2", domain=".bing.com")
        session.cookies.set("_EDGE_S", "mkt=en-us", domain=".bing.com")

        res = session.get(
            "https://www.bing.com/search",
            params=params,
            headers=HEADERS,
            proxies=PROXY,
            timeout=10,
        )
        res.raise_for_status()
    except Exception as e:
        # Best-effort search: report the failure and fall back to no results.
        print(f"[web search] Error searching Bing: {e}")
        return []
    finally:
        # Release pooled connections; the response body is already loaded.
        if session is not None:
            session.close()

    soup = BeautifulSoup(res.text, "html.parser")
    results = []

    for item in soup.select("li.b_algo"):
        title_link = item.select_one("h2 a")
        if not title_link:
            continue

        # An anchor without an href cannot be followed; skip it instead of
        # raising KeyError.
        href = title_link.get("href")
        if not href:
            continue
        # Protocol-relative links need an explicit scheme.
        if href.startswith("//"):
            href = "https:" + href

        results.append({"title": title_link.text.strip(), "url": href})

    return results[:num_results]


def extract_text_from_url(url):
try:
session = requests.Session()
Expand Down Expand Up @@ -173,6 +208,8 @@ def web_search(query, num_results=5, provider="duckduckgo"):

if provider == "startpage":
search_results = search_startpage(query, num_results=num_results)
elif provider == "bing":
search_results = search_bing(query, num_results=num_results)
else:
search_results = search_ddg(query, num_results=num_results)

Expand Down
46 changes: 42 additions & 4 deletions tests/test_web_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,60 @@
from unittest.mock import MagicMock, patch
from mini_copilot import web_search


class TestWebSearch(unittest.TestCase):
    """Unit tests for the search providers and page-text extraction.

    Network access is replaced with mocks whose ``text`` holds the minimal
    HTML needed to satisfy each provider's CSS selectors.
    """

    @patch("mini_copilot.web_search.requests.get")
    def test_search_ddg(self, mock_get):
        """DuckDuckGo results are read from ``.result__title .result__a``."""
        # Match the .result__title .result__a selector
        mock_get.return_value = MagicMock(
            ok=True,
            text='<html><div class="result__title"><a class="result__a" href="u">T</a></div></html>',
        )
        results = web_search.search_ddg("q", num_results=1)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["url"], "u")

    @patch("mini_copilot.web_search.requests.get")
    def test_search_startpage(self, mock_get):
        """Startpage results expose both the link URL and the title text."""
        # Match the .result and a.result-link, .wgl-title selectors
        mock_get.return_value = MagicMock(
            ok=True,
            text='<html><div class="result"><a class="result-link" href="http://u"><div class="wgl-title">T</div></a></div></html>',
        )
        results = web_search.search_startpage("q", num_results=1)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["url"], "http://u")
        self.assertEqual(results[0]["title"], "T")

    @patch("mini_copilot.web_search.requests.Session")
    def test_search_bing(self, mock_session):
        """Bing results are read from ``li.b_algo`` items via a Session."""
        # Match the li.b_algo and h2 a selectors
        mock_s = MagicMock()
        mock_session.return_value = mock_s
        mock_s.get.return_value = MagicMock(
            ok=True,
            text='<html><li class="b_algo"><h2><a href="http://u">T</a></h2></li></html>',
        )
        results = web_search.search_bing("q", num_results=1)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["url"], "http://u")
        self.assertEqual(results[0]["title"], "T")

    @patch("mini_copilot.web_search.requests.Session")
    def test_extract_text(self, mock_session):
        """Extracted text contains the page heading."""
        mock_s = MagicMock()
        mock_session.return_value = mock_s
        # apparent_encoding is set once here; no separate reassignment needed.
        mock_s.get.return_value = MagicMock(
            ok=True,
            status_code=200,
            text='<html><div id="firstHeading">T</div></html>',
            apparent_encoding="u8",
        )
        self.assertIn(
            "T", web_search.extract_text_from_url("https://en.wikipedia.org/wiki/T")
        )


# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
Loading