Skip to content

Commit b870e6d

Browse files
committed
feat: add Bing as a search provider
1 parent 21e5823 commit b870e6d

3 files changed

Lines changed: 80 additions & 5 deletions

File tree

mini_copilot/commands/search_provider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
SEARCH_PROVIDERS = ["duckduckgo", "startpage"]
1+
SEARCH_PROVIDERS = ["duckduckgo", "startpage", "bing"]
22

33

44
def handle_search_provider_command(search_provider):

mini_copilot/web_search.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,41 @@ def search_startpage(query, num_results=20):
8787
return results[:num_results]
8888

8989

90+
def search_bing(query, num_results=20):
    """Search Bing and return its organic results.

    Args:
        query: Free-text search string. It is passed to ``requests`` as a
            query parameter, so spaces, ``&``, ``#`` and non-ASCII
            characters are URL-encoded instead of corrupting the URL.
        num_results: Maximum number of results to return.

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts, at most
        ``num_results`` long. An empty list is returned when the request
        fails; the error is printed rather than raised.
    """
    # Market/language parameters pin the US English index so Bing does not
    # redirect to a country-specific results page.
    params = {"q": query, "setmkt": "en-US", "setlang": "en-US", "cc": "US"}

    # A session is used so the market/language cookies travel with the request.
    session = requests.Session()
    try:
        # Set cookies to force US market and English language
        session.cookies.set("SRCHHPGUSR", "SRCHLANG=EN&WLS=2", domain=".bing.com")
        session.cookies.set("_EDGE_S", "mkt=en-us", domain=".bing.com")

        res = session.get(
            "https://www.bing.com/search",
            params=params,
            headers=HEADERS,
            proxies=PROXY,
            timeout=10,
        )
        res.raise_for_status()
        html = res.text
    except Exception as e:
        # Best-effort provider: report the failure and let the caller
        # continue with no results.
        print(f"[web search] Error searching Bing: {e}")
        return []
    finally:
        # Release the session's pooled connections on every path.
        session.close()

    soup = BeautifulSoup(html, "html.parser")
    results = []

    # Each organic result is an <li class="b_algo"> whose <h2> holds the link.
    for item in soup.select("li.b_algo"):
        title_link = item.select_one("h2 a")
        if title_link is None:
            continue

        href = title_link.get("href")
        if not href:
            # An anchor without a target cannot be followed; skip it.
            continue
        if href.startswith("//"):
            # Protocol-relative link: make it absolute.
            href = "https:" + href

        results.append({"title": title_link.text.strip(), "url": href})

    return results[:num_results]
123+
124+
90125
def extract_text_from_url(url):
91126
try:
92127
session = requests.Session()
@@ -173,6 +208,8 @@ def web_search(query, num_results=5, provider="duckduckgo"):
173208

174209
if provider == "startpage":
175210
search_results = search_startpage(query, num_results=num_results)
211+
elif provider == "bing":
212+
search_results = search_bing(query, num_results=num_results)
176213
else:
177214
search_results = search_ddg(query, num_results=num_results)
178215

tests/test_web_search.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,60 @@
22
from unittest.mock import MagicMock, patch
33
from mini_copilot import web_search
44

5+
56
class TestWebSearch(unittest.TestCase):
67
@patch("mini_copilot.web_search.requests.get")
78
def test_search_ddg(self, mock_get):
89
# Match the .result__title .result__a selector
9-
mock_get.return_value = MagicMock(ok=True, text='<html><div class="result__title"><a class="result__a" href="u">T</a></div></html>')
10+
mock_get.return_value = MagicMock(
11+
ok=True,
12+
text='<html><div class="result__title"><a class="result__a" href="u">T</a></div></html>',
13+
)
1014
results = web_search.search_ddg("q", num_results=1)
1115
self.assertEqual(len(results), 1)
1216
self.assertEqual(results[0]["url"], "u")
1317

18+
@patch("mini_copilot.web_search.requests.get")
def test_search_startpage(self, mock_get):
    # Minimal page shaped to hit the .result container and the
    # a.result-link / .wgl-title selectors.
    page = '<html><div class="result"><a class="result-link" href="http://u"><div class="wgl-title">T</div></a></div></html>'
    fake_response = MagicMock(ok=True, text=page)
    mock_get.return_value = fake_response

    hits = web_search.search_startpage("q", num_results=1)

    # Exactly one result, carrying the link target and the title text.
    self.assertEqual(len(hits), 1)
    first_hit = hits[0]
    self.assertEqual(first_hit["url"], "http://u")
    self.assertEqual(first_hit["title"], "T")
29+
30+
@patch("mini_copilot.web_search.requests.Session")
def test_search_bing(self, mock_session):
    # Minimal page shaped to hit the li.b_algo and "h2 a" selectors.
    page = '<html><li class="b_algo"><h2><a href="http://u">T</a></h2></li></html>'
    fake_response = MagicMock(ok=True, text=page)

    # search_bing builds its own Session, so the patched constructor must
    # hand back a session whose get() yields the canned response.
    fake_session = MagicMock()
    fake_session.get.return_value = fake_response
    mock_session.return_value = fake_session

    hits = web_search.search_bing("q", num_results=1)

    # Exactly one result, carrying the link target and the title text.
    self.assertEqual(len(hits), 1)
    first_hit = hits[0]
    self.assertEqual(first_hit["url"], "http://u")
    self.assertEqual(first_hit["title"], "T")
43+
1444
@patch("mini_copilot.web_search.requests.Session")
1545
def test_extract_text(self, mock_session):
1646
mock_s = MagicMock()
1747
mock_session.return_value = mock_s
18-
mock_s.get.return_value = MagicMock(ok=True, status_code=200, text='<html><div id="firstHeading">T</div></html>', apparent_encoding='u8')
19-
mock_s.get.return_value.apparent_encoding = 'u8'
20-
self.assertIn("T", web_search.extract_text_from_url("https://en.wikipedia.org/wiki/T"))
48+
mock_s.get.return_value = MagicMock(
49+
ok=True,
50+
status_code=200,
51+
text='<html><div id="firstHeading">T</div></html>',
52+
apparent_encoding="u8",
53+
)
54+
mock_s.get.return_value.apparent_encoding = "u8"
55+
self.assertIn(
56+
"T", web_search.extract_text_from_url("https://en.wikipedia.org/wiki/T")
57+
)
58+
2159

2260
if __name__ == "__main__":
2361
unittest.main()

0 commit comments

Comments
 (0)