Skip to content

Commit ead2a56

Browse files
authored
Merge pull request #19 from lzwjava/feat/add-startpage-provider
feat: add Startpage as a search provider
2 parents 3ab05a7 + 21e5823 commit ead2a56

3 files changed

Lines changed: 110 additions & 34 deletions

File tree

mini_copilot/commands/search_provider.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,17 @@
1-
SEARCH_PROVIDERS = ["duckduckgo"]
1+
SEARCH_PROVIDERS = ["duckduckgo", "startpage"]
2+
23

34
def handle_search_provider_command(search_provider):
45
print(f"\nCurrent search provider: {search_provider}")
56
print("Available providers:")
67
for i, prov in enumerate(SEARCH_PROVIDERS, 1):
78
marker = "*" if prov == search_provider else " "
89
print(f" {marker} {i}. {prov}")
9-
10+
1011
try:
11-
choice = input("Select search provider (number, Enter to keep current): ").strip()
12+
choice = input(
13+
"Select search provider (number, Enter to keep current): "
14+
).strip()
1215
if choice:
1316
if choice.isdigit():
1417
n = int(choice)
@@ -21,5 +24,5 @@ def handle_search_provider_command(search_provider):
2124
print("Please enter a number.\n")
2225
except (EOFError, KeyboardInterrupt):
2326
print()
24-
27+
2528
return search_provider

mini_copilot/main.py

Lines changed: 49 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,13 @@
66

77
try:
88
import readline
9+
910
COMMANDS = ["/login", "/help", "/copy", "/model", "/search_provider", ".exit"]
11+
1012
def completer(text, state):
    """Return the ``state``-th entry of COMMANDS that begins with ``text``.

    Yields None once ``state`` reaches the number of matching commands.
    """
    candidates = [cmd for cmd in COMMANDS if cmd.startswith(text)]
    if state >= len(candidates):
        return None
    return candidates[state]
15+
1316
readline.set_completer(completer)
1417
readline.parse_and_bind("tab: complete")
1518
except ImportError:
@@ -31,8 +34,6 @@ def completer(text, state):
3134
(".exit", "Quit"),
3235
]
3336

34-
SEARCH_PROVIDERS = ["duckduckgo"]
35-
3637
CONFIG_PATH = Path.home() / ".config" / "mini-copilot" / "config.json"
3738
TOKEN_REFRESH_INTERVAL = 24 * 60 # seconds
3839

@@ -60,11 +61,14 @@ def completer(text, state):
6061
}
6162
TOOLS = [WEB_SEARCH_TOOL]
6263

64+
6365
def load_github_token():
    """Return the GitHub token stored in the config file, or None.

    Reads ``CONFIG_PATH`` as JSON and returns its ``"github_token"`` entry.

    Returns:
        The stored token, or None when the config file is missing, cannot be
        read or decoded, is not valid JSON, does not contain a JSON object,
        or has no ``"github_token"`` key.
    """
    # EAFP: read directly instead of exists()-then-read, which can race with
    # the file being removed between the two calls.
    try:
        raw = CONFIG_PATH.read_text()
    except FileNotFoundError:
        return None
    except (OSError, UnicodeDecodeError) as e:
        print(f"Warning: could not read {CONFIG_PATH}: {e}", file=sys.stderr)
        return None

    try:
        config = json.loads(raw)
    except json.JSONDecodeError as e:
        # A damaged config is reported and treated like a missing one rather
        # than raising out of this loader.
        print(f"Warning: ignoring malformed {CONFIG_PATH}: {e}", file=sys.stderr)
        return None

    # Valid JSON that is not an object (e.g. a list or string) has no .get().
    if not isinstance(config, dict):
        return None
    return config.get("github_token")
6770

71+
6872
def main():
6973
github_token = load_github_token()
7074
copilot_token = None
@@ -93,33 +97,43 @@ def main():
9397
try:
9498
user_input = input("> ").strip()
9599
except (EOFError, KeyboardInterrupt):
96-
print("\nGoodbye!"); break
100+
print("\nGoodbye!")
101+
break
97102

98-
if not user_input: continue
103+
if not user_input:
104+
continue
99105
if user_input in ("/", "/help"):
100106
print("\nAvailable commands:")
101107
for cmd, desc in COMMANDS_HELP:
102108
print(f" {cmd:<20} {desc}")
103-
print(); continue
104-
if user_input == ".exit": print("Goodbye!"); break
109+
print()
110+
continue
111+
if user_input == ".exit":
112+
print("Goodbye!")
113+
break
105114
if user_input == "/copy":
106-
handle_copy_command(last_reply); continue
115+
handle_copy_command(last_reply)
116+
continue
107117
if user_input == "/model":
108-
current_model = handle_model_command(copilot_token, current_model); continue
118+
current_model = handle_model_command(copilot_token, current_model)
119+
continue
109120
if user_input == "/search_provider":
110-
search_provider = handle_search_provider_command(search_provider); continue
121+
search_provider = handle_search_provider_command(search_provider)
122+
continue
111123
if user_input == "/login":
112124
github_token = handle_login_command(CONFIG_PATH, TOKEN_REFRESH_INTERVAL)
113125
if github_token:
114126
try:
115127
copilot_token = get_copilot_token(github_token)
116128
token_expiry = time.monotonic() + TOKEN_REFRESH_INTERVAL
117129
print("Connected to GitHub Copilot.\n")
118-
except Exception as e: print(f"Error: {e}", file=sys.stderr)
130+
except Exception as e:
131+
print(f"Error: {e}", file=sys.stderr)
119132
continue
120133

121134
if not copilot_token:
122-
print("Not authenticated. Type /login first.", file=sys.stderr); continue
135+
print("Not authenticated. Type /login first.", file=sys.stderr)
136+
continue
123137

124138
try:
125139
if time.monotonic() >= token_expiry:
@@ -128,32 +142,42 @@ def main():
128142

129143
messages.append({"role": "user", "content": user_input})
130144
response_message = chat(messages, copilot_token, current_model, tools=TOOLS)
131-
145+
132146
while response_message.get("tool_calls"):
133147
messages.append(response_message)
134148
for tool_call in response_message["tool_calls"]:
135149
function_name = tool_call["function"]["name"]
136150
function_args = json.loads(tool_call["function"]["arguments"])
137-
151+
138152
if function_name == "web_search":
139153
search_query = function_args.get("query")
140154
num_results = function_args.get("num_results", 20)
141-
142-
search_context = web_search(search_query, num_results=num_results)
143-
144-
messages.append({
145-
"tool_call_id": tool_call["id"],
146-
"role": "tool",
147-
"name": function_name,
148-
"content": search_context,
149-
})
150-
response_message = chat(messages, copilot_token, current_model, tools=TOOLS)
155+
156+
search_context = web_search(
157+
search_query,
158+
num_results=num_results,
159+
provider=search_provider,
160+
)
161+
162+
messages.append(
163+
{
164+
"tool_call_id": tool_call["id"],
165+
"role": "tool",
166+
"name": function_name,
167+
"content": search_context,
168+
}
169+
)
170+
response_message = chat(
171+
messages, copilot_token, current_model, tools=TOOLS
172+
)
151173

152174
reply = response_message["content"]
153175
messages.append({"role": "assistant", "content": reply})
154176
last_reply = reply
155177
print(f"\n{reply}\n")
156-
except Exception as e: print(f"Error: {e}", file=sys.stderr)
178+
except Exception as e:
179+
print(f"Error: {e}", file=sys.stderr)
180+
157181

158182
if __name__ == "__main__":
159183
main()

mini_copilot/web_search.py

Lines changed: 54 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,43 @@ def search_ddg(query, num_results=10):
5050
return results[:num_results]
5151

5252

53+
def search_startpage(query, num_results=20):
    """Search Startpage and return a list of ``{"title", "url"}`` dicts.

    Sends a GET request to the Startpage search endpoint using the module's
    HEADERS and PROXY with a 10-second timeout, then parses the returned HTML.
    If the request raises (including a non-success HTTP status), the error is
    printed and an empty list is returned.

    Args:
        query: The search query string.
        num_results: Maximum number of results to return.

    Returns:
        Up to ``num_results`` entries, in page order.
    """
    endpoint = "https://www.startpage.com/sp/search"
    query_params = {"query": query, "cat": "web", "language": "english"}

    try:
        response = requests.get(
            endpoint,
            params=query_params,
            headers=HEADERS,
            proxies=PROXY,
            timeout=10,
        )
        response.raise_for_status()
    except Exception as e:
        print(f"[web search] Error searching Startpage: {e}")
        return []

    page = BeautifulSoup(response.text, "html.parser")
    hits = []

    for entry in page.select(".result"):
        anchor = entry.select_one("a.result-link")
        heading = entry.select_one(".wgl-title")

        # Skip entries missing the link, its href, or the title element.
        if not (anchor and anchor.has_attr("href") and heading):
            continue

        target = anchor["href"]
        label = heading.get_text(strip=True)

        # Keep only absolute http(s) links, i.e. external results.
        if not isinstance(target, str) or not target.startswith("http"):
            continue
        hits.append({"title": label, "url": target})

    return hits[:num_results]
88+
89+
5390
def extract_text_from_url(url):
5491
try:
5592
session = requests.Session()
@@ -130,10 +167,15 @@ def format_llm_output(results):
130167
return "\n\n".join(blocks)
131168

132169

133-
def web_search(query, num_results=5):
170+
def web_search(query, num_results=5, provider="duckduckgo"):
134171
"""Function to be called as a tool."""
135-
print(f"[web search] Searching: {query}")
136-
search_results = search_ddg(query, num_results=num_results)
172+
print(f"[web search] Searching ({provider}): {query}")
173+
174+
if provider == "startpage":
175+
search_results = search_startpage(query, num_results=num_results)
176+
else:
177+
search_results = search_ddg(query, num_results=num_results)
178+
137179
processed_results = []
138180

139181
if not search_results:
@@ -151,7 +193,9 @@ def web_search(query, num_results=5):
151193
processed_results.append({**info, "content": content})
152194
print(f"[web search] Fetched: {info['url']}")
153195
except Exception:
154-
processed_results.append({**info, "content": "Failed to extract content."})
196+
processed_results.append(
197+
{**info, "content": "Failed to extract content."}
198+
)
155199

156200
# Sort results to match original search order
157201
url_order = {res["url"]: i for i, res in enumerate(search_results)}
@@ -162,12 +206,17 @@ def web_search(query, num_results=5):
162206

163207
if __name__ == "__main__":
164208
import argparse
209+
165210
parser = argparse.ArgumentParser(
166211
description="Optimized DDG Search & Extract for LLMs."
167212
)
168213
parser.add_argument("query", help="The search query")
169214
parser.add_argument(
170-
"-n", "--num_results", type=int, default=5, help="Number of results (default: 5)"
215+
"-n",
216+
"--num_results",
217+
type=int,
218+
default=5,
219+
help="Number of results (default: 5)",
171220
)
172221
args = parser.parse_args()
173222

0 commit comments

Comments
 (0)