|
| 1 | +import threading |
| 2 | +import webbrowser |
| 3 | +import requests |
| 4 | + |
| 5 | +import tkinter as tk |
| 6 | +from tkinter import messagebox |
| 7 | + |
| 8 | +import ttkbootstrap as tb |
| 9 | +from ttkbootstrap.constants import * |
| 10 | +from ttkbootstrap.widgets.scrolled import ScrolledText |
| 11 | + |
| 12 | +from bs4 import BeautifulSoup |
| 13 | +from sklearn.feature_extraction.text import TfidfVectorizer |
| 14 | +from sklearn.metrics.pairwise import cosine_similarity |
| 15 | +from rank_bm25 import BM25Okapi |
| 16 | + |
| 17 | +from urllib.parse import urlparse, parse_qs, unquote |
| 18 | + |
| 19 | + |
# ---------------- CONFIG ---------------- #

# Number of ranked results rendered on each page of the results pane.
RESULTS_PER_PAGE = 6


# ---------------- GLOBAL STATE ---------------- #

# (result, score) tuples produced by rank_results() for the latest query.
all_ranked_results = list()
# 1-based number of the page currently displayed.
current_page = 1
# URLs the user has starred through toggle_favorite().
favorites = set()
| 30 | + |
| 31 | + |
| 32 | +# ---------------- URL CLEANING ---------------- # |
| 33 | + |
def clean_duckduckgo_url(url):
    """Return the real destination behind a DuckDuckGo redirect link.

    Links of the form ``...duckduckgo.com/l/?uddg=<encoded target>&...``
    carry the destination in the ``uddg`` query parameter.  When *url* has
    that shape and the parameter is present, the decoded target is
    returned; any other value (including ``None`` or an empty string) is
    returned unchanged.
    """
    if not url or "duckduckgo.com/l/?" not in url:
        return url

    targets = parse_qs(urlparse(url).query).get("uddg")
    # parse_qs already percent-decodes values; decoding a second time would
    # turn a legitimate "%20" inside the target (sent as "%2520") into a
    # raw space and corrupt the destination URL.
    return targets[0] if targets else url
| 41 | + |
| 42 | + |
def short_display_url(url):
    """Build a compact breadcrumb such as ``example.com › docs › api``.

    The result is the host followed by at most the first five non-empty
    path segments, joined with `` › ``.  A URL without a path yields just
    the host.
    """
    parsed = urlparse(url)
    # Drop empty segments so "//" or a trailing "/" never yields a blank crumb.
    segments = [part for part in parsed.path.split("/") if part][:5]
    return " › ".join([parsed.netloc, *segments])
| 49 | + |
| 50 | + |
| 51 | +# ---------------- SEARCH LOGIC ---------------- # |
| 52 | + |
def fetch_search_results(query):
    """Fetch up to 25 hits for *query* from DuckDuckGo's HTML endpoint.

    Returns a list of dicts with the keys ``title``, ``url``,
    ``display_url`` and ``snippet``.  An empty list is returned when the
    server answers with anything other than HTTP 200.  Network failures
    are not caught here and propagate as ``requests`` exceptions.
    """
    response = requests.get(
        "https://duckduckgo.com/html/",
        # Let requests percent-encode the query so characters such as "&",
        # "#" or "+" are sent as part of the search terms instead of
        # altering the URL structure.
        params={"q": query},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=10,
    )
    if response.status_code != 200:
        return []

    soup = BeautifulSoup(response.text, "html.parser")

    results = []
    for link in soup.find_all("a", class_="result__a")[:25]:
        raw_url = link.get("href")
        if not raw_url:
            # An anchor without a target cannot be opened; skip it.
            continue
        clean_url = clean_duckduckgo_url(raw_url)

        # The snippet is looked up inside the title link's enclosing result
        # container; tolerate markup where the container or snippet is absent.
        container = link.find_parent("div", class_="result")
        snippet_tag = None
        if container is not None:
            snippet_tag = container.find("a", class_="result__snippet")
        snippet = snippet_tag.get_text(strip=True) if snippet_tag else ""

        results.append({
            "title": link.get_text(strip=True),
            "url": clean_url,
            "display_url": short_display_url(clean_url),
            "snippet": snippet or "No description available.",
        })

    return results
| 85 | + |
| 86 | + |
def rank_results(query, results):
    """Order *results* by their relevance to *query*.

    Each result is scored on its title plus snippet as the mean of a
    TF-IDF cosine similarity and a BM25 score.

    Returns a list of ``(result, score)`` tuples, highest score first;
    an empty list when *results* is empty.
    """
    if not results:
        # Nothing to score; the scorers below cannot work on an empty corpus.
        return []

    docs = [f'{r["title"]} {r["snippet"]}' for r in results]

    # Fit on documents and query together so they share one vocabulary; the
    # query is the last row of the matrix.
    tfidf = TfidfVectorizer().fit_transform(docs + [query])
    tfidf_scores = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()

    # Lowercase the BM25 tokens (TfidfVectorizer lowercases by default) so a
    # query typed as "Python" still matches "python" in a document.
    bm25 = BM25Okapi([doc.lower().split() for doc in docs])
    bm25_scores = bm25.get_scores(query.lower().split())

    # NOTE(review): cosine similarity is bounded by 1 while BM25 is not, so
    # BM25 can dominate this plain average — confirm that is intended.
    ranked = [
        (res, (tfidf_score + bm25_score) / 2)
        for res, tfidf_score, bm25_score in zip(results, tfidf_scores, bm25_scores)
    ]
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked
| 103 | + |
| 104 | + |
| 105 | +# ---------------- UI HELPERS ---------------- # |
| 106 | + |
def open_url(url):
    """Open *url* in a new browser tab if it is a web link.

    Result URLs originate from scraped HTML, so only ``http``/``https``
    targets are handed to the browser.  A protocol-relative
    ``//host/path`` link is treated as ``https``.

    Returns ``False`` when the URL is empty or has another scheme;
    otherwise returns whatever ``webbrowser.open_new_tab`` reports.
    """
    if not url:
        return False
    if url.startswith("//"):
        url = "https:" + url
    if urlparse(url).scheme not in ("http", "https"):
        return False
    return webbrowser.open_new_tab(url)
| 109 | + |
| 110 | + |
def toggle_favorite(url, btn):
    """Flip the starred state of *url* and relabel *btn* to match.

    A URL already in ``favorites`` is removed and the button shows an
    outlined star; otherwise it is added and the button shows a filled star.
    """
    was_starred = url in favorites
    if was_starred:
        favorites.remove(url)
    else:
        favorites.add(url)
    btn.config(text="☆" if was_starred else "★")
| 118 | + |
| 119 | + |
| 120 | +# ---------------- DISPLAY ---------------- # |
| 121 | + |
def display_page():
    """Render the current page of ranked results into the results pane.

    Reads the module-level ``current_page`` and ``all_ranked_results``,
    replaces the contents of ``text`` with the title, display URL and
    snippet of each result on that page, binds click and hover handlers to
    every title, and finally refreshes the pagination label.
    """
    def show_hand(_event):
        text.config(cursor="hand2")

    def reset_cursor(_event):
        text.config(cursor="")

    first = (current_page - 1) * RESULTS_PER_PAGE
    visible = all_ranked_results[first:first + RESULTS_PER_PAGE]

    # The widget is normally disabled; enable it only while rewriting.
    text.configure(state="normal")
    text.delete("1.0", "end")

    for position, (result, _score) in enumerate(visible):
        # One tag per title so each click binding can carry its own URL.
        tag = f"title_{position}"

        text.insert("end", result["title"] + "\n", (tag,))
        text.insert("end", result["display_url"] + "\n", "url")
        text.insert("end", result["snippet"] + "\n\n", "snippet")

        # The URL is bound as a default argument so every handler keeps the
        # value from its own iteration.
        text.tag_bind(tag, "<Button-1>", lambda _e, url=result["url"]: open_url(url))
        text.tag_bind(tag, "<Enter>", show_hand)
        text.tag_bind(tag, "<Leave>", reset_cursor)

    text.configure(state="disabled")
    update_pagination_label()
| 158 | + |
| 159 | + |
def update_pagination_label():
    """Show "Page X of Y" in ``page_label``; Y is never reported below 1."""
    result_count = len(all_ranked_results)
    # Ceiling division; an empty result list still counts as one page.
    total_pages = -(-result_count // RESULTS_PER_PAGE) or 1
    page_label.config(text=f"Page {current_page} of {total_pages}")
| 163 | + |
| 164 | + |
| 165 | +# ---------------- PAGINATION ---------------- # |
| 166 | + |
def next_page():
    """Advance one page and redraw, if results exist beyond this page."""
    global current_page
    shown_so_far = current_page * RESULTS_PER_PAGE
    if shown_so_far >= len(all_ranked_results):
        return
    current_page += 1
    display_page()
| 172 | + |
| 173 | + |
def prev_page():
    """Step back one page and redraw, unless page 1 is already showing."""
    global current_page
    if current_page <= 1:
        return
    current_page -= 1
    display_page()
| 179 | + |
| 180 | + |
| 181 | +# ---------------- SEARCH ---------------- # |
| 182 | + |
def perform_search():
    """Validate the query box and start a background search.

    Shows a warning and does nothing else when the box is blank; otherwise
    hands the stripped query to ``search_thread`` on a daemon thread.
    """
    query = query_entry.get().strip()
    if query:
        worker = threading.Thread(target=search_thread, args=(query,), daemon=True)
        worker.start()
    else:
        messagebox.showwarning("Input Required", "Enter a search query.")
| 190 | + |
| 191 | + |
def search_thread(query):
    """Run one search off the UI thread and publish the outcome.

    Used as a ``threading.Thread`` target.  The network fetch and the
    ranking happen here; every widget and shared-state change is scheduled
    with ``text.after`` instead of being made directly from this thread.
    A failed search is reported to the user rather than leaving the
    "Searching..." placeholder on screen.
    """
    def set_message(message):
        text.configure(state="normal")
        text.delete("1.0", "end")
        text.insert("end", message)
        text.configure(state="disabled")

    def publish(ranked):
        global all_ranked_results, current_page
        # Swap results and page number together so pagination never sees
        # the old result list paired with the new page number.
        all_ranked_results = ranked
        current_page = 1
        display_page()
        if not ranked:
            set_message("No results found.\n")

    def report_failure(error):
        set_message("Search failed.\n")
        messagebox.showerror(
            "Search Failed", f"Could not complete the search:\n{error}"
        )

    text.after(0, set_message, "Searching...\n")

    try:
        results = fetch_search_results(query)
        # Skip ranking for an empty list: there is nothing to score.
        ranked = rank_results(query, results) if results else []
    except Exception as exc:  # thread boundary: surface any failure to the user
        text.after(0, report_failure, exc)
        return

    text.after(0, publish, ranked)
| 205 | + |
| 206 | + |
| 207 | +# ---------------- UI SETUP ---------------- # |
| 208 | + |
# Main application window.
app = tb.Window(
    title="Search Ranking App",
    themename="flatly",
    size=(980, 700),
    resizable=(True, True),
)

# Search bar: heading, query entry and search button, stacked top to bottom.
top = tb.Frame(app, padding=15)
top.pack(fill=X)

tb.Label(top, text="Search", font=("Segoe UI", 16, "bold")).pack(anchor=W)

query_entry = tb.Entry(top, font=("Segoe UI", 12))
query_entry.pack(fill=X, pady=8)
# Pressing Enter in the entry triggers the same action as the button.
query_entry.bind("<Return>", lambda _event: perform_search())

tb.Button(top, text="Search", bootstyle="primary", command=perform_search).pack(anchor=E)
| 235 | + |
# Results pane: a scrollable text area that fills the remaining space.
result_frame = tb.Frame(app, padding=(15, 5))
result_frame.pack(fill=BOTH, expand=True)

result_box = ScrolledText(result_frame, autohide=True)
result_box.pack(fill=BOTH, expand=True)

# Keep a handle on the inner text widget; it starts out disabled so the
# user cannot type into the results.
text = result_box.text
text.configure(state="disabled", wrap="word")

# Tag styles applied to the URL line and the snippet line of each result.
text.tag_config("url", foreground="#188038", font=("Segoe UI", 10))
text.tag_config("snippet", foreground="#4d5156", font=("Segoe UI", 11), spacing3=18)
| 259 | + |
# Pagination bar: Prev button, page indicator and Next button, left to right.
nav = tb.Frame(app, padding=10)
nav.pack(fill=X)

tb.Button(
    nav, text="← Prev", bootstyle="secondary", command=prev_page
).pack(side=LEFT)

page_label = tb.Label(nav, text="Page 1", font=("Segoe UI", 10))
page_label.pack(side=LEFT, padx=10)

tb.Button(
    nav, text="Next →", bootstyle="secondary", command=next_page
).pack(side=LEFT)

# Start the Tk event loop.
app.mainloop()
0 commit comments