-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
169 lines (144 loc) · 5.92 KB
/
main.py
File metadata and controls
169 lines (144 loc) · 5.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import html
import sqlite3
import time

import streamlit as st

from search import Search, Mode
# --- Page setup ---------------------------------------------------------
# Browser-tab title, icon, and layout; this is the first Streamlit call
# the script makes.
_PAGE_SETTINGS = {
    "page_title": "Wiki Search Engine",
    "page_icon": "🔍",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)

# Stylesheet override: smaller top/bottom padding on the main container
# and a smaller bottom margin under h1 headings.
_COMPACT_CSS = """
<style>
.block-container {
padding-top: 2rem;
padding-bottom: 1rem;
}
.stMarkdown h1 {
margin-bottom: 0.5rem;
}
</style>
"""
st.markdown(_COMPACT_CSS, unsafe_allow_html=True)

# --- Session state ------------------------------------------------------
# Seed each key only when it is absent, so values already stored in the
# session are never overwritten.
_SESSION_DEFAULTS = {
    "search_engine": None,          # Search instance, created on first search
    "k_value": 0,                   # SVD dimension last chosen in the sidebar
    "needs_initialization": False,  # set when k changes; cleared after rebuild
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# Define callback for search submission
def handle_search():
    """Record in session state that the search form was submitted.

    Passed as the ``on_click`` callback of the form's submit button.
    """
    st.session_state["search_submitted"] = True
# Database connection
def get_db_connection(db_path="./data/wiki.db"):
    """Open and return a new SQLite connection to the article database.

    Args:
        db_path: Location of the SQLite database. Defaults to the
            application's ``./data/wiki.db`` (resolved against the current
            working directory), so existing zero-argument callers are
            unaffected; other paths (or ``":memory:"``) can be supplied
            for alternative deployments and tests.

    Returns:
        A fresh ``sqlite3.Connection``. The caller owns it and is
        responsible for closing it.

    Raises:
        sqlite3.OperationalError: If the database file cannot be opened.
    """
    return sqlite3.connect(db_path)
# --- Page heading -------------------------------------------------------
st.title("🔍 Wiki Search Engine")
st.markdown("Search through Wikipedia articles")

# --- Sidebar: search settings -------------------------------------------
# Radio labels are named once so the widget options and the comparison
# below cannot drift apart.
_COSINE_LABEL = "Cosine Similarity"
_ANN_LABEL = "ANN (Approximate Nearest Neighbors)"
_ABOUT_TEXT = """
This search engine uses Count Vectors and Singular Value Decomposition
to find semantically similar articles.
"""

with st.sidebar:
    st.header("Configuration")

    # SVD dimension, selectable from 0 to 1024 in steps of 16.
    k = st.slider(
        "SVD Dimension (k)",
        min_value=0,
        max_value=1024,
        value=st.session_state.k_value,
        step=16,
        help="k=0 means no dimensionality reduction",
    )

    # A changed k only marks the engine as stale here; nothing is rebuilt
    # in the sidebar itself.
    k_changed = k != st.session_state.k_value
    if k_changed:
        st.session_state.needs_initialization = True
        st.session_state.k_value = k

    # Search algorithm selection.
    mode_option = st.radio("Search Algorithm", [_COSINE_LABEL, _ANN_LABEL])
    if mode_option == _COSINE_LABEL:
        search_mode = Mode.COSINE
    else:
        search_mode = Mode.ANN

    # Warn up front about the ANN / k=0 combination.
    if search_mode == Mode.ANN and k == 0:
        st.warning("⚠️ ANN requires k > 0")

    # How many hits to request: 1 to 50, default 10.
    num_results = st.slider("Results to show", 1, 50, 10)

    st.markdown("---")
    st.markdown("### About")
    st.info(_ABOUT_TEXT)
# --- Search form --------------------------------------------------------
# Query box and submit button sit side by side in a 5:1 column split.
with st.form(key="search_form"):
    input_col, button_col = st.columns([5, 1])

    with input_col:
        query = st.text_input(
            label="Search query",
            placeholder="Search for something...",
            label_visibility="collapsed",
            key="query",
        )

    with button_col:
        search_button = st.form_submit_button(
            "🔎 Search",
            use_container_width=True,
            type="primary",
            on_click=handle_search,
        )

# Mirror the submit button's return value as a plain boolean flag.
search_submitted = bool(search_button)
# Texts longer than _PREVIEW_THRESHOLD characters are cut after
# _PREVIEW_CHARS characters for the expander label (values unchanged from
# the previous inline logic).
_PREVIEW_CHARS = 130
_PREVIEW_THRESHOLD = 150


def _split_preview(text):
    """Split an article text into ``(preview, remainder)``.

    Texts longer than ``_PREVIEW_THRESHOLD`` characters are cut after
    ``_PREVIEW_CHARS`` characters: the preview is the head plus ``"..."``
    and the remainder is everything after the cut. Shorter texts are
    returned whole as the preview with an empty remainder, so no part of
    the text is shown twice. ``None`` is treated as an empty text.
    """
    body = text or ""
    if len(body) > _PREVIEW_THRESHOLD:
        return body[:_PREVIEW_CHARS] + "...", body[_PREVIEW_CHARS:]
    return body, ""


def _fetch_articles(article_ids):
    """Return ``{id: (title, url, text)}`` for the given article ids.

    An ``IN (...)`` query without ``ORDER BY`` does not return rows in the
    order of the id list, so rows are keyed by id and the caller looks
    each ranked hit up explicitly. The connection is closed even when the
    query fails.
    """
    placeholders = ",".join("?" * len(article_ids))
    db_query = f"SELECT id, title, url, text FROM articles WHERE id IN ({placeholders})"
    conn = get_db_connection()
    try:
        rows = conn.execute(db_query, list(article_ids)).fetchall()
    finally:
        conn.close()
    return {row_id: (title, url, text) for row_id, title, url, text in rows}


# Run the search only on the rerun in which the form was submitted.
if search_submitted:
    if not query.strip():
        st.warning("Please enter a search query.")
    else:
        try:
            # (Re)build the engine lazily: on first use, or after k changed.
            if st.session_state.needs_initialization or st.session_state.search_engine is None:
                with st.spinner(f"Initializing search engine with k={st.session_state.k_value}..."):
                    try:
                        st.session_state.search_engine = Search(st.session_state.k_value)
                        st.session_state.needs_initialization = False
                    except Exception as e:
                        st.error(f"Error initializing search engine: {e}")
                        st.stop()

            with st.spinner("Searching..."):
                # perf_counter is the monotonic clock intended for intervals.
                start_time = time.perf_counter()
                results = st.session_state.search_engine.search(query, search_mode, num_results)
                search_time = time.perf_counter() - start_time

                # Only touch the database when there is something to look up.
                hits = list(results) if results else []
                articles_by_id = _fetch_articles([idx for idx, _ in hits]) if hits else {}

            # Pair every score with its own article, in ranked order; hits
            # whose row is missing from the database are left out instead
            # of shifting the pairing of all later hits.
            ranked = [
                (match_score, articles_by_id[idx])
                for idx, match_score in hits
                if idx in articles_by_id
            ]

            if ranked:
                st.success(f"Found {len(ranked)} results in {search_time:.3f} seconds")

                for i, (match_score, (title, url, text)) in enumerate(ranked, 1):
                    with st.container():
                        col1, col2 = st.columns([5, 1])
                        with col1:
                            st.markdown(f"##### {i}. {title}")
                            st.markdown(f"[{url}]({url})")
                            preview, remainder = _split_preview(text)
                            with st.expander(preview or "No preview available", expanded=False):
                                # Article text is data, not markup: escape it
                                # before embedding it in raw HTML.
                                st.markdown(
                                    f"<div style='max-height: 400px; overflow-y: auto;'><small><em>{html.escape(remainder)}</em></small></div>",
                                    unsafe_allow_html=True,
                                )
                        with col2:
                            st.metric("Score", f"{match_score:.4f}")
                        st.divider()
            elif search_mode == Mode.ANN and k == 0:
                st.error("ANN search requires k > 0. Please change your configuration.")
            else:
                st.info("No results found for your query.")
        except Exception as e:
            # Top-level boundary for the search flow: report the failure in
            # the page rather than showing a traceback.
            st.error(f"Error during search: {e}")