<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>ML Interview Flashcards</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}
:root{--bg1:#0f0f1a;--bg2:#1a1a2e;--bg3:#25253e;--card-bg:#2a2a4a;--card-front:#1e1e3a;--card-back:#1a2a1a;--text:#e0e0e0;--text2:#a0a0b0;--accent:#6c63ff;--accent2:#ff6584;--green:#4caf50;--yellow:#ffc107;--red:#ff5252;--shadow:0 8px 32px rgba(0,0,0,0.4)}
body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;background:linear-gradient(135deg,var(--bg1),var(--bg2));color:var(--text);min-height:100vh;overflow-x:hidden}
.app{max-width:900px;margin:0 auto;padding:16px}
header{text-align:center;padding:20px 0 10px}
header h1{font-size:1.8rem;background:linear-gradient(135deg,var(--accent),var(--accent2));-webkit-background-clip:text;-webkit-text-fill-color:transparent;margin-bottom:4px}
header p{color:var(--text2);font-size:.85rem}
.stats-bar{display:flex;gap:12px;justify-content:center;flex-wrap:wrap;margin:12px 0;padding:12px;background:var(--bg3);border-radius:12px}
.stat{text-align:center;min-width:70px}
.stat .num{font-size:1.4rem;font-weight:700}
.stat .label{font-size:.7rem;color:var(--text2);text-transform:uppercase;letter-spacing:.5px}
.stat.total .num{color:var(--accent)}
.stat.mastered .num{color:var(--green)}
.stat.review .num{color:var(--yellow)}
.stat.unseen .num{color:var(--text2)}
.controls{display:flex;gap:8px;flex-wrap:wrap;justify-content:center;margin:12px 0}
.controls select,.controls button{background:var(--bg3);color:var(--text);border:1px solid rgba(255,255,255,.1);border-radius:8px;padding:8px 14px;font-size:.85rem;cursor:pointer;transition:all .2s}
.controls select:hover,.controls button:hover{border-color:var(--accent);background:rgba(108,99,255,.15)}
.controls button.active{background:var(--accent);border-color:var(--accent)}
.mode-tabs{display:flex;gap:6px;justify-content:center;margin:8px 0}
.mode-tabs button{background:transparent;color:var(--text2);border:1px solid rgba(255,255,255,.08);border-radius:20px;padding:6px 16px;font-size:.8rem;cursor:pointer;transition:all .2s}
.mode-tabs button.active{background:var(--accent);color:#fff;border-color:var(--accent)}
.card-container{perspective:1200px;margin:20px auto;max-width:680px;min-height:380px;position:relative}
.card{width:100%;min-height:380px;position:relative;transform-style:preserve-3d;transition:transform .6s cubic-bezier(.4,.2,.2,1);cursor:pointer}
.card.flipped{transform:rotateY(180deg)}
.card-face{position:absolute;top:0;left:0;width:100%;min-height:380px;backface-visibility:hidden;border-radius:20px;padding:32px;display:flex;flex-direction:column;box-shadow:var(--shadow)}
.card-front{background:linear-gradient(145deg,var(--card-front),var(--card-bg));z-index:2}
.card-back{background:linear-gradient(145deg,#1a2a2a,#1e3a2e);transform:rotateY(180deg)}
.card-category{display:inline-block;padding:4px 12px;border-radius:20px;font-size:.7rem;font-weight:600;text-transform:uppercase;letter-spacing:.5px;margin-bottom:12px;width:fit-content}
.card-company{color:var(--text2);font-size:.75rem;margin-bottom:8px;font-style:italic}
.card-question,.card-answer{font-size:1.05rem;line-height:1.7;flex:1}
.card-answer{font-size:.95rem}
.card-answer code{background:rgba(108,99,255,.2);padding:2px 6px;border-radius:4px;font-family:'SF Mono',Monaco,monospace;font-size:.85rem}
.card-answer .formula{background:rgba(108,99,255,.1);border-left:3px solid var(--accent);padding:8px 12px;margin:8px 0;border-radius:0 8px 8px 0;font-family:'SF Mono',Monaco,monospace;font-size:.85rem;overflow-x:auto}
.card-hint{color:var(--text2);font-size:.8rem;margin-top:auto;padding-top:12px;text-align:center}
.card-nav{display:flex;justify-content:center;align-items:center;gap:16px;margin:16px 0}
.card-nav button{background:var(--bg3);color:var(--text);border:none;border-radius:50%;width:44px;height:44px;font-size:1.2rem;cursor:pointer;transition:all .2s;display:flex;align-items:center;justify-content:center}
.card-nav button:hover{background:var(--accent);transform:scale(1.1)}
.card-nav .counter{color:var(--text2);font-size:.9rem;min-width:80px;text-align:center}
.difficulty-btns{display:flex;gap:8px;justify-content:center;margin:8px 0}
.difficulty-btns button{border:none;border-radius:10px;padding:8px 20px;font-size:.8rem;cursor:pointer;transition:all .2s;font-weight:600}
.difficulty-btns button:hover{transform:translateY(-2px)}
.btn-hard{background:rgba(255,82,82,.2);color:var(--red);border:1px solid rgba(255,82,82,.3)!important}
.btn-medium{background:rgba(255,193,7,.2);color:var(--yellow);border:1px solid rgba(255,193,7,.3)!important}
.btn-easy{background:rgba(76,175,80,.2);color:var(--green);border:1px solid rgba(76,175,80,.3)!important}
.shortcuts{text-align:center;color:var(--text2);font-size:.7rem;margin:8px 0;opacity:.6}
.cat-linear{background:rgba(66,133,244,.25);color:#8ab4f8}
.cat-prob{background:rgba(156,39,176,.25);color:#ce93d8}
.cat-ml{background:rgba(255,152,0,.25);color:#ffb74d}
.cat-dl{background:rgba(244,67,54,.25);color:#ef9a9a}
.cat-nlp{background:rgba(0,150,136,.25);color:#80cbc4}
.cat-cv{background:rgba(233,30,99,.25);color:#f48fb1}
.cat-recsys{background:rgba(63,81,181,.25);color:#9fa8da}
.cat-rl{background:rgba(255,87,34,.25);color:#ff8a65}
.cat-mlops{background:rgba(96,125,139,.25);color:#b0bec5}
.cat-coding{background:rgba(139,195,74,.25);color:#aed581}
.empty-state{text-align:center;padding:60px 20px;color:var(--text2)}
@media(max-width:600px){
.app{padding:10px}
header h1{font-size:1.4rem}
.card-face{padding:20px;min-height:320px}
.card-container{min-height:320px}
.stats-bar{gap:8px;padding:10px}
.stat .num{font-size:1.1rem}
.controls{gap:6px}
.controls select,.controls button{padding:6px 10px;font-size:.8rem}
}
.progress-ring{width:48px;height:48px}
.progress-ring circle{transition:stroke-dashoffset .5s}
footer{text-align:center;padding:20px 0;color:var(--text2);font-size:.7rem;opacity:.5}
</style>
</head>
<body>
<div class="app">
<header>
<h1>🧠 ML Interview Flashcards</h1>
<p>100+ cards covering core ML interview topics</p>
</header>
<div class="stats-bar">
<div class="stat total"><div class="num" id="statTotal">0</div><div class="label">Total</div></div>
<div class="stat mastered"><div class="num" id="statMastered">0</div><div class="label">Mastered</div></div>
<div class="stat review"><div class="num" id="statReview">0</div><div class="label">Review</div></div>
<div class="stat unseen"><div class="num" id="statUnseen">0</div><div class="label">Unseen</div></div>
</div>
<div class="controls">
<select id="categoryFilter"><option value="all">All Categories</option></select>
<select id="statusFilter"><option value="all">All Status</option><option value="unseen">Unseen</option><option value="review">Need Review</option><option value="mastered">Mastered</option></select>
<button id="shuffleBtn">🔀 Shuffle</button>
<button id="resetBtn">🔄 Reset</button>
</div>
<div class="mode-tabs">
<button class="active" data-mode="browse">Browse</button>
<button data-mode="spaced">Spaced Repetition</button>
</div>
<div class="card-container" id="cardContainer">
<div class="card" id="card" onclick="flipCard()">
<div class="card-face card-front" id="cardFront"></div>
<div class="card-face card-back" id="cardBack"></div>
</div>
</div>
<div class="card-nav">
<button onclick="prevCard()">◀</button>
<span class="counter" id="counter">0 / 0</span>
<button onclick="nextCard()">▶</button>
</div>
<div class="difficulty-btns">
<button class="btn-hard" onclick="markCard('review')">1 Hard 😰</button>
<button class="btn-medium" onclick="markCard('review')">2 Medium 🤔</button>
<button class="btn-easy" onclick="markCard('mastered')">3 Easy ✅</button>
</div>
<div class="shortcuts">Space: flip · ←→: navigate · 1/2/3: difficulty</div>
<footer>ML Interview Flashcards · Built for acing your next interview</footer>
</div>
<script>
const CARDS=[
// ===== Linear Algebra & Math (10) =====
{id:1,cat:"linear",q:"What is the eigenvalue decomposition of a matrix? When is it possible? (Google)",a:"A square matrix A can be decomposed as <b>A = PDP⁻¹</b> where D is diagonal with eigenvalues (特征值) and P contains eigenvectors (特征向量).<div class='formula'>Av = λv</div>Possible when A has n linearly independent eigenvectors. Symmetric matrices always have real eigenvalues and orthogonal eigenvectors.",company:"Google"},
{id:2,cat:"linear",q:"Explain Singular Value Decomposition (SVD) and its applications in ML. (Amazon)",a:"Any m×n matrix A can be decomposed as:<div class='formula'>A = UΣVᵀ</div>U: m×m orthogonal (left singular vectors)<br>Σ: m×n diagonal (singular values 奇异值)<br>V: n×n orthogonal (right singular vectors)<br><br><b>Applications:</b> PCA, dimensionality reduction, matrix completion (recommender systems), LSA in NLP, image compression.",company:"Amazon"},
{id:3,cat:"linear",q:"What is the difference between positive definite and positive semi-definite matrices? Why does it matter in ML? (Meta)",a:"<b>Positive Definite (正定):</b> xᵀAx > 0 for all x ≠ 0. All eigenvalues > 0.<br><b>Positive Semi-Definite (半正定):</b> xᵀAx ≥ 0. All eigenvalues ≥ 0.<br><br><b>In ML:</b> Covariance matrices are PSD. Kernel matrices must be PSD (Mercer's condition). Hessian being PD guarantees a local minimum.",company:"Meta"},
{id:4,cat:"linear",q:"Explain the concept of matrix rank and its significance. (Google)",a:"<b>Rank (秩)</b> = number of linearly independent rows/columns = number of non-zero singular values.<br><br><b>Significance:</b><br>• Low-rank matrices → data has redundancy → compression possible<br>• Rank-deficient systems → underdetermined → infinite solutions<br>• Matrix completion exploits low-rank structure (Netflix Prize)<br>• Rank of weight matrices relates to model capacity",company:"Google"},
{id:5,cat:"linear",q:"What is the condition number of a matrix? Why does it matter for optimization? (OpenAI)",a:"<div class='formula'>κ(A) = σ_max / σ_min (= |λ_max| / |λ_min| for normal matrices)</div>Measures sensitivity to perturbations (条件数).<br><br>• κ ≈ 1: well-conditioned, stable optimization<br>• κ >> 1: ill-conditioned, gradient descent converges slowly<br>• High condition number of the Hessian → use preconditioners or adaptive optimizers (Adam)",company:"OpenAI"},
{id:6,cat:"linear",q:"Explain the geometric interpretation of matrix multiplication. (Meta)",a:"Matrix multiplication represents a <b>linear transformation (线性变换)</b>:<br>• Rotation, scaling, shearing, projection<br>• Composition of transformations = matrix product<br>• Neural network layers: each layer applies a linear transform + nonlinearity<br>• Attention mechanism: Q·Kᵀ computes similarity via dot products in transformed space",company:"Meta"},
{id:7,cat:"linear",q:"What is the difference between L1 and L2 norms? How do they affect regularization? (Amazon)",a:"<div class='formula'>L1: ||x||₁ = Σ|xᵢ| → sparsity (稀疏性)</div><div class='formula'>L2: ||x||₂ = √(Σxᵢ²) → small weights</div><b>L1 (Lasso):</b> Diamond constraint → solutions at axes → sparse features, feature selection<br><b>L2 (Ridge):</b> Circle constraint → shrinks all weights uniformly → prevents large weights<br><b>Elastic Net:</b> combines both",company:"Amazon"},
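// A minimal sketch (kept in a comment so the CARDS array stays valid JS; not used by the app):
// the L1 and L2 norms from the card above, on a toy vector.
/*
const l1 = v => v.reduce((s, x) => s + Math.abs(x), 0);       // ||x||₁ = Σ|xᵢ|
const l2 = v => Math.sqrt(v.reduce((s, x) => s + x * x, 0));  // ||x||₂ = √(Σxᵢ²)
console.log(l1([3, -4]), l2([3, -4]));                        // 7 5
*/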
{id:8,cat:"linear",q:"Explain the Moore-Penrose pseudoinverse and when you'd use it. (Google)",a:"For any matrix A, the pseudoinverse A⁺ satisfies:<div class='formula'>A⁺ = (AᵀA)⁻¹Aᵀ (if full column rank)</div>Used when A is not square or not invertible.<br><b>Applications:</b><br>• Least squares solution: x = A⁺b<br>• Linear regression closed-form solution<br>• Computed via SVD: A⁺ = VΣ⁺Uᵀ",company:"Google"},
{id:9,cat:"linear",q:"What is the Jacobian matrix and why is it important in deep learning? (OpenAI)",a:"The <b>Jacobian (雅可比矩阵)</b> J of f: ℝⁿ→ℝᵐ has entries Jᵢⱼ = ∂fᵢ/∂xⱼ.<div class='formula'>J ∈ ℝᵐˣⁿ</div><b>In DL:</b><br>• Backpropagation computes vector-Jacobian products (VJP)<br>• Jacobian singular values relate to vanishing/exploding gradients<br>• Used in normalizing flows: det(J) for density estimation",company:"OpenAI"},
{id:10,cat:"linear",q:"Explain the relationship between PCA and eigendecomposition. (Meta)",a:"<b>PCA (主成分分析)</b> finds directions of maximum variance:<br>1. Center the data, then compute the covariance matrix: C = (1/n)XᵀX<br>2. Eigendecompose C = VΛVᵀ<br>3. Top-k eigenvectors = principal components<div class='formula'>Projection: Z = XV_k</div>Equivalently via SVD of the centered X: X = UΣVᵀ → principal components are columns of V, variances are σᵢ²/n.",company:"Meta"},
// ===== Probability & Statistics (10) =====
{id:11,cat:"prob",q:"Explain Bayes' Theorem and give an ML application. (Google)",a:"<div class='formula'>P(A|B) = P(B|A)·P(A) / P(B)</div><b>贝叶斯定理</b>: Updates prior belief with evidence.<br><br><b>Applications:</b><br>• Naive Bayes classifier: P(class|features)<br>• Bayesian optimization for hyperparameter tuning<br>• Bayesian neural networks for uncertainty estimation<br>• Spam filtering: P(spam|words)",company:"Google"},
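// A minimal sketch (in a comment to keep the array valid): the spam-filter application of
// Bayes' theorem. All rates are assumed toy values, not real data.
/*
const pWordSpam = 0.6, pWordHam = 0.04, pSpam = 0.2;       // assumed likelihoods and prior
const pWord = pWordSpam * pSpam + pWordHam * (1 - pSpam);  // total probability P(B)
const pSpamWord = (pWordSpam * pSpam) / pWord;             // P(A|B) = P(B|A)·P(A) / P(B)
console.log(pSpamWord.toFixed(3));                         // 0.789
*/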
{id:12,cat:"prob",q:"What is the difference between MLE and MAP estimation? (Amazon)",a:"<b>MLE (最大似然估计):</b><div class='formula'>θ_MLE = argmax P(D|θ)</div><b>MAP (最大后验估计):</b><div class='formula'>θ_MAP = argmax P(θ|D) = argmax P(D|θ)P(θ)</div>MAP adds a prior P(θ) → equivalent to regularization.<br>• Gaussian prior → L2 regularization<br>• Laplace prior → L1 regularization<br>• As data → ∞, MAP → MLE",company:"Amazon"},
{id:13,cat:"prob",q:"Explain the Central Limit Theorem and its relevance to ML. (Meta)",a:"<b>CLT (中心极限定理):</b> Sum of n i.i.d. random variables → Normal distribution as n→∞, regardless of original distribution.<div class='formula'>√n(X̄ - μ) → N(0, σ²)</div><b>In ML:</b><br>• Justifies Gaussian assumptions in many models<br>• Mini-batch gradient estimates are approximately normal<br>• Confidence intervals for model metrics<br>• A/B testing statistical significance",company:"Meta"},
{id:14,cat:"prob",q:"What is KL Divergence? Is it a true distance metric? (OpenAI)",a:"<div class='formula'>KL(P||Q) = Σ P(x) log(P(x)/Q(x))</div><b>KL散度</b> measures how Q differs from P. It is NOT a metric:<br>• KL(P||Q) ≠ KL(Q||P) (asymmetric 非对称)<br>• Doesn't satisfy triangle inequality<br><br><b>In ML:</b> Loss in VAE (ELBO), knowledge distillation, policy optimization (PPO uses clipped KL), information gain in decision trees.",company:"OpenAI"},
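// A minimal sketch (in a comment to keep the array valid): KL divergence between discrete
// distributions in nats (natural log), demonstrating the asymmetry noted on the card.
/*
const kl = (p, q) => p.reduce((s, pi, i) => s + (pi > 0 ? pi * Math.log(pi / q[i]) : 0), 0);
const P = [0.5, 0.5], Q = [0.9, 0.1];
console.log(kl(P, Q).toFixed(3), kl(Q, P).toFixed(3)); // 0.511 0.368 — KL(P||Q) ≠ KL(Q||P)
*/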
{id:15,cat:"prob",q:"Explain the bias-variance decomposition of expected prediction error. (Google)",a:"<div class='formula'>E[(y - f̂(x))²] = Bias²(f̂) + Var(f̂) + σ²</div><b>Bias (偏差):</b> Error from wrong assumptions → underfitting<br><b>Variance (方差):</b> Sensitivity to training data → overfitting<br><b>σ² (噪声):</b> Irreducible error<br><br>Simple models: high bias, low variance<br>Complex models: low bias, high variance<br>Goal: find the sweet spot (tradeoff 权衡)",company:"Google"},
{id:16,cat:"prob",q:"What is a conjugate prior? Give examples. (Amazon)",a:"A prior is <b>conjugate (共轭先验)</b> to a likelihood if the posterior is in the same family.<br><br><b>Examples:</b><br>• Beta prior + Binomial likelihood → Beta posterior<br>• Normal prior + Normal likelihood → Normal posterior<br>• Gamma prior + Poisson likelihood → Gamma posterior<br><br><b>Benefit:</b> Closed-form posterior updates, no need for MCMC.",company:"Amazon"},
{id:17,cat:"prob",q:"Explain the difference between generative and discriminative models. (Meta)",a:"<b>Generative (生成模型):</b> Models P(X,Y) = P(X|Y)P(Y)<br>Examples: Naive Bayes, GMM, VAE, GAN, GPT<br><br><b>Discriminative (判别模型):</b> Models P(Y|X) directly<br>Examples: Logistic Regression, SVM, Neural Nets<br><br>Generative can generate samples & handle missing data. Discriminative usually has better classification accuracy with enough data.",company:"Meta"},
{id:18,cat:"prob",q:"What is the Expectation-Maximization (EM) algorithm? (Google)",a:"<b>EM (期望最大化)</b> finds MLE with latent variables:<br><br><b>E-step:</b> Compute expected latent variables given current θ<div class='formula'>Q(θ|θ_old) = E_z[log P(X,Z|θ) | X, θ_old]</div><b>M-step:</b> Maximize Q to update θ<br><br><b>Applications:</b> GMM clustering, HMMs, missing data imputation<br>Guarantees monotonic likelihood increase. May converge to local optimum.",company:"Google"},
{id:19,cat:"prob",q:"Explain hypothesis testing. What is a p-value? (Amazon)",a:"<b>Hypothesis testing (假设检验):</b><br>• H₀: null hypothesis (no effect)<br>• H₁: alternative hypothesis<br><br><b>p-value:</b> P(observing data as extreme | H₀ is true)<br>• p < α (typically 0.05) → reject H₀<br><br><b>In ML:</b> A/B testing, feature significance, model comparison<br><b>Caution:</b> p-value ≠ P(H₀ is true). Multiple testing → Bonferroni correction.",company:"Amazon"},
{id:20,cat:"prob",q:"What are copulas and how might they be used in ML? (OpenAI)",a:"<b>Copulas (Copula函数)</b> model dependency structure between random variables, separate from marginals.<div class='formula'>F(x₁,...,xₙ) = C(F₁(x₁),...,Fₙ(xₙ))</div><b>In ML:</b><br>• Financial risk modeling (tail dependencies)<br>• Multivariate data generation<br>• Better than assuming Gaussian dependence<br>• Used in probabilistic forecasting",company:"OpenAI"},
// ===== ML Fundamentals (15) =====
{id:21,cat:"ml",q:"Explain the bias-variance tradeoff. How do you diagnose and fix each? (Google)",a:"<b>High Bias (欠拟合):</b> Training & validation error both high<br>Fix: More features, complex model, less regularization<br><br><b>High Variance (过拟合):</b> Low training error, high validation error<br>Fix: More data, regularization, dropout, simpler model, early stopping<br><br><b>Learning curves</b> help diagnose: plot error vs training size.",company:"Google"},
{id:22,cat:"ml",q:"Compare L1 vs L2 regularization. When would you use each? (Amazon)",a:"<b>L1 (Lasso):</b><div class='formula'>Loss + λΣ|wᵢ|</div>• Produces sparse weights → feature selection<br>• Non-differentiable at 0 → use subgradient/proximal methods<br><br><b>L2 (Ridge):</b><div class='formula'>Loss + λΣwᵢ²</div>• Shrinks weights uniformly, no sparsity<br>• Has closed-form solution<br><br><b>Use L1</b> when many irrelevant features. <b>Use L2</b> when all features matter. <b>Elastic Net</b> = both.",company:"Amazon"},
{id:23,cat:"ml",q:"Explain k-fold cross-validation and stratified CV. (Meta)",a:"<b>K-fold CV (K折交叉验证):</b><br>1. Split data into k folds<br>2. Train on k-1, validate on 1<br>3. Repeat k times, average results<br><br><b>Stratified CV:</b> Preserves class distribution in each fold → essential for imbalanced data.<br><br><b>Variants:</b> Leave-one-out (k=n), repeated k-fold, group k-fold (no data leakage across groups)<br>Typical k: 5 or 10.",company:"Meta"},
{id:24,cat:"ml",q:"Explain precision, recall, F1-score, and when to optimize each. (Google)",a:"<div class='formula'>Precision = TP/(TP+FP) — 精确率</div><div class='formula'>Recall = TP/(TP+FN) — 召回率</div><div class='formula'>F1 = 2·P·R/(P+R) — harmonic mean</div><b>Optimize Precision:</b> When FP is costly (spam detection)<br><b>Optimize Recall:</b> When FN is costly (cancer detection)<br><b>F1:</b> Balanced tradeoff<br><b>Also:</b> AUC-ROC for threshold-independent evaluation, AP for ranking.",company:"Google"},
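// A minimal sketch (in a comment to keep the array valid): precision, recall, and F1
// computed from toy confusion-matrix counts.
/*
const tp = 40, fp = 10, fn = 20;
const precision = tp / (tp + fp);                         // 0.8
const recall = tp / (tp + fn);                            // ≈ 0.667
const f1 = 2 * precision * recall / (precision + recall); // harmonic mean
console.log(f1.toFixed(3));                               // 0.727
*/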
{id:25,cat:"ml",q:"What is gradient descent? Compare batch, mini-batch, and SGD. (Amazon)",a:"<div class='formula'>θ = θ - η·∇L(θ)</div><b>Batch GD:</b> Full dataset per step → stable but slow, memory-heavy<br><b>SGD (随机梯度下降):</b> One sample → noisy but fast, can escape local minima<br><b>Mini-batch:</b> Best of both → GPU-friendly, moderate noise<br><br>Typical batch sizes: 32-256. Larger batches may need learning rate warmup (linear scaling rule).",company:"Amazon"},
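// A minimal sketch (in a comment to keep the array valid): plain gradient descent on a 1-D
// quadratic f(θ) = (θ − 3)², with an assumed learning rate η = 0.1.
/*
let theta = 0;
const lr = 0.1, grad = t => 2 * (t - 3);                          // ∇f(θ) = 2(θ − 3)
for (let step = 0; step < 100; step++) theta -= lr * grad(theta); // θ = θ − η·∇L(θ)
console.log(theta.toFixed(4));                                    // 3.0000 — converges to the minimum
*/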
{id:26,cat:"ml",q:"How does a Random Forest work? What are its advantages? (Meta)",a:"<b>Random Forest (随机森林):</b> Ensemble of decision trees with:<br>• <b>Bagging:</b> Each tree trained on bootstrap sample<br>• <b>Feature randomness:</b> Each split considers random subset of features (√p for classification, p/3 for regression)<br><br><b>Advantages:</b> Resistant to overfitting, handles missing values, feature importance, few hyperparameters, parallelizable<br><b>Disadvantage:</b> Less interpretable than single tree, large memory.",company:"Meta"},
{id:27,cat:"ml",q:"Explain gradient boosting. How does XGBoost improve on it? (Google)",a:"<b>Gradient Boosting:</b> Sequentially fits trees to residuals (negative gradients).<div class='formula'>F_m(x) = F_{m-1}(x) + η·h_m(x)</div><b>XGBoost improvements:</b><br>• Regularized objective (L1+L2 on leaf weights)<br>• 2nd order Taylor expansion of loss<br>• Weighted quantile sketch for split finding<br>• Sparsity-aware (handles missing values)<br>• Column block for parallel tree construction<br>• Cache-aware access patterns",company:"Google"},
{id:28,cat:"ml",q:"What is the kernel trick? Explain with SVM. (Amazon)",a:"<b>Kernel trick (核技巧):</b> Compute dot products in high-dimensional space without explicit mapping.<div class='formula'>K(x,y) = φ(x)·φ(y)</div><b>Common kernels:</b><br>• Linear: xᵀy<br>• Polynomial: (xᵀy + c)^d<br>• RBF/Gaussian: exp(-γ||x-y||²)<br><br>SVM dual form only uses dot products → replace with K(xᵢ,xⱼ). Enables nonlinear decision boundaries in original space.",company:"Amazon"},
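// A minimal sketch (in a comment to keep the array valid): the RBF kernel from the card,
// with an assumed γ = 0.5 and toy points.
/*
const rbf = (x, y, gamma = 0.5) => {
  const d2 = x.reduce((s, xi, i) => s + (xi - y[i]) ** 2, 0); // ||x − y||²
  return Math.exp(-gamma * d2);                               // exp(−γ||x − y||²)
};
console.log(rbf([0, 0], [1, 1]).toFixed(3)); // 0.368 = e⁻¹
console.log(rbf([0, 0], [0, 0]));            // 1 — identical points
*/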
{id:29,cat:"ml",q:"Explain how decision trees handle feature selection and splitting. (Meta)",a:"<b>Splitting criteria (分裂准则):</b><br>• Classification: Gini impurity, Information gain (entropy)<div class='formula'>Gini = 1 - Σpᵢ²</div><div class='formula'>Entropy = -Σpᵢlog₂(pᵢ)</div>• Regression: MSE reduction<br><br>At each node, try all features & thresholds → pick best split. Greedy top-down approach.<br><b>Pruning:</b> Pre-pruning (max depth, min samples) or post-pruning (cost-complexity).",company:"Meta"},
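// A minimal sketch (in a comment to keep the array valid): the two classification splitting
// criteria from the card, evaluated on toy class distributions.
/*
const gini = p => 1 - p.reduce((s, pi) => s + pi * pi, 0);                          // 1 − Σpᵢ²
const entropy = p => p.reduce((s, pi) => s - (pi > 0 ? pi * Math.log2(pi) : 0), 0); // −Σpᵢlog₂(pᵢ)
console.log(gini([0.5, 0.5]), entropy([0.5, 0.5])); // 0.5 1 — maximal impurity
console.log(gini([1, 0]), entropy([1, 0]));         // 0 0 — pure node
*/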
{id:30,cat:"ml",q:"How do you handle class imbalance in ML? (Google)",a:"<b>Data-level (数据层面):</b><br>• Oversampling minority (SMOTE)<br>• Undersampling majority<br>• Data augmentation<br><br><b>Algorithm-level:</b><br>• Class weights in loss function<br>• Focal loss: (1-pₜ)^γ · CE<br>• Cost-sensitive learning<br><br><b>Evaluation:</b> Use F1, AUC-PR (not accuracy!)<br><b>Ensemble:</b> BalancedRandomForest, EasyEnsemble",company:"Google"},
{id:31,cat:"ml",q:"Explain AUC-ROC. What does a 0.5 AUC mean? (Amazon)",a:"<b>ROC curve:</b> TPR vs FPR at all thresholds<br><b>AUC (曲线下面积):</b> Probability that model ranks a random positive higher than random negative.<div class='formula'>AUC = P(score(pos) > score(neg))</div>• AUC = 1.0: perfect<br>• AUC = 0.5: random (no discrimination)<br>• AUC < 0.5: worse than random (flip predictions)<br><br><b>AUC-PR</b> better for imbalanced datasets.",company:"Amazon"},
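// A minimal sketch (in a comment to keep the array valid): AUC computed directly as the
// pairwise ranking probability P(score(pos) > score(neg)), with ties counted as ½.
/*
function auc(scores, labels) {
  const pos = scores.filter((_, i) => labels[i] === 1);
  const neg = scores.filter((_, i) => labels[i] === 0);
  let wins = 0;
  for (const p of pos) for (const n of neg) wins += p > n ? 1 : p === n ? 0.5 : 0;
  return wins / (pos.length * neg.length);
}
console.log(auc([0.9, 0.7, 0.4, 0.2], [1, 0, 1, 0])); // 0.75 — 3 of 4 pos/neg pairs ranked correctly
*/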
{id:32,cat:"ml",q:"What is feature engineering? Give examples of common techniques. (Meta)",a:"<b>Feature engineering (特征工程):</b> Creating informative features from raw data.<br><br><b>Techniques:</b><br>• Numerical: log transform, binning, polynomial features, standardization<br>• Categorical: one-hot, target encoding, frequency encoding<br>• Text: TF-IDF, word embeddings, n-grams<br>• Time: day of week, lag features, rolling statistics<br>• Interaction features: feature crosses<br>• Domain-specific: pixel gradients (CV), mel spectrograms (audio)",company:"Meta"},
{id:33,cat:"ml",q:"Explain how k-Nearest Neighbors works. What are its limitations? (Google)",a:"<b>KNN:</b> Classify by majority vote of k nearest neighbors (distance-based).<br><br><b>Distance metrics:</b> Euclidean, Manhattan, cosine, Minkowski<br><br><b>Limitations:</b><br>• Curse of dimensionality (维度诅咒): distances become meaningless in high-d<br>• O(n) prediction time (use KD-trees, ball trees, or approximate NN like FAISS)<br>• Sensitive to feature scaling<br>• No model to interpret<br>• Memory: stores all training data",company:"Google"},
{id:34,cat:"ml",q:"What is multi-collinearity and how does it affect models? (Amazon)",a:"<b>Multi-collinearity (多重共线性):</b> Features highly correlated with each other.<br><br><b>Effects:</b><br>• Unstable coefficient estimates in linear regression<br>• Large variance of coefficients<br>• Doesn't affect predictions, but hurts interpretability<br><br><b>Detection:</b> VIF (Variance Inflation Factor) > 10<br><b>Fix:</b> Remove correlated features, PCA, regularization (Ridge handles it well)",company:"Amazon"},
{id:35,cat:"ml",q:"Explain the difference between parametric and non-parametric models. (Meta)",a:"<b>Parametric (参数模型):</b> Fixed number of parameters<br>Examples: Linear/Logistic Regression, Naive Bayes, Neural Nets<br>• Assumptions about data distribution<br>• Fast prediction, may underfit<br><br><b>Non-parametric (非参数模型):</b> Parameters grow with data<br>Examples: KNN, Decision Trees, Kernel SVM, GPs<br>• Fewer assumptions, more flexible<br>• Can overfit, slower prediction<br>• Note: \"non-parametric\" ≠ \"no parameters\"",company:"Meta"},
// ===== Deep Learning (15) =====
{id:36,cat:"dl",q:"Explain backpropagation in detail. (Google)",a:"<b>Backprop (反向传播):</b> Efficient computation of gradients via chain rule.<br><br>1. <b>Forward pass:</b> Compute outputs layer by layer, cache intermediate values<br>2. <b>Backward pass:</b> Compute ∂L/∂w for each layer using chain rule<div class='formula'>∂L/∂w_l = ∂L/∂a_l · ∂a_l/∂z_l · ∂z_l/∂w_l</div>Key insight: Reuse intermediate gradients → O(n) instead of recomputing. Implemented as computational graph with automatic differentiation.",company:"Google"},
{id:37,cat:"dl",q:"Compare Adam, SGD with momentum, and AdaGrad. (OpenAI)",a:"<b>SGD + Momentum:</b><div class='formula'>v = βv + ∇L; θ -= ηv</div>Accelerates in consistent gradient directions.<br><br><b>AdaGrad:</b> Per-parameter learning rates, divides by √(sum of squared gradients). Good for sparse features, but LR decays to 0.<br><br><b>Adam (自适应矩估计):</b><div class='formula'>m = β₁m + (1-β₁)∇L (1st moment)</div><div class='formula'>v = β₂v + (1-β₂)(∇L)² (2nd moment)</div>Combines momentum + adaptive LR. Defaults: β₁=0.9, β₂=0.999, η=1e-3 (3e-4 is a common practical choice).",company:"OpenAI"},
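// A minimal sketch (in a comment to keep the array valid): one Adam step for a single scalar
// parameter, using the moment updates and bias correction from the card. Toy gradient value.
/*
let m = 0, v = 0, t = 0, w = 1.0;
function adamStep(grad, lr = 1e-3, b1 = 0.9, b2 = 0.999, eps = 1e-8) {
  t += 1;
  m = b1 * m + (1 - b1) * grad;             // 1st moment (momentum)
  v = b2 * v + (1 - b2) * grad * grad;      // 2nd moment (adaptive scale)
  const mHat = m / (1 - b1 ** t);           // bias correction
  const vHat = v / (1 - b2 ** t);
  w -= lr * mHat / (Math.sqrt(vHat) + eps);
}
adamStep(2.0);
console.log(w.toFixed(4)); // 0.9990 — step size ≈ lr regardless of raw gradient scale
*/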
{id:38,cat:"dl",q:"Explain the vanishing/exploding gradient problem and solutions. (Meta)",a:"<b>Problem:</b> In deep networks, gradients are multiplied across layers:<br>• |∂h_l/∂h_{l-1}| < 1 repeatedly → gradients vanish (梯度消失)<br>• |∂h_l/∂h_{l-1}| > 1 repeatedly → gradients explode (梯度爆炸)<br><br><b>Solutions:</b><br>• ReLU/variants (avoid saturation)<br>• Residual connections (skip connections)<br>• Batch/Layer normalization<br>• Gradient clipping (for exploding)<br>• Careful initialization (Xavier, He)<br>• LSTM/GRU gating (for RNNs)",company:"Meta"},
{id:39,cat:"dl",q:"Explain how CNNs work. What is a convolution operation? (Google)",a:"<b>Convolution (卷积):</b> Slides a learned kernel over input, computing element-wise multiply + sum.<div class='formula'>Output[i,j] = Σ Σ Input[i+m,j+n] · Kernel[m,n]</div><b>Key properties:</b><br>• Parameter sharing → translation equivariance<br>• Local connectivity → spatial hierarchy<br>• Fewer params than fully connected<br><br><b>CNN architecture:</b> Conv → ReLU → Pool → ... → FC<br>Feature maps go from edges → textures → parts → objects.",company:"Google"},
{id:40,cat:"dl",q:"Explain Batch Normalization. Why does it help training? (Amazon)",a:"<b>BatchNorm (批归一化):</b><div class='formula'>x̂ = (x - μ_B) / √(σ²_B + ε)</div><div class='formula'>y = γx̂ + β (learnable scale & shift)</div><b>Benefits:</b><br>• Reduces internal covariate shift<br>• Allows higher learning rates<br>• Acts as regularization (batch noise)<br>• Smoother loss landscape<br><br><b>At inference:</b> Uses running mean/variance. <b>Issues:</b> Batch size dependent, problematic for RNNs → use LayerNorm instead.",company:"Amazon"},
{id:41,cat:"dl",q:"Explain the Transformer architecture in detail. (OpenAI)",a:"<b>Transformer:</b> Self-attention based architecture (Vaswani et al., 2017).<br><br><b>Key components:</b><br>1. <b>Multi-Head Attention:</b><div class='formula'>Attention(Q,K,V) = softmax(QKᵀ/√d_k)V</div>2. <b>Position-wise FFN:</b> Two linear layers + ReLU<br>3. <b>Positional encoding:</b> sin/cos or learned<br>4. <b>Residual connections + LayerNorm</b><br><br><b>Encoder:</b> Self-attention + FFN (×N)<br><b>Decoder:</b> Masked self-attn + cross-attn + FFN (×N)<br>O(n²d) complexity for sequence length n.",company:"OpenAI"},
{id:42,cat:"dl",q:"What is the attention mechanism? Compare self-attention, cross-attention, and multi-head attention. (Google)",a:"<b>Attention (注意力机制):</b> Weighted aggregation based on relevance.<div class='formula'>α = softmax(score(Q,K)); output = αV</div><b>Self-attention:</b> Q,K,V all from same sequence → captures intra-dependencies<br><b>Cross-attention:</b> Q from one sequence, K,V from another → encoder-decoder connection<br><b>Multi-head:</b> h parallel attention heads, each with d/h dimensions → captures different relationship types<div class='formula'>MultiHead = Concat(head₁,...,headₕ)W^O</div>",company:"Google"},
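// A minimal sketch (in a comment to keep the array valid): scaled dot-product attention for
// one query over two key/value pairs, all toy 2-D vectors.
/*
const dot = (a, b) => a.reduce((s, ai, i) => s + ai * b[i], 0);
const softmax = xs => {
  const m = Math.max(...xs), es = xs.map(x => Math.exp(x - m)); // subtract max for stability
  const z = es.reduce((a, b) => a + b, 0);
  return es.map(e => e / z);
};
const q = [1, 0], K = [[1, 0], [0, 1]], V = [[10, 0], [0, 10]], dk = 2;
const alpha = softmax(K.map(k => dot(q, k) / Math.sqrt(dk)));                  // softmax(QKᵀ/√d_k)
const out = V[0].map((_, j) => alpha.reduce((s, w, i) => s + w * V[i][j], 0)); // αV
console.log(alpha.map(x => x.toFixed(3)), out.map(x => x.toFixed(2)));         // [0.670, 0.330] [6.70, 3.30]
*/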
{id:43,cat:"dl",q:"Explain RNNs, LSTMs, and GRUs. What problem does each solve? (Meta)",a:"<b>RNN:</b> h_t = tanh(W_h·h_{t-1} + W_x·x_t). Problem: vanishing gradients for long sequences.<br><br><b>LSTM (长短期记忆):</b> Adds cell state + 3 gates:<br>• Forget gate: what to discard<br>• Input gate: what to store<br>• Output gate: what to output<br>Solves long-range dependencies via cell state highway.<br><br><b>GRU:</b> Simplified LSTM with 2 gates (reset, update). Fewer params, similar performance. Both largely replaced by Transformers.",company:"Meta"},
{id:44,cat:"dl",q:"What is dropout and why does it work? (Amazon)",a:"<b>Dropout:</b> Randomly zero out neurons with probability p during training.<div class='formula'>h_drop = h · mask / (1-p) (inverted dropout)</div><b>Why it works:</b><br>• Prevents co-adaptation of neurons<br>• Approximately ensemble of 2^n sub-networks<br>• Acts as regularization<br>• Equivalent to approximate Bayesian inference<br><br><b>At inference:</b> Use all neurons (no dropout). Typical p: 0.1-0.5. Not used with BatchNorm usually.",company:"Amazon"},
{id:45,cat:"dl",q:"Explain different weight initialization strategies. Why does initialization matter? (OpenAI)",a:"Bad init → vanishing/exploding activations/gradients.<br><br><b>Xavier/Glorot (for tanh/sigmoid):</b><div class='formula'>W ~ N(0, 2/(n_in + n_out))</div><b>He/Kaiming (for ReLU):</b><div class='formula'>W ~ N(0, 2/n_in)</div>Goal: Keep variance of activations constant across layers.<br><br><b>Modern:</b> Orthogonal init, LSUV, fixup initialization. Transformers often use scaled init (1/√d or 1/√(2N)) for residual connections.",company:"OpenAI"},
{id:46,cat:"dl",q:"What are residual connections (skip connections) and why do they work? (Google)",a:"<b>ResNet (残差连接):</b> Learn residual function instead of direct mapping.<div class='formula'>y = F(x) + x (identity shortcut)</div><b>Why they work:</b><br>• Gradient flows directly through shortcuts → solves vanishing gradient<br>• Easier to learn F(x)=0 than identity mapping<br>• Enables training of 100+ layer networks<br>• Ensemble interpretation: exponential paths through network<br><br>Used everywhere: ResNet, Transformer, DenseNet (dense connections).",company:"Google"},
{id:47,cat:"dl",q:"Explain learning rate scheduling strategies. (Meta)",a:"<b>Common schedules:</b><br>• <b>Step decay:</b> Reduce by factor every n epochs<br>• <b>Cosine annealing:</b> η_t = η_min + ½(η_max-η_min)(1+cos(πt/T))<br>• <b>Warmup + decay:</b> Linear warmup then decay (standard for Transformers)<br>• <b>OneCycleLR:</b> Ramp up then down (super-convergence)<br>• <b>ReduceOnPlateau:</b> Reduce when metric stops improving<br><br><b>Warmup</b> critical for Adam with large batch / Transformer training.",company:"Meta"},
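// A minimal sketch (in a comment to keep the array valid): the cosine annealing schedule
// from the card, with assumed η_max = 1e-3, η_min = 0, T = 100 steps.
/*
const cosineLR = (t, T, lrMax = 1e-3, lrMin = 0) =>
  lrMin + 0.5 * (lrMax - lrMin) * (1 + Math.cos(Math.PI * t / T)); // η_t per the card
console.log([0, 50, 100].map(t => cosineLR(t, 100).toFixed(6)));   // 0.001000 0.000500 0.000000
*/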
{id:48,cat:"dl",q:"What is knowledge distillation? (OpenAI)",a:"<b>Knowledge Distillation (知识蒸馏):</b> Train a small 'student' to mimic a large 'teacher'.<div class='formula'>L = α·CE(y, p_s) + (1-α)·KL(p_t^T, p_s^T)</div>Temperature T softens probabilities → reveals 'dark knowledge' (inter-class similarities).<br><br><b>Applications:</b> Model compression, BERT→DistilBERT (40% smaller, 97% performance), on-device deployment<br>Also: self-distillation, multi-teacher distillation.",company:"OpenAI"},
{id:49,cat:"dl",q:"Explain GANs. What is mode collapse? (Google)",a:"<b>GAN (生成对抗网络):</b> Generator G vs Discriminator D in minimax game.<div class='formula'>min_G max_D E[log D(x)] + E[log(1-D(G(z)))]</div><b>Mode collapse (模式崩塌):</b> G produces limited variety, ignoring modes of data distribution.<br><br><b>Solutions:</b> Wasserstein GAN (Earth mover distance), spectral normalization, progressive growing, StyleGAN architecture<br>Training is notoriously unstable → diffusion models now preferred.",company:"Google"},
{id:50,cat:"dl",q:"Explain diffusion models at a high level. (OpenAI)",a:"<b>Diffusion Models (扩散模型):</b><br><br><b>Forward process:</b> Gradually add Gaussian noise to data over T steps<div class='formula'>q(x_t|x_{t-1}) = N(x_t; √(1-β_t)x_{t-1}, β_tI)</div><b>Reverse process:</b> Learn to denoise step by step<div class='formula'>p_θ(x_{t-1}|x_t) = N(x_{t-1}; μ_θ(x_t,t), Σ_θ)</div>Train a neural net (U-Net) to predict noise ε.<br><b>Advantages over GANs:</b> Stable training, better diversity, likelihood-based. Used in DALL-E 2, Stable Diffusion, Sora.",company:"OpenAI"},
// ===== NLP (10) =====
{id:51,cat:"nlp",q:"Explain word embeddings. Compare Word2Vec and GloVe. (Google)",a:"<b>Word embeddings (词嵌入):</b> Dense vector representations capturing semantic meaning.<br><br><b>Word2Vec:</b><br>• Skip-gram: predict context from word<br>• CBOW: predict word from context<br>• Local context window<br><br><b>GloVe:</b><br>• Global co-occurrence matrix factorization<div class='formula'>J = Σ f(X_ij)(wᵢᵀw̃ⱼ + bᵢ + b̃ⱼ - log X_ij)²</div>Both capture: king - man + woman ≈ queen<br>Modern: contextual embeddings (BERT, GPT) supersede static embeddings.",company:"Google"},
{id:52,cat:"nlp",q:"Explain BERT's architecture and pre-training objectives. (Meta)",a:"<b>BERT (双向编码器):</b> Transformer encoder, bidirectional context.<br><br><b>Pre-training:</b><br>1. <b>Masked Language Model (MLM):</b> Mask 15% of tokens, predict them. Of masked: 80% [MASK], 10% random, 10% unchanged.<br>2. <b>Next Sentence Prediction (NSP):</b> Binary classification (later shown less useful → RoBERTa drops it).<br><br><b>Fine-tuning:</b> Add task-specific head. BERT-base: 12 layers, 768 hidden, 110M params.<br><b>Variants:</b> RoBERTa, ALBERT, DeBERTa, ELECTRA.",company:"Meta"},
{id:53,cat:"nlp",q:"Explain GPT architecture and how it differs from BERT. (OpenAI)",a:"<b>GPT (生成式预训练):</b> Transformer <b>decoder</b>, autoregressive left-to-right.<div class='formula'>P(x) = Π P(xᵢ|x₁,...,x_{i-1})</div><b>vs BERT:</b><br>• GPT: unidirectional (causal mask) → generation<br>• BERT: bidirectional → understanding<br>• GPT: generative pre-training → few-shot/zero-shot<br>• BERT: masked LM → fine-tuning<br><br><b>Scaling:</b> GPT-3 (175B) → GPT-4 (rumored MoE) → emergent abilities with scale. In-context learning is key GPT capability.",company:"OpenAI"},
{id:54,cat:"nlp",q:"What is tokenization? Compare BPE, WordPiece, and SentencePiece. (Google)",a:"<b>Tokenization (分词):</b> Converting text to model-digestible units.<br><br><b>BPE (Byte Pair Encoding):</b> Iteratively merge most frequent character pairs. Used in GPT.<br><b>WordPiece:</b> Similar but uses likelihood instead of frequency. Used in BERT.<br><b>SentencePiece:</b> Language-agnostic, treats input as raw bytes/unicode. Used in T5, LLaMA.<br><br><b>Vocab size tradeoff:</b> Small → longer sequences, large → sparse embeddings. Typical: 30K-100K.",company:"Google"},
{id:55,cat:"nlp",q:"Explain the seq2seq model with attention. (Amazon)",a:"<b>Seq2Seq (序列到序列):</b> Encoder maps input → context, Decoder generates output.<br><br><b>Problem:</b> Fixed-size context vector bottleneck for long sequences.<br><br><b>Attention solution (Bahdanau, 2014):</b><div class='formula'>c_t = Σ α_ti · h_i</div><div class='formula'>α_ti = softmax(score(s_t, h_i))</div>Decoder attends to all encoder states at each step.<br><b>Score functions:</b> dot product, additive (concat), scaled dot product<br>Led directly to the Transformer architecture.",company:"Amazon"},
{id:56,cat:"nlp",q:"What is transfer learning in NLP? Explain the pre-train → fine-tune paradigm. (Meta)",a:"<b>Transfer learning (迁移学习):</b> Leverage knowledge from one task for another.<br><br><b>Paradigm:</b><br>1. <b>Pre-train</b> on large unlabeled corpus (self-supervised)<br>2. <b>Fine-tune</b> on downstream task with small labeled data<br><br><b>Evolution:</b><br>• Word2Vec → ELMo → BERT → GPT-3 → prompt-based learning<br>• Fine-tuning → prompt engineering → in-context learning<br>• Parameter-efficient: LoRA, adapters, prefix tuning<br>Key insight: language modeling captures transferable knowledge.",company:"Meta"},
{id:57,cat:"nlp",q:"Explain positional encoding in Transformers. Why is it needed? (OpenAI)",a:"<b>Problem:</b> Self-attention is permutation-invariant → no notion of order.<br><br><b>Sinusoidal (original):</b><div class='formula'>PE(pos,2i) = sin(pos/10000^(2i/d))</div><div class='formula'>PE(pos,2i+1) = cos(pos/10000^(2i/d))</div><b>Learned:</b> Trainable embedding per position (BERT, GPT).<br><b>RoPE:</b> Rotary Position Embedding — encodes relative position via rotation matrices. Used in LLaMA, modern LLMs.<br><b>ALiBi:</b> Linear bias based on distance. Extrapolates to longer sequences.",company:"OpenAI"},
{id:58,cat:"nlp",q:"What is beam search? Compare with greedy and sampling decoding. (Google)",a:"<b>Greedy:</b> Pick argmax at each step. Fast but suboptimal.<br><br><b>Beam search (束搜索):</b> Keep top-k hypotheses at each step.<br>• beam_size=1 → greedy<br>• Larger beam → better quality, slower<br>• Length normalization needed<br><br><b>Sampling:</b><br>• Temperature: sharpen/flatten distribution<br>• Top-k: sample from top k tokens<br>• Top-p (nucleus): sample from smallest set with cumulative prob ≥ p<br>Sampling preferred for creative/diverse generation.",company:"Google"},
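// A minimal sketch (in a comment to keep the array valid): nucleus (top-p) sampling over a
// toy 4-token distribution with p = 0.9.
/*
function topPSample(probs, p = 0.9) {
  const order = probs.map((pr, i) => [pr, i]).sort((a, b) => b[0] - a[0]); // sort descending
  let cum = 0; const kept = [];
  for (const [pr, i] of order) { kept.push([pr, i]); cum += pr; if (cum >= p) break; }
  const z = kept.reduce((s, [pr]) => s + pr, 0);   // renormalize within the nucleus
  let r = Math.random() * z;
  for (const [pr, i] of kept) { r -= pr; if (r <= 0) return i; }
  return kept[kept.length - 1][1];
}
console.log(topPSample([0.5, 0.3, 0.15, 0.05])); // one of tokens 0–2; token 3 is pruned
*/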
{id:59,cat:"nlp",q:"What is RLHF? How is it used to align language models? (OpenAI)",a:"<b>RLHF (基于人类反馈的强化学习):</b><br><br>1. <b>SFT:</b> Supervised fine-tuning on demonstrations<br>2. <b>Reward Model:</b> Train on human preference comparisons (A > B)<br>3. <b>PPO:</b> Optimize policy (LLM) to maximize reward with KL penalty<div class='formula'>L = E[R(x,y)] - β·KL(π_θ || π_ref)</div><b>Alternatives:</b> DPO (Direct Preference Optimization) — no separate reward model needed.<br>Key for: helpfulness, harmlessness, honesty.",company:"OpenAI"},
{id:60,cat:"nlp",q:"Explain RAG (Retrieval-Augmented Generation). (Meta)",a:"<b>RAG (检索增强生成):</b> Combine retrieval with generation to reduce hallucination.<br><br><b>Pipeline:</b><br>1. <b>Index:</b> Embed documents into vector store (FAISS, Pinecone)<br>2. <b>Retrieve:</b> Find top-k relevant docs for query<br>3. <b>Generate:</b> LLM generates answer conditioned on retrieved context<br><br><b>Advantages:</b> Up-to-date knowledge, verifiable sources, no retraining needed<br><b>Challenges:</b> Retrieval quality, context window limits, chunking strategy",company:"Meta"},
// ===== Computer Vision (10) =====
{id:61,cat:"cv",q:"Explain the convolution operation in detail. What is stride, padding? (Google)",a:"<b>Convolution (卷积):</b> Kernel slides over input computing dot products.<br><br><b>Output size:</b><div class='formula'>O = (I - K + 2P) / S + 1</div>I=input, K=kernel, P=padding, S=stride<br><br><b>Padding:</b> 'same' (output=input size) vs 'valid' (no padding)<br><b>Stride:</b> Step size of kernel movement. Stride>1 → downsampling<br><b>Dilated convolution:</b> Gaps in kernel → larger receptive field without more params",company:"Google"},
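// A minimal sketch (in a comment to keep the array valid): the output-size formula from the
// card, checked against the ResNet stem (7×7 conv, stride 2, padding 3, 224×224 input).
/*
const convOut = (I, K, P, S) => Math.floor((I - K + 2 * P) / S) + 1; // O = (I − K + 2P)/S + 1
console.log(convOut(224, 7, 3, 2)); // 112
*/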
{id:62,cat:"cv",q:"What is pooling? Compare max pooling and average pooling. (Amazon)",a:"<b>Pooling (池化):</b> Downsamples feature maps, reduces computation, adds translation invariance.<br><br><b>Max pooling:</b> Takes maximum value in window → preserves strongest activations, most common<br><b>Average pooling:</b> Takes mean → smoother, used in final layers (Global Average Pooling)<br><br><b>Trends:</b> Strided convolutions replacing pooling in modern architectures. Global average pooling replaces FC layers (fewer params, less overfitting).",company:"Amazon"},
{id:63,cat:"cv",q:"Explain ResNet. Why was it revolutionary? (Meta)",a:"<b>ResNet (残差网络, He et al. 2015):</b><div class='formula'>y = F(x, {Wᵢ}) + x</div><b>Revolution:</b><br>• Enabled training of 152+ layer networks (won ImageNet 2015)<br>• Solved degradation problem (deeper ≠ better before ResNet)<br>• Skip connections allow gradient flow<br><br><b>Variants:</b> ResNet-50/101/152, ResNeXt (grouped convolutions), Wide ResNet, Pre-activation ResNet<br><b>Bottleneck block:</b> 1×1 → 3×3 → 1×1 (reduces computation)",company:"Meta"},
{id:64,cat:"cv",q:"Explain YOLO (You Only Look Once) for object detection. (Google)",a:"<b>YOLO (目标检测):</b> Single-pass detector, divides image into S×S grid.<br><br>Each cell predicts:<br>• B bounding boxes (x, y, w, h, confidence)<br>• C class probabilities<br><br><b>Advantages:</b> Real-time (45+ FPS), global context (fewer background errors)<br><b>vs Two-stage (R-CNN):</b> Faster but slightly less accurate<br><br><b>Evolution:</b> YOLOv1→v2→v3→v4→v5→v8→YOLO-World<br>Key innovations: anchor-free detection, multi-scale features, CSP backbone.",company:"Google"},
{id:65,cat:"cv",q:"Explain image segmentation: semantic, instance, and panoptic. (Amazon)",a:"<b>Semantic segmentation (语义分割):</b> Classify every pixel → no instance distinction<br>Models: FCN, U-Net, DeepLab<br><br><b>Instance segmentation (实例分割):</b> Detect + segment each object instance<br>Models: Mask R-CNN<br><br><b>Panoptic segmentation (全景分割):</b> Combines both — every pixel gets class + instance ID<br><br><b>Key architectures:</b><br>• U-Net: encoder-decoder with skip connections (medical imaging)<br>• DeepLab: atrous convolution + CRF<br>• SAM (Segment Anything): foundation model for segmentation",company:"Amazon"},
{id:66,cat:"cv",q:"What is data augmentation for computer vision? (Meta)",a:"<b>Data augmentation (数据增强):</b> Artificially expand training set.<br><br><b>Basic:</b> Flip, rotation, crop, scale, color jitter, blur<br><b>Advanced:</b><br>• Cutout/Random erasing: mask random patches<br>• Mixup: blend two images and labels<br>• CutMix: paste patch from one image to another<br>• AutoAugment: learned augmentation policies<br>• RandAugment: simplified random augmentation<br><br><b>For self-supervised:</b> SimCLR uses strong augmentation for contrastive learning",company:"Meta"},
{id:67,cat:"cv",q:"Explain the Vision Transformer (ViT). (OpenAI)",a:"<b>ViT (视觉Transformer, Dosovitskiy 2020):</b><br><br>1. Split image into patches (e.g., 16×16)<br>2. Linear projection of flattened patches → patch embeddings<br>3. Add positional embeddings + [CLS] token<br>4. Standard Transformer encoder<br>5. [CLS] token → classification head<br><br><b>Key findings:</b><br>• Needs large data (JFT-300M) or strong augmentation<br>• Scales better than CNNs with more data/compute<br>• Variants: DeiT (data-efficient), Swin Transformer (shifted windows, hierarchical)",company:"OpenAI"},
{id:68,cat:"cv",q:"What is transfer learning in CV? How do you fine-tune a pre-trained model? (Google)",a:"<b>Transfer learning (迁移学习):</b> Use ImageNet pre-trained model as starting point.<br><br><b>Strategies:</b><br>1. <b>Feature extraction:</b> Freeze backbone, train new head only<br>2. <b>Fine-tuning:</b> Unfreeze some/all layers, train with small LR<br>3. <b>Progressive unfreezing:</b> Gradually unfreeze layers (ULMFiT-style)<br><br><b>Tips:</b><br>• Lower LR for pre-trained layers (discriminative LR)<br>• Early layers = general features (edges, textures)<br>• Later layers = task-specific features",company:"Google"},
{id:69,cat:"cv",q:"Explain contrastive learning (SimCLR, MoCo). (Meta)",a:"<b>Contrastive learning (对比学习):</b> Learn representations by pulling positives together, pushing negatives apart.<br><br><b>SimCLR:</b><br>1. Two augmented views of same image → positive pair<br>2. Different images → negative pairs<br>3. NT-Xent loss (normalized temperature-scaled cross-entropy)<div class='formula'>L = -log(exp(sim(zᵢ,zⱼ)/τ) / Σexp(sim(zᵢ,zₖ)/τ))</div><b>MoCo:</b> Momentum encoder + queue of negatives (memory-efficient)<br><b>DINO/DINOv2:</b> Self-distillation, no negatives needed.",company:"Meta"},
{id:70,cat:"cv",q:"What is object detection? Compare one-stage vs two-stage detectors. (Amazon)",a:"<b>Object detection (目标检测):</b> Localize + classify objects in images.<br><br><b>Two-stage (proposal-based):</b><br>• R-CNN → Fast R-CNN → Faster R-CNN<br>• RPN generates proposals → classify + refine<br>• Higher accuracy, slower<br><br><b>One-stage:</b><br>• YOLO, SSD, RetinaNet<br>• Direct prediction, no proposals<br>• Faster, good for real-time<br><br><b>RetinaNet innovation:</b> Focal Loss solves class imbalance<div class='formula'>FL = -(1-pₜ)^γ log(pₜ)</div><b>Modern:</b> DETR (Transformer-based, end-to-end)",company:"Amazon"},
// ===== Recommender Systems (5) =====
{id:71,cat:"recsys",q:"Explain collaborative filtering vs content-based filtering. (Amazon)",a:"<b>Collaborative Filtering (协同过滤):</b><br>• User-based: Find similar users, recommend their items<br>• Item-based: Find similar items to what user liked<br>• Matrix Factorization: Decompose user-item matrix<div class='formula'>R ≈ U·Vᵀ (U: user factors, V: item factors)</div><b>Content-based (基于内容):</b> Recommend items similar to user's history using item features.<br><br><b>Hybrid:</b> Combine both. Cold start: content-based helps new items/users.",company:"Amazon"},
{id:72,cat:"recsys",q:"Explain matrix factorization for recommendations. (Netflix/Google)",a:"<b>Matrix Factorization (矩阵分解):</b><br>Decompose sparse user-item matrix R (m×n) into:<div class='formula'>R ≈ U(m×k) · V(n×k)ᵀ</div><b>Training:</b> Minimize squared error on observed entries + regularization<div class='formula'>L = Σ(rᵢⱼ - uᵢᵀvⱼ)² + λ(||U||² + ||V||²)</div><b>Optimization:</b> SGD or ALS (Alternating Least Squares)<br><b>Extensions:</b> SVD++, factorization machines, implicit feedback (BPR loss)<br>Won Netflix Prize (2009).",company:"Netflix/Google"},
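// A minimal sketch (in a comment to keep the array valid): the SGD update for matrix
// factorization from the card, on a single toy user/item with 2 latent factors.
/*
function mfUpdate(U, V, u, i, r, lr = 0.01, lam = 0.1) {
  const pred = U[u].reduce((s, x, k) => s + x * V[i][k], 0); // uᵢᵀvⱼ
  const err = r - pred;
  for (let k = 0; k < U[u].length; k++) {
    const uk = U[u][k];
    U[u][k] += lr * (err * V[i][k] - lam * uk);              // gradient step + L2 shrinkage
    V[i][k] += lr * (err * uk - lam * V[i][k]);
  }
}
const U = [[0.1, 0.2]], V = [[0.1, 0.1]];
for (let e = 0; e < 500; e++) mfUpdate(U, V, 0, 0, 4.0);     // one observed rating r = 4
console.log(U[0].reduce((s, x, k) => s + x * V[0][k], 0).toFixed(2)); // 3.90 — λ keeps it below r
*/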
{id:73,cat:"recsys",q:"How do deep learning models work for recommendations? (Meta)",a:"<b>Deep Rec Models:</b><br><br>• <b>NCF (Neural Collaborative Filtering):</b> Replace dot product with MLP<br>• <b>Wide & Deep:</b> Wide (memorization) + Deep (generalization)<br>• <b>DeepFM:</b> FM for feature interactions + DNN<br>• <b>DLRM (Meta):</b> Embedding tables + MLP interaction<br>• <b>Two-tower:</b> Separate user/item encoders → dot product for retrieval<br><br><b>Production pipeline:</b> Candidate generation (retrieval) → Ranking → Re-ranking<br>Embeddings dominate memory in industrial systems.",company:"Meta"},
{id:74,cat:"recsys",q:"What is the cold start problem and how do you handle it? (Amazon)",a:"<b>Cold Start (冷启动):</b> No interaction data for new users/items.<br><br><b>New User:</b><br>• Onboarding survey / preference quiz<br>• Demographic-based recommendations<br>• Popular/trending items as fallback<br>• Explore-exploit (bandit algorithms)<br><br><b>New Item:</b><br>• Content-based features (text, images)<br>• Metadata similarity<br>• Boost exploration of new items<br><br><b>Solutions:</b> Hybrid models, meta-learning, side information, knowledge graphs",company:"Amazon"},
{id:75,cat:"recsys",q:"How do you evaluate recommender systems? (Google)",a:"<b>Offline metrics:</b><br>• RMSE, MAE (rating prediction)<br>• Precision@K, Recall@K, NDCG@K (ranking)<br>• MAP (Mean Average Precision)<br>• Hit Rate@K<br><br><b>Online metrics:</b><br>• CTR (Click-Through Rate)<br>• Conversion rate<br>• User engagement (session length, return rate)<br>• Revenue per session<br><br><b>A/B testing</b> is gold standard. <b>Caution:</b> Offline ≠ online performance. Also consider diversity, novelty, serendipity, fairness.",company:"Google"},
// ===== Reinforcement Learning (5) =====
{id:76,cat:"rl",q:"Explain the core RL framework: agent, environment, reward. (OpenAI)",a:"<b>RL (强化学习):</b> Agent learns by interacting with environment.<br><br><b>Components:</b><br>• <b>State (s):</b> Environment observation<br>• <b>Action (a):</b> Agent's choice<br>• <b>Reward (r):</b> Scalar feedback signal<br>• <b>Policy π(a|s):</b> Action selection strategy<br>• <b>Value V(s):</b> Expected cumulative reward from state<br><div class='formula'>G_t = Σ γᵏ r_{t+k+1} (discounted return)</div><b>Goal:</b> Learn π* that maximizes expected return. Exploration vs exploitation tradeoff.",company:"OpenAI"},
{id:77,cat:"rl",q:"Compare Q-learning and Policy Gradient methods. (Google)",a:"<b>Q-learning (value-based):</b><div class='formula'>Q(s,a) ← Q(s,a) + α[r + γ·max Q(s',a') - Q(s,a)]</div>• Off-policy, learns Q-table/network (DQN)<br>• Discrete actions only (unless continuous extensions)<br><br><b>Policy Gradient:</b><div class='formula'>∇J = E[∇log π(a|s) · G_t]</div>• Directly optimize policy<br>• Works with continuous actions<br>• High variance → use baselines (A2C, PPO)<br><br><b>Actor-Critic:</b> Combines both — actor (policy) + critic (value)",company:"Google"},
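// A minimal sketch (in a comment to keep the array valid): one tabular Q-learning update on
// a toy two-state MDP, following the rule on the card.
/*
const Q = { s0: [0, 0], s1: [1.0, 0.5] };             // Q[state] = [Q(s,a₀), Q(s,a₁)]
const alphaLR = 0.1, gammaRL = 0.9;
function qUpdate(s, a, r, sNext) {
  const target = r + gammaRL * Math.max(...Q[sNext]); // r + γ·max Q(s',a')
  Q[s][a] += alphaLR * (target - Q[s][a]);
}
qUpdate('s0', 0, 1.0, 's1');
console.log(Q.s0[0].toFixed(2)); // 0.19 = 0.1·(1.0 + 0.9·1.0 − 0)
*/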
{id:78,cat:"rl",q:"Explain PPO (Proximal Policy Optimization). Why is it popular? (OpenAI)",a:"<b>PPO:</b> Stable policy gradient method by clipping updates.<div class='formula'>L = min(rₜ(θ)Âₜ, clip(rₜ(θ), 1-ε, 1+ε)Âₜ)</div>where rₜ(θ) = π_θ(a|s) / π_old(a|s)<br><br><b>Why popular:</b><br>• Simple to implement<br>• Stable training (no trust region computation like TRPO)<br>• Works across many domains<br>• Used for RLHF in ChatGPT/LLM alignment<br>• Good balance of sample efficiency and stability<br>ε typically 0.1-0.2",company:"OpenAI"},
{id:79,cat:"rl",q:"What is model-based vs model-free RL? (Meta)",a:"<b>Model-free (无模型):</b> Learn policy/value directly from experience.<br>• Q-learning, SARSA, PPO, SAC<br>• More samples needed, but simpler<br><br><b>Model-based (基于模型):</b> Learn environment dynamics T(s'|s,a), then plan.<br>• MuZero, Dreamer, World Models<br>• More sample-efficient<br>• But model errors can compound<br><br><b>Hybrid:</b> Dyna-Q — use real + simulated experience<br><b>AlphaGo:</b> Model-based (known dynamics) + MCTS + value/policy networks",company:"Meta"},
{id:80,cat:"rl",q:"Explain multi-armed bandits and their applications. (Amazon)",a:"<b>Multi-Armed Bandit (多臂老虎机):</b> Simplified RL — no state transitions, just actions and rewards.<br><br><b>Algorithms:</b><br>• ε-greedy: Explore with prob ε, exploit otherwise<br>• UCB (Upper Confidence Bound):<div class='formula'>A_t = argmax[Q(a) + c√(ln t / N(a))]</div>• Thompson Sampling: Sample from posterior of each arm<br><br><b>Applications:</b> A/B testing, ad selection, recommendation exploration, clinical trials, hyperparameter tuning",company:"Amazon"},
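// A minimal sketch (in a comment to keep the array valid): UCB arm selection as on the card,
// with toy value estimates Q, pull counts N, and exploration constant c = 2.
/*
function ucbPick(Q, N, t, c = 2) {
  let best = 0, bestVal = -Infinity;
  for (let a = 0; a < Q.length; a++) {
    if (N[a] === 0) return a;                             // pull every arm once first
    const val = Q[a] + c * Math.sqrt(Math.log(t) / N[a]); // Q(a) + c√(ln t / N(a))
    if (val > bestVal) { bestVal = val; best = a; }
  }
  return best;
}
console.log(ucbPick([0.6, 0.5], [100, 10], 110)); // 1 — the rarely pulled arm wins via the bonus
*/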
// ===== System Design & MLOps (10) =====
{id:81,cat:"mlops",q:"Design an ML model serving system. What are the key considerations? (Google)",a:"<b>Key components:</b><br>• <b>Model registry:</b> Version control for models (MLflow, Weights & Biases)<br>• <b>Serving infrastructure:</b> REST/gRPC API, batching, caching<br>• <b>Latency:</b> p50, p99 targets. Model optimization: quantization, pruning, distillation<br>• <b>Scaling:</b> Horizontal (replicas) + auto-scaling based on QPS<br>• <b>A/B testing:</b> Traffic splitting, canary deployments<br>• <b>Monitoring:</b> Data drift, model performance, system metrics<br>• <b>Rollback:</b> Quick fallback to previous model version",company:"Google"},
{id:82,cat:"mlops",q:"How would you design a feature store? (Amazon)",a:"<b>Feature Store (特征存储):</b> Centralized platform for feature management.<br><br><b>Components:</b><br>• <b>Offline store:</b> Historical features for training (data warehouse)<br>• <b>Online store:</b> Low-latency serving (Redis, DynamoDB)<br>• <b>Feature registry:</b> Metadata, lineage, documentation<br>• <b>Transformation pipeline:</b> Batch + streaming feature computation<br><br><b>Benefits:</b> Feature reuse, consistency (train-serve skew prevention), discovery<br><b>Tools:</b> Feast, Tecton, Databricks Feature Store, Vertex AI Feature Store",company:"Amazon"},
{id:83,cat:"mlops",q:"Explain distributed training strategies. (Meta)",a:"<b>Data Parallelism (数据并行):</b><br>• Each GPU has model copy, processes different data<br>• Sync gradients via AllReduce<br>• Most common, easy to implement<br><br><b>Model Parallelism (模型并行):</b><br>• Split model across GPUs (for huge models)<br>• Pipeline parallelism: split by layers<br>• Tensor parallelism: split within layers (Megatron-LM)<br><br><b>ZeRO (DeepSpeed):</b> Shard optimizer states, gradients, parameters<br><b>FSDP (PyTorch):</b> Fully Sharded Data Parallel<br>LLM training uses all three: 3D parallelism",company:"Meta"},
{id:84,cat:"mlops",q:"How do you detect and handle data drift in production? (Google)",a:"<b>Data Drift (数据漂移):</b> Input distribution changes over time.<br><br><b>Types:</b><br>• <b>Covariate shift:</b> P(X) changes<br>• <b>Concept drift:</b> P(Y|X) changes<br>• <b>Label drift:</b> P(Y) changes<br><br><b>Detection:</b><br>• Statistical tests: KS test, Chi-squared, PSI<br>• Monitor feature distributions over time<br>• Track prediction distribution changes<br>• Model performance degradation alerts<br><br><b>Handling:</b> Retrain on recent data, online learning, windowed training, automated retraining pipelines",company:"Google"},
{id:85,cat:"mlops",q:"Design an ML pipeline for a search ranking system. (Google)",a:"<b>Search Ranking Pipeline:</b><br><br>1. <b>Query understanding:</b> Intent classification, query expansion, spell correction<br>2. <b>Candidate retrieval:</b> Inverted index + ANN (approximate nearest neighbor)<br>3. <b>Feature extraction:</b> Query-document features (BM25, semantic similarity, freshness, authority)<br>4. <b>Ranking model:</b> LambdaMART or neural ranker (cross-encoder)<br>5. <b>Re-ranking:</b> Diversity, personalization, freshness boost<br>6. <b>Evaluation:</b> NDCG, MRR, online A/B with engagement metrics<br><br><b>Scale:</b> Two-tower for retrieval (fast), cross-encoder for ranking (accurate)",company:"Google"},
{id:86,cat:"mlops",q:"What is model quantization? Explain different approaches. (OpenAI)",a:"<b>Quantization (量化):</b> Reduce numerical precision to shrink model and speed up inference.<br><br><b>Types:</b><br>• <b>Post-training quantization (PTQ):</b> Quantize after training. FP32→INT8, minimal accuracy loss<br>• <b>Quantization-aware training (QAT):</b> Simulate quantization during training. Better accuracy<br>• <b>Mixed precision:</b> FP16 for most ops, FP32 for sensitive ones<br><br><b>LLM quantization:</b> GPTQ, AWQ, GGML/GGUF (4-bit). 70B model in 4-bit ≈ 35GB → fits on consumer GPU<br>Tradeoff: size/speed vs accuracy",company:"OpenAI"},
{id:87,cat:"mlops",q:"How would you design a fraud detection system? (Amazon)",a:"<b>Fraud Detection System:</b><br><br><b>Features:</b> Transaction amount, frequency, location, device, time, graph features (user connections)<br><br><b>Challenges:</b><br>• Extreme class imbalance (~0.1% fraud)<br>• Adversarial: fraudsters adapt<br>• Real-time requirements<br>• Label delay (fraud discovered later)<br><br><b>Architecture:</b><br>1. Rule engine (known patterns) → fast filter<br>2. ML model (XGBoost/NN) → score transactions<br>3. Graph neural network → detect collusion<br>4. Online learning → adapt to new patterns<br>5. Human review for edge cases",company:"Amazon"},
{id:88,cat:"mlops",q:"Explain A/B testing for ML models. What pitfalls exist? (Meta)",a:"<b>A/B Testing:</b> Randomly split users → control (old model) vs treatment (new model).<br><br><b>Steps:</b><br>1. Define metric (North Star + guardrails)<br>2. Calculate sample size (power analysis)<br>3. Run experiment for sufficient duration<br>4. Analyze with statistical tests<br><br><b>Pitfalls:</b><br>• Peeking problem → use sequential testing<br>• Multiple comparisons → Bonferroni correction<br>• Network effects → use cluster randomization<br>• Novelty/primacy effects → run long enough<br>• Simpson's paradox → segment analysis",company:"Meta"},
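// Illustrative sketch (commented out, not part of the app): the analysis step from
// the card above as a pooled two-proportion z-test. x1/n1 = control conversions/users,
// x2/n2 = treatment (hypothetical names).
// function abZScore(x1, n1, x2, n2) {
//   const p1 = x1 / n1, p2 = x2 / n2, p = (x1 + x2) / (n1 + n2); // pooled rate
//   const se = Math.sqrt(p * (1 - p) * (1 / n1 + 1 / n2));       // standard error
//   return (p2 - p1) / se; // |z| > 1.96 → significant at α=0.05, two-sided
// }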
{id:89,cat:"mlops",q:"What is MLflow? Describe the ML experiment tracking workflow. (Google)",a:"<b>Experiment Tracking (实验跟踪):</b> Record everything about ML experiments.<br><br><b>MLflow components:</b><br>• <b>Tracking:</b> Log params, metrics, artifacts<br>• <b>Projects:</b> Reproducible runs<br>• <b>Models:</b> Packaging format + serving<br>• <b>Model Registry:</b> Versioning, stage transitions<br><br><b>Workflow:</b><br>1. Log hyperparameters, data version, code version<br>2. Track metrics during training<br>3. Save model artifacts<br>4. Compare experiments, pick best<br>5. Register → stage → production<br><br><b>Alternatives:</b> W&B, Neptune, CometML, TensorBoard",company:"Google"},
{id:90,cat:"mlops",q:"How do you scale ML training to petabyte-scale datasets? (Meta)",a:"<b>Scaling strategies:</b><br><br>• <b>Data:</b> Sharding, streaming (don't load all in memory), efficient formats (Parquet, TFRecord)<br>• <b>Compute:</b> Distributed training (data + model parallelism), gradient accumulation<br>• <b>Storage:</b> Object storage (S3) + caching layer<br>• <b>Preprocessing:</b> Distributed ETL (Spark, Ray), feature caching<br>• <b>Curriculum learning:</b> Train on easy examples first, progressively harder<br>• <b>Data sampling:</b> Importance sampling, deduplication<br><br><b>Infrastructure:</b> GPU clusters, fast interconnect (NVLink, InfiniBand), checkpointing",company:"Meta"},
// ===== Coding & Algorithms (10) =====
{id:91,cat:"coding",q:"Explain time complexity. What is Big-O notation? (Google)",a:"<b>Big-O (时间复杂度):</b> Upper bound on growth rate.<br><br><b>Common complexities:</b><br>• O(1): Hash table lookup<br>• O(log n): Binary search<br>• O(n): Linear scan<br>• O(n log n): Sorting (merge/quick sort)<br>• O(n²): Nested loops, naive similarity<br>• O(2ⁿ): Subset enumeration<br><br><b>In ML:</b><br>• KNN prediction: O(nd) per query<br>• Self-attention: O(n²d)<br>• Matrix multiply: O(n³) or O(n^2.37)",company:"Google"},
{id:92,cat:"coding",q:"What data structures are important for ML systems? (Amazon)",a:"<b>Key data structures:</b><br><br>• <b>Hash tables:</b> Feature lookup, embedding tables, O(1) access<br>• <b>Heaps:</b> Top-k retrieval, priority queues, beam search<br>• <b>Trees:</b> KD-trees (nearest neighbor), decision trees, B-trees (databases)<br>• <b>Graphs:</b> Knowledge graphs, GNNs, computational graphs<br>• <b>Bloom filters:</b> Membership testing (seen this sample?)<br>• <b>Tries:</b> Tokenizer vocabulary lookup<br>• <b>Arrays/Tensors:</b> NumPy, PyTorch — contiguous memory for SIMD/GPU",company:"Amazon"},
{id:93,cat:"coding",q:"Implement binary search. When is it applicable in ML? (Meta)",a:"<div class='formula'>def binary_search(arr, target):<br> lo, hi = 0, len(arr)-1<br> while lo <= hi:<br> mid = (lo+hi)//2<br> if arr[mid] == target: return mid<br> elif arr[mid] < target: lo = mid+1<br> else: hi = mid-1<br> return -1</div><b>In ML:</b><br>• Hyperparameter search (threshold tuning)<br>• Finding optimal split in decision trees (sorted features)<br>• Quantile computation<br>• Bisection method for root-finding<br>Time: O(log n), Space: O(1)",company:"Meta"},
{id:94,cat:"coding",q:"Explain hash maps and their use in ML systems. (Google)",a:"<b>Hash Map (哈希表):</b> Key-value store with O(1) avg access.<div class='formula'>index = hash(key) % table_size</div><b>Collision handling:</b> Chaining (linked list) or open addressing (linear probing)<br><br><b>In ML:</b><br>• Feature hashing (hashing trick): Map features to fixed-size vector without dictionary<br>• Embedding table lookup<br>• Counting features (CountMin sketch for approximate counts)<br>• Deduplication in training data<br>• Caching predictions / memoization",company:"Google"},
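// Illustrative sketch (commented out, not part of the app): the hashing trick from
// the card above — features map straight to indices of a fixed-size vector, no
// vocabulary dictionary. The djb2-style hash and D=1024 are arbitrary choices.
// function hashFeatures(tokens, D = 1024) {
//   const vec = new Float32Array(D);
//   for (const t of tokens) {
//     let h = 5381;
//     for (let i = 0; i < t.length; i++) h = ((h * 33) ^ t.charCodeAt(i)) >>> 0;
//     vec[h % D] += 1; // collisions are tolerated as noise
//   }
//   return vec;
// }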
{id:95,cat:"coding",q:"How would you implement a simple neural network from scratch? (Amazon)",a:"<div class='formula'>def forward(x, W1, b1, W2, b2):<br> z1 = x @ W1 + b1<br> a1 = relu(z1) # max(0, z1)<br> z2 = a1 @ W2 + b2<br> return softmax(z2)<br><br>def backward(x, z1, a1, pred, labels, W2): # chain rule<br> dz2 = pred - labels # softmax+CE grad<br> dW2 = a1.T @ dz2<br> da1 = dz2 @ W2.T<br> dz1 = da1 * (z1 > 0) # ReLU grad<br> dW1 = x.T @ dz1</div>Key: Initialize weights properly, normalize inputs, use mini-batches.",company:"Amazon"},
{id:96,cat:"coding",q:"Explain dynamic programming. Give an ML-relevant example. (Meta)",a:"<b>Dynamic Programming (动态规划):</b> Solve problems by breaking into overlapping subproblems.<br><br><b>Requirements:</b> Optimal substructure + overlapping subproblems<br><br><b>ML examples:</b><br>• <b>Viterbi algorithm:</b> Best path in HMM<br>• <b>CTC decoding:</b> Speech recognition alignment<br>• <b>Edit distance:</b> String matching in NLP<br>• <b>Knapsack:</b> Feature selection under budget<br><div class='formula'>dp[i][j] = min(dp[i-1][j-1] + (a≠b),<br> dp[i-1][j]+1, dp[i][j-1]+1)</div>(Edit distance recurrence)",company:"Meta"},
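// Illustrative sketch (commented out, not part of the app): the edit-distance
// recurrence from the card above expanded into a full DP table.
// function editDistance(a, b) {
//   const dp = Array.from({length: a.length + 1}, (_, i) =>
//     Array.from({length: b.length + 1}, (_, j) => i === 0 ? j : j === 0 ? i : 0));
//   for (let i = 1; i <= a.length; i++)
//     for (let j = 1; j <= b.length; j++)
//       dp[i][j] = Math.min(
//         dp[i-1][j-1] + (a[i-1] === b[j-1] ? 0 : 1), // match / substitute
//         dp[i-1][j] + 1,                              // delete
//         dp[i][j-1] + 1);                             // insert
//   return dp[a.length][b.length];
// }
// editDistance('kitten', 'sitting') === 3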
{id:97,cat:"coding",q:"Explain graph algorithms relevant to ML. (Google)",a:"<b>Graph algorithms in ML:</b><br><br>• <b>BFS/DFS:</b> Graph traversal, connected components, knowledge graph exploration<br>• <b>Dijkstra/A*:</b> Shortest path in routing, game AI<br>• <b>PageRank:</b><div class='formula'>PR(v) = (1-d)/N + d·Σ PR(u)/outdeg(u)</div>(sum over pages u linking to v) Web ranking, node importance<br>• <b>Spectral clustering:</b> Graph Laplacian eigenvectors<br>• <b>Message passing:</b> GNN foundation — aggregate neighbor information<br>• <b>Topological sort:</b> Computational graph execution order (DAG)",company:"Google"},
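// Illustrative sketch (commented out, not part of the app): power-iteration
// PageRank for the formula above. `links[u]` = list of nodes u points to
// (hypothetical adjacency input); dangling nodes are ignored for brevity.
// function pageRank(links, d = 0.85, iters = 50) {
//   const nodes = Object.keys(links), N = nodes.length;
//   let pr = Object.fromEntries(nodes.map(v => [v, 1 / N])); // uniform start
//   for (let k = 0; k < iters; k++) {
//     const next = Object.fromEntries(nodes.map(v => [v, (1 - d) / N]));
//     for (const u of nodes)
//       for (const v of links[u]) next[v] += d * pr[u] / links[u].length;
//     pr = next;
//   }
//   return pr;
// }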
{id:98,cat:"coding",q:"What is the computational complexity of training common ML models? (Amazon)",a:"<b>Training complexity:</b><br><br>• <b>Linear Regression:</b> O(nd²) closed-form, O(ndi) iterative<br>• <b>Logistic Regression:</b> O(ndi) — i iterations<br>• <b>SVM:</b> O(n²) to O(n³) — kernel matrix<br>• <b>Random Forest:</b> O(n·d·log(n)·T) — T trees<br>• <b>KMeans:</b> O(nkdi) — k clusters, i iterations<br>• <b>Neural Network:</b> O(n·Σlᵢlᵢ₊₁) per epoch — layer sizes<br>• <b>Transformer:</b> O(n²d) per layer — n=seq length<br><br>n=samples, d=features, i=iterations",company:"Amazon"},
{id:99,cat:"coding",q:"How do you efficiently find k-nearest neighbors? (Meta)",a:"<b>Exact methods:</b><br>• Brute force: O(nd) per query<br>• <b>KD-tree:</b> O(d·log n) avg, degrades in high-d<br>• <b>Ball tree:</b> Better for high-d, O(d·log n)<br><br><b>Approximate (ANN):</b><br>• <b>LSH:</b> Hash similar items to same bucket<br>• <b>HNSW:</b> Hierarchical navigable small world graph — state of the art<br>• <b>IVF:</b> Inverted file index with quantization<br>• <b>Product Quantization:</b> Compress vectors for fast comparison<br><br><b>Libraries:</b> FAISS (Meta), ScaNN (Google), Annoy (Spotify), Milvus",company:"Meta"},
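// Illustrative sketch (commented out, not part of the app): the brute-force
// baseline the ANN methods above approximate — O(nd) distances per query.
// function knn(query, points, k) {
//   const dist2 = p => p.reduce((s, x, i) => s + (x - query[i]) ** 2, 0);
//   return points.map((p, idx) => ({idx, d: dist2(p)}))
//                .sort((a, b) => a.d - b.d)
//                .slice(0, k); // k nearest, by squared Euclidean distance
// }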
{id:100,cat:"coding",q:"Explain MapReduce and how it applies to ML. (Google)",a:"<b>MapReduce:</b><br>1. <b>Map:</b> Apply function to each data chunk in parallel<br>2. <b>Shuffle:</b> Group by key<br>3. <b>Reduce:</b> Aggregate results per key<br><br><b>ML applications:</b><br>• Distributed gradient computation: Map=compute gradients per shard, Reduce=aggregate<br>• Large-scale feature engineering<br>• Distributed word count / TF-IDF<br>• AllReduce for distributed training<br><br><b>Modern:</b> Spark (in-memory), Ray (ML-native), Dask (Python-friendly). Pure MapReduce mostly replaced by specialized frameworks.",company:"Google"}
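// Illustrative sketch (commented out, not part of the app): the three MapReduce
// phases from the card above as plain array operations on an in-memory "cluster".
// function mapReduceWordCount(docs) {
//   const mapped = docs.flatMap(d => d.split(/\s+/).map(w => [w, 1]));  // Map
//   const groups = {};                                                  // Shuffle
//   for (const [w, n] of mapped) (groups[w] = groups[w] || []).push(n);
//   return Object.fromEntries(Object.entries(groups)
//     .map(([w, ns]) => [w, ns.reduce((a, b) => a + b, 0)]));           // Reduce
// }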
];
const CAT_META={
linear:{name:"Linear Algebra & Math",cls:"cat-linear"},
prob:{name:"Probability & Statistics",cls:"cat-prob"},
ml:{name:"ML Fundamentals",cls:"cat-ml"},
dl:{name:"Deep Learning",cls:"cat-dl"},
nlp:{name:"NLP",cls:"cat-nlp"},
cv:{name:"Computer Vision",cls:"cat-cv"},
recsys:{name:"Recommender Systems",cls:"cat-recsys"},
rl:{name:"Reinforcement Learning",cls:"cat-rl"},
mlops:{name:"System Design & MLOps",cls:"cat-mlops"},
coding:{name:"Coding & Algorithms",cls:"cat-coding"}
};
let currentIndex=0,isFlipped=false,filteredCards=[...CARDS],mode='browse';
const STORAGE_KEY='ml-flashcards-progress';
function loadProgress(){try{return JSON.parse(localStorage.getItem(STORAGE_KEY))||{}}catch(e){return{}}}
function saveProgress(p){localStorage.setItem(STORAGE_KEY,JSON.stringify(p))}
function getCardStatus(id){const p=loadProgress();return p[id]||{status:'unseen',lastReview:0,interval:1,easeFactor:2.5}}
function setCardStatus(id,status){const p=loadProgress();const prev=p[id]||{status:'unseen',lastReview:0,interval:1,easeFactor:2.5};
if(status==='mastered'){prev.interval=Math.max(prev.interval*prev.easeFactor,1);prev.easeFactor=Math.min(prev.easeFactor+0.1,3.0)}
else if(status==='review'){prev.interval=1;prev.easeFactor=Math.max(prev.easeFactor-0.2,1.3)}
prev.status=status;prev.lastReview=Date.now();p[id]=prev;saveProgress(p)}
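// SM-2-inspired scheduling: "mastered" multiplies the interval by the ease factor
// (capped at 3.0); "review" resets it to 1 day and lowers the ease factor (floor 1.3).
// From the defaults (interval 1, ease 2.5), three "mastered" marks in a row give
// intervals of 2.5 → 6.5 → ~17.6 days.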
function initCategoryFilter(){const sel=document.getElementById('categoryFilter');
Object.entries(CAT_META).forEach(([k,v])=>{const o=document.createElement('option');o.value=k;o.textContent=v.name;sel.appendChild(o)})}
function filterCards(){
const cat=document.getElementById('categoryFilter').value;
const status=document.getElementById('statusFilter').value;
filteredCards=CARDS.filter(c=>{
if(cat!=='all'&&c.cat!==cat)return false;
if(status!=='all'){const s=getCardStatus(c.id).status;if(s!==status)return false}
return true});
if(mode==='spaced'){
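// Due time = lastReview + interval days (86400000 ms/day); unseen cards sort first,
// then the most-overdue cards.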
const now=Date.now();
filteredCards.sort((a,b)=>{
const sa=getCardStatus(a.id),sb=getCardStatus(b.id);
const da=sa.lastReview+sa.interval*86400000-now;
const db=sb.lastReview+sb.interval*86400000-now;
if(sa.status==='unseen'&&sb.status!=='unseen')return-1;
if(sb.status==='unseen'&&sa.status!=='unseen')return 1;
return da-db})}
currentIndex=0;isFlipped=false;renderCard();updateStats()}
function shuffleCards(){for(let i=filteredCards.length-1;i>0;i--){const j=Math.floor(Math.random()*(i+1));[filteredCards[i],filteredCards[j]]=[filteredCards[j],filteredCards[i]]}currentIndex=0;isFlipped=false;renderCard()} // Fisher-Yates: uniform shuffle; sort(()=>Math.random()-0.5) is biased
function updateStats(){
const p=loadProgress();let mastered=0,review=0,unseen=0;
CARDS.forEach(c=>{const s=(p[c.id]||{}).status||'unseen';
if(s==='mastered')mastered++;else if(s==='review')review++;else unseen++});
document.getElementById('statTotal').textContent=CARDS.length;
document.getElementById('statMastered').textContent=mastered;
document.getElementById('statReview').textContent=review;
document.getElementById('statUnseen').textContent=unseen}
function renderCard(){
const container=document.getElementById('cardContainer');
if(!filteredCards.length){container.innerHTML='<div class="empty-state"><h3>No cards match filters</h3><p>Try changing category or status filter</p></div>';return}
const c=filteredCards[currentIndex];const meta=CAT_META[c.cat];const status=getCardStatus(c.id);
const statusIcon=status.status==='mastered'?'✅':status.status==='review'?'🔄':'○';
document.getElementById('cardFront').innerHTML=`
<span class="card-category ${meta.cls}">${meta.name}</span>
<div class="card-company">${c.company||''} ${statusIcon}</div>
<div class="card-question">${c.q}</div>
<div class="card-hint">Click or press Space to reveal answer</div>`;
document.getElementById('cardBack').innerHTML=`
<span class="card-category ${meta.cls}">${meta.name}</span>
<div class="card-answer">${c.a}</div>`;
document.getElementById('card').classList.remove('flipped');
isFlipped=false;
document.getElementById('counter').textContent=`${currentIndex+1} / ${filteredCards.length}`}
function flipCard(){if(!filteredCards.length)return;
const card=document.getElementById('card');
isFlipped=!isFlipped;
card.classList.toggle('flipped');
if(isFlipped){const c=filteredCards[currentIndex];const s=getCardStatus(c.id);
if(s.status==='unseen')setCardStatus(c.id,'review');updateStats()}}
function nextCard(){if(!filteredCards.length)return;currentIndex=(currentIndex+1)%filteredCards.length;isFlipped=false;renderCard()}
function prevCard(){if(!filteredCards.length)return;currentIndex=(currentIndex-1+filteredCards.length)%filteredCards.length;isFlipped=false;renderCard()}
function markCard(status){if(!filteredCards.length)return;setCardStatus(filteredCards[currentIndex].id,status);updateStats();nextCard()}
document.addEventListener('keydown',e=>{
if(e.code==='Space'){e.preventDefault();flipCard()}
else if(e.code==='ArrowRight')nextCard();
else if(e.code==='ArrowLeft')prevCard();
else if(e.key==='1'||e.key==='2')markCard('review');
else if(e.key==='3')markCard('mastered')});
document.getElementById('categoryFilter').addEventListener('change',filterCards);
document.getElementById('statusFilter').addEventListener('change',filterCards);
document.getElementById('shuffleBtn').addEventListener('click',shuffleCards);
document.getElementById('resetBtn').addEventListener('click',()=>{if(confirm('Reset all progress?')){localStorage.removeItem(STORAGE_KEY);filterCards()}});
document.querySelectorAll('.mode-tabs button').forEach(btn=>{
btn.addEventListener('click',()=>{
document.querySelectorAll('.mode-tabs button').forEach(b=>b.classList.remove('active'));
btn.classList.add('active');mode=btn.dataset.mode;filterCards()})});
initCategoryFilter();filterCards();updateStats();
</script>
</body>
</html>