|
272 | 272 | " ax.set_yticklabels(labels, fontsize=7)\n", |
273 | 273 | " return im\n", |
274 | 274 | "\n", |
275 | | - "fig, axes = plt.subplots(1, 2, figsize=(16, 7))\n", |
| 275 | + "fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n", |
276 | 276 | "\n", |
277 | 277 | "print(\"Computing embeddings for BERT...\")\n", |
278 | 278 | "bert_embs = get_embeddings(\"bert-base-uncased\", sentences, pooling='cls')\n", |
279 | | - "plot_similarity(bert_embs, \"BERT [CLS] Similarity\", axes[0])\n", |
| 279 | + "im1 = plot_similarity(bert_embs, \"BERT [CLS] Similarity\", axes[0])\n", |
280 | 280 | "\n", |
281 | 281 | "print(\"Computing embeddings for RoBERTa...\")\n", |
282 | 282 | "roberta_embs = get_embeddings(\"roberta-base\", sentences, pooling='mean')\n", |
283 | | - "im = plot_similarity(roberta_embs, \"RoBERTa Mean-Pooled Similarity\", axes[1])\n", |
| 283 | + "im2 = plot_similarity(roberta_embs, \"RoBERTa Mean-Pooled Similarity\", axes[1])\n", |
284 | 284 | "\n", |
285 | | - "fig.colorbar(im, ax=axes, shrink=0.8)\n", |
286 | 285 | "plt.suptitle(\"Semantic Clustering Across Variants\", fontsize=16, color='#00693e')\n", |
287 | | - "fig.tight_layout(rect=[0, 0, 1, 0.95])\n", |
| 286 | + "fig.tight_layout(rect=[0, 0, 0.88, 0.95])\n", |
| 287 | + "cbar_ax = fig.add_axes([0.90, 0.15, 0.02, 0.7])\n", |
| 288 | + "fig.colorbar(im2, cax=cbar_ax)\n", |
288 | 289 | "plt.show()" |
289 | 290 | ] |
290 | 291 | }, |
|
338 | 339 | " status = \"FAKE\" if prob > 0.5 else \"REAL\"\n", |
339 | 340 | " print(f\"{token:<12} | {prob:<10.4f} | {status}\")\n", |
340 | 341 | "\n", |
341 | | - "# Example 1: A natural sentence\n", |
342 | | - "detect_fake_tokens(\"The chef cooked a delicious meal for the guests.\")\n", |
| 342 | + "# Example 1: A natural sentence (all tokens should be marked REAL)\n", |
| 343 | + "detect_fake_tokens(\"The doctor examined the patient carefully.\")\n", |
343 | 344 | "\n", |
344 | 345 | "print(\"\\n\" + \"=\"*40 + \"\\n\")\n", |
345 | 346 | "\n", |
346 | | - "# Example 2: A sentence with a 'fake' token (replaced 'cooked' with 'ate')\n", |
347 | | - "detect_fake_tokens(\"The chef ate a delicious meal for the guests.\")" |
| 347 | + "# Example 2: Same sentence but 'examined' → 'watched' (plausible but wrong in context)\n", |
| 348 | + "detect_fake_tokens(\"The doctor watched the patient carefully.\")\n", |
| 349 | + "\n", |
| 350 | + "print(\"\\n\" + \"=\"*40 + \"\\n\")\n", |
| 351 | + "\n", |
| 352 | + "# Example 3: 'doctor' → 'musician' (semantically odd with 'patient')\n", |
| 353 | + "detect_fake_tokens(\"The musician examined the patient carefully.\")" |
348 | 354 | ] |
349 | 355 | }, |
350 | 356 | { |
|
354 | 360 | "outputs": [], |
355 | 361 | "source": [ |
356 | 362 | "# Let's visualize the discriminator's confidence\n", |
357 | | - "sentence = \"The computer programmed the human to write better code.\"\n", |
| 363 | + "# 'published' has been swapped in for 'presented' — a plausible replacement, so check whether the discriminator flags it\n", |
| 364 | + "sentence = \"The researcher published her findings at the annual conference.\"\n", |
358 | 365 | "inputs = tokenizer(sentence, return_tensors=\"pt\")\n", |
359 | 366 | "tokens = tokenizer.convert_ids_to_tokens(inputs[\"input_ids\"][0])[1:-1] # Remove CLS/SEP\n", |
360 | 367 | "\n", |
361 | 368 | "with torch.no_grad():\n", |
362 | 369 | " logits = model(**inputs).logits[0][1:-1]\n", |
363 | 370 | " probs = torch.sigmoid(logits).numpy()\n", |
364 | 371 | "\n", |
365 | | - "plt.figure(figsize=(10, 5))\n", |
| 372 | + "plt.figure(figsize=(12, 5))\n", |
366 | 373 | "colors = ['#9d162e' if p > 0.5 else '#00693e' for p in probs] # Red for fake, Green for real\n", |
367 | 374 | "plt.bar(tokens, probs, color=colors)\n", |
368 | | - "plt.axhline(y=0.5, color='gray', linestyle='--')\n", |
| 375 | + "plt.axhline(y=0.5, color='gray', linestyle='--', label='Decision boundary')\n", |
369 | 376 | "plt.ylabel(\"Probability of being 'FAKE'\", color='#9d162e')\n", |
370 | 377 | "plt.title(\"ELECTRA Discriminator: Identifying 'Fake' Tokens\", fontsize=14)\n", |
371 | 378 | "plt.ylim(0, 1)\n", |
| 379 | + "plt.legend()\n", |
| 380 | + "plt.tight_layout()\n", |
372 | 381 | "plt.show()" |
373 | 382 | ] |
374 | 383 | }, |
|
378 | 387 | "source": [ |
379 | 388 | "### \ud83d\udca1 Discussion\n", |
380 | 389 | "\n", |
381 | | - "- In the second example, did ELECTRA correctly identify \"ate\" as the fake token? Why might it be suspicious of that word in that context?\n", |
382 | | - "- How is this task different from BERT's masked language modeling? Why might it be more efficient?\n", |
383 | | - "- Try a sentence where you replace a word with a synonym. Does ELECTRA still flag it as fake?\n", |
384 | | - "- What happens if you give it a completely nonsensical sentence?" |
| 390 | + "- In Example 2, did ELECTRA flag \"watched\" as suspicious? Doctors *examine* patients — \"watched\" is grammatical but semantically odd in a clinical context.\n", |
| 391 | + "- In Example 3, was the \"doctor\" → \"musician\" swap detected? Why might a swapped noun be easier or harder to detect than a swapped verb?\n", |
| 392 | + "- How is this task different from BERT's masked language modeling? Why might it be more efficient? (Hint: how many tokens does each approach learn from per sentence?)\n", |
| 393 | + "- Try replacing a word with a close synonym (e.g., \"quickly\" → \"rapidly\"). Does ELECTRA flag it?" |
385 | 394 | ] |
386 | 395 | }, |
387 | 396 | { |
|
0 commit comments