diff --git a/README.md b/README.md index b86a5c9..85603c9 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ **🌐 Live Website**: [https://codexecutives.com](https://codexecutives.com) -**✨ Now featuring 7 complete learning modules with 70+ interactive visualizations covering Git, JavaScript Engine, RxJS, Data Structures, Next.js, Big-O Notation, Python Programming, and a LeetCode-style playground with advanced debugging and gamification.** +**✨ Now featuring 8 complete learning modules with 80+ interactive visualizations covering Git, JavaScript Engine, RxJS, Data Structures, Next.js, Big-O Notation, Python Programming, AI Fundamentals, and a LeetCode-style playground with advanced debugging and gamification.** > **📌 Repository Maintainers**: See [REPOSITORY-ABOUT-QUICK-REFERENCE.md](./docs/REPOSITORY-ABOUT-QUICK-REFERENCE.md) for GitHub repository About section configuration (description, website, and topics). @@ -19,8 +19,8 @@ - **Data Structures**: Comprehensive guide to fundamental data structures with interactive visualizations - **Big-O Notation**: Complete algorithmic complexity analysis with 10+ interactive tools and metaphors - **Python Programming**: Complete Python tutorial covering philosophy, execution model, memory management, and concurrency +- **AI Fundamentals**: Machine learning concepts from scratch — neural networks, gradient descent, embeddings, and RAG pipelines - **LeetCode-Style Playground**: Interactive coding environment with debugging, visualizations, and gamification -- **More modules coming soon**: Algorithms, system design, design patterns ### 🎮 **Interactive Visualizations** @@ -305,6 +305,24 @@ Complete Python tutorial covering the fundamental concepts that make Python uniq - **Performance Analysis**: GIL impact, memory optimization, and concurrency alternatives - **Real-world Applications**: Best practices for CPU-bound vs I/O-bound tasks, memory profiling, and optimization techniques +### 🤖 **AI Fundamentals 
(Complete)** + +Master machine learning concepts from the ground up with interactive visualizations and beginner-friendly explanations: + +- **Introduction to AI/ML**: Core concepts, history, and the difference between AI, ML, and deep learning — with ELI10 analogies +- **ML Lifecycle**: End-to-end machine learning pipeline from data collection to model deployment +- **Feature Engineering**: Transforming raw data into meaningful inputs — normalization, encoding, and selection +- **Neural Networks**: How neurons, layers, and activation functions work — interactive forward pass visualization +- **Loss Functions**: Understanding MSE, cross-entropy, and how models measure their own mistakes +- **Gradient Descent**: Intuitive step-by-step visualization of how models learn by minimizing loss +- **Backpropagation**: Chain rule demystified — interactive gradient flow through a neural network +- **Generalization**: Overfitting, underfitting, bias-variance tradeoff, and regularization techniques +- **Training vs Inference**: The distinction between learning and prediction, with batch/online learning modes +- **Word Embeddings**: Turning text into vectors — Word2Vec, cosine similarity, and semantic relationships +- **RAG Pipeline**: Retrieval-Augmented Generation — grounding LLMs with external knowledge stores +- **Beginner Friendly**: Every section includes ELI10 boxes, real-world analogies, and key takeaways +- **11 Interactive Sections**: Progressive visualizations guiding learners from zero ML knowledge to RAG pipelines + ### 🎯 **LeetCode-Style Playground (Complete)** Interactive coding environment that transforms algorithm learning through visual debugging and gamification: @@ -396,14 +414,14 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file - 📧 **Issues**: Report bugs or request features via GitHub Issues - 💬 **Discussions**: Join community discussions for questions and ideas - 📖 **Documentation**: Comprehensive 
docs available in the `/docs` directory -- 🎓 **Learning Modules**: 7 complete interactive modules with 70+ visualizations +- 🎓 **Learning Modules**: 8 complete interactive modules with 80+ visualizations - 🎯 **Playground**: LeetCode-style coding environment with debugging and gamification --- **Built with ❤️ for developers, by developers** -_Transform your understanding of programming concepts through interactive visualization and hands-on learning. Master modern web development with our comprehensive Next.js, Git, JavaScript, RxJS, Data Structures, Big-O Notation, and Python Programming modules. Practice algorithms with our LeetCode-style playground featuring advanced debugging, real-time visualizations, and gamification._ +_Transform your understanding of programming concepts through interactive visualization and hands-on learning. Master modern web development with our comprehensive Next.js, Git, JavaScript, RxJS, Data Structures, Big-O Notation, Python Programming, and AI Fundamentals modules. 
Practice algorithms with our LeetCode-style playground featuring advanced debugging, real-time visualizations, and gamification._ ## 3D/2D Visualization Architecture diff --git a/src/App.tsx b/src/App.tsx index 96b77ae..0ef212b 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -27,6 +27,7 @@ const BigOPage = lazy(() => import('./features/bigo')); const PythonPage = lazy(() => import('./features/python')); const SystemDesignPage = lazy(() => import('./features/systemdesign')); const TypeScriptPage = lazy(() => import('./features/typescript')); +const AIFundamentalsPage = lazy(() => import('./features/ai')); const App: React.FC = () => { // Initialize analytics on app mount @@ -154,6 +155,14 @@ const App: React.FC = () => { } /> + + + + } + /> diff --git a/src/components/Header.tsx b/src/components/Header.tsx index 8065313..acbcec1 100644 --- a/src/components/Header.tsx +++ b/src/components/Header.tsx @@ -14,6 +14,7 @@ import { ChevronDown, BookOpen, Boxes, + Brain, } from 'lucide-react'; import { useUI } from '../shared/contexts'; @@ -109,6 +110,12 @@ const Header: React.FC = () => { icon: , description: 'Version control mastery', }, + { + label: 'AI Fundamentals', + path: '/ai', + icon: , + description: 'AI & Machine Learning fundamentals', + }, ], }, ]; @@ -172,6 +179,7 @@ const Header: React.FC = () => { if (path === '/git') return 'text-orange-600 bg-orange-50'; if (path === '/datastructures' || path === '/systemdesign') return 'text-blue-600 bg-blue-50'; if (path === '/bigo') return 'text-purple-600 bg-purple-50'; + if (path === '/ai') return 'text-rose-600 bg-rose-50'; return 'text-gray-700 hover:bg-gray-50'; }; diff --git a/src/components/Sidebar.tsx b/src/components/Sidebar.tsx index d4e0368..6083df6 100644 --- a/src/components/Sidebar.tsx +++ b/src/components/Sidebar.tsx @@ -52,6 +52,13 @@ const getThemeColorClass = ( buttonHover: 'hover:text-emerald-600', border: 'border-emerald-100', }, + rose: { + active: 'bg-rose-100 text-rose-800 border-rose-500', + hover: 
'hover:bg-rose-50 hover:text-rose-700', + buttonActive: 'text-rose-600 hover:text-rose-700', + buttonHover: 'hover:text-rose-600', + border: 'border-rose-100', + }, }; const colors = colorMap[theme.primary] || colorMap.blue; @@ -299,6 +306,19 @@ const sidebarSections: Record> = { { label: 'Best Practices', path: '/typescript?section=Best%20Practices' }, { label: 'Migration Guide', path: '/typescript?section=Migration%20Guide' }, ], + '/ai': [ + { label: 'Introduction', path: '/ai?section=Introduction' }, + { label: 'ML Lifecycle', path: '/ai?section=ML%20Lifecycle' }, + { label: 'Feature Engineering', path: '/ai?section=Feature%20Engineering' }, + { label: 'Neural Networks', path: '/ai?section=Neural%20Networks' }, + { label: 'Loss Functions', path: '/ai?section=Loss%20Functions' }, + { label: 'Gradient Descent', path: '/ai?section=Gradient%20Descent' }, + { label: 'Backpropagation', path: '/ai?section=Backpropagation' }, + { label: 'Generalization', path: '/ai?section=Generalization' }, + { label: 'Training vs Inference', path: '/ai?section=Training%20vs%20Inference' }, + { label: 'Word Embeddings', path: '/ai?section=Word%20Embeddings' }, + { label: 'RAG Pipeline', path: '/ai?section=RAG%20Pipeline' }, + ], '/': [], '/about': [], }; @@ -323,7 +343,9 @@ const Sidebar: React.FC = () => { | 'bigo' | 'python' | 'systemdesign' - | 'typescript' => { + | 'typescript' + | 'ai' => { + if (path.includes('/ai')) return 'ai'; if (path.includes('javascript')) return 'javascript'; if (path.includes('python')) return 'python'; if (path.includes('react')) return 'react'; diff --git a/src/features/ai/AIFundamentalsPage.tsx b/src/features/ai/AIFundamentalsPage.tsx new file mode 100644 index 0000000..c5199ca --- /dev/null +++ b/src/features/ai/AIFundamentalsPage.tsx @@ -0,0 +1,55 @@ +import React, { Suspense } from 'react'; +import { useLocation } from 'react-router-dom'; + +// Lazy load section components for better performance +import Introduction from 
'./components/sections/Introduction'; +const MLLifecycle = React.lazy(() => import('./components/sections/MLLifecycle')); +const FeatureEngineering = React.lazy(() => import('./components/sections/FeatureEngineering')); +const NeuralNetworks = React.lazy(() => import('./components/sections/NeuralNetworks')); +const LossFunctions = React.lazy(() => import('./components/sections/LossFunctions')); +const GradientDescent = React.lazy(() => import('./components/sections/GradientDescent')); +const Backpropagation = React.lazy(() => import('./components/sections/Backpropagation')); +const Generalization = React.lazy(() => import('./components/sections/Generalization')); +const TrainingVsInference = React.lazy(() => import('./components/sections/TrainingVsInference')); +const WordEmbeddings = React.lazy(() => import('./components/sections/WordEmbeddings')); +const RAGPipeline = React.lazy(() => import('./components/sections/RAGPipeline')); + +const sectionComponents: Record = { + Introduction, + 'ML Lifecycle': MLLifecycle, + 'Feature Engineering': FeatureEngineering, + 'Neural Networks': NeuralNetworks, + 'Loss Functions': LossFunctions, + 'Gradient Descent': GradientDescent, + Backpropagation, + Generalization, + 'Training vs Inference': TrainingVsInference, + 'Word Embeddings': WordEmbeddings, + 'RAG Pipeline': RAGPipeline, +}; + +function useQuery(): URLSearchParams { + return new URLSearchParams(useLocation().search); +} + +const AIFundamentalsPage: React.FC = () => { + const query = useQuery(); + const section = query.get('section') || 'Introduction'; + const Component = sectionComponents[section] || Introduction; + + return ( +
+ +
+
+ } + > + + + + ); +}; + +export default AIFundamentalsPage; diff --git a/src/features/ai/components/sections/Backpropagation.tsx b/src/features/ai/components/sections/Backpropagation.tsx new file mode 100644 index 0000000..4299409 --- /dev/null +++ b/src/features/ai/components/sections/Backpropagation.tsx @@ -0,0 +1,478 @@ +import React, { useState, useCallback } from 'react'; +import SectionLayout from '../../../../components/shared/SectionLayout'; +import ThemeCard from '../../../../components/shared/ThemeCard'; +import { ArrowRight, RotateCcw, Lightbulb } from 'lucide-react'; + +interface GraphNode { + id: string; + label: string; + x: number; + y: number; + value: number | null; + grad: number | null; + op?: string; +} + +interface Edge { + from: string; + to: string; + label?: string; +} + +const initialNodes: GraphNode[] = [ + { id: 'x', label: 'x', x: 60, y: 80, value: 2, grad: null }, + { id: 'w', label: 'w', x: 60, y: 220, value: 3, grad: null }, + { id: 'b', label: 'b', x: 240, y: 220, value: 1, grad: null }, + { id: 'mul', label: 'ร—', x: 170, y: 140, value: null, grad: null, op: 'mul' }, + { id: 'add', label: '+', x: 320, y: 140, value: null, grad: null, op: 'add' }, + { id: 'sq', label: 'ยฒ', x: 470, y: 140, value: null, grad: null, op: 'sq' }, + { id: 'loss', label: 'Loss', x: 600, y: 140, value: null, grad: null }, +]; + +const edges: Edge[] = [ + { from: 'x', to: 'mul' }, + { from: 'w', to: 'mul' }, + { from: 'mul', to: 'add' }, + { from: 'b', to: 'add' }, + { from: 'add', to: 'sq' }, + { from: 'sq', to: 'loss' }, +]; + +const Backpropagation: React.FC = () => { + const [nodes, setNodes] = useState(initialNodes); + const [phase, setPhase] = useState<'idle' | 'forward' | 'backward'>('idle'); + const [step, setStep] = useState(0); + const [activeEdge, setActiveEdge] = useState(null); + + const runForward = useCallback(() => { + const n = initialNodes.map((nd) => ({ ...nd })); + // x=2, w=3: mul = 6 + const x = n.find((nd) => nd.id === 'x')!.value!; + 
const w = n.find((nd) => nd.id === 'w')!.value!; + const b = n.find((nd) => nd.id === 'b')!.value!; + const mul = x * w; + const add = mul + b; + const sq = add * add; + n.find((nd) => nd.id === 'mul')!.value = mul; + n.find((nd) => nd.id === 'add')!.value = add; + n.find((nd) => nd.id === 'sq')!.value = sq; + n.find((nd) => nd.id === 'loss')!.value = sq; + setNodes(n); + setPhase('forward'); + setStep(0); + setActiveEdge(null); + }, []); + + const backSteps: { + nodeId: string; + grad: number; + edge: string; + explanation: string; + }[] = [ + { nodeId: 'loss', grad: 1, edge: '', explanation: 'dL/dL = 1 (by definition)' }, + { + nodeId: 'sq', + grad: 2 * (nodes.find((n) => n.id === 'add')?.value ?? 7), + edge: 'sq->loss', + explanation: `dL/d(sq_input) = 2ร—${nodes.find((n) => n.id === 'add')?.value ?? 7} = ${2 * (nodes.find((n) => n.id === 'add')?.value ?? 7)}`, + }, + { + nodeId: 'add', + grad: 2 * (nodes.find((n) => n.id === 'add')?.value ?? 7), + edge: 'add->sq', + explanation: `dL/d(add) = ${2 * (nodes.find((n) => n.id === 'add')?.value ?? 7)} ร— 1 (chain rule)`, + }, + { + nodeId: 'b', + grad: 2 * (nodes.find((n) => n.id === 'add')?.value ?? 7), + edge: 'b->add', + explanation: `dL/db = ${2 * (nodes.find((n) => n.id === 'add')?.value ?? 7)} (+ passes gradient through)`, + }, + { + nodeId: 'mul', + grad: 2 * (nodes.find((n) => n.id === 'add')?.value ?? 7), + edge: 'mul->add', + explanation: `dL/d(mul) = ${2 * (nodes.find((n) => n.id === 'add')?.value ?? 7)} (+ passes gradient through)`, + }, + { + nodeId: 'w', + grad: + 2 * + (nodes.find((n) => n.id === 'add')?.value ?? 7) * + (nodes.find((n) => n.id === 'x')?.value ?? 2), + edge: 'w->mul', + explanation: `dL/dw = ${2 * (nodes.find((n) => n.id === 'add')?.value ?? 7)} ร— x = ${2 * (nodes.find((n) => n.id === 'add')?.value ?? 7) * (nodes.find((n) => n.id === 'x')?.value ?? 2)}`, + }, + { + nodeId: 'x', + grad: + 2 * + (nodes.find((n) => n.id === 'add')?.value ?? 
7) * + (nodes.find((n) => n.id === 'w')?.value ?? 3), + edge: 'x->mul', + explanation: `dL/dx = ${2 * (nodes.find((n) => n.id === 'add')?.value ?? 7)} ร— w = ${2 * (nodes.find((n) => n.id === 'add')?.value ?? 7) * (nodes.find((n) => n.id === 'w')?.value ?? 3)}`, + }, + ]; + + const stepBackward = useCallback(() => { + if (phase !== 'forward' && phase !== 'backward') return; + setPhase('backward'); + const nextStep = step + 1; + if (nextStep > backSteps.length) return; + + const bs = backSteps[nextStep - 1]; + setNodes((prev) => prev.map((n) => (n.id === bs.nodeId ? { ...n, grad: bs.grad } : n))); + setActiveEdge(bs.edge); + setStep(nextStep); + }, [phase, step, backSteps]); + + const handleReset = useCallback(() => { + setNodes(initialNodes); + setPhase('idle'); + setStep(0); + setActiveEdge(null); + }, []); + + const getNodeColor = (node: GraphNode): string => { + if (phase === 'backward' && node.grad !== null) return '#fecdd3'; + if (phase === 'forward' && node.value !== null) return '#dbeafe'; + return '#f3f4f6'; + }; + + const currentExplanation = + step > 0 && step <= backSteps.length ? backSteps[step - 1].explanation : null; + + const heroContent = ( +
+

Backpropagation

+

+ In the previous section, gradient descent told the network to "step downhill." But + how does the network figure out which direction is downhill for every single one of + its millions of parameters? That's the job of backpropagation — the clever algorithm + that traces the error backward through the computation graph, assigning + blame to each parameter along the way. 

+

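+ Without backpropagation, measuring each parameter's effect on the loss would take a + separate pass through the network per parameter — far too slow for millions of + parameters. Backpropagation is the reason modern deep learning is practical at all. 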

+
+ ); + + const mainContent = ( + <> + {/* ELI10 box */} +
+
+ +
+

Explain Like I'm 10

+

+ Imagine you're on a relay race team. Someone drops the baton and your team loses. The + coach doesn't just shout at the last runner — they watch the replay and figure + out who dropped it and when. Backpropagation is that instant replay: + it rewinds through every calculation the model made and tells each part exactly how + much it messed up. Then every part can fix its own mistake for next time. 

+
+
+
+ +
+

Computation Graph

+
+ + + +
+
+ +

+ How to use: Click "Forward Pass" to push values left → right. + Then click "Step Backward" repeatedly to watch gradients flow right → left + through the graph. Red badges show each node's gradient (how much it contributed to + the error). Reset the graph at any time to start over. 

+ +
+ + {/* Edges */} + {edges.map((edge) => { + const fromN = nodes.find((n) => n.id === edge.from)!; + const toN = nodes.find((n) => n.id === edge.to)!; + const edgeKey = `${edge.from}->${edge.to}`; + const isActiveBack = activeEdge === edgeKey; + return ( + + {/* Forward arrow */} + + {/* Backward pulse */} + {isActiveBack && ( + + + + )} + + ); + })} + + {/* Nodes */} + {nodes.map((node) => ( + + + + {node.label} + + {/* Value */} + {node.value !== null && ( + + ={node.value} + + )} + {/* Gradient badge */} + {node.grad !== null && ( + + + + โˆ‡={node.grad} + + + )} + + ))} + + {/* Arrow markers */} + + + + + + + + + + + {/* Explanation */} + {currentExplanation && ( +
+ Step {step}: {currentExplanation} +
+ )} +
+
+ + +

๐Ÿญ The Corporate Chain of Command

+

+ Think of a neural network as a factory with multiple departments. When a defective product + ships, the QC boss doesn't fire everyone — they trace the assembly line backward to + figure out who is responsible and how much. 

+
+ {[ + { + team: 'QC Boss (Loss Function)', + emoji: '๐Ÿ”', + role: 'Inspects the final product, measures how bad the defect is, and starts the blame investigation.', + }, + { + team: 'Packaging (Output Layer)', + emoji: '๐Ÿ“ฆ', + role: 'Closest to the output โ€” gets feedback first. "The label was crooked? That\'s on me."', + }, + { + team: 'Assembly Team (Hidden Layers)', + emoji: '๐Ÿ”ง', + role: 'Middle of the chain. Receives proportional blame: "I used what the floor gave me, but I also bent it wrong."', + }, + { + team: 'Factory Floor (Input Layer)', + emoji: '๐Ÿ—๏ธ', + role: 'The first to touch raw materials. Gets the smallest blame โ€” but still adjusts for next time.', + }, + ].map(({ team, emoji, role }) => ( +
+
{emoji}
+
+ {team} +

{role}

+
+
+ ))} +
+
+

+ Key insight: Each department only needs to know two things — what it + received and what it sent out. It computes its local gradient and passes the + blame upstream. This is why backprop is so efficient: no department needs to understand + the entire factory! 

+
+
+ + +

The Chain Rule

+
+

+ ∂L/∂w = (∂L/∂ŷ) × (∂ŷ/∂z) × (∂z/∂w) 

+

+ Multiply local gradients together as you traverse backward through the computation + graph. Each node only needs to know its own local derivative. +

+
+
+

🧒 In plain English:

+

+ Suppose you increase w by a tiny amount. That changes{' '} + z a little. That change in z changes ŷ a little. And + that change in ŷ changes the loss a little. The chain rule multiplies + all these "a little" effects together to get the total effect of changing w on + the loss. 

+

+ It's like a row of dominoes — each one tips the next, and the chain rule tells you + how hard the last domino falls based on how hard you pushed the first one. 

+
+
+ + {/* Forward vs Backward summary */} +
+ + + + + + + + + + {[ + ['Direction', 'Input โ†’ Output', 'Output โ†’ Input'], + ['What flows', 'Data values', 'Gradients (blame)'], + ['Purpose', 'Compute prediction', 'Compute how to improve'], + ['When', 'Every training step', 'Every training step (after forward)'], + ['Result', 'A prediction + loss value', 'Updated weights'], + ].map(([aspect, forward, backward]) => ( + + + + + + ))} + +
AspectForward Pass →Backward Pass ←
{aspect}{forward}{backward}
+
+ + {/* Key takeaway */} +
+

🎯 Key Takeaway

+

+ Backpropagation is not a replacement for gradient descent — it's the{' '} + efficient way to compute the gradients that gradient descent needs. It + works by applying the chain rule backward through the computation graph, so each node only + computes its own local derivative. This makes training networks with millions of + parameters practical on modern hardware. 

+
+ + ); + + return ; +}; + +export default Backpropagation; diff --git a/src/features/ai/components/sections/FeatureEngineering.tsx b/src/features/ai/components/sections/FeatureEngineering.tsx new file mode 100644 index 0000000..1506c0b --- /dev/null +++ b/src/features/ai/components/sections/FeatureEngineering.tsx @@ -0,0 +1,505 @@ +import React, { useState, useCallback } from 'react'; +import SectionLayout from '../../../../components/shared/SectionLayout'; +import ThemeCard from '../../../../components/shared/ThemeCard'; +import { ArrowRight, Trash2, Plus, Lightbulb, AlertTriangle } from 'lucide-react'; + +interface RawColumn { + name: string; + values: string[]; + type: 'categorical' | 'numeric'; +} + +const RAW_DATA: RawColumn[] = [ + { name: 'Country', values: ['USA', 'UK', 'Japan'], type: 'categorical' }, + { name: 'Subscription', values: ['Free', 'Pro', 'Enterprise'], type: 'categorical' }, + { name: 'Income', values: ['$85,000', '$62,000', '$91,000'], type: 'numeric' }, + { name: 'Debt', values: ['$12,000', '$35,000', '$8,000'], type: 'numeric' }, +]; + +const ENCODED_COUNTRY = [ + { name: 'Country_USA', values: [1, 0, 0] }, + { name: 'Country_UK', values: [0, 1, 0] }, + { name: 'Country_Japan', values: [0, 0, 1] }, +]; + +const ENCODED_SUB = [ + { name: 'Sub_Free', values: [1, 0, 0] }, + { name: 'Sub_Pro', values: [0, 1, 0] }, + { name: 'Sub_Enterprise', values: [0, 0, 1] }, +]; + +const FeatureEngineering: React.FC = () => { + const [processedFeatures, setProcessedFeatures] = useState([]); + const [debtRatioCreated, setDebtRatioCreated] = useState(false); + + const handleEncode = useCallback( + (columnName: string) => { + if (!processedFeatures.includes(columnName)) { + setProcessedFeatures((prev) => [...prev, columnName]); + } + }, + [processedFeatures] + ); + + const handleCreateRatio = useCallback(() => { + setDebtRatioCreated(true); + if (!processedFeatures.includes('DebtRatio')) { + setProcessedFeatures((prev) => [...prev, 'DebtRatio']); + } + }, 
[processedFeatures]); + + const handleReset = useCallback(() => { + setProcessedFeatures([]); + setDebtRatioCreated(false); + }, []); + + const heroContent = ( +
+

Feature Engineering

+

+ Machine learning models only understand numbers. But real-world data includes things like + "USA", "Pro subscription", and "Male". Feature engineering is + the art of converting messy, real-world information into clean numbers that machines can + learn from. +

+
+ ); + + const mainContent = ( + <> + {/* Simple explanation */} +
+
+
+ +
+
+

Explain Like I'm 10 🧒

+

+ Imagine you want to teach a calculator to predict which students will pass an exam. + You can't type "likes math" or "studies a lot" into a + calculator — it only understands numbers! 

+

+ So you need to convert everything into numbers. "Likes + math" becomes 1, + "doesn't like math" becomes{' '} + 0. "Studies 3 hours/day" + stays as 3. That's feature + engineering — preparing data so machines can understand it. 

+
+
+
+ + {/* Why is this needed? */} + +

+ 🤔 Why Can't Models Use Text Directly? 

+

+ At its core, a machine learning model is just a mathematical formula. It multiplies inputs + by numbers (weights), adds them up, and produces an output. You can't multiply + "Japan" × 0.5 — that doesn't make mathematical sense! 

+
+
+

โŒ What the model sees

+
+
Country: "USA" → ???
+
Plan: "Pro" → ???
+
Can't do math with text!
+
+
+
+

✅ After feature engineering

+
+
Country_USA: 1, Country_UK: 0, Country_Japan: 0
+
Plan_Pro: 1, Plan_Free: 0
+
Now we can multiply and add!
+
+
+
+
+ {/* Interactive Feature Factory */} + +
+

Interactive Feature Factory

+ +
+

+ Click on categorical columns to one-hot encode them, or combine numeric features to + engineer new ones. +

+ +
+ {/* Raw Data Panel */} +
+

+ Raw Data +

+
+ {RAW_DATA.map((col) => ( + + ))} + + {/* Debt Ratio button */} + {!debtRatioCreated && ( + + )} +
+
+ + {/* Processing Zone */} +
+
+

+ Processing Zone +

+ {processedFeatures.length === 0 ? ( +
+ +

Click a categorical column to encode it

+
+ ) : ( +
+ {processedFeatures.includes('Country') && ( +
+
+ One-Hot Encoding: Country +
+
+ Country → Country_USA, Country_UK, Country_Japan 
+
+ {ENCODED_COUNTRY.map((col) => ( +
+
+ {col.name} +
+ {col.values.map((v, i) => ( +
+ {v} +
+ ))} +
+ ))} +
+
+ )} + {processedFeatures.includes('Subscription') && ( +
+
+ One-Hot Encoding: Subscription +
+
+ {ENCODED_SUB.map((col) => ( +
+
+ {col.name} +
+ {col.values.map((v, i) => ( +
+ {v} +
+ ))} +
+ ))} +
+
+ )} + {debtRatioCreated && ( +
+
+ Engineered Feature +
+
Debt ÷ Income = Debt Ratio
+
+ {['0.141', '0.565', '0.088'].map((v, i) => ( +
+ Row {i + 1}: + {v} +
+ ))} +
+
+ )} +
+ )} +
+
+ + {/* Output Vector Panel */} +
+

+ Output Feature Vector +

+ {processedFeatures.length === 0 ? ( +
+

No features processed yet

+
+ ) : ( +
+ {processedFeatures.includes('Country') && + ENCODED_COUNTRY.map((col) => ( +
+ + {col.name} + +
+ {col.values.map((v, i) => ( + + {v} + + ))} +
+
+ ))} + {processedFeatures.includes('Subscription') && + ENCODED_SUB.map((col) => ( +
+ + {col.name} + +
+ {col.values.map((v, i) => ( + + {v} + + ))} +
+
+ ))} + {debtRatioCreated && ( +
+ Debt Ratio +
+ {['0.14', '0.57', '0.09'].map((v, i) => ( + + {v} + + ))} +
+
+ )} +
+ + {processedFeatures.length} feature group + {processedFeatures.length > 1 ? 's' : ''} ready for model input + +
+
+ )} +
+
+
+ + {/* Analogy */} + +

+ ๐Ÿ‘จโ€๐Ÿณ The Analogy: Master Chef's Kitchen +

+

+ Imagine a machine learning model as a world-class master chef, and the raw data as + groceries delivered straight from a farm. If you hand the chef an unpeeled onion, a live + chicken, and unwashed wheat stalks, the chef cannot immediately bake a chicken pie. +

+

+ Feature engineering is the meticulous prep work done by sous-chefs before + any cooking begins: peeling, chopping, measuring, washing, and marinating. +

+
+
+

One-Hot Encoding

+

+ Like separating a mixed basket of vegetables into clearly labeled containers — + "Tomatoes here, Peppers here, Onions here." Each gets its own binary column. 

+
+
+

Feature Creation

+

+ Like creating a new sauce by combining existing ingredients — mixing Debt ÷ Income + gives you a "financial health score" that's more informative than + either alone. 

+
+
+

Feature Selection

+

+ Like throwing out the handful of gravel that accidentally got mixed into the delivery + — irrelevant data that would ruin the meal. 

+
+
+
+ + {/* Dimensionality Warning */} +
+
+ +
+

+ โš ๏ธ The Dimensionality Explosion Problem +

+

+ One-hot encoding can create a LOT of columns. If "Country" has 200 possible + values, it creates 200 new columns! A dataset with 50 categorical features, each + having 100 values, would explode from 50 columns to 5,000 columns. +

+

+ This is called the "curse of dimensionality" — the more + columns you have, the more data you need for the model to learn effectively. + That's why feature selection (removing irrelevant columns) is + just as important as feature creation. 

+
+
+
+ + {/* Feature Selection Methods */} + +

Feature Selection Methods

+

+ Not all features are useful. Some are noisy, redundant, or irrelevant. Selecting the right + features is like packing for a trip — bring only what you need. 

+
+
+

Filter Methods

+

+ Evaluate each feature independently using statistics — like checking each + ingredient's expiry date before cooking. 

+
+ Cost: Low — no model training needed 
+ Example: Correlation analysis, Chi-squared test +
+ Best for: Quick removal of obviously useless features +
+
+
+

Wrapper Methods

+

+ Train the model on different feature subsets and keep the best-performing one — like + taste-testing candidate ingredient combinations. 

+
+ Cost: Very High — trains many models 
+ Example: Forward/backward selection +
+ Best for: Small datasets where accuracy matters most +
+
+
+

Embedded Methods

+

+ The model itself decides which features matter as it trains — like a chef who + naturally ignores ingredients that don't affect taste. 

+
+ Cost: Medium — done during training 
+ Example: Lasso Regression, Random Forests +
+ Best for: Most real-world applications +
+
+
+
+ + {/* Key Takeaway */} +
+

🎯 Key Takeaway

+

+ Feature engineering is often called "the most important part of machine + learning." A simple model with great features will often outperform a complex model with + bad features. Data scientists are often said to spend about 80% of their time on data + preparation and feature engineering — and only 20% on actual model training! 

+
+ + ); + + return ; +}; + +export default FeatureEngineering; diff --git a/src/features/ai/components/sections/Generalization.tsx b/src/features/ai/components/sections/Generalization.tsx new file mode 100644 index 0000000..2bf2e04 --- /dev/null +++ b/src/features/ai/components/sections/Generalization.tsx @@ -0,0 +1,436 @@ +import React, { useState, useMemo, useCallback } from 'react'; +import SectionLayout from '../../../../components/shared/SectionLayout'; +import ThemeCard from '../../../../components/shared/ThemeCard'; +import { Eye, EyeOff, Lightbulb } from 'lucide-react'; + +// Generate a smooth U-shape with noise +const generateData = ( + seed: number +): { train: { x: number; y: number }[]; test: { x: number; y: number }[] } => { + const rng = (s: number): number => { + const x = Math.sin(s * 9301 + 49297) * 233280; + return x - Math.floor(x); + }; + const train: { x: number; y: number }[] = []; + const test: { x: number; y: number }[] = []; + for (let i = 0; i < 20; i++) { + const t = (i / 19) * 2 - 1; // -1 to 1 + const y = 0.8 * t * t + 0.1 * t + 0.15 * (rng(seed + i) - 0.5); + train.push({ x: t, y }); + } + for (let i = 0; i < 12; i++) { + const t = ((i + 0.5) / 12) * 2 - 1; + const y = 0.8 * t * t + 0.1 * t + 0.12 * (rng(seed + 100 + i) - 0.5); + test.push({ x: t, y }); + } + return { train, test }; +}; + +const fitPoly = (data: { x: number; y: number }[], degree: number): number[] => { + // Simple polynomial fit via normal equations (this is a simplified version) + const n = data.length; + const m = degree + 1; + // Build Vandermonde + const X: number[][] = data.map((d) => { + const row: number[] = []; + for (let j = 0; j < m; j++) row.push(Math.pow(d.x, j)); + return row; + }); + const Y = data.map((d) => d.y); + + // X^T X + const XtX: number[][] = Array.from({ length: m }, () => Array(m).fill(0)); + const XtY: number[] = Array(m).fill(0); + for (let i = 0; i < n; i++) { + for (let j = 0; j < m; j++) { + XtY[j] += X[i][j] * Y[i]; + for (let k = 0; k 
< m; k++) { + XtX[j][k] += X[i][j] * X[i][k]; + } + } + } + + // Regularize slightly to avoid singularity + for (let j = 0; j < m; j++) XtX[j][j] += 0.0001; + + // Gaussian elimination + const aug: number[][] = XtX.map((row, i) => [...row, XtY[i]]); + for (let col = 0; col < m; col++) { + let maxRow = col; + for (let row = col + 1; row < m; row++) { + if (Math.abs(aug[row][col]) > Math.abs(aug[maxRow][col])) maxRow = row; + } + [aug[col], aug[maxRow]] = [aug[maxRow], aug[col]]; + if (Math.abs(aug[col][col]) < 1e-12) continue; + for (let row = 0; row < m; row++) { + if (row === col) continue; + const f = aug[row][col] / aug[col][col]; + for (let j = col; j <= m; j++) aug[row][j] -= f * aug[col][j]; + } + } + + return aug.map((row, i) => row[m] / (row[i] || 1)); +}; + +const evalPoly = (coeffs: number[], x: number): number => + coeffs.reduce((sum, c, i) => sum + c * Math.pow(x, i), 0); + +const Generalization: React.FC = () => { + const [degree, setDegree] = useState(2); + const [showTest, setShowTest] = useState(false); + + const { train, test } = useMemo(() => generateData(42), []); + + const coeffs = useMemo(() => fitPoly(train, degree), [train, degree]); + + const curvePath = useMemo(() => { + const pts: string[] = []; + for (let i = -100; i <= 100; i++) { + const x = i / 100; // -1 to 1 + const y = evalPoly(coeffs, x); + const sx = 80 + (x + 1) * 220; + const sy = 320 - (y + 0.2) * 280; + pts.push(`${sx},${Math.max(20, Math.min(380, sy))}`); + } + return pts.join(' '); + }, [coeffs]); + + const toSvg = useCallback( + (pt: { x: number; y: number }) => ({ + sx: 80 + (pt.x + 1) * 220, + sy: 320 - (pt.y + 0.2) * 280, + }), + [] + ); + + // Compute errors + const trainError = useMemo(() => { + const e = + train.reduce((sum, pt) => sum + Math.abs(pt.y - evalPoly(coeffs, pt.x)), 0) / train.length; + return e; + }, [train, coeffs]); + + const testError = useMemo(() => { + const e = + test.reduce((sum, pt) => sum + Math.abs(pt.y - evalPoly(coeffs, pt.x)), 0) / 
test.length; + return e; + }, [test, coeffs]); + + const fitLabel = + degree <= 1 + ? { text: 'Underfitting (High Bias)', color: 'text-blue-600 bg-blue-50 border-blue-200' } + : degree <= 3 + ? { text: 'Best Fit', color: 'text-emerald-600 bg-emerald-50 border-emerald-200' } + : degree <= 7 + ? { text: 'Slight Overfitting', color: 'text-amber-600 bg-amber-50 border-amber-200' } + : { text: 'Overfitting (High Variance)', color: 'text-red-600 bg-red-50 border-red-200' }; + + const heroContent = ( +
+

Generalization

+

+ The whole point of machine learning is to make good predictions on{' '} + data the model has never seen before. A model that aces its homework but + fails every test has memorized the noise instead of learning the pattern. + This section explores the tightrope walk between underfitting and overfitting — known as the{' '} + Bias-Variance Tradeoff.

+

+ Use the interactive slider below to see exactly what happens when a model is too simple, + just right, or way too complex. +

+
+ ); + + const mainContent = ( + <> + {/* ELI10 box */} +
+
+ +
+

Explain Like I'm 10

+

+ Imagine studying for a math test. You could memorize every single practice problem — + but if the teacher changes the numbers, you'd be lost. Or you could learn{' '} + how to do the math, and then you can solve any problem. + That's what "generalization" means: learning the rules, not just the + answers.

+
+
+
+ +
+

Interactive Curve Fitter

+ +
+ + {/* Fit label */} +
+ {fitLabel.text} +
+ +
+ + {/* Axes */} + + + + Input Feature (x) + + + Output (y) + + + {/* Fitted curve */} + + + {/* Training data */} + {train.map((pt, i) => { + const { sx, sy } = toSvg(pt); + return ( + + ); + })} + + {/* Test data */} + {showTest && + test.map((pt, i) => { + const { sx, sy } = toSvg(pt); + return ( + + + {/* Error line to curve */} + + + ); + })} + + {/* Legend */} + + + Training Data + + {showTest && ( + <> + + + Test Data (Unseen) + + + )} + +
+ + {/* Poly degree slider */} +
+
+ +
+ setDegree(parseInt(e.target.value))} + className="w-full accent-rose-500" + /> +
+ 1 (straight line) + 2-3 (smooth curve) + 15 (wild zig-zag) +
+
+ + {/* Error display */} +
+
+
Training Error (MAE)
+
{trainError.toFixed(4)}
+
+
+
+ Test Error (MAE) +
+
+ {showTest ? testError.toFixed(4) : 'โ€”'} +
+
+
+
+ + +

๐Ÿ“ The School Exam

+

+ Think of training data as practice problems and test data as the real exam. Three types of + students show exactly what underfitting, overfitting, and good generalization look like: +

+
+ {[ + { + student: 'The Underfitter', + emoji: '๐Ÿ˜ด', + behavior: + "Doesn't study at all. Assumes every answer is 'C'. Fails both practice and exam. The model is too simple to capture the pattern.", + result: 'Practice: โŒ | Exam: โŒ', + color: 'from-blue-50 to-blue-100 border-blue-200', + }, + { + student: 'The Overfitter', + emoji: '๐Ÿค“', + behavior: + 'Photographic memory. Memorizes all 100 practice answers word-for-word, including typos. Aces practice, but when the numbers change on the real exam โ€” total confusion.', + result: 'Practice: โœ… | Exam: โŒ', + color: 'from-red-50 to-red-100 border-red-200', + }, + { + student: 'The Best Fit', + emoji: '๐Ÿง ', + behavior: + 'Studies the formulas and underlying rules. Might miss one tricky practice question, but understands the concepts well enough to handle new problems.', + result: 'Practice: โœ… | Exam: โœ…', + color: 'from-emerald-50 to-emerald-100 border-emerald-200', + }, + ].map(({ student, emoji, behavior, result, color }) => ( +
+
{emoji}
+

{student}

+

{behavior}

+
+ {result} +
+
+ ))} +
+
+ + {/* Bias-Variance table */} +
+

โš–๏ธ The Bias-Variance Tradeoff

+ + + + + + + + + + {[ + ['Model complexity', 'Too simple (straight line)', 'Too complex (wild zig-zag)'], + ['Training error', 'High', 'Very low (nearly zero)'], + ['Test error', 'High', 'Very high'], + ['What it learned', 'Nothing useful', 'Noise + signal together'], + ['Real-world analogy', 'Guessing randomly', 'Memorizing the textbook'], + [ + 'Fix by...', + 'Adding more features/complexity', + 'Regularization, more data, early stopping', + ], + ].map(([concept, bias, variance]) => ( + + + + + + ))} + +
ConceptHigh Bias (Underfitting)High Variance (Overfitting)
{concept}{bias}{variance}
+
+ + {/* Regularization note */} +
+

๐Ÿ›ก๏ธ How Do We Prevent Overfitting?

+
+ {[ + { + name: 'More Training Data', + desc: 'More examples make it harder to memorize patterns.', + }, + { + name: 'Regularization (L1/L2)', + desc: 'Penalizes overly large weights โ€” forces simplicity.', + }, + { + name: 'Early Stopping', + desc: 'Stop training before the model starts memorizing noise.', + }, + { + name: 'Dropout', + desc: 'Randomly disable neurons during training โ€” forces redundancy.', + }, + ].map(({ name, desc }) => ( +
+ {name} +

{desc}

+
+ ))} +
+
+ + {/* Key takeaway */} +
+

🎯 Key Takeaway

+

+ The goal of machine learning is not to get zero error on training data — + it's to get low error on data the model has never seen. Use the + slider above to see this in action: as you increase the degree past 3, the training error + drops to near zero, but the test error starts climbing. The sweet spot is a model + that's complex enough to capture the pattern, but simple enough to ignore the noise.

+
+ + ); + + return ; +}; + +export default Generalization; diff --git a/src/features/ai/components/sections/GradientDescent.tsx b/src/features/ai/components/sections/GradientDescent.tsx new file mode 100644 index 0000000..65c1ba7 --- /dev/null +++ b/src/features/ai/components/sections/GradientDescent.tsx @@ -0,0 +1,457 @@ +import React, { useState, useCallback, useRef, useEffect } from 'react'; +import SectionLayout from '../../../../components/shared/SectionLayout'; +import ThemeCard from '../../../../components/shared/ThemeCard'; +import { Play, RotateCcw, Pause, Lightbulb, SkipForward } from 'lucide-react'; + +// Rastrigin-like loss landscape for interesting topology +const lossAt = (x: number, y: number): number => { + const nx = (x - 300) / 60; + const ny = (y - 200) / 60; + return ( + 0.5 * (nx * nx + ny * ny) + + 2 * Math.cos(2 * Math.PI * nx * 0.6) + + 2 * Math.cos(2 * Math.PI * ny * 0.6) + ); +}; + +const gradAt = (x: number, y: number): [number, number] => { + const eps = 0.5; + const dx = (lossAt(x + eps, y) - lossAt(x - eps, y)) / (2 * eps); + const dy = (lossAt(x, y + eps) - lossAt(x, y - eps)) / (2 * eps); + return [dx, dy]; +}; + +const GradientDescent: React.FC = () => { + const [ballPos, setBallPos] = useState<{ x: number; y: number } | null>(null); + const [path, setPath] = useState<{ x: number; y: number }[]>([]); + const [lr, setLr] = useState(0.5); + const [isRunning, setIsRunning] = useState(false); + const animRef = useRef(null); + const ballRef = useRef(ballPos); + ballRef.current = ballPos; + + // Generate contour data + const contourLines: React.ReactNode[] = []; + const levels = [-3, -2, -1, 0, 1, 2, 3, 4, 6, 8, 10, 14]; + const colors = [ + '#1e3a5f', + '#1e4d7a', + '#1a6b9a', + '#2588b8', + '#3ba5d0', + '#60c0de', + '#86d4e8', + '#b0e4f0', + '#d4f0f8', + '#e8f7fb', + '#f5e6d0', + '#f0c8a0', + ]; + + for (let gy = 10; gy < 400; gy += 4) { + for (let gx = 10; gx < 590; gx += 4) { + const v = lossAt(gx, gy); + let idx = levels.findIndex((l) 
=> v < l); + if (idx === -1) idx = levels.length - 1; + contourLines.push( + + ); + } + } + + const handleSvgClick = useCallback( + (e: React.MouseEvent) => { + if (isRunning) return; + const svg = e.currentTarget; + const rect = svg.getBoundingClientRect(); + const scaleX = 600 / rect.width; + const scaleY = 400 / rect.height; + const x = (e.clientX - rect.left) * scaleX; + const y = (e.clientY - rect.top) * scaleY; + setBallPos({ x, y }); + setPath([{ x, y }]); + }, + [isRunning] + ); + + const stepDescent = useCallback(() => { + if (!ballRef.current) return false; + const { x, y } = ballRef.current; + const [gx, gy] = gradAt(x, y); + const mag = Math.sqrt(gx * gx + gy * gy); + if (mag < 0.001) return false; + + const step = lr * 30; + const nx = x - gx * step; + const ny = y - gy * step; + + // Check bounds โ€” divergence + if (nx < 0 || nx > 600 || ny < 0 || ny > 400) { + setBallPos({ x: Math.max(0, Math.min(600, nx)), y: Math.max(0, Math.min(400, ny)) }); + setPath((p) => [ + ...p, + { x: Math.max(0, Math.min(600, nx)), y: Math.max(0, Math.min(400, ny)) }, + ]); + return false; // diverged + } + + setBallPos({ x: nx, y: ny }); + setPath((p) => [...p, { x: nx, y: ny }]); + return true; + }, [lr]); + + useEffect(() => { + if (!isRunning) { + if (animRef.current) cancelAnimationFrame(animRef.current); + return; + } + + let stepCount = 0; + const animate = (): void => { + stepCount++; + const ok = stepDescent(); + if (!ok || stepCount > 500) { + setIsRunning(false); + return; + } + animRef.current = requestAnimationFrame(animate); + }; + + const timeout = setTimeout(() => { + animRef.current = requestAnimationFrame(animate); + }, 60); + + return () => { + clearTimeout(timeout); + if (animRef.current) cancelAnimationFrame(animRef.current); + }; + }, [isRunning, stepDescent]); + + const handleReset = useCallback(() => { + setIsRunning(false); + setBallPos(null); + setPath([]); + }, []); + + const lrLabel = + lr < 0.2 + ? 
'Very Low โ€” Slow convergence' + : lr < 0.6 + ? 'Good โ€” Balanced' + : lr < 1.2 + ? 'High โ€” Risk of overshoot' + : 'Extreme โ€” Likely divergence!'; + const lrColor = + lr < 0.2 + ? 'text-blue-600' + : lr < 0.6 + ? 'text-emerald-600' + : lr < 1.2 + ? 'text-amber-600' + : 'text-red-600'; + + const heroContent = ( +
+

Gradient Descent

+

+ This is the core optimization algorithm that makes neural networks actually learn. + It answers: "I know I'm wrong โ€” but which direction should I adjust my weights to + be less wrong next time?" +

+
+ ); + + const mainContent = ( + <> + {/* Simple explanation */} +
+
+
+ +
+
+

Explain Like I'm 10 🧒

+

+ Imagine you're blindfolded on a hilly mountain and need to reach the lowest + valley. You can't see, but you CAN feel the ground with your feet. You take a + step in whichever direction slopes downward the most. +

+

+ That's gradient descent! The "slope of the ground" is + the gradient (math that tells you which way is downhill), and the "size + of your step" is the learning rate. Big steps are fast but risky (you + might overshoot the valley). Tiny steps are safe but slow. +

+
+
+
+ +
+

Interactive Loss Landscape

+
+ + + +
+
+ +

+ Click anywhere on the landscape to drop the ball, then adjust the learning rate and start + descent. +

+ + {/* Learning rate slider */} +
+
+ + {lrLabel} +
+ setLr(parseFloat(e.target.value))} + className="w-full accent-rose-500" + /> +
+ 0.05 (tiny steps) + 1.0 (moderate) + 2.0 (huge leaps) +
+
+ +
+ + {/* Contour heatmap */} + {contourLines} + + {/* Global minimum indicator */} + + + Global Min + + + {/* Descent path */} + {path.length > 1 && ( + `${p.x},${p.y}`).join(' ')} + fill="none" + stroke="#f43f5e" + strokeWidth={2} + strokeLinejoin="round" + opacity={0.9} + /> + )} + + {/* Path dots */} + {path.map((p, i) => ( + + ))} + + {/* Ball */} + {ballPos && ( + + + + + + + )} + + {/* Legend */} + + + Deep Blue = Low Loss + + + Warm = High Loss + + +
+ + {/* Info strip */} + {ballPos && ( +
+ + Steps: {path.length} + + + Loss:{' '} + {lossAt(ballPos.x, ballPos.y).toFixed(3)} + + + Position: ({ballPos.x.toFixed(0)}, {ballPos.y.toFixed(0)}) + +
+ )} +
+ + +

The Update Rule

+
+

+ θ<sub>new</sub> = θ<sub>old</sub> − α × ∇L(θ)

+

+ Move the parameters (θ) in the direction opposite to the gradient (∇L), scaled by the + learning rate (α).

+
+
+

🧒 In plain English:

+
    +
+ • θ = the model's current guess (weights and biases)
  • +
+ • ∇L(θ) = which direction is "uphill" (increasing the + error). We go the opposite way.
  • +
+ • α = how big of a step we take (learning rate)
  • +
+ • Result: a slightly better set of weights! Repeat this millions of + times.
  • +
+
+
+ + +

+ โ›ฐ๏ธ The Blindfolded Hiker: Learning Rate Matters +

+

+ The learning rate is the single most important setting in training a neural network. Too + small and training takes forever. Too large and the model explodes. Here's what + happens: +

+
+ {[ + { + label: 'Tiny Steps (ฮฑ โ‰ˆ 0.001)', + emoji: '๐Ÿข', + desc: 'The hiker inch-crawls safely downhill. Guaranteed to reach the bottom โ€” but it might take weeks of computer time. In practice, this wastes expensive GPU hours.', + color: 'bg-blue-50 border-blue-200 text-blue-800', + }, + { + label: 'Good Stride (ฮฑ โ‰ˆ 0.01โ€“0.1)', + emoji: '๐Ÿšถ', + desc: 'Confident walking pace. Reaches the valley floor quickly without overshooting. This is the sweet spot โ€” most successful models use learning rates in this range.', + color: 'bg-emerald-50 border-emerald-200 text-emerald-800', + }, + { + label: 'Flying Leaps (ฮฑ > 1)', + emoji: '๐Ÿคธ', + desc: 'The hiker launches off the opposite hillside. Each step makes things worse โ€” the loss increases instead of decreasing. The model learns nothing.', + color: 'bg-red-50 border-red-200 text-red-800', + }, + ].map((item) => ( +
+
{item.emoji}
+

{item.label}

+

{item.desc}

+
+ ))} +
+
+ + {/* Local minima warning */} +
+

๐Ÿ•ณ๏ธ The Local Minima Problem

+

+ In the landscape above, you might notice the ball gets "stuck" in a valley that + isn't the deepest one. This is called a local minimum — a low point + that's not the global lowest point. It's like finding a puddle on a mountain and + thinking you've reached the ocean.

+

+ Real neural networks have billions of dimensions (not just 2), so the landscape is + incredibly complex. Modern optimizers like Adam and{' '} + SGD with momentum help escape local minima — like giving the hiker a + running start so they can roll over small bumps.

+
+ + {/* Key takeaway */} +
+

🎯 Key Takeaway

+

+ Gradient descent is the engine that makes learning happen. Without it, a neural network is + just a random number generator. The gradient tells you{' '} + which direction to adjust, and the learning rate tells you{' '} + how much to adjust. Together, they turn a bad model into a good one, one + tiny step at a time. +

+
+ + ); + + return ; +}; + +export default GradientDescent; diff --git a/src/features/ai/components/sections/Introduction.tsx b/src/features/ai/components/sections/Introduction.tsx new file mode 100644 index 0000000..6c5d0e2 --- /dev/null +++ b/src/features/ai/components/sections/Introduction.tsx @@ -0,0 +1,533 @@ +import React from 'react'; +import SectionLayout from '../../../../components/shared/SectionLayout'; +import ThemeCard from '../../../../components/shared/ThemeCard'; +import NavigationCard from '../../../../components/shared/NavigationCard'; +import CTASection from '../../../../components/shared/CTASection'; +import StatsGrid from '../../../../components/shared/StatsGrid'; +import { + Brain, + Cpu, + Database, + Zap, + TrendingUp, + Network, + Layers, + Target, + Lightbulb, + MessageSquare, + Music, + Camera, + Map, + ShieldCheck, +} from 'lucide-react'; + +const Introduction: React.FC = () => { + const navigateToSection = (sectionName: string): void => { + const baseUrl = '/ai?section='; + const encodedSection = encodeURIComponent(sectionName); + window.location.href = baseUrl + encodedSection; + }; + + const stats = [ + { + value: '11', + label: 'Interactive Topics', + icon: , + }, + { + value: '175B+', + label: 'Parameters in Modern LLMs', + icon: , + }, + { + value: '100%', + label: 'Visual Learning', + icon: , + }, + ]; + + const heroContent = ( +
+
+
+ +
+
+

+ AI Fundamentals: The Architecture of Intelligence +

+

+ Ever wondered how your phone recognizes your face, or how ChatGPT writes essays? This module + breaks down the magic behind artificial intelligence into simple, visual, and interactive + lessons — no PhD required.

+ +
+ + ๐Ÿง  Neural Networks + + + ๐Ÿ“Š Gradient Descent + + + ๐Ÿ”— Backpropagation + + + ๐Ÿค– RAG & Embeddings + +
+
+ ); + + const mainContent = ( + <> + {/* Simple Explanation Box */} +
+
+
+ +
+
+

Explain Like I'm 10 🧒

+

+ Imagine you have a really smart pet dog. You don't program the dog with a rule + book — instead, you show the dog many examples. "This is a cat, + this is not a cat." After seeing thousands of examples, the dog learns to + recognize cats on its own — even cats it has never seen before!

+

+ That's essentially what AI does. Instead of following rules + humans wrote, AI learns patterns from data. The more examples you give it, the smarter + it gets. This course teaches you exactly how that learning process works, + step by step. +

+
+
+
+ + +

What is Artificial Intelligence?

+ + {/* AI in Your Daily Life */} +
+

+ ๐ŸŒ AI Is Already Everywhere In Your Life +

+

+ You might think AI is some futuristic technology, but you already use it dozens of times + every day without realizing it: +

+
+ {[ + { + icon: , + title: 'Autocomplete', + desc: 'When your phone predicts the next word you want to type', + }, + { + icon: , + title: 'Face Unlock', + desc: 'Your phone recognizing your face to unlock the screen', + }, + { + icon: , + title: 'Spotify / YouTube', + desc: 'Recommending songs or videos you might like', + }, + { + icon: , + title: 'Google Maps', + desc: 'Predicting traffic and finding the fastest route', + }, + { + icon: , + title: 'Spam Filter', + desc: 'Your email automatically catching spam messages', + }, + { + icon: , + title: 'ChatGPT', + desc: 'Having conversations and answering questions', + }, + ].map((item) => ( +
+
{item.icon}
+
+ {item.title} +

{item.desc}

+
+
+ ))} +
+
+ +
+
+

+ From Rules to Learning: A Big Shift +

+

+ In traditional software, a programmer writes every rule by hand. For example:{' '} + "If temperature > 30°C, turn on the AC." The computer only does + what it's explicitly told.

+

+ With AI, something fundamentally different happens. Instead of writing rules, you give + the computer thousands of examples of inputs and correct outputs, and + the computer figures out the rules by itself. This is called{' '} + machine learning. +

+
+

+ Traditional: Human writes rules → Computer follows them

+

+ AI/ML: Human provides examples → Computer discovers rules

+
+
+
+

Two Schools of AI

+
+
+

+ Symbolic AI (The "Rule Book" Approach) +

+

+ Humans manually write logical rules for the computer to follow. Think of it like a + recipe book — the computer follows each step exactly.

+

+ Example: A medical system with 10,000 hand-coded rules like "IF fever AND + cough AND fatigue THEN possibly flu" +

+
+
+

+ Connectionist AI (The "Learning Brain" Approach) +

+

+ Inspired by how the human brain works. The computer learns patterns from data + using networks of simple connected units (neurons). This is what ChatGPT, image + recognition, and self-driving cars use. +

+

+ Example: Show a network 1 million photos of cats and dogs — it learns to tell them + apart without any explicit rules

+
+
+
+
+ + {/* How Does a Machine Actually "Learn"? */} +
+

+ 🤔 How Does a Machine Actually "Learn"?

+

+ Machine learning follows a surprisingly simple loop, repeated millions of times: +

+
+ {[ + { step: '1', label: 'Make a guess', color: 'bg-emerald-100 text-emerald-800' }, + { step: 'โ†’', label: '', color: 'text-gray-400' }, + { + step: '2', + label: 'Check how wrong it was', + color: 'bg-amber-100 text-amber-800', + }, + { step: 'โ†’', label: '', color: 'text-gray-400' }, + { + step: '3', + label: 'Adjust to be less wrong', + color: 'bg-blue-100 text-blue-800', + }, + { step: 'โ†’', label: '', color: 'text-gray-400' }, + { step: '๐Ÿ”', label: 'Repeat!', color: 'bg-rose-100 text-rose-800' }, + ].map((item, i) => + item.label === '' ? ( + + ) : ( +
+ {item.step !== '๐Ÿ”' && Step {item.step}:} + {item.label} +
+ ) + )} +
+

+ This is exactly how you learned to ride a bike — try, fall, adjust, try again. The + difference is that a computer can do this loop millions of times per second.

+
+ +
+

+ Core Concepts You Will Master +

+
+
+
+ +
+

The ML Lifecycle

+

+ The complete journey from raw data → trained model → real-world predictions

+
+
+
+ +
+

Neural Network Internals

+

+ How tiny math operations (weights × inputs + bias) connect to create intelligence

+
+
+
+ +
+

Modern NLP & RAG

+

+ How computers understand human language and retrieve knowledge to answer questions +

+
+
+
+ + {/* Key Terminology */} +
+

📖 Key Terminology

+
+
+
+
+ Parametric Models +

+ Models with a fixed number of knobs to tune, no matter how much + data you give them. Like a recipe with exactly 5 ingredients — you adjust the + amounts but never add new ones.

+

+ Examples: Linear Regression, Neural Networks +

+
+
+
+
+
+ Non-parametric Models +

+ Models that grow more complex as you give them more data. Like a + photo album — the more photos you add, the more detailed the collection becomes.

+

+ Examples: Decision Trees, K-Nearest Neighbors +

+
+
+
+
+
+ Training +

+ The process of showing a model thousands of examples so it can learn patterns. + Like studying for an exam — the more practice problems you solve, the better you + get.

+
+
+
+
+
+ Inference +

+ Using a trained model to make predictions on new, unseen data. Like taking the + actual exam after you've studied — you apply what you learned to new + questions.

+
+
+
+
+
+ + {/* Learning Path Overview */} + +

+ ๐Ÿ—บ๏ธ Your Learning Journey (11 Sections) +

+

+ Each section builds on the previous one. We recommend going in order, but feel free to + jump to any topic that interests you: +

+
+ {[ + { + num: 1, + title: 'ML Lifecycle', + desc: 'The end-to-end pipeline โ€” from collecting data to shipping a model', + }, + { + num: 2, + title: 'Feature Engineering', + desc: 'How to prepare and transform raw data so machines can understand it', + }, + { + num: 3, + title: 'Neural Networks', + desc: 'The brain-inspired architecture that powers modern AI', + }, + { + num: 4, + title: 'Loss Functions', + desc: 'How we measure "how wrong" a prediction is', + }, + { + num: 5, + title: 'Gradient Descent', + desc: 'The algorithm that makes models smarter, step by step', + }, + { + num: 6, + title: 'Backpropagation', + desc: 'How errors flow backward through a network to fix mistakes', + }, + { + num: 7, + title: 'Generalization', + desc: 'Why "memorizing answers" is bad and how to avoid it', + }, + { + num: 8, + title: 'Training vs Inference', + desc: 'The difference between learning and applying knowledge', + }, + { + num: 9, + title: 'Word Embeddings', + desc: 'How computers turn words into numbers that capture meaning', + }, + { + num: 10, + title: 'RAG Pipeline', + desc: 'How AI retrieves knowledge to give accurate, grounded answers', + }, + ].map((item) => ( + + ))} +
+
+ + ); + + const sidebar = ( +
+

Explore Topics

+ } + onClick={() => navigateToSection('ML Lifecycle')} + /> + } + onClick={() => navigateToSection('Feature Engineering')} + /> + } + onClick={() => navigateToSection('Neural Networks')} + /> + } + onClick={() => navigateToSection('Loss Functions')} + /> + } + onClick={() => navigateToSection('Gradient Descent')} + /> + } + onClick={() => navigateToSection('Backpropagation')} + /> + } + onClick={() => navigateToSection('Generalization')} + /> + } + onClick={() => navigateToSection('Training vs Inference')} + /> + } + onClick={() => navigateToSection('Word Embeddings')} + /> + } + onClick={() => navigateToSection('RAG Pipeline')} + /> +
+ ); + + return ( + <> + + navigateToSection('ML Lifecycle')} + colorScheme="rose" + /> + + ); +}; + +export default Introduction; diff --git a/src/features/ai/components/sections/LossFunctions.tsx b/src/features/ai/components/sections/LossFunctions.tsx new file mode 100644 index 0000000..acd55d5 --- /dev/null +++ b/src/features/ai/components/sections/LossFunctions.tsx @@ -0,0 +1,315 @@ +import React, { useState, useCallback } from 'react'; +import SectionLayout from '../../../../components/shared/SectionLayout'; +import ThemeCard from '../../../../components/shared/ThemeCard'; +import { Play, RefreshCw, Lightbulb } from 'lucide-react'; + +const LossFunctions: React.FC = () => { + const [arrows, setArrows] = useState<{ x: number; y: number }[]>([]); + const [totalLoss, setTotalLoss] = useState(0); + const trueX = 300; + const trueY = 200; + + const shootArrow = useCallback(() => { + const spread = 120 - arrows.length * 8; + const actualSpread = Math.max(20, spread); + const newX = trueX + (Math.random() - 0.5) * actualSpread * 2; + const newY = trueY + (Math.random() - 0.5) * actualSpread * 2; + const dist = Math.sqrt((newX - trueX) ** 2 + (newY - trueY) ** 2); + setArrows((prev) => [...prev, { x: newX, y: newY }]); + setTotalLoss((prev) => prev + dist); + }, [arrows.length]); + + const shootBatch = useCallback(() => { + for (let i = 0; i < 10; i++) { + setTimeout(() => shootArrow(), i * 100); + } + }, [shootArrow]); + + const handleReset = useCallback(() => { + setArrows([]); + setTotalLoss(0); + }, []); + + const heroContent = ( +
+

Forward Pass & Loss Functions

+

+ How does an AI know it made a mistake? The forward pass makes a prediction, + and the loss function measures exactly how wrong that prediction was. The + entire goal of training is to make this number as small as possible. +

+
+ ); + + const mainContent = ( + <> + {/* Simple explanation */} +
+
+
+ +
+
+

Explain Like I'm 10 🧒

+

+ Imagine playing a game where you throw a ball at a target with your eyes closed. After + each throw, your friend tells you: "You were 20 inches away!" or{' '} + "Only 3 inches that time!" +

+

+ That distance measurement IS the loss function. It's just a{' '} + number that tells you how wrong you were. The goal is to get that + number to zero — a perfect bullseye!

+
+
+
+ + {/* How forward pass works */} + +

+ 🔄 What Is a "Forward Pass"?

+

+ A forward pass is when you feed data into the neural network and it produces a prediction. + The data flows forward from input → through hidden layers → to output.

+
+ {[ + { + label: 'Input Data', + color: 'bg-blue-100 text-blue-800', + desc: '(e.g., a photo)', + }, + { label: 'โ†’', color: 'text-gray-400', desc: '' }, + { + label: 'Network computes', + color: 'bg-purple-100 text-purple-800', + desc: '(weights ร— inputs + bias)', + }, + { label: 'โ†’', color: 'text-gray-400', desc: '' }, + { + label: 'Prediction', + color: 'bg-rose-100 text-rose-800', + desc: '(e.g., "cat")', + }, + { label: 'โ†’', color: 'text-gray-400', desc: '' }, + { + label: 'Compare to Truth', + color: 'bg-amber-100 text-amber-800', + desc: '(was actually "dog")', + }, + { label: '=', color: 'text-gray-400 font-bold text-xl', desc: '' }, + { label: 'Loss!', color: 'bg-red-100 text-red-800 font-bold', desc: '' }, + ].map((item, i) => + item.desc === '' && item.label.length <= 2 ? ( + + {item.label} + + ) : ( +
+ {item.label} + {item.desc &&
{item.desc}
} +
+ ) + )} +
+
+ +
+

The Prediction Target

+
+ + + +
+
+ +
+ {/* Stats bar */} +
+
+ Predictions:{' '} + {arrows.length} +
+
+ Total Error:{' '} + {totalLoss.toFixed(1)} +
+
+ Mean Error (MAE):{' '} + + {arrows.length > 0 ? (totalLoss / arrows.length).toFixed(1) : 'โ€”'} + +
+
+ + + {/* Concentric target rings */} + {[140, 110, 80, 50, 20].map((r, i) => ( + + ))} + + + True Value + + + {/* Error lines and arrows */} + {arrows.map((a, i) => { + const dist = Math.sqrt((a.x - trueX) ** 2 + (a.y - trueY) ** 2); + return ( + + + + + {dist.toFixed(1)} + + + ); + })} + +
+
+ + +

+ ๐Ÿน The Analogy: Blindfolded Archery Tournament +

+

+ Imagine a child learning archery while blindfolded. They can't see the target, but + after each shot, a referee shouts out exactly how far the arrow landed from the bullseye. +

+
+
+

๐Ÿน Arrow = Forward Pass

+

+ Each arrow shot is one prediction. The archer uses their current stance, arm angle, + and strength (weights and biases) to aim. +

+
+
+

๐Ÿ“ Distance = Loss

+

+ The referee measures how far the arrow is from the bullseye. "20 inches + away!" — that's the loss value. Zero means a perfect hit.

+
+
+

🎯 Goal = Minimize

+

+ The archer adjusts their aim each round, trying to reduce the distance. Over hundreds + of shots, they get closer and closer to the bullseye. +

+
+
+
+ + +

Mean Absolute Error (MAE)

+
+
+ + MAE = (1/n) × Σ |y<sub>actual</sub> − y<sub>predicted</sub>| +
+

+ The average distance between where you aimed and where you should have aimed — across + ALL your shots.

+
+
+

🧒 In everyday terms:

+

+ Imagine 5 students took a test. The teacher predicted their scores: 70, 80, 90, 60, 85. + Their actual scores were: 75, 78, 95, 55, 90. The differences are: 5, 2, 5, 5, 5. The + MAE is (5+2+5+5+5)/5 = 4.4 points. That means on average, the + predictions were off by about 4.4 points. +

+
+ + {/* Other loss functions mentioned */} +

Other Common Loss Functions

+
+
+

MSE (Mean Squared Error)

+

+ Like MAE, but squares each error — so big mistakes are punished much more heavily.

+
+ MSE = (1/n) × Σ (actual − predicted)²
+
+
+

Cross-Entropy Loss

+

+ Used for classification (cat vs dog). Measures how surprised the model is by the + actual answer — high surprise = high loss.

+
+ Used by: image classifiers, language models +
+
+
+
+ + {/* Key takeaway */} +
+

🎯 Key Takeaway

+

+ The loss function is the only way a model knows it's wrong. Without + a loss function, a neural network has no feedback and cannot learn. Different tasks use + different loss functions, but they all answer the same question: "How far off was my + prediction?" +

+
+
+  );
+
+  return ;
+};
+
+export default LossFunctions;
diff --git a/src/features/ai/components/sections/MLLifecycle.tsx b/src/features/ai/components/sections/MLLifecycle.tsx
new file mode 100644
index 0000000..dfe6a74
--- /dev/null
+++ b/src/features/ai/components/sections/MLLifecycle.tsx
@@ -0,0 +1,613 @@
+import React, { useState, useEffect, useCallback } from 'react';
+import SectionLayout from '../../../../components/shared/SectionLayout';
+import ThemeCard from '../../../../components/shared/ThemeCard';
+import {
+  RefreshCw,
+  Play,
+  Pause,
+  SkipForward,
+  SkipBack,
+  Lightbulb,
+  ChevronRight,
+} from 'lucide-react';
+
+/** Lifecycle phases in display order; `id` is the lookup key into PHASE_DETAILS. */
+const PHASES = [
+  {
+    id: 'define',
+    label: 'Problem\nDefinition',
+    emoji: '🎯',
+    color: '#e11d48',
+    lightColor: '#ffe4e6',
+  },
+  { id: 'data', label: 'Data\nPreparation', emoji: '🧹', color: '#c026d3', lightColor: '#fae8ff' },
+  { id: 'train', label: 'Model\nTraining', emoji: '🏋️', color: '#7c3aed', lightColor: '#ede9fe' },
+  { id: 'eval', label: 'Model\nEvaluation', emoji: '📊', color: '#2563eb', lightColor: '#dbeafe' },
+  {
+    id: 'deploy',
+    label: 'Model\nDeployment',
+    emoji: '🚀',
+    color: '#059669',
+    lightColor: '#d1fae5',
+  },
+  {
+    id: 'monitor',
+    label: 'Monitoring\n& Maintenance',
+    emoji: '👁️',
+    color: '#d97706',
+    lightColor: '#fef3c7',
+  },
+];
+
+/** Detail-panel copy for each phase, keyed by the phase `id` used in PHASES. */
+const PHASE_DETAILS: Record<
+  string,
+  {
+    title: string;
+    description: string;
+    simpleExplanation: string;
+    realWorldExample: string;
+    outputs: string[];
+    infra: string;
+  }
+> = {
+  define: {
+    title: 'Problem Definition',
+    description:
+      'Identify the specific task the AI must solve, define success metrics, and establish baseline performance requirements. This is where you ask: "What exactly do we want the AI to do?" — without a clear problem, even the best model is useless.',
+    simpleExplanation:
+      'Deciding what question you want the AI to answer. Like choosing what subject to study before opening a textbook.',
+    realWorldExample:
+      'Netflix asking: "Can we predict which shows a user will enjoy?" or a hospital asking: "Can we detect tumors in X-ray images?"',
+    outputs: ['Project Roadmap', 'KPIs & Metrics', 'Data Requirements'],
+    infra: 'Collaboration Platforms',
+  },
+  data: {
+    title: 'Data Preparation',
+    description:
+      'Gather raw data, clean it, handle missing values, and transform it into a format digestible by mathematical models. This is often the most time-consuming phase — data scientists spend ~80% of their time here. Garbage in = garbage out.',
+    simpleExplanation:
+      'Collecting and cleaning ingredients before cooking. If your eggs are rotten, no recipe can save the cake!',
+    realWorldExample:
+      'Collecting 10 million photos of cats and dogs, removing blurry images, ensuring each photo is labeled correctly, and resizing them all to the same dimensions.',
+    outputs: ['Clean Datasets', 'Feature Store', 'Data Versioning'],
+    infra: 'Data Lakes & Feature Stores',
+  },
+  train: {
+    title: 'Model Training',
+    description:
+      'Select optimal architecture, feed prepared data, and iteratively refine hyperparameters to learn underlying patterns. The model sees examples, makes predictions, checks errors, and adjusts — millions of times.',
+    simpleExplanation:
+      'Like a student doing practice problems over and over. Each wrong answer helps them learn the right approach.',
+    realWorldExample:
+      'Training GPT-4 required thousands of GPUs running for months, processing hundreds of billions of words from books, websites, and articles.',
+    outputs: ['Trained Weights', 'Hyperparameters', 'Training Logs'],
+    infra: 'GPU/TPU Clusters',
+  },
+  eval: {
+    title: 'Model Evaluation',
+    description:
+      'Rigorously test the trained model against unseen data to validate generalization. This is like a final exam — you test with questions the model has NEVER seen to make sure it actually learned the concepts, not just memorized answers.',
+    simpleExplanation:
+      'Giving the student a practice test with brand new questions to see if they really understand the material.',
+    realWorldExample:
+      'Testing a self-driving car model on road conditions from cities it was never trained on — rain, snow, night driving.',
+    outputs: ['Accuracy Metrics', 'Confusion Matrix', 'Error Analysis'],
+    infra: 'Model Registry',
+  },
+  deploy: {
+    title: 'Model Deployment',
+    description:
+      'Package the validated model and deploy it behind an API endpoint so real users can use it. This is where the model goes from a lab experiment to a real product millions of people can interact with.',
+    simpleExplanation:
+      "Publishing a finished book — moving it from the author's desk to bookstore shelves where anyone can read it.",
+    realWorldExample:
+      "When you ask Siri a question, your voice is sent to Apple's servers where a deployed speech recognition model converts it to text in milliseconds.",
+    outputs: ['API Endpoint', 'Docker Container', 'Scaling Config'],
+    infra: 'Kubernetes / Cloud',
+  },
+  monitor: {
+    title: 'Monitoring & Maintenance',
+    description:
+      'Continuously track model accuracy, detect data drift (when the real world changes and the model becomes outdated), and trigger retraining when performance degrades. Models are not "set and forget" — they need ongoing care.',
+    simpleExplanation:
+      'Like maintaining a car — regular check-ups, oil changes, and tire rotations to keep it running smoothly.',
+    realWorldExample:
+      'A fraud detection model trained in 2020 may miss new scam patterns in 2024. Monitoring detects the drop in accuracy and triggers retraining with fresh data.',
+    outputs: ['Drift Alerts', 'Performance Logs', 'Retrain Triggers'],
+    infra: 'Feedback Loops & Schedulers',
+  },
+};
+
+/** Interactive ML-lifecycle section: circular phase diagram with autoplay and a detail panel. */
+const MLLifecycle: React.FC = () => {
+  const [activePhase, setActivePhase] = useState<string | null>(null);
+  const [animatingPhase, setAnimatingPhase] = useState(0);
+  const [isPlaying, setIsPlaying] = useState(false);
+  const [speed, setSpeed] = useState(1500);
+  const [pulseOpacity, setPulseOpacity] = useState(1);
+
+  const cx = 300;
+  const cy = 240;
+  const radius = 160;
+
+  // Autoplay: advance one phase every `speed` ms. Both state updates run in the timer
+  // callback rather than inside a state updater, which React requires to be side-effect free.
+  useEffect(() => {
+    if (!isPlaying) return;
+    const timer = setTimeout(() => {
+      const next = (animatingPhase + 1) % PHASES.length;
+      setAnimatingPhase(next);
+      setActivePhase(PHASES[next].id);
+    }, speed);
+    return () => clearTimeout(timer);
+  }, [isPlaying, speed, animatingPhase]);
+
+  // Drives pulseOpacity: oscillates between 0.4 and 1.0 over a 2-second cycle.
+  useEffect(() => {
+    let frame: number;
+    let start: number | undefined;
+    const animate = (ts: number): void => {
+      if (start === undefined) start = ts;
+      const elapsed = (ts - start) % 2000;
+      setPulseOpacity(0.4 + 0.6 * Math.abs(Math.sin((elapsed / 2000) * Math.PI)));
+      frame = requestAnimationFrame(animate);
+    };
+    frame = requestAnimationFrame(animate);
+    return () => cancelAnimationFrame(frame);
+  }, []);
+
+  // Places node `index` on the circle around (cx, cy); index 0 sits at the 12 o'clock position.
+  const getNodePosition = useCallback(
+    (index: number): { x: number; y: number } => {
+      const angle = (index / PHASES.length) * 2 * Math.PI - Math.PI / 2;
+      return {
+        x: cx + radius * Math.cos(angle),
+        y: cy + radius * Math.sin(angle),
+      };
+    },
+    [cx, cy, radius]
+  );
+
+  const detail = activePhase ? PHASE_DETAILS[activePhase] : null;
+
+  const heroContent = (
+
+

The Machine Learning Lifecycle

+

+ Building an AI system isn't just about "training a model." It's a + structured journey with 6 critical phases — and they repeat in a loop, just like how + software gets version updates.

+
+ ); + + const mainContent = ( + <> + {/* Simple explanation */} +
+
+
+ +
+
+

Explain Like I'm 10 ๐Ÿง’

+

+ Building AI is like baking a cake. First you decide what cake to make + (problem definition). Then you buy and prepare ingredients (data + preparation). You mix and bake (training). You{' '} + taste-test it (evaluation). You{' '} + serve it at the party (deployment). And afterward, you{' '} + ask guests if they liked it so you can improve next time + (monitoring). Then the cycle repeats with the next cake! +

+
+
+
+ + {/* Interactive Lifecycle Visualization */} + +
+

Interactive Lifecycle Pipeline

+
+ + + + +
+
+ + {/* Speed control */} +
+ Speed: + setSpeed(3500 - Number(e.target.value))} + className="flex-1 accent-rose-500" + /> + + {speed <= 750 ? '๐Ÿ‡ Fast' : speed <= 1500 ? '๐Ÿšถ Med' : '๐Ÿข Slow'} + +
+ +

+ 💡 Click any phase in the circle to see its details. Use ⏮⏭ to step through phases one + by one.

+ +
+ + + + + + + + + + + + + + + {/* Connection arrows between phases */} + {PHASES.map((_, i) => { + const from = getNodePosition(i); + const to = getNodePosition((i + 1) % PHASES.length); + const midX = (from.x + to.x) / 2; + const midY = (from.y + to.y) / 2; + const offsetX = (midX - cx) * 0.15; + const offsetY = (midY - cy) * 0.15; + const isActive = animatingPhase === i && isPlaying; + + return ( + + ); + })} + + {/* Feedback loop arrow (Monitor -> Data) */} + + + Feedback Loop + + + {/* Phase nodes */} + {PHASES.map((phase, i) => { + const pos = getNodePosition(i); + const isActive = activePhase === phase.id || (isPlaying && animatingPhase === i); + + return ( + setActivePhase(activePhase === phase.id ? null : phase.id)} + className="cursor-pointer" + role="button" + aria-label={`Phase: ${phase.label.replace('\n', ' ')}`} + > + {/* Glow ring when active */} + {isActive && ( + + )} + {/* Node circle */} + + {/* Emoji */} + + {phase.emoji} + + {/* Label */} + {phase.label.split('\n').map((line, li) => ( + + {line} + + ))} + + ); + })} + + {/* Center label */} + + ML + + + Lifecycle + + +
+ + {/* Detail panel */} + {detail && ( +
+

{detail.title}

+

{detail.description}

+ + {/* Simple explanation callout */} +
+

+ ๐Ÿง’ In simple terms: {detail.simpleExplanation} +

+
+ + {/* Real-world example */} +
+

+ ๐ŸŒ Real-world example: {detail.realWorldExample} +

+
+ +
+
+

Key Outputs

+
+ {detail.outputs.map((o) => ( + + {o} + + ))} +
+
+
+

Infrastructure

+ + {detail.infra} + +
+
+
+ )} + + {!detail && ( +
+

+ ๐Ÿ‘† Click on any phase in the circle above to see detailed information, or use the + play/step controls. +

+
+ )} +
+ + {/* Phases Comparison Table */} + +

๐Ÿ“‹ Phases at a Glance

+
+ + + + + + + + + + + {[ + ['๐ŸŽฏ Problem Definition', 'Define what to solve', '~5%', 'Product + Data Science'], + ['๐Ÿงน Data Preparation', 'Clean & organize data', '~60%', 'Data Engineers'], + ['๐Ÿ‹๏ธ Training', 'Teach the model', '~15%', 'ML Engineers'], + ['๐Ÿ“Š Evaluation', 'Test on new data', '~10%', 'ML Engineers + QA'], + ['๐Ÿš€ Deployment', 'Ship to production', '~5%', 'MLOps / DevOps'], + ['๐Ÿ‘๏ธ Monitoring', 'Watch for degradation', '~5%', 'MLOps + Alerts'], + ].map(([phase, goal, time, who], i) => ( + + + + + + + ))} + +
+ Phase + + Goal + + Time Spent + + Who Does It +
+ {phase} + {goal}{time}{who}
+
+

+ โš ๏ธ Notice that Data Preparation takes ~60% of the time! Most beginners think training is + the hard part, but in reality, getting clean data is the biggest challenge. +

+
+ + {/* Analogy */} + +

+ ๐Ÿƒ The Analogy: Professional Athlete's Journey +

+

+ Imagine the complete career of an Olympic sprinter. Every phase of the ML lifecycle maps + perfectly to their journey: +

+
+ {[ + { + phase: 'Problem Definition', + athlete: + 'Deciding which race to compete in โ€” the 100-meter dash. What does "winning" mean? Breaking 10 seconds? Medaling? You need a clear goal before any training begins.', + color: 'rose', + }, + { + phase: 'Data Preparation', + athlete: + 'The athlete\'s diet, sleep schedule, and conditioning routine. Just like "garbage in, garbage out" in AI โ€” bad nutrition leads to poor performance no matter how hard you train.', + color: 'fuchsia', + }, + { + phase: 'Training', + athlete: + "Daily practice on the track. Running the same drill hundreds of times, making tiny adjustments to form to shave off milliseconds. The athlete's muscles (like model weights) improve with each repetition.", + color: 'violet', + }, + { + phase: 'Evaluation', + athlete: + 'The trial race before the Olympics โ€” a qualifying event with new competitors the athlete has never raced against. This reveals whether the training generalized or was specific to one track.', + color: 'blue', + }, + { + phase: 'Deployment', + athlete: + 'The Olympic final itself โ€” performing under real conditions with real pressure, in front of millions of people. This is where all the preparation is put to the ultimate test.', + color: 'emerald', + }, + { + phase: 'Monitoring', + athlete: + 'Post-race physical therapy, off-season adjustments, and re-evaluating technique. Even gold medalists need to adapt โ€” new competitors emerge, injuries happen, and performance can degrade without maintenance.', + color: 'amber', + }, + ].map((item) => ( +
+ +
+ {item.phase}: + {item.athlete} +
+
+ ))} +
+ + {/* Key takeaway */} +
+

๐ŸŽฏ Key Takeaway

+

+ The ML lifecycle is never truly finished. Just like an athlete + constantly trains, evaluates, and adjusts, AI models need continuous monitoring and + retraining. The feedback loop (monitoring → data preparation) is what separates + production-grade AI from toy experiments.

+
+
+
+  );
+
+  return ;
+};
+
+export default MLLifecycle;
diff --git a/src/features/ai/components/sections/NeuralNetworks.tsx b/src/features/ai/components/sections/NeuralNetworks.tsx
new file mode 100644
index 0000000..356baa8
--- /dev/null
+++ b/src/features/ai/components/sections/NeuralNetworks.tsx
@@ -0,0 +1,512 @@
+import React, { useState, useMemo, useCallback } from 'react';
+import SectionLayout from '../../../../components/shared/SectionLayout';
+import ThemeCard from '../../../../components/shared/ThemeCard';
+import { RefreshCw, Lightbulb } from 'lucide-react';
+
+const LAYER_SIZES = [3, 4, 4, 2];
+const NODE_RADIUS = 20;
+
+/**
+ * Fixed demo inputs fed to the first layer. Module-level so the array identity is stable
+ * across renders (a per-render literal would defeat memoisation of `activations`).
+ */
+const inputs = [0.8, 0.4, 0.6];
+
+/** Logistic sigmoid: squashes any real number into the open interval (0, 1). */
+function sigmoid(x: number): number {
+  return 1 / (1 + Math.exp(-x));
+}
+
+/** Maps an activation (clamped to [0, 1]) onto a blue-to-red `rgb(...)` colour string. */
+function valueToColor(value: number): string {
+  const clamped = Math.max(0, Math.min(1, value));
+  const r = Math.round(225 * clamped);
+  const b = Math.round(225 * (1 - clamped));
+  return `rgb(${r}, 60, ${b})`;
+}
+
+/** Uniform random value in [-1, 1), used for initial weights and biases. */
+function randomParam(): number {
+  return Math.random() * 2 - 1;
+}
+
+/** Builds `weights[l][i][j]`: the edge from node `i` of layer `l` to node `j` of layer `l + 1`. */
+function createRandomWeights(): number[][][] {
+  return LAYER_SIZES.slice(0, -1).map((fanIn, l) =>
+    Array.from({ length: fanIn }, () => Array.from({ length: LAYER_SIZES[l + 1] }, randomParam))
+  );
+}
+
+/** Builds `biases[l][j]`: the bias of node `j` in layer `l + 1` (the input layer has none). */
+function createRandomBiases(): number[][] {
+  return LAYER_SIZES.slice(1).map((size) => Array.from({ length: size }, randomParam));
+}
+
+/** Interactive feed-forward network demo with editable weights and biases. */
+const NeuralNetworks: React.FC = () => {
+  const [weights, setWeights] = useState(createRandomWeights);
+  const [biases, setBiases] = useState(createRandomBiases);
+  const [selectedEdge, setSelectedEdge] = useState<{ l: number; i: number; j: number } | null>(
+    null
+  );
+
+  // Coordinates of every node inside a 560x340 drawing area, indexed as [layer][node].
+  const nodePositions = useMemo(() => {
+    const positions: { x: number; y: number }[][] = [];
+    const width = 560;
+    const height = 340;
+    const layerSpacing = width / (LAYER_SIZES.length + 1);
+
+    for (let l = 0; l < LAYER_SIZES.length; l++) {
+      const layerPositions: { x: number; y: number }[] = [];
+      const nodeSpacing = height / (LAYER_SIZES[l] + 1);
+      for (let n = 0; n < LAYER_SIZES[l]; n++) {
+        layerPositions.push({
+          x: layerSpacing * (l + 1),
+          y: nodeSpacing * (n + 1),
+        });
+      }
+      positions.push(layerPositions);
+    }
+    return positions;
+  }, []);
+
+  // Forward pass: activations[0] is the raw input; each later layer is sigmoid(bias + sum(a*w)).
+  const activations = useMemo(() => {
+    const acts: number[][] = [inputs];
+    for (let l = 0; l < weights.length; l++) {
+      const layerActs: number[] = [];
+      for (let j = 0; j < LAYER_SIZES[l + 1]; j++) {
+        let sum = biases[l][j];
+        for (let i = 0; i < LAYER_SIZES[l]; i++) {
+          sum += acts[l][i] * weights[l][i][j];
+        }
+        layerActs.push(sigmoid(sum));
+      }
+      acts.push(layerActs);
+    }
+    return acts;
+  }, [weights, biases]);
+
+  const handleWeightChange = useCallback((l: number, i: number, j: number, value: number) => {
+    setWeights((prev) => {
+      const next = prev.map((layer) => layer.map((row) => [...row]));
+      next[l][i][j] = value;
+      return next;
+    });
+  }, []);
+
+  const handleBiasChange = useCallback((l: number, j: number, delta: number) => {
+    setBiases((prev) => {
+      const next = prev.map((layer) => [...layer]);
+      next[l][j] = Math.max(-3, Math.min(3, next[l][j] + delta));
+      return next;
+    });
+  }, []);
+
+  const handleReset = useCallback(() => {
+    setWeights(createRandomWeights());
+    setBiases(createRandomBiases());
+    setSelectedEdge(null);
+  }, []);
+
+  const layerLabels = ['Input', 'Hidden 1', 'Hidden 2', 'Output'];
+
+  const heroContent = (
+
+

Neural Networks: Weights & Biases

+

+ A neural network is the brain-inspired architecture that powers almost all modern AI — from + ChatGPT to self-driving cars. It's built from layers of simple math operations that, + when stacked together, can learn incredibly complex patterns.

+
+ ); + + const mainContent = ( + <> + {/* Simple explanation */} +
+
+
+ +
+
+

Explain Like I'm 10 ๐Ÿง’

+

+ Imagine a team of friends playing a guessing game. You whisper three numbers to the + first row of friends. Each friend{' '} + multiplies your number by how important they think it is (that's + the weight), then adds their personal opinion (that's the bias + ), and passes the result to the next row. +

+

+ After going through several rows, the last row gives you a final answer. If the answer + is wrong, everyone adjusts their importance multipliers a tiny bit. After millions of + rounds, the team gets incredibly accurate. That's a neural + network! +

+
+
+
+ + {/* What's happening mathematically */} + +

๐Ÿ“ The Math Behind Each Neuron

+

+ Every single neuron (circle) in the network does just three things: +

+
+
+
โœ–๏ธ
+

1. Multiply

+

Each input ร— its weight

+
+ xโ‚ยทwโ‚ + xโ‚‚ยทwโ‚‚ + xโ‚ƒยทwโ‚ƒ +
+
+
+
โž•
+

2. Add Bias

+

Add the neuron's baseline

+
+ sum + b = z +
+
+
+
๐Ÿ”„
+

3. Activate

+

Squash result to 0-1 range

+
+ σ(z) = 1/(1+e⁻ᶻ)
+
+
+
+

+ Why the activation function? Without it, stacking layers would just be + one big multiplication — you could simplify the whole network to a single layer. The + activation function (like sigmoid) introduces a "curve" that lets the network + learn complex, non-linear patterns like shapes in images or context in text.

+
+
+ +
+

Interactive Neural Network

+ +
+
+

+ ๐ŸŽฎ How to interact: +

+
    +
  • + โ€ข Click a connection line to select it, then use the slider to adjust + its weight +
  • +
  • + โ€ข Scroll on a node (middle layers) to change its bias value +
  • +
  • โ€ข Watch how inputs (left) flow through the network and produce outputs (right)
  • +
  • + โ€ข Green lines = positive influence, Red lines = + negative influence, Thicker = stronger +
  • +
+
+ +
+ + {/* Layer labels */} + {nodePositions.map((layer, l) => ( + + {layerLabels[l]} + + ))} + + {/* Edges */} + {weights.map((layer, l) => + layer.map((row, i) => + row.map((w, j) => { + const from = nodePositions[l][i]; + const to = nodePositions[l + 1][j]; + const absW = Math.abs(w); + const isSelected = + selectedEdge?.l === l && selectedEdge?.i === i && selectedEdge?.j === j; + + return ( + = 0 ? '#34d399' : '#f87171'} + strokeWidth={Math.max(0.5, absW * 3)} + opacity={isSelected ? 1 : 0.25 + absW * 0.5} + className="cursor-pointer transition-all duration-200" + onClick={() => setSelectedEdge(isSelected ? null : { l, i, j })} + /> + ); + }) + ) + )} + + {/* Nodes */} + {nodePositions.map((layer, l) => + layer.map((pos, n) => { + const activation = activations[l][n]; + return ( + { + e.preventDefault(); + if (l > 0) { + handleBiasChange(l - 1, n, e.deltaY > 0 ? -0.2 : 0.2); + } + }} + > + + + {activation.toFixed(2)} + + {l > 0 && ( + + b={biases[l - 1][n].toFixed(1)} + + )} + + ); + }) + )} + + {/* Input labels */} + {inputs.map((val, i) => ( + + x{i + 1}={val} + + ))} + + {/* Output labels */} + {activations[activations.length - 1].map((val, i) => ( + + {val >= 0.5 ? 'โ—' : 'โ—‹'} {val.toFixed(3)} + + ))} + + + {/* Weight slider */} + {selectedEdge && ( +
+ + w[{selectedEdge.l}][{selectedEdge.i}โ†’{selectedEdge.j}] + + + handleWeightChange( + selectedEdge.l, + selectedEdge.i, + selectedEdge.j, + parseFloat(e.target.value) + ) + } + className="flex-1 accent-rose-500" + /> + + {weights[selectedEdge.l][selectedEdge.i][selectedEdge.j].toFixed(1)} + +
+ )} +
+ +
+
+
+ Positive Weight +
+
+
+ Negative Weight +
+
+
+
+
+
+
+ Cold โ†’ Hot (Activation) +
+
+
+ + {/* Analogy */} + +

๐Ÿ• The Analogy: Pizza Taste Test

+

+ Imagine 100 children rating how delicious different pizzas are. Each pizza has three + toppings (inputs): cheese amount, pepperoni count, and{' '} + pineapple slices. +

+
+
+

Weights = Preferences

+

+ Each child has different taste preferences. A pepperoni lover has a{' '} + high pepperoni weight (like +2.5), while a pineapple hater has a{' '} + negative pineapple weight (like -1.5). These weights represent how + much each topping matters to each child's final score. +

+
+
+

Bias = Base Mood

+

+ Some children are always hungry (positive bias = rate any pizza highly). Others are + picky (negative bias = start with low expectations). The bias shifts the entire + scoring up or down regardless of toppings. +

+
+
+

+ Training the network means figuring out the exact preferences (weights) + and moods (biases) of millions of children, so that their combined votes perfectly predict + the quality of any pizza — even one they've never tasted!

+
+

+ 🎯 Key insight: A single neuron is simple — just multiply, add, and + squash. But when you connect thousands of neurons in layers, they can recognize faces, + translate languages, and play chess. Complexity emerges from simplicity!

+
+
+ + {/* Component reference table */} + +

Neural Network Components

+

+ Here's a quick reference for the four key building blocks of every neural network: +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Component + + What It Is + + Everyday Analogy + + Learnable? +
Inputs (x)Raw data features fed into the networkPizza toppingsNo (fixed)
Weights (w)How important each input is to the neuronHow much a child likes each toppingโœ“ Yes
Biases (b)Baseline activation regardless of input + The child's hunger level (base mood) + โœ“ Yes
Activation fnIntroduces non-linearity (sigmoid, ReLU) + "Am I excited enough to pass it on?" + No (chosen by engineer)
+
+
+ + {/* Brief history */} +
+

๐Ÿ“œ Brief History

+

+ The first trainable artificial neuron (the "Perceptron") was invented by Frank Rosenblatt + in 1957. It could only solve simple problems. It took until the 2010s — + with massive data and powerful GPUs — for neural networks with many layers ("deep + learning") to become practical. GPT-3 has 175 billion{' '} + parameters (weights + biases), but each neuron still does the same simple + multiply-add-activate operation shown above.

+
+
+  );
+
+  return ;
+};
+
+export default NeuralNetworks;
diff --git a/src/features/ai/components/sections/RAGPipeline.tsx b/src/features/ai/components/sections/RAGPipeline.tsx
new file mode 100644
index 0000000..b12f7ec
--- /dev/null
+++ b/src/features/ai/components/sections/RAGPipeline.tsx
@@ -0,0 +1,476 @@
+import React, { useState, useCallback, useEffect } from 'react';
+import SectionLayout from '../../../../components/shared/SectionLayout';
+import ThemeCard from '../../../../components/shared/ThemeCard';
+import { Play, RotateCcw, Search, Lightbulb } from 'lucide-react';
+
+/** Pipeline stages in execution order; each carries its node coordinates, colour and icon. */
+const stages = [
+  { id: 'query', label: 'User Query', x: 60, y: 180, color: '#3b82f6', icon: '💬' },
+  { id: 'embed', label: 'Embed Query', x: 180, y: 180, color: '#8b5cf6', icon: '🔢' },
+  { id: 'vectordb', label: 'Vector DB', x: 310, y: 180, color: '#06b6d4', icon: '🗄️' },
+  { id: 'retrieve', label: 'Retrieved Context', x: 440, y: 180, color: '#10b981', icon: '📄' },
+  { id: 'augment', label: 'Super Prompt', x: 540, y: 120, color: '#f59e0b', icon: '⚡' },
+  { id: 'llm', label: 'LLM Generate', x: 540, y: 250, color: '#e11d48', icon: '🤖' },
+];
+
+/** Animated walkthrough of a Retrieval-Augmented Generation pipeline. */
+const RAGPipeline: React.FC = () => {
+  const [activeStage, setActiveStage] = useState(-1);
+  const [isPlaying, setIsPlaying] = useState(false);
+  const [queryText] = useState('How many vacation days do I get?');
+
+  // One explanation per entry in `stages`, in the same order.
+  const stageExplanations = [
+    'User types a natural language question into the search bar.',
+    'The query is converted into a numerical vector (embedding) using the same embedding model used during ingestion.',
+    'The query vector dives into the Vector Database. A similarity search finds the closest matching document chunks.',
+    'The top 3 most relevant text chunks are retrieved and converted back to readable English.',
+    'The original query is combined with the retrieved context to form an augmented "Super Prompt."',
+    'The LLM generates a precise, factual answer based strictly on the provided context — no hallucination.',
+  ];
+
+  // While playing, advance one stage every 1.2 s; stop once the last stage is active.
+  useEffect(() => {
+    if (!isPlaying) return;
+    if (activeStage >= stages.length - 1) {
+      setIsPlaying(false);
+      return;
+    }
+    const timeout = setTimeout(() => {
+      setActiveStage((s) => s + 1);
+    }, 1200);
+    return () => clearTimeout(timeout);
+  }, [isPlaying, activeStage]);
+
+  const handlePlay = useCallback(() => {
+    setActiveStage(0);
+    setIsPlaying(true);
+  }, []);
+
+  const handleReset = useCallback(() => {
+    setIsPlaying(false);
+    setActiveStage(-1);
+  }, []);
+
+  // Hard-coded sample chunks standing in for the vector-database search results.
+  const retrievedChunks = [
+    {
+      id: 1,
+      text: 'Section 4.2: Full-time employees accrue 15 days of paid time off per calendar year.',
+    },
+    { id: 2, text: 'Section 4.3: After 5 years of service, PTO increases to 20 days annually.' },
+    { id: 3, text: 'Section 4.5: Unused PTO may be carried over up to a maximum of 5 days.' },
+  ];
+
+  const heroContent = (
+
+

RAG Pipeline

+

+ Large Language Models are incredibly smart — but they can also confidently make things up. + Retrieval-Augmented Generation (RAG) fixes this by giving the LLM an{' '} + open-book advantage: before answering, it retrieves real facts from a + database and uses those facts to generate a grounded, accurate response.

+

+ Click "Run Pipeline" below to watch all 6 stages of RAG in action. +

+
+ ); + + const mainContent = ( + <> + {/* ELI10 box */} +
+
+ +
+

Explain Like I'm 10

+

+ Imagine you're in an open-book test. Without the book, you'd have to guess + the answers — and you might guess wrong! But with the book, you can flip to the right + page, read the exact facts, and write a perfect answer.{' '} + RAG is the open book for AI. It finds the right pages (retrieval), + hands them to the AI (augmentation), and the AI writes a factual answer (generation).

+
+
+
+ +
+

RAG Pipeline Animation

+
+ + +
+
+ + {/* Query bar */} +
+ + {queryText} +
+ +
+ + {/* Connection lines */} + {stages.slice(0, -2).map((s, i) => { + const next = stages[i + 1]; + const isActive = activeStage > i; + return ( + + ); + })} + {/* Retrieved โ†’ Super Prompt */} + {(() => { + const from = stages[3]; + const to = stages[4]; + const isActive = activeStage >= 4; + return ( + + ); + })()} + {/* Super Prompt โ†’ LLM */} + {(() => { + const from = stages[4]; + const to = stages[5]; + const isActive = activeStage >= 5; + return ( + + ); + })()} + {/* Also connect query to augment */} + {activeStage >= 4 && ( + + )} + + {/* Animated pulse traveling along active connection */} + {activeStage >= 0 && activeStage < stages.length && ( + + + + )} + + {/* Stage nodes */} + {stages.map((s, i) => { + const isActive = i <= activeStage; + const isCurrent = i === activeStage; + return ( + + {isCurrent && ( + + + + )} + + + {s.icon} + + + {s.label} + + + ); + })} + + {/* Floating numbers animation during embed stage */} + {activeStage === 1 && ( + + {['0.42', '-0.18', '0.91', '0.33', '-0.67'].map((n, i) => ( + + + {n} + + ))} + + )} + + {/* Vector DB search animation */} + {activeStage === 2 && ( + <> + {[0, 1, 2, 3, 4].map((i) => ( + + + + ))} + + )} + +
+ + {/* Stage explanation */} + {activeStage >= 0 && ( +
+ Stage {activeStage + 1}: {stageExplanations[activeStage]} +
+ )} +
+ + {/* Retrieved chunks */} + {activeStage >= 3 && ( + +

Retrieved Context

+
+ {retrievedChunks.map((chunk) => ( +
+
+ + Chunk {chunk.id} + +

{chunk.text}

+
+
+ + similarity: {(0.95 - chunk.id * 0.05).toFixed(2)} + +
+
+ ))} +
+
+ )} + + {/* Final answer */} + {activeStage >= 5 && ( + +

๐Ÿค– LLM Response

+
+

+ Full-time employees receive 15 days of paid time off per year. After + 5 years of service this increases to 20 days. Up to{' '} + 5 unused days can be carried over to the next year. +

+
+ + โœ“ Grounded in retrieved context + + + โœ“ No hallucination + +
+
+
+ )} + + +

Similarity Metrics

+
+ {[ + { + name: 'Cosine Similarity', + desc: 'Measures the angle between two vectors. Best for semantic similarity regardless of document length.', + icon: '๐Ÿ“', + }, + { + name: 'Euclidean Distance', + desc: 'Straight-line distance between points. Best for exact matches in low-dimensional space.', + icon: '๐Ÿ“', + }, + { + name: 'Dot Product', + desc: 'Multiplies vectors to measure alignment. Fast retrieval with normalized vectors.', + icon: 'โœ–๏ธ', + }, + ].map(({ name, desc, icon }) => ( +
+
{icon}
+

{name}

+

{desc}

+
+ ))} +
+
+ + +

๐Ÿ“š The Smart Librarian

+

+ Think of an LLM as a brilliant, fast-talking student who has read millions of books โ€” but + hasn't read your company's employee handbook. Without RAG, it{' '} + confidently guesses the vacation policy (hallucination). With RAG: +

+
+ {[ + { + step: '1. You Ask', + emoji: '๐Ÿ™‹', + desc: '"How many vacation days do I get?" The question is converted into a meaning-vector.', + }, + { + step: '2. Librarian Searches', + emoji: '๐Ÿ“–', + desc: "The vector database (the library's master index) finds pages about PTO by meaning โ€” not keywords.", + }, + { + step: '3. Student Answers', + emoji: 'โœ๏ธ', + desc: 'The LLM reads the retrieved pages and writes a factual, cited answer. No guessing needed.', + }, + ].map(({ step, emoji, desc }) => ( +
+
{emoji}
+

{step}

+

{desc}

+
+ ))} +
+
+ + {/* Hallucination explainer */} +
+

๐Ÿคฅ What Is a Hallucination?

+

+ When an LLM makes up facts that sound completely true but are wrong, it's called a{' '} + hallucination. For example, an LLM might say "Your company offers + unlimited PTO" — even if it doesn't. This happens because the model is + predicting likely-sounding words, not checking facts.

+
+
+

โŒ Without RAG

+

+ "Based on typical tech company policies, you probably get 20 days PTO." โ€”{' '} + Made up! +

+
+
+

โœ… With RAG

+

+ "According to Section 4.2, you get 15 days PTO. After 5 years, this increases to + 20 days." โ€” Factual! +

+
+
+
+ + {/* Key takeaway */} +
+

๐ŸŽฏ Key Takeaway

+

+ RAG is the most practical way to make AI trustworthy for business applications. Instead of + retraining an entire model (expensive and slow), you give it access to your documents at + query time. The model stays general-purpose, but its answers become{' '} + specific, accurate, and citable. This is how most enterprise chatbots, + internal search tools, and customer support AI systems work today. +

+
+ + ); + + return ; +}; + +export default RAGPipeline; diff --git a/src/features/ai/components/sections/TrainingVsInference.tsx b/src/features/ai/components/sections/TrainingVsInference.tsx new file mode 100644 index 0000000..d009e6b --- /dev/null +++ b/src/features/ai/components/sections/TrainingVsInference.tsx @@ -0,0 +1,505 @@ +import React, { useState, useEffect, useCallback } from 'react'; +import SectionLayout from '../../../../components/shared/SectionLayout'; +import ThemeCard from '../../../../components/shared/ThemeCard'; +import { Play, Pause, Lightbulb } from 'lucide-react'; + +const TrainingVsInference: React.FC = () => { + const [isAnimating, setIsAnimating] = useState(false); + const [epoch, setEpoch] = useState(0); + const [inferenceActive, setInferenceActive] = useState(false); + const [inferenceTimer, setInferenceTimer] = useState(null); + + useEffect(() => { + if (!isAnimating) return; + const interval = setInterval(() => { + setEpoch((e) => (e + 1) % 100); + }, 200); + return () => clearInterval(interval); + }, [isAnimating]); + + const runInference = useCallback(() => { + setInferenceActive(true); + setInferenceTimer(0); + const start = performance.now(); + const tick = (): void => { + const elapsed = performance.now() - start; + setInferenceTimer(Math.round(elapsed)); + if (elapsed < 800) { + requestAnimationFrame(tick); + } else { + setInferenceTimer(12); // simulated final latency + } + }; + requestAnimationFrame(tick); + setTimeout(() => setInferenceActive(false), 2000); + }, []); + + const gearRotation = epoch * 3.6; + + const heroContent = ( +
+

Training vs. Inference

+

+ These are the two fundamentally different phases of every AI system. Training is when the + model learns — it's slow, expensive, and happens once (or rarely). + Inference is when the model answers questions — it's fast, cheap, and + happens millions of times per day. Understanding the difference is critical for anyone + building or using AI in production.

+

+ Click the buttons below to see both phases in action, side by side. +

+
+ ); + + const mainContent = ( + <> + {/* ELI10 box */} +
+
+ +
+

Explain Like I'm 10

+

+ Think of learning to ride a bike. Training is the weeks of practice + with wobbling, falling, and scraping your knees — it's hard and takes a long + time. Inference is when you can finally just hop on and ride to + school without thinking about it. You already learned; now you just do. AI + works the same way!

+
+
+
+ +

Factory vs. Vending Machine

+

+ Training is a massive, expensive factory grinding through data. Inference is a sleek + vending machine dispensing instant predictions. +

+ +
+ {/* Training side */} +
+
+

๐Ÿญ Training Factory

+ +
+ + + {/* Factory building */} + + + {/* Chimney smoke */} + {isAnimating && ( + <> + + + + + + + + + + )} + + + {/* Data truck */} + + + + DATA + + + + + + {/* Gears */} + + + {[0, 45, 90, 135, 180, 225, 270, 315].map((a) => ( + + ))} + + + Epoch {epoch} + + + {/* GPU meter */} + + + + GPU: {isAnimating ? '98%' : '12%'} + + + {/* Cost label */} + + ๐Ÿ’ฐ $$$$ / hour + + +
+ + {/* Inference side */} +
+
+

๐Ÿค– Inference API

+ +
+ + + {/* Sleek machine */} + + + + FROZEN MODEL + + + {/* Input coin */} + {inferenceActive && ( + + + + + + Q + + + )} + + {/* Output product */} + {inferenceActive && inferenceTimer !== null && inferenceTimer >= 300 && ( + + + + + + Prediction + + + )} + + {/* Latency display */} + + + โฑ {inferenceTimer !== null ? `${inferenceTimer}ms` : 'โ€” ms'} + + + {/* GPU meter โ€” light */} + + + + GPU: 5% + + + {/* Cost */} + + ๐Ÿ’ฐ $ / hour + + +
+
+
+ + {/* Comparison table */} + +

Side-by-Side Comparison

+
+ + + + + + + + + + {[ + ['Objective', 'Build model; learn patterns', 'Use frozen model; make predictions'], + ['Math', 'Forward + Backward passes (iterative)', 'Single forward pass only'], + ['Data', 'Massive batches of labeled data', 'Individual live unlabeled data'], + ['Hardware', 'GPU/TPU clusters (expensive)', 'Optimized lighter GPUs/CPUs'], + ['KPI', 'Accuracy, Loss Minimization', 'Latency (ms), Throughput, Cost'], + ['Duration', 'Days to weeks', 'Milliseconds per query'], + ].map(([feature, training, inference]) => ( + + + + + + ))} + +
FeatureTrainingInference
{feature}{training}{inference}
+
+
+ + +

๐Ÿƒ The Marathon Metaphor

+

+ Training a model is like an athlete preparing for a marathon. Inference is race day. + Here's why this metaphor captures the essence of both phases perfectly: +

+
+
+

๐Ÿ‹๏ธ Training = Preparation

+
    +
  • + • Months of effort: Lifting weights, running 20 miles daily, strict + diet +
  • +
  • + • Constantly breaking down: Muscles tear to grow back stronger + (like adjusting weights through backprop) +
  • +
  • + • Needs a coach: Someone to check form and give feedback (like the + loss function) +
  • +
  • + • Immense cost: Gym membership, nutrition, time off work (like GPU + clusters costing $10K+/day) +
  • +
+
+
+

๐Ÿ… Inference = Race Day

+
    +
  • + • Preparation is done: The runner steps to the starting line +
  • +
  • + • No more learning: Not building new muscle mid-race — just + executing +
  • +
  • + • Speed matters: Every millisecond counts (latency is critical) +
  • +
  • + • Low cost per run: Some water and a pair of shoes (like a small + API server) +
  • +
+
+
+
+ + {/* Real-world examples */} +
+

๐ŸŒ Real-World Examples

+
+ {[ + { + app: 'ChatGPT', + training: 'Trained on trillions of words over months using thousands of GPUs', + inference: 'Generates your response in ~1 second', + }, + { + app: 'Google Translate', + training: 'Learned from billions of translated sentence pairs', + inference: 'Translates your text in milliseconds', + }, + { + app: 'Spotify Recommend', + training: 'Analyzed listening patterns of 600M+ users', + inference: 'Picks your next song in <100ms', + }, + ].map(({ app, training, inference }) => ( +
+

{app}

+

+ Training: {training} +

+

+ Inference: {inference} +

+
+ ))} +
+
+ + {/* Key takeaway */} +
+

🎯 Key Takeaway

+

+ Training and inference are like building a car vs. driving it. You wouldn't redesign + the engine every time you drive to work. The model trains once (or periodically), freezes + its learned weights, and then serves predictions at lightning speed. Most of the AI you + interact with daily — search engines, voice assistants, recommendation feeds — is running{' '} + inference, not training. +

+
+ + ); + + return ; +}; + +export default TrainingVsInference; diff --git a/src/features/ai/components/sections/WordEmbeddings.tsx b/src/features/ai/components/sections/WordEmbeddings.tsx new file mode 100644 index 0000000..3d5119d --- /dev/null +++ b/src/features/ai/components/sections/WordEmbeddings.tsx @@ -0,0 +1,498 @@ +import React, { useState, useMemo, useCallback } from 'react'; +import SectionLayout from '../../../../components/shared/SectionLayout'; +import ThemeCard from '../../../../components/shared/ThemeCard'; +import { Search, Lightbulb } from 'lucide-react'; + +interface WordPoint { + word: string; + x: number; + y: number; + category: string; +} + +const words: WordPoint[] = [ + // Royalty cluster + { word: 'King', x: 350, y: 80, category: 'royalty' }, + { word: 'Queen', x: 410, y: 100, category: 'royalty' }, + { word: 'Prince', x: 370, y: 120, category: 'royalty' }, + { word: 'Princess', x: 430, y: 130, category: 'royalty' }, + { word: 'Crown', x: 390, y: 60, category: 'royalty' }, + + // Gender + { word: 'Man', x: 200, y: 90, category: 'gender' }, + { word: 'Woman', x: 260, y: 110, category: 'gender' }, + { word: 'Boy', x: 180, y: 130, category: 'gender' }, + { word: 'Girl', x: 240, y: 150, category: 'gender' }, + + // Emotions cluster + { word: 'Happy', x: 100, y: 260, category: 'emotion' }, + { word: 'Joyful', x: 130, y: 290, category: 'emotion' }, + { word: 'Glad', x: 80, y: 300, category: 'emotion' }, + { word: 'Excited', x: 150, y: 310, category: 'emotion' }, + { word: 'Sad', x: 420, y: 320, category: 'emotion' }, + { word: 'Gloomy', x: 450, y: 340, category: 'emotion' }, + { word: 'Melancholy', x: 390, y: 350, category: 'emotion' }, + + // Food + { word: 'Apple', x: 80, y: 170, category: 'food' }, + { word: 'Orange', x: 100, y: 200, category: 'food' }, + { word: 'Banana', x: 60, y: 220, category: 'food' }, + { word: 'Pizza', x: 140, y: 180, category: 'food' }, + + // Transportation + { word: 'Car', x: 460, y: 200, category: 'transport' }, + 
{ word: 'Airplane', x: 500, y: 180, category: 'transport' }, + { word: 'Train', x: 480, y: 230, category: 'transport' }, + { word: 'Bicycle', x: 440, y: 250, category: 'transport' }, +]; + +const categoryColors: Record = { + royalty: { fill: '#f59e0b', glow: '#fbbf24', label: 'Royalty' }, + gender: { fill: '#8b5cf6', glow: '#a78bfa', label: 'Gender' }, + emotion: { fill: '#ec4899', glow: '#f472b6', label: 'Emotions' }, + food: { fill: '#10b981', glow: '#34d399', label: 'Food' }, + transport: { fill: '#3b82f6', glow: '#60a5fa', label: 'Transport' }, +}; + +const cosineSim = (a: WordPoint, b: WordPoint): number => { + const dist = Math.sqrt((a.x - b.x) ** 2 + (a.y - b.y) ** 2); + return Math.max(0, 1 - dist / 500); +}; + +const WordEmbeddings: React.FC = () => { + const [selected, setSelected] = useState(null); + const [showArithmetic, setShowArithmetic] = useState(false); + + const selectedWord = useMemo(() => words.find((w) => w.word === selected) ?? null, [selected]); + + const neighbors = useMemo(() => { + if (!selectedWord) return []; + return words + .filter((w) => w.word !== selected) + .map((w) => ({ word: w, sim: cosineSim(selectedWord, w) })) + .sort((a, b) => b.sim - a.sim) + .slice(0, 4); + }, [selectedWord, selected]); + + const handleWordClick = useCallback((word: string) => { + setSelected((prev) => (prev === word ? null : word)); + setShowArithmetic(false); + }, []); + + const kingW = words.find((w) => w.word === 'King')!; + const manW = words.find((w) => w.word === 'Man')!; + const womanW = words.find((w) => w.word === 'Woman')!; + const queenW = words.find((w) => w.word === 'Queen')!; + + const heroContent = ( +
+

Word Embeddings

+

+ Computers don't understand words — they understand numbers. Word embeddings are the + breakthrough that lets AI understand meaning by converting each word into a + list of numbers (a vector) where similar meanings have similar numbers. + Words become coordinates in a semantic galaxy, and math becomes the language of meaning. +

+

+ Click any word in the galaxy below to see its nearest semantic neighbors. +

+
+ ); + + const mainContent = ( + <> + {/* ELI10 box */} +
+
+ +
+

Explain Like I'm 10

+

+ Imagine a magical school seating chart where the teacher places kids who are friends + close together and kids who don't know each other far apart. Word embeddings do + the same thing with words — "Happy" sits next to "Joyful," and + both are far from "Car." The computer figures out these friendships by + reading billions of sentences and noticing which words hang out together. +

+
+
+
+ +
+

Semantic Galaxy

+ +
+ +

+ Click any word to see its nearest semantic neighbors and cosine similarity scores. +

+ +
+ + {/* Stars background */} + {Array.from({ length: 40 }, (_, i) => ( + + ))} + + {/* Cluster halos */} + {Object.entries( + words.reduce>((acc, w) => { + if (!acc[w.category]) acc[w.category] = { xs: [], ys: [] }; + acc[w.category].xs.push(w.x); + acc[w.category].ys.push(w.y); + return acc; + }, {}) + ).map(([cat, { xs, ys }]) => { + const cx = xs.reduce((a, b) => a + b, 0) / xs.length; + const cy = ys.reduce((a, b) => a + b, 0) / ys.length; + const r = + Math.max(...xs.map((x) => Math.abs(x - cx)), ...ys.map((y) => Math.abs(y - cy))) + + 30; + return ( + + ); + })} + + {/* Neighbor connections */} + {selectedWord && + neighbors.map(({ word: nw, sim }) => ( + + + + + + {sim.toFixed(2)} + + + ))} + + {/* Vector arithmetic arrows */} + {showArithmetic && ( + + {/* King โ†’ Man (subtract) */} + + + โˆ’ Man + + + {/* + Woman */} + + + {/* Woman โ†’ Queen (result) */} + + + + + + Woman = Queen! + + + {/* Highlight glow on Queen */} + + + + + )} + + {/* Word points */} + {words.map((w) => { + const isSelected = w.word === selected; + const col = categoryColors[w.category]; + return ( + handleWordClick(w.word)} className="cursor-pointer"> + {isSelected && ( + + + + )} + + + {w.word} + + + ); + })} + + {/* Legend */} + {Object.entries(categoryColors).map(([cat, col], i) => ( + + + + {col.label} + + + ))} + + {/* Defs */} + + + + + + + + + +
+ + {/* Selected word info */} + {selectedWord && ( +
+
+ + {selectedWord.word} + + ({categoryColors[selectedWord.category].label}) + +
+
+ Nearest neighbors:{' '} + {neighbors.map((n) => `${n.word.word} (${n.sim.toFixed(2)})`).join(', ')} +
+
+ )} +
+ + {showArithmetic && ( + +

Vector Arithmetic on Language

+
+

+ King + − + Man + + + Woman + ≈ + Queen

+

+ The model has mathematically encoded abstract concepts of gender and royalty as + directional vectors. Subtracting "maleness" and adding "femaleness" navigates to the + correct answer. +

+
+
+ )} + + +

📚 The Magical Library

+

+ Imagine a massive library where books automatically slide across the room to group near + books with similar meaning — not alphabetically. Drop a book about + "Apples" and it slides next to "Oranges" and "Bananas," far + from "Airplanes." +

+
+ {[ + { + concept: 'Each word = a GPS coordinate', + desc: 'Every word gets a unique position in this meaning-space. "Dog" might be at [0.2, 0.8, -0.3, ...].', + emoji: '๐Ÿ“', + }, + { + concept: 'Close = similar meaning', + desc: '"Happy" and "Joyful" are neighbors. "Happy" and "Airplane" are on opposite ends of the library.', + emoji: '๐Ÿค', + }, + { + concept: 'Directions = relationships', + desc: 'The direction from "King" to "Queen" is the same as "Man" to "Woman" โ€” both capture the concept of gender.', + emoji: '๐Ÿงญ', + }, + ].map(({ concept, desc, emoji }) => ( +
+
{emoji}
+

{concept}

+

{desc}

+
+ ))} +
+
+ + {/* How embeddings are created */} +
+

🔬 How Are Embeddings Created?

+

+ The model reads billions of sentences and learns:{' '} + "Words that appear in similar contexts have similar meanings." For + example, "The ___ sat on the throne" could be filled by "King" or + "Queen," so they get similar coordinates. +

+
+ {[ + { + method: 'Word2Vec (2013)', + desc: 'Predicted a word from its neighbors. Started the embedding revolution.', + }, + { + method: 'GloVe (2014)', + desc: 'Used word co-occurrence statistics across the entire corpus.', + }, + { + method: 'BERT/GPT Embeddings', + desc: 'Context-aware: "bank" means different things in "river bank" vs "bank account".', + }, + { + method: 'Typical Dimensions', + desc: '100 to 1,536 numbers per word. GPT-3 uses 12,288-dimensional vectors.', + }, + ].map(({ method, desc }) => (
+ {method} +

{desc}

+
+ ))} +
+
+ + {/* Key takeaway */} +
+

🎯 Key Takeaway

+

+ Word embeddings are the foundation of all modern NLP. They solve a fundamental problem: + computers only understand numbers, but language is made of words. By converting words into + vectors where meaning = position, we enable math on language — and + that's what makes search engines, chatbots, translation, and sentiment analysis + possible. +

+
+ + ); + + return ; +}; + +export default WordEmbeddings; diff --git a/src/features/ai/index.ts b/src/features/ai/index.ts new file mode 100644 index 0000000..3534a30 --- /dev/null +++ b/src/features/ai/index.ts @@ -0,0 +1,20 @@ +/** + * AI Fundamentals Feature Module + * Entry point for the AI Fundamentals learning module + * + * Covers artificial intelligence concepts including: + * - Machine Learning Lifecycle + * - Feature Engineering and Data Preparation + * - Neural Networks, Weights, and Biases + * - Forward Pass and Loss Functions + * - Gradient Descent Optimization + * - Backpropagation and the Chain Rule + * - Overfitting, Underfitting, and the Bias-Variance Tradeoff + * - Training vs. Inference + * - Word Embeddings and Vectorization + * - RAG and Vector Databases + * - AI Interface Design Patterns + */ + +export { default } from './AIFundamentalsPage'; +export { default as AIFundamentalsPage } from './AIFundamentalsPage'; diff --git a/src/pages/Home.tsx b/src/pages/Home.tsx index a38b038..bb71e95 100644 --- a/src/pages/Home.tsx +++ b/src/pages/Home.tsx @@ -158,10 +158,23 @@ const Home: React.FC = () => { level: 'Advanced', duration: '3-4 hours', }, + { + icon: , + title: 'AI Fundamentals', + description: + 'Master machine learning from scratch โ€” neural networks, gradient descent, embeddings, and RAG pipelines through interactive visualizations', + path: '/ai', + color: 'rose', + gradient: 'from-rose-600 to-fuchsia-600', + bgGradient: 'from-rose-50 to-fuchsia-50', + topics: ['Neural Networks', 'Gradient Descent', 'Word Embeddings', 'RAG Pipeline'], + level: 'Beginner Friendly', + duration: '3-4 hours', + }, ]; const stats = [ - { value: '10+', label: 'Interactive Modules', icon: }, + { value: '11+', label: 'Interactive Modules', icon: }, { value: '70+', label: 'Visualizations', icon: }, { value: '1000+', label: 'Code Examples', icon: }, { value: '24/7', label: 'Available Learning', icon: }, @@ -191,10 +204,11 @@ const Home: React.FC = () => { }, { 
phase: 'Architecture & Systems', - modules: [allModules[7], allModules[8], allModules[9]], // Python, System Design, TypeScript + modules: [allModules[7], allModules[8], allModules[9], allModules[10]], // Python, System Design, TypeScript, AI icon: , color: 'emerald', - description: 'Master system design patterns and multi-language architectures', + description: + 'Master system design patterns, multi-language architectures, and AI fundamentals', }, ]; @@ -395,7 +409,7 @@ const Home: React.FC = () => { }} className="inline-flex items-center space-x-2 text-indigo-600 font-semibold hover:text-indigo-700 transition-colors" > - View All 10 Modules + View All 11 Modules diff --git a/src/utils/theme.ts b/src/utils/theme.ts index bb49088..c3a1900 100644 --- a/src/utils/theme.ts +++ b/src/utils/theme.ts @@ -84,6 +84,14 @@ export const theme = { border: 'indigo-100', shadow: 'indigo-200', }, + ai: { + primary: 'rose', + secondary: 'fuchsia', + accent: 'violet', + gradient: 'from-rose-50 via-fuchsia-50 to-violet-50', + border: 'rose-200', + shadow: 'rose-200', + }, playground: { primary: 'blue', secondary: 'indigo',