"""
t-Distributed Stochastic Neighbor Embedding (t-SNE)
---------------------------------------------------

45t-SNE is a nonlinear dimensionality reduction algorithm used for visualizing
56high-dimensional data in a lower-dimensional (usually 2D or 3D) space.
67
78It models pairwise similarities between points in both the high-dimensional
and low-dimensional spaces, and minimizes the difference between them using
gradient descent.
1011
1112This simplified implementation demonstrates the core idea of t-SNE for
1213educational purposes — it is **not optimized for large datasets**.
1314
1415This implementation:
1516- Computes pairwise similarities in the high-dimensional space.
1617- Computes pairwise similarities in the low-dimensional (embedding) space.
- Minimizes the Kullback–Leibler divergence between these distributions
1819 using gradient descent.
1920- Follows the original t-SNE formulation by van der Maaten & Hinton (2008).
2021
2122References:
2223- van der Maaten, L. and Hinton, G. (2008).
2324 "Visualizing Data using t-SNE". Journal of Machine Learning Research.
2425- https://lvdmaaten.github.io/tsne/

Key Steps:
1. Compute pairwise similarities (P) in high-dimensional space.
2. Initialize low-dimensional map (Y) randomly.
3. Compute pairwise similarities (Q) in low-dimensional space using the
   Student-t distribution.
4. Minimize KL-divergence between P and Q using gradient descent.
3226"""
27+
3328import doctest
29+
3430import numpy as np
3531from sklearn .datasets import load_iris
3632
33+
def collect_dataset() -> tuple[np.ndarray, np.ndarray]:
    """
    Collects the Iris dataset and returns features and labels.

    :return: Tuple containing feature matrix and target labels

    Example:
    >>> x, y = collect_dataset()
    >>> x.shape
    (150, 4)
    >>> y.shape
    (150,)
    """
    # Load the bundled Iris dataset and copy it into plain numpy arrays.
    iris = load_iris()
    features = np.array(iris.data)
    labels = np.array(iris.target)
    return features, labels
5249
def compute_pairwise_affinities(x: np.ndarray, sigma: float = 1.0) -> np.ndarray:
    """
    Computes pairwise affinities (P matrix) in high-dimensional space using a
    Gaussian kernel.

    :param x: Input data of shape (n_samples, n_features)
    :param sigma: Variance (Bandwidth) of the Gaussian kernel
    :return: Symmetrized probability matrix p

    Example:
    >>> import numpy as np
    >>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
    >>> p = compute_pairwise_affinities(x)
    >>> float(round(p[0, 1], 3))
    0.25
    """
    n_samples = x.shape[0]

    # Squared Euclidean distances via ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
    sum_x = np.sum(np.square(x), axis=1)
    d = np.add(np.add(-2 * np.dot(x, x.T), sum_x).T, sum_x)

    # Gaussian kernel; a point has zero affinity with itself.
    p = np.exp(-d / (2 * sigma**2))
    np.fill_diagonal(p, 0)

    # Normalize to a probability distribution, then symmetrize.
    p /= np.sum(p)
    return (p + p.T) / (2 * n_samples)
def compute_low_dim_affinities(y: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """
    Computes low-dimensional similarities (Q matrix) using the Student-t
    distribution (one degree of freedom).

    :param y: Low-dimensional embeddings (n_samples, n_components)
    :return: Tuple (q, num) where q is the probability matrix and num is the
        unnormalized numerator array
    """
    # Squared pairwise distances via ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
    sum_y = np.sum(np.square(y), axis=1)
    num = 1 / (1 + np.add(np.add(-2 * np.dot(y, y.T), sum_y).T, sum_y))

    # Heavy-tailed kernel; self-similarity is excluded before normalizing.
    np.fill_diagonal(num, 0)
    q = num / np.sum(num)
    return q, num
8887
8988
9089def apply_tsne (
@@ -103,9 +102,9 @@ def apply_tsne(
103102 :return: Transformed dataset (low-dimensional embedding)
104103
105104 Example:
106- >>> X , _ = collect_dataset()
107- >>> Y = apply_tsne(X , n_components=2, n_iter=250 )
108- >>> Y .shape
105+ >>> x , _ = collect_dataset()
106+ >>> y_emb = apply_tsne(x , n_components=2, n_iter=50 )
107+ >>> y_emb .shape
109108 (150, 2)
110109 """
111110 if n_components < 1 :
@@ -116,50 +115,49 @@ def apply_tsne(
116115 n_samples = data_x .shape [0 ]
117116
118117 # Initialize low-dimensional map randomly
119- Y = np .random .randn (n_samples , n_components ) * 1e-4
120- P = compute_pairwise_affinities (data_x )
121- P = np .maximum (P , 1e-12 )
118+ y = np .random .randn (n_samples , n_components ) * 1e-4
119+ p = compute_pairwise_affinities (data_x )
120+ p = np .maximum (p , 1e-12 )
122121
123122 # Initialize parameters
124- Y_inc = np .zeros_like (Y )
123+ y_inc = np .zeros_like (y )
125124 momentum = 0.5
126125
127126 for i in range (n_iter ):
128- Q , num = compute_low_dim_affinities (Y )
129- Q = np .maximum (Q , 1e-12 )
127+ q , num = compute_low_dim_affinities (y )
128+ q = np .maximum (q , 1e-12 )
130129
131- PQ = P - Q
130+ pq = p - q
132131
133132 # Compute gradient
134- dY = 4 * (
135- np .dot ((PQ * num ), Y )
136- - np .multiply (np .sum (PQ * num , axis = 1 )[:, np .newaxis ], Y )
133+ d_y = 4 * (
134+ np .dot ((pq * num ), y )
135+ - np .multiply (np .sum (pq * num , axis = 1 )[:, np .newaxis ], y )
137136 )
138137
139138 # Update with momentum and learning rate
140- Y_inc = momentum * Y_inc - learning_rate * dY
141- Y += Y_inc
139+ y_inc = momentum * y_inc - learning_rate * d_y
140+ y += y_inc
142141
143142 # Adjust momentum halfway through
144143 if i == int (n_iter / 4 ):
145144 momentum = 0.8
146145
147- return Y
146+ return y
148147
149148
def main() -> None:
    """
    Driver function for t-SNE demonstration.
    """
    features, labels = collect_dataset()
    embedding = apply_tsne(features, n_components=2, n_iter=300)
    print("t-SNE embedding (first 5 points):")
    print(embedding[:5])

    # Optional visualization (commented to avoid dependency)
    # import matplotlib.pyplot as plt
    # plt.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap="viridis")
    # plt.title("t-SNE Visualization of Iris Dataset")
    # plt.xlabel("Component 1")
    # plt.ylabel("Component 2")
@@ -170,35 +168,26 @@ def main() -> None:
170168 doctest .testmod ()
171169 main ()
172170
171+
173172"""
Explanation of Input and Output
--------------------------------
176175
177176Input:
178177- data_x: numpy array of shape (n_samples, n_features)
179178 Example: Iris dataset (150 samples × 4 features)
- n_components: target dimension (usually 2 or 3 for visualization)
- learning_rate: controls the gradient descent step size
182181- n_iter: number of iterations for optimization
183182
184183Output:
- y: numpy array of shape (n_samples, n_components)
186185 Each row is the low-dimensional embedding of the corresponding high-dimensional point.
187186
188187How it works:
1. Compute high-dimensional similarities (p matrix):
   - Measures how likely points are neighbors in the original space.
2. Initialize the low-dimensional map (y) randomly.
3. Compute low-dimensional similarities (q matrix) using the Student-t
   distribution, whose heavy tail prevents distant points from crowding
   together.
4. Minimize the KL divergence between p and q using gradient descent:
   - If points are too far apart in low dimensions (q < p), pull them closer.
   - If points are too close in low dimensions (q > p), push them apart.
5. Update y with momentum and learning rate, repeating for n_iter iterations.

Why it works:
- t-SNE tries to preserve **local structure**: neighbors stay close in the embedding.
- Distant points may not be perfectly preserved (global structure is secondary).
"""
0 commit comments