@@ -46,12 +46,12 @@ def __init__(self, n_components: int | None = None) -> None:
4646 self .mean_ : np .ndarray | None = None
4747 self .std_ : np .ndarray | None = None
4848
49- def _standardize_data (self , X : np .ndarray ) -> np .ndarray :
49+ def _standardize_data (self , x : np .ndarray ) -> np .ndarray :
5050 """
5151 Standardize the data by mean centering and scaling to unit variance.
5252
5353 Args:
54- X : Input data matrix of shape (n_samples, n_features)
54+ x : Input data matrix of shape (n_samples, n_features)
5555
5656 Returns:
5757 Standardized data matrix
@@ -65,23 +65,23 @@ def _standardize_data(self, X: np.ndarray) -> np.ndarray:
6565 True
6666 """
6767 # Calculate mean and standard deviation
68- self .mean_ = np .mean (X , axis = 0 )
69- self .std_ = np .std (X , axis = 0 , ddof = 0 ) # ddof=0 for population std
68+ self .mean_ = np .mean (x , axis = 0 )
69+ self .std_ = np .std (x , axis = 0 , ddof = 0 ) # ddof=0 for population std
7070
7171 # Avoid division by zero for constant features
7272 self .std_ [self .std_ == 0 ] = 1.0
7373
7474 # Standardize the data
75- X_standardized = (X - self .mean_ ) / self .std_
75+ x_standardized = (x - self .mean_ ) / self .std_
7676
77- return X_standardized
77+ return x_standardized
7878
79- def _compute_covariance_matrix (self , X : np .ndarray ) -> np .ndarray :
79+ def _compute_covariance_matrix (self , x : np .ndarray ) -> np .ndarray :
8080 """
8181 Compute the covariance matrix of the standardized data.
8282
8383 Args:
84- X : Standardized data matrix of shape (n_samples, n_features)
84+ x : Standardized data matrix of shape (n_samples, n_features)
8585
8686 Returns:
8787 Covariance matrix of shape (n_features, n_features)
@@ -95,9 +95,9 @@ def _compute_covariance_matrix(self, X: np.ndarray) -> np.ndarray:
9595 >>> np.allclose(cov_matrix, cov_matrix.T) # Symmetric matrix
9696 True
9797 """
98- n_samples = X .shape [0 ]
98+ n_samples = x .shape [0 ]
9999 # Covariance matrix = (X^T * X) / (n_samples - 1)
100- covariance_matrix = np .dot (X .T , X ) / (n_samples - 1 )
100+ covariance_matrix = np .dot (x .T , x ) / (n_samples - 1 )
101101 return covariance_matrix
102102
103103 def _eigenvalue_decomposition (
@@ -130,12 +130,12 @@ def _eigenvalue_decomposition(
130130
131131 return eigenvalues , eigenvectors
132132
133- def fit (self , X : np .ndarray ) -> "PCAFromScratch" :
133+ def fit (self , x : np .ndarray ) -> "PCAFromScratch" :
134134 """
135135 Fit PCA to the data.
136136
137137 Args:
138- X : Input data matrix of shape (n_samples, n_features)
138+ x : Input data matrix of shape (n_samples, n_features)
139139
140140 Returns:
141141 Self for method chaining
@@ -146,10 +146,10 @@ def fit(self, X: np.ndarray) -> "PCAFromScratch":
146146 >>> isinstance(fitted, PCAFromScratch)
147147 True
148148 """
149- if X .ndim != 2 :
149+ if x .ndim != 2 :
150150 raise ValueError ("Input data must be 2-dimensional" )
151151
152- n_samples , n_features = X .shape
152+ n_samples , n_features = x .shape
153153
154154 # Set default number of components
155155 if self .n_components is None :
@@ -164,10 +164,10 @@ def fit(self, X: np.ndarray) -> "PCAFromScratch":
164164 )
165165
166166 # Standardize the data
167- X_standardized = self ._standardize_data (X )
167+ x_standardized = self ._standardize_data (x )
168168
169169 # Compute covariance matrix
170- covariance_matrix = self ._compute_covariance_matrix (X_standardized )
170+ covariance_matrix = self ._compute_covariance_matrix (x_standardized )
171171
172172 # Perform eigenvalue decomposition
173173 eigenvalues , eigenvectors = self ._eigenvalue_decomposition (covariance_matrix )
@@ -184,12 +184,12 @@ def fit(self, X: np.ndarray) -> "PCAFromScratch":
184184
185185 return self
186186
187- def transform (self , X : np .ndarray ) -> np .ndarray :
187+ def transform (self , x : np .ndarray ) -> np .ndarray :
188188 """
189189 Transform data using the fitted PCA.
190190
191191 Args:
192- X : Input data matrix of shape (n_samples, n_features)
192+ x : Input data matrix of shape (n_samples, n_features)
193193
194194 Returns:
195195 Transformed data matrix of shape (n_samples, n_components)
@@ -205,19 +205,19 @@ def transform(self, X: np.ndarray) -> np.ndarray:
205205 raise ValueError ("PCA must be fitted before transform" )
206206
207207 # Standardize the input data using the same parameters as during fit
208- X_standardized = (X - self .mean_ ) / self .std_
208+ x_standardized = (x - self .mean_ ) / self .std_
209209
210210 # Project data onto principal components
211- X_transformed = np .dot (X_standardized , self .components_ )
211+ x_transformed = np .dot (x_standardized , self .components_ )
212212
213- return X_transformed
213+ return x_transformed
214214
215- def fit_transform (self , X : np .ndarray ) -> np .ndarray :
215+ def fit_transform (self , x : np .ndarray ) -> np .ndarray :
216216 """
217217 Fit PCA and transform data in one step.
218218
219219 Args:
220- X : Input data matrix of shape (n_samples, n_features)
220+ x : Input data matrix of shape (n_samples, n_features)
221221
222222 Returns:
223223 Transformed data matrix of shape (n_samples, n_components)
@@ -228,14 +228,14 @@ def fit_transform(self, X: np.ndarray) -> np.ndarray:
228228 >>> X_transformed.shape
229229 (50, 2)
230230 """
231- return self .fit (X ).transform (X )
231+ return self .fit (x ).transform (x )
232232
233- def inverse_transform (self , X_transformed : np .ndarray ) -> np .ndarray :
233+ def inverse_transform (self , x_transformed : np .ndarray ) -> np .ndarray :
234234 """
235235 Transform data back to original space.
236236
237237 Args:
238- X_transformed : Transformed data matrix of shape (n_samples, n_components)
238+ x_transformed : Transformed data matrix of shape (n_samples, n_components)
239239
240240 Returns:
241241 Data in original space of shape (n_samples, n_features)
@@ -251,12 +251,12 @@ def inverse_transform(self, X_transformed: np.ndarray) -> np.ndarray:
251251 raise ValueError ("PCA must be fitted before inverse_transform" )
252252
253253 # Transform back to standardized space
254- X_standardized = np .dot (X_transformed , self .components_ .T )
254+ x_standardized = np .dot (x_transformed , self .components_ .T )
255255
256256 # Denormalize to original space
257- X_original = (X_standardized * self .std_ ) + self .mean_
257+ x_original = (x_standardized * self .std_ ) + self .mean_
258258
259- return X_original
259+ return x_original
260260
261261
262262def compare_with_sklearn () -> None :
@@ -267,31 +267,31 @@ def compare_with_sklearn() -> None:
267267 very close to the scikit-learn implementation.
268268 """
269269 from sklearn .datasets import make_blobs
270- from sklearn .decomposition import PCA as sklearn_pca
270+ from sklearn .decomposition import PCA
271271
272272 # Generate sample data
273- X , _ = make_blobs (n_samples = 100 , centers = 3 , n_features = 4 , random_state = 42 )
273+ x , _ = make_blobs (n_samples = 100 , centers = 3 , n_features = 4 , random_state = 42 )
274274
275275 # Our implementation
276276 pca_ours = PCAFromScratch (n_components = 2 )
277- X_transformed_ours = pca_ours .fit_transform (X )
277+ x_transformed_ours = pca_ours .fit_transform (x )
278278
279279 # Scikit-learn implementation
280- pca_sklearn = sklearn_pca (n_components = 2 , random_state = 42 )
281- X_transformed_sklearn = pca_sklearn .fit_transform (X )
280+ pca_sklearn = PCA (n_components = 2 , random_state = 42 )
281+ x_transformed_sklearn = pca_sklearn .fit_transform (x )
282282
283283 # Compare results (should be very similar, possibly with different signs)
284284 print ("Our PCA - First 5 rows:" )
285- print (X_transformed_ours [:5 ])
285+ print (x_transformed_ours [:5 ])
286286 print ("\n Scikit-learn PCA - First 5 rows:" )
287- print (X_transformed_sklearn [:5 ])
287+ print (x_transformed_sklearn [:5 ])
288288
289289 print (f"\n Our explained variance ratio: { pca_ours .explained_variance_ratio_ } " )
290290 print (f"Sklearn explained variance ratio: { pca_sklearn .explained_variance_ratio_ } " )
291291
292292 # Check if results are similar (within tolerance)
293293 correlation = np .corrcoef (
294- X_transformed_ours .flatten (), X_transformed_sklearn .flatten ()
294+ x_transformed_ours .flatten (), x_transformed_sklearn .flatten ()
295295 )[0 , 1 ]
296296 print (f"\n Correlation between implementations: { correlation :.6f} " )
297297
@@ -303,26 +303,26 @@ def main() -> None:
303303 # Generate sample data
304304 rng = np .random .default_rng (42 )
305305 n_samples , n_features = 100 , 4
306- X = rng .standard_normal ((n_samples , n_features ))
306+ x = rng .standard_normal ((n_samples , n_features ))
307307
308- print ("Original data shape:" , X .shape )
308+ print ("Original data shape:" , x .shape )
309309 print ("Original data (first 5 rows):" )
310- print (X [:5 ])
310+ print (x [:5 ])
311311
312312 # Apply PCA
313313 pca = PCAFromScratch (n_components = 2 )
314- X_transformed = pca .fit_transform (X )
314+ x_transformed = pca .fit_transform (x )
315315
316- print (f"\n Transformed data shape: { X_transformed .shape } " )
316+ print (f"\n Transformed data shape: { x_transformed .shape } " )
317317 print ("Transformed data (first 5 rows):" )
318- print (X_transformed [:5 ])
318+ print (x_transformed [:5 ])
319319
320320 print (f"\n Explained variance ratio: { pca .explained_variance_ratio_ } " )
321321 print (f"Total variance explained: { np .sum (pca .explained_variance_ratio_ ):.4f} " )
322322
323323 # Demonstrate inverse transform
324- X_reconstructed = pca .inverse_transform (X_transformed )
325- reconstruction_error = np .mean ((X - X_reconstructed ) ** 2 )
324+ x_reconstructed = pca .inverse_transform (x_transformed )
325+ reconstruction_error = np .mean ((x - x_reconstructed ) ** 2 )
326326 print (f"\n Reconstruction error (MSE): { reconstruction_error :.6f} " )
327327
328328 # Compare with sklearn
0 commit comments