|
87 | 87 | " ax.set_zlabel(features_3d[2], labelpad=8)\n", |
88 | 88 | "\n", |
89 | 89 | "\n", |
90 | | - "fig, axes = plt.subplots(1, 2, figsize=(15, 5), subplot_kw={\"projection\": \"3d\"})\n", |
| 90 | + "fig, axes = plt.subplots(\n", |
| 91 | + " 1, 2, figsize=(15, 5), subplot_kw={\"projection\": \"3d\"}\n", |
| 92 | + ")\n", |
91 | 93 | "\n", |
92 | 94 | "for ax, title in zip(axes, (\"Raw\", \"StandardScaler\")):\n", |
93 | 95 | " X_t = X_3D if title == \"Raw\" else standard_scaler.fit_transform(X_3D)\n", |
94 | | - " ax.scatter(X_t[\"GrLivArea\"], X_t[\"OverallQual\"], X_t[\"YearBuilt\"], alpha=0.2, s=5)\n", |
| 96 | + " ax.scatter(\n", |
| 97 | + " X_t[\"GrLivArea\"], X_t[\"OverallQual\"], X_t[\"YearBuilt\"], alpha=0.2, s=5\n", |
| 98 | + " )\n", |
95 | 99 | " set_equal_3d_axes(ax, X_t)\n", |
96 | 100 | " ax.set_title(title)\n", |
97 | 101 | " ax.view_init(elev=20, azim=30)\n", |
|
205 | 209 | " ax.set_xticklabels(feature_names, rotation=45, ha=\"right\", fontsize=14)\n", |
206 | 210 | " ax.set_yticks(range(len(components)))\n", |
207 | 211 | " ax.set_yticklabels(\n", |
208 | | - " [f\"PC{i + 1}\\n({v:.1%})\" for i, v in enumerate(pca.explained_variance_ratio_)],\n", |
| 212 | + " [\n", |
| 213 | + " f\"PC{i + 1}\\n({v:.1%})\"\n", |
| 214 | + " for i, v in enumerate(pca.explained_variance_ratio_)\n", |
| 215 | + " ],\n", |
209 | 216 | " fontsize=14,\n", |
210 | 217 | " )\n", |
211 | 218 | " return im\n", |
|
280 | 287 | "metadata": {}, |
281 | 288 | "outputs": [], |
282 | 289 | "source": [ |
| 290 | + "%pip install skrub\n", |
283 | 291 | "from sklearn.preprocessing import RobustScaler\n", |
284 | 292 | "from skrub import SquashingScaler\n", |
285 | 293 | "\n", |
|
642 | 650 | "source": [ |
643 | 651 | "from sklearn.preprocessing import OneHotEncoder\n", |
644 | 652 | "\n", |
645 | | - "pipe_ohe = make_pipeline(OneHotEncoder(sparse_output=False), PCA(n_components=8))\n", |
| 653 | + "pipe_ohe = make_pipeline(\n", |
| 654 | + " OneHotEncoder(sparse_output=False), PCA(n_components=8)\n", |
| 655 | + ")\n", |
646 | 656 | "pipe_ohe.fit(data[[\"Neighborhood\"]])\n", |
647 | 657 | "categories = pipe_ohe[0].categories_[0]\n", |
648 | 658 | "\n", |
649 | 659 | "freq_order = data[\"Neighborhood\"].value_counts(normalize=True)\n", |
650 | 660 | "sorted_idx = np.searchsorted(categories, freq_order.index)\n", |
651 | 661 | "\n", |
652 | 662 | "fig, ax = plt.subplots(figsize=(18, 6))\n", |
653 | | - "im = plot_sq_loadings(ax, pipe_ohe[-1], categories, col_order=sorted_idx, decimals=1)\n", |
| 663 | + "im = plot_sq_loadings(\n", |
| 664 | + " ax, pipe_ohe[-1], categories, col_order=sorted_idx, decimals=1\n", |
| 665 | + ")\n", |
654 | 666 | "fig.colorbar(im, ax=ax)\n", |
655 | 667 | "plt.show()" |
656 | 668 | ] |
|
691 | 703 | " [\"A\", \"B\", \"C\", \"D\"], size=n, p=[0.60, 0.19, 0.18, 0.03]\n", |
692 | 704 | ").reshape(-1, 1)\n", |
693 | 705 | "\n", |
694 | | - "pipe_ohe = make_pipeline(OneHotEncoder(sparse_output=False), PCA(n_components=3))\n", |
| 706 | + "pipe_ohe = make_pipeline(\n", |
| 707 | + " OneHotEncoder(sparse_output=False), PCA(n_components=3)\n", |
| 708 | + ")\n", |
695 | 709 | "pipe_ohe.fit(categories)\n", |
696 | 710 | "\n", |
697 | 711 | "fig, ax = plt.subplots(figsize=(5, 3))\n", |
|
0 commit comments