Fix ruff format errors in the LOS_prediction and resource_demand notebooks

CaesarGhazi · WuorBhang · commit 4629936d207e · 2025-12-05T10:48:59.000+03:00
diff --git a/4_data_analysis/LOS_prediction.ipynb b/4_data_analysis/LOS_prediction.ipynb
@@ -87,7 +87,7 @@
     "print(f\"Total records in dataset:   {len(df):,}\")\n",
     "print(f\"Records with LOS data:      {len(df_model):,}\")\n",
     "print(f\"Percentage with LOS data:   {len(df_model) / len(df) * 100:.2f}%\")\n",
-    "print(df_model[\"length_of_stay\"].value_counts().sort_index())\n"
+    "print(df_model[\"length_of_stay\"].value_counts().sort_index())"
    ]
   },
   {
@@ -166,7 +166,7 @@
     ")\n",
     "print(\n",
     "    f\"Least common class: {los_counts.index[-1]} ({los_counts.values[-1] / len(df_model) * 100:.1f}%)\"\n",
-    ")\n"
+    ")"
    ]
   },
   {
@@ -267,7 +267,7 @@
     "\n",
     "# Check the new balance\n",
     "print(\"New Target Distribution:\")\n",
-    "print(df_model[\"length_of_stay_collapsed\"].value_counts(normalize=True))\n"
+    "print(df_model[\"length_of_stay_collapsed\"].value_counts(normalize=True))"
    ]
   },
   {
@@ -350,7 +350,7 @@
    ],
    "source": [
     "X = df_model[features].copy()\n",
-    "y = df_model['length_of_stay'].copy()\n",
+    "y = df_model[\"length_of_stay\"].copy()\n",
     "\n",
     "print(f\"Initial feature count: {X.shape[1]}\")\n",
     "\n",
@@ -360,16 +360,16 @@
     "if len(missing_features) > 0:\n",
     "    print(\"\\nFeatures with missing values:\")\n",
     "    for feat, count in missing_features.items():\n",
-    "        print(f\"  {feat:30} {count:>8,} ({count/len(X)*100:>5.2f}%)\")\n",
-    "    \n",
+    "        print(f\"  {feat:30} {count:>8,} ({count / len(X) * 100:>5.2f}%)\")\n",
+    "\n",
     "    for col in X.columns:\n",
     "        if X[col].isnull().sum() > 0:\n",
-    "            if X[col].dtype in ['object', 'category']:\n",
-    "                X[col] = X[col].fillna('Unknown')\n",
+    "            if X[col].dtype in [\"object\", \"category\"]:\n",
+    "                X[col] = X[col].fillna(\"Unknown\")\n",
     "            else:\n",
     "                X[col] = X[col].fillna(X[col].median())\n",
     "\n",
-    "categorical_cols = X.select_dtypes(include=['object', 'category']).columns\n",
+    "categorical_cols = X.select_dtypes(include=[\"object\", \"category\"]).columns\n",
     "X_encoded = pd.get_dummies(X, columns=categorical_cols, drop_first=True)\n",
     "\n",
     "print(f\"\\nFeatures after encoding: {X_encoded.shape[1]}\")\n",
@@ -417,7 +417,7 @@
     "X = df_model[features].copy()\n",
     "\n",
     "# Define Target\n",
-    "y = df_model[\"length_of_stay_collapsed\"].copy() \n",
+    "y = df_model[\"length_of_stay_collapsed\"].copy()\n",
     "\n",
     "# Handle Missing Values & Encoding\n",
     "categorical_cols = X.select_dtypes(include=[\"object\", \"category\"]).columns\n",
@@ -433,7 +433,7 @@
     "X_train_resampled, y_train_resampled = rus.fit_resample(X_train, y_train)\n",
     "\n",
     "print(\"Target Class Distribution in Training:\")\n",
-    "print(y_train_resampled.value_counts(normalize=True))\n"
+    "print(y_train_resampled.value_counts(normalize=True))"
    ]
   },
   {
@@ -486,7 +486,7 @@
     "    estimator=rf,\n",
     "    param_distributions=param_dist,\n",
     "    n_iter=20,\n",
-    "    cv=3, \n",
+    "    cv=3,\n",
     "    verbose=2,\n",
     "    random_state=42,\n",
     "    n_jobs=-1,\n",
@@ -561,7 +561,7 @@
     "\n",
     "# This ensures consistency for training and validation splits\n",
     "X_train_resampled = clean_cols_lgbm(X_train_resampled.copy())\n",
-    "X_test = clean_cols_lgbm(X_test.copy())  \n",
+    "X_test = clean_cols_lgbm(X_test.copy())\n",
     "\n",
     "X_train_sub, X_val, y_train_sub, y_val = train_test_split(\n",
     "    X_train_resampled, y_train_resampled, test_size=0.1, random_state=42\n",
@@ -600,7 +600,7 @@
     "\n",
     "print(f\"\\nLightGBM Final Iteration: {best_iteration}\")\n",
     "print(f\"LightGBM Accuracy: {accuracy_gb:.4f} ({accuracy_gb * 100:.2f}%)\")\n",
-    "print(f\"LightGBM Macro F1 Score: {f1_gb:.4f}\")\n"
+    "print(f\"LightGBM Macro F1 Score: {f1_gb:.4f}\")"
    ]
   },
   {
@@ -786,7 +786,7 @@
     "plt.title(f\"Top 15 Features Predicting Length of Stay Category ({best_name})\")\n",
     "plt.gca().invert_yaxis()\n",
     "plt.tight_layout()\n",
-    "plt.show()\n"
+    "plt.show()"
    ]
   },
   {
@@ -856,7 +856,7 @@
     "ax2.set_xlabel(\"Predicted\")\n",
     "\n",
     "plt.tight_layout()\n",
-    "plt.show()\n"
+    "plt.show()"
    ]
   },
   {
diff --git a/4_data_analysis/resource_demand.ipynb b/4_data_analysis/resource_demand.ipynb
@@ -91,7 +91,7 @@
    ],
    "source": [
     "missing_pct = (df.isnull().sum() / len(df) * 100).sort_values(ascending=False)\n",
-    "print(missing_pct[missing_pct > 0].head(10))\n"
+    "print(missing_pct[missing_pct > 0].head(10))"
    ]
   },
   {
@@ -186,7 +186,7 @@
     "agg_dict[\"patient_id\"] = \"count\"\n",
     "\n",
     "df_grouped = df_encoded.groupby([\"state\", \"service_type\"], as_index=False).agg(agg_dict)\n",
-    "df_grouped.rename(columns={\"patient_id\": \"total_admissions\"}, inplace=True)\n"
+    "df_grouped.rename(columns={\"patient_id\": \"total_admissions\"}, inplace=True)"
    ]
   },
   {
@@ -337,7 +337,7 @@
    ],
    "source": [
     "models = {\n",
-    "    \"Ridge\": Ridge(alpha=10.0),  \n",
+    "    \"Ridge\": Ridge(alpha=10.0),\n",
     "    \"Random Forest\": RandomForestRegressor(\n",
     "        n_estimators=500,\n",
     "        max_depth=15,\n",
@@ -382,7 +382,7 @@
     "\n",
     "cv = KFold(n_splits=5, shuffle=True, random_state=42)\n",
     "cv_scores = cross_val_score(model, X_train_scaled, y_train_log, cv=cv, scoring=\"r2\")\n",
-    "print(f\"{name} CV R²: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})\")\n"
+    "print(f\"{name} CV R²: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})\")"
    ]
   },
   {
@@ -637,7 +637,7 @@
     "\n",
     "print(\"\\nTop 5 High-Demand Facilities:\")\n",
     "top_demand = df_grouped.nlargest(5, \"predicted_admissions\")[available_cols]\n",
-    "print(top_demand.to_string(index=False))\n"
+    "print(top_demand.to_string(index=False))"
    ]
   },
   {
@@ -732,7 +732,7 @@
     "ax.axhline(y=0, color=\"k\", linestyle=\"-\", linewidth=0.5)\n",
     "\n",
     "plt.tight_layout()\n",
-    "plt.show()\n"
+    "plt.show()"
    ]
   }
  ],

Original file line number	Diff line number	Diff line change
`@@ -91,7 +91,7 @@`
`91`	`91`	`],`
`92`	`92`	`"source": [`
`93`	`93`	`"missing_pct = (df.isnull().sum() / len(df) * 100).sort_values(ascending=False)\n",`
`94`		`- "print(missing_pct[missing_pct > 0].head(10))\n"`
	`94`	`+ "print(missing_pct[missing_pct > 0].head(10))"`
`95`	`95`	`]`
`96`	`96`	`},`
`97`	`97`	`{`
`@@ -186,7 +186,7 @@`
`186`	`186`	`"agg_dict[\"patient_id\"] = \"count\"\n",`
`187`	`187`	`"\n",`
`188`	`188`	`"df_grouped = df_encoded.groupby([\"state\", \"service_type\"], as_index=False).agg(agg_dict)\n",`
`189`		`- "df_grouped.rename(columns={\"patient_id\": \"total_admissions\"}, inplace=True)\n"`
	`189`	`+ "df_grouped.rename(columns={\"patient_id\": \"total_admissions\"}, inplace=True)"`
`190`	`190`	`]`
`191`	`191`	`},`
`192`	`192`	`{`
`@@ -337,7 +337,7 @@`
`337`	`337`	`],`
`338`	`338`	`"source": [`
`339`	`339`	`"models = {\n",`
`340`		`- " \"Ridge\": Ridge(alpha=10.0), \n",`
	`340`	`+ " \"Ridge\": Ridge(alpha=10.0),\n",`
`341`	`341`	`" \"Random Forest\": RandomForestRegressor(\n",`
`342`	`342`	`" n_estimators=500,\n",`
`343`	`343`	`" max_depth=15,\n",`
`@@ -382,7 +382,7 @@`
`382`	`382`	`"\n",`
`383`	`383`	`"cv = KFold(n_splits=5, shuffle=True, random_state=42)\n",`
`384`	`384`	`"cv_scores = cross_val_score(model, X_train_scaled, y_train_log, cv=cv, scoring=\"r2\")\n",`
`385`		`- "print(f\"{name} CV R²: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})\")\n"`
	`385`	`+ "print(f\"{name} CV R²: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})\")"`
`386`	`386`	`]`
`387`	`387`	`},`
`388`	`388`	`{`
`@@ -637,7 +637,7 @@`
`637`	`637`	`"\n",`
`638`	`638`	`"print(\"\\nTop 5 High-Demand Facilities:\")\n",`
`639`	`639`	`"top_demand = df_grouped.nlargest(5, \"predicted_admissions\")[available_cols]\n",`
`640`		`- "print(top_demand.to_string(index=False))\n"`
	`640`	`+ "print(top_demand.to_string(index=False))"`
`641`	`641`	`]`
`642`	`642`	`},`
`643`	`643`	`{`
`@@ -732,7 +732,7 @@`
`732`	`732`	`"ax.axhline(y=0, color=\"k\", linestyle=\"-\", linewidth=0.5)\n",`
`733`	`733`	`"\n",`
`734`	`734`	`"plt.tight_layout()\n",`
`735`		`- "plt.show()\n"`
	`735`	`+ "plt.show()"`
`736`	`736`	`]`
`737`	`737`	`}`
`738`	`738`	`],`