Skip to content

Commit 4629936

Browse files
CaesarGhaziWuorBhang
authored and committed
fixing the ruff format errors
1 parent fbcf39c commit 4629936

2 files changed

Lines changed: 22 additions & 22 deletions

File tree

4_data_analysis/LOS_prediction.ipynb

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
"print(f\"Total records in dataset: {len(df):,}\")\n",
8888
"print(f\"Records with LOS data: {len(df_model):,}\")\n",
8989
"print(f\"Percentage with LOS data: {len(df_model) / len(df) * 100:.2f}%\")\n",
90-
"print(df_model[\"length_of_stay\"].value_counts().sort_index())\n"
90+
"print(df_model[\"length_of_stay\"].value_counts().sort_index())"
9191
]
9292
},
9393
{
@@ -166,7 +166,7 @@
166166
")\n",
167167
"print(\n",
168168
" f\"Least common class: {los_counts.index[-1]} ({los_counts.values[-1] / len(df_model) * 100:.1f}%)\"\n",
169-
")\n"
169+
")"
170170
]
171171
},
172172
{
@@ -267,7 +267,7 @@
267267
"\n",
268268
"# Check the new balance\n",
269269
"print(\"New Target Distribution:\")\n",
270-
"print(df_model[\"length_of_stay_collapsed\"].value_counts(normalize=True))\n"
270+
"print(df_model[\"length_of_stay_collapsed\"].value_counts(normalize=True))"
271271
]
272272
},
273273
{
@@ -350,7 +350,7 @@
350350
],
351351
"source": [
352352
"X = df_model[features].copy()\n",
353-
"y = df_model['length_of_stay'].copy()\n",
353+
"y = df_model[\"length_of_stay\"].copy()\n",
354354
"\n",
355355
"print(f\"Initial feature count: {X.shape[1]}\")\n",
356356
"\n",
@@ -360,16 +360,16 @@
360360
"if len(missing_features) > 0:\n",
361361
" print(\"\\nFeatures with missing values:\")\n",
362362
" for feat, count in missing_features.items():\n",
363-
" print(f\" {feat:30} {count:>8,} ({count/len(X)*100:>5.2f}%)\")\n",
364-
" \n",
363+
" print(f\" {feat:30} {count:>8,} ({count / len(X) * 100:>5.2f}%)\")\n",
364+
"\n",
365365
" for col in X.columns:\n",
366366
" if X[col].isnull().sum() > 0:\n",
367-
" if X[col].dtype in ['object', 'category']:\n",
368-
" X[col] = X[col].fillna('Unknown')\n",
367+
" if X[col].dtype in [\"object\", \"category\"]:\n",
368+
" X[col] = X[col].fillna(\"Unknown\")\n",
369369
" else:\n",
370370
" X[col] = X[col].fillna(X[col].median())\n",
371371
"\n",
372-
"categorical_cols = X.select_dtypes(include=['object', 'category']).columns\n",
372+
"categorical_cols = X.select_dtypes(include=[\"object\", \"category\"]).columns\n",
373373
"X_encoded = pd.get_dummies(X, columns=categorical_cols, drop_first=True)\n",
374374
"\n",
375375
"print(f\"\\nFeatures after encoding: {X_encoded.shape[1]}\")\n",
@@ -417,7 +417,7 @@
417417
"X = df_model[features].copy()\n",
418418
"\n",
419419
"# Define Target\n",
420-
"y = df_model[\"length_of_stay_collapsed\"].copy() \n",
420+
"y = df_model[\"length_of_stay_collapsed\"].copy()\n",
421421
"\n",
422422
"# Handle Missing Values & Encoding\n",
423423
"categorical_cols = X.select_dtypes(include=[\"object\", \"category\"]).columns\n",
@@ -433,7 +433,7 @@
433433
"X_train_resampled, y_train_resampled = rus.fit_resample(X_train, y_train)\n",
434434
"\n",
435435
"print(\"Target Class Distribution in Training:\")\n",
436-
"print(y_train_resampled.value_counts(normalize=True))\n"
436+
"print(y_train_resampled.value_counts(normalize=True))"
437437
]
438438
},
439439
{
@@ -486,7 +486,7 @@
486486
" estimator=rf,\n",
487487
" param_distributions=param_dist,\n",
488488
" n_iter=20,\n",
489-
" cv=3, \n",
489+
" cv=3,\n",
490490
" verbose=2,\n",
491491
" random_state=42,\n",
492492
" n_jobs=-1,\n",
@@ -561,7 +561,7 @@
561561
"\n",
562562
"# This ensures consistency for training and validation splits\n",
563563
"X_train_resampled = clean_cols_lgbm(X_train_resampled.copy())\n",
564-
"X_test = clean_cols_lgbm(X_test.copy()) \n",
564+
"X_test = clean_cols_lgbm(X_test.copy())\n",
565565
"\n",
566566
"X_train_sub, X_val, y_train_sub, y_val = train_test_split(\n",
567567
" X_train_resampled, y_train_resampled, test_size=0.1, random_state=42\n",
@@ -600,7 +600,7 @@
600600
"\n",
601601
"print(f\"\\nLightGBM Final Iteration: {best_iteration}\")\n",
602602
"print(f\"LightGBM Accuracy: {accuracy_gb:.4f} ({accuracy_gb * 100:.2f}%)\")\n",
603-
"print(f\"LightGBM Macro F1 Score: {f1_gb:.4f}\")\n"
603+
"print(f\"LightGBM Macro F1 Score: {f1_gb:.4f}\")"
604604
]
605605
},
606606
{
@@ -786,7 +786,7 @@
786786
"plt.title(f\"Top 15 Features Predicting Length of Stay Category ({best_name})\")\n",
787787
"plt.gca().invert_yaxis()\n",
788788
"plt.tight_layout()\n",
789-
"plt.show()\n"
789+
"plt.show()"
790790
]
791791
},
792792
{
@@ -856,7 +856,7 @@
856856
"ax2.set_xlabel(\"Predicted\")\n",
857857
"\n",
858858
"plt.tight_layout()\n",
859-
"plt.show()\n"
859+
"plt.show()"
860860
]
861861
},
862862
{

4_data_analysis/resource_demand.ipynb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
],
9292
"source": [
9393
"missing_pct = (df.isnull().sum() / len(df) * 100).sort_values(ascending=False)\n",
94-
"print(missing_pct[missing_pct > 0].head(10))\n"
94+
"print(missing_pct[missing_pct > 0].head(10))"
9595
]
9696
},
9797
{
@@ -186,7 +186,7 @@
186186
"agg_dict[\"patient_id\"] = \"count\"\n",
187187
"\n",
188188
"df_grouped = df_encoded.groupby([\"state\", \"service_type\"], as_index=False).agg(agg_dict)\n",
189-
"df_grouped.rename(columns={\"patient_id\": \"total_admissions\"}, inplace=True)\n"
189+
"df_grouped.rename(columns={\"patient_id\": \"total_admissions\"}, inplace=True)"
190190
]
191191
},
192192
{
@@ -337,7 +337,7 @@
337337
],
338338
"source": [
339339
"models = {\n",
340-
" \"Ridge\": Ridge(alpha=10.0), \n",
340+
" \"Ridge\": Ridge(alpha=10.0),\n",
341341
" \"Random Forest\": RandomForestRegressor(\n",
342342
" n_estimators=500,\n",
343343
" max_depth=15,\n",
@@ -382,7 +382,7 @@
382382
"\n",
383383
"cv = KFold(n_splits=5, shuffle=True, random_state=42)\n",
384384
"cv_scores = cross_val_score(model, X_train_scaled, y_train_log, cv=cv, scoring=\"r2\")\n",
385-
"print(f\"{name} CV R²: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})\")\n"
385+
"print(f\"{name} CV R²: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})\")"
386386
]
387387
},
388388
{
@@ -637,7 +637,7 @@
637637
"\n",
638638
"print(\"\\nTop 5 High-Demand Facilities:\")\n",
639639
"top_demand = df_grouped.nlargest(5, \"predicted_admissions\")[available_cols]\n",
640-
"print(top_demand.to_string(index=False))\n"
640+
"print(top_demand.to_string(index=False))"
641641
]
642642
},
643643
{
@@ -732,7 +732,7 @@
732732
"ax.axhline(y=0, color=\"k\", linestyle=\"-\", linewidth=0.5)\n",
733733
"\n",
734734
"plt.tight_layout()\n",
735-
"plt.show()\n"
735+
"plt.show()"
736736
]
737737
}
738738
],

0 commit comments

Comments
 (0)