"""Demonstrate scikit-learn's GradientBoostingRegressor on the Boston dataset.

The Boston housing dataset is a popular benchmark for regression problems;
here it is used to predict house prices.
"""

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


def main() -> None:
    """Train a gradient-boosting regressor on the Boston data and report results.

    The function fetches the dataset through ``fetch_openml``, prints a short
    exploration of it, fits a ``GradientBoostingRegressor`` on a 75/25
    train/test split, prints the train/test scores and error metrics, and
    finally shows a scatter plot of actual versus predicted prices.

    It has no return value; all results are printed or plotted.
    """
    # Note: load_boston is deprecated and removed, so fetch_openml is used.
    boston = fetch_openml(name="boston", version=1, as_frame=True)
    print(boston.keys())

    # Build a new frame holding the features plus the target as a trailing
    # "Price" column. ``assign`` returns a copy, so the frame owned by the
    # fetched bunch is not mutated in place.
    df_boston: pd.DataFrame = boston.data.assign(Price=boston.target)

    # First five rows, then summary statistics (one row per column).
    print(df_boston.head())
    print(df_boston.describe().T)

    # Feature selection: every column but the last is a feature; the last
    # column ("Price") is the target variable.
    x = df_boston.iloc[:, :-1]
    y = df_boston.iloc[:, -1]

    # Split the data into 75% train and 25% test sets.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, random_state=0, test_size=0.25
    )

    model = GradientBoostingRegressor(
        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
    )
    model.fit(x_train, y_train)

    # See how well the model fits the data. The builtin ``round`` is used
    # instead of the ``.round`` method because a plain Python float (a valid
    # return type for ``score``) has no such method.
    training_score = round(model.score(x_train, y_train), 3)
    test_score = round(model.score(x_test, y_test), 3)
    print("Training score of GradientBoosting is :", training_score)
    print("The test score of GradientBoosting is :", test_score)

    # Evaluate the model by measuring its errors on the held-out test data.
    y_pred = model.predict(x_test)

    # The mean squared error
    print(f"Mean squared error: {mean_squared_error(y_test, y_pred):.2f}")
    # Coefficient of determination (R^2): 1 is perfect prediction
    print(f"Test Variance score: {r2_score(y_test, y_pred):.2f}")

    # Plot predictions against the true values; the dashed diagonal marks
    # where a perfect prediction would fall.
    fig, ax = plt.subplots()
    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=4)
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    ax.set_title("Truth vs Predicted")
    # Display the figure.
    plt.show()


if __name__ == "__main__":
    main()