-
Notifications
You must be signed in to change notification settings - Fork 434
Predict from selected or from all models models (Issues #798 and #423) #805
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
cec9a50
e63b384
2219b6b
366f85f
b0dfadb
3f9a5a8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1209,6 +1209,9 @@ def _fit(self, X, y, sample_weight=None, cv=None, sensitive_features=None): | |
|
|
||
| if not self._models: | ||
| raise AutoMLException("No models produced.") | ||
|
|
||
| #sorting models by lowest loss | ||
| self._models = sorted(self._models, key=lambda x: x.get_final_loss()) | ||
| self._fit_level = "finished" | ||
| self.save_progress() | ||
| self.select_and_save_best(show_warnings=True) | ||
|
|
@@ -1434,6 +1437,23 @@ def models_needed_on_predict(self, required_model_name): | |
| + [required_model_name] | ||
| ) | ||
| ) | ||
|
|
||
| def _do_prediction_union(self, X, model_list = []): | ||
| predictions = [] | ||
|
|
||
| for i, model in enumerate(model_list): | ||
| prediction = self._base_predict(X, model) | ||
|
|
||
| if i > 0: | ||
| prediction = prediction.add_suffix(f"_{i}") | ||
|
|
||
| predictions.append(prediction) | ||
|
|
||
| df_union = pd.concat(predictions, axis=1) | ||
|
|
||
| return df_union | ||
|
|
||
|
|
||
|
|
||
| def _base_predict(self, X, model=None): | ||
| if model is None: | ||
|
|
@@ -1504,16 +1524,94 @@ def _base_predict(self, X, model=None): | |
| else: | ||
| return predictions | ||
|
|
||
| def _predict(self, X): | ||
| predictions = self._base_predict(X) | ||
| # Return predictions | ||
| # If classification task the result is in column 'label' | ||
| # If regression task the result is in column 'prediction' | ||
| return ( | ||
| predictions["label"].to_numpy() | ||
| if self._ml_task != REGRESSION | ||
| else predictions["prediction"].to_numpy() | ||
| ) | ||
| def _predict(self, X, models=[]): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be good to add tests for predict. It would be good if we have following test cases:
If we are going to provide such functionality to
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure. For the predict_all method, does it make sense for it to just call the new predict implementation? since both of them originally just called the base predict method.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it makes sense. |
||
| """ | ||
| Generates predictions using one or multiple models based on the selected models. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| X : array-like, pandas.DataFrame | ||
| Input data to generate predictions for. | ||
|
|
||
| models : list, default=[] | ||
| List of model names to be used. | ||
| Raises an exception if any provided model name does not exist. | ||
| If no models are provided, use the best one. | ||
|
|
||
| Returns | ||
| ------- | ||
| numpy.ndarray | ||
| - If a single model is selected: returns a 1D array of predictions. | ||
| - If multiple models are selected: returns a 2D array (n_samples x n_models), | ||
| containing predictions from each model side-by-side. | ||
|
|
||
| Raises | ||
| ------ | ||
| AutoMLException | ||
| - If no models were selected. | ||
| - If invalid prediction mode is provided. | ||
| - If custom models are missing or invalid. | ||
| """ | ||
|
|
||
| selected_models = [] | ||
| n_models = len(models) | ||
|
|
||
| if n_models >= 1: | ||
| # Collect valid model names available in the system | ||
| available = [m.get_name() for m in self._models] | ||
|
|
||
| # Detect invalid names passed by the user | ||
| invalid = [name for name in models if name not in available] | ||
|
|
||
| # If any invalid custom model name is found → raise detailed error | ||
| if invalid: | ||
| raise AutoMLException( | ||
| f"The following custom models are not available: {invalid}\n" | ||
| f"Available models are: {available}" | ||
| ) | ||
|
|
||
| # Select the models that match the requested names | ||
| filtered_models = [ | ||
| m for m in self._models if m.get_name() in models | ||
| ] | ||
|
|
||
| for model in filtered_models: | ||
| selected_models.append(model) | ||
| else: | ||
| selected_models.append(self._best_model) | ||
|
|
||
| print(f'Models being used for prediction: {[model.get_name() for model in selected_models]}') | ||
| # ------------------------------------------------------------------ | ||
| # MULTI-MODEL PREDICTION (returns 2D array) | ||
| # ------------------------------------------------------------------ | ||
| if len(selected_models) > 1: | ||
| # Perform the union of predictions for all selected models | ||
| predictions = self._do_prediction_union(X, selected_models) | ||
|
|
||
| # Select the correct output columns depending on the task | ||
| if self._ml_task != REGRESSION: | ||
| # Multi-class/binary classification → use "label" columns | ||
| cols = [c for c in predictions.columns if c.startswith("label")] | ||
| else: | ||
| # Regression → use "prediction" columns | ||
| cols = [c for c in predictions.columns if c.startswith("prediction")] | ||
|
|
||
| # Return predictions as a 2D numpy array (n_samples x n_models) | ||
| return predictions[cols].to_numpy() | ||
|
|
||
| # ------------------------------------------------------------------ | ||
| # SINGLE-MODEL PREDICTION (returns 1D array) | ||
| # ------------------------------------------------------------------ | ||
| elif len(selected_models) == 1: | ||
| predictions = self._base_predict(X, selected_models[0]) | ||
|
|
||
| return ( | ||
| predictions["label"].to_numpy() | ||
| if self._ml_task != REGRESSION | ||
| else predictions["prediction"].to_numpy() | ||
| ) | ||
|
|
||
|
|
||
|
|
||
| def _predict_proba(self, X): | ||
| # Check is task type is correct | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that comment with description how the functions is working should be here. Docs are generated based on this comment https://supervised.mljar.com/api/#supervised.automl.AutoML.predict