Sinapsis-AI
diff --git a/‎packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/dataset_splitter.py‎
Lines changed: 8 additions & 8 deletions b/‎packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/dataset_splitter.py‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/image_writers/image_saver.py‎
Lines changed: 3 additions & 3 deletions b/‎packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/image_writers/image_saver.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎sinapsis_data_analysis/README.md‎
Lines changed: 159 additions & 0 deletions b/‎sinapsis_data_analysis/README.md‎
Lines changed: 159 additions & 0 deletions
diff --git a/‎sinapsis_data_analysis/pyproject.toml‎
Lines changed: 40 additions & 0 deletions b/‎sinapsis_data_analysis/pyproject.toml‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎sinapsis_data_analysis/src/sinapsis_data_analysis/__init__.py‎ b/‎sinapsis_data_analysis/src/sinapsis_data_analysis/__init__.py‎
diff --git a/‎sinapsis_data_analysis/src/sinapsis_data_analysis/configs/decision_sklearn.yml‎
Lines changed: 38 additions & 0 deletions b/‎sinapsis_data_analysis/src/sinapsis_data_analysis/configs/decision_sklearn.yml‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎sinapsis_data_analysis/src/sinapsis_data_analysis/configs/mainfold_sklearn.yml‎
Lines changed: 39 additions & 0 deletions b/‎sinapsis_data_analysis/src/sinapsis_data_analysis/configs/mainfold_sklearn.yml‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎sinapsis_data_analysis/src/sinapsis_data_analysis/configs/mlp_sklearn.yml‎
Lines changed: 48 additions & 0 deletions b/‎sinapsis_data_analysis/src/sinapsis_data_analysis/configs/mlp_sklearn.yml‎
Lines changed: 48 additions & 0 deletions
@@ -129,7 +129,7 @@ def execute(self, container: DataContainer) -> DataContainer:
         x_data, y_data = self.extract_x_y_from_packet(packet)
 
         custom_dataset = self.store_data_in_data_splitter(x_data, y_data)
-        container.set_generic_data(container, custom_dataset)
+        self._set_generic_data(container, custom_dataset)
         return container
 
 
@@ -212,16 +212,16 @@ class AttributesBaseModel(DatasetSplitterBase.AttributesBaseModel):
         generic_data_target_key: str = "target"  # labels
         generic_data_feature_key: str = "data"  # arrays
 
-    def extract_x_y_from_packet(self, packets: list[Packet] | dict) -> tuple[ArrayDataFrameType, StringDataFrameType]:
+    def extract_x_y_from_packet(self, packets: list[Packet] | dict) -> tuple[StringDataFrameType, ArrayDataFrameType]:
         packet = cast(dict, packets)
         dataframe: pd.DataFrame | None = packet.get(self.attributes.generic_data_extract_key, None)
-        target: pd.DataFrame
-        feature: pd.DataFrame
-        if dataframe:
-            target = dataframe.get(self.attributes.generic_dataset_target_key)
-            feature = dataframe.get(self.attributes.generic_dataset_feature_key)
+        target: pd.DataFrame = pd.DataFrame()
+        feature: pd.DataFrame = pd.DataFrame()
+        if isinstance(dataframe, pd.DataFrame):
+            target = dataframe.get(self.attributes.generic_data_target_key)
+            feature = dataframe.get(self.attributes.generic_data_feature_key)
 
-        return target, feature
+        return feature, target
 
     @staticmethod
     def return_data_splitter_object(
 
@@ -101,16 +101,16 @@ def save_image(self, img_destination: Path, image_packet: ImagePacket) -> str:
                 img_destination = img_destination.with_suffix(f".{self.attributes.extension}")
 
             path_to_save = str(img_destination)
-            if image_packet.content is not None and image_packet.content.size > 0:  # Check if image is valid
-                if image_packet.color_space != ImageColor.GRAY:
+            if image_packet.content is not None and image_packet.content.size > 0:
+                if image_packet.color_space is not None and image_packet.color_space != ImageColor.GRAY:
                     image_packet = convert_color_space(image_packet, ImageColor.BGR)
                 cv2.imwrite(str(img_destination.absolute()), image_packet.content)
                 self.logger.debug(f"Saved image to: {img_destination.absolute()}")
                 return path_to_save
             else:
                 self.logger.warning(f"Attempted to save an invalid image: {img_destination}")
                 return ""
-        except (FileNotFoundError, PermissionError, OSError) as e:
+        except OSError as e:
             self.logger.error(f"File system error while saving image to {img_destination}: {e}")
             return ""
 
 
@@ -0,0 +1,159 @@
+<h1 align="center">
+<br>
+<a href="https://sinapsis.tech/">
+  <img
+    src="https://github.com/Sinapsis-AI/brand-resources/blob/main/sinapsis_logo/4x/logo.png?raw=true"
+    alt="" width="300">
+</a><br>
+Sinapsis Data Analysis
+<br>
+</h1>
+
+<h4 align="center">Module for machine learning model training, analysis, and inference, using the Scikit-learn and XGBoost libraries.</h4>
+
+<p align="center">
+<a href="#installation">🐍  Installation</a> •
+<a href="#features"> 🚀 Features</a> •
+<a href="#example"> 📚 Usage Example</a> •
+<a href="#documentation">📙 Documentation</a> •
+<a href="#license"> 🔍 License </a>
+</p>
+
+**Sinapsis Data Analysis** provides a comprehensive set of tools for machine learning model training, evaluation, and inference using industry-standard libraries like scikit-learn and XGBoost.
+
+<h2 id="installation"> 🐍  Installation </h2>
+
+Install using your package manager of choice. We encourage the use of <code>uv</code>.
+
+Example with <code>uv</code>:
+
+```bash
+  uv pip install sinapsis-data-analysis --extra-index-url https://pypi.sinapsis.tech
+```
+ or with raw <code>pip</code>:
+```bash
+  pip install sinapsis-data-analysis --extra-index-url https://pypi.sinapsis.tech
+```
+
+
+<h2 id="features">🚀 Features</h2>
+
+<h3> Templates Supported</h3>
+
+**Sinapsis Data Analysis** provides a variety of templates for machine learning workflows:
+
+<details>
+<summary><strong><span style="font-size: 1.25em;">Scikit-Learn Models</span></strong></summary>
+
+The following model types are supported:
+
+- **Linear Models**: LinearRegression, Ridge, Lasso, ElasticNet, LogisticRegression, etc.
+- **Neighbors Models**: KNeighborsClassifier, KNeighborsRegressor, RadiusNeighborsClassifier, etc.
+- **Neural Network Models**: MLPClassifier, MLPRegressor, BernoulliRBM
+- **Tree Models**: DecisionTreeClassifier, DecisionTreeRegressor, ExtraTreeClassifier, etc.
+
+Each template uses the same base attributes:
+- **`generic_field_key` (str, required)**: Key of the generic field where datasets are stored
+- **`model_save_path` (str, required)**: Path where the trained model will be saved
+</details>
+
+<details>
+<summary><strong><span style="font-size: 1.25em;">XGBoost Models</span></strong></summary>
+
+XGBoost model templates include:
+- XGBClassifier
+- XGBRegressor
+- XGBRanker
+- XGBRFClassifier
+- XGBRFRegressor
+- Booster
+
+Attributes are the same as those for Scikit-learn templates.
+</details>
+
+<details>
+<summary><strong><span style="font-size: 1.25em;">Manifold Learning</span></strong></summary>
+
+Templates for dimensionality reduction using scikit-learn's manifold learning techniques:
+
+- **SKLearnManifold**: Base class for all manifold learning algorithms
+  - **`generic_field_key` (str, required)**: Key of the generic field where the input data is stored
+
+Specific algorithms include t-SNE, MDS, Isomap, LocallyLinearEmbedding, and more.
+</details>
+
+<details>
+<summary><strong><span style="font-size: 1.25em;">Inference Templates</span></strong></summary>
+
+Templates for using trained models to make predictions on new data:
+
+- **SKLearnInference**: For inference with scikit-learn models
+- **XGBoostInference**: For inference with XGBoost models
+
+To use these templates, set the **`model_path`** attribute to the path of the trained model.
+</details>
+
+> [!TIP]
+> Use CLI command ```sinapsis info --all-template-names``` to list all the Template names installed with Sinapsis Data Analysis.
+
+> [!TIP]
+> Use CLI command ```sinapsis info --example-template-config TEMPLATE_NAME``` to produce an example Agent config for the Template specified in ***TEMPLATE_NAME***.
+
+For example, for ***LinearRegressionWrapper*** use ```sinapsis info --example-template-config LinearRegressionWrapper``` to produce an example config.
+
+<h2 id="example"> 📚 Usage Example </h2>
+
+Below is an example configuration for **Sinapsis Data Analysis** using LinearRegressionWrapper for regression.
+
+<details>
+<summary><strong><span style="font-size: 1.25em;">Example config</span></strong></summary>
+
+```yaml
+agent:
+  name: sklearn_linear_models_agent
+  description: agent to train a LinearRegression model from scikit-learn using the load_diabetes dataset
+
+templates:
+- template_name: InputTemplate
+  class_name: InputTemplate
+  attributes: {}
+
+- template_name: load_diabetesWrapper
+  class_name: load_diabetesWrapper
+  template_input: InputTemplate
+  attributes:
+    split_dataset: true
+    train_size: 0.8
+    load_diabetes:
+      return_X_y: false
+      as_frame: true
+
+- template_name: LinearRegressionWrapper
+  class_name: LinearRegressionWrapper
+  template_input: load_diabetesWrapper
+  attributes:
+    generic_field_key: load_diabetesWrapper
+    model_save_path: "artifacts/linear_regression.joblib"
+    linearregression_init:
+      fit_intercept: true
+      copy_X: true
+      n_jobs: null
+      positive: false
+```
+</details>
+
+To run the config, use the CLI:
+```bash
+sinapsis run name_of_config.yml
+```
+
+<h2 id="documentation">📙 Documentation</h2>
+
+Documentation for this and other sinapsis packages is available on the [sinapsis website](https://docs.sinapsis.tech/docs).
+
+Tutorials for different projects within sinapsis are available on the [sinapsis tutorials page](https://docs.sinapsis.tech/tutorials).
+
+<h2 id="license">🔍 License</h2>
+
+This project is licensed under the AGPLv3 license, which encourages open collaboration and sharing. For more details, please refer to the [LICENSE](LICENSE) file.
+
+For commercial use, please refer to our [official Sinapsis website](https://sinapsis.tech) for information on obtaining a commercial license.
@@ -0,0 +1,40 @@
+[project]
+name = "sinapsis-data-analysis"
+version = "0.1.0"
+description = "Templates to work with models for classification, regression and clustering with xgboost and sklearn."
+authors = [{ name = "SinapsisAI", email = "dev@sinapsis.tech" }]
+
+readme = "README.md"
+license = { file = "LICENSE" }
+requires-python = ">=3.10"
+dependencies = [
+    "scikit-learn>=1.6.1",
+    "sinapsis>=0.1.1",
+    "sinapsis-data-readers",
+    "xgboost>=3.0.0",
+]
+
+[project.optional-dependencies]
+
+all = [
+]
+
+
+[tool.uv.sources]
+sinapsis-data-readers = { workspace = true }
+
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+
+[[tool.uv.index]]
+name = "sinapsis"
+url = "https://pypi.sinapsis.tech/"
+
+
+[project.urls]
+Homepage = "https://sinapsis.tech"
+Documentation = "https://docs.sinapsis.tech/docs"
+Tutorials = "https://docs.sinapsis.tech/tutorials"
+Repository = "https://github.com/Sinapsis-AI/sinapsis-data-tools.git"
@@ -0,0 +1,38 @@
+agent:
+  name: sklearn_tree_models_agent
+  description: agent to train a DecisionTreeClassifier using the load_wine dataset
+
+templates:
+- template_name: InputTemplate
+  class_name: InputTemplate
+  attributes: {}
+
+- template_name: load_wineWrapper
+  class_name: load_wineWrapper
+  template_input: InputTemplate
+  attributes:
+    split_dataset: true
+    train_size: 0.8
+    load_wine:
+      return_X_y: false
+      as_frame: true
+
+- template_name: DecisionTreeClassifierWrapper
+  class_name: DecisionTreeClassifierWrapper
+  template_input: load_wineWrapper
+  attributes:
+    generic_field_key: load_wineWrapper
+    model_save_path: "artifacts/decision_tree.joblib"
+    decisiontreeclassifier_init:
+      criterion: 'gini'
+      splitter: 'best'
+      max_depth: 5
+      min_samples_split: 2
+      min_samples_leaf: 1
+      min_weight_fraction_leaf: 0.0
+      max_features: null
+      random_state: 42
+      max_leaf_nodes: null
+      min_impurity_decrease: 0.0
+      class_weight: null
+      ccp_alpha: 0.0
@@ -0,0 +1,39 @@
+agent:
+  name: sklearn_manifold_agent
+  description: agent to train a TSNE from sklearn using the load_digits dataset
+
+templates:
+- template_name: InputTemplate
+  class_name: InputTemplate
+  attributes: {}
+
+- template_name: load_digitsWrapper
+  class_name: load_digitsWrapper
+  template_input: InputTemplate
+  attributes:
+    split_dataset: true
+    train_size: 0.8
+    load_digits:
+      n_class: 10
+      return_X_y: false
+      as_frame: true
+
+- template_name: TSNEWrapper
+  class_name: TSNEWrapper
+  template_input: load_digitsWrapper
+  attributes:
+    generic_field_key: load_digitsWrapper
+    tsne_init:
+      n_components: 2
+      perplexity: 30.0
+      early_exaggeration: 12.0
+      learning_rate: 200.0
+      max_iter: 1000
+      n_iter_without_progress: 300
+      min_grad_norm: 0.0000001
+      metric: 'euclidean'
+      init: 'random'
+      random_state: 42
+      method: 'barnes_hut'
+      angle: 0.5
+      n_jobs: null
@@ -0,0 +1,48 @@
+agent:
+  name: sklearn_nn_models_agent
+  description: agent to train a MLPClassifier using the load_breast_cancer dataset
+
+templates:
+- template_name: InputTemplate
+  class_name: InputTemplate
+  attributes: {}
+
+- template_name: load_breast_cancerWrapper
+  class_name: load_breast_cancerWrapper
+  template_input: InputTemplate
+  attributes:
+    split_dataset: true
+    train_size: 0.8
+    load_breast_cancer:
+      return_X_y: false
+      as_frame: true
+
+- template_name: MLPClassifierWrapper
+  class_name: MLPClassifierWrapper
+  template_input: load_breast_cancerWrapper
+  attributes:
+    generic_field_key: load_breast_cancerWrapper
+    model_save_path: "artifacts/mlp_classifier.joblib"
+    mlpclassifier_init:
+      hidden_layer_sizes: [100, 50]
+      activation: 'relu'
+      solver: 'adam'
+      alpha: 0.0001
+      batch_size: 'auto'
+      learning_rate: 'constant'
+      learning_rate_init: 0.001
+      max_iter: 200
+      shuffle: true
+      random_state: 42
+      tol: 0.0001
+      verbose: false
+      warm_start: false
+      momentum: 0.9
+      nesterovs_momentum: true
+      early_stopping: false
+      validation_fraction: 0.1
+      beta_1: 0.9
+      beta_2: 0.999
+      epsilon: 0.00000001
+      n_iter_no_change: 10
+      max_fun: 15000