emlearn
diff --git a/‎Makefile‎
Lines changed: 1 addition & 0 deletions b/‎Makefile‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎airquality_check.py‎
Lines changed: 0 additions & 76 deletions b/‎airquality_check.py‎
Lines changed: 0 additions & 76 deletions
diff --git a/‎airquality_download.py‎
Lines changed: 0 additions & 86 deletions b/‎airquality_download.py‎
Lines changed: 0 additions & 86 deletions
diff --git a/‎examples/datasets/airquality/X_test.npy‎
8.55 KB b/‎examples/datasets/airquality/X_test.npy‎
8.55 KB
diff --git a/‎examples/datasets/airquality/X_train.npy‎
33.7 KB b/‎examples/datasets/airquality/X_train.npy‎
33.7 KB
diff --git a/‎examples/datasets/airquality/prepare.py‎
Lines changed: 83 additions & 0 deletions b/‎examples/datasets/airquality/prepare.py‎
Lines changed: 83 additions & 0 deletions
diff --git a/‎examples/datasets/airquality/y_test.npy‎
792 Bytes b/‎examples/datasets/airquality/y_test.npy‎
792 Bytes
diff --git a/‎examples/datasets/airquality/y_train.npy‎
2.71 KB b/‎examples/datasets/airquality/y_train.npy‎
2.71 KB
diff --git a/‎tests/test_all.py‎
Lines changed: 1 addition & 0 deletions b/‎tests/test_all.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/test_plsr_airquality.py‎
Lines changed: 81 additions & 0 deletions b/‎tests/test_plsr_airquality.py‎
Lines changed: 81 additions & 0 deletions
@@ -63,6 +63,7 @@ emlearn_arrayutils_SRC = src/emlearn_arrayutils
 emlearn_linreg_SRC = src/emlearn_linreg
 emlearn_logreg_SRC = src/emlearn_logreg
 emlearn_extratrees_SRC = src/emlearn_extratrees
+emlearn_plsr_SRC = src/emlearn_plsr
 
 # Dependencies for each .mpy file: .c, .h, .py files, and Makefile
 $(foreach mod,$(MODULES),\
 
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""Download and preprocess the Air Quality UCI dataset for PLS regression.
+
+Also computes sklearn PLSR reference results for comparison.
+Run with CPython: python3 examples/datasets/airquality/prepare.py
+"""
+
+from pathlib import Path
+import os
+import urllib.request
+import zipfile
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.cross_decomposition import PLSRegression
+from sklearn.metrics import r2_score, mean_squared_error
+
+
+def main():
+    """Build the Air Quality train/test .npy files and print sklearn PLSR scores.
+
+    Steps, in order:
+      1. Download AirQualityUCI.zip next to this script (skipped if the zip is
+         already there) and extract it.
+      2. Read the CSV, drop the last two columns, turn -200 into NaN and drop
+         every row that contains a NaN.
+      3. Standardize the feature columns, split 80/20 with random_state=42 and
+         save the four arrays as float32 .npy files.
+      4. Fit sklearn PLSRegression with 3 and 5 components and print the
+         test-set MSE and R^2 as a reference.
+    """
+    out_dir = Path(os.path.dirname(__file__))
+    out_dir.mkdir(exist_ok=True)
+
+    # Download (only when the archive is not cached), then always extract
+    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00360/AirQualityUCI.zip"
+    archive = out_dir / "AirQualityUCI.zip"
+    if not archive.exists():
+        print("Downloading Air Quality UCI dataset...")
+        urllib.request.urlretrieve(url, archive)
+    with zipfile.ZipFile(archive, 'r') as zf:
+        zf.extractall(out_dir)
+
+    # Load and preprocess
+    table = pd.read_csv(out_dir / "AirQualityUCI.csv", sep=';', decimal=',')
+    table = table.iloc[:, :-2]  # drop last two empty columns
+    # -200 is presumably the dataset's missing-value marker; confirm against
+    # the dataset description.
+    table = table.replace(-200, np.nan).dropna()
+
+    # NOTE(review): in the published CSV the target CO(GT) appears to be the
+    # column at index 2, in which case the feature matrix below contains the
+    # target itself. Confirm the column order before relying on the scores.
+    features = table.iloc[:, 2:].values.astype(np.float32)  # sensor columns
+    target = table["CO(GT)"].values.astype(np.float32)
+
+    # NOTE(review): the scaler is fitted on all rows before the split, so its
+    # mean/scale also reflect the rows that end up in the test set.
+    features = StandardScaler().fit_transform(features).astype(np.float32)
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        features, target, test_size=0.2, random_state=42
+    )
+
+    splits = {
+        'X_train': X_train,
+        'X_test': X_test,
+        'y_train': y_train,
+        'y_test': y_test,
+    }
+    paths = {name: out_dir / f'{name}.npy' for name in splits}
+
+    for name, values in splits.items():
+        np.save(paths[name], values)
+
+    print('Saved datasets:')
+    for name, values in splits.items():
+        # Names are left-aligned to 7 characters so the colons line up
+        print(f"  {name:<7}: {values.shape} -> {paths[name]}")
+
+    # Sklearn PLSR reference results
+    print('\nSklearn PLSR reference:')
+    for nc in (3, 5):
+        reference = PLSRegression(n_components=nc)
+        reference.fit(X_train, y_train)
+        predicted = reference.predict(X_test).ravel()
+        mse = mean_squared_error(y_test, predicted)
+        r2 = r2_score(y_test, predicted)
+        print(f"  n_components={nc}: MSE={mse:.5f}, R^2={r2:.5f}")
+
+
+if __name__ == '__main__':
+    main()
@@ -37,6 +37,7 @@
     'test_extratrees_xor',
     'test_extratrees_cancer',
     'test_extratrees_wine',
+    'test_plsr_airquality',
 ]
 
 def main():
 
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+"""MicroPython test for PLSR on the Air Quality UCI dataset."""
+
+import array
+import emlearn_plsr
+import npyfile
+
+
+# Folder holding the X/y train/test .npy files loaded by the test below.
+# The path is relative, so it is resolved against the current working directory.
+DATA_DIR = 'examples/datasets/airquality/'
+
+
+def mean_squared_error(y_true, y_pred):
+    """Return the mean of the squared differences between y_true and y_pred.
+
+    Both arguments are sequences of numbers that support len() and iteration.
+
+    Raises ValueError when the inputs are empty or differ in length, so that
+    bad input is reported explicitly instead of surfacing as a
+    ZeroDivisionError or being silently truncated by zip().
+    """
+    n = len(y_true)
+    if n == 0:
+        raise ValueError("y_true must not be empty")
+    if len(y_pred) != n:
+        raise ValueError("y_true and y_pred must have the same length")
+    return sum((yi - yi_hat) ** 2 for yi, yi_hat in zip(y_true, y_pred)) / n
+
+
+def r2_score(y_true, y_pred):
+    """Return the coefficient of determination, 1 - SS_res / SS_tot.
+
+    SS_tot is the sum of squared deviations of y_true from its mean and
+    SS_res is the sum of squared differences between y_true and y_pred.
+    Returns 0.0 when y_true has no variance (SS_tot == 0), where the ratio
+    is undefined.
+
+    Raises ValueError when the inputs are empty or differ in length, so that
+    bad input is reported explicitly instead of surfacing as a
+    ZeroDivisionError or being silently truncated by zip().
+    """
+    n = len(y_true)
+    if n == 0:
+        raise ValueError("y_true must not be empty")
+    if len(y_pred) != n:
+        raise ValueError("y_true and y_pred must have the same length")
+
+    y_mean = sum(y_true) / n
+    ss_tot = sum((yi - y_mean) ** 2 for yi in y_true)
+    if ss_tot == 0:
+        return 0.0
+    ss_res = sum((yi - yi_hat) ** 2 for yi, yi_hat in zip(y_true, y_pred))
+    return 1 - ss_res / ss_tot
+
+
+def test_plsr_airquality():
+    """Fit emlearn_plsr on the Air Quality training split and check test R^2.
+
+    Loads the four .npy arrays from DATA_DIR, trains a 3-component model,
+    predicts every test row and asserts that the test-set R^2 exceeds 0.90.
+    """
+    print("\n=== Air Quality PLSR Test ===")
+
+    # Load data: each load yields (shape, values); the values are sliced as a
+    # flat row-major buffer when predicting below.
+    shape_X_train, X_train = npyfile.load(DATA_DIR + 'X_train.npy')
+    shape_y_train, y_train = npyfile.load(DATA_DIR + 'y_train.npy')
+    shape_X_test, X_test = npyfile.load(DATA_DIR + 'X_test.npy')
+    shape_y_test, y_test = npyfile.load(DATA_DIR + 'y_test.npy')
+
+    n_train = shape_X_train[0]
+    n_features = shape_X_train[1]
+    n_test = shape_X_test[0]
+
+    # The per-row slicing below is only valid if both splits share a layout
+    assert shape_X_test[1] == n_features, "Train and test feature counts match"
+    assert len(y_test) == n_test, "One target per test sample"
+
+    print(f"Loaded: {n_train} train, {n_test} test samples")
+    print(f"Features: {n_features}")
+
+    n_components = 3
+
+    # Create and train model
+    model = emlearn_plsr.new(n_train, n_features, n_components)
+    total_iter, final_metric = emlearn_plsr.fit(
+        model, X_train, y_train,
+        max_iterations=2000,
+        tolerance=1e-5,
+        verbose=0,
+    )
+
+    assert total_iter > 0, "Some iterations performed"
+    assert model.is_complete(), "Training complete"
+    print(f"Trained: {total_iter} iterations")
+
+    # Predict on test set, one row of n_features values at a time
+    y_pred = array.array('f')
+    for i in range(n_test):
+        row = X_test[i * n_features:(i + 1) * n_features]
+        y_pred.append(model.predict(row))
+
+    # Compute metrics
+    mse = mean_squared_error(y_test, y_pred)
+    r2 = r2_score(y_test, y_pred)
+
+    print(f"Test MSE: {mse:.5f}")
+    print(f"Test R^2: {r2:.5f}")
+    print("Target (sklearn PLSR): ~0.97")
+
+    # emlearn PLSR should be close to sklearn (which gets ~0.977)
+    # Print the verdict before asserting so that a failing run still reports
+    # it, and use one comparison for both the message and the assertion.
+    passed = r2 > 0.90
+    if passed:
+        print("✅ GOOD: Solid regression performance on real data!")
+    else:
+        print("❌ POOR: R^2 below threshold")
+    assert passed, "R^2 above 0.90"
+
+
+if __name__ == '__main__':
+    test_plsr_airquality()
Original file line number	Diff line number	Diff line change
`@@ -37,6 +37,7 @@`
`37`	`37`	`'test_extratrees_xor',`
`38`	`38`	`'test_extratrees_cancer',`
`39`	`39`	`'test_extratrees_wine',`
	`40`	`+ 'test_plsr_airquality',`
`40`	`41`	`]`
`41`	`42`
`42`	`43`	`def main():`