
Commit 4f5ec2c

Update: Updated connection to NB and application description
1 parent a978431 commit 4f5ec2c

15 files changed

Lines changed: 488 additions & 224 deletions

.gitignore

Lines changed: 16 additions & 1 deletion
@@ -3,4 +3,19 @@ values.yaml
 
 __pycache__/
 *.pyc
-*.log
+*.log
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/

mlconnector/README.md

Lines changed: 0 additions & 2 deletions
@@ -34,8 +34,6 @@ This is used for internal communication of the various services. You can setup
 - `POSTGRES_DB`: PostgreSQL database name (default, `mlmodel`)
 - `POSTGRES_USER`: PostgreSQL username (default, `postgres`)
 - `POSTGRES_PASSWORD`: PostgreSQL password (default, `strongpassword`)
-- `PGADMIN_DEFAULT_EMAIL`: pgAdmin default login email (default, `user@mail.com`)
-- `PGADMIN_DEFAULT_PASSWORD`: pgAdmin default login password (default, `strongpassword`)
 - `DB_HOST_NAME`: Database host (e.g., `database`; this corresponds to the name of the container)
 - `DB_PORT`: Database port (default: `5432`)
 - `DB_DRIVER`: Database driver string (default, `postgresql+asyncpg`) **NOTE:** Only use an async driver
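For orientation (not part of the commit), a rough sketch of how these variables might be assembled into the async connection URL the services expect; the driver must stay async (e.g. `postgresql+asyncpg`), and the variable names and defaults are the ones listed above:

```python
import os

from dotenv import load_dotenv
from sqlalchemy.ext.asyncio import create_async_engine

load_dotenv()

# Illustrative only: build the async SQLAlchemy URL from the README variables.
DATABASE_URL = (
    f"{os.getenv('DB_DRIVER', 'postgresql+asyncpg')}://"
    f"{os.getenv('POSTGRES_USER', 'postgres')}:{os.getenv('POSTGRES_PASSWORD', 'strongpassword')}"
    f"@{os.getenv('DB_HOST_NAME', 'database')}:{os.getenv('DB_PORT', '5432')}"
    f"/{os.getenv('POSTGRES_DB', 'mlmodel')}"
)

engine = create_async_engine(DATABASE_URL, echo=False)  # requires an async driver
```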

mlconnector/api_full_documentation.md

Lines changed: 58 additions & 15 deletions
@@ -647,11 +647,12 @@ reg = Ridge(alpha=1.0, random_state=0)
 reg.fit(X, y)
 ...
 
-# It is important that all models are saved with a .pkl extension
-# Serialize with pickle to a .pkl file
+# Serialize with pickle to a .pkl file or any other format
 output_path = "diabetes_ridge.pkl"
 with open(output_path, "wb") as f:
     pickle.dump(reg, f)
+# Alternatively, serialize with joblib: joblib.dump(reg, output_path)
+# or load the model inside a custom predict function (see the inference section)
 
 ```
 ## 2. Register ML model with
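To make the joblib alternative mentioned above concrete, here is a minimal sketch (assuming joblib is installed and `reg` is the fitted estimator from the example):

```python
import joblib

# Sketch: serialize the fitted estimator with joblib instead of pickle.
joblib.dump(reg, "diabetes_ridge.joblib")

# Later (for instance inside a custom predict function), load it back:
reg_loaded = joblib.load("diabetes_ridge.joblib")
```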
@@ -719,17 +720,20 @@ The above step should return a model_id that will be used in the next steps. Her
 - Model file (pickled file saved in step one above)
 - Training data. This will be used for explainability and drift detection. (Note: it has to be the exact same data used to train the model, otherwise you will get wrong results)
 - Requirements file that defines the environment the model was trained in.
+- A custom predict function, if you will use a different one (see the inference section).
 
 Upload these one by one using the example below.
 Note: file_kind can be `model`, `data`, `code`, and `env`
+
+
 ```python
 import requests
 
 files = {
     "file": open("model.pkl", "rb"),
     "file_kind": (None, "model")
 }
-resp = requests.post("BASE_URL/model/1234/upload", files=files)
+resp = requests.post("BASE_URL/model/{model_id}/upload", files=files)
 print(resp.json())
 ```
 ## 3. Deployment
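For illustration only, looping over all four artifact kinds could look like the sketch below (file names are hypothetical; `model_id` is the ID returned by the registration step):

```python
import requests

# Sketch: upload each artifact with its corresponding file_kind, one by one.
artifacts = {
    "model": "model.pkl",         # pickled model from step 1
    "data": "training_data.csv",  # the exact data the model was trained on
    "env": "requirements.txt",    # training environment
    "code": "predict.py",         # optional custom predict function
}

for file_kind, path in artifacts.items():
    with open(path, "rb") as fh:
        files = {"file": fh, "file_kind": (None, file_kind)}
        resp = requests.post(f"BASE_URL/model/{model_id}/upload", files=files)
    print(file_kind, resp.status_code)
```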
@@ -775,20 +779,59 @@ curl -X GET "BASE_URL/deployment/get/status/dep-iris-001"
 
 ## 4. Inference Endpoint (including Explainability)
 
-### 4.1 Predict Call
-
-Assuming deployment created with `deployment_id = dep-iris-001`:
+### 4.1 Inference
 
-```bash
-curl -X POST "BASE_URL/deployment/dep-iris-001/predict" \
-  -H "Content-Type: application/json" \
-  -d '{
-        "data": [[5.1, 3.5, 1.4, 0.2]],
-        "explain": true
-      }'
-```
+Once the ML application is ready, the response will contain the inference endpoint.
 
-**Response:**
+```python
+import requests
+
+url = "BASE_URL/prediction"
+headers = {
+    "accept": "application/json",
+    "Content-Type": "application/json",
+}
+payload = {
+    "data": [{…}],
+    "is_fun": False,
+    "explanation": False
+}
+resp = requests.post(url, json=payload, headers=headers)
+```
+- `data` is a list of dictionaries in the format `feature: value`
+- `is_fun`: if set to `True`, the inference application will use a custom predict function. This has to be specified by the application owner. See the example below.
+
+<table style="width:100%; border-collapse:collapse; font-size:12px;">
+<tr>
+<th style="text-align:left; border:1px solid #e0e0e0; padding:6px;">scikit-learn</th>
+<th style="text-align:left; border:1px solid #e0e0e0; padding:6px;">pytorch</th>
+</tr>
+<tr>
+<td style="vertical-align:top; border:1px solid #e0e0e0; padding:6px;">
+<pre><code>import joblib
+
+def predict(path, df):
+    &quot;&quot;&quot;Minimal sklearn: load bundle &amp; predict.&quot;&quot;&quot;
+    b = joblib.load(path)  # {&#x27;pipeline&#x27;: fitted_estimator, ...}
+    return b[&quot;pipeline&quot;].predict(df).tolist()
+</code></pre>
+</td>
+<td style="vertical-align:top; border:1px solid #e0e0e0; padding:6px;">
+<pre><code>import torch, numpy as np
+
+def predict(path, df, feats=None, mean=None, scale=None):
+    &quot;&quot;&quot;Minimal PyTorch (TorchScript).&quot;&quot;&quot;
+    m = torch.jit.load(path, map_location=&quot;cpu&quot;).eval()  # one-file scripted model
+    X = df[feats].to_numpy(np.float32) if feats else df.to_numpy(np.float32)
+    if mean is not None and scale is not None:  # optional scaling
+        X = (X - np.asarray(mean, np.float32)) / np.asarray(scale, np.float32)
+    with torch.no_grad():
+        y = m(torch.from_numpy(X)).argmax(1).cpu().numpy()
+    return y.tolist()
+</code></pre>
+</td>
+</tr>
+</table>
+
+- `explanation`: if set to `True`, the response includes explanations.
+
+**Example response:**
 ```json
 {
   "prediction": [0],

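As a purely illustrative request against the new `/prediction` endpoint described above (feature names are hypothetical, iris-style; `BASE_URL` is a placeholder as in the other examples):

```python
import requests

# Sketch: one record as a feature:value dictionary, plain predict call.
payload = {
    "data": [
        {"sepal_length": 5.1, "sepal_width": 3.5, "petal_length": 1.4, "petal_width": 0.2}
    ],
    "is_fun": False,       # True only if a custom predict function was uploaded
    "explanation": False,  # True to include explanations in the response
}
resp = requests.post("BASE_URL/prediction", json=payload,
                     headers={"accept": "application/json", "Content-Type": "application/json"})
print(resp.json().get("prediction"))
```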
mlconnector/db/Dockerfile

Lines changed: 9 additions & 5 deletions
@@ -1,5 +1,9 @@
-FROM harbor.nbfc.io/proxy_cache/library/postgres
-USER root
-RUN export LANGUAGE=en_US.UTF-8
-COPY configs/init-my-db.sh /docker-entrypoint-initdb.d/init-user-db.sh
-# COPY configs/drift_metrics_mmd.csv /docker-entrypoint-initdb.d/drift_metrics_mmd.csv
+FROM postgres:16-bookworm
+
+ENV LANG=en_US.UTF-8 LANGUAGE=en_US:en LC_ALL=en_US.UTF-8
+
+# Copy init assets. Number prefix enforces order if you have multiple files.
+COPY configs/init-my-db.sh /docker-entrypoint-initdb.d/10-init-my-db.sh
+
+# Normalize Windows line endings just in case (no harm if already LF)
+RUN sed -i 's/\r$//' /docker-entrypoint-initdb.d/10-init-my-db.sh

mlconnector/db/configs/data.py

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+import pandas as pd
+from sqlalchemy import text
+from dotenv import load_dotenv
+import os
+
+# Load environment variables
+load_dotenv(override=True)
+
+# Database config
+db_config = {
+    "DB_DRIVER": "postgresql+psycopg2",  # e.g. postgresql+asyncpg
+    "DB_USER": os.getenv("POSTGRES_USER"),
+    "DB_PASSWORD": os.getenv("POSTGRES_PASSWORD"),
+    "DB_HOST": "localhost",
+    "DB_PORT": os.getenv("DB_PORT"),
+    "DB_NAME": os.getenv("POSTGRES_DB")
+}
+
+# Build connection string
+DATABASE_URL = (
+    f"{db_config['DB_DRIVER']}://{db_config['DB_USER']}:{db_config['DB_PASSWORD']}"
+    f"@{db_config['DB_HOST']}:{db_config['DB_PORT']}/{db_config['DB_NAME']}"
+)
+print(f"Connecting to database at {DATABASE_URL}")
+"""# Create async engine and session
+engine = create_async_engine(DATABASE_URL, echo=False)
+AsyncSessionLocal = sessionmaker(bind=engine, expire_on_commit=False, class_=AsyncSession)
+
+# Main async logic
+async def insert_drift_metrics():
+    df = pd.read_csv("drift_metrics_mmd.csv")
+
+    # Add required fields
+    df["rowid"] = [str(uuid.uuid4()) for _ in range(len(df))]
+    df["timestamp"] = datetime.utcnow()
+
+    async with AsyncSessionLocal() as session:
+        for _, row in df.iterrows():
+            await session.execute(text(""
+                INSERT INTO drift_metrics (
+                    rowid, feature, type, statistic, p_value,
+                    method, drift_detected, timestamp, modelid
+                ) VALUES (
+                    :rowid, :feature, :type, :statistic, :p_value,
+                    :method, :drift_detected, :timestamp, :modelid
+                )
+            ""), {
+                "rowid": row["rowid"],
+                "feature": row["feature"],
+                "type": row["type"],
+                "statistic": float(row["statistic"]),
+                "p_value": float(row["p_value"]),
+                "method": row["method"],
+                "drift_detected": str(row["drift_detected"]),
+                "timestamp": row["timestamp"],
+                "modelid": row["modelid"]
+            })
+        await session.commit()
+
+# Entry point
+if __name__ == "__main__":
+    asyncio.run(insert_drift_metrics())
+"""

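Not part of the commit, but a quick way to sanity-check the synchronous `DATABASE_URL` assembled in `data.py` above (assuming SQLAlchemy and psycopg2 are installed and the database is reachable on `localhost`):

```python
from sqlalchemy import create_engine, text

# Sketch: open a connection with the URL built in data.py and run a trivial query.
engine = create_engine(DATABASE_URL)
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())  # prints 1 if the connection works
```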