From d75648ab743060a9e04fdc6a7121fafee4277c97 Mon Sep 17 00:00:00 2001
From: Mesh <meshach.ogunmodede@datakind.org>
Date: Wed, 18 Jun 2025 13:54:15 -0500
Subject: [PATCH 1/2] adjusted dtypes in pdp_schema_extension.json (float64 -> category/string)

---
 .../validation_schemas/pdp_schema_extension.json       | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/webapp/validation_schemas/pdp_schema_extension.json b/src/webapp/validation_schemas/pdp_schema_extension.json
index 6f6e4400..1cc93c10 100644
--- a/src/webapp/validation_schemas/pdp_schema_extension.json
+++ b/src/webapp/validation_schemas/pdp_schema_extension.json
@@ -575,7 +575,7 @@
               "checks": []
             },
             "core_competency_completed": {
-              "dtype": "float64",
+              "dtype": "category",
               "coerce": true,
               "nullable": true,
               "required": true,
@@ -599,7 +599,7 @@
               "checks": []
             },
             "course_instructor_employment_status": {
-              "dtype": "float64",
+              "dtype": "category",
               "coerce": true,
               "nullable": true,
               "required": true,
@@ -607,7 +607,7 @@
               "checks": []
             },
             "course_instructor_rank": {
-              "dtype": "float64",
+              "dtype": "category",
               "coerce": true,
               "nullable": true,
               "required": false,
@@ -615,7 +615,7 @@
               "checks": []
             },
             "credential_engine_identifier": {
-              "dtype": "float64",
+              "dtype": "string",
               "coerce": true,
               "nullable": true,
               "required": true,
@@ -671,7 +671,7 @@
               "checks": []
             },
             "institution_id": {
-              "dtype": "float64",
+              "dtype": "string",
               "coerce": true,
               "nullable": true,
               "required": true,

From 7f1aac27874760b227282930f3afd7d052a6685c Mon Sep 17 00:00:00 2001
From: Mesh <meshach.ogunmodede@datakind.org>
Date: Wed, 18 Jun 2025 15:44:28 -0500
Subject: [PATCH 2/2] removed limit parameter and LIMIT clause from fetch_table_data queries

---
 src/webapp/databricks.py   | 3 +--
 src/webapp/routers/data.py | 6 ------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/webapp/databricks.py b/src/webapp/databricks.py
index 4334c8a8..29a8c714 100644
--- a/src/webapp/databricks.py
+++ b/src/webapp/databricks.py
@@ -206,7 +206,6 @@ def fetch_table_data(
         inst_name: str,
         table_name: str,
         warehouse_id: str,
-        limit: int = 1000,
     ) -> List[Dict[str, Any]]:
         """
         Executes a SELECT * query on the specified table within the given catalog and schema,
@@ -226,7 +225,7 @@ def fetch_table_data(
         fully_qualified_table = (
             f"`{catalog_name}`.`{schema_name}_silver`.`{table_name}`"
         )
-        sql_query = f"SELECT * FROM {fully_qualified_table} LIMIT {limit}"
+        sql_query = f"SELECT * FROM {fully_qualified_table}"
 
         try:
             # Execute the SQL statement
diff --git a/src/webapp/routers/data.py b/src/webapp/routers/data.py
index bcb4b509..95c90997 100644
--- a/src/webapp/routers/data.py
+++ b/src/webapp/routers/data.py
@@ -1057,7 +1057,6 @@ def get_top_features(
             inst_name=f"{query_result[0][0].name}",
             table_name=f"sample_inference_{run_id}_features_with_most_impact",
             warehouse_id=env_vars["SQL_WAREHOUSE_ID"],
-            limit=500,
         )
 
         return rows
@@ -1101,7 +1100,6 @@ def get_support_overview(
             inst_name=f"{query_result[0][0].name}",
             table_name=f"sample_inference_{run_id}_support_overview",
             warehouse_id=env_vars["SQL_WAREHOUSE_ID"],
-            limit=500,
         )
 
         return rows
@@ -1144,7 +1142,6 @@ def get_training_support_overview(
             inst_name=f"{query_result[0][0].name}",
             table_name=f"sample_training_{run_id}_support_overview",
             warehouse_id=env_vars["SQL_WAREHOUSE_ID"],
-            limit=500,
         )
 
         return rows
@@ -1187,7 +1184,6 @@ def get_feature_value(
             inst_name=f"{query_result[0][0].name}",
             table_name=f"sample_inference_{run_id}_shap_feature_importance",
             warehouse_id=env_vars["SQL_WAREHOUSE_ID"],
-            limit=500,
         )
 
         return rows
@@ -1230,7 +1226,6 @@ def get_confusion_matrix(
             inst_name=f"{query_result[0][0].name}",
             table_name=f"sample_training_{run_id}_confusion_matrix",
             warehouse_id=env_vars["SQL_WAREHOUSE_ID"],
-            limit=500,
         )
 
         return rows
@@ -1273,7 +1268,6 @@ def get_roc_curve(
             inst_name=f"{query_result[0][0].name}",
             table_name=f"sample_training_{run_id}_roc_curve",
             warehouse_id=env_vars["SQL_WAREHOUSE_ID"],
-            limit=500,
         )
 
         return rows