datakind
diff --git a/‎src/webapp/database.py‎
Lines changed: 6 additions & 0 deletions b/‎src/webapp/database.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/webapp/databricks.py‎
Lines changed: 23 additions & 6 deletions b/‎src/webapp/databricks.py‎
Lines changed: 23 additions & 6 deletions
diff --git a/‎src/webapp/gcsutil.py‎
Lines changed: 3 additions & 2 deletions b/‎src/webapp/gcsutil.py‎
Lines changed: 3 additions & 2 deletions
@@ -511,6 +511,9 @@ class ModelTable(Base):
     )
     # version is unused. version is not currently supported. The webapp only knows about the name of the model and any usages of a model will only use the live version.
     version: Mapped[int] = mapped_column(Integer, default=0)
+    framework: Mapped[str | None] = mapped_column(
+        String(VAR_CHAR_STANDARD_LENGTH), nullable=False, default="sklearn"
+    )  # NOTE(review): typed str | None but nullable=False; confirm intent.
 
     # Within a given institution, there should be no duplicated model names.
     __table_args__ = (UniqueConstraint("name", "inst_id", name="model_name_inst_uc"),)
@@ -548,6 +551,9 @@ class JobTable(Base):
         String(VAR_CHAR_STANDARD_LENGTH), nullable=True
     )
     completed: Mapped[bool] = mapped_column(nullable=True)
+    framework: Mapped[str | None] = mapped_column(
+        String(VAR_CHAR_STANDARD_LENGTH), nullable=False, default="sklearn"
+    )  # NOTE(review): typed str | None but nullable=False; confirm intent.
 
 
 class DocType(enum.Enum):
 
@@ -35,6 +35,7 @@
 
 # The name of the deployed pipeline in Databricks. Must match directly.
 PDP_INFERENCE_JOB_NAME = "github_sourced_pdp_inference_pipeline"
+PDP_H2O_INFERENCE_JOB_NAME = "edvise_github_sourced_pdp_inference_pipeline"
 
 
 class DatabricksInferenceRunRequest(BaseModel):
@@ -44,7 +45,7 @@ class DatabricksInferenceRunRequest(BaseModel):
     # Note that the following should be the filepath.
     filepath_to_type: dict[str, list[SchemaType]]
     model_name: str
-    model_type: str = "sklearn"
+    model_type: str  # Required; run_pdp_inference accepts only "sklearn" or "h2o".
     # The email where notifications will get sent.
     email: str
     gcp_external_bucket_name: str
@@ -98,7 +99,17 @@ def setup_new_inst(self, inst_name: str) -> None:
         db_inst_name = databricksify_inst_name(inst_name)
         cat_name = databricks_vars["CATALOG_NAME"]
         for medallion in MEDALLION_LEVELS:
-            w.schemas.create(name=f"{db_inst_name}_{medallion}", catalog_name=cat_name)
+            schema_name = f"{db_inst_name}_{medallion}"
+            try:
+                w.schemas.create(name=schema_name, catalog_name=cat_name)
+            except Exception as e:
+                LOGGER.exception(
+                    f"Failed to provision schemas in databricks for {schema_name}: {e}"
+                )
+                # Chain the original error so the Databricks traceback is preserved.
+                raise ValueError(
+                    f"setup_new_inst(): Failed to provision schemas in databricks for {schema_name}: {e}"
+                ) from e
             LOGGER.info(
                 f"Creating medallion level schemas for {db_inst_name} & {medallion}."
             )
@@ -192,16 +203,22 @@ def run_pdp_inference(
 
         db_inst_name = databricksify_inst_name(req.inst_name)
 
+        # Each supported framework is served by its own deployed Databricks job.
+        jobs = {"sklearn": PDP_INFERENCE_JOB_NAME, "h2o": PDP_H2O_INFERENCE_JOB_NAME}
+        pipeline_type = jobs.get(req.model_type)
+        if pipeline_type is None:
+            msg = f"Invalid model framework assigned to institution model: {req.model_type!r}"
+            raise ValueError(msg)
         try:
-            job = next(w.jobs.list(name=PDP_INFERENCE_JOB_NAME), None)
+            job = next(w.jobs.list(name=pipeline_type), None)
             if not job or job.job_id is None:
                 raise ValueError(
-                    f"run_pdp_inference(): Job '{PDP_INFERENCE_JOB_NAME}' was not found or has no job_id."
+                    f"run_pdp_inference(): Job '{pipeline_type}' was not found or has no job_id for '{gcs_vars['GCP_SERVICE_ACCOUNT_EMAIL']}' and '{databricks_vars['DATABRICKS_HOST_URL']}'."
                 )
             job_id = job.job_id
-            LOGGER.info(f"Resolved job ID for '{PDP_INFERENCE_JOB_NAME}': {job_id}")
+            LOGGER.info(f"Resolved job ID for '{pipeline_type}': {job_id}")
         except Exception as e:
-            LOGGER.exception(f"Job lookup failed for '{PDP_INFERENCE_JOB_NAME}'.")
+            LOGGER.exception(f"Job lookup failed for '{pipeline_type}'.")
             raise ValueError(f"run_pdp_inference(): Failed to find job: {e}")
 
         try:
 
@@ -340,8 +340,9 @@ def validate_file(
                     f"If you see this file validation was successful {schems}"
                 )
         except Exception as e:
-            blob.delete()
-            raise e
+            logging.exception("Validation failed for %s: %s", file_name, e)
+            raise  # Re-raise unchanged; this handler leaves the blob in place.
+
         new_blob = bucket.blob(new_blob_name)
         if new_blob.exists():
             raise ValueError(new_blob_name + ": File already exists.")