Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
106 commits
Select commit Hold shift + click to select a range
34d412d
testing h2o pipeline on synthetic 2
Sep 2, 2025
45888ae
style
Sep 2, 2025
62b477d
Merge pull request #150 from datakind/feat/testing_h2o_pipeline
vishpillai123 Sep 2, 2025
4963fa3
bcrypt dep issue
Sep 3, 2025
65e9c3b
Merge pull request #151 from datakind/feat/testing_h2o_pipeline
vishpillai123 Sep 3, 2025
767a3fd
reverting deps for now
Sep 3, 2025
990f16d
Merge pull request #152 from datakind/feat/testing_h2o_pipeline
vishpillai123 Sep 3, 2025
cabb5c3
reverting changes with pyproject
Sep 3, 2025
c94bff2
Merge pull request #154 from datakind/feat/testing_h2o_pipeline
vishpillai123 Sep 3, 2025
9fc7d85
changed FE inference and training endpoint args for better understanding
Mesh-ach Sep 3, 2025
5b3fd3c
changed FE inference and training endpoint args for better understanding
Mesh-ach Sep 3, 2025
99e6dc8
fixed course flags in filename inference
Mesh-ach Sep 3, 2025
73ef358
fixed course flags in filename inference
Mesh-ach Sep 3, 2025
ba7c04f
fixed course flags in filename inference
Mesh-ach Sep 3, 2025
c37030f
fixed course flags in filename inference
Mesh-ach Sep 3, 2025
44a73e6
fixed course flags in filename inference
Mesh-ach Sep 3, 2025
503d5cb
Merge pull request #156 from datakind/DebuggingMSUD
Mesh-ach Sep 3, 2025
51118f1
changed FE inference and training endpoint args for better understanding
Mesh-ach Sep 3, 2025
5e824f1
patching validation.py
Mesh-ach Sep 3, 2025
92ca1eb
fix import ish
Mesh-ach Sep 3, 2025
6f63a0e
fix import ish
Mesh-ach Sep 3, 2025
d66a609
Merge pull request #158 from datakind/DebuggingMSUD
Mesh-ach Sep 3, 2025
410b8ea
fixed table read
Mesh-ach Sep 3, 2025
51cd252
fixed table read
Mesh-ach Sep 3, 2025
6ed5a31
Merge pull request #159 from datakind/DebuggingMSUD
Mesh-ach Sep 3, 2025
4d1de4c
fixed table read
Mesh-ach Sep 3, 2025
eb6fbdf
Merge pull request #160 from datakind/DebuggingMSUD
Mesh-ach Sep 3, 2025
f786e05
fixed table read
Mesh-ach Sep 4, 2025
f1135b7
Merge pull request #161 from datakind/DebuggingMSUD
Mesh-ach Sep 4, 2025
182ef28
trying to test why pipeline isn't being found
Sep 4, 2025
73b24b7
Merge pull request #162 from datakind/feat/testing_h2o_pipeline
vishpillai123 Sep 4, 2025
fe99c8d
black
Sep 4, 2025
23e5fb0
type check
Sep 4, 2025
d0d937f
Merge pull request #163 from datakind/feat/testing_h2o_pipeline
vishpillai123 Sep 4, 2025
7d2dc91
added framework column to cloud sql with default sklearn
Mesh-ach Sep 10, 2025
705dbaf
defined acceptance criteria from FE
Mesh-ach Sep 10, 2025
e510cac
reverted databricks to original file
Mesh-ach Sep 10, 2025
a227592
feat: added databricks framework layer
Mesh-ach Sep 10, 2025
27a5eee
added framework param to job
Mesh-ach Sep 10, 2025
166b329
added case block to job run
Mesh-ach Sep 10, 2025
f45b00c
added case block to job run
Mesh-ach Sep 10, 2025
4d4bf68
added case block to job run
Mesh-ach Sep 10, 2025
ae912c2
fix linting and test
Mesh-ach Sep 10, 2025
18708f2
fix linting and test
Mesh-ach Sep 10, 2025
cafcb10
fix linting and test
Mesh-ach Sep 10, 2025
be4cbda
fix linting and test
Mesh-ach Sep 10, 2025
abcd801
fix linting and test
Mesh-ach Sep 10, 2025
f142976
fix linting and test
Mesh-ach Sep 10, 2025
9c49443
fix linting and test
Mesh-ach Sep 10, 2025
00065c7
fix TYPECHECK
Mesh-ach Sep 10, 2025
0badfd3
fix TYPECHECK
Mesh-ach Sep 10, 2025
fecef5a
fix: alllllllllll the typecheck issues
Mesh-ach Sep 10, 2025
457fd14
fix: alllllllllll the typecheck issues
Mesh-ach Sep 10, 2025
ee85f6b
fix: typecheck issues
Mesh-ach Sep 10, 2025
2cc5937
fix: typecheck issues
Mesh-ach Sep 10, 2025
7500626
fix: typecheck issues
Mesh-ach Sep 10, 2025
7aeefab
fix: typecheck issues
Mesh-ach Sep 10, 2025
baee11d
fix: typecheck issues
Mesh-ach Sep 10, 2025
2ede593
fix: typecheck issues
Mesh-ach Sep 10, 2025
8338011
fix: typecheck issues
Mesh-ach Sep 10, 2025
5ba9886
fix: typecheck issues
Mesh-ach Sep 10, 2025
2e8630b
Merge pull request #164 from datakind/AddFrameworkSelector
Mesh-ach Sep 10, 2025
34c7093
fix: typecheck issues
Mesh-ach Sep 10, 2025
b00de72
Merge pull request #165 from datakind/AddFrameworkSelector
Mesh-ach Sep 10, 2025
d234f1c
fix added logging
Mesh-ach Sep 11, 2025
85d59c9
fix added logging
Mesh-ach Sep 11, 2025
8741a83
fix added logging
Mesh-ach Sep 11, 2025
11f2ace
fix added logging
Mesh-ach Sep 11, 2025
13e1b30
fix added logging
Mesh-ach Sep 11, 2025
39396f5
fix databricks h2o job name
Mesh-ach Sep 12, 2025
c2d6576
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 12, 2025
9189062
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 12, 2025
2a81f1d
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 12, 2025
04ed780
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 12, 2025
84c03f8
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 12, 2025
7e9d209
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 12, 2025
f8c3b20
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 12, 2025
666d455
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 12, 2025
af4053a
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 12, 2025
ca6801d
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
cbcc1b9
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
624beaf
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
a92862c
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
680796b
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
1b5452e
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
c8a5872
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
e9d2067
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
1d8fd3f
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
1085628
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
a5fd596
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
e0f104a
fixing validation issues with problematic MSUD file: Optimizing encod…
Mesh-ach Sep 13, 2025
edb2a34
revamped entire validation helper script
Mesh-ach Sep 15, 2025
00d939d
revamped entire validation helper script
Mesh-ach Sep 15, 2025
5a5cd32
revamped entire validation helper script
Mesh-ach Sep 15, 2025
a261e8e
revamped entire validation helper script
Mesh-ach Sep 15, 2025
e332c37
debugging not being able to find h2o pipeline
Sep 15, 2025
1271573
style
Sep 15, 2025
955809a
revamped entire validation helper script
Mesh-ach Sep 15, 2025
0c0de57
Merge branch 'develop' of github.com-work:datakind/sst-app-api into d…
Mesh-ach Sep 15, 2025
86df88f
revamped entire validation helper script
Mesh-ach Sep 15, 2025
5cdcb99
revamped entire validation helper script
Mesh-ach Sep 15, 2025
3a1819a
revamped entire validation helper script
Mesh-ach Sep 15, 2025
f7cae1d
revamped entire validation helper script
Mesh-ach Sep 15, 2025
0f94774
revamped entire validation helper script
Mesh-ach Sep 15, 2025
b9a6695
revamped entire validation helper script
Mesh-ach Sep 15, 2025
4f915e0
revamped entire validation helper script
Mesh-ach Sep 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/webapp/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,9 @@ class ModelTable(Base):
)
# version is currently unused and not supported: the webapp identifies a model only by its name, and any usage of a model always resolves to the live version.
version: Mapped[int] = mapped_column(Integer, default=0)
framework: Mapped[str | None] = mapped_column(
String(VAR_CHAR_STANDARD_LENGTH), nullable=False, default="sklearn"
)

# Within a given institution, there should be no duplicated model names.
__table_args__ = (UniqueConstraint("name", "inst_id", name="model_name_inst_uc"),)
Expand Down Expand Up @@ -548,6 +551,9 @@ class JobTable(Base):
String(VAR_CHAR_STANDARD_LENGTH), nullable=True
)
completed: Mapped[bool] = mapped_column(nullable=True)
framework: Mapped[str | None] = mapped_column(
String(VAR_CHAR_STANDARD_LENGTH), nullable=False, default="sklearn"
)


class DocType(enum.Enum):
Expand Down
29 changes: 23 additions & 6 deletions src/webapp/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

# The names of the deployed pipelines (jobs) in Databricks. Each must match the deployed name exactly.
PDP_INFERENCE_JOB_NAME = "github_sourced_pdp_inference_pipeline"
PDP_H2O_INFERENCE_JOB_NAME = "edvise_github_sourced_pdp_inference_pipeline"


class DatabricksInferenceRunRequest(BaseModel):
Expand All @@ -44,7 +45,7 @@ class DatabricksInferenceRunRequest(BaseModel):
# Note that the following should be the filepath.
filepath_to_type: dict[str, list[SchemaType]]
model_name: str
model_type: str = "sklearn"
model_type: str
# The email where notifications will get sent.
email: str
gcp_external_bucket_name: str
Expand Down Expand Up @@ -98,7 +99,17 @@ def setup_new_inst(self, inst_name: str) -> None:
db_inst_name = databricksify_inst_name(inst_name)
cat_name = databricks_vars["CATALOG_NAME"]
for medallion in MEDALLION_LEVELS:
w.schemas.create(name=f"{db_inst_name}_{medallion}", catalog_name=cat_name)
try:
w.schemas.create(
name=f"{db_inst_name}_{medallion}", catalog_name=cat_name
)
except Exception as e:
LOGGER.exception(
f"Failed to provision schemas in databricks for {db_inst_name}_{medallion}: {e}"
)
raise ValueError(
f"setup_new_inst(): Failed to provision schemas in databricks for {db_inst_name}_{medallion}: {e}"
)
LOGGER.info(
f"Creating medallion level schemas for {db_inst_name} & {medallion}."
)
Expand Down Expand Up @@ -192,16 +203,22 @@ def run_pdp_inference(

db_inst_name = databricksify_inst_name(req.inst_name)

if req.model_type == "sklearn":
pipeline_type = PDP_INFERENCE_JOB_NAME
elif req.model_type == "h2o":
pipeline_type = PDP_H2O_INFERENCE_JOB_NAME
else:
raise ValueError("Invalid model framework assigned to institution model")
try:
job = next(w.jobs.list(name=PDP_INFERENCE_JOB_NAME), None)
job = next(w.jobs.list(name=pipeline_type), None)
if not job or job.job_id is None:
raise ValueError(
f"run_pdp_inference(): Job '{PDP_INFERENCE_JOB_NAME}' was not found or has no job_id."
f"run_pdp_inference(): Job '{pipeline_type}' was not found or has no job_id for '{gcs_vars['GCP_SERVICE_ACCOUNT_EMAIL']}' and '{databricks_vars['DATABRICKS_HOST_URL']}'."
)
job_id = job.job_id
LOGGER.info(f"Resolved job ID for '{PDP_INFERENCE_JOB_NAME}': {job_id}")
LOGGER.info(f"Resolved job ID for '{pipeline_type}': {job_id}")
except Exception as e:
LOGGER.exception(f"Job lookup failed for '{PDP_INFERENCE_JOB_NAME}'.")
LOGGER.exception(f"Job lookup failed for '{pipeline_type}'.")
raise ValueError(f"run_pdp_inference(): Failed to find job: {e}")

try:
Expand Down
5 changes: 3 additions & 2 deletions src/webapp/gcsutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,9 @@ def validate_file(
f"If you see this file validation was successful {schems}"
)
except Exception as e:
blob.delete()
raise e
logging.exception("Validation failed for %s: %s", file_name, e)
raise

new_blob = bucket.blob(new_blob_name)
if new_blob.exists():
raise ValueError(new_blob_name + ": File already exists.")
Expand Down
Loading
Loading