Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions src/webapp/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ class Base(DeclarativeBase):
LOCAL_PASSWORD = "tester_password"
DATETIME_TESTING = datetime.datetime(2024, 12, 26, 19, 37, 59, 753357)

# Test institution - same ID as DEV USC Beaufort for testing
TEST_INST_UUID = uuid.UUID("942d4b0e-12e7-4d2a-9187-9508ae3cef7c")
TEST_BATCH_UUID = uuid.UUID("3182f472-e079-4678-a0a1-9ca5ead6c49a")


@event.listens_for(Mapper, "before_insert")
@event.listens_for(Mapper, "before_update")
Expand Down Expand Up @@ -106,6 +110,19 @@ def init_db(env: str) -> None:
updated_at=DATETIME_TESTING,
)
)
# USC Beaufort - matches DEV for testing
session.merge(
InstTable(
id=TEST_INST_UUID,
name="University of South Carolina - Beaufort",
state="SC",
pdp_id="345000",
schemas=["COURSE", "STUDENT"],
created_at=DATETIME_TESTING,
updated_at=DATETIME_TESTING,
created_by=LOCAL_USER_UUID,
)
)
session.merge(
ApiKeyTable(
id=LOCAL_APIKEY_UUID,
Expand All @@ -118,6 +135,94 @@ def init_db(env: str) -> None:
valid=True,
)
)
# Create test files and batches for LOCAL environment
if env == "LOCAL":
# Create test files
test_file_1 = FileTable(
id=uuid.UUID("f0bb3a20-6d92-4254-afed-6a72f43c562a"),
inst_id=LOCAL_INST_UUID,
name="test_course_file.csv",
source="MANUAL_UPLOAD",
uploader=LOCAL_USER_UUID,
sst_generated=False,
valid=True,
schemas=["COURSE"], # Using string literal to avoid circular import
created_at=DATETIME_TESTING,
updated_at=DATETIME_TESTING,
)
test_file_2 = FileTable(
id=uuid.UUID("cb02d06c-2a59-486a-9bdd-d394a4fcb833"),
inst_id=LOCAL_INST_UUID,
name="test_cohort_file.csv",
source="MANUAL_UPLOAD",
uploader=LOCAL_USER_UUID,
sst_generated=False,
valid=True,
schemas=[
"STUDENT"
], # Using string literal to avoid circular import
created_at=DATETIME_TESTING,
updated_at=DATETIME_TESTING,
)
# Create test batch for LOCAL_INST_UUID (using a different ID)
test_batch = BatchTable(
id=uuid.UUID("f0bb3a20-6d92-4254-afed-6a72f43c562b"),
inst_id=LOCAL_INST_UUID,
name="test_batch_1",
created_by=LOCAL_USER_UUID,
created_at=DATETIME_TESTING,
updated_at=DATETIME_TESTING,
)
# Associate files with batch
test_batch.files.add(test_file_1)
test_batch.files.add(test_file_2)
session.merge(test_file_1)
session.merge(test_file_2)
session.merge(test_batch)

# Create test files for EDA test institution (TEST_INST_UUID)
# Real files from DEV batch 3182f472e0794678a0a19ca5ead6c49a
test_file_student = FileTable(
id=uuid.UUID("f1d7c0a4-5211-459f-a79a-a1c2752f45c5"),
inst_id=TEST_INST_UUID,
name="1762967705679_AO1600pdp_AO1600_AR_DEIDENTIFIED_STUDYID_20250522120554.csv",
source="MANUAL_UPLOAD",
uploader=uuid.UUID("c8b57138-2529-4e1f-9e89-07399d165f85"),
sst_generated=False,
valid=True,
schemas=["STUDENT"],
created_at=DATETIME_TESTING,
updated_at=DATETIME_TESTING,
)
test_file_course = FileTable(
id=uuid.UUID("d19d0129-96de-464c-98e9-694996965c7b"),
inst_id=TEST_INST_UUID,
name="1762967705683_AO1600pdp_AO1600_COURSE_LEVEL_AR_DEIDENTIFIED_STUDYID_20250522120554.csv",
source="MANUAL_UPLOAD",
uploader=uuid.UUID("c8b57138-2529-4e1f-9e89-07399d165f85"),
sst_generated=False,
valid=True,
schemas=["COURSE"],
created_at=DATETIME_TESTING,
updated_at=DATETIME_TESTING,
)

# Test batch - matches DEV USC Beaufort
test_batch = BatchTable(
id=TEST_BATCH_UUID,
inst_id=TEST_INST_UUID,
name="Batch_2025-11-12_1762967767400",
completed=True,
created_by=uuid.UUID("c8b57138-2529-4e1f-9e89-07399d165f85"),
created_at=DATETIME_TESTING,
updated_at=DATETIME_TESTING,
)
# Associate files with batch
test_batch.files.add(test_file_student)
test_batch.files.add(test_file_course)
session.merge(test_file_student)
session.merge(test_file_course)
session.merge(test_batch)
session.commit()
except Exception as e:
session.rollback()
Expand Down
25 changes: 25 additions & 0 deletions src/webapp/gcsutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,3 +358,28 @@ def get_file_contents(self, bucket_name: str, file_name: str) -> Any:
blob = bucket.blob(file_name)
res = blob.download_as_bytes()
return res

def read_csv_as_dataframe(self, bucket_name: str, file_name: str) -> Any:
    """Read a CSV file from GCS and return it as a pandas DataFrame.

    Args:
        bucket_name: GCS bucket name.
        file_name: Full blob path (e.g., 'validated/filename.csv').

    Returns:
        pandas DataFrame parsed from the blob's contents.

    Raises:
        ValueError: If the bucket does not exist, or the blob does not
            exist in the bucket.
    """
    # pandas is imported at function scope, as in the original code.
    import pandas as pd

    storage_client = storage.Client()

    # lookup_bucket() returns None for a missing bucket instead of raising
    # google.cloud.exceptions.NotFound (which get_bucket() does), so the
    # documented ValueError contract holds for the bucket as well as the file.
    bucket = storage_client.lookup_bucket(bucket_name)
    if bucket is None:
        raise ValueError(f"Bucket not found: {bucket_name}")

    blob = bucket.blob(file_name)
    if not blob.exists():
        raise ValueError(f"File not found: {file_name}")

    # Open the blob as a text file-like object and let pandas parse it
    # directly from that handle.
    with blob.open("r") as fh:
        return pd.read_csv(fh)
Loading
Loading