Skip to content

Commit 772b8a4

Browse files
authored
Merge pull request #16 from GACWR/dev/eda-etl-3
feat: experiment metrics, model downloads, viz controls, and auth improvements
2 parents aed643e + 41c9b04 commit 772b8a4

35 files changed

Lines changed: 996 additions & 195 deletions

File tree

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,21 @@ Create, render, and publish data visualizations from notebooks or the in-browser
9292

9393
Combine visualizations into **drag-and-drop dashboards** with resizable panels, lock/unlock layout, and persistent configuration. Each visualization also has a full **in-browser editor** with Monaco, live preview for JSON backends, template insertion, and data/config tabs. See the [Visualizations Guide](docs/VISUALIZATIONS.md) for SDK usage.
9494

95+
#### Individual Visualization Editor
9596
<p align="center">
9697
<img src="docs/screenshots/oms-screenshot3.png" alt="OpenModelStudio individual visualization editor" width="100%" />
9798
</p>
9899

100+
<p align="center">
101+
<img src="docs/screenshots/oms-screenshot4.png" alt="OpenModelStudio visualization editor with live preview" width="100%" />
102+
</p>
103+
104+
#### Dashboard
105+
106+
<p align="center">
107+
<img src="docs/screenshots/oms-screenshot5.png" alt="OpenModelStudio drag-and-drop dashboard" width="100%" />
108+
</p>
109+
99110
### Model Registry
100111

101112
Browse, install, and manage models from the [Open Model Registry](https://github.com/GACWR/open-model-registry) -- a public GitHub repo that acts as a decentralized model package manager.

api/Cargo.lock

Lines changed: 25 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ password-hash = "0.5"
3333
sha2 = "0.10"
3434
hex = "0.4"
3535
rustls = { version = "0.23", default-features = false, features = ["ring"] }
36+
csv = "1"
3637

3738
[dev-dependencies]
3839
tower = { version = "0.5", features = ["util"] }

api/src/auth.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ pub fn create_access_token(
4646
email: email.to_string(),
4747
role,
4848
iat: now.timestamp(),
49-
exp: (now + Duration::minutes(15)).timestamp(),
49+
exp: (now + Duration::hours(24)).timestamp(),
5050
token_type: "access".into(),
5151
};
5252
encode(

api/src/main.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,13 @@ async fn main() {
4040
let llm = Arc::new(LlmService::new(&config));
4141

4242
let k8s = match K8sService::new(&config).await {
43-
Ok(svc) => Some(Arc::new(svc)),
43+
Ok(svc) => {
44+
tracing::info!("K8s service initialized successfully (namespace: {})", config.k8s_namespace);
45+
Some(Arc::new(svc))
46+
}
4447
Err(e) => {
45-
tracing::warn!("K8s client not available: {e}. Running without K8s integration.");
48+
tracing::error!("K8s service initialization FAILED: {e}");
49+
tracing::error!("Training jobs and workspace pods will NOT work until K8s is properly configured");
4650
None
4751
}
4852
};
@@ -105,6 +109,7 @@ async fn main() {
105109
.route("/models/{id}/code", put(routes::models::update_code))
106110
.route("/models/{id}/run", post(routes::models::run_model))
107111
.route("/models/{id}/versions", get(routes::models::list_versions))
112+
.route("/models/{id}/experiment-runs", get(routes::models::experiment_runs))
108113
// Training
109114
.route("/training/jobs", get(routes::training::list_all_jobs))
110115
.route("/training/start", post(routes::training::start))
@@ -130,6 +135,7 @@ async fn main() {
130135
.route("/experiments/{id}/compare", get(routes::experiments::compare))
131136
// Artifacts
132137
.route("/jobs/{job_id}/artifacts", get(routes::artifacts::list))
138+
.route("/models/{model_id}/artifacts", get(routes::artifacts::list_for_model))
133139
.route("/artifacts", post(routes::artifacts::create))
134140
.route("/artifacts/{id}", get(routes::artifacts::get))
135141
.route("/artifacts/{id}", delete(routes::artifacts::delete))

api/src/models/artifact.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ use uuid::Uuid;
66
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
77
pub struct Artifact {
88
pub id: Uuid,
9-
pub job_id: Uuid,
9+
pub job_id: Option<Uuid>,
10+
pub workspace_id: Option<Uuid>,
1011
pub name: String,
1112
pub artifact_type: String,
1213
pub s3_key: String,

api/src/models/dataset.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@ pub struct Dataset {
1515
pub row_count: Option<i64>,
1616
pub version: i32,
1717
pub created_by: Uuid,
18+
pub snapshots: i32,
19+
pub schema: Option<serde_json::Value>,
1820
pub created_at: DateTime<Utc>,
1921
pub updated_at: DateTime<Utc>,
20-
pub snapshots: i32,
2122
}
2223

2324
#[derive(Debug, Deserialize)]

api/src/models/experiment.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ pub struct Experiment {
1919
pub struct ExperimentRun {
2020
pub id: Uuid,
2121
pub experiment_id: Uuid,
22-
pub job_id: Uuid,
22+
pub job_id: Option<Uuid>,
23+
pub model_id: Option<Uuid>,
2324
pub parameters: Option<serde_json::Value>,
2425
pub metrics: Option<serde_json::Value>,
2526
pub created_at: DateTime<Utc>,
@@ -34,7 +35,8 @@ pub struct CreateExperimentRequest {
3435

3536
#[derive(Debug, Deserialize)]
3637
pub struct AddRunRequest {
37-
pub job_id: Uuid,
38+
pub job_id: Option<Uuid>,
39+
pub model_id: Option<Uuid>,
3840
pub parameters: Option<serde_json::Value>,
3941
pub metrics: Option<serde_json::Value>,
4042
}

api/src/routes/artifacts.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,24 @@ pub async fn create(
5555
Ok(Json(artifact))
5656
}
5757

58+
/// List all artifacts for a model (via its jobs)
59+
pub async fn list_for_model(
60+
State(state): State<AppState>,
61+
AuthUser(_claims): AuthUser,
62+
Path(model_id): Path<Uuid>,
63+
) -> AppResult<Json<Vec<Artifact>>> {
64+
let artifacts: Vec<Artifact> = sqlx::query_as(
65+
"SELECT a.* FROM artifacts a
66+
JOIN jobs j ON a.job_id = j.id
67+
WHERE j.model_id = $1
68+
ORDER BY a.created_at DESC"
69+
)
70+
.bind(model_id)
71+
.fetch_all(&state.db)
72+
.await?;
73+
Ok(Json(artifacts))
74+
}
75+
5876
pub async fn download(
5977
State(state): State<AppState>,
6078
AuthUser(_claims): AuthUser,

api/src/routes/datasets.rs

Lines changed: 124 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,119 @@ pub async fn get(
4949
AuthUser(_claims): AuthUser,
5050
Path(id): Path<Uuid>,
5151
) -> AppResult<Json<Dataset>> {
52-
let dataset: Dataset = sqlx::query_as("SELECT * FROM datasets WHERE id = $1")
52+
let mut dataset: Dataset = sqlx::query_as("SELECT * FROM datasets WHERE id = $1")
5353
.bind(id)
5454
.fetch_one(&state.db)
5555
.await?;
56+
57+
// Lazy backfill: if schema is missing but we have a stored CSV file, extract it now
58+
if dataset.schema.is_none() && dataset.format.eq_ignore_ascii_case("csv") {
59+
if let Some(ref key) = dataset.s3_key {
60+
let path = key.strip_prefix("local:").unwrap_or(key);
61+
if let Ok(bytes) = std::fs::read(path) {
62+
if let Some((schema, row_count)) = extract_csv_schema(&bytes) {
63+
let _ = sqlx::query(
64+
"UPDATE datasets SET schema = $1, row_count = COALESCE(row_count, $2), updated_at = NOW() WHERE id = $3"
65+
)
66+
.bind(&schema)
67+
.bind(row_count)
68+
.bind(dataset.id)
69+
.execute(&state.db)
70+
.await;
71+
dataset.schema = Some(schema);
72+
if dataset.row_count.is_none() {
73+
dataset.row_count = Some(row_count);
74+
}
75+
}
76+
}
77+
}
78+
}
79+
5680
Ok(Json(dataset))
5781
}
5882

83+
/// Best-effort type inference for a single CSV cell value.
///
/// Returns one of `"int64"`, `"float64"`, `"boolean"`, or `"string"`.
/// Integer parsing is tried before float so that `"42"` is `int64`, not
/// `float64`; booleans match `true`/`false` case-insensitively. Empty
/// cells are reported as `"string"`.
fn infer_cell_type(val: &str) -> &'static str {
    match val {
        "" => "string",
        v if v.parse::<i64>().is_ok() => "int64",
        v if v.parse::<f64>().is_ok() => "float64",
        v if v.eq_ignore_ascii_case("true") || v.eq_ignore_ascii_case("false") => "boolean",
        _ => "string",
    }
}
99+
100+
/// Parse a CSV byte slice and return (schema JSON, row_count).
101+
fn extract_csv_schema(bytes: &[u8]) -> Option<(serde_json::Value, i64)> {
102+
let mut rdr = csv::ReaderBuilder::new()
103+
.has_headers(true)
104+
.from_reader(bytes);
105+
106+
let headers = rdr.headers().ok()?.clone();
107+
if headers.is_empty() {
108+
return None;
109+
}
110+
111+
let num_cols = headers.len();
112+
// Track best type per column: start with unknown, refine by sampling rows
113+
let mut col_types: Vec<Option<&'static str>> = vec![None; num_cols];
114+
let mut row_count: i64 = 0;
115+
let sample_limit = 100; // sample first 100 rows for type inference
116+
117+
for result in rdr.records() {
118+
let record = match result {
119+
Ok(r) => r,
120+
Err(_) => continue,
121+
};
122+
row_count += 1;
123+
124+
if row_count <= sample_limit {
125+
for (i, field) in record.iter().enumerate() {
126+
if i >= num_cols {
127+
break;
128+
}
129+
let cell_type = infer_cell_type(field.trim());
130+
col_types[i] = Some(match col_types[i] {
131+
None => cell_type,
132+
Some(prev) => {
133+
if prev == cell_type {
134+
prev
135+
} else if (prev == "int64" && cell_type == "float64")
136+
|| (prev == "float64" && cell_type == "int64")
137+
{
138+
"float64" // promote int ↔ float
139+
} else {
140+
"string" // fall back to string on conflict
141+
}
142+
}
143+
});
144+
}
145+
}
146+
}
147+
// Count remaining rows after sampling
148+
// (rdr already consumed all records in the loop above)
149+
150+
let columns: Vec<serde_json::Value> = headers
151+
.iter()
152+
.enumerate()
153+
.map(|(i, name)| {
154+
serde_json::json!({
155+
"name": name,
156+
"type": col_types.get(i).and_then(|t| *t).unwrap_or("string"),
157+
"nullable": true
158+
})
159+
})
160+
.collect();
161+
162+
Some((serde_json::Value::Array(columns), row_count))
163+
}
164+
59165
pub async fn create(
60166
State(state): State<AppState>,
61167
AuthUser(claims): AuthUser,
@@ -64,7 +170,7 @@ pub async fn create(
64170
let dataset_id = Uuid::new_v4();
65171

66172
// If file data is provided (base64), store it to local PVC
67-
let (s3_key, size_bytes) = if let Some(ref data_b64) = req.data {
173+
let (s3_key, size_bytes, inferred_schema, inferred_row_count) = if let Some(ref data_b64) = req.data {
68174
use base64::Engine;
69175
let bytes = base64::engine::general_purpose::STANDARD
70176
.decode(data_b64)
@@ -80,14 +186,24 @@ pub async fn create(
80186
std::fs::write(&file_path, &bytes)
81187
.map_err(|e| AppError::Internal(format!("Failed to write file: {e}")))?;
82188

83-
(Some(format!("local:{}", file_path)), Some(size))
189+
// Extract schema from CSV files
190+
let (schema, row_count) = if ext == "csv" {
191+
extract_csv_schema(&bytes).unwrap_or((serde_json::Value::Null, 0))
192+
} else {
193+
(serde_json::Value::Null, 0)
194+
};
195+
196+
let schema_opt = if schema.is_null() { None } else { Some(schema) };
197+
let row_count_opt = if row_count > 0 { Some(row_count) } else { req.row_count };
198+
199+
(Some(format!("local:{}", file_path)), Some(size), schema_opt, row_count_opt)
84200
} else {
85-
(None, None)
201+
(None, None, None, req.row_count)
86202
};
87203

88204
let dataset: Dataset = sqlx::query_as(
89-
"INSERT INTO datasets (id, project_id, name, description, format, s3_key, size_bytes, row_count, version, created_by, created_at, updated_at)
90-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 1, $9, NOW(), NOW()) RETURNING *"
205+
"INSERT INTO datasets (id, project_id, name, description, format, s3_key, size_bytes, row_count, version, created_by, schema, created_at, updated_at)
206+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 1, $9, $10, NOW(), NOW()) RETURNING *"
91207
)
92208
.bind(dataset_id)
93209
.bind(req.project_id)
@@ -96,8 +212,9 @@ pub async fn create(
96212
.bind(&req.format)
97213
.bind(&s3_key)
98214
.bind(size_bytes)
99-
.bind(req.row_count)
215+
.bind(inferred_row_count)
100216
.bind(claims.sub)
217+
.bind(&inferred_schema)
101218
.fetch_one(&state.db)
102219
.await?;
103220
notify(&state.db, claims.sub, "Dataset Created", &format!("Dataset '{}' ({}) uploaded", dataset.name, dataset.format), NotifyType::Success, Some(&format!("/datasets/{}", dataset.id))).await;

0 commit comments

Comments
 (0)