@@ -506,6 +506,7 @@ def get_conversations(dataset: str, offset: int = 0, limit: int | None = None) -
506506 """Get conversations with pagination.
507507
508508 Returns only the requested slice, not entire file.
509+ Uses caching for improved performance.
509510
510511 Args:
511512 dataset: Dataset name (folder under final_results/)
@@ -525,15 +526,12 @@ def get_conversations(dataset: str, offset: int = 0, limit: int | None = None) -
525526 if not conversations_file .exists ():
526527 raise HTTPException (status_code = 404 , detail = f"Dataset not found: { dataset } " )
527528
528- conversations = []
529- import json
530- with open (conversations_file , encoding = 'utf-8' , errors = 'replace' ) as f :
531- for i , line in enumerate (f ):
532- if i < offset :
533- continue
534- if limit is not None and i >= offset + limit :
535- break
536- conversations .append (json .loads (line ))
529+ # Use cached read for entire file, then slice in memory
530+ all_conversations = _get_cached_jsonl (conversations_file )
531+
532+ # Apply pagination to cached data
533+ end_idx = offset + limit if limit is not None else len (all_conversations )
534+ conversations = all_conversations [offset :end_idx ]
537535
538536 return {
539537 "data" : conversations ,
@@ -547,6 +545,8 @@ def get_conversations(dataset: str, offset: int = 0, limit: int | None = None) -
547545def get_properties (dataset : str ) -> Dict [str , Any ]:
548546 """Get properties (usually smaller, can load all at once).
549547
548+ Uses caching for improved performance.
549+
550550 Args:
551551 dataset: Dataset name (folder under final_results/)
552552
@@ -562,11 +562,8 @@ def get_properties(dataset: str) -> Dict[str, Any]:
562562 if not properties_file .exists ():
563563 return {"data" : []}
564564
565- properties = []
566- import json
567- with open (properties_file ) as f :
568- for line in f :
569- properties .append (json .loads (line ))
565+ # Use cached read instead of manual parsing
566+ properties = _get_cached_jsonl (properties_file )
570567
571568 return {"data" : properties }
572569
@@ -575,6 +572,8 @@ def get_properties(dataset: str) -> Dict[str, Any]:
575572def get_clusters (dataset : str ) -> Dict [str , Any ]:
576573 """Get clusters.
577574
575+ Uses caching for improved performance.
576+
578577 Args:
579578 dataset: Dataset name (folder under final_results/)
580579
@@ -590,15 +589,12 @@ def get_clusters(dataset: str) -> Dict[str, Any]:
590589 if not clusters_file .exists ():
591590 return {"data" : []}
592591
593- import json
592+ # Use cached read for both JSONL and JSON files
594593 if clusters_file .suffix == ".jsonl" :
595- clusters = []
596- with open (clusters_file ) as f :
597- for line in f :
598- clusters .append (json .loads (line ))
594+ clusters = _get_cached_jsonl (clusters_file )
599595 else :
600- with open ( clusters_file ) as f :
601- clusters = json . load ( f )
596+ # For JSON files, use the cached JSON reader
597+ clusters = _read_json_safe ( clusters_file )
602598
603599 return {"data" : clusters }
604600
@@ -607,6 +603,8 @@ def get_clusters(dataset: str) -> Dict[str, Any]:
607603def get_metrics (dataset : str ) -> Dict [str , Any ]:
608604 """Get all metrics files.
609605
606+ Uses caching for improved performance.
607+
610608 Args:
611609 dataset: Dataset name (folder under final_results/)
612610
@@ -616,19 +614,61 @@ def get_metrics(dataset: str) -> Dict[str, Any]:
616614 final_results_dir = Path ("final_results" ) / dataset
617615 metrics = {}
618616
619- import json
620617 for metric_type in ["model_cluster_scores_df" , "cluster_scores_df" , "model_scores_df" ]:
621618 metric_file = final_results_dir / f"{ metric_type } .jsonl"
622619 if metric_file .exists ():
623- data = []
624- with open (metric_file ) as f :
625- for line in f :
626- data .append (json .loads (line ))
627- metrics [metric_type ] = data
620+ # Use cached read for metrics files
621+ metrics [metric_type ] = _get_cached_jsonl (metric_file )
628622
629623 return metrics
630624
631625
@router.get("/results/{dataset}/all")
def get_all_dataset_data(dataset: str, conversations_limit: int = 1000) -> Dict[str, Any]:
    """Get all dataset data in a single request (combined endpoint).

    Performance optimization that reduces network round trips by combining
    conversations, properties, clusters, and metrics into a single response.
    All data sources go through the cached readers used by the individual
    endpoints.

    Args:
        dataset: Dataset name (folder under final_results/)
        conversations_limit: Maximum number of conversations to return
            (default: 1000)

    Returns:
        Dict with conversations, properties, clusters, metrics,
        total_conversations, and has_more.
    """
    # Delegate to the individual endpoint functions so their caching is shared.
    conversations_result = get_conversations(dataset, offset=0, limit=conversations_limit)
    properties_result = get_properties(dataset)
    clusters_result = get_clusters(dataset)
    metrics_result = get_metrics(dataset)

    # Resolve the conversations file the same way get_conversations does:
    # prefer the lightweight clustered results, fall back to raw conversations.
    final_results_dir = Path("final_results") / dataset
    conversations_file = final_results_dir / "clustered_results_lightweight.jsonl"
    if not conversations_file.exists():
        conversations_file = final_results_dir / "conversations.jsonl"

    # FIX: the original re-opened the file and iterated every line just to
    # count records (`sum(1 for _ in f)`), an uncached full-file pass that
    # defeats the caching this endpoint advertises. The cached parse is
    # already warm from the get_conversations call above, so its length is
    # the total record count with no extra disk I/O.
    total_conversations = 0
    if conversations_file.exists():
        total_conversations = len(_get_cached_jsonl(conversations_file))

    return {
        "conversations": conversations_result["data"],
        "properties": properties_result["data"],
        "clusters": clusters_result["data"],
        "metrics": {
            "model_cluster_scores_df": metrics_result.get("model_cluster_scores_df", []),
            "cluster_scores_df": metrics_result.get("cluster_scores_df", []),
            "model_scores_df": metrics_result.get("model_scores_df", []),
        },
        "total_conversations": total_conversations,
        "has_more": conversations_result["has_more"],
    }
671+
632672@router .get ("/results/{dataset}/summary" )
633673def get_dataset_summary (dataset : str ) -> Dict [str , Any ]:
634674 """Get dataset summary (fast - just metadata, no full data).
0 commit comments