11import datetime
22import logging
3+ import os
4+ import subprocess
5+
6+ import cloudfiles
37import redis
4- from dynamicannotationdb .models import AnalysisTable , Base
5- from flask import abort , current_app , request , jsonify
8+ from dynamicannotationdb .models import AnalysisTable , AnalysisVersion , Base
9+ from flask import abort , current_app , jsonify , request
610from flask_accepts import accepts
7- from flask_restx import Namespace , Resource , inputs , reqparse , fields
11+ from flask_restx import Namespace , Resource , fields , inputs , reqparse
12+ from middle_auth_client import (
13+ auth_requires_admin ,
14+ auth_requires_dataset_admin ,
15+ auth_requires_permission ,
16+ )
17+ from sqlalchemy import MetaData , Table
18+ from sqlalchemy .engine .url import make_url
19+ from sqlalchemy .exc import NoSuchTableError
20+
821from materializationengine .blueprints .client .utils import get_latest_version
22+ from materializationengine .blueprints .materialize .schemas import (
23+ AnnotationIDListSchema ,
24+ BadRootsSchema ,
25+ VirtualVersionSchema ,
26+ )
927from materializationengine .blueprints .reset_auth import reset_auth
1028from materializationengine .database import (
11- dynamic_annotation_cache ,
1229 db_manager ,
30+ dynamic_annotation_cache ,
1331)
1432from materializationengine .info_client import (
1533 get_aligned_volumes ,
1634 get_datastack_info ,
1735 get_relevant_datastack_info ,
1836)
19- from dynamicannotationdb .models import AnalysisVersion
2037from materializationengine .schemas import AnalysisTableSchema , AnalysisVersionSchema
21- from materializationengine .blueprints .materialize .schemas import BadRootsSchema
22- from middle_auth_client import auth_requires_admin , auth_requires_permission , auth_requires_dataset_admin
23- from sqlalchemy import MetaData , Table
24- from sqlalchemy .engine .url import make_url
25- from sqlalchemy .exc import NoSuchTableError
2638from materializationengine .utils import check_write_permission
27- import os
28- import subprocess
29- import cloudfiles
30-
31-
32- from materializationengine .blueprints .materialize .schemas import (
33- VirtualVersionSchema ,
34- AnnotationIDListSchema ,
35- )
36-
3739
38- __version__ = "5.18.0 "
40+ __version__ = "5.20.1 "
3941
4042
4143bulk_upload_parser = reqparse .RequestParser ()
@@ -715,7 +717,7 @@ class TableResource(Resource):
715717 @mat_bp .doc ("get_all_tables" , security = "apikey" )
716718 def get (self , aligned_volume_name , version ):
717719 check_aligned_volume (aligned_volume_name )
718-
720+
719721 with db_manager .session_scope (aligned_volume_name ) as session :
720722 response = (
721723 session .query (AnalysisTable )
@@ -742,11 +744,11 @@ class AnnotationResource(Resource):
742744 @mat_bp .doc ("get_top_materialized_annotations" , security = "apikey" )
743745 def get (self , aligned_volume_name : str , version : int , tablename : str ):
744746 check_aligned_volume (aligned_volume_name )
745-
747+
746748 try :
747749 with db_manager .session_scope (aligned_volume_name ) as session :
748750 engine = db_manager .get_engine (aligned_volume_name )
749-
751+
750752 metadata = MetaData ()
751753 try :
752754 annotation_table = Table (
@@ -755,23 +757,24 @@ def get(self, aligned_volume_name: str, version: int, tablename: str):
755757 except NoSuchTableError as e :
756758 logging .error (f"No table exists { e } " )
757759 return abort (404 )
758-
760+
759761 response = session .query (annotation_table ).limit (10 ).all ()
760762 annotations = [r ._asdict () for r in response ]
761-
763+
762764 return (annotations , 200 ) if annotations else abort (404 )
763-
765+
764766 except Exception as e :
765767 logging .error (f"Error querying annotations: { e } " )
766768 return abort (500 )
767769
770+
768771@mat_bp .route ("/materialize/run/create_virtual/datastack/<string:datastack_name>" )
769772class CreateVirtualPublicVersionResource (Resource ):
770773 @reset_auth
771774 @auth_requires_dataset_admin (table_arg = "datastack_name" )
772775 @mat_bp .doc ("create virtual materialization" , security = "apikey" )
773776 @accepts ("VirtualVersionSchema" , schema = VirtualVersionSchema , api = mat_bp )
774- def post (self , datastack_name :str ):
777+ def post (self , datastack_name : str ):
775778 """Create a virtual version from an existing frozen version.
776779
777780 Args:
@@ -790,8 +793,7 @@ def post(self, datastack_name:str):
790793 if not tables_to_include :
791794 return abort (400 , "No tables included" )
792795
793- with db_manager .session_scope (aligned_volume ) as session :
794-
796+ with db_manager .session_scope (aligned_volume ) as session :
795797 analysis_version = (
796798 session .query (AnalysisVersion )
797799 .filter (AnalysisVersion .version == target_version )
@@ -851,3 +853,80 @@ def post(self, datastack_name:str):
851853 analysis_version .expires_on = expiration_timestamp
852854
853855 return f"{ virtual_datastack_name } created" , 200
856+
857+
@mat_bp.route(
    "/materialize/run/write_deltalake/datastack/<string:datastack_name>/version/<int(signed=True):version>/table_name/<string:table_name>/"
)
class WriteDeltalakeResource(Resource):
    """Endpoints to start, and poll the progress of, a Delta Lake table export."""

    @reset_auth
    @auth_requires_dataset_admin(table_arg="datastack_name")
    @mat_bp.doc("Export a table to Delta Lake", security="apikey")
    def post(self, datastack_name: str, version: int, table_name: str):
        """Export a frozen materialization table to partitioned Delta Lake.

        The export itself is not run in the request: a ``write_deltalake_table``
        task signature is built and dispatched with ``apply_async``.

        An optional JSON body may carry ``output_specs``: a list of objects,
        each of which must be accepted as keyword arguments by
        ``DeltaLakeOutputSpec``. The entries are only validated here; the raw
        list is what gets forwarded to the task.

        Args:
            datastack_name (str): name of datastack from infoservice
            version (int): materialization version (-1 for latest)
            table_name (str): annotation table name to export

        Returns:
            tuple: a ``{"message": ...}`` dict and HTTP status 200 once the
            task has been enqueued.

        Raises:
            HTTPException: 400 when the JSON body is not an object, when
            ``output_specs`` is not a list, or when one of its entries is not
            an object or is rejected by ``DeltaLakeOutputSpec``.
        """
        # Imported lazily inside the handler, as in the rest of this resource.
        from materializationengine.workflows.deltalake_export import (
            DeltaLakeOutputSpec,
            write_deltalake_table,
        )

        if version == -1:
            version = get_latest_version(datastack_name)

        datastack_info = get_datastack_info(datastack_name)

        # Accept optional output_specs from JSON body.
        output_specs = None
        if request.is_json:
            payload = request.json
            if payload:
                # A JSON array / string / number has no ``.get``; reject it
                # with a client error instead of failing with AttributeError.
                if not isinstance(payload, dict):
                    return abort(400, "JSON body must be an object")
                output_specs = payload.get("output_specs", None)

        if output_specs is not None:
            if not isinstance(output_specs, list):
                return abort(400, "output_specs must be a list")
            for item in output_specs:
                if not isinstance(item, dict):
                    return abort(400, "each entry in output_specs must be an object")
                try:
                    # Constructed purely to validate the entry; the instance
                    # is discarded.
                    DeltaLakeOutputSpec(**item)
                except (TypeError, ValueError) as exc:
                    # TypeError covers unknown/missing keyword arguments.
                    # NOTE(review): ValueError is caught on the assumption that
                    # the spec may also validate field values - confirm.
                    return abort(400, f"invalid output_specs entry: {exc} ")

        write_deltalake_table.s(
            datastack_info, version, table_name, output_specs=output_specs
        ).apply_async()

        return {
            "message": f"Delta Lake export enqueued for {table_name} v{version} "
        }, 200

    @reset_auth
    @auth_requires_dataset_admin(table_arg="datastack_name")
    @mat_bp.doc("Get Delta Lake export progress", security="apikey")
    def get(self, datastack_name: str, version: int, table_name: str):
        """Get progress of a Delta Lake export for a table.

        Returns JSON with ``status``, ``rows_processed``, ``total_rows``,
        and ``percent_complete``. Returns 404 if no export is tracked.

        Args:
            datastack_name (str): name of datastack from infoservice
            version (int): materialization version (-1 for latest)
            table_name (str): annotation table name whose export is queried
        """
        from materializationengine.workflows.deltalake_export import (
            get_deltalake_export_progress,
        )

        if version == -1:
            version = get_latest_version(datastack_name)

        progress = get_deltalake_export_progress(datastack_name, version, table_name)
        if progress is None:
            # Nothing recorded for this datastack/version/table combination.
            return {
                "message": f"No export progress found for {table_name} v{version} "
            }, 404

        return progress, 200
0 commit comments