11import importlib .metadata
22import json
3+ import mimetypes
34import uuid
45import logging
56from itertools import count
@@ -471,6 +472,7 @@ def ingest_upload(
471472 metadata : Optional [Dict ] = None ,
472473 sync : bool = False ,
473474 index : bool = True ,
475+ signed_url : bool = False ,
474476 ) -> Dict :
475477 """
476478 Create an empty folder in a collection or upload a document to it
@@ -483,6 +485,9 @@ def ingest_upload(
483485 files, metadata contains foreign_id of the parent. Metadata for a
484486 directory contains foreign_id for itself as well as its parent and the
485487 name of the directory.
488+ signed_url: use the signed URL workflow for file uploads. When True,
489+ files are uploaded via a signed URL instead of multipart ingest.
490+ Directories always use the standard ingest endpoint.
486491 """
487492 url_path = "collections/{0}/ingest" .format (collection_id )
488493 params = {"sync" : sync , "index" : index }
@@ -491,6 +496,9 @@ def ingest_upload(
491496 data = {"meta" : json .dumps (metadata )}
492497 return self ._request ("POST" , url , data = data )
493498
499+ if signed_url :
500+ return self ._signed_url_upload (collection_id , file_path , metadata , index )
501+
494502 for attempt in count (1 ):
495503 try :
496504 with file_path .open ("rb" ) as fh :
@@ -509,6 +517,54 @@ def ingest_upload(
509517 backoff (ae , attempt )
510518 return {}
511519
def _signed_url_upload(
    self,
    collection_id: str,
    file_path: Path,
    metadata: Optional[Dict],
    index: bool,
) -> Dict:
    """
    Upload a single file to a collection using the signed URL workflow.

    The workflow has three steps, all of which are repeated on a retry:

    1. POST to ``file/uploadUrl`` to obtain a signed upload URL and an
       upload id (the ``url`` and ``id`` keys of the response).
    2. PUT the raw file content to the signed URL.
    3. POST to ``collections/<collection_id>/document`` with the upload id
       and the metadata to create the document record.

    params
    ------
    collection_id: id of the collection to upload to
    file_path: path of the file to upload
    metadata: optional metadata for the document; it is copied, and the
      copy is extended with ``file_name`` and ``mime_type`` before being
      sent. The caller's dict is not modified.
    index: passed through as the ``index`` query parameter of the document
      creation request

    Returns the response of the document creation request. If that
    response is empty, ``{"id": <upload id>, "status": "ok"}`` is returned
    instead.

    Raises AlephException if any step fails with a non-transient error, or
    if a transient error persists for more than ``self.retries`` attempts.
    """
    # Fall back to the module-level default when the type cannot be guessed
    # from the file name.
    mime_type = mimetypes.guess_type(file_path.name)[0] or MIME
    # Work on a copy so the caller's metadata dict is never mutated.
    meta = dict(metadata or {})
    meta["file_name"] = file_path.name
    meta["mime_type"] = mime_type

    for attempt in count(1):
        try:
            # Request a signed upload URL
            upload_url = self._make_url("file/uploadUrl")
            result = self._request("POST", upload_url)
            signed_url = result["url"]
            upload_id = result["id"]

            # PUT file content to the signed URL.
            # The detected MIME type is only recorded in the metadata; the
            # transfer itself is always sent as a generic byte stream.
            # NOTE(review): this goes through self.session, so any default
            # headers configured on the session are presumably sent to the
            # signed URL host as well - confirm that this is intended.
            try:
                with file_path.open("rb") as fh:
                    response = self.session.put(
                        signed_url,
                        data=fh,
                        headers={"Content-Type": "application/octet-stream"},
                    )
                    response.raise_for_status()
            except (RequestException, HTTPError) as exc:
                # Normalise transport errors so the retry logic below can
                # inspect them like any other API failure.
                raise AlephException(exc) from exc

            # Create document record.
            # The server returns an empty 200 when a document with
            # the same foreign_id already exists in the collection.
            doc_url_path = f"collections/{collection_id}/document"
            doc_url = self._make_url(doc_url_path, params={"index": index})
            payload = {"upload_id": upload_id, "meta": meta}
            result = self._request("POST", doc_url, json=payload)
            if not result:
                return {"id": upload_id, "status": "ok"}
            return result
        except AlephException as ae:
            if not ae.transient or attempt > self.retries:
                # Bare raise keeps the original traceback and context
                # instead of making the exception its own cause.
                raise
            backoff(ae, attempt)
    # Not reached in practice: count(1) never ends, so the loop only exits
    # through a return or a raise.
    return {}
567+
512568 def create_entityset (
513569 self , collection_id : str , type : str , label : str , summary : Optional [str ]
514570 ) -> Dict :
0 commit comments