@@ -472,7 +472,6 @@ def ingest_upload(
472472 metadata : Optional [Dict ] = None ,
473473 sync : bool = False ,
474474 index : bool = True ,
475- signed_url : bool = False ,
476475 ) -> Dict :
477476 """
478477 Create an empty folder in a collection or upload a document to it
@@ -485,9 +484,6 @@ def ingest_upload(
485484 files, metadata contains foreign_id of the parent. Metadata for a
486485 directory contains foreign_id for itself as well as its parent and the
487486 name of the directory.
488- signed_url: use the signed URL workflow for file uploads. When True,
489- files are uploaded via a signed URL instead of multipart ingest.
490- Directories always use the standard ingest endpoint.
491487 """
492488 url_path = "collections/{0}/ingest" .format (collection_id )
493489 params = {"sync" : sync , "index" : index }
@@ -496,9 +492,6 @@ def ingest_upload(
496492 data = {"meta" : json .dumps (metadata )}
497493 return self ._request ("POST" , url , data = data )
498494
499- if signed_url :
500- return self ._signed_url_upload (collection_id , file_path , metadata , index )
501-
502495 for attempt in count (1 ):
503496 try :
504497 with file_path .open ("rb" ) as fh :
@@ -517,27 +510,51 @@ def ingest_upload(
517510 backoff (ae , attempt )
518511 return {}
519512
520- def _signed_url_upload (
513+ def signed_url_upload (
521514 self ,
522515 collection_id : str ,
523- file_path : Path ,
524- metadata : Optional [Dict ],
525- index : bool ,
516+ file_path : Optional [ Path ] = None ,
517+ metadata : Optional [Dict ] = None ,
518+ index : bool = True ,
526519 ) -> Dict :
520+ """
521+ Upload a document using the signed URL workflow.
522+
523+ For directories (no file), falls back to the standard ingest endpoint
524+ since there is no file content to upload.
525+
526+ The workflow is:
527+ 1. POST /file/uploadUrl -> {url, id}
528+ 2. PUT file content to the signed url
529+ 3. POST /collections/{id}/document with the upload_id and metadata
530+
531+ params
532+ ------
533+ collection_id: id of the collection to upload to
534+ file_path: path of the file to upload. None while creating folders
535+ metadata: dict containing metadata for the file or folders
536+ index: whether to index the document after creation
537+ """
538+ if not file_path or file_path .is_dir ():
539+ return self .ingest_upload (
540+ collection_id , file_path , metadata = metadata , index = index
541+ )
542+
527543 mime_type = mimetypes .guess_type (file_path .name )[0 ] or MIME
528544 meta = dict (metadata or {})
529545 meta ["file_name" ] = file_path .name
530546 meta ["mime_type" ] = mime_type
531547
532548 for attempt in count (1 ):
533549 try :
534- # Request a signed upload URL
550+ # Step 1: request a signed upload URL
535551 upload_url = self ._make_url ("file/uploadUrl" )
536552 result = self ._request ("POST" , upload_url )
537553 signed_url = result ["url" ]
538554 upload_id = result ["id" ]
555+ log .info ("Signed URL [%s]: %s" , upload_id , signed_url )
539556
540- # PUT file content to the signed URL
557+ # Step 2: PUT file content to the signed URL
541558 try :
542559 with file_path .open ("rb" ) as fh :
543560 response = self .session .put (
@@ -549,9 +566,7 @@ def _signed_url_upload(
549566 except (RequestException , HTTPError ) as exc :
550567 raise AlephException (exc ) from exc
551568
552- # Create document record.
553- # The server returns an empty 200 when a document with
554- # the same foreign_id already exists in the collection.
569+ # Step 3: create the document record
555570 doc_url_path = f"collections/{ collection_id } /document"
556571 doc_url = self ._make_url (doc_url_path , params = {"index" : index })
557572 payload = {"upload_id" : upload_id , "meta" : meta }
0 commit comments