AitoDotAI
diff --git a/‎aito/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎aito/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎aito/api.py‎
Lines changed: 74 additions & 18 deletions b/‎aito/api.py‎
Lines changed: 74 additions & 18 deletions
diff --git a/‎aito/client/aito_client.py‎
Lines changed: 79 additions & 4 deletions b/‎aito/client/aito_client.py‎
Lines changed: 79 additions & 4 deletions
@@ -1 +1 @@
-__version__ = "0.5.0"
+__version__ = "0.5.1"
@@ -17,7 +17,7 @@
 import aito.client.responses as aito_responses
 from aito.client import AitoClient, RequestError
 from aito.schema import AitoDatabaseSchema, AitoTableSchema, AitoColumnTypeSchema
-from aito.utils._file_utils import gzip_file, check_file_is_gzipped
+from aito.utils._file_utils import gzip_file, check_file_is_gzipped, read_ndjson_gz_file
 from aito.utils.data_frame_handler import DataFrameHandler
 
 LOG = logging.getLogger('AitoAPI')
@@ -470,15 +470,39 @@ def poll_file_processing_status(client: AitoClient, table_name: str, session_id:
         time.sleep(polling_time)
 
 
+def _stream_entries_from_gzip(binary_file: BinaryIO) -> Iterable[Dict]:
+    """Lazily decode the entries of a gzipped ndjson file.
+
+    The file object is decompressed and read as UTF-8 text one line at a time.
+    Each line is stripped of surrounding whitespace; lines that are empty after
+    stripping are skipped, every other line is parsed as one JSON document.
+
+    :param binary_file: binary file object of a gzipped ndjson file
+    :type binary_file: BinaryIO
+    :yield: one JSON-decoded entry per non-blank line, in file order
+    :rtype: Iterable[Dict]
+    """
+    import gzip
+    import json
+    with gzip.open(binary_file, 'rt', encoding='utf-8') as text_stream:
+        stripped_lines = (raw_line.strip() for raw_line in text_stream)
+        yield from (json.loads(entry) for entry in stripped_lines if entry)
+
+
 def upload_binary_file(
         client: AitoClient,
         table_name: str,
         binary_file: BinaryIO,
         polling_time: int = 10,
-        optimize_on_finished: bool = True
+        optimize_on_finished: bool = True,
+        batch_size: int = 1000
 ):
     """`upload a binary file object to a table <https://aito.ai/docs/api/#post-api-v1-data-table-file>`__
 
+    For multitenant instances, this function uses streaming batch uploads instead of S3 file upload,
+    as S3 file upload is not supported in multitenant environments.
+
     .. note::
 
         requires the client to be setup with the READ-WRITE API key
@@ -489,34 +513,54 @@ def upload_binary_file(
     :type table_name: str
     :param binary_file: binary file object
     :type binary_file: BinaryIO
-    :param polling_time: polling wait time
+    :param polling_time: polling wait time (only used for non-multitenant S3 upload)
     :type polling_time: int
     :param optimize_on_finished: :func:`optimize_table` when finished uploading, defaults to True
     :type optimize_on_finished: bool
+    :param batch_size: batch size for streaming upload (only used for multitenant), defaults to 1000
+    :type batch_size: int
     """
     LOG.debug(f'uploading file object to table `{table_name}`...')
-    init_upload_resp = initiate_upload_file(client=client, table_name=table_name)
-    upload_binary_file_to_s3(initiate_upload_file_response=init_upload_resp, binary_file=binary_file)
-    upload_session_id = init_upload_resp['id']
-    trigger_file_processing(client=client, table_name=table_name, session_id=upload_session_id)
-    poll_file_processing_status(
-        client=client, table_name=table_name, session_id=upload_session_id, polling_time=polling_time
-    )
 
-    LOG.info(f'uploaded file object to table `{table_name}`')
-    if optimize_on_finished:
-        optimize_table(client, table_name)
+    if client.is_multitenant:
+        # For multitenant instances, use streaming batch upload
+        LOG.info(f'using streaming upload for multitenant instance')
+        entries = _stream_entries_from_gzip(binary_file)
+        upload_entries(
+            client=client,
+            table_name=table_name,
+            entries=entries,
+            batch_size=batch_size,
+            optimize_on_finished=optimize_on_finished
+        )
+    else:
+        # For non-multitenant instances, use S3 file upload
+        init_upload_resp = initiate_upload_file(client=client, table_name=table_name)
+        upload_binary_file_to_s3(initiate_upload_file_response=init_upload_resp, binary_file=binary_file)
+        upload_session_id = init_upload_resp['id']
+        trigger_file_processing(client=client, table_name=table_name, session_id=upload_session_id)
+        poll_file_processing_status(
+            client=client, table_name=table_name, session_id=upload_session_id, polling_time=polling_time
+        )
+
+        LOG.info(f'uploaded file object to table `{table_name}`')
+        if optimize_on_finished:
+            optimize_table(client, table_name)
 
 
 def upload_file(
         client: AitoClient,
         table_name: str,
         file_path: PathLike,
         polling_time: int = 10,
-        optimize_on_finished: bool = True
+        optimize_on_finished: bool = True,
+        batch_size: int = 1000
 ):
     """`upload a file <https://aito.ai/docs/api/#post-api-v1-data-table-file>`__ to the specfied table
 
+    For multitenant instances, this function uses streaming batch uploads instead of S3 file upload,
+    as S3 file upload is not supported in multitenant environments.
+
     .. note::
 
         requires the client to be setup with the READ-WRITE API key
@@ -527,10 +571,12 @@ def upload_file(
     :type table_name: str
     :param file_path: path to the file to be uploaded
     :type file_path: PathLike
-    :param polling_time: polling wait time
+    :param polling_time: polling wait time (only used for non-multitenant S3 upload)
     :type polling_time: int
     :param optimize_on_finished: :func:`optimize_table` when finished uploading, defaults to True
     :type optimize_on_finished: bool
+    :param batch_size: batch size for streaming upload (only used for multitenant), defaults to 1000
+    :type batch_size: int
     :raises ValueError: incorrect file extension, should be .ndjson.gz
     """
     if not check_file_is_gzipped(file_path):
@@ -541,15 +587,23 @@ def upload_file(
             table_name=table_name,
             binary_file=f,
             polling_time=polling_time,
-            optimize_on_finished=optimize_on_finished
+            optimize_on_finished=optimize_on_finished,
+            batch_size=batch_size
         )
 
 
 def quick_add_table(
-        client: AitoClient, input_file: Union[Path, PathLike], table_name: str = None, input_format: str = None
+        client: AitoClient,
+        input_file: Union[Path, PathLike],
+        table_name: str = None,
+        input_format: str = None,
+        batch_size: int = 1000
 ):
     """Create a table and upload a file to the table, using the default inferred schema
 
+    For multitenant instances, this function uses streaming batch uploads instead of S3 file upload,
+    as S3 file upload is not supported in multitenant environments.
+
     :param client: the AitoClient instance
     :type client: AitoClient
     :param input_file: path to the input file to be uploaded
@@ -558,6 +612,8 @@ def quick_add_table(
     :type table_name: Optional[str]
     :param input_format: specify the format of the input file, defaults to the input file extension
     :type input_format: Optional[str]
+    :param batch_size: batch size for streaming upload (only used for multitenant), defaults to 1000
+    :type batch_size: int
     """
     df_handler = DataFrameHandler()
 
@@ -586,7 +642,7 @@ def quick_add_table(
     create_table(client, table_name, inferred_schema)
 
     with open(converted_tmp_file.name, 'rb') as in_f:
-        upload_binary_file(client=client, table_name=table_name, binary_file=in_f)
+        upload_binary_file(client=client, table_name=table_name, binary_file=in_f, batch_size=batch_size)
     converted_tmp_file.close()
     unlink(converted_tmp_file.name)
 
 
@@ -40,8 +40,12 @@ def __init__(self, request_obj: AitoRequest, error: Exception):
         self.request_obj = request_obj
         self.error = error
         if isinstance(error, requestslib.HTTPError):
-            resp = error.response.json()
-            error_msg = resp['message'] if 'message' in resp else resp
+            try:
+                resp = error.response.json()
+                error_msg = resp['message'] if 'message' in resp else resp
+            except (ValueError, KeyError):
+                # Response is not valid JSON or doesn't have expected structure
+                error_msg = error.response.text or str(error)
         elif isinstance(error, ClientResponseError):
             error_msg = error.message
         else:
@@ -53,6 +57,23 @@ class AitoClient:
     """A versatile client that connects to the Aito Database Instance
 
     """
+
+    # Pattern to detect multitenant URLs: /db/{database_name}
+    _MULTITENANT_PATH_PREFIX = '/db/'
+
+    @property
+    def is_multitenant(self) -> bool:
+        """Tell whether ``instance_url`` points at a multitenant instance.
+
+        A URL counts as multitenant when its path component contains the
+        ``/db/`` marker, e.g. https://shared.aito.ai/db/{database_name}
+
+        :return: True if the path of the instance URL contains the marker
+        :rtype: bool
+        """
+        from urllib.parse import urlparse
+        return self._MULTITENANT_PATH_PREFIX in urlparse(self.instance_url).path
+
     def __init__(
             self,
             instance_url: str,
@@ -82,11 +103,65 @@ def __init__(
         self.instance_version = None
         if check_credentials:
             try:
-                version_resp = self.request(request_obj=GetVersionRequest(), raise_for_status=True)
+                version_resp = self._request_version()
                 self.instance_version = version_resp.version
+                # Also verify API key is valid by making an authenticated request
+                self._verify_api_key()
             except Exception:
                 raise Error('failed to instantiate Aito Client, please check your credentials')
 
+    @property
+    def _base_url(self) -> str:
+        """Return the URL for endpoints that live outside the database path.
+
+        When the path of ``instance_url`` contains ``/db/``, everything from the
+        first ``/db/`` onwards is removed from the path, so
+        'https://shared.aito.ai/db/my-database' becomes 'https://shared.aito.ai'.
+        Any other URL is returned exactly as stored in ``instance_url``.
+        """
+        from urllib.parse import urlparse, urlunparse
+        url_parts = urlparse(self.instance_url)
+        # Keep only what precedes the first multitenant marker, if there is one
+        leading_path, marker, _ = url_parts.path.partition(self._MULTITENANT_PATH_PREFIX)
+        if not marker:
+            return self.instance_url
+        return urlunparse(url_parts._replace(path=leading_path))
+
+    def _request_version(self):
+        """Request the Aito instance version.
+
+        The request is sent to ``_base_url`` rather than ``instance_url`` because,
+        for multitenant deployments, the /version endpoint is at the base URL,
+        not under the database path.
+
+        :return: ``GetVersionRequest.response_cls`` built from the JSON body
+        :raises RequestError: if sending the request, the status check, or
+            decoding the body fails; the original exception is chained as the cause
+        """
+        version_url = self._base_url + GetVersionRequest.endpoint
+        try:
+            resp = requestslib.request(
+                method=GetVersionRequest.method,
+                url=version_url,
+                headers=self.headers,
+                json=None
+            )
+            resp.raise_for_status()
+            return GetVersionRequest.response_cls(resp.json())
+        except Exception as e:
+            # Chain explicitly so the underlying failure is reported as the
+            # direct cause instead of an unrelated error raised during handling
+            raise RequestError(GetVersionRequest(), e) from e
+
+    def _verify_api_key(self):
+        """Check the API key by calling an endpoint that requires authentication.
+
+        The /version endpoint doesn't require authentication, so a separate GET
+        request is sent to ``/api/v1/schema`` under ``instance_url`` with the
+        client's headers. An HTTP error status is raised by ``raise_for_status``.
+        """
+        request_kwargs = {
+            'method': 'GET',
+            'url': self.instance_url + '/api/v1/schema',
+            'headers': self.headers,
+            'json': None,
+        }
+        requestslib.request(**request_kwargs).raise_for_status()
+
     @property
     def headers(self):
         """ the headers that will be used to send a request to the Aito instance
@@ -157,7 +232,7 @@ def request(
          ...    }
          ... )) # doctest: +NORMALIZE_WHITESPACE
          >>> print(res.top_prediction) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-         {"$p": ..., "field": ..., "feature": ...}
+         {"$p": ..., "$value": ...}
 
          Returns an error when make a request to an incorrect path:
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.5.0"`
	`1`	`+__version__ = "0.5.1"`