Skip to content

Commit f161d95

Browse files
authored
Merge pull request #61 from AitoDotAI/feature/multitenancy
Fix multitenancy support for version endpoint
2 parents 771c6ef + 083c835 commit f161d95

6 files changed

Lines changed: 249 additions & 25 deletions

File tree

aito/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.0"
1+
__version__ = "0.5.1"

aito/api.py

Lines changed: 74 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import aito.client.responses as aito_responses
1818
from aito.client import AitoClient, RequestError
1919
from aito.schema import AitoDatabaseSchema, AitoTableSchema, AitoColumnTypeSchema
20-
from aito.utils._file_utils import gzip_file, check_file_is_gzipped
20+
from aito.utils._file_utils import gzip_file, check_file_is_gzipped, read_ndjson_gz_file
2121
from aito.utils.data_frame_handler import DataFrameHandler
2222

2323
LOG = logging.getLogger('AitoAPI')
@@ -470,15 +470,39 @@ def poll_file_processing_status(client: AitoClient, table_name: str, session_id:
470470
time.sleep(polling_time)
471471

472472

473+
def _stream_entries_from_gzip(binary_file: BinaryIO) -> Iterable[Dict]:
474+
"""Stream entries from a gzipped ndjson file.
475+
476+
This is a generator that yields entries one by one from a gzipped ndjson file,
477+
which is memory-efficient for large files.
478+
479+
:param binary_file: binary file object of a gzipped ndjson file
480+
:type binary_file: BinaryIO
481+
:yield: entries from the file
482+
:rtype: Iterable[Dict]
483+
"""
484+
import gzip
485+
import json
486+
with gzip.open(binary_file, 'rt', encoding='utf-8') as f:
487+
for line in f:
488+
line = line.strip()
489+
if line:
490+
yield json.loads(line)
491+
492+
473493
def upload_binary_file(
474494
client: AitoClient,
475495
table_name: str,
476496
binary_file: BinaryIO,
477497
polling_time: int = 10,
478-
optimize_on_finished: bool = True
498+
optimize_on_finished: bool = True,
499+
batch_size: int = 1000
479500
):
480501
"""`upload a binary file object to a table <https://aito.ai/docs/api/#post-api-v1-data-table-file>`__
481502
503+
For multitenant instances, this function uses streaming batch uploads instead of S3 file upload,
504+
as S3 file upload is not supported in multitenant environments.
505+
482506
.. note::
483507
484508
requires the client to be setup with the READ-WRITE API key
@@ -489,34 +513,54 @@ def upload_binary_file(
489513
:type table_name: str
490514
:param binary_file: binary file object
491515
:type binary_file: BinaryIO
492-
:param polling_time: polling wait time
516+
:param polling_time: polling wait time (only used for non-multitenant S3 upload)
493517
:type polling_time: int
494518
:param optimize_on_finished: :func:`optimize_table` when finished uploading, defaults to True
495519
:type optimize_on_finished: bool
520+
:param batch_size: batch size for streaming upload (only used for multitenant), defaults to 1000
521+
:type batch_size: int
496522
"""
497523
LOG.debug(f'uploading file object to table `{table_name}`...')
498-
init_upload_resp = initiate_upload_file(client=client, table_name=table_name)
499-
upload_binary_file_to_s3(initiate_upload_file_response=init_upload_resp, binary_file=binary_file)
500-
upload_session_id = init_upload_resp['id']
501-
trigger_file_processing(client=client, table_name=table_name, session_id=upload_session_id)
502-
poll_file_processing_status(
503-
client=client, table_name=table_name, session_id=upload_session_id, polling_time=polling_time
504-
)
505524

506-
LOG.info(f'uploaded file object to table `{table_name}`')
507-
if optimize_on_finished:
508-
optimize_table(client, table_name)
525+
if client.is_multitenant:
526+
# For multitenant instances, use streaming batch upload
527+
LOG.info(f'using streaming upload for multitenant instance')
528+
entries = _stream_entries_from_gzip(binary_file)
529+
upload_entries(
530+
client=client,
531+
table_name=table_name,
532+
entries=entries,
533+
batch_size=batch_size,
534+
optimize_on_finished=optimize_on_finished
535+
)
536+
else:
537+
# For non-multitenant instances, use S3 file upload
538+
init_upload_resp = initiate_upload_file(client=client, table_name=table_name)
539+
upload_binary_file_to_s3(initiate_upload_file_response=init_upload_resp, binary_file=binary_file)
540+
upload_session_id = init_upload_resp['id']
541+
trigger_file_processing(client=client, table_name=table_name, session_id=upload_session_id)
542+
poll_file_processing_status(
543+
client=client, table_name=table_name, session_id=upload_session_id, polling_time=polling_time
544+
)
545+
546+
LOG.info(f'uploaded file object to table `{table_name}`')
547+
if optimize_on_finished:
548+
optimize_table(client, table_name)
509549

510550

511551
def upload_file(
512552
client: AitoClient,
513553
table_name: str,
514554
file_path: PathLike,
515555
polling_time: int = 10,
516-
optimize_on_finished: bool = True
556+
optimize_on_finished: bool = True,
557+
batch_size: int = 1000
517558
):
518559
"""`upload a file <https://aito.ai/docs/api/#post-api-v1-data-table-file>`__ to the specfied table
519560
561+
For multitenant instances, this function uses streaming batch uploads instead of S3 file upload,
562+
as S3 file upload is not supported in multitenant environments.
563+
520564
.. note::
521565
522566
requires the client to be setup with the READ-WRITE API key
@@ -527,10 +571,12 @@ def upload_file(
527571
:type table_name: str
528572
:param file_path: path to the file to be uploaded
529573
:type file_path: PathLike
530-
:param polling_time: polling wait time
574+
:param polling_time: polling wait time (only used for non-multitenant S3 upload)
531575
:type polling_time: int
532576
:param optimize_on_finished: :func:`optimize_table` when finished uploading, defaults to True
533577
:type optimize_on_finished: bool
578+
:param batch_size: batch size for streaming upload (only used for multitenant), defaults to 1000
579+
:type batch_size: int
534580
:raises ValueError: incorrect file extension, should be .ndjson.gz
535581
"""
536582
if not check_file_is_gzipped(file_path):
@@ -541,15 +587,23 @@ def upload_file(
541587
table_name=table_name,
542588
binary_file=f,
543589
polling_time=polling_time,
544-
optimize_on_finished=optimize_on_finished
590+
optimize_on_finished=optimize_on_finished,
591+
batch_size=batch_size
545592
)
546593

547594

548595
def quick_add_table(
549-
client: AitoClient, input_file: Union[Path, PathLike], table_name: str = None, input_format: str = None
596+
client: AitoClient,
597+
input_file: Union[Path, PathLike],
598+
table_name: str = None,
599+
input_format: str = None,
600+
batch_size: int = 1000
550601
):
551602
"""Create a table and upload a file to the table, using the default inferred schema
552603
604+
For multitenant instances, this function uses streaming batch uploads instead of S3 file upload,
605+
as S3 file upload is not supported in multitenant environments.
606+
553607
:param client: the AitoClient instance
554608
:type client: AitoClient
555609
:param input_file: path to the input file to be uploaded
@@ -558,6 +612,8 @@ def quick_add_table(
558612
:type table_name: Optional[str]
559613
:param input_format: specify the format of the input file, defaults to the input file extension
560614
:type input_format: Optional[str]
615+
:param batch_size: batch size for streaming upload (only used for multitenant), defaults to 1000
616+
:type batch_size: int
561617
"""
562618
df_handler = DataFrameHandler()
563619

@@ -586,7 +642,7 @@ def quick_add_table(
586642
create_table(client, table_name, inferred_schema)
587643

588644
with open(converted_tmp_file.name, 'rb') as in_f:
589-
upload_binary_file(client=client, table_name=table_name, binary_file=in_f)
645+
upload_binary_file(client=client, table_name=table_name, binary_file=in_f, batch_size=batch_size)
590646
converted_tmp_file.close()
591647
unlink(converted_tmp_file.name)
592648

aito/client/aito_client.py

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,12 @@ def __init__(self, request_obj: AitoRequest, error: Exception):
4040
self.request_obj = request_obj
4141
self.error = error
4242
if isinstance(error, requestslib.HTTPError):
43-
resp = error.response.json()
44-
error_msg = resp['message'] if 'message' in resp else resp
43+
try:
44+
resp = error.response.json()
45+
error_msg = resp['message'] if 'message' in resp else resp
46+
except (ValueError, KeyError):
47+
# Response is not valid JSON or doesn't have expected structure
48+
error_msg = error.response.text or str(error)
4549
elif isinstance(error, ClientResponseError):
4650
error_msg = error.message
4751
else:
@@ -53,6 +57,23 @@ class AitoClient:
5357
"""A versatile client that connects to the Aito Database Instance
5458
5559
"""
60+
61+
# Pattern to detect multitenant URLs: /db/{database_name}
62+
_MULTITENANT_PATH_PREFIX = '/db/'
63+
64+
@property
def is_multitenant(self) -> bool:
    """Tell whether the instance URL points at a multitenant deployment.

    The URL counts as multitenant when its path component contains the
    multitenant prefix (``/db/``), e.g. https://shared.aito.ai/db/{database_name}

    :return: True if the path of ``instance_url`` contains the multitenant prefix
    :rtype: bool
    """
    from urllib.parse import urlparse

    url_path = urlparse(self.instance_url).path
    # str.find returns -1 only when the prefix is absent from the path
    return url_path.find(self._MULTITENANT_PATH_PREFIX) != -1
76+
5677
def __init__(
5778
self,
5879
instance_url: str,
@@ -82,11 +103,65 @@ def __init__(
82103
self.instance_version = None
83104
if check_credentials:
84105
try:
85-
version_resp = self.request(request_obj=GetVersionRequest(), raise_for_status=True)
106+
version_resp = self._request_version()
86107
self.instance_version = version_resp.version
108+
# Also verify API key is valid by making an authenticated request
109+
self._verify_api_key()
87110
except Exception:
88111
raise Error('failed to instantiate Aito Client, please check your credentials')
89112

113+
@property
114+
def _base_url(self) -> str:
115+
"""Extract the base URL for endpoints that don't include the database path.
116+
117+
For multitenant URLs like 'https://shared.aito.ai/db/my-database',
118+
returns 'https://shared.aito.ai'.
119+
For regular URLs, returns the instance_url unchanged.
120+
"""
121+
from urllib.parse import urlparse, urlunparse
122+
parsed = urlparse(self.instance_url)
123+
path = parsed.path
124+
if self._MULTITENANT_PATH_PREFIX in path:
125+
# Strip /db/{database_name} from the path
126+
db_index = path.find(self._MULTITENANT_PATH_PREFIX)
127+
base_path = path[:db_index]
128+
return urlunparse(parsed._replace(path=base_path))
129+
return self.instance_url
130+
131+
def _request_version(self):
    """Fetch the version of the Aito instance.

    The request is sent to the version endpoint appended to ``_base_url`` rather
    than to ``instance_url``, because on multitenant deployments the /version
    endpoint is served from the base URL, not from under the database path.

    :return: the parsed version response (``GetVersionRequest.response_cls``)
    :raises RequestError: if sending the request, the HTTP status check, or parsing
        the response body fails
    """
    endpoint_url = self._base_url + GetVersionRequest.endpoint
    try:
        response = requestslib.request(
            GetVersionRequest.method,
            endpoint_url,
            headers=self.headers,
        )
        response.raise_for_status()
        payload = response.json()
        return GetVersionRequest.response_cls(payload)
    except Exception as failure:
        # Any failure is reported as a RequestError tied to the version request
        raise RequestError(GetVersionRequest(), failure)
149+
150+
def _verify_api_key(self):
    """Check the API key by calling an endpoint that requires authentication.

    A successful /version call does not prove the key is valid, because that
    endpoint doesn't require authentication. This sends a GET to the schema
    endpoint under ``instance_url`` with the client headers and raises when the
    response has an error status.

    :raises requests.HTTPError: if the schema endpoint answers with an error status
    """
    authenticated_url = self.instance_url + '/api/v1/schema'
    response = requestslib.request('GET', authenticated_url, headers=self.headers)
    response.raise_for_status()
164+
90165
@property
91166
def headers(self):
92167
""" the headers that will be used to send a request to the Aito instance
@@ -157,7 +232,7 @@ def request(
157232
... }
158233
... )) # doctest: +NORMALIZE_WHITESPACE
159234
>>> print(res.top_prediction) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
160-
{"$p": ..., "field": ..., "feature": ...}
235+
{"$p": ..., "$value": ...}
161236
162237
Returns an error when make a request to an incorrect path:
163238

0 commit comments

Comments
 (0)