Skip to content

Commit 7277439

Browse files
authored
Merge pull request #617 from splitgraph/feature/CU-1z4bhdb-reintrospection-mode
Allow reintrospecting datasets when running `sgr cloud load`
2 parents bfb1a23 + d8c741a commit 7277439

6 files changed

Lines changed: 111 additions & 12 deletions

File tree

.ci/prepare_doc_bundle.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@ python generate_reference.py sgr "$TARGET_DIR"/sgr
2626
echo "Generating configuration reference"
2727
python generate_reference.py config "$TARGET_DIR"/0100_config-flag-reference.mdx
2828

29-
echo "Building Asciinema casts"
30-
TARGET_DIR=$TARGET_DIR "$CI_DIR"/rebuild_asciicasts.sh
29+
# Temporarily disabled: rebuilding the Asciinema casts takes far too long and the website doesn't use them.
30+
# echo "Building Asciinema casts"
31+
# TARGET_DIR=$TARGET_DIR "$CI_DIR"/rebuild_asciicasts.sh
3132

3233
echo "Archiving the bundle $OUTPUT.tar.gz"
3334
cd "$TARGET_DIR"/..

splitgraph/cloud/__init__.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@
3232
from splitgraph.cloud.models import (
3333
AddExternalCredentialRequest,
3434
AddExternalRepositoriesRequest,
35+
AddExternalRepositoriesResponse,
3536
AddExternalRepositoryRequest,
3637
ExportJobStatus,
3738
ExternalResponse,
3839
IngestionJobStatus,
40+
IntrospectionMode,
3941
ListExternalCredentialsResponse,
4042
MetadataResponse,
4143
Plugin,
@@ -74,6 +76,7 @@
7476
from splitgraph.config.management import patch_and_save_config
7577
from splitgraph.exceptions import (
7678
AuthAPIError,
79+
DataSourceError,
7780
GQLAPIError,
7881
GQLRepoDoesntExistError,
7982
GQLUnauthenticatedError,
@@ -530,15 +533,37 @@ def ensure_external_credential(
530533
raise JSONSchemaValidationError(message="[MASKED]")
531534
raise
532535

533-
def bulk_upsert_external(self, repositories: List[AddExternalRepositoryRequest]):
534-
request = AddExternalRepositoriesRequest(repositories=repositories)
535-
self._perform_request(
536+
def bulk_upsert_external(
537+
self,
538+
repositories: List[AddExternalRepositoryRequest],
539+
introspection_mode: IntrospectionMode = IntrospectionMode.EMPTY,
540+
raise_errors: bool = False,
541+
) -> None:
542+
request = AddExternalRepositoriesRequest(
543+
repositories=repositories, introspection_mode=introspection_mode
544+
)
545+
response = self._perform_request(
536546
"/bulk-add",
537547
self.access_token,
538548
request,
539549
endpoint=self.externals_endpoint,
540550
jsonschema_endpoint=True,
551+
response_class=AddExternalRepositoriesResponse,
541552
)
553+
assert response
554+
if response.errors:
555+
for repo_errors in response.errors:
556+
for error in repo_errors.errors:
557+
logging.warning(
558+
"Error adding table %s/%s/%s: %s (%s)",
559+
repo_errors.namespace,
560+
repo_errors.repository,
561+
error.table_name,
562+
error.error,
563+
error.error_text,
564+
)
565+
if raise_errors:
566+
raise DataSourceError("Error introspecting some tables!")
542567

543568

544569
def AuthAPIClient(*args, **kwargs):

splitgraph/cloud/models.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Definitions for responses from the cloud GQL/REST APIs
33
"""
4+
import enum
45
import logging
56
from datetime import datetime
67
from typing import Any, Dict, List, Optional
@@ -15,7 +16,7 @@
1516
Source,
1617
Table,
1718
)
18-
from splitgraph.core.types import Params, TableSchema
19+
from splitgraph.core.types import MountError, Params, TableSchema
1920

2021

2122
class Plugin(BaseModel):
@@ -30,6 +31,16 @@ class Plugin(BaseModel):
3031
supports_sync: bool
3132

3233

34+
class IntrospectionMode(str, enum.Enum):
35+
"""
36+
Which tables to (re)introspect when adding an external repository.
37+
"""
38+
39+
NONE = "none" # Don't reintrospect any tables
40+
EMPTY = "empty" # Introspect tables with an empty schema
41+
ALL = "all" # Reintrospect all tables
42+
43+
3344
# GQL response for the catalog metadata
3445

3546

@@ -289,3 +300,14 @@ def from_external(
289300

290301
class AddExternalRepositoriesRequest(BaseModel):
291302
repositories: List[AddExternalRepositoryRequest]
303+
introspection_mode: IntrospectionMode = IntrospectionMode.EMPTY
304+
305+
306+
class AddExternalRepositoriesResponse(BaseModel):
307+
class RepositoryMountError(BaseModel):
308+
namespace: str
309+
repository: str
310+
errors: List[MountError]
311+
312+
live_image_hashes: List[Optional[str]]
313+
errors: Optional[List[RepositoryMountError]] = None

splitgraph/commandline/cloud.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import click
1616
from click import wrap_text
1717

18-
from splitgraph.cloud.models import AddExternalRepositoryRequest
18+
from splitgraph.cloud.models import AddExternalRepositoryRequest, IntrospectionMode
1919
from splitgraph.cloud.project.models import Metadata, SplitgraphYAML
2020
from splitgraph.commandline.common import (
2121
ImageType,
@@ -613,9 +613,28 @@ def dump_c(remote, readme_dir, repositories_file, limit_repositories):
613613
is_flag=True,
614614
help="Only set up the metadata, not the external data source settings",
615615
)
616+
@click.option(
617+
"--introspection-mode",
618+
type=click.Choice(IntrospectionMode),
619+
default=IntrospectionMode.EMPTY,
620+
help="Whether to reintrospect tables. none: never reintrospect. all: reintrospect all tables. "
621+
"empty: only reintrospect tables with an empty schema.",
622+
)
623+
@click.option(
624+
"--ignore-introspection-errors",
625+
is_flag=True,
626+
help="If set, will ignore errors when introspecting tables.",
627+
)
616628
@click.argument("limit_repositories", type=str, nargs=-1)
617629
def load_c(
618-
remote, readme_dir, skip_external, initial_private, repositories_file, limit_repositories
630+
remote,
631+
readme_dir,
632+
skip_external,
633+
initial_private,
634+
repositories_file,
635+
limit_repositories,
636+
introspection_mode,
637+
ignore_introspection_errors,
619638
):
620639
"""
621640
Load a Splitgraph catalog from a YAML file.
@@ -666,7 +685,11 @@ def load_c(
666685
initial_private=initial_private,
667686
)
668687
external_repositories.append(external_repository)
669-
rest_client.bulk_upsert_external(repositories=external_repositories)
688+
rest_client.bulk_upsert_external(
689+
repositories=external_repositories,
690+
introspection_mode=introspection_mode,
691+
raise_errors=not ignore_introspection_errors,
692+
)
670693
logging.info(f"Uploaded images for {pluralise('repository', len(external_repositories))}")
671694

672695
logging.info("Updating metadata...")

test/splitgraph/commandline/http_fixtures.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,11 +399,12 @@ def add_external_credential(request, uri, response_headers):
399399
]
400400

401401

402-
def add_external_repo(initial_private=False):
402+
def add_external_repo(initial_private=False, error=False):
403403
def cb(request, uri, response_headers):
404404
data = json.loads(request.body)
405405

406406
assert data["repositories"] is not None
407+
assert data["introspection_mode"] == "empty"
407408
assert data["repositories"] == [
408409
{
409410
"credential_id": "98765432-aaaa-bbbb-a456-000000000000",
@@ -450,7 +451,26 @@ def cb(request, uri, response_headers):
450451
return [
451452
200,
452453
response_headers,
453-
json.dumps({"live_image_hashes": ["abcdef12" * 8, "ghijkl34" * 8, "mnoprs56" * 8]}),
454+
json.dumps(
455+
{
456+
"live_image_hashes": ["abcdef12" * 8, "ghijkl34" * 8, "mnoprs56" * 8],
457+
"errors": [
458+
{
459+
"namespace": "otheruser",
460+
"repository": "somerepo_2",
461+
"errors": [
462+
{
463+
"table_name": "table_1",
464+
"error": "SomeError",
465+
"error_text": "Something bad happened",
466+
}
467+
],
468+
}
469+
]
470+
if not error
471+
else [],
472+
}
473+
),
454474
]
455475

456476
return cb

test/splitgraph/commandline/test_cloud_metadata.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,9 @@ def test_commandline_dump(snapshot):
273273

274274

275275
@pytest.mark.parametrize("initial_private", [True, False])
276+
@pytest.mark.parametrize("errors", [True, False])
276277
@httpretty.activate(allow_net_connect=False)
277-
def test_commandline_load(initial_private):
278+
def test_commandline_load(initial_private, errors):
278279
runner = CliRunner()
279280

280281
httpretty.register_uri(
@@ -326,6 +327,7 @@ def get_remote_param(remote, param):
326327
os.path.join(RESOURCES, "splitgraph_yml", "readmes"),
327328
"-f",
328329
os.path.join(RESOURCES, "splitgraph_yml", "splitgraph.yml"),
330+
"--ignore-introspection-errors",
329331
],
330332
catch_exceptions=False,
331333
)
@@ -339,6 +341,12 @@ def get_remote_param(remote, param):
339341
reqs.pop() # discard duplicate request
340342
assert_repository_profiles(reqs.pop())
341343

344+
if errors:
345+
assert (
346+
"Error adding table otheruser/somerepo_2/table_1: "
347+
"SomeError (Something bad happened)" in result.output
348+
)
349+
342350

343351
def test_project_validate(snapshot):
344352
# Use the same file as the merging test

0 commit comments

Comments
 (0)