Skip to content

Commit f4750ae

Browse files
authored
fix: solr keywords field type change (#1153)
1 parent 79887a2 commit f4750ae

7 files changed

Lines changed: 61 additions & 26 deletions

File tree

components/renku_data_services/search/core.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,12 @@
4141
logger = logging.getLogger(__name__)
4242

4343

44-
async def update_solr(search_updates_repo: SearchUpdatesRepo, solr_client: SolrClient, batch_size: int) -> None:
44+
async def update_solr(
45+
search_updates_repo: SearchUpdatesRepo, solr_client: SolrClient, batch_size: int
46+
) -> list[Exception]:
4547
"""Selects entries from the search staging table and updates SOLR."""
4648
counter = 0
49+
output: list[Exception] = []
4750
while True:
4851
entries = await search_updates_repo.select_next(batch_size)
4952
if entries == []:
@@ -69,17 +72,22 @@ async def update_solr(search_updates_repo: SearchUpdatesRepo, solr_client: SolrC
6972
await solr_client.delete(DeleteDoc.solr_query())
7073
except Exception as de:
7174
logger.error("Error when removing soft-deleted documents", exc_info=de)
75+
output.append(de)
7276

7377
except Exception as e:
78+
output.append(e)
7479
logger.error(f"Error while updating solr with entities {ids}", exc_info=e)
7580
try:
7681
await search_updates_repo.mark_failed(ids)
7782
except Exception as e2:
83+
output.append(e2)
7884
logger.error("Error while setting search entities to failed", exc_info=e2)
7985

8086
if counter > 0:
8187
logger.info(f"Updated {counter} entries in SOLR")
8288

89+
return output
90+
8391

8492
async def _renku_query(
8593
authz_client: AuthzClient, ctx: Context, uq: SolrUserQuery, limit: int, offset: int

components/renku_data_services/search/reprovision.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from renku_data_services.project.models import Project
1818
from renku_data_services.search.db import SearchUpdatesRepo
1919
from renku_data_services.solr import entity_schema
20-
from renku_data_services.solr.solr_client import DefaultSolrClient, SolrClientConfig
20+
from renku_data_services.solr.solr_client import DefaultSolrAdminClient, DefaultSolrClient, SolrClientConfig
2121
from renku_data_services.solr.solr_migrate import SchemaMigrator
2222
from renku_data_services.users.db import UserRepo
2323
from renku_data_services.users.models import UserInfo
@@ -103,7 +103,21 @@ def log_counter(c: int) -> None:
103103
started = datetime.now()
104104
await self._search_updates_repo.clear_all()
105105
async with DefaultSolrClient(self._solr_config) as client:
106-
await client.delete("_type:*")
106+
res = await client.delete("_type:*")
107+
if res.status_code != 200:
108+
logger.error(
109+
f"Failed to delete all documents in solr during reprovisioning: {res.text}, "
110+
f"status_code: {res.status_code}",
111+
exc_info=False,
112+
)
113+
async with DefaultSolrAdminClient(self._solr_config) as admin_client:
114+
res = await admin_client.reload(None)
115+
if res.status_code != 200:
116+
logger.error(
117+
f"Failed to reload solr core during reprovisioning: {res.text}, "
118+
f"status code: {res.status_code}",
119+
exc_info=False,
120+
)
107121

108122
if migrate_solr_schema:
109123
await migrator.migrate(entity_schema.all_migrations)

components/renku_data_services/solr/entity_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ class FieldTypes:
126126
AddCommand(Field.of(Fields.description, FieldTypes.text)),
127127
AddCommand(Field.of(Fields.created_by, FieldTypes.id)),
128128
AddCommand(Field.of(Fields.creation_date, FieldTypes.date_time)),
129-
# text all
130129
AddCommand(FieldTypes.text_all),
130+
# text all
131131
AddCommand(Field.of(Fields.content_all, FieldTypes.text_all).make_multi_valued()),
132132
AddCommand(CopyFieldRule(source=Fields.name, dest=Fields.content_all)),
133133
AddCommand(CopyFieldRule(source=Fields.description, dest=Fields.content_all)),

components/renku_data_services/solr/solr_client.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -626,9 +626,6 @@ async def delete(self, query: str) -> Response:
626626
class DefaultSolrClient(SolrClient):
627627
"""Default implementation of the solr client."""
628628

629-
delegate: AsyncClient
630-
config: SolrClientConfig
631-
632629
def __init__(self, cfg: SolrClientConfig):
633630
self.config = cfg
634631
url_parsed = list(urlparse(cfg.base_url))
@@ -758,23 +755,22 @@ async def create(self, core_name: str | None) -> None:
758755
"""Create a core."""
759756
...
760757

758+
@abstractmethod
759+
async def reload(self, core_name: str | None) -> Response:
760+
"""Reload a core."""
761+
...
762+
761763

762764
class DefaultSolrAdminClient(SolrAdminClient):
763765
"""A client to the core admin api.
764766
765767
Url: https://solr.apache.org/guide/solr/latest/configuration-guide/coreadmin-api.html
766768
"""
767769

768-
delegate: AsyncClient
769-
config: SolrClientConfig
770-
771770
def __init__(self, cfg: SolrClientConfig):
772771
self.config = cfg
773-
url_parsed = list(urlparse(cfg.base_url))
774-
url_parsed[2] = urljoin(url_parsed[2], "/api/cores")
775-
burl = urlunparse(url_parsed)
776772
bauth = BasicAuth(username=cfg.user.username, password=cfg.user.password) if cfg.user is not None else None
777-
self.delegate = AsyncClient(auth=bauth, base_url=burl, timeout=cfg.timeout)
773+
self.delegate = AsyncClient(auth=bauth, base_url=self.config.base_url, timeout=cfg.timeout)
778774

779775
async def __aenter__(self) -> Self:
780776
await self.delegate.__aenter__()
@@ -788,7 +784,7 @@ async def __aexit__(
788784
async def core_status(self, core_name: str | None) -> dict[str, Any] | None:
789785
"""Return the status of the connected core or the one given by `core_name`."""
790786
core = core_name or self.config.core
791-
resp = await self.delegate.get(f"/{core}")
787+
resp = await self.delegate.get(f"/api/cores/{core}")
792788
if not resp.is_success:
793789
raise SolrClientStatusException(self.config, resp)
794790
else:
@@ -800,8 +796,16 @@ async def create(self, core_name: str | None) -> None:
800796
"""Create a core with the given `core_name` or the name provided in the config object."""
801797
core = core_name or self.config.core
802798
data = {"create": {"name": core, "configSet": self.config.configset}}
803-
resp = await self.delegate.post("", json=data)
799+
resp = await self.delegate.post("/api/cores", json=data)
804800
if not resp.is_success:
805801
raise SolrClientCreateCoreException(core, resp)
806802
else:
807803
return None
804+
805+
async def reload(self, core_name: str | None) -> Response:
806+
"""Reload a core with the given `core_name` or the name provided in the config object."""
807+
core = core_name or self.config.core
808+
return await self.delegate.post(
809+
f"/v2/cores/{core}/reload",
810+
headers={"Content-Type": "application/json"},
811+
)

components/renku_data_services/solr/solr_schema.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,12 @@ def to_dict(self) -> dict[str, Any]:
315315
@dataclass
316316
@final
317317
class ReplaceCommand(SchemaCommand):
318-
"""Replace a field, field type or dynamic field."""
318+
"""Replace a field, field type or dynamic field.
319+
320+
Use this with care if you are changing the field type.
321+
We have seen issues when we use this command to change the field type
322+
that do not occur if you call 'delete' and then 'add' rather than just 'replace'.
323+
"""
319324

320325
value: FieldType | Field | DynamicFieldRule
321326

test/bases/renku_data_services/data_api/conftest.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
from renku_data_services.secrets_storage_api.app import register_all_handlers as register_secrets_handlers
3636
from renku_data_services.secrets_storage_api.dependencies import DependencyManager as SecretsDependencyManager
3737
from renku_data_services.solr import entity_schema
38-
from renku_data_services.solr.solr_client import DefaultSolrClient
38+
from renku_data_services.solr.solr_client import DefaultSolrAdminClient, DefaultSolrClient
3939
from renku_data_services.solr.solr_migrate import SchemaMigrator
4040
from renku_data_services.storage.rclone import RCloneValidator
4141
from renku_data_services.users.dummy_kc_api import DummyKeycloakAPI
@@ -295,21 +295,26 @@ async def search_reprovision(search_push_updates) -> SearchReprovisionCall:
295295
admin = InternalServiceAdmin(id=ServiceAdminId.search_reprovision)
296296

297297
async def search_reprovision_helper(
298-
app_manager_instance: DependencyManager, migrate_solr_schema: bool = True
298+
app_manager_instance: DependencyManager, migrate_solr_schema: bool = True, clear_index: bool = False
299299
) -> None:
300300
await app_manager_instance.search_reprovisioning.run_reprovision(admin, migrate_solr_schema)
301-
await search_push_updates(app_manager_instance, clear_index=False)
301+
await search_push_updates(app_manager_instance, clear_index=clear_index)
302302

303303
return search_reprovision_helper
304304

305305

306306
@pytest_asyncio.fixture
307307
async def search_push_updates():
308-
async def search_push_updates_helper(app_manager_instance: DependencyManager, clear_index: bool = True) -> None:
308+
async def search_push_updates_helper(app_manager_instance: DependencyManager, clear_index: bool = False) -> None:
309309
async with DefaultSolrClient(app_manager_instance.config.solr) as client:
310310
if clear_index:
311-
await client.delete("*:*")
312-
await search_core.update_solr(app_manager_instance.search_updates_repo, client, 10)
311+
res = await client.delete("_type:*")
312+
assert res.status_code == 200, res.text
313+
async with DefaultSolrAdminClient(app_manager_instance.config.solr) as admin_client:
314+
res = await admin_client.reload(None)
315+
assert res.status_code == 200, res.text
316+
responses = await search_core.update_solr(app_manager_instance.search_updates_repo, client, 10)
317+
assert len(responses) == 0, responses
313318

314319
return search_push_updates_helper
315320

test/bases/renku_data_services/data_api/test_search_migrations.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,7 @@ def _helper(latest_version: int | None = None) -> list[SchemaMigration]:
8383
("start_solr_version", "end_solr_version"),
8484
[
8585
(12, 13),
86-
# TODO: Fix this migration
87-
# (13, 14),
86+
(13, 14),
8887
],
8988
)
9089
@pytest.mark.xdist_group("search")
@@ -139,7 +138,7 @@ async def test_search_schema_upgrade(
139138
assert_search_result(result, [p1, p3])
140139
res = await solr_migrator.migrate(get_solr_schemas(end_solr_version))
141140
assert res.migrations_run == end_solr_version - start_solr_version, res
142-
await search_reprovision(app_manager, migrate_solr_schema=False)
141+
await search_reprovision(app_manager, migrate_solr_schema=True, clear_index=True)
143142
result = await search_query(sanic_client, "type:project", user=wout)
144143
assert_search_result(result, [p1, p3])
145144
result = await search_query(sanic_client, "keyword:test-keyword", user=wout)

0 commit comments

Comments
 (0)