Skip to content

Commit 3b74e10

Browse files
SOLR-17949: Fix Azure Blob path/listing bugs and wire tests to shared backup suites
Align the Azure Blob backup repository with S3/GCS conventions: - resolve()/getBlobPath() now mirror S3 exactly (fold URI host into the path), fixing exists()/getPathType() on virtual directories - listDir() strips trailing delimiters so it returns bare child names per the BackupRepository contract - delete() tolerates already-absent files (lenient, like local/S3) - commitBlockList(..., true) for overwrite semantics on retried uploads - handleBlobException() logs HTTP 404 at debug, other failures at error - fail fast with a clear message when the container name is missing - drop the unused CHUNK_SIZE constant and unused throws on sanitizedPath() - document the ~195 GiB single-file block-count ceiling Rewrite the tests to extend the shared abstract suites (AbstractBackupRepositoryTest, AbstractIncrementalBackupTest, AbstractInstallShardTest) against Azurite via a new AzuriteTestContainer helper. Recommend supplying secrets via sysprops/env vars in the reference guide. Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 0ee9d20 commit 3b74e10

11 files changed

Lines changed: 413 additions & 891 deletions

File tree

solr/modules/azure-blob-repository/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ dependencies {
6565
testImplementation libs.commonsio.commonsio
6666

6767
testImplementation libs.azure.core.http.okhttp
68-
testImplementation libs.squareup.okhttp3.okhttp
68+
testImplementation libs.squareup.okhttp3.okhttp.jvm
6969

7070
// Testcontainers for Azurite integration testing
7171
testImplementation libs.testcontainers

solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepository.java

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ public class AzureBlobBackupRepository extends AbstractBackupRepository {
5353
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
5454

5555
static final String BLOB_SCHEME = "blob";
56-
private static final int CHUNK_SIZE = 16 * 1024 * 1024;
5756
private static final int COPY_BUFFER_SIZE = 8192;
5857

5958
private AzureBlobStorageClient client;
@@ -121,6 +120,7 @@ public URI resolve(URI baseUri, String... pathComponents) {
121120
throw new IllegalArgumentException("URI must begin with 'blob:' scheme");
122121
}
123122

123+
// If paths contain unnecessary '/' separators, they'll be removed by URI.normalize()
124124
String path = baseUri + "/" + String.join("/", pathComponents);
125125
return URI.create(path).normalize();
126126
}
@@ -170,30 +170,26 @@ public void delete(URI path, Collection<String> files) throws IOException {
170170
Objects.requireNonNull(path, "cannot delete with a null URI");
171171
Objects.requireNonNull(files, "cannot delete with a null files collection");
172172

173-
String basePath = getBlobPath(path);
174-
175-
if (!client.isDirectory(basePath)) {
176-
int lastSlash = basePath.lastIndexOf('/');
177-
basePath = lastSlash >= 0 ? basePath.substring(0, lastSlash) : "";
178-
}
179-
180-
final String prefix;
181-
if (basePath.isEmpty() || basePath.endsWith("/")) {
182-
prefix = basePath;
183-
} else {
184-
prefix = basePath + "/";
185-
}
186-
187173
Set<String> fullPaths =
188174
files.stream()
189-
.map(file -> prefix + file.replaceFirst("^/+", ""))
175+
.map(file -> resolve(path, file))
176+
.map(this::getBlobPath)
190177
.collect(Collectors.toSet());
191178

192179
if (log.isDebugEnabled()) {
193180
log.debug("Delete files '{}'", fullPaths);
194181
}
195182

196-
client.delete(fullPaths);
183+
try {
184+
client.delete(fullPaths);
185+
} catch (AzureBlobNotFoundException e) {
186+
// Deleting files that are already absent is a no-op at the repository level, matching the
187+
// lenient behavior of the local-filesystem and S3 repositories. Any present files in the
188+
// batch were still removed before this was thrown.
189+
if (log.isDebugEnabled()) {
190+
log.debug("Some files requested for deletion were already absent", e);
191+
}
192+
}
197193
}
198194

199195
@Override
@@ -352,7 +348,7 @@ public void copyIndexFileTo(
352348

353349
try (InputStream inputStream = client.pullStream(blobPath);
354350
IndexOutput indexOutput = dest.createOutput(destFileName, IOContext.DEFAULT)) {
355-
byte[] buffer = new byte[CHUNK_SIZE];
351+
byte[] buffer = new byte[COPY_BUFFER_SIZE];
356352
int len;
357353
while ((len = inputStream.read(buffer)) != -1) {
358354
indexOutput.writeBytes(buffer, 0, len);
@@ -377,7 +373,10 @@ private String getBlobPath(URI uri) {
377373
if (!BLOB_SCHEME.equalsIgnoreCase(uri.getScheme())) {
378374
throw new IllegalArgumentException("URI must begin with 'blob:' scheme");
379375
}
380-
return uri.getPath();
376+
// Depending on the scheme, the first path element may be parsed as the URI host (e.g.
377+
// "blob://dir/file" -> host="dir"). Fold it back into the path, mirroring S3BackupRepository.
378+
String host = uri.getHost();
379+
return host == null ? uri.getPath() : host + uri.getPath();
381380
}
382381

383382
private void writeFooter(long checksum, OutputStream outputStream) throws IOException {

solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepositoryConfig.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import org.apache.solr.common.util.EnvUtils;
2020
import org.apache.solr.common.util.NamedList;
21+
import org.apache.solr.common.util.StrUtils;
2122

2223
public class AzureBlobBackupRepositoryConfig {
2324

@@ -54,6 +55,12 @@ public AzureBlobBackupRepositoryConfig(NamedList<?> config) {
5455
}
5556

5657
public AzureBlobStorageClient buildClient() {
58+
if (StrUtils.isNullOrEmpty(containerName)) {
59+
throw new IllegalArgumentException(
60+
"Missing required configuration '"
61+
+ CONTAINER_NAME
62+
+ "' for the Azure Blob backup repository");
63+
}
5764
return new AzureBlobStorageClient(
5865
containerName,
5966
connectionString,

solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobOutputStream.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@
3838
*/
3939
class AzureBlobOutputStream extends OutputStream {
4040
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
41-
42-
static final int BLOCK_SIZE = 4 * 1024 * 1024;
41+
private static final int BLOCK_SIZE = 4 * 1024 * 1024;
4342

4443
private final BlobClient blobClient;
4544
private final String blobPath;
@@ -220,7 +219,7 @@ void complete() throws IOException {
220219

221220
try {
222221
BlockBlobClient blockBlobClient = blobClient.getBlockBlobClient();
223-
blockBlobClient.commitBlockList(blockIds);
222+
blockBlobClient.commitBlockList(blockIds, true);
224223
} catch (BlobStorageException e) {
225224
throw new IOException(
226225
"Failed to commit block list", AzureBlobStorageClient.handleBlobException(e));

solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobStorageClient.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,8 @@ String[] listDir(String path) throws AzureBlobException {
222222
int slashIndex = s.indexOf(BLOB_FILE_PATH_DELIMITER);
223223
return slashIndex == -1 || slashIndex == s.length() - 1;
224224
})
225+
.map(s -> s.endsWith(BLOB_FILE_PATH_DELIMITER) ? s.substring(0, s.length() - 1) : s)
226+
.distinct()
225227
.toArray(String[]::new);
226228
} catch (BlobStorageException e) {
227229
throw handleBlobException(e);
@@ -579,7 +581,7 @@ private String getParentDirectory(String path) {
579581
: "";
580582
}
581583

582-
String sanitizedPath(String path) throws AzureBlobException {
584+
String sanitizedPath(String path) {
583585
String sanitizedPath = path.trim();
584586
while (sanitizedPath.startsWith(BLOB_FILE_PATH_DELIMITER)) {
585587
sanitizedPath = sanitizedPath.substring(1).trim();
@@ -621,12 +623,14 @@ static AzureBlobException handleBlobException(BlobStorageException e) {
621623
e.getErrorCode(),
622624
e.getMessage());
623625

624-
log.error(errMessage);
625-
626626
if (e.getStatusCode() == HTTP_NOT_FOUND) {
627+
if (log.isDebugEnabled()) {
628+
log.debug(errMessage);
629+
}
627630
return new AzureBlobNotFoundException(errMessage, e);
628-
} else {
629-
return new AzureBlobException(errMessage, e);
630631
}
632+
633+
log.error(errMessage);
634+
return new AzureBlobException(errMessage, e);
631635
}
632636
}

0 commit comments

Comments
 (0)