Skip to content

Commit 27eacbb

Browse files
committed
HADOOP-19801. Scope optimized non-empty directory deletes to the delete request.
1 parent caa518e commit 27eacbb

10 files changed

Lines changed: 163 additions & 33 deletions

File tree

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1780,6 +1780,20 @@ private Constants() {
17801780
public static final String DELETE_NON_EMPTY_DIRECTORY_ENABLED =
17811781
"fs.s3a.delete.non-empty-directory.enabled";
17821782

1783+
/**
1784+
* Custom delete header required by some S3-compatible stores to delete a
1785+
* non-empty directory key (prefix) in one request.
1786+
* Value: {@value}.
1787+
*/
1788+
public static final String DELETE_NON_EMPTY_DIRECTORY_HEADER =
1789+
"x-amz-delete-contents";
1790+
1791+
/**
1792+
* Value sent with {@link #DELETE_NON_EMPTY_DIRECTORY_HEADER}.
1793+
* Value: {@value}.
1794+
*/
1795+
public static final String DELETE_NON_EMPTY_DIRECTORY_HEADER_VALUE = "true";
1796+
17831797
/**
17841798
* Default value of {@link #DELETE_NON_EMPTY_DIRECTORY_ENABLED}: {@value}.
17851799
*/

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2623,9 +2623,20 @@ public void deleteObjectAtPath(final Path path,
26232623
final String key,
26242624
final boolean isFile)
26252625
throws IOException {
2626+
deleteObjectAtPath(path, key, isFile, false);
2627+
}
2628+
2629+
@Override
2630+
@Retries.RetryTranslated
2631+
public void deleteObjectAtPath(final Path path,
2632+
final String key,
2633+
final boolean isFile,
2634+
final boolean deleteContents)
2635+
throws IOException {
26262636
auditSpan.activate();
26272637
once("delete", path.toString(), () ->
2628-
S3AFileSystem.this.deleteObjectAtPath(path, key, isFile));
2638+
S3AFileSystem.this.deleteObjectAtPath(
2639+
path, key, isFile, deleteContents));
26292640
}
26302641

26312642
@Override
@@ -3227,12 +3238,24 @@ void deleteObjectAtPath(Path f,
32273238
String key,
32283239
boolean isFile)
32293240
throws SdkException, IOException {
3241+
deleteObjectAtPath(f, key, isFile, false);
3242+
}
3243+
3244+
@Retries.RetryRaw
3245+
void deleteObjectAtPath(Path f,
3246+
String key,
3247+
boolean isFile,
3248+
boolean deleteContents)
3249+
throws SdkException, IOException {
32303250
if (isFile) {
32313251
instrumentation.fileDeleted(1);
32323252
} else {
32333253
instrumentation.directoryDeleted();
32343254
}
3235-
deleteObject(key);
3255+
incrementWriteOperations();
3256+
getStore().deleteObject(getRequestFactory()
3257+
.newDeleteObjectRequestBuilder(key, deleteContents)
3258+
.build());
32363259
}
32373260

32383261
/**

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,20 @@ ListObjectsV2Request.Builder newListObjectsV2RequestBuilder(String key,
245245
* @param key object to delete
246246
* @return the request builder.
247247
*/
248-
DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(String key);
248+
default DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(String key) {
249+
return newDeleteObjectRequestBuilder(key, false);
250+
}
251+
252+
/**
253+
* Create a request builder to delete a single object.
254+
* @param key object to delete
255+
* @param deleteContents should the request include the custom non-empty
256+
* directory delete header?
257+
* @return the request builder.
258+
*/
259+
DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(
260+
String key,
261+
boolean deleteContents);
249262

250263
/**
251264
* Create a request builder to delete objects in bulk.

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -252,18 +252,19 @@ public Boolean execute() throws IOException {
252252
}
253253
if (status.isEmptyDirectory() == Tristate.TRUE) {
254254
LOG.debug("deleting empty directory {}", path);
255-
deleteObjectAtPath(path, key, false);
255+
deleteObjectAtPath(path, key, false, false);
256256
} else if (deleteNonEmptyDirectoryEnabled) {
257-
LOG.debug("deleting non-empty directory {} with single request (endpoint supports it)", path);
258-
deleteObjectAtPath(path, key, false);
257+
LOG.debug("deleting non-empty directory {} with single request "
258+
+ "(endpoint supports it)", path);
259+
deleteObjectAtPath(path, key, false, true);
259260
} else {
260261
deleteDirectoryTree(path, key);
261262
}
262263

263264
} else {
264265
// simple file.
265266
LOG.debug("deleting simple file {}", path);
266-
deleteObjectAtPath(path, key, true);
267+
deleteObjectAtPath(path, key, true, false);
267268
}
268269
LOG.debug("Deleted {} objects", filesDeleted);
269270
return true;
@@ -363,7 +364,8 @@ private void queueForDeletion(
363364
*/
364365
private void queueForDeletion(final String key,
365366
boolean isDirMarker) throws IOException {
366-
LOG.debug("Adding object to delete: \"{}\"", key);
367+
LOG.debug("Adding {} to delete: \"{}\"",
368+
isDirMarker ? "dir marker" : "file", key);
367369
keys.add(new DeleteEntry(key, isDirMarker));
368370
if (keys.size() == pageSize) {
369371
submitNextBatch();
@@ -408,11 +410,12 @@ private void resetDeleteList() {
408410
private void deleteObjectAtPath(
409411
final Path path,
410412
final String key,
411-
final boolean isFile)
413+
final boolean isFile,
414+
final boolean deleteContents)
412415
throws IOException {
413416
LOG.debug("delete: {} {}", (isFile ? "file" : "dir marker"), key);
414417
filesDeleted++;
415-
callbacks.deleteObjectAtPath(path, key, isFile);
418+
callbacks.deleteObjectAtPath(path, key, isFile, deleteContents);
416419
}
417420

418421
/**

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,24 @@ void deleteObjectAtPath(Path path,
9898
boolean isFile)
9999
throws IOException;
100100

101+
/**
102+
* Delete an object, optionally enabling the custom non-empty directory
103+
* delete header on stores which support it.
104+
* @param path path to delete
105+
* @param key key of entry
106+
* @param isFile is the path a file (used for instrumentation only)
107+
* @param deleteContents should the request include the custom delete header?
108+
* @throws IOException from invoker signature only -should not be raised.
109+
*/
110+
@Retries.RetryTranslated
111+
default void deleteObjectAtPath(Path path,
112+
String key,
113+
boolean isFile,
114+
boolean deleteContents)
115+
throws IOException {
116+
deleteObjectAtPath(path, key, isFile);
117+
}
118+
101119
/**
102120
* Recursive list of files and directory markers.
103121
*

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@
6565
import static org.apache.commons.lang3.StringUtils.isEmpty;
6666
import static org.apache.commons.lang3.StringUtils.isNotEmpty;
6767
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_PART_UPLOAD_TIMEOUT;
68+
import static org.apache.hadoop.fs.s3a.Constants.DELETE_NON_EMPTY_DIRECTORY_HEADER;
69+
import static org.apache.hadoop.fs.s3a.Constants.DELETE_NON_EMPTY_DIRECTORY_HEADER_VALUE;
6870
import static org.apache.hadoop.fs.s3a.Constants.IF_NONE_MATCH_STAR;
6971
import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.SSE_C;
7072
import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.UNKNOWN_ALGORITHM;
@@ -724,7 +726,22 @@ public ListObjectsV2Request.Builder newListObjectsV2RequestBuilder(
724726

725727
@Override
726728
public DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(String key) {
727-
return prepareRequest(DeleteObjectRequest.builder().bucket(bucket).key(key));
729+
return newDeleteObjectRequestBuilder(key, false);
730+
}
731+
732+
@Override
733+
public DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(
734+
final String key,
735+
final boolean deleteContents) {
736+
DeleteObjectRequest.Builder builder =
737+
prepareRequest(DeleteObjectRequest.builder().bucket(bucket).key(key));
738+
if (deleteContents) {
739+
builder.overrideConfiguration(
740+
override -> override.putHeader(
741+
DELETE_NON_EMPTY_DIRECTORY_HEADER,
742+
DELETE_NON_EMPTY_DIRECTORY_HEADER_VALUE));
743+
}
744+
return builder;
728745
}
729746

730747
@Override

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,10 @@ Here are some the S3A properties for use in production.
578578
<value>false</value>
579579
<description>When true, recursive deletion of a non-empty directory uses a single delete
580580
request for the directory key (prefix) instead of listing and deleting contained
581-
objects first. Only enable this for S3-compatible endpoints that support
582-
deleting non-empty directories (path prefix) in one request (e.g. VAST).
581+
objects first. S3A sends the request-scoped header
582+
<code>x-amz-delete-contents: true</code> only on that delete call. Only enable
583+
this for S3-compatible endpoints that support deleting non-empty directories
584+
(path prefix) in one request (e.g. VAST).
583585
</description>
584586
</property>
585587

@@ -1732,6 +1734,8 @@ to the number of files (objects) in the directory.
17321734
When `fs.s3a.delete.non-empty-directory.enabled=true`, only one delete request is sent for
17331735
the directory (prefix). The S3 endpoint has to support this feature. Depending on the
17341736
S3 endpoint implementation of this feature, deletes might be synchronous or asynchronous.
1737+
S3A sends the custom header `x-amz-delete-contents: true` only on that delete request,
1738+
so there is no need to configure it globally with `fs.s3a.client.s3.custom.headers`.
17351739

17361740
The [VAST S3 endpoint](https://kb.vastdata.com/documentation/docs/using-trash-folder-for-s3-objects-6)
17371741
supports such deletes.

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADeleteNonEmptyDirectoryCapability.java

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,13 @@
2020

2121
import org.apache.hadoop.conf.Configuration;
2222
import org.apache.hadoop.fs.Path;
23-
import org.apache.hadoop.fs.contract.AbstractContractDeleteTest;
24-
import org.apache.hadoop.fs.contract.AbstractFSContract;
25-
import org.apache.hadoop.fs.contract.s3a.S3AContract;
2623
import org.apache.hadoop.test.tags.IntegrationTest;
2724

2825
import org.assertj.core.api.Assertions;
2926
import org.junit.jupiter.api.Test;
3027

3128
import static org.apache.hadoop.fs.s3a.Constants.DELETE_NON_EMPTY_DIRECTORY_ENABLED;
29+
import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
3230
import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName;
3331
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
3432

@@ -38,22 +36,19 @@
3836
*/
3937
@IntegrationTest
4038
public class ITestS3ADeleteNonEmptyDirectoryCapability extends
41-
AbstractContractDeleteTest {
42-
43-
@Override
44-
protected AbstractFSContract createContract(Configuration conf) {
45-
return new S3AContract(conf);
46-
}
39+
AbstractS3ATestBase {
4740

4841
/**
4942
* Test that when the option is disabled (default), the capability is false.
5043
*/
5144
@Test
5245
public void testCapabilityDisabledByDefault() throws Throwable {
53-
Assertions.assertThat(getFileSystem().hasPathCapability(new Path("/"),
54-
DELETE_NON_EMPTY_DIRECTORY_ENABLED))
55-
.describedAs("path capability when option not set")
56-
.isFalse();
46+
try (S3AFileSystem fs = createCapabilityTestFileSystem(false)) {
47+
Assertions.assertThat(fs.hasPathCapability(new Path("/"),
48+
DELETE_NON_EMPTY_DIRECTORY_ENABLED))
49+
.describedAs("path capability when option not set")
50+
.isFalse();
51+
}
5752
}
5853

5954
/**
@@ -62,16 +57,23 @@ public void testCapabilityDisabledByDefault() throws Throwable {
6257
*/
6358
@Test
6459
public void testCapabilityWhenEnabled() throws Throwable {
65-
Configuration conf = new Configuration(getFileSystem().getConf());
66-
removeBaseAndBucketOverrides(getTestBucketName(conf), conf,
67-
DELETE_NON_EMPTY_DIRECTORY_ENABLED);
68-
conf.setBoolean(DELETE_NON_EMPTY_DIRECTORY_ENABLED, true);
69-
try (S3AFileSystem fs = new S3AFileSystem()) {
70-
fs.initialize(getFileSystem().getUri(), conf);
60+
try (S3AFileSystem fs = createCapabilityTestFileSystem(true)) {
7161
Assertions.assertThat(fs.hasPathCapability(new Path("/"),
7262
DELETE_NON_EMPTY_DIRECTORY_ENABLED))
7363
.describedAs("path capability when option enabled")
7464
.isTrue();
7565
}
7666
}
67+
68+
private S3AFileSystem createCapabilityTestFileSystem(
69+
final boolean enabled) throws Exception {
70+
Configuration conf = new Configuration(getFileSystem().getConf());
71+
removeBaseAndBucketOverrides(getTestBucketName(conf), conf,
72+
DELETE_NON_EMPTY_DIRECTORY_ENABLED);
73+
disableFilesystemCaching(conf);
74+
conf.setBoolean(DELETE_NON_EMPTY_DIRECTORY_ENABLED, enabled);
75+
S3AFileSystem fs = new S3AFileSystem();
76+
fs.initialize(getFileSystem().getUri(), conf);
77+
return fs;
78+
}
7779
}

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,10 +358,19 @@ void deleteObjectAtPath(Path f,
358358
String key,
359359
boolean isFile)
360360
throws SdkException, IOException {
361+
deleteObjectAtPath(f, key, isFile, false);
362+
}
363+
364+
@Override
365+
void deleteObjectAtPath(Path f,
366+
String key,
367+
boolean isFile,
368+
boolean deleteContents)
369+
throws SdkException, IOException {
361370
mock.getS3AInternals()
362371
.getAmazonS3Client("test")
363372
.deleteObject(getRequestFactory()
364-
.newDeleteObjectRequestBuilder(key)
373+
.newDeleteObjectRequestBuilder(key, deleteContents)
365374
.build());
366375
}
367376

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
3838
import software.amazon.awssdk.services.s3.model.CopyObjectRequest;
3939
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest;
40+
import software.amazon.awssdk.services.s3.model.DeleteObjectRequest;
4041
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
4142
import org.assertj.core.api.Assertions;
4243
import org.slf4j.Logger;
@@ -56,6 +57,8 @@
5657
import org.apache.hadoop.test.AbstractHadoopTestBase;
5758

5859
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_PART_UPLOAD_TIMEOUT;
60+
import static org.apache.hadoop.fs.s3a.Constants.DELETE_NON_EMPTY_DIRECTORY_HEADER;
61+
import static org.apache.hadoop.fs.s3a.Constants.DELETE_NON_EMPTY_DIRECTORY_HEADER_VALUE;
5962
import static org.apache.hadoop.fs.s3a.impl.PutObjectOptions.defaultOptions;
6063
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
6164
import static org.assertj.core.api.Assertions.assertThat;
@@ -147,6 +150,30 @@ public void testRequestFactoryWithProcessor() throws Throwable {
147150
.isEqualTo(requestsAnalyzed);
148151
}
149152

153+
@Test
154+
public void testDeleteObjectRequestWithDeleteContentsHeader()
155+
throws Throwable {
156+
RequestFactory factory = RequestFactoryImpl.builder()
157+
.withBucket("bucket")
158+
.build();
159+
160+
DeleteObjectRequest request =
161+
factory.newDeleteObjectRequestBuilder("path", true).build();
162+
Assertions.assertThat(request.overrideConfiguration())
163+
.describedAs("override config for delete request")
164+
.isPresent();
165+
Assertions.assertThat(request.overrideConfiguration().get().headers())
166+
.containsEntry(
167+
DELETE_NON_EMPTY_DIRECTORY_HEADER,
168+
Collections.singletonList(DELETE_NON_EMPTY_DIRECTORY_HEADER_VALUE));
169+
170+
DeleteObjectRequest defaultRequest =
171+
factory.newDeleteObjectRequestBuilder("path").build();
172+
Assertions.assertThat(defaultRequest.overrideConfiguration())
173+
.describedAs("override config for default delete request")
174+
.isEmpty();
175+
}
176+
150177
private final class CountRequests
151178
implements RequestFactoryImpl.PrepareRequest {
152179

0 commit comments

Comments
 (0)