Skip to content

Commit 86ab72e

Browse files
authored
Harden parseErrorCodeFromS3 to never throw on non-XML responses (apple#12986) (apple#12996)
Return "" instead of throwing on parse failures since many HTTP error responses (502/503 HTML pages, empty bodies) are not XML. Remove unused backup_parse_s3_response_failure error code. Fix const-ref parameter and add unit tests. (cherry picked from commit 7388f54)
1 parent 543d645 commit 86ab72e

2 files changed

Lines changed: 78 additions & 14 deletions

File tree

fdbclient/S3BlobStore.actor.cpp

Lines changed: 78 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -899,32 +899,34 @@ std::string awsCanonicalURI(const std::string& resource, std::vector<std::string
899899
}
900900

901901
// ref: https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
902-
std::string parseErrorCodeFromS3(std::string xmlResponse) {
903-
// Copy XML string to a modifiable buffer
902+
// Returns the S3 error code string from an XML error response, or "" if the response
903+
// cannot be parsed. This is best-effort for logging/diagnostics only — many HTTP error
904+
// responses (502/503 from load balancers, empty bodies, HTML responses) are not XML.
905+
std::string parseErrorCodeFromS3(const std::string& response) {
906+
if (response.empty()) {
907+
return "";
908+
}
904909
try {
905-
std::vector<char> xmlBuffer(xmlResponse.begin(), xmlResponse.end());
906-
xmlBuffer.push_back('\0'); // Ensure null-terminated string
907-
// Parse the XML
910+
std::vector<char> xmlBuffer(response.begin(), response.end());
911+
xmlBuffer.push_back('\0');
908912
xml_document<> doc;
909913
doc.parse<0>(&xmlBuffer[0]);
910-
// Find the root node
911914
xml_node<>* root = doc.first_node("Error");
912915
if (!root) {
913-
TraceEvent(SevWarn, "ParseS3XMLResponseNoError").detail("Response", xmlResponse).log();
914916
return "";
915917
}
916-
// Find the <Code> node
917918
xml_node<>* codeNode = root->first_node("Code");
918919
if (!codeNode) {
919-
TraceEvent(SevWarn, "ParseS3XMLResponseNoErrorCode").detail("Response", xmlResponse).log();
920920
return "";
921921
}
922922
return std::string(codeNode->value());
923-
} catch (Error e) {
924-
TraceEvent("BackupParseS3ErrorCodeFailure").errorUnsuppressed(e);
925-
throw backup_parse_s3_response_failure();
926923
} catch (...) {
927-
throw backup_parse_s3_response_failure();
924+
// Parse failures are expected for non-XML responses (HTML error pages, empty bodies, etc.)
925+
TraceEvent("ParseS3ErrorCodeNonXML")
926+
.suppressFor(60)
927+
.detail("ResponseSize", response.size())
928+
.detail("ResponseHead", response.substr(0, 200));
929+
return "";
928930
}
929931
}
930932

@@ -2420,3 +2422,66 @@ TEST_CASE("/backup/s3/guess_region") {
24202422
}
24212423
return Void();
24222424
}
2425+
2426+
TEST_CASE("/backup/s3/parseErrorCodeFromS3") {
2427+
// Valid S3 error XML — should extract the error code
2428+
{
2429+
std::string xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
2430+
"<Error><Code>AccessDenied</Code>"
2431+
"<Message>Access Denied</Message></Error>";
2432+
ASSERT(parseErrorCodeFromS3(xml) == "AccessDenied");
2433+
}
2434+
2435+
// InvalidToken error
2436+
{
2437+
std::string xml = "<Error><Code>InvalidToken</Code>"
2438+
"<Message>The provided token is malformed or otherwise invalid.</Message></Error>";
2439+
ASSERT(parseErrorCodeFromS3(xml) == "InvalidToken");
2440+
}
2441+
2442+
// ExpiredToken error
2443+
{
2444+
std::string xml = "<Error><Code>ExpiredToken</Code><Message>Token expired</Message></Error>";
2445+
ASSERT(parseErrorCodeFromS3(xml) == "ExpiredToken");
2446+
}
2447+
2448+
// Missing <Code> node — should return ""
2449+
{
2450+
std::string xml = "<Error><Message>Something went wrong</Message></Error>";
2451+
ASSERT(parseErrorCodeFromS3(xml) == "");
2452+
}
2453+
2454+
// No <Error> root — should return ""
2455+
{
2456+
std::string xml = "<ListBucketResult><Name>bucket</Name></ListBucketResult>";
2457+
ASSERT(parseErrorCodeFromS3(xml) == "");
2458+
}
2459+
2460+
// Empty response — should return ""
2461+
{
2462+
ASSERT(parseErrorCodeFromS3("") == "");
2463+
}
2464+
2465+
// HTML error page from load balancer (502/503) — should return "", not throw
2466+
{
2467+
std::string html = "<html><body><h1>502 Bad Gateway</h1></body></html>";
2468+
ASSERT(parseErrorCodeFromS3(html) == "");
2469+
}
2470+
2471+
// Completely invalid / garbage — should return "", not throw
2472+
{
2473+
ASSERT(parseErrorCodeFromS3("not xml at all {{{") == "");
2474+
}
2475+
2476+
// Partial XML — should return "", not throw
2477+
{
2478+
ASSERT(parseErrorCodeFromS3("<Error><Code>Incomple") == "");
2479+
}
2480+
2481+
// Plain text error — should return "", not throw
2482+
{
2483+
ASSERT(parseErrorCodeFromS3("Internal Server Error") == "");
2484+
}
2485+
2486+
return Void();
2487+
}

flow/include/flow/error_definitions.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,6 @@ ERROR( backup_does_not_exist, 2319, "Backup does not exist")
352352
ERROR( backup_not_filterable_with_key_ranges, 2320, "Backup before 6.3 cannot be filtered with key ranges")
353353
ERROR( backup_not_overlapped_with_keys_filter, 2321, "Backup key ranges doesn't overlap with key ranges filter")
354354
ERROR( bucket_not_in_url, 2322, "bucket is not in the URL for backup" )
355-
ERROR( backup_parse_s3_response_failure, 2323, "cannot parse s3 response properly" )
356355
ERROR( restore_invalid_version, 2361, "Invalid restore version")
357356
ERROR( restore_corrupted_data, 2362, "Corrupted backup data")
358357
ERROR( restore_missing_data, 2363, "Missing backup data")

0 commit comments

Comments
 (0)