@@ -899,32 +899,34 @@ std::string awsCanonicalURI(const std::string& resource, std::vector<std::string
899899}
900900
901901// ref: https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
902- std::string parseErrorCodeFromS3 (std::string xmlResponse) {
903- // Copy XML string to a modifiable buffer
902+ // Returns the S3 error code string from an XML error response, or "" if the response
903+ // cannot be parsed. This is best-effort for logging/diagnostics only — many HTTP error
904+ // responses (502/503 from load balancers, empty bodies, HTML responses) are not XML.
905+ std::string parseErrorCodeFromS3 (const std::string& response) {
906+ if (response.empty ()) {
907+ return " " ;
908+ }
904909 try {
905- std::vector<char > xmlBuffer (xmlResponse.begin (), xmlResponse.end ());
906- xmlBuffer.push_back (' \0 ' ); // Ensure null-terminated string
907- // Parse the XML
910+ std::vector<char > xmlBuffer (response.begin (), response.end ());
911+ xmlBuffer.push_back (' \0 ' );
908912 xml_document<> doc;
909913 doc.parse <0 >(&xmlBuffer[0 ]);
910- // Find the root node
911914 xml_node<>* root = doc.first_node (" Error" );
912915 if (!root) {
913- TraceEvent (SevWarn, " ParseS3XMLResponseNoError" ).detail (" Response" , xmlResponse).log ();
914916 return " " ;
915917 }
916- // Find the <Code> node
917918 xml_node<>* codeNode = root->first_node (" Code" );
918919 if (!codeNode) {
919- TraceEvent (SevWarn, " ParseS3XMLResponseNoErrorCode" ).detail (" Response" , xmlResponse).log ();
920920 return " " ;
921921 }
922922 return std::string (codeNode->value ());
923- } catch (Error e) {
924- TraceEvent (" BackupParseS3ErrorCodeFailure" ).errorUnsuppressed (e);
925- throw backup_parse_s3_response_failure ();
926923 } catch (...) {
927- throw backup_parse_s3_response_failure ();
924+ // Parse failures are expected for non-XML responses (HTML error pages, empty bodies, etc.)
925+ TraceEvent (" ParseS3ErrorCodeNonXML" )
926+ .suppressFor (60 )
927+ .detail (" ResponseSize" , response.size ())
928+ .detail (" ResponseHead" , response.substr (0 , 200 ));
929+ return " " ;
928930 }
929931}
930932
@@ -2420,3 +2422,66 @@ TEST_CASE("/backup/s3/guess_region") {
24202422 }
24212423 return Void ();
24222424}
2425+
2426+ TEST_CASE (" /backup/s3/parseErrorCodeFromS3" ) {
2427+ // Valid S3 error XML — should extract the error code
2428+ {
2429+ std::string xml = " <?xml version=\" 1.0\" encoding=\" UTF-8\" ?>"
2430+ " <Error><Code>AccessDenied</Code>"
2431+ " <Message>Access Denied</Message></Error>" ;
2432+ ASSERT (parseErrorCodeFromS3 (xml) == " AccessDenied" );
2433+ }
2434+
2435+ // InvalidToken error
2436+ {
2437+ std::string xml = " <Error><Code>InvalidToken</Code>"
2438+ " <Message>The provided token is malformed or otherwise invalid.</Message></Error>" ;
2439+ ASSERT (parseErrorCodeFromS3 (xml) == " InvalidToken" );
2440+ }
2441+
2442+ // ExpiredToken error
2443+ {
2444+ std::string xml = " <Error><Code>ExpiredToken</Code><Message>Token expired</Message></Error>" ;
2445+ ASSERT (parseErrorCodeFromS3 (xml) == " ExpiredToken" );
2446+ }
2447+
2448+ // Missing <Code> node — should return ""
2449+ {
2450+ std::string xml = " <Error><Message>Something went wrong</Message></Error>" ;
2451+ ASSERT (parseErrorCodeFromS3 (xml) == " " );
2452+ }
2453+
2454+ // No <Error> root — should return ""
2455+ {
2456+ std::string xml = " <ListBucketResult><Name>bucket</Name></ListBucketResult>" ;
2457+ ASSERT (parseErrorCodeFromS3 (xml) == " " );
2458+ }
2459+
2460+ // Empty response — should return ""
2461+ {
2462+ ASSERT (parseErrorCodeFromS3 (" " ) == " " );
2463+ }
2464+
2465+ // HTML error page from load balancer (502/503) — should return "", not throw
2466+ {
2467+ std::string html = " <html><body><h1>502 Bad Gateway</h1></body></html>" ;
2468+ ASSERT (parseErrorCodeFromS3 (html) == " " );
2469+ }
2470+
2471+ // Completely invalid / garbage — should return "", not throw
2472+ {
2473+ ASSERT (parseErrorCodeFromS3 (" not xml at all {{{" ) == " " );
2474+ }
2475+
2476+ // Partial XML — should return "", not throw
2477+ {
2478+ ASSERT (parseErrorCodeFromS3 (" <Error><Code>Incomple" ) == " " );
2479+ }
2480+
2481+ // Plain text error — should return "", not throw
2482+ {
2483+ ASSERT (parseErrorCodeFromS3 (" Internal Server Error" ) == " " );
2484+ }
2485+
2486+ return Void ();
2487+ }
0 commit comments