Skip to content

Commit d422ef0

Browse files
Add new ErrHTMLContent value to represent errors due to HTML parsing. (#1252)
## What changes are proposed in this pull request? This PR adds a new error value `ErrHTMLContent` to differentiate parsing errors caused by HTML content from other parsing errors — which are likely to be due to actual implementation issues within the SDK. The new error value can be used by higher-level tools (e.g. the Databricks CLI) to provide actionable feedback to users. ## How is this tested? Unit and integration tests.
1 parent 31c554d commit d422ef0

3 files changed

Lines changed: 75 additions & 82 deletions

File tree

NEXT_CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
### New Features and Improvements
66

7+
* Add new error value `ErrHTMLContent` to differentiate parsing errors caused by
8+
HTML content from other parsing errors.
79
* Return more detailed error messages when OAuth endpoints cannot be resolved.
810
* Use a free port in `u2m` authentication flows rather than 8020.
911

client/client_test.go

Lines changed: 41 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,14 @@ import (
77
"io"
88
"net/http"
99
"net/url"
10-
"runtime"
1110
"strings"
1211
"testing"
1312

1413
"github.com/databricks/databricks-sdk-go/apierr"
1514
"github.com/databricks/databricks-sdk-go/config"
15+
"github.com/databricks/databricks-sdk-go/httpclient"
1616
"github.com/databricks/databricks-sdk-go/internal/env"
1717
"github.com/databricks/databricks-sdk-go/useragent"
18-
"github.com/databricks/databricks-sdk-go/version"
1918
"github.com/stretchr/testify/assert"
2019
"github.com/stretchr/testify/require"
2120
)
@@ -353,67 +352,61 @@ func TestDoRemovesDoubleSlashesFromFilesAPI(t *testing.T) {
353352
}
354353

355354
func TestNonJSONResponseIncludedInError(t *testing.T) {
356-
cicdHeader := ""
357-
if useragent.CiCdProvider() != "" {
358-
cicdHeader = fmt.Sprintf(" cicd/%s", useragent.CiCdProvider())
359-
}
360-
goVersion := strings.TrimPrefix(runtime.Version(), "go")
361355
type testCase struct {
362356
statusCode int
363357
status string
364358
errorMessage string
365359
}
366360
cases := []testCase{
367361
{
368-
statusCode: 400,
369-
status: "Bad Request",
370-
errorMessage: `failed to unmarshal response body: invalid character '<' looking for beginning of value. This is likely a bug in the Databricks SDK for Go or the underlying REST API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:
371-
` + "```" + `
372-
GET /a
373-
> * Host:
374-
> * Accept: application/json
375-
> * Authorization: REDACTED
376-
> * User-Agent: unknown/0.0.0 databricks-sdk-go/` + version.Version + ` go/` + goVersion + ` os/` + runtime.GOOS + ` auth/pat` + cicdHeader + `
377-
< HTTP/2.0 Bad Request
378-
< * Content-Type: text/html
379-
< <html><body>hello</body></html>
380-
` + "```",
362+
statusCode: 400,
363+
status: "Bad Request",
364+
errorMessage: httpclient.ErrHTMLContent.Error(),
381365
},
382366
{
383-
statusCode: 500,
384-
status: "Internal Server Error",
385-
errorMessage: `failed to unmarshal response body: invalid character '<' looking for beginning of value. This is likely a bug in the Databricks SDK for Go or the underlying REST API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:
386-
` + "```" + `
387-
GET /a
388-
> * Host:
389-
> * Accept: application/json
390-
> * Authorization: REDACTED
391-
> * User-Agent: unknown/0.0.0 databricks-sdk-go/` + version.Version + ` go/` + goVersion + ` os/` + runtime.GOOS + ` auth/pat` + cicdHeader + `
392-
< HTTP/2.0 Internal Server Error
393-
< * Content-Type: text/html
394-
< <html><body>hello</body></html>
395-
` + "```",
367+
statusCode: 500,
368+
status: "Internal Server Error",
369+
errorMessage: httpclient.ErrHTMLContent.Error(),
396370
},
397371
{
398-
statusCode: 200,
399-
status: "OK",
400-
errorMessage: `failed to unmarshal response body: invalid character '<' looking for beginning of value. This is likely a bug in the Databricks SDK for Go or the underlying REST API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:
401-
` + "```" + `
402-
GET /a
403-
> * Host:
404-
> * Accept: application/json
405-
> * Authorization: REDACTED
406-
> * User-Agent: unknown/0.0.0 databricks-sdk-go/` + version.Version + ` go/` + goVersion + ` os/` + runtime.GOOS + ` auth/pat` + cicdHeader + `
407-
< HTTP/2.0 OK
408-
< * Content-Type: text/html
409-
< <html><body>hello</body></html>
410-
` + "```",
372+
statusCode: 200,
373+
status: "OK",
374+
errorMessage: httpclient.ErrHTMLContent.Error(),
411375
},
412376
}
377+
413378
for _, tc := range cases {
414-
tc := tc
415379
t.Run(fmt.Sprintf("%d %s", tc.statusCode, tc.status), func(t *testing.T) {
416-
testNonJSONResponseIncludedInError(t, tc.statusCode, tc.status, tc.errorMessage)
380+
c, err := New(&config.Config{
381+
Host: "some",
382+
Token: "token",
383+
ConfigFile: "/dev/null",
384+
HTTPTransport: hc(func(r *http.Request) (*http.Response, error) {
385+
r.Header.Del("traceparent") // clear nondeterministic traceparent header
386+
return &http.Response{
387+
Proto: "HTTP/2.0",
388+
Status: tc.status,
389+
Body: io.NopCloser(strings.NewReader(`<html><body>hello</body></html>`)),
390+
Request: r,
391+
Header: http.Header{
392+
"Content-Type": []string{"text/html"},
393+
},
394+
}, nil
395+
}),
396+
})
397+
if err != nil {
398+
t.Fatalf("unexpected error: %v", err)
399+
}
400+
401+
var m map[string]string
402+
gotErr := c.Do(context.Background(), "GET", "/a", nil, nil, nil, &m)
403+
404+
if gotErr == nil {
405+
t.Fatalf("expected error, got nil")
406+
}
407+
if !strings.Contains(gotErr.Error(), tc.errorMessage) {
408+
t.Fatalf("expected error to contain %q, got %q", tc.errorMessage, gotErr.Error())
409+
}
417410
})
418411
}
419412
}
@@ -534,39 +527,6 @@ func TestUserAgentForCiCd(t *testing.T) {
534527

535528
}
536529

537-
func testNonJSONResponseIncludedInError(t *testing.T, statusCode int, status, errorMessage string) {
538-
c, err := New(&config.Config{
539-
Host: "some",
540-
Token: "token",
541-
ConfigFile: "/dev/null",
542-
HTTPTransport: hc(func(r *http.Request) (*http.Response, error) {
543-
// Clear traceparent header which is nondeterministic.
544-
r.Header.Del("traceparent")
545-
return &http.Response{
546-
Proto: "HTTP/2.0",
547-
Status: status,
548-
Body: io.NopCloser(strings.NewReader(`<html><body>hello</body></html>`)),
549-
Request: r,
550-
Header: http.Header{
551-
"Content-Type": []string{"text/html"},
552-
},
553-
}, nil
554-
}),
555-
})
556-
require.NoError(t, err)
557-
var m map[string]string
558-
err = c.Do(
559-
context.Background(),
560-
"GET",
561-
"/a",
562-
nil,
563-
nil,
564-
nil,
565-
&m,
566-
)
567-
require.EqualError(t, err, errorMessage)
568-
}
569-
570530
func TestRetryOn503(t *testing.T) {
571531
var requested bool
572532
c, err := New(&config.Config{

httpclient/response.go

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"context"
66
"encoding/json"
7+
"errors"
78
"fmt"
89
"io"
910
"net/http"
@@ -16,6 +17,29 @@ import (
1617
"github.com/databricks/databricks-sdk-go/logger/httplog"
1718
)
1819

20+
// ErrHTMLContent is returned when the response body is HTML instead of JSON.
21+
//
22+
// This almost always indicates an issue with your authentication configuration.
23+
// If you encounter this error, please verify the following:
24+
//
25+
// - Databricks Host: Ensure your host is set correctly.
26+
// - Permissions: Confirm that the authentication method has the required
27+
// permissions for the API operation you are trying to perform.
28+
// - Network/Proxy: If you are behind a corporate firewall, ensure it is not
29+
// blocking or redirecting API traffic.
30+
//
31+
// A common cause of this error is Private Link redirecting the SDK to a login
32+
// page, which the SDK cannot process. This usually happens when trying to
33+
// access a Private Link-enabled workspace configured with no public internet
34+
// access from a different network than the VPC endpoint belongs to.
35+
//
36+
// For more details, please refer to the [Unified Auth] documentation and
37+
// [Private Link Authentication Troubleshooting].
38+
//
39+
// [Unified Auth]: https://docs.databricks.com/aws/en/dev-tools/auth/unified-auth
40+
// [Private Link Authentication Troubleshooting]: https://learn.microsoft.com/en-us/azure/databricks/security/network/classic/private-link-standard#authentication-troubleshooting
41+
var ErrHTMLContent = errors.New("received HTML response instead of JSON")
42+
1943
func WithResponseHeader(key string, value *string) DoOption {
2044
return DoOption{
2145
out: func(body *common.ResponseWrapper) error {
@@ -91,14 +115,21 @@ func WithResponseUnmarshal(response any) DoOption {
91115
*bs = bodyBytes
92116
return nil
93117
}
94-
if err = json.Unmarshal(bodyBytes, &response); err != nil {
118+
if err := json.Unmarshal(bodyBytes, &response); err != nil {
119+
if _, ok := err.(*json.SyntaxError); ok && isHTMLContent(bodyBytes) {
120+
return ErrHTMLContent
121+
}
95122
return fmt.Errorf("failed to unmarshal response body: %w. %s", err, makeUnexpectedResponse(body.Response, body.RequestBody.DebugBytes, bodyBytes))
96123
}
97124
return nil
98125
},
99126
}
100127
}
101128

129+
func isHTMLContent(bodyBytes []byte) bool {
130+
return strings.HasPrefix(string(bodyBytes), "<")
131+
}
132+
102133
func findContentsField(response any) (*reflect.Value, bool) {
103134
value := reflect.ValueOf(response)
104135
value = reflect.Indirect(value)

0 commit comments

Comments
 (0)