Skip to content

Commit 3db56b7

Browse files
authored
Sanitize links in citation (#5564)
* Allow Markdown in citation * Expect console.warn * Add sanitize link * Sanitize links in claim interpreter * Add forbid-elements * Fix null * Allow cite: URL * Fix inline citation * Add entry * Add removed * Update doc on source of truth * Update entry * Add schema * Add comment * Use regular function * Allow empty string
1 parent 88e4f00 commit 3db56b7

File tree

15 files changed

+466
-100
lines changed

15 files changed

+466
-100
lines changed

.eslintrc.react.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,20 @@ extends:
77
- plugin:react/recommended
88
- plugin:react-hooks/recommended
99

10+
overrides:
11+
- files:
12+
- '__tests__/**/*.js'
13+
- '**/*.spec.jsx'
14+
- '**/*.spec.tsx'
15+
- '**/*.test.jsx'
16+
- '**/*.test.tsx'
17+
18+
env:
19+
jest: true
20+
21+
rules:
22+
'react/forbid-elements': off
23+
1024
parserOptions:
1125
ecmaFeatures:
1226
jsx: true
@@ -29,6 +43,10 @@ rules:
2943
- error
3044
- forbid:
3145
- id
46+
react/forbid-elements:
47+
- error
48+
- forbid:
49+
- a
3250
react/jsx-boolean-value:
3351
- error
3452
- always

.eslintrc.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ overrides:
9494
rules:
9595
'@typescript-eslint/no-require-imports': off
9696
no-magic-numbers: off
97+
'react/forbid-elements': off
9798

9899
rules:
99100
# Only list rules that are not in *:recommended set

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ Notes: web developers are advised to use [`~` (tilde range)](https://github.com/
4242
- `styleOptions.hideUploadButton` is being deprecated in favor of `styleOptions.disableFileUpload`. The option will be removed on or after 2027-07-14
4343
- `botframework-directlinespeech-sdk` no longer ponyfills `AbortController`, as it is supported by modern browsers, in PR [#5530](https://github.com/microsoft/BotFramework-WebChat/pull/5530)
4444
- `activityMiddleware` is being deprecated in favor of [`polymiddleware`](./docs/MIDDLEWARE.md). It will be removed on or after 2027-08-16, related to PR [#5515](https://github.com/microsoft/BotFramework-WebChat/pull/5515)
45+
- Root-level (unconnected) `Claim` entity is being deprecated, in PR [#5564](https://github.com/microsoft/BotFramework-WebChat/pull/5564), by [@compulim](https://github.com/compulim). It will be removed on or after 2027-08-29
46+
- Use `entities[@id=""][@type="Message"].citation[@type="Claim"]` instead
4547

4648
### Added
4749

@@ -116,6 +118,7 @@ Notes: web developers are advised to use [`~` (tilde range)](https://github.com/
116118
- `@msinternal/botframework-webchat-api-middleware` for middleware branch of API package
117119
- `@msinternal/botframework-webchat-debug-theme` package for enabling debugging scenarios
118120
- `@msinternal/botframework-webchat-react-hooks` for helpers for React hooks
121+
- Added link sanitization and ESLint rules, in PR [#5564](https://github.com/microsoft/BotFramework-WebChat/pull/5564), by [@compulim](https://github.com/compulim)
119122

120123
### Changed
121124

@@ -291,6 +294,8 @@ Notes: web developers are advised to use [`~` (tilde range)](https://github.com/
291294
- `useSuggestedActions()` hook is being deprecated in favor of the `useSuggestedActionsHooks().useSuggestedActions()` hook, in PR [#5489](https://github.com/microsoft/BotFramework-WebChat/pull/5489), by [@compulim](https://github.com/compulim)
292295
- Fixed core internal import in legacy CommonJS environments, in PR [#5509](https://github.com/microsoft/BotFramework-WebChat/pull/5509), by [@OEvgeny](https://github.com/OEvgeny)
293296
- `activityMiddleware` is being deprecated in favor of [`polymiddleware`](./docs/MIDDLEWARE.md). It will be removed on or after 2027-08-16, related to PR [#5515](https://github.com/microsoft/BotFramework-WebChat/pull/5515)
297+
- Root-level (unconnected) `Claim` entity is being deprecated, in PR [#5564](https://github.com/microsoft/BotFramework-WebChat/pull/5564), by [@compulim](https://github.com/compulim). It will be removed on or after 2027-08-29
298+
- Use `entities[@id=""][@type="Message"].citation[@type="Claim"]` instead
294299

295300
### Samples
296301

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<!doctype html>
2+
<html lang="en-US">
3+
<head>
4+
<link href="/assets/index.css" rel="stylesheet" type="text/css" />
5+
<script crossorigin="anonymous" src="/test-harness.js"></script>
6+
<script crossorigin="anonymous" src="/test-page-object.js"></script>
7+
<script crossorigin="anonymous" src="/__dist__/webchat-es5.js"></script>
8+
</head>
9+
<body>
10+
<main id="webchat"></main>
11+
<script>
12+
run(async function () {
13+
const { directLine, store } = testHelpers.createDirectLineEmulator();
14+
15+
WebChat.renderWebChat(
16+
{
17+
directLine,
18+
store
19+
},
20+
document.getElementById('webchat')
21+
);
22+
23+
await pageConditions.uiConnected();
24+
25+
await directLine.emulateIncomingActivity({
26+
entities: [
27+
{
28+
'@context': 'https://schema.org',
29+
'@id': '',
30+
'@type': 'Message',
31+
type: 'https://schema.org/Message',
32+
citation: [
33+
{
34+
'@id': ':_doesnt-care-1',
35+
'@type': 'Claim',
36+
appearance: {
37+
'@type': 'DigitalDocument',
38+
encodingFormat: 'application/octet-stream',
39+
url: 'https://aka.ms/claim'
40+
},
41+
claimInterpreter: {
42+
'@type': 'Project',
43+
slogan: 'Surfaced with Azure OpenAI',
44+
url: 'javascript:alert(1)'
45+
},
46+
position: '1'
47+
}
48+
]
49+
}
50+
],
51+
text: `Fugiat excepteur anim irure consectetur ex nisi eu deserunt officia tempor eu et excepteur.[1]
52+
53+
[1]: https://aka.ms/claim
54+
`,
55+
type: 'message'
56+
});
57+
58+
await host.snapshot('local');
59+
60+
const markdownElement = pageElements.activities()[0].querySelector('.webchat__text-content__markdown');
61+
const markdownLinks = markdownElement.querySelectorAll('a');
62+
63+
// The javascript: shouldn't be a link.
64+
expect(markdownLinks).toHaveLength(1);
65+
66+
expect(markdownLinks[0].getAttribute('href')).toBe('https://aka.ms/claim');
67+
68+
const claimInterpreterElement = pageElements.activities()[0].querySelector('.webchat__activity-status__originator');
69+
70+
expect(claimInterpreterElement).toHaveProperty('tagName', 'SPAN');
71+
expect(claimInterpreterElement).toHaveProperty('textContent', 'Surfaced with Azure OpenAI');
72+
});
73+
</script>
74+
</body>
75+
</html>
21.1 KB
Loading
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
<!doctype html>
2+
<html lang="en-US">
3+
<head>
4+
<link href="/assets/index.css" rel="stylesheet" type="text/css" />
5+
<script type="importmap">
6+
{
7+
"imports": {
8+
"jest-mock": "https://esm.sh/jest-mock",
9+
"react": "https://esm.sh/react@18",
10+
"react-dom": "https://esm.sh/react-dom@18",
11+
"react-dom/": "https://esm.sh/react-dom@18/"
12+
}
13+
}
14+
</script>
15+
<script type="module">
16+
import React from 'react';
17+
window.React = React;
18+
</script>
19+
<script crossorigin="anonymous" src="/test-harness.js"></script>
20+
<script crossorigin="anonymous" src="/test-page-object.js"></script>
21+
<script crossorigin="anonymous" src="/__dist__/webchat-es5.js"></script>
22+
</head>
23+
<body>
24+
<main id="webchat"></main>
25+
<script type="module">
26+
import { fn, spyOn } from 'jest-mock';
27+
28+
run(async function () {
29+
const { directLine, store } = testHelpers.createDirectLineEmulator();
30+
31+
WebChat.renderWebChat(
32+
{
33+
directLine,
34+
store
35+
},
36+
document.getElementById('webchat')
37+
);
38+
39+
await pageConditions.uiConnected();
40+
41+
const consoleWarn = fn(console.log.bind(console));
42+
43+
spyOn(console, 'warn').mockImplementationOnce(consoleWarn);
44+
45+
await directLine.emulateIncomingActivity({
46+
entities: [
47+
{
48+
'@context': 'https://schema.org',
49+
'@id': '',
50+
'@type': 'Message',
51+
type: 'https://schema.org/Message',
52+
citation: [
53+
{
54+
'@id': ':_doesnt-care-1',
55+
'@type': 'Claim',
56+
appearance: {
57+
'@type': 'DigitalDocument',
58+
encodingFormat: 'application/octet-stream',
59+
url: 'https://aka.ms/bad-link'
60+
},
61+
position: '1'
62+
}
63+
]
64+
}
65+
],
66+
text: `Ea officia[1] elit laboris[2] reprehenderit laborum elit ipsum qui eiusmod.
67+
68+
[1]: https://aka.ms/correct-link
69+
[2]: javascript:alert(1)
70+
`,
71+
type: 'message'
72+
});
73+
74+
expect(consoleWarn).toHaveBeenCalledTimes(1);
75+
expect(consoleWarn.mock.calls[0][0]).toBe(
76+
'botframework-webchat: When "Message.citation[].url" is set in entities, it must match its corresponding URL in Markdown link reference definition'
77+
);
78+
79+
await host.snapshot('local');
80+
81+
const markdownElement = pageElements.activities()[0].querySelector('.webchat__text-content__markdown');
82+
const markdownClickableLinks = markdownElement.querySelectorAll('a[href]');
83+
84+
// The javascript: shouldn't be a link.
85+
expect(markdownClickableLinks).toHaveLength(1);
86+
87+
expect(markdownClickableLinks[0].getAttribute('href')).toBe('https://aka.ms/correct-link');
88+
89+
const linkDefinitionItems = pageElements.linkDefinitions()[0].querySelectorAll('[role="listitem"] > *');
90+
91+
// The javascript: link is gone in Markdown, so it should be ignored in citation as well.
92+
expect(linkDefinitionItems).toHaveLength(1);
93+
94+
expect(linkDefinitionItems[0].getAttribute('href')).toBe('https://aka.ms/correct-link');
95+
});
96+
</script>
97+
</body>
98+
</html>
16.5 KB
Loading

docs/CITATION.md

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ The activity graph should have a [Message thing](#message-thing).
3030

3131
A [Message thing](https://schema.org/Message) represents the message activity itself and acts as the root of the [activity graph](#activity-graph). It must have the following fields:
3232

33-
- `@context` of `"https://schema.org"`
34-
- `@id` of `""` (an empty string means self in JSON-LD fashion)
35-
- `@type` of `"Message"`
36-
- `type` of `"https://schema.org/Message"`
33+
- `@context` of `"https://schema.org"`
34+
- `@id` of `""` (an empty string means self in JSON-LD fashion)
35+
- `@type` of `"Message"`
36+
- `type` of `"https://schema.org/Message"`
3737

3838
### Non-URL citation
3939

@@ -53,7 +53,7 @@ Bot developers should implement citations as outlined in this section to ensure
5353

5454
Notes:
5555

56-
- The third citation is a non-URL citation, its link `cite:1` is currently ignored
56+
- The third citation is a non-URL citation, its link `cite:1` is ignored and treated as an opaque string
5757

5858
```
5959
Sure, you should override the default proxy settings[1]​[2], when your proxy server requires authentication[3].
@@ -73,8 +73,10 @@ Sure, you should override the default proxy settings[1]​[2], when your proxy s
7373

7474
Please refer to the graph for details of each field. Notably:
7575

76-
- Only compact from is supported (i.e. nested objects), other forms and object references are not supported unless stated otherwise
77-
- Subclasses are not supported. If the object is expected to be `Message`, it must not be `EmailMessage` (subclass)
76+
- Only compact form is supported (i.e. nested objects), other forms and object references are not supported unless stated otherwise
77+
- Subclasses are not supported. If the object is expected to be `Message`, it must not be `EmailMessage` (subclass)
78+
79+
> Note: Some older versions of Web Chat used a root-level, unconnected `Claim` thing. It was strictly for internal use and its usage is being deprecated.
7880
7981
#### Sample payload
8082

@@ -193,7 +195,11 @@ We use `position` instead of `@id` to match the link definition in Markdown to t
193195

194196
### Source of truths
195197

196-
If there are deviations of information in Markdown and Message thing, the Message thing should take precedence over the Markdown, given the receiver understood the Message thing.
198+
> This was updated in PR [#5564](https://github.com/microsoft/BotFramework-WebChat/pull/5564) on 2025-08-29.
199+
200+
~If there are deviations of information in Markdown and Message thing, the Message thing should take precedence over the Markdown, given the receiver understood the Message thing.~
201+
202+
If there are deviations of information in Markdown and Message thing, the Markdown should take precedence over the Message thing. This is to support plain text channels (text/SMS) as they do not have the capacity to display content from the Message thing.
197203

198204
### `usageInfo` on the `Message` thing should be a blank node
199205

@@ -203,4 +209,4 @@ In JSON-LD, blank node means a node that does not have any contents but `@id` an
203209

204210
## Further reading
205211

206-
- [Microsoft Teams: Bot messages with AI-generated content](https://learn.microsoft.com/en-us/microsoftteams/platform/bots/how-to/bot-messages-ai-generated-content?tabs=after%2Cbotmessage#citations)
212+
- [Microsoft Teams: Bot messages with AI-generated content](https://learn.microsoft.com/en-us/microsoftteams/platform/bots/how-to/bot-messages-ai-generated-content?tabs=after%2Cbotmessage#citations)
Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,48 @@
1+
import { validateProps } from '@msinternal/botframework-webchat-react-valibot';
12
import { type OrgSchemaProject } from 'botframework-webchat-core';
23
import React, { memo } from 'react';
4+
import { custom, object, optional, pipe, readonly, safeParse, string, type InferInput } from 'valibot';
35

4-
type Props = Readonly<{ project: OrgSchemaProject }>;
6+
import useSanitizeHrefCallback from '../../hooks/internal/useSanitizeHrefCallback';
57

6-
const Originator = memo(({ project }: Props) => {
7-
const { name, slogan, url } = project;
8+
const originatorPropsSchema = pipe(
9+
object({
10+
// TODO: [P1] We should build this schema into `OrgSchemaProject` instead, or build a Schema.org query library.
11+
project: custom<OrgSchemaProject>(
12+
value =>
13+
safeParse(
14+
object({
15+
name: optional(string()),
16+
slogan: optional(string()),
17+
url: optional(string())
18+
}),
19+
value
20+
).success
21+
)
22+
}),
23+
readonly()
24+
);
825

26+
type OriginatorProps = InferInput<typeof originatorPropsSchema>;
27+
28+
// Regular function is better for React function component.
29+
// eslint-disable-next-line prefer-arrow-callback
30+
const Originator = memo(function Originator(props: OriginatorProps) {
31+
const {
32+
project: { name, slogan, url }
33+
} = validateProps(originatorPropsSchema, props);
34+
35+
const sanitizeHref = useSanitizeHrefCallback();
36+
37+
const { sanitizedHref } = sanitizeHref(url);
938
const text = slogan || name;
1039

11-
return url ? (
40+
return sanitizedHref ? (
41+
// Link is sanitized.
42+
// eslint-disable-next-line react/forbid-elements
1243
<a
1344
className="webchat__activity-status__originator webchat__activity-status__originator--has-link"
14-
href={url}
45+
href={sanitizedHref}
1546
rel="noopener noreferrer"
1647
target="_blank"
1748
>
@@ -22,6 +53,5 @@ const Originator = memo(({ project }: Props) => {
2253
);
2354
});
2455

25-
Originator.displayName = 'Originator';
26-
2756
export default Originator;
57+
export { originatorPropsSchema, type OriginatorProps };

packages/component/src/Attachment/FileContent.tsx

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ const ROOT_STYLE = {
2525
}
2626
};
2727

28+
// TODO: Consider using `useSanitizeHrefCallback`, which under the hood uses `sanitize-html` or whatever the HTML content transformer uses.
2829
const ALLOWED_PROTOCOLS = ['blob:', 'data:', 'http:', 'https:'];
2930

3031
function isAllowedProtocol(url) {
@@ -96,10 +97,10 @@ function FileContent(props: FileContentProps) {
9697

9798
const localizedSize = typeof size === 'number' && localizeBytes(size);
9899

99-
const allowedHref = href && isAllowedProtocol(href) ? href : undefined;
100+
const sanitizedHref = href && isAllowedProtocol(href) ? href : undefined;
100101

101102
const alt = localize(
102-
allowedHref
103+
sanitizedHref
103104
? localizedSize
104105
? 'FILE_CONTENT_DOWNLOADABLE_WITH_SIZE_ALT'
105106
: 'FILE_CONTENT_DOWNLOADABLE_ALT'
@@ -112,12 +113,14 @@ function FileContent(props: FileContentProps) {
112113

113114
return (
114115
<div className={classNames('webchat__fileContent', rootClassName, fileContentStyleSet + '', className)}>
115-
{allowedHref ? (
116+
{sanitizedHref ? (
117+
// URL is sanitized.
118+
// eslint-disable-next-line react/forbid-elements
116119
<a
117120
aria-label={alt}
118121
className="webchat__fileContent__buttonLink"
119122
download={fileName}
120-
href={allowedHref}
123+
href={sanitizedHref}
121124
rel="noopener noreferrer"
122125
target="_blank"
123126
>

0 commit comments

Comments
 (0)