Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion __tests__/html2/speechToSpeech/barge.in.html
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

render(
<FluentThemeProvider variant="fluent">
Expand Down
59 changes: 51 additions & 8 deletions __tests__/html2/speechToSpeech/basic.sendbox.with.mic.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
</head>
<body>
<main id="webchat"></main>
<script type="module">
import { setupMockMediaDevices } from '/assets/esm/speechToSpeech/mockMediaDevices.js';
import { setupMockAudioPlayback } from '/assets/esm/speechToSpeech/mockAudioPlayback.js';

setupMockMediaDevices();
setupMockAudioPlayback();
</script>
<script type="text/babel">
run(async function () {
const {
Expand All @@ -23,8 +30,9 @@
// GIVEN: Web Chat with Fluent Theme and microphone button enabled
const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

render(
<FluentThemeProvider variant="fluent">
Expand All @@ -50,17 +58,52 @@
const keypadButton = document.querySelector(`[data-testid="${testIds.sendBoxTelephoneKeypadToolbarButton}"]`);
expect(keypadButton).toBeTruthy();

// THEN: Text counter should NOT be present
const textCounter = document.querySelector('.sendbox__text-counter');
expect(textCounter).toBeFalsy();

// THEN: Send button should NOT be present
// THEN: Multi-modal design: send button coexists with mic. While idle it is enabled
// so the user can also send text without leaving voice mode.
const sendButton = document.querySelector(`[data-testid="${testIds.sendBoxSendButton}"]`);
expect(sendButton).toBeFalsy();
const textArea = document.querySelector(`[data-testid="${testIds.sendBoxTextBox}"]`);
const isSendDisabled = () => sendButton.getAttribute('aria-disabled') === 'true';
expect(sendButton).toBeTruthy();
expect(isSendDisabled()).toBe(false);
expect(textArea.hasAttribute('readonly')).toBe(false);

// THEN: Should show sendbox with microphone and keypad buttons
// THEN: Should show sendbox with microphone, keypad and send buttons
await host.snapshot('local');

// WHEN: User starts recording
await host.click(micButton);

// First wait for the voice toggle to actually flip on so we know recording started.
await pageConditions.became(
'Recording started',
() => micButton.getAttribute('aria-label')?.includes('Microphone on'),
2000
);

// THEN: Send button is disabled and text input becomes read-only — voice and text
// are mutually exclusive while the mic is open.
await pageConditions.became(
'Send button disabled while recording',
() => isSendDisabled() && textArea.hasAttribute('readonly'),
2000
);

// WHEN: User stops recording
await host.click(micButton);

await pageConditions.became(
'Recording stopped',
() => micButton.getAttribute('aria-label')?.includes('Microphone off'),
2000
);

// THEN: Send button and text input are re-enabled — back to free text entry.
await pageConditions.became(
'Send button re-enabled after stopping recording',
() => !isSendDisabled() && !textArea.hasAttribute('readonly'),
2000
);

// WHEN: Voice configuration is removed from directLine
directLine.setCapability('getVoiceConfiguration', undefined);

Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 6 additions & 1 deletion __tests__/html2/speechToSpeech/csp.recording.html
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,20 @@
// GIVEN: Web Chat with Speech-to-Speech enabled and CSP headers
const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

render(
<FluentThemeProvider variant="fluent">
<ReactWebChat
directLine={directLine}
store={store}
nonce="WEB_CHAT_NONCE"
styleOptions={{
// TODO: Use blob url instead of raw data URI and remove this workaround
voiceProcessingSound: false
}}
/>
</FluentThemeProvider>,
document.getElementById('webchat')
Expand Down
3 changes: 2 additions & 1 deletion __tests__/html2/speechToSpeech/dtmf.input.html
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

// Intercept postActivity to capture outgoing DTMF events
const capturedDtmfEvents = [];
Expand Down
Binary file modified __tests__/html2/speechToSpeech/dtmf.input.html.snap-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified __tests__/html2/speechToSpeech/dtmf.input.html.snap-2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 10 additions & 7 deletions __tests__/html2/speechToSpeech/happy.path.html
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@
// GIVEN: Web Chat with Speech-to-Speech enabled
const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Server announces audio modality (mic shows up) and the consumer opted into the
// multi-modal experience: outgoing activities go over WebSocket without echo back.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

render(
<FluentThemeProvider variant="fluent">
Expand Down Expand Up @@ -137,20 +139,21 @@
expect(activities[0]).toHaveProperty('textContent', 'What is the weather today?');
expect(activities[1]).toHaveProperty('textContent', 'The weather today is sunny with a high of 75 degrees.');

// THEN: Verify activity status for voice transcripts
// THEN: Verify activity status for voice transcripts.
// New design: every transcript renders just `Just now | <icon>` — no role label,
// bot uses the audio-playing icon, user uses the microphone icon.
const activityStatuses = pageElements.activityStatuses();
expect(activityStatuses.length).toBe(2);

// THEN: User transcript should have timestamp but NO "Agent" label
const userActivityStatus = activityStatuses[0];
expect(userActivityStatus.innerText).not.toContain('Agent');
expect(userActivityStatus.innerText).toContain('Just now');
expect(userActivityStatus.innerText).toContain('|');
expect(userActivityStatus.querySelector('[class*="icon--microphone"]')).toBeTruthy();

// THEN: Bot transcript should have "Agent" label AND timestamp
const botActivityStatus = activityStatuses[1];
expect(botActivityStatus.innerText).toContain('Agent');
expect(botActivityStatus.innerText).toContain('|');
expect(botActivityStatus.innerText).toContain('Just now');
expect(botActivityStatus.innerText).toContain('|');
expect(botActivityStatus.querySelector('[class*="icon--audio-playing"]')).toBeTruthy();

// WHEN: User stops recording by clicking microphone button again
await host.click(micButton);
Expand Down
Binary file modified __tests__/html2/speechToSpeech/happy.path.html.snap-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified __tests__/html2/speechToSpeech/happy.path.html.snap-2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
197 changes: 197 additions & 0 deletions __tests__/html2/speechToSpeech/multimodal.text.with.voice.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
<!doctype html>
<html lang="en-US">
<head>
<link href="/assets/index.css" rel="stylesheet" type="text/css" />
<script crossorigin="anonymous" src="https://unpkg.com/@babel/standalone@7.8.7/babel.min.js"></script>
<script crossorigin="anonymous" src="https://unpkg.com/react@16.8.6/umd/react.production.min.js"></script>
<script crossorigin="anonymous" src="https://unpkg.com/react-dom@16.8.6/umd/react-dom.production.min.js"></script>
<script crossorigin="anonymous" src="/test-harness.js"></script>
<script crossorigin="anonymous" src="/test-page-object.js"></script>
<script crossorigin="anonymous" src="/__dist__/webchat-es5.js"></script>
<script crossorigin="anonymous" src="/__dist__/botframework-webchat-fluent-theme.production.min.js"></script>
</head>
<body>
<main id="webchat"></main>
<!--
Test: Multi-modal experience — text and voice coexist in the same send box.

Verifies the realistic interleaving:
1. Server announces audio capability + consumer opts into voice mode (`enableVoiceMode`).
2. Text turn: user types → bot replies as text. Both ride the WebSocket fire-and-forget,
saga renders user message optimistically, bot text arrives as a normal incoming activity.
3. Voice turn: user clicks mic → user speaks → bot replies via media.end transcript.
While recording, the text input is read-only and the send button is disabled.
4. Mic toggled off → text turn again (user types → bot replies as text).
5. Snapshot captures the full mixed transcript.
-->
<script type="module">
import { setupMockMediaDevices } from '/assets/esm/speechToSpeech/mockMediaDevices.js';
import { setupMockAudioPlayback } from '/assets/esm/speechToSpeech/mockAudioPlayback.js';

setupMockMediaDevices();
setupMockAudioPlayback();
</script>
<script type="text/babel">
run(async function () {
const {
React,
ReactDOM: { render },
WebChat: { FluentThemeProvider, ReactWebChat, testIds }
} = window;

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Mirror real DirectLine when `enableVoiceMode` is true: server announces audio,
// and outgoing traffic flows over the WebSocket without echo back.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

// Capture outgoing activities to assert WebSocket-style fire-and-forget delivery.
const outgoingActivities = [];
const originalPostActivity = directLine.postActivity.bind(directLine);
directLine.postActivity = activity => {
outgoingActivities.push(activity);
return originalPostActivity(activity);
};

render(
<FluentThemeProvider variant="fluent">
<ReactWebChat directLine={directLine} store={store} />
</FluentThemeProvider>,
document.getElementById('webchat')
);

await pageConditions.uiConnected();

const micButton = document.querySelector(`[data-testid="${testIds.sendBoxMicrophoneButton}"]`);
const sendButton = document.querySelector(`[data-testid="${testIds.sendBoxSendButton}"]`);
const textArea = document.querySelector(`[data-testid="${testIds.sendBoxTextBox}"]`);
const isSendDisabled = () => sendButton.getAttribute('aria-disabled') === 'true';

// GIVEN: Multi-modal idle — mic, send button and a writable text box all coexist.
expect(micButton).toBeTruthy();
expect(sendButton).toBeTruthy();
expect(isSendDisabled()).toBe(false);
expect(textArea.hasAttribute('readonly')).toBe(false);

// ===== TURN 1: Text in → Text out =====
await pageObjects.sendMessageViaSendBox('What is the weather today?', { waitForSend: false });

await pageConditions.became(
'Outgoing text activity captured',
() => outgoingActivities.some(a => a.type === 'message' && a.text === 'What is the weather today?'),
1000
);

await pageConditions.numActivitiesShown(1);

await directLine.emulateIncomingActivity('The weather today is sunny with a high of 75 degrees.');

await pageConditions.numActivitiesShown(2);

// ===== TURN 2: Voice in → Voice out =====
await host.click(micButton);

await pageConditions.became(
'Recording started',
() => micButton.getAttribute('aria-label')?.includes('Microphone on'),
2000
);

// While recording, text path is locked down.
await pageConditions.became(
'Recording active disables text path',
() => isSendDisabled() && textArea.hasAttribute('readonly'),
2000
);

// User speech is identified, processed, then transcript arrives.
await directLine.emulateIncomingVoiceActivity({
type: 'event',
name: 'request.update',
from: { role: 'bot' },
value: { state: 'detected', message: 'Your request is identified' },
valueType: 'application/vnd.microsoft.activity.azure.directline.audio.state'
});

await directLine.emulateIncomingVoiceActivity({
type: 'event',
name: 'request.update',
from: { role: 'bot' },
value: { state: 'processing', message: 'Your request is being processed' },
valueType: 'application/vnd.microsoft.activity.azure.directline.audio.state'
});

await directLine.emulateIncomingVoiceActivity({
type: 'event',
name: 'media.end',
value: { transcription: 'Will it rain tomorrow?', origin: 'user' },
valueType: 'application/vnd.microsoft.activity.azure.directline.audio.transcript'
});

await pageConditions.numActivitiesShown(3);

// Bot replies as voice (audio chunk + transcript).
await directLine.emulateIncomingVoiceActivity({
type: 'event',
name: 'media.chunk',
from: { role: 'bot' },
value: { content: 'AAAAAA==', contentType: 'audio/webm' },
valueType: 'application/vnd.microsoft.activity.azure.directline.audio.chunk'
});

await directLine.emulateIncomingVoiceActivity({
type: 'event',
name: 'media.end',
from: { role: 'bot' },
value: { transcription: 'No rain expected tomorrow.', origin: 'agent' },
valueType: 'application/vnd.microsoft.activity.azure.directline.audio.transcript'
});

await pageConditions.numActivitiesShown(4);

// Toggle mic off — back to idle text mode.
await host.click(micButton);

await pageConditions.became(
'Recording stopped',
() => micButton.getAttribute('aria-label')?.includes('Microphone off'),
2000
);

await pageConditions.became(
'Idle re-enables text path',
() => !isSendDisabled() && !textArea.hasAttribute('readonly'),
2000
);

// ===== TURN 3: Text in → Text out =====
await pageObjects.sendMessageViaSendBox('Thanks!', { waitForSend: false });

await pageConditions.became(
'Second outgoing text captured',
() => outgoingActivities.some(a => a.type === 'message' && a.text === 'Thanks!'),
1000
);

await pageConditions.numActivitiesShown(5);

await directLine.emulateIncomingActivity("You're welcome!");

await pageConditions.numActivitiesShown(6);

// ===== Verify final transcript order =====
const activities = pageElements.activityContents();
expect(activities[0]).toHaveProperty('textContent', 'What is the weather today?');
expect(activities[1]).toHaveProperty('textContent', 'The weather today is sunny with a high of 75 degrees.');
expect(activities[2]).toHaveProperty('textContent', 'Will it rain tomorrow?');
expect(activities[3]).toHaveProperty('textContent', 'No rain expected tomorrow.');
expect(activities[4]).toHaveProperty('textContent', 'Thanks!');
expect(activities[5]).toHaveProperty('textContent', "You're welcome!");

await pageConditions.scrollToBottomCompleted();
await host.snapshot('local');
});
</script>
</body>
</html>
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion __tests__/html2/speechToSpeech/multiple.turns.html
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

render(
<FluentThemeProvider variant="fluent">
Expand Down
Binary file modified __tests__/html2/speechToSpeech/multiple.turns.html.snap-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions __tests__/html2/speechToSpeech/mute.unmute.html
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
// Setup Web Chat with Speech-to-Speech
const { directLine, store } = testHelpers.createDirectLineEmulator();
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

// Track voiceState and microphoneMuted changes
store.subscribe(() => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

// Intercept postActivity to capture outgoing voice chunks
const capturedChunks = [];
Expand Down
Loading
Loading