Skip to content

Commit 6c131e8

Browse files
authored
fix(azure): correct volume attribute format and eliminate spurious xmlns/version warnings (#42) (#43)
Drop the % suffix from all three prosody volume emission sites (src/core/abstract-tts.ts, src/core/ssml-utils.ts, and src/engines/azure.ts) so Azure receives the correct absolute format (volume="75") rather than a relative percentage change (volume="75%"). Also reorder prepareSSML so validation runs after the SSML has been processed and structured for Azure, removing the two spurious xmlns/version warnings that fired on every plain-text call.
1 parent 4e7e04e commit 6c131e8

6 files changed

Lines changed: 100 additions & 15 deletions

File tree

__tests__/azure-ssml.test.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/**
2+
* Tests for Azure SSML generation correctness (issue #42)
3+
*/
4+
5+
import * as SSMLUtils from "../src/core/ssml-utils";
6+
7+
// Minimal stub so we can import AzureTTSClient without real credentials
8+
jest.mock("../src/core/abstract-tts", () => {
9+
return {
10+
AbstractTTSClient: class {
11+
voiceId = "en-US-AriaNeural";
12+
lang = "en-US";
13+
properties: Record<string, unknown> = { rate: "medium", pitch: "medium", volume: 100 };
14+
timings: unknown[] = [];
15+
on() {}
16+
emit() {}
17+
},
18+
};
19+
});
20+
21+
// We test the SSML utilities directly — no network calls needed.
22+
23+
describe("createProsodyTag — volume format", () => {
24+
it("emits an absolute volume value without a % suffix", () => {
25+
const result = SSMLUtils.createProsodyTag("hello", { volume: 75 });
26+
// volume="75" is the absolute format (0-100 scale).
27+
// volume="75%" would be a relative +75% change — wrong.
28+
expect(result).toContain('volume="75"');
29+
expect(result).not.toContain('volume="75%"');
30+
});
31+
32+
it("emits volume=100 without % when at full volume", () => {
33+
const result = SSMLUtils.createProsodyTag("hello", { volume: 100 });
34+
expect(result).toContain('volume="100"');
35+
expect(result).not.toContain('volume="100%"');
36+
});
37+
38+
it("emits volume=0 without % when muted", () => {
39+
const result = SSMLUtils.createProsodyTag("hello", { volume: 0 });
40+
expect(result).toContain('volume="0"');
41+
expect(result).not.toContain('volume="0%"');
42+
});
43+
});
44+
45+
describe("Azure prepareSSML — no spurious xmlns/version warnings", () => {
46+
let warnSpy: jest.SpyInstance;
47+
48+
beforeEach(() => {
49+
warnSpy = jest.spyOn(console, "warn").mockImplementation(() => {});
50+
});
51+
52+
afterEach(() => {
53+
warnSpy.mockRestore();
54+
});
55+
56+
it("does not warn about missing xmlns or version when synthesising plain text", async () => {
57+
// Import lazily so mock is in place
58+
const { AzureTTSClient } = await import("../src/engines/azure");
59+
const client = new AzureTTSClient({ subscriptionKey: "key", region: "eastus" });
60+
61+
// Access the private method via type cast
62+
const ssml = await (client as any).prepareSSML("Hello world");
63+
64+
const xmnsWarning = warnSpy.mock.calls.some((args) =>
65+
args.some(
66+
(a: unknown) =>
67+
typeof a === "string" && a.includes("xmlns") ||
68+
(Array.isArray(a) && a.some((s: unknown) => typeof s === "string" && s.includes("xmlns")))
69+
)
70+
);
71+
const versionWarning = warnSpy.mock.calls.some((args) =>
72+
args.some(
73+
(a: unknown) =>
74+
typeof a === "string" && a.includes("version") ||
75+
(Array.isArray(a) && a.some((s: unknown) => typeof s === "string" && s.includes("version")))
76+
)
77+
);
78+
79+
expect(xmnsWarning).toBe(false);
80+
expect(versionWarning).toBe(false);
81+
82+
// Sanity: output should actually have the attributes
83+
expect(ssml).toContain('xmlns="http://www.w3.org/2001/10/synthesis"');
84+
expect(ssml).toContain('version="1.0"');
85+
});
86+
});

src/__tests__/azure-mstts-namespace.test.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,14 +175,15 @@ describe("Azure MSTTS Namespace Handling", () => {
175175

176176
it("should normalise 0-1 volume fraction to 0-100 percentage", async () => {
177177
// Regression test for: https://github.com/willwade/js-tts-wrapper/issues/40
178-
// Callers commonly pass volume as a 0-1 float; 0.8 should become volume="80%", not "0.8%".
178+
// Callers commonly pass volume as a 0-1 float; 0.8 should become volume="80" (absolute, 0-100 scale), not "0.8" or "80%".
179179
const plainSSML = `<speak>Hello world</speak>`;
180180
const options = { volume: 0.8 };
181181

182182
const result = (client as any).ensureAzureSSMLStructure(plainSSML, "en-US-JennyNeural", options);
183183

184-
expect(result).toContain('volume="80%"');
185-
expect(result).not.toContain('volume="0.8%"');
184+
expect(result).toContain('volume="80"');
185+
expect(result).not.toContain('volume="0.8"');
186+
expect(result).not.toContain('volume="80%"');
186187
});
187188
});
188189
});

src/__tests__/ssml-utils.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ describe("SSMLUtils", () => {
5353

5454
it("should create prosody tag with volume", () => {
5555
const result = SSMLUtils.createProsodyTag("Hello", { volume: 80 });
56-
expect(result).toBe('<prosody volume="80%">Hello</prosody>');
56+
expect(result).toBe('<prosody volume="80">Hello</prosody>');
5757
});
5858

5959
it("should create prosody tag with multiple attributes", () => {
@@ -64,7 +64,7 @@ describe("SSMLUtils", () => {
6464
});
6565
expect(result).toContain('rate="slow"');
6666
expect(result).toContain('pitch="high"');
67-
expect(result).toContain('volume="80%"');
67+
expect(result).toContain('volume="80"');
6868
});
6969

7070
it("should return original text if no options provided", () => {

src/core/abstract-tts.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -993,7 +993,7 @@ export abstract class AbstractTTSClient {
993993
}
994994

995995
if (this.properties.volume) {
996-
attrs.push(`volume="${this.properties.volume}%"`);
996+
attrs.push(`volume="${this.properties.volume}"`);
997997
}
998998

999999
if (attrs.length === 0) {

src/core/ssml-utils.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ export function createProsodyTag(text: string, options?: SpeakOptions): string {
6868

6969
if (options.rate) attrs.push(`rate="${options.rate}"`);
7070
if (options.pitch) attrs.push(`pitch="${options.pitch}"`);
71-
if (options.volume !== undefined) attrs.push(`volume="${options.volume}%"`);
71+
if (options.volume !== undefined) attrs.push(`volume="${options.volume}"`);
7272

7373
if (attrs.length === 0) return text;
7474

src/engines/azure.ts

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,11 @@ export class AzureTTSClient extends AbstractTTSClient {
556556
// Use voice from options or the default voice
557557
const voiceId = options?.voice || this.voiceId;
558558

559-
// Validate and process SSML for Azure compatibility
559+
// Process and structure SSML first so required attributes are present before validation
560+
ssml = SSMLUtils.processSSMLForEngine(ssml, "azure", voiceId || undefined);
561+
ssml = this.ensureAzureSSMLStructure(ssml, voiceId, options);
562+
563+
// Validate after processing so warnings reflect what Azure actually receives
560564
const validation = SSMLUtils.validateSSMLForEngine(ssml, "azure", voiceId || undefined);
561565
if (validation.warnings.length > 0) {
562566
console.warn("Azure SSML warnings:", validation.warnings);
@@ -566,12 +570,6 @@ export class AzureTTSClient extends AbstractTTSClient {
566570
throw new Error(`Invalid SSML for Azure: ${validation.errors.join(", ")}`);
567571
}
568572

569-
// Process SSML for Azure compatibility
570-
ssml = SSMLUtils.processSSMLForEngine(ssml, "azure", voiceId || undefined);
571-
572-
// Ensure proper SSML structure for Azure
573-
ssml = this.ensureAzureSSMLStructure(ssml, voiceId, options);
574-
575573
return ssml;
576574
}
577575

@@ -648,7 +646,7 @@ export class AzureTTSClient extends AbstractTTSClient {
648646
const attrs: string[] = [];
649647
if (rate && rate !== DEFAULT_RATE) attrs.push(`rate="${rate}"`);
650648
if (pitch && pitch !== DEFAULT_PITCH) attrs.push(`pitch="${pitch}"`);
651-
if (volume !== DEFAULT_VOLUME) attrs.push(`volume="${volume}%"`);
649+
if (volume !== DEFAULT_VOLUME) attrs.push(`volume="${volume}"`);
652650

653651
// <prosody> must be nested inside <voice>, not as a direct child of <speak>.
654652
// Azure rejects: Node [speak] should not contain node [prosody] with type [Others].

0 commit comments

Comments
 (0)