diff --git a/lib/extension/otaUpdate.ts b/lib/extension/otaUpdate.ts index 9c8ed542e6..464006898c 100644 --- a/lib/extension/otaUpdate.ts +++ b/lib/extension/otaUpdate.ts @@ -55,6 +55,31 @@ export default class OTAUpdate extends Extension { ); #inProgress = new Set(); #lastChecked = new Map(); + /** + * Cache of recent queryNextImageRequest payloads for imageTypes with available updates. + * Key: composite `${ieeeAddr}_${imageType}`. Used in the manual update flow to retry + * with a different imageType when the first imageNotify response has no available update + * (e.g. multi-MCU devices where a faster MCU responds first but is already up-to-date). + */ + #pendingAvailableRequests = new Map< + string, + { + payload: Zcl.ClustersTypes.TClusterCommandPayload<"genOta", "queryNextImageRequest">; + endpoint: zh.Endpoint; + ts: number; + } + >(); + + /** + * Returns true if any OTA operation (auto or manual) is in progress for the given device. + * + * Auto-update keys are composite `${ieeeAddr}_${imageType}` (inserted in onZigbeeEvent). + * Manual-update keys are bare `ieeeAddr` (inserted in onMQTTMessage). + * Both coexist in the same Set -- this method is the single authority for "is anything running?" + */ + #hasInProgress(ieeeAddr: string): boolean { + return this.#inProgress.has(ieeeAddr) || [...this.#inProgress].some((k) => k.startsWith(`${ieeeAddr}_`)); + } // biome-ignore lint/suspicious/useAwait: API override async start(): Promise { @@ -78,6 +103,7 @@ export default class OTAUpdate extends Extension { clearState(): void { this.#inProgress.clear(); this.#lastChecked.clear(); + this.#pendingAvailableRequests.clear(); } #removeProgressAndRemainingFromState(device: Device): void { @@ -90,7 +116,16 @@ export default class OTAUpdate extends Extension { } @bind private async onZigbeeEvent(data: eventdata.DeviceMessage): Promise { - if (data.type !== "commandQueryNextImageRequest" || !data.device.definition || this.#inProgress.has(data.device.ieeeAddr)) { + if (data.type !== "commandQueryNextImageRequest" || !data.device.definition) { + return; + } + + const requestData = data.data as Zcl.ClustersTypes.TClusterCommandPayload<"genOta", "queryNextImageRequest">; + const imageType = requestData.imageType; + const inProgressKey = `${data.device.ieeeAddr}_${imageType}`; + + // Block if this exact imageType is already running, OR if a manual MQTT update is running for this device. + if (this.#inProgress.has(inProgressKey) || this.#inProgress.has(data.device.ieeeAddr)) { return; } @@ -105,7 +140,7 @@ export default class OTAUpdate extends Extension { if (data.device.zh.scheduledOta) { // allow custom source to override check for definition `ota` if (data.device.zh.scheduledOta?.url !== undefined || data.device.definition.ota) { - this.#inProgress.add(data.device.ieeeAddr); + this.#inProgress.add(inProgressKey); logger.info(`Updating '${data.device.name}' to latest firmware`); @@ -135,7 +170,7 @@ export default class OTAUpdate extends Extension { await this.publishEntityState(data.device, this.#getEntityPublishPayload(data.device, "scheduled")); } - this.#inProgress.delete(data.device.ieeeAddr); + this.#inProgress.delete(inProgressKey); return; // we're done } @@ -147,15 +182,15 @@ export default class OTAUpdate extends Extension { // with only 10 - 60 seconds inbetween. It doesn't make sense to check for a new update // each time, so this interval can be set by the user. The default is 1,440 minutes (one day). const updateCheckInterval = settings.get().ota.update_check_interval * 1000 * 60; - const deviceLastChecked = this.#lastChecked.get(data.device.ieeeAddr); + const deviceLastChecked = this.#lastChecked.get(inProgressKey); const check = deviceLastChecked !== undefined ? Date.now() - deviceLastChecked > updateCheckInterval : true; if (!check) { return; } - this.#inProgress.add(data.device.ieeeAddr); - this.#lastChecked.set(data.device.ieeeAddr, Date.now()); + this.#inProgress.add(inProgressKey); + this.#lastChecked.set(inProgressKey, Date.now()); let availableResult: OtaUpdateAvailableResult | undefined; try { @@ -174,6 +209,13 @@ export default class OTAUpdate extends Extension { if (availableResult?.available) { logger.info(`OTA update available for '${data.device.name}'`); + // Cache this request so the manual update flow can retry for this imageType, + // avoiding the imageNotify race condition on multi-MCU devices. + this.#pendingAvailableRequests.set(inProgressKey, { + payload: data.data as Zcl.ClustersTypes.TClusterCommandPayload<"genOta", "queryNextImageRequest">, + endpoint: data.endpoint, + ts: Date.now(), + }); } } } @@ -187,7 +229,7 @@ export default class OTAUpdate extends Extension { data.meta.zclTransactionSequenceNumber, ); logger.debug(`Responded to OTA request of '${data.device.name}' with 'NO_IMAGE_AVAILABLE'`); - this.#inProgress.delete(data.device.ieeeAddr); + this.#inProgress.delete(inProgressKey); } async #readSoftwareBuildIDAndDateCode( @@ -267,7 +309,7 @@ export default class OTAUpdate extends Extension { if (!(device instanceof Device)) { error = `Device '${ID}' does not exist`; - } else if (this.#inProgress.has(device.ieeeAddr)) { + } else if (this.#hasInProgress(device.ieeeAddr)) { // also guards against scheduling while check/update op in progress that could result in undesired OTA state error = `OTA update or check for update already in progress for '${device.name}'`; } else { @@ -369,7 +411,22 @@ export default class OTAUpdate extends Extension { try { const firmwareFrom = await this.#readSoftwareBuildIDAndDateCode(device, "immediate"); - const [fromVersion, toVersion] = await this.#update(source, device, undefined, undefined, dataSettings); + let [fromVersion, toVersion] = await this.#update(source, device, undefined, undefined, dataSettings); + + if (toVersion === undefined) { + // For multi-MCU devices, imageNotify may be answered by an already-up-to-date MCU first. + // If another imageType has a recent pending available update, retry with a fresh imageNotify. + // By now the first MCU is quiet (received NO_IMAGE_AVAILABLE), so the second MCU answers. + const now = Date.now(); + const pendingEntry = [...this.#pendingAvailableRequests.entries()].find( + ([k, v]) => k.startsWith(`${device.ieeeAddr}_`) && now - v.ts < 300_000, + ); + + if (pendingEntry) { + this.#pendingAvailableRequests.delete(pendingEntry[0]); + [fromVersion, toVersion] = await this.#update(source, device, undefined, undefined, dataSettings); + } + } if (toVersion === undefined) { error = `Update of '${device.name}' failed (No image currently available)`; @@ -513,10 +570,25 @@ export default class OTAUpdate extends Extension { device.zh.save(); } + // After a successful update, reset lastChecked for all imageTypes of this device. + // This ensures sibling MCUs (same device, different imageType) are rechecked on their + // next queryNextImageRequest instead of being silently ignored for up to 24 hours. + for (const key of [...this.#lastChecked.keys()]) { + if (key.startsWith(`${device.ieeeAddr}_`)) { + this.#lastChecked.delete(key); + } + } + setTimeout(() => { - device.reInterview(this.eventBus).catch((error) => { - logger.error(`${error.message}. Re-try manually after some time.`); - }); + // Only re-interview when no other OTA update is still running for this device. + // Prevents interrupting a second imageType update that started right after this one. + // Note: if two updates finish within 5s of each other, both timers may fire -- + // this results in two reInterview calls in quick succession, which is acceptable. + if (!this.#hasInProgress(device.ieeeAddr)) { + device.reInterview(this.eventBus).catch((error) => { + logger.error(`${error.message}. Re-try manually after some time.`); + }); + } }, 5000); return [from.fileVersion, to.fileVersion]; diff --git a/test/extensions/otaUpdate.test.ts b/test/extensions/otaUpdate.test.ts index f5e3903a43..1662cd553e 100644 --- a/test/extensions/otaUpdate.test.ts +++ b/test/extensions/otaUpdate.test.ts @@ -1349,4 +1349,251 @@ describe("Extension: OTAUpdate", () => { {}, ); }); + + it("allows independent auto-check for different imageType while another imageType check is already in progress", async () => { + // Auto-check path: checkOta for imageType A hangs while imageType B proceeds independently. + // Reset device-level flag that a previous test may have set and that afterEach does not clear. + settings.set(["devices", devices.bulb.ieeeAddr, "disable_automatic_update_check"], false); + const dataA = {imageType: 100, manufacturerCode: 0x128b, fileVersion: 1, fieldControl: 0}; + const dataB = {imageType: 200, manufacturerCode: 0x128b, fileVersion: 1, fieldControl: 0}; + + let resolveCheckA!: (val: ReturnType extends Promise ? T : never) => void; + + // A's checkOta hangs + devices.bulb.checkOta.mockImplementationOnce( + () => + new Promise((resolve) => { + resolveCheckA = resolve as typeof resolveCheckA; + }), + ); + // B's checkOta resolves normally + devices.bulb.checkOta.mockResolvedValueOnce({ + available: true, + current: {...DEFAULT_CURRENT, ...dataB}, + availableMeta: {...DEFAULT_AVAILABLE_META, ...dataB, fileVersion: 2}, + }); + + const payloadA = { + data: dataA, + cluster: "genOta", + device: devices.bulb, + endpoint: devices.bulb.getEndpoint(1)!, + type: "commandQueryNextImageRequest", + linkquality: 10, + meta: {zclTransactionSequenceNumber: 1}, + }; + const payloadB = { + data: dataB, + cluster: "genOta", + device: devices.bulb, + endpoint: devices.bulb.getEndpoint(1)!, + type: "commandQueryNextImageRequest", + linkquality: 10, + meta: {zclTransactionSequenceNumber: 2}, + }; + + // Start check for imageType A (hangs in checkOta, #inProgress has key_A) + const checkAPromise = mockZHEvents.message(payloadA); + await flushPromises(); + + // Send payloadA again — same imageType must be blocked by #inProgress + await mockZHEvents.message(payloadA); + await flushPromises(); + expect(devices.bulb.checkOta).toHaveBeenCalledTimes(1); // still 1 — duplicate blocked + + // While A is in progress, trigger imageType B — should NOT be blocked + await mockZHEvents.message(payloadB); + await flushPromises(); + + // Both A and B should have had checkOta called (2 total, one per imageType) + expect(devices.bulb.checkOta).toHaveBeenCalledTimes(2); + expect(devices.bulb.checkOta).toHaveBeenCalledWith({downgrade: false}, dataA, {}, devices.bulb.endpoints[0]); + expect(devices.bulb.checkOta).toHaveBeenCalledWith({downgrade: false}, dataB, {}, devices.bulb.endpoints[0]); + + // Resolve A and clean up + resolveCheckA({available: false, current: {...DEFAULT_CURRENT, ...dataA}, availableMeta: {...DEFAULT_AVAILABLE_META, ...dataA}}); + await checkAPromise; + await flushPromises(); + }); + + it("uses independent lastChecked interval per imageType", async () => { + // imageType A was recently checked -> throttled on second request + // imageType B never checked -> proceeds immediately + // Reset device-level flag that a previous test may have set and that afterEach does not clear. + settings.set(["devices", devices.bulb.ieeeAddr, "disable_automatic_update_check"], false); + const dataA = {imageType: 100, manufacturerCode: 0x128b, fileVersion: 1, fieldControl: 0}; + const dataB = {imageType: 200, manufacturerCode: 0x128b, fileVersion: 1, fieldControl: 0}; + + devices.bulb.checkOta + .mockResolvedValueOnce({ + available: false, + current: {...DEFAULT_CURRENT, ...dataA}, + availableMeta: {...DEFAULT_AVAILABLE_META, ...dataA}, + }) + .mockResolvedValueOnce({ + available: true, + current: {...DEFAULT_CURRENT, ...dataB}, + availableMeta: {...DEFAULT_AVAILABLE_META, ...dataB, fileVersion: 2}, + }); + + const payloadA = { + data: dataA, + cluster: "genOta", + device: devices.bulb, + endpoint: devices.bulb.getEndpoint(1)!, + type: "commandQueryNextImageRequest", + linkquality: 10, + meta: {zclTransactionSequenceNumber: 1}, + }; + const payloadB = { + data: dataB, + cluster: "genOta", + device: devices.bulb, + endpoint: devices.bulb.getEndpoint(1)!, + type: "commandQueryNextImageRequest", + linkquality: 10, + meta: {zclTransactionSequenceNumber: 2}, + }; + + // First check for imageType A -> sets lastChecked[key_A] + await mockZHEvents.message(payloadA); + await flushPromises(); + expect(devices.bulb.checkOta).toHaveBeenCalledTimes(1); + + // Second request for imageType A immediately -> should be throttled (within interval) + await mockZHEvents.message(payloadA); + await flushPromises(); + expect(devices.bulb.checkOta).toHaveBeenCalledTimes(1); // still 1 -- throttled + + // First request for imageType B -> key_B not in lastChecked -> should proceed + await mockZHEvents.message(payloadB); + await flushPromises(); + expect(devices.bulb.checkOta).toHaveBeenCalledTimes(2); // now 2 -- B went through + expect(devices.bulb.checkOta).toHaveBeenLastCalledWith({downgrade: false}, dataB, {}, devices.bulb.endpoints[0]); + }); + + it("retries MQTT update from pending request cache when first attempt returns no image", async () => { + // Covers Fix 2: the pending-available-requests retry path. + // Also covers Fix 3: #lastChecked entries are cleared after a successful update. + settings.set(["devices", devices.bulb.ieeeAddr, "disable_automatic_update_check"], false); + + const data = {imageType: 100, manufacturerCode: 0x128b, fileVersion: 1, fieldControl: 0}; + + // Step 1 — auto-check finds an available update → populates #pendingAvailableRequests and #lastChecked + devices.bulb.checkOta.mockResolvedValueOnce({ + available: true, + current: {...DEFAULT_CURRENT, ...data}, + availableMeta: {...DEFAULT_AVAILABLE_META, ...data, fileVersion: 2}, + }); + + await mockZHEvents.message({ + data, + cluster: "genOta", + device: devices.bulb, + endpoint: devices.bulb.getEndpoint(1)!, + type: "commandQueryNextImageRequest", + linkquality: 10, + meta: {zclTransactionSequenceNumber: 1}, + }); + await flushPromises(); + expect(devices.bulb.checkOta).toHaveBeenCalledTimes(1); + + // Step 2 — MQTT update: first call returns no image, pending cache triggers a retry that succeeds. + // Also exercises the #lastChecked reset loop (Fix 3) because #lastChecked has an entry from Step 1. + devices.bulb.endpoints[0].read.mockImplementation(() => ({swBuildId: "1", dateCode: "20240101"})); + devices.bulb.updateOta + .mockResolvedValueOnce([{...DEFAULT_CURRENT, ...data}, undefined]) // first: no image + .mockResolvedValueOnce([ + {...DEFAULT_CURRENT, ...data, fileVersion: 1}, + {...DEFAULT_CURRENT, ...data, fileVersion: 2}, + ]); // retry: success + + mockMQTTEvents.message("zigbee2mqtt/bridge/request/device/ota_update/update", stringify({id: "bulb"})); + await flushPromises(); + await vi.advanceTimersByTimeAsync(6000); + + expect(devices.bulb.updateOta).toHaveBeenCalledTimes(2); + expect(mockMQTTPublishAsync).toHaveBeenCalledWith( + "zigbee2mqtt/bridge/response/device/ota_update/update", + expect.stringContaining('"status":"ok"'), + {}, + ); + }); + + it("defers re-interview until no other imageType update is in progress", async () => { + // Two concurrent scheduled-OTA updates (A and B). Re-interview must fire only + // after the LAST update completes, not after each one. + const dataA = {imageType: 100, manufacturerCode: 0x128b, fileVersion: 1, fieldControl: 0}; + const dataB = {imageType: 200, manufacturerCode: 0x128b, fileVersion: 1, fieldControl: 0}; + + let resolveUpdateA!: () => void; + let resolveUpdateB!: () => void; + + // Set scheduledOta directly so device takes the scheduled-OTA path in onZigbeeEvent + devices.bulb.scheduledOta = {url: undefined}; + + devices.bulb.updateOta + .mockImplementationOnce( + (_source: unknown, _req: unknown, _tsn: unknown, _extra: unknown, _onProg: unknown) => + new Promise<[typeof DEFAULT_CURRENT, typeof DEFAULT_CURRENT]>((resolve) => { + resolveUpdateA = () => + resolve([ + {...DEFAULT_CURRENT, ...dataA}, + {...DEFAULT_CURRENT, ...dataA, fileVersion: 2}, + ]); + }), + ) + .mockImplementationOnce( + (_source: unknown, _req: unknown, _tsn: unknown, _extra: unknown, _onProg: unknown) => + new Promise<[typeof DEFAULT_CURRENT, typeof DEFAULT_CURRENT]>((resolve) => { + resolveUpdateB = () => + resolve([ + {...DEFAULT_CURRENT, ...dataB}, + {...DEFAULT_CURRENT, ...dataB, fileVersion: 2}, + ]); + }), + ); + + const payloadA = { + data: dataA, + cluster: "genOta", + device: devices.bulb, + endpoint: devices.bulb.getEndpoint(1)!, + type: "commandQueryNextImageRequest", + linkquality: 10, + meta: {zclTransactionSequenceNumber: 1}, + }; + const payloadB = { + data: dataB, + cluster: "genOta", + device: devices.bulb, + endpoint: devices.bulb.getEndpoint(1)!, + type: "commandQueryNextImageRequest", + linkquality: 10, + meta: {zclTransactionSequenceNumber: 2}, + }; + + // Start both updates (each hangs in updateOta) + const updateAPromise = mockZHEvents.message(payloadA); + await flushPromises(); + const updateBPromise = mockZHEvents.message(payloadB); + await flushPromises(); + + // Finish A -- re-interview must NOT fire (B still in #inProgress) + resolveUpdateA(); + await updateAPromise; + await flushPromises(); + await vi.advanceTimersByTimeAsync(6000); // past the 5s setTimeout + expect(devices.bulb.interview).not.toHaveBeenCalled(); + + // Finish B -- re-interview MUST fire now (nothing else in #inProgress) + resolveUpdateB(); + await updateBPromise; + await flushPromises(); + await vi.advanceTimersByTimeAsync(6000); + expect(devices.bulb.interview).toHaveBeenCalledTimes(1); + + // Clean up + devices.bulb.scheduledOta = undefined; + }); });