Skip to content

Commit ebd7229

Browse files
committed
feat: add autodownload feature
1 parent 9a72c36 commit ebd7229

7 files changed

Lines changed: 84 additions & 38 deletions

File tree

README.md

Lines changed: 18 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,24 @@
1-
21
# nodejs-whisper
32

43
Node.js bindings for OpenAI's Whisper model.
54

6-
7-
85
[![MIT License](https://img.shields.io/badge/License-MIT-green.svg)](https://choosealicense.com/licenses/mit/)
96

10-
11-
127
## Features
13-
14-
- Automatically convert the audio to WAV format with a 16000 Hz frequency to support the whisper model.
15-
- Output transcripts to (.txt .srt .vtt)
16-
- Optimized for CPU (Including Apple Silicon ARM)
17-
- Timestamp precision to single word
18-
- Split on word rather than on token (Optional)
19-
- Translate from source language to english (Optional)
20-
- Convert audio formet to wav to support whisper model
218

9+
- Automatically convert the audio to WAV format with a 16000 Hz frequency to support the whisper model.
10+
- Output transcripts to (.txt .srt .vtt)
11+
- Optimized for CPU (Including Apple Silicon ARM)
12+
- Timestamp precision to single word
13+
- Split on word rather than on token (Optional)
14+
- Translate from source language to english (Optional)
15+
- Convert audio format to WAV to support the whisper model
2216

2317
## Installation
2418

2519
1. Install ffmpeg and make tools
2620

27-
```bash
21+
```bash
2822
sudo apt update
2923
sudo apt install ffmpeg build-essential
3024
```
@@ -34,38 +28,39 @@ sudo apt install ffmpeg build-essential
3428
```bash
3529
npm i nodejs-whisper
3630
```
31+
3732
2. Download whisper model
33+
3834
```bash
3935
npx nodejs-whisper download
4036
```
4137

42-
- NOTE: user may need to install make tool
43-
38+
- NOTE: You may need to install the `make` tool.
39+
4440
## Usage/Examples
4541

4642
```javascript
4743
import path from 'path'
48-
import { nodewhisper } from "nodejs-whisper"
44+
import { nodewhisper } from 'nodejs-whisper'
4945

5046
// Need to provide exact path to your audio file.
5147
const filePath = path.resolve(__dirname, 'YourAudioFileName')
5248

5349
await nodewhisper(filePath, {
5450
modelName: 'base.en', // Name of the downloaded model to use
51+
autoDownloadModelName: 'base.en', // (optional) autodownload a model if model is not present
5552
whisperOptions: {
5653
outputInText: false, // get output result in txt file
5754
outputInVtt: false, // get output result in vtt file
5855
outputInSrt: true, // get output result in srt file
5956
outputInCsv: false, // get output result in csv file
60-
translateToEnglish: false,//translate from source language to english
57+
translateToEnglish: false, //translate from source language to english
6158
wordTimestamps: false, // Word-level timestamps
6259
timestamps_length: 20, // amount of dialogue per timestamp pair
6360
splitOnWord: true, //split on word rather than on token
6461
},
6562
})
6663

67-
68-
6964
// Model list
7065
const MODELS_LIST = [
7166
'tiny',
@@ -81,15 +76,12 @@ const MODELS_LIST = [
8176
]
8277
```
8378

84-
85-
86-
87-
8879
## Types
8980

9081
```
9182
interface IOptions {
9283
modelName: string
84+
autoDownloadModelName?: string
9385
whisperOptions?: WhisperOptions
9486
}
9587
@@ -106,8 +98,6 @@ const MODELS_LIST = [
10698
10799
```
108100

109-
110-
111101
## Run Locally
112102

113103
Clone the project
@@ -140,17 +130,14 @@ Build Project
140130
npm run build
141131
```
142132

143-
144133
## Made with
145134

146-
- [Whisper OpenAI (using C++ port by: ggerganov)](https://github.com/ggerganov/whisper.cpp)
135+
- [Whisper OpenAI (using C++ port by: ggerganov)](https://github.com/ggerganov/whisper.cpp)
147136

148137
## Feedback
149138

150139
If you have any feedback, please reach out to us at chetanbaliyan10@gmail.com
151140

152-
153141
## Authors
154142

155-
- [@chetanXpro](https://www.github.com/chetanXpro)
156-
143+
- [@chetanXpro](https://www.github.com/chetanXpro)

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "nodejs-whisper",
3-
"version": "0.0.8",
3+
"version": "0.1.3",
44
"description": "Node bindings for OpenAI's Whisper. Optimized for CPU.",
55
"main": "dist/index.js",
66
"types": "dist/index.d.ts",

src/autoDownloadModel.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import path from 'path'
2+
import shell from 'shelljs'
3+
4+
import { MODELS_LIST, MODELS } from './constants'
5+
import fs from 'fs'
6+
7+
/**
 * Downloads and compiles a whisper.cpp model when no model is installed yet.
 *
 * Outcomes (the returned promise never rejects; failures are logged and
 * handed back as the resolved value):
 * - `autoDownloadModelName` is missing/empty: nothing happens, resolves to `undefined`.
 * - The name is not in `MODELS_LIST`: resolves to the validation `Error`.
 * - At least one entry of `MODELS` already exists in the models directory:
 *   resolves to `'Models already exist. Skipping download.'`.
 * - Otherwise the platform download script is run for the requested model,
 *   followed by `make` one directory up; resolves to
 *   `'Model Downloaded Successfully'`, or to an `Error` if either command
 *   exits with a non-zero code.
 *
 * Side effect: the shelljs working directory is changed (to the models
 * directory, and to its parent when a build is attempted) and is not restored.
 *
 * NOTE(review): the skip check looks for *any* file listed in `MODELS`, not
 * specifically the requested model — confirm this is intended.
 *
 * @param autoDownloadModelName Model to fetch; must be one of `MODELS_LIST`.
 * @returns A status message, the caught error, or `undefined` (see above).
 */
export default async function autoDownloadModel(autoDownloadModelName?: string) {
  // Nothing requested: keep this a no-op.
  if (!autoDownloadModelName) return undefined

  try {
    // Validating against the known list also keeps the shell command below
    // free of arbitrary caller-supplied text.
    if (!MODELS_LIST.includes(autoDownloadModelName)) {
      throw new Error('[Nodejs-whisper] Error: Provide valid model name')
    }

    const modelsDir = path.join(__dirname, '..', './cpp/whisper.cpp/models')
    shell.cd(modelsDir)

    // Collect every known model file that is already on disk.
    const existingModels: unknown[] = []
    MODELS.forEach(model => {
      if (fs.existsSync(path.join(modelsDir, `${model}`))) {
        existingModels.push(model)
      }
    })

    if (existingModels.length > 0) {
      return 'Models already exist. Skipping download.'
    }

    console.log(`[Nodejs-whisper] Autodownload Model: ${autoDownloadModelName}\n`)

    const scriptPath = process.platform === 'win32' ? 'download-ggml-model.cmd' : './download-ggml-model.sh'

    shell.chmod('+x', scriptPath)
    const download = shell.exec(`${scriptPath} ${autoDownloadModelName}`)
    if (download.code !== 0) {
      // Do not go on to build (or claim success) when the download failed.
      throw new Error(
        `[Nodejs-whisper] Error: Model download failed with exit code ${download.code}`
      )
    }

    console.log('[Nodejs-whisper] Attempting to compile model...\n')

    // The Makefile lives one level above the models directory.
    shell.cd('../')
    const build = shell.exec('make')
    if (build.code !== 0) {
      throw new Error(`[Nodejs-whisper] Error: 'make' failed with exit code ${build.code}`)
    }

    return 'Model Downloaded Successfully'
  } catch (error) {
    console.log('[Nodejs-whisper] Error Caught in downloadModel\n')
    console.log(error)
    // Existing contract: report the failure as the resolved value rather than rejecting.
    return error
  }
}

src/downloadModel.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,9 @@ export default async function downloadModel() {
7474
const modelName = await askForModel()
7575

7676
let scriptPath = './download-ggml-model.sh'
77-
// windows .cmd version
77+
7878
if (process.platform === 'win32') scriptPath = 'download-ggml-model.cmd'
7979

80-
// todo: check if windows or unix to run bat command or .sh command
8180
shell.chmod('+x', scriptPath)
8281
shell.exec(`${scriptPath} ${modelName}`)
8382

src/index.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,26 @@
11
import { WhisperOptions } from './types'
22
import { executeCppCommand } from './whisper'
3-
import downloadModel from './downloadModel'
3+
// import downloadModel from './downloadModel'
44

55
import { constructCommand } from './WhisperHelper'
66
import { checkIfFileExists, convertToWavType } from './utils'
77

8+
import autoDownloadModel from './autoDownloadModel'
9+
810
/**
 * Options accepted by `nodewhisper`.
 */
export interface IOptions {
  /** Name of the whisper model to transcribe with. */
  modelName: string

  /**
   * Optional model name; when set, `nodewhisper` awaits
   * `autoDownloadModel` with it before checking the input file.
   */
  autoDownloadModelName?: string

  /** Optional whisper settings (see `WhisperOptions`). */
  whisperOptions?: WhisperOptions
}
1215

1316
export async function nodewhisper(filePath: string, options: IOptions) {
17+
if (options.autoDownloadModelName) {
18+
await autoDownloadModel(options.autoDownloadModelName)
19+
}
20+
1421
checkIfFileExists(filePath)
1522
console.log(`[Nodejs-whisper] Transcribing file: ${filePath}\n`)
16-
await downloadModel()
23+
// await downloadModel()
1724

1825
const outputFilePath = await convertToWavType(filePath)
1926

src/utils.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ export const convertToWavType = async (inputFilePath: string) => {
2020
)
2121

2222
if (fileExtension !== 'wav') {
23-
console.warn('[Nodejs-whisper] Warning: Unsupported audio format.\n')
2423
console.log('[Nodejs-whisper] Converting audio to wav File Type...\n')
2524
const command = `ffmpeg -nostats -loglevel 0 -i ${inputFilePath} -ar 16000 -ac 1 -c:a pcm_s16le ${outputFilePath}.wav`
2625

src/whisper.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ export async function whisperShell(command: string, options: IShellOptions = def
2121
// docs: https://github.com/shelljs/shelljs#execcommand--options--callback
2222
shell.exec(command, options, (code: number, stdout: string, stderr: string) => {
2323
if (code === 0) {
24-
console.log('[Nodejs-whisper] Done!')
24+
console.log('[Nodejs-whisper] Transcribing Done!')
2525

2626
resolve(stdout)
2727
} else {

0 commit comments

Comments
 (0)