-
Notifications
You must be signed in to change notification settings - Fork 3.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1900 from dsteinman/nodejs-example
add simple nodejs example
- Loading branch information
Showing
4 changed files
with
149 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# NodeJS voice recognition example using Mozilla DeepSpeech | ||
|
||
Download the pre-trained model (1.8GB): | ||
|
||
``` | ||
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/deepspeech-0.4.1-models.tar.gz | ||
tar xvfz deepspeech-0.4.1-models.tar.gz | ||
``` | ||
|
||
Edit the model paths at the top of `index.js` if necessary: | ||
|
||
``` | ||
let modelPath = './models/output_graph.pbmm'; | ||
let alphabetPath = './models/alphabet.txt'; | ||
let lmPath = './models/lm.binary'; | ||
let triePath = './models/trie'; | ||
``` | ||
|
||
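Install SoX, which is used to convert the audio to raw 16-bit, 16 kHz mono before recognition (the command below is for macOS with Homebrew): | ||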
|
||
``` | ||
brew install sox | ||
``` | ||
|
||
Download test audio files: | ||
|
||
``` | ||
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz | ||
tar xfvz audio-0.4.1.tar.gz | ||
``` | ||
|
||
Install NPM dependencies: | ||
|
||
``` | ||
npm install | ||
``` | ||
|
||
Run: | ||
|
||
``` | ||
node index.js | ||
``` | ||
|
||
Result should be something like: | ||
|
||
``` | ||
audio length 1.975 | ||
result: experience proves this | ||
``` | ||
|
||
Try other wav files with an argument: | ||
|
||
``` | ||
node index.js audio/2830-3980-0043.wav | ||
node index.js audio/8455-210777-0068.wav | ||
node index.js audio/4507-16021-0012.wav | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
const DeepSpeech = require('deepspeech'); | ||
const Fs = require('fs'); | ||
const Sox = require('sox-stream'); | ||
const MemoryStream = require('memory-stream'); | ||
const Duplex = require('stream').Duplex; | ||
const Wav = require('node-wav'); | ||
|
||
// Parameters passed to the DeepSpeech.Model constructor below.
const BEAM_WIDTH = 1024;
const N_FEATURES = 26;
const N_CONTEXT = 9;
const modelPath = './models/output_graph.pbmm';
const alphabetPath = './models/alphabet.txt';

const model = new DeepSpeech.Model(modelPath, N_FEATURES, N_CONTEXT, alphabetPath, BEAM_WIDTH);

// Weights and file paths passed to enableDecoderWithLM below.
const LM_ALPHA = 0.75;
const LM_BETA = 1.85;
const lmPath = './models/lm.binary';
const triePath = './models/trie';

model.enableDecoderWithLM(alphabetPath, lmPath, triePath, LM_ALPHA, LM_BETA);

// The first command-line argument selects the audio file; otherwise fall back
// to the default sample path.
const audioFile = process.argv[2] || './audio/2830-3980-0043.wav';

if (!Fs.existsSync(audioFile)) {
  // Report on stderr and exit non-zero so that shells and scripts calling this
  // example can tell that nothing was transcribed.
  console.error('file missing:', audioFile);
  process.exit(1);
}

// Raw file bytes; the same buffer is later streamed through SoX for conversion.
const buffer = Fs.readFileSync(audioFile);

// The file is decoded here only to inspect its sample rate.
const result = Wav.decode(buffer);

if (result.sampleRate < 16000) {
  console.error(`Warning: original sample rate (${result.sampleRate}) is lower than 16kHz. Up-sampling might produce erratic speech recognition.`);
}
|
||
/**
 * Wrap an in-memory buffer in a stream so it can be piped into other streams.
 *
 * @param {Buffer} buffer - Data to emit from the stream.
 * @returns {Duplex} Stream whose readable side yields `buffer` and then ends.
 */
function bufferToStream(buffer) {
  // All data is pushed up front, so there is nothing to do on demand. A no-op
  // `read` is still supplied because a bare Duplex throws
  // ERR_METHOD_NOT_IMPLEMENTED if its `_read` is ever invoked.
  const stream = new Duplex({ read() {} });
  stream.push(buffer);
  // Pushing null signals end-of-stream to consumers.
  stream.push(null);
  return stream;
}
|
||
// Options for the SoX conversion step: raw, signed 16-bit little-endian,
// single-channel output at 16000 Hz, with dithering disabled.
const soxOptions = {
  global: {
    'no-dither': true,
  },
  output: {
    bits: 16,
    rate: 16000,
    channels: 1,
    encoding: 'signed-integer',
    endian: 'little',
    compression: 0.0,
    type: 'raw',
  },
};

// Collects the converted audio in memory.
let audioStream = new MemoryStream();

// file bytes -> SoX conversion -> in-memory sink
const converter = Sox(soxOptions);
bufferToStream(buffer).pipe(converter).pipe(audioStream);

// Runs once the sink has received all converted audio.
audioStream.on('finish', () => {
  const audioBuffer = audioStream.toBuffer();

  // Two bytes per 16-bit sample, so half the byte length is the sample count.
  const halfLength = audioBuffer.length / 2;
  const audioLength = halfLength * (1 / 16000);
  console.log('audio length', audioLength);

  // Only the first half of the bytes is handed to stt().
  // NOTE(review): presumably the native binding reads the buffer length as a
  // count of 16-bit samples rather than bytes — confirm against the DeepSpeech
  // JavaScript API before changing this slice.
  const transcript = model.stt(audioBuffer.slice(0, halfLength), 16000);

  console.log('result:', transcript);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"name": "deepspeech-nodejs_wav", | ||
"version": "1.0.0", | ||
"description": "Simple audio processing", | ||
"main": "index.js", | ||
"scripts": { | ||
"start": "node ./index.js" | ||
}, | ||
"dependencies": { | ||
"argparse": "^1.0.10", | ||
"deepspeech": "^0.4.1", | ||
"node-wav": "0.0.2", | ||
"sox-stream": "^2.0.3", | ||
"util": "^0.11.1" | ||
}, | ||
"license": "Public domain" | ||
} |