Merge pull request #1900 from dsteinman/nodejs-example

add simple nodejs example
mozilla · Mar 4, 2019 · dca8c40 · dca8c40
2 parents 604c015 + 7c3d56b
commit dca8c40
Show file tree

Hide file tree

Showing 4 changed files with 149 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -196,7 +196,7 @@ Alternatively, if you're using Linux and have a supported NVIDIA GPU (See the re
 npm install deepspeech-gpu
 ```
 
-See [client.js](native_client/javascript/client.js) for an example of how to use the bindings.
+See [client.js](native_client/javascript/client.js) for an example of how to use the bindings, or try the [Node.js WAV example](examples/nodejs_wav).
 
 Please ensure you have the required [CUDA dependency](#cuda-dependency).
 

diff --git a/examples/nodejs_wav/Readme.md b/examples/nodejs_wav/Readme.md
@@ -0,0 +1,59 @@
+# NodeJS voice recognition example using Mozilla DeepSpeech
+
+Download the pre-trained model (1.8GB):
+
+```
+wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/deepspeech-0.4.1-models.tar.gz
+tar xvfz deepspeech-0.4.1-models.tar.gz
+```
+
+Edit the model paths in `index.js` if necessary:
+
+```
+let modelPath = './models/output_graph.pbmm';
+let alphabetPath = './models/alphabet.txt';
+let lmPath = './models/lm.binary';
+let triePath = './models/trie';
+```
+
+Install SoX (used to convert the .wav audio to raw 16 kHz, 16-bit mono samples), e.g. on macOS with Homebrew:
+
+```
+brew install sox
+```
+
+Download test audio files:
+
+```
+wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz
+tar xfvz audio-0.4.1.tar.gz
+```
+
+Install NPM dependencies:
+
+```
+npm install
+```
+
+Run:
+
+```
+node index.js
+```
+
+The output should look something like:
+
+```
+audio length 1.975
+result: experience proves this
+
+```
+
+To transcribe a different .wav file, pass its path as an argument:
+
+```
+node index.js audio/2830-3980-0043.wav
+node index.js audio/8455-210777-0068.wav
+node index.js audio/4507-16021-0012.wav
+```
+
diff --git a/examples/nodejs_wav/index.js b/examples/nodejs_wav/index.js
@@ -0,0 +1,72 @@
+const DeepSpeech = require('deepspeech');
+const Fs = require('fs');
+const Sox = require('sox-stream');
+const MemoryStream = require('memory-stream');
+const Duplex = require('stream').Duplex;
+const Wav = require('node-wav');
+
+const BEAM_WIDTH = 1024;
+const N_FEATURES = 26;
+const N_CONTEXT = 9;
+let modelPath = './models/output_graph.pbmm';
+let alphabetPath = './models/alphabet.txt';
+
+let model = new DeepSpeech.Model(modelPath, N_FEATURES, N_CONTEXT, alphabetPath, BEAM_WIDTH);
+
+const LM_ALPHA = 0.75;
+const LM_BETA = 1.85;
+let lmPath = './models/lm.binary';
+let triePath = './models/trie';
+
+model.enableDecoderWithLM(alphabetPath, lmPath, triePath, LM_ALPHA, LM_BETA);
+
+let audioFile = process.argv[2] || './audio/2830-3980-0043.wav';
+
+if (!Fs.existsSync(audioFile)) {
+	console.log('file missing:', audioFile);
+	process.exit();
+}
+
+const buffer = Fs.readFileSync(audioFile);
+const result = Wav.decode(buffer);
+
+if (result.sampleRate < 16000) {
+	console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.');
+}
+
+function bufferToStream(buffer) {
+	let stream = new Duplex();
+	stream.push(buffer);
+	stream.push(null);
+	return stream;
+}
+
+let audioStream = new MemoryStream();
+bufferToStream(buffer).
+pipe(Sox({
+	global: {
+		'no-dither': true,
+	},
+	output: {
+		bits: 16,
+		rate: 16000,
+		channels: 1,
+		encoding: 'signed-integer',
+		endian: 'little',
+		compression: 0.0,
+		type: 'raw'
+	}
+})).
+pipe(audioStream);
+
+audioStream.on('finish', () => {
+
+	let audioBuffer = audioStream.toBuffer();
+
+	const audioLength = (audioBuffer.length / 2) * ( 1 / 16000);
+	console.log('audio length', audioLength);
+
+	let result = model.stt(audioBuffer.slice(0, audioBuffer.length / 2), 16000);
+
+	console.log('result:', result);
+});
diff --git a/examples/nodejs_wav/package.json b/examples/nodejs_wav/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "deepspeech-nodejs_wav",
+  "version": "1.0.0",
+  "description": "Simple audio processing",
+  "main": "index.js",
+  "scripts": {
+    "start": "node ./index.js"
+  },
+  "dependencies": {
+    "argparse": "^1.0.10",
+    "deepspeech": "^0.4.1",
+    "memory-stream": "0.0.3",
+    "node-wav": "0.0.2",
+    "sox-stream": "^2.0.3",
+    "util": "^0.11.1"
+  },
+  "license": "Public domain"
+}