watson-developer-cloud
diff --git a/‎dist/watson-speech.js
Lines changed: 1999 additions & 683 deletions b/‎dist/watson-speech.js
Lines changed: 1999 additions & 683 deletions
diff --git a/‎examples/static/index.html
Lines changed: 21 additions & 19 deletions b/‎examples/static/index.html
Lines changed: 21 additions & 19 deletions
diff --git a/‎examples/static/multi-speaker-file-console.html renamed to ‎examples/static/speaker-labels-file-console.html
Lines changed: 2 additions & 5 deletions b/‎examples/static/multi-speaker-file-console.html renamed to ‎examples/static/speaker-labels-file-console.html
Lines changed: 2 additions & 5 deletions
diff --git a/‎examples/static/speaker-stream-file-console.html
Lines changed: 77 additions & 0 deletions b/‎examples/static/speaker-stream-file-console.html
Lines changed: 77 additions & 0 deletions
diff --git a/‎examples/static/speaker-stream-file-html.html
Lines changed: 85 additions & 0 deletions b/‎examples/static/speaker-stream-file-html.html
Lines changed: 85 additions & 0 deletions
diff --git a/‎speech-to-text/index.js
Lines changed: 5 additions & 0 deletions b/‎speech-to-text/index.js
Lines changed: 5 additions & 0 deletions
diff --git a/‎speech-to-text/recognize-file.js
Lines changed: 49 additions & 14 deletions b/‎speech-to-text/recognize-file.js
Lines changed: 49 additions & 14 deletions
@@ -1,34 +1,36 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
-    <meta charset="UTF-8">
-    <title>IBM Watson Speech JavaScript SDK Example</title>
+  <meta charset="UTF-8">
+  <title>IBM Watson Speech JavaScript SDK Example</title>
 </head>
 <body>
 <h1>IBM Watson Speech JavaScript SDK Examples</h1>
 <h2>Speech to Text</h2>
 <ul>
-    <li><a href="microphone-streaming.html">Transcribe from Microphone, Streaming</a></li>
-    <li><a href="microphone-streaming-auto-stop.html">Transcribe from Microphone, Streaming, automatically stop at first pause</a></li>
-    <li><a href="microphone-alternatives.html">Transcribe from Microphone, with Alternatives</a></li>
-    <li><a href="microphone-word-confidence.html">Transcribe from Microphone, with Word Confidence</a></li>
-    <li><a href="microphone-streaming-text-to-console.html">Transcribe from Microphone, send text to console</a></li>
-    <li><a href="microphone-streaming-object-to-console.html">Transcribe from Microphone, send JSON to console (includes text and metadata; v0.22+ format)</a></li>
-    <li><a href="microphone-streaming-object-extracted-to-console.html">Transcribe from Microphone, send JSON to console with results extracted (pre-v0.22 format)</a></li>
-    <li><a href="microphone-streaming-model.html">Transcribe from Microphone, Streaming with chosen model</a></li>
-    <li><a href="file-streaming.html">Transcribe from file, Streaming</a></li>
-    <li><a href="multi-speaker-file-console.html">Transcribe from file, multiple speakers</a></li>
-    <li><a href="file-realtime-vs-no-realtime.html">Transcribe from file, Comparing <code>{realtime: true}</code> to <code>{realtime: false}</code></a></li>
-    <li><a href="file-promise.html">Transcribe from file, Promise</a></li>
-    <li><a href="file-ajax.html">Transcribe from file loaded over AJAX</a></li>
-    <li><a href="browserify.html">Example bundled with browserify</a> <b>(Node.js server only)</b></li>
-    <li><strike><a href="audio-video-deprecated/">Deprecated: Transcribe from HTML5 &lt;audio&gt; or &lt;video&gt; element</a></strike>  <b>(Node.js server only)</b></li>
+  <li><a href="microphone-streaming.html">Transcribe from Microphone, Streaming</a></li>
+  <li><a href="microphone-streaming-auto-stop.html">Transcribe from Microphone, Streaming, automatically stop at first pause</a></li>
+  <li><a href="microphone-alternatives.html">Transcribe from Microphone, with Alternatives</a></li>
+  <li><a href="microphone-word-confidence.html">Transcribe from Microphone, with Word Confidence</a></li>
+  <li><a href="microphone-streaming-text-to-console.html">Transcribe from Microphone, send text to console</a></li>
+  <li><a href="microphone-streaming-object-to-console.html">Transcribe from Microphone, send JSON to console (includes text and metadata; v0.22+ format)</a></li>
+  <li><a href="microphone-streaming-object-extracted-to-console.html">Transcribe from Microphone, send JSON to console with results extracted (pre-v0.22 format)</a></li>
+  <li><a href="microphone-streaming-model.html">Transcribe from Microphone, Streaming with chosen model</a></li>
+  <li><a href="file-streaming.html">Transcribe from file, Streaming</a></li>
+  <li><a href="speaker-labels-file-console.html">Transcribe from file with <code>{speaker_labels: true}</code>, output to console</a></li>
+  <li><a href="speaker-stream-file-console.html">Transcribe from file with <code>{resultsBySpeaker: true}</code>, output to console</a></li>
+  <li><a href="speaker-stream-file-html.html">Transcribe from file with <code>{resultsBySpeaker: true}</code>, output HTML</a></li>
+  <li><a href="file-realtime-vs-no-realtime.html">Transcribe from file, Comparing <code>{realtime: true}</code> to <code>{realtime: false}</code></a></li>
+  <li><a href="file-promise.html">Transcribe from file, Promise</a></li>
+  <li><a href="file-ajax.html">Transcribe from file loaded over AJAX</a></li>
+  <li><a href="browserify.html">Example bundled with browserify</a> <b>(Node.js server only)</b></li>
+  <li><strike><a href="audio-video-deprecated/">Deprecated: Transcribe from HTML5 &lt;audio&gt; or &lt;video&gt; element</a></strike>  <b>(Node.js server only)</b></li>
 </ul>
 
 <h2>Text to Speech</h2>
 <ul>
-    <li><a href="text-to-speech.html">Synthesize text</a></li>
-    <li><a href="text-to-speech-custom-voice.html">Synthesize text w/ custom voice</a></li>
+  <li><a href="text-to-speech.html">Synthesize text</a></li>
+  <li><a href="text-to-speech-custom-voice.html">Synthesize text w/ custom voice</a></li>
 </ul>
 </body>
 </html>
@@ -7,7 +7,7 @@
 <body>
 
 <section>
-  <h2>Transcribe from Microphone</h2>
+  <h2>Transcribe from file with <code>{speaker_labels: true}</code>, output to console</h2>
   <button id="button">Transcribe File</button>
   <button id="stop">Stop</button>
 
@@ -47,15 +47,12 @@ <h2>Code for this demo:</h2>
         model: 'en-US_NarrowbandModel',
         objectMode: true, // send objects instead of text
         realtime: true, // don't slow down the results if transcription occurs faster than playback
-        format: false,
+        format: false, // formatting is off here; to format multi-speaker results, enable resultsBySpeaker
         play: true
       });
 
-      window.allResults = [];
-
       stream.on('data', function(data) {
         console.log(data);
-        allResults.push(data);
       });
 
       stream.on('error', function(err) {
 
@@ -0,0 +1,77 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <title>Watson Speech to Text client example</title>
+</head>
+<body>
+
+<section>
+  <h2>Transcribe from file with <code>{resultsBySpeaker: true}</code>, output to console</h2>
+  <button id="button">Transcribe File</button>
+  <button id="stop">Stop</button>
+
+  <h2>Output:</h2>
+  <div id="output">Open your browser's console to view the output. Note: it will take some time before results begin to appear.</div>
+</section>
+
+<script src="watson-speech.js"></script>
+<!-- window.fetch polyfill for IE/Edge & older Chrome/Firefox -->
+<script src="bower_components/fetch/fetch.js"></script>
+
+<h2>Code for this demo:</h2>
+
+<pre><code><script style="display: block;">
+
+  // preloading the data for a smoother experience
+  var preloadTokenAndAudio = Promise.all([
+    fetch('/api/speech-to-text/token').then(function(response) {
+      return response.text();
+    }),
+    fetch('/en-us-multi-speaker-narrowband.wav').then(function(response) {
+      return response.blob();
+    })
+  ]);
+
+  document.querySelector('#button').onclick = function () {
+    preloadTokenAndAudio.then(function (values) {
+      var token = values[0];
+      var file = values[1];
+
+      var stream = WatsonSpeech.SpeechToText.recognizeFile({
+        token: token,
+        data: file,
+        // only certain models support speaker labels currently,
+        // see http://www.ibm.com/watson/developercloud/doc/speech-to-text/output.shtml#speaker_labels
+        model: 'en-US_NarrowbandModel',
+        resultsBySpeaker: true, // pipes results through a SpeakerStream, and also enables speaker_labels and objectMode
+        realtime: false, // don't slow down the results if transcription occurs faster than playback
+        play: true
+      });
+
+      stream.on('data', function(data) {
+        // SpeakerStream's data events are different in that most include multiple result objects, and currently, they
+        // are all interim until the last data event.
+
+        // The result objects look similar to those returned by the RecognizeStream, except that they each have a
+        // `speaker` key with a numeric value. Additionally, extra features, such as alternatives and word alternatives
+        // will be lost in the SpeakerStream results.
+
+        console.log(data);
+      });
+
+      stream.on('error', function(err) {
+        console.log(err);
+      });
+
+      document.querySelector('#stop').onclick = stream.stop.bind(stream);
+
+    }).catch(function(error) {
+      console.log(error);
+    });
+  };
+
+</script></code></pre>
+
+</body>
+</html>
@@ -0,0 +1,85 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <title>Watson Speech to Text client example</title>
+</head>
+<body>
+
+<section>
+  <h2>Transcribe from file with <code>{resultsBySpeaker: true}</code>, output HTML</h2>
+  <button id="button">Transcribe File</button>
+  <button id="stop">Stop</button>
+
+  <h2>Output:</h2>
+  <div id="output"></div>
+</section>
+
+<script src="watson-speech.js"></script>
+<!-- window.fetch polyfill for IE/Edge & older Chrome/Firefox -->
+<script src="bower_components/fetch/fetch.js"></script>
+
+<h2>Code for this demo:</h2>
+
+<pre><code><script style="display: block;">
+
+  // preloading the data for a smoother experience
+  var preloadTokenAndAudio = Promise.all([
+    fetch('/api/speech-to-text/token').then(function(response) {
+      return response.text();
+    }),
+    fetch('/en-us-multi-speaker-narrowband.wav').then(function(response) {
+      return response.blob();
+    })
+  ]);
+
+  document.querySelector('#button').onclick = function () {
+    preloadTokenAndAudio.then(function (values) {
+      var token = values[0];
+      var file = values[1];
+
+      document.querySelector('#output').innerHTML = 'Processing. Note: it will take some time for the first results to appear.';
+
+      var stream = WatsonSpeech.SpeechToText.recognizeFile({
+        token: token,
+        data: file,
+        speaker_labels: true,
+        // only certain models support speaker labels currently,
+        // see http://www.ibm.com/watson/developercloud/doc/speech-to-text/output.shtml#speaker_labels
+        model: 'en-US_NarrowbandModel',
+        resultsBySpeaker: true, // pipes results through a SpeakerStream, and also enables speaker_labels and objectMode
+        play: true
+      });
+
+      stream.on('data', function(data) {
+        // With resultsBySpeaker, the data events are different in that most include multiple result objects, and
+        // currently, they are all interim until the last data event.
+
+        // The result objects look similar to normal ones, except that they each have a `speaker` key with a numeric
+        // value. Additionally, extra features, such as alternatives and word alternatives will be lost.
+
+        var lines = data.results.map(function(result) {
+            return '<div class="line speaker-' + result.speaker + '">' +
+              '<b class="speaker-label">Speaker ' + result.speaker + ':</b> ' +
+              result.alternatives[0].transcript +
+            '</div>';
+        });
+
+        document.querySelector('#output').innerHTML = lines.join('\n');
+      });
+
+      stream.on('error', function(err) {
+        console.log(err);
+      });
+
+      document.querySelector('#stop').onclick = stream.stop.bind(stream);
+
+    }).catch(function(error) {
+      console.log(error);
+    });
+  };
+
+</script></code></pre>
+
+</body>
+</html>
@@ -62,6 +62,11 @@ module.exports = {
    */
   ResultStream: require('./result-stream'),
 
+  /**
+   * @see SpeakerStream
+   */
+  SpeakerStream: require('./speaker-stream'),
+
   /**
    * @see WritableElementStream
    */
 
@@ -23,6 +23,7 @@ var TimingStream = require('./timing-stream.js');
 var assign = require('object.assign/polyfill')();
 var WritableElementStream = require('./writable-element-stream');
 var ResultStream = require('./result-stream');
+var SpeakerStream = require('./speaker-stream');
 
 /**
  * @module watson-speech/speech-to-text/recognize-file
@@ -37,11 +38,12 @@ var ResultStream = require('./result-stream');
  * @param {Blob|File} options.data - the raw audio data as a Blob or File instance
  * @param {Boolean} [options.play=false] - If a file is set, play it locally as it's being uploaded
  * @param {Boolena} [options.format=true] - pipe the text through a {FormatStream} which performs light formatting. Also controls smart_formatting option unless explicitly set.
- * @param {Boolena} [options.realtime=options.play] - pipe the text through a {TimingStream} which slows the output down to real-time to match the audio playback.
+ * @param {Boolean} [options.realtime=options.play] - pipe the text through a {TimingStream} which slows the output down to real-time to match the audio playback. Not currently compatible with resultsBySpeaker option.
  * @param {String|DOMElement} [options.outputElement] pipe the text to a WriteableElementStream targeting the specified element. Also defaults objectMode to true to enable interim results.
- * @param {Boolean} [options.extractResults=false] pipe results through a ResultExtractor stream to simplify the objects. (Default behavior before v0.22) Requires objectMode.
+ * @param {Boolean} [options.extractResults=false] pipe results through a ResultExtractor stream to simplify the objects. (Default behavior before v0.22) Automatically enables objectMode.
+ * @param {Boolean} [options.resultsBySpeaker=false] pipe results through a SpeakerStream. Causes each data event to include multiple results, each with a speaker field. Automatically enables objectMode and speaker_labels. Automatically disables the realtime option due to incompatibilities. Adds some delay to processing.
  *
- * @returns {RecognizeStream|FormatStream|TimingStream}
+ * @returns {RecognizeStream|SpeakerStream|FormatStream|ResultStream|TimingStream}
  */
 module.exports = function recognizeFile(options) { // eslint-disable-line complexity
   if (!options || !options.token) {
@@ -53,8 +55,14 @@ module.exports = function recognizeFile(options) { // eslint-disable-line comple
     options.objectMode = true;
   }
   // the ResultExtractor only works in objectMode
-  if (options.extractResults && options.objectMode !== false) {
+  if (options.extractResults) {
+    options.objectMode = true;
+  }
+  // SpeakerStream requires objectMode and speaker_labels
+  if (options.resultsBySpeaker) {
     options.objectMode = true;
+    options.speaker_labels = true;
+    options.realtime = false;
   }
 
   // default format to true (capitals and periods)
@@ -78,19 +86,34 @@ module.exports = function recognizeFile(options) { // eslint-disable-line comple
   delete rsOpts.objectMode;
 
 
+
+  var stream = new BlobStream(options.data);
   var recognizeStream = new RecognizeStream(rsOpts);
-  var stream = new BlobStream(options.data).pipe(recognizeStream);
+  var streams = [stream, recognizeStream]; // collect all of the streams so that we can bundle up errors and send them to the last one
+  stream = stream.pipe(recognizeStream);
 
-  if (options.format) {
-    stream = stream.pipe(new FormatStream(options));
-  }
+  // note: the TimingStream cannot currently handle results as regrouped by the SpeakerStream
+  // so it must come first
+  var timingStream;
   if (realtime) {
-    stream = stream.pipe(new TimingStream(options));
+    timingStream = new TimingStream(options);
+    stream = stream.pipe(timingStream);
+    streams.push(stream);
     stream.on('stop', recognizeStream.stop.bind(recognizeStream));
   } else {
     stream.stop = recognizeStream.stop.bind(recognizeStream);
   }
 
+  if (options.resultsBySpeaker) {
+    stream = stream.pipe(new SpeakerStream(options));
+    streams.push(stream);
+  }
+
+  if (options.format) {
+    stream = stream.pipe(new FormatStream(options));
+    streams.push(stream);
+  }
+
   if (options.play) {
     FilePlayer.playFile(options.data).then(function(player) {
       recognizeStream.on('stop', player.stop.bind(player));
@@ -101,18 +124,30 @@ module.exports = function recognizeFile(options) { // eslint-disable-line comple
   }
 
   if (options.outputElement) {
-    stream.pipe(new WritableElementStream(options));
+    // we don't want to return the WES, just send data to it
+    streams.push(stream.pipe(new WritableElementStream(options)));
   }
 
   if (options.extractResults) {
-    var stop = stream.stop.bind(stream);
+    var stop = stream.stop ? stream.stop.bind(stream) : recognizeStream.stop.bind(recognizeStream);
     stream = stream.pipe(new ResultStream());
     stream.stop = stop;
+    streams.push(stream);
   }
 
-  // Capture error from original RecognizeStream
-  if (stream !== recognizeStream) {
-    recognizeStream.on('error', stream.emit.bind(stream, 'error'));
+  // Capture errors from any stream except the last one and emit them on the last one
+  streams.forEach(function(prevStream) {
+    if (prevStream !== stream) {
+      prevStream.on('error', stream.emit.bind(stream, 'error'));
+    }
+  });
+
+  if (!stream.stop) {
+    if (timingStream) {
+      stream.stop = timingStream.stop.bind(timingStream);
+    } else {
+      stream.stop = recognizeStream.stop.bind(recognizeStream);
+    }
   }
 
   return stream;