+ /**
+ * Manager handling the transcription of Speech into Text.
+ *
+ * @author Sotiri Bakagiannis and Alain Pitiot
+ * @version 2021.x
+ * @copyright (c) 2021 Open Science Tools Ltd. (https://opensciencetools.org)
+ * @license Distributed under the terms of the MIT License
+ */
+
+import {Clock} from "../util/Clock";
+import {PsychObject} from "../util/PsychObject";
+import {PsychoJS} from "../core/PsychoJS";
+
+
/**
 * Transcript returned by the transcriber
 *
 * @name module:sound.Transcript
 * @class
 * @param {Object} transcriber - the transcriber that produced the recognition result; only its
 * 	timing fields (speech start, speech end, recognition time) are read
 * @param {string} [text= ''] - the recognised text
 * @param {number} [confidence= 0.0] - the confidence in the recognition, as reported by the recogniser
 */
export class Transcript
{
	constructor(transcriber, text = '', confidence = 0.0)
	{
		// recognised text:
		this.text = text;

		// confidence in the recognition:
		this.confidence = confidence;

		// The Transcriber stores the speech timings in _currentSpeechStart/_currentSpeechEnd,
		// so reading only _speechStart/_speechEnd always produced undefined. Both spellings
		// are accepted; the explicit undefined test keeps a legitimate timestamp of 0.

		// time the speech started, relative to the Transcriber clock:
		this.speechStart = (typeof transcriber._speechStart !== 'undefined')
			? transcriber._speechStart
			: transcriber._currentSpeechStart;

		// time the speech ended, relative to the Transcriber clock:
		this.speechEnd = (typeof transcriber._speechEnd !== 'undefined')
			? transcriber._speechEnd
			: transcriber._currentSpeechEnd;

		// time a recognition result was produced, relative to the Transcriber clock:
		this.time = transcriber._recognitionTime;
	}
}
+
+
/**
 * <p>This manager handles the transcription of speech into text.</p>
 *
 * @name module:sound.Transcriber
 * @class
 * @param {Object} options
 * @param {module:core.PsychoJS} options.psychoJS - the PsychoJS instance
 * @param {String} [options.name= 'transcriber'] - the name used when logging messages
 * @param {number} [options.bufferSize= 10000] - the maximum size of the circular transcript buffer
 * @param {boolean} [options.continuous= true] - whether or not to continuously recognise
 * @param {String} [options.lang= 'en-US'] - the spoken language
 * @param {boolean} [options.interimResults= false] - whether or not to make interim results available
 * @param {number} [options.maxAlternatives= 1] - the maximum number of recognition alternatives
 * @param {String[]} [options.tokens= [] ] - the tokens to be recognised. This is experimental technology, not available in all browsers.
 * @param {Clock} [options.clock] - an optional clock; a new Clock is created when none is given
 * @param {boolean} [options.autoLog= false] - whether or not to log
 *
 * @todo deal with alternatives, interim results, and recognition errors
 */
export class Transcriber extends PsychObject
{

	constructor({psychoJS, name, bufferSize, continuous, lang, interimResults, maxAlternatives, tokens, clock, autoLog} = {})
	{
		super(psychoJS);

		// _onChange dereferences `this`, so it is handed over wrapped in an arrow function:
		// it then keeps its receiver however the attribute machinery chooses to invoke it.
		const onChange = () => this._onChange();

		this._addAttribute('name', name, 'transcriber');
		this._addAttribute('bufferSize', bufferSize, 10000);
		this._addAttribute('continuous', continuous, true, onChange);
		this._addAttribute('lang', lang, 'en-US', onChange);
		this._addAttribute('interimResults', interimResults, false, onChange);
		this._addAttribute('maxAlternatives', maxAlternatives, 1, onChange);
		this._addAttribute('tokens', tokens, [], onChange);
		this._addAttribute('clock', clock, new Clock());
		// (name, value, default): the value and default used to be swapped, which forced autoLog to false
		this._addAttribute('autoLog', autoLog, false);
		this._addAttribute('status', PsychoJS.Status.NOT_STARTED);

		// prepare the transcription:
		this._prepareTranscription();

		if (this._autoLog)
		{
			this._psychoJS.experimentLogger.exp(`Created ${this.name} = ${this.toString()}`);
		}
	}


	/**
	 * Start the transcription.
	 *
	 * <p>If the transcription is already started, nothing is done and an already fulfilled
	 * promise is returned.</p>
	 *
	 * @name module:sound.Transcriber#start
	 * @function
	 * @public
	 * @return {Promise} promise fulfilled, with an object holding the start time, when the
	 * 	transcription actually started, and rejected if a recognition error occurs beforehand
	 * @throws {Object} an {origin, context, error} object if the speech recognition is not
	 * 	available or refuses to start
	 */
	start()
	{
		if (this._status === PsychoJS.Status.STARTED)
		{
			return Promise.resolve();
		}

		this._psychoJS.logger.debug('request to start speech to text transcription');

		try
		{
			if (!this._recognition)
			{
				throw 'the speech recognition has not been initialised yet, possibly because the participant has not given the authorisation to record audio';
			}

			this._recognition.start();

			// return a promise, which will be satisfied when the transcription actually starts,
			// which is also when the reset of the clock and the change of status takes place
			return new Promise((resolve, reject) =>
			{
				this._startCallback = resolve;
				this._errorCallback = reject;
			});
		}
		catch (error)
		{
			// TODO Strangely, start sometimes fails with the message that the recognition has already started. It is most probably a bug in the implementation of the Web Speech API. We need to catch this particular error and not throw on this occasion

			this._psychoJS.logger.error('unable to start the speech to text transcription: ' + JSON.stringify(error));
			this._status = PsychoJS.Status.ERROR;

			throw {
				origin: 'Transcriber.start',
				context: 'when starting the speech to text transcription with transcriber: ' + this._name,
				error
			};
		}
	}


	/**
	 * Stop the transcription.
	 *
	 * <p>If the transcription is not started, nothing is done and an already fulfilled
	 * promise is returned.</p>
	 *
	 * @name module:sound.Transcriber#stop
	 * @function
	 * @public
	 * @return {Promise} promise fulfilled, with an object holding the stop time, when the
	 * 	speech recognition actually stopped
	 */
	stop()
	{
		if (this._status !== PsychoJS.Status.STARTED)
		{
			return Promise.resolve();
		}

		this._psychoJS.logger.debug('request to stop speech to text transcription');

		this._recognition.stop();

		// return a promise, which will be satisfied when the recognition actually stops:
		return new Promise((resolve, reject) =>
		{
			this._stopCallback = resolve;
			this._errorCallback = reject;
		});
	}


	/**
	 * Get the list of transcripts still in the buffer, i.e. those that have not been
	 * previously cleared by calls to getTranscripts with clear = true.
	 *
	 * <p>Transcripts are returned from the oldest to the most recent.</p>
	 *
	 * @name module:sound.Transcriber#getTranscripts
	 * @function
	 * @public
	 * @param {Object} options
	 * @param {string[]} [options.transcriptList= []] - the list of transcript texts to consider. If transcriptList is empty, we consider all transcripts.
	 * @param {boolean} [options.clear= true] - whether or not to remove the returned transcripts from the buffer. If a transcriptList has been given, only the matching transcripts are removed.
	 * @return {Transcript[]} the list of matching transcripts still in the buffer
	 */
	getTranscripts({
		transcriptList = [],
		clear = true
	} = {})
	{
		// if nothing in the buffer, return immediately:
		if (this._bufferLength === 0)
		{
			return [];
		}

		// iterate over the buffer, from oldest to most recent, and discard the null transcripts
		// (i.e. those previously cleared). When the buffer has wrapped around, the oldest entry
		// is the one right after the most recent one, otherwise it is at index 0:
		const filteredTranscripts = [];
		const bufferWrap = (this._bufferLength === this._bufferSize);
		let i = bufferWrap ? this._bufferIndex : -1;
		do
		{
			i = (i + 1) % this._bufferSize;

			const transcript = this._circularBuffer[i];
			if (transcript)
			{
				// if the transcriptList is empty or the transcript text is in the transcriptList:
				if (transcriptList.length === 0 || transcriptList.includes(transcript.text))
				{
					filteredTranscripts.push(transcript);

					if (clear)
					{
						this._circularBuffer[i] = null;
					}
				}
			}
		} while (i !== this._bufferIndex);

		return filteredTranscripts;
	}


	/**
	 * Clear all transcripts and reset the circular buffer.
	 *
	 * @name module:sound.Transcriber#clearTranscripts
	 * @function
	 */
	clearTranscripts()
	{
		// circular buffer of transcripts:
		this._circularBuffer = new Array(this._bufferSize);
		this._bufferLength = 0;
		this._bufferIndex = -1;
	}


	/**
	 * Callback for changes to the recognition settings.
	 *
	 * <p>Changes to the recognition settings require the recognition to stop and be re-started.
	 * A transcriber that is not currently started is only re-configured: a change of settings
	 * does not start it.</p>
	 *
	 * @name module:sound.Transcriber#_onChange
	 * @function
	 * @protected
	 */
	_onChange()
	{
		if (this._status !== PsychoJS.Status.STARTED)
		{
			this._prepareTranscription();
			return;
		}

		// stop() only completes asynchronously: start() must wait for it, since it is a no-op
		// for as long as the status is still STARTED.
		this.stop()
			.then(() =>
			{
				this._prepareTranscription();
				return this.start();
			})
			.catch((error) =>
			{
				this._psychoJS.logger.error('unable to restart the speech to text transcription: ' + JSON.stringify(error));
			});
	}


	/**
	 * Prepare the transcription: reset the transcript buffer, create and configure the speech
	 * recognition object, and install its event handlers.
	 *
	 * <p>If the browser does not provide a speech recognition constructor, the recognition is
	 * left undefined and a subsequent call to start will fail.</p>
	 *
	 * @name module:sound.Transcriber#_prepareTranscription
	 * @function
	 * @protected
	 */
	_prepareTranscription()
	{
		// setup the circular buffer of transcripts:
		this.clearTranscripts();


		// recognition settings:
		const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
		if (typeof SpeechRecognition === 'undefined')
		{
			this._recognition = undefined;
			this._psychoJS.logger.error('speech recognition is not available in this browser');
			return;
		}

		this._recognition = new SpeechRecognition();
		this._recognition.continuous = this._continuous;
		this._recognition.lang = this._lang;
		this._recognition.interimResults = this._interimResults;
		this._recognition.maxAlternatives = this._maxAlternatives;

		// grammar list with tokens added:
		if (Array.isArray(this._tokens) && this._tokens.length > 0)
		{
			const SpeechGrammarList = window.SpeechGrammarList || window.webkitSpeechGrammarList;

			if (typeof SpeechGrammarList === 'undefined')
			{
				// grammars are experimental: carry on without them rather than fail altogether
				this._psychoJS.logger.debug('speech grammar lists are not available in this browser: the tokens are ignored');
			}
			else
			{
				// note: we accept JSGF encoded strings, and relative weight indicator between 0.0 and 1.0
				// ref: https://www.w3.org/TR/jsgf/
				const name = 'NULL';
				const grammar = `#JSGF V1.0; grammar ${name}; public <${name}> = ${this._tokens.join('|')};`;
				const grammarList = new SpeechGrammarList();
				grammarList.addFromString(grammar, 1);
				this._recognition.grammars = grammarList;
			}
		}


		// setup the callbacks:

		// called when the start of a speech is detected:
		this._recognition.onspeechstart = () =>
		{
			this._currentSpeechStart = this._clock.getTime();
			this._psychoJS.logger.debug('speech started');
		};

		// called when the end of a speech is detected:
		this._recognition.onspeechend = () =>
		{
			this._currentSpeechEnd = this._clock.getTime();
			this._psychoJS.logger.debug('speech ended');
		};

		// called when the recognition actually started:
		this._recognition.onstart = () =>
		{
			this._clock.reset();
			this._status = PsychoJS.Status.STARTED;
			this._psychoJS.logger.debug('speech recognition started');

			// resolve the Transcriber.start promise, if need be:
			if (this._startCallback)
			{
				const startCallback = this._startCallback;
				this._startCallback = undefined;
				startCallback({
					time: this._psychoJS.monotonicClock.getTime()
				});
			}
		};

		// called whenever the recognition ends, e.g. after stop() or abort():
		this._recognition.onend = () =>
		{
			this._status = PsychoJS.Status.STOPPED;
			this._psychoJS.logger.debug('speech recognition ended');

			// resolve the Transcriber.stop promise, if need be:
			if (this._stopCallback)
			{
				const stopCallback = this._stopCallback;
				this._stopCallback = undefined;
				stopCallback({
					time: this._psychoJS.monotonicClock.getTime()
				});
			}
		};

		// called whenever a new result is available:
		this._recognition.onresult = (event) =>
		{
			this._recognitionTime = this._clock.getTime();

			// do not process the results if the Recogniser is not STARTED:
			if (this._status !== PsychoJS.Status.STARTED)
			{
				return;
			}

			// in continuous recognition mode, we need to get the result at resultIndex,
			// otherwise we pick the first result
			const resultIndex = (this._continuous) ? event.resultIndex : 0;

			// TODO at the moment we consider only the first alternative:
			const alternativeIndex = 0;

			const alternative = event.results[resultIndex][alternativeIndex];

			// create a new transcript:
			const transcript = new Transcript(this, alternative.transcript, alternative.confidence);

			// insert it in the circular transcript buffer:
			this._bufferIndex = (this._bufferIndex + 1) % this._bufferSize;
			this._bufferLength = Math.min(this._bufferLength + 1, this._bufferSize);
			this._circularBuffer[this._bufferIndex] = transcript;

			this._psychoJS.logger.debug('speech recognition transcript: ', JSON.stringify(transcript));
		};

		// called upon recognition errors:
		this._recognition.onerror = (event) =>
		{
			// lack of speech is not an error:
			if (event.error === 'no-speech')
			{
				return;
			}

			// note: stringifying the event itself does not expose its error code
			this._psychoJS.logger.error('speech recognition error: ', JSON.stringify({
				error: event.error,
				message: event.message
			}));
			this._status = PsychoJS.Status.ERROR;

			// reject the pending Transcriber.start or Transcriber.stop promise, if need be
			// (rejecting an already settled promise has no effect):
			if (this._errorCallback)
			{
				const errorCallback = this._errorCallback;
				this._errorCallback = undefined;
				this._startCallback = undefined;
				errorCallback({
					origin: 'Transcriber',
					context: 'when transcribing speech to text with transcriber: ' + this._name,
					error: event.error
				});
			}
		};

	}

}
+
+
+
+
+