psychojs/docs/sound_AudioClip.js.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>JSDoc: Source: sound/AudioClip.js</title>

    <script src="scripts/prettify/prettify.js"> </script>
    <script src="scripts/prettify/lang-css.js"> </script>
    <!--[if lt IE 9]>
      <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
    <![endif]-->
    <link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css">
    <link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css">
</head>

<body>

<div id="main">

    <h1 class="page-title">Source: sound/AudioClip.js</h1>


    <section>
        <article>
            <pre class="prettyprint source linenums"><code>/**
 * AudioClip encapsulates an audio recording.
 *
 * @author Alain Pitiot and Sotiri Bakagiannis
 * @version 2021.2.0
 * @copyright (c) 2021 Open Science Tools Ltd. (https://opensciencetools.org)
 * @license Distributed under the terms of the MIT License
 */

import { PsychoJS } from "../core/PsychoJS.js";
import { ExperimentHandler } from "../data/ExperimentHandler.js";
import { PsychObject } from "../util/PsychObject.js";
import * as util from "../util/Util.js";

/**
 * &lt;p>AudioClip encapsulates an audio recording.&lt;/p>
 *
 * &lt;p>The encoded recording (e.g. webm) is decoded into PCM samples as soon as the
 * clip is constructed; playback, duration queries and transcription wait for that
 * decoding to complete.&lt;/p>
 *
 * @name module:sound.AudioClip
 * @class
 * @param {Object} options
 * @param {module:core.PsychoJS} options.psychoJS - the PsychoJS instance
 * @param {String} [options.name= 'audioclip'] - the name used when logging messages
 * @param {string} options.format - the format for the audio file
 * @param {number} options.sampleRateHz - the sampling rate
 * @param {Blob} options.data - the audio data, in the given format, at the given sampling rate
 * @param {boolean} [options.autoLog= false] - whether or not to log
 */
export class AudioClip extends PsychObject
{
	constructor({ psychoJS, name, sampleRateHz, format, data, autoLog } = {})
	{
		super(psychoJS);

		this._addAttribute("name", name, "audioclip");
		this._addAttribute("format", format);
		this._addAttribute("sampleRateHz", sampleRateHz);
		this._addAttribute("data", data);
		// same (name, value, default) order as the "name" attribute above: the caller's
		// choice is the value and false is only the fallback
		this._addAttribute("autoLog", autoLog, false);
		this._addAttribute("status", AudioClip.Status.CREATED);

		// add a volume attribute, for playback:
		this._addAttribute("volume", 1.0);

		if (this._autoLog)
		{
			this._psychoJS.experimentLogger.exp(`Created ${this.name} = ${this.toString()}`);
		}

		// decode the blob into an audio buffer; the outcome is picked up later by the
		// methods that await _decodeAudio():
		this._decodeAudio();
	}

	/**
	 * Set the volume of the playback.
	 *
	 * The new volume is applied the next time the playback is started.
	 *
	 * @name module:sound.AudioClip#setVolume
	 * @function
	 * @public
	 * @param {number} volume - the volume of the playback (must be between 0.0 and 1.0)
	 */
	setVolume(volume)
	{
		this._volume = volume;
	}

	/**
	 * Start playing the audio clip.
	 *
	 * @name module:sound.AudioClip#startPlayback
	 * @function
	 * @public
	 * @returns {Promise} a promise resolved once the playback has been started, and
	 * 	rejected if the audio data could not be decoded
	 */
	async startPlayback()
	{
		this._psychoJS.logger.debug("request to play the audio clip");

		// wait for the decoding to complete:
		await this._decodeAudio();

		// note: we need to prepare the audio graph anew each time since, for instance, an
		// AudioBufferSourceNode can only be played once
		// ref: https://developer.mozilla.org/en-US/docs/Web/API/AudioBufferSourceNode
		const audioContext = this._audioContext;

		// source node, fed from the in-memory audio data in _audioBuffer:
		const source = audioContext.createBufferSource();
		source.buffer = this._audioBuffer;

		// gain node, so we can control the volume:
		const gainNode = audioContext.createGain();
		gainNode.gain.value = this._volume;

		// source -> gain -> speakers:
		source.connect(gainNode);
		gainNode.connect(audioContext.destination);

		this._source = source;
		this._gainNode = gainNode;

		// start the playback:
		source.start();
	}

	/**
	 * Stop playing the audio clip.
	 *
	 * Calling this method before any playback was started is a no-op.
	 *
	 * @name module:sound.AudioClip#stopPlayback
	 * @function
	 * @public
	 * @param {number} [fadeDuration = 17] - how long the fading out should last, in ms
	 * 	(currently ignored)
	 */
	async stopPlayback(fadeDuration = 17)
	{
		// TODO deal with fade duration

		// there is no source node until startPlayback has run: nothing to stop
		if (!this._source)
		{
			return;
		}

		// stop the playback:
		this._source.stop();
	}

	/**
	 * Get the duration of the audio clip, in seconds.
	 *
	 * @name module:sound.AudioClip#getDuration
	 * @function
	 * @public
	 * @returns {Promise&lt;number>} the duration of the audio clip
	 */
	async getDuration()
	{
		// wait for the decoding to complete:
		await this._decodeAudio();

		return this._audioBuffer.duration;
	}

	/**
	 * Upload the audio clip to the pavlovia server.
	 *
	 * When the clip cannot be uploaded (the experiment is not running on the server, is
	 * not in RUNNING status, or is being piloted), it is offered as a download instead.
	 *
	 * @name module:sound.AudioClip#upload
	 * @function
	 * @public
	 * @returns {*} the result of ServerManager.uploadAudioVideo, or undefined when the
	 * 	clip was offered as a download
	 */
	upload()
	{
		this._psychoJS.logger.debug("request to upload the audio clip to pavlovia.org");

		// add a format-dependent audio extension to the name:
		const filename = `${this._name}${util.extensionFromMimeType(this._format)}`;

		// if the audio recording cannot be uploaded, e.g. the experiment is running locally, or
		// if it is piloting mode, then we offer the audio clip as a file for download:
		const isOnServer = (this._psychoJS.getEnvironment() === ExperimentHandler.Environment.SERVER);
		if (
			!isOnServer
			|| this._psychoJS.config.experiment.status !== "RUNNING"
			|| this._psychoJS._serverMsg.has("__pilotToken")
		)
		{
			return this.download(filename);
		}

		// upload the data:
		return this._psychoJS.serverManager.uploadAudioVideo({
			mediaBlob: this._data,
			tag: filename,
		});
	}

	/**
	 * Offer the audio clip to the participant as a sound file to download.
	 *
	 * @name module:sound.AudioClip#download
	 * @function
	 * @public
	 * @param {string} [filename = "audio.webm"] - the name suggested for the downloaded file
	 */
	download(filename = "audio.webm")
	{
		const objectUrl = window.URL.createObjectURL(this._data);

		// a temporary anchor is the portable way to trigger a named download:
		const anchor = document.createElement("a");
		anchor.href = objectUrl;
		anchor.download = filename;
		document.body.appendChild(anchor);
		anchor.click();
		document.body.removeChild(anchor);

		// release the object URL so the blob can be garbage collected; this is deferred
		// since the browser may still be reading the blob when click() returns:
		const revokeDelayMs = 40000;
		window.setTimeout(() => window.URL.revokeObjectURL(objectUrl), revokeDelayMs);
	}

	/**
	 * Transcribe the audio clip.
	 *
	 * @param {Object} options
	 * @param {Symbol} options.engine - the speech-to-text engine
	 * @param {String} options.languageCode - the BCP-47 language code for the recognition,
	 * 	e.g. 'en-GB'
	 * @return {Promise} a promise resolving to the transcript and associated
	 * 	transcription confidence; it is rejected with an {origin, context, error} object
	 * 	when the engine key is missing or the engine is not supported
	 */
	async transcribe({ engine, languageCode } = {})
	{
		const response = {
			origin: "AudioClip.transcribe",
			context: `when transcribing audio clip: ${this._name}`,
		};

		this._psychoJS.logger.debug(response);

		// get the secret key from the experiment configuration; an experiment without
		// any key is reported as a missing key rather than as a TypeError:
		const fullEngineName = `sound.AudioClip.Engine.${Symbol.keyFor(engine)}`;
		const experimentKeys = this._psychoJS.config.experiment.keys || [];
		let transcriptionKey;
		for (const key of experimentKeys)
		{
			if (key.name === fullEngineName)
			{
				transcriptionKey = key.value;
			}
		}
		if (typeof transcriptionKey === "undefined")
		{
			throw {
				...response,
				error: `missing key for engine: ${fullEngineName}`,
			};
		}

		// reject unknown engines before spending time on the decoding:
		if (engine !== AudioClip.Engine.GOOGLE)
		{
			throw {
				...response,
				// a Symbol cannot be interpolated directly into a template literal:
				error: `unsupported speech-to-text engine: ${String(engine)}`,
			};
		}

		// wait for the decoding to complete:
		await this._decodeAudio();

		return this._GoogleTranscribe(transcriptionKey, languageCode);
	}

	/**
	 * Transcribe the audio clip using the Google Cloud Speech-To-Text Engine.
	 *
	 * ref: https://cloud.google.com/speech-to-text/docs/reference/rest/v1/speech/recognize
	 *
	 * @param {String} transcriptionKey - the secret key to the Google service
	 * @param {String} languageCode - the BCP-47 language code for the recognition, e.g. 'en-GB'
	 * @return {Promise} a promise resolving to the transcript and associated
	 * 	transcription confidence ({transcript: "", confidence: -1} when the service
	 * 	returned no result); it is rejected if the request or the parsing of the
	 * 	response fails
	 */
	async _GoogleTranscribe(transcriptionKey, languageCode)
	{
		// convert the Float32 PCM audio data to signed 16-bit integers; samples are
		// clamped to [-1, 1] first, so that out-of-range values saturate instead of
		// wrapping around:
		const pcm16 = Int16Array.from(this._audioData, (sample) =>
		{
			const clamped = Math.max(-1, Math.min(1, sample));
			return (clamped >= 0) ? clamped * 0x7FFF : clamped * 0x8000;
		});

		// encode it to base64:
		const base64Data = this._base64ArrayBuffer(pcm16.buffer);

		// query the Google speech-to-text service:
		const body = {
			config: {
				encoding: "LINEAR16",
				sampleRateHertz: this._sampleRateHz,
				languageCode,
			},
			audio: {
				content: base64Data,
			},
		};

		const url = `https://speech.googleapis.com/v1/speech:recognize?key=${encodeURIComponent(transcriptionKey)}`;

		const response = await fetch(url, {
			method: "POST",
			headers: {
				"Content-Type": "application/json",
			},
			body: JSON.stringify(body),
		});

		// convert the response to json:
		const decodedResponse = await response.json();
		this._psychoJS.logger.debug("speech.googleapis.com response:", JSON.stringify(decodedResponse));

		// TODO deal with more than one results and/or alternatives
		const { results = [] } = decodedResponse;
		if (results.length > 0)
		{
			return results[0].alternatives[0];
		}

		// no transcription available:
		return {
			transcript: "",
			confidence: -1,
		};
	}

	/**
	 * Decode the formatted audio data (e.g. webm) into a 32bit float PCM audio buffer.
	 *
	 * The decoding is started by the first call only; all calls return the same promise.
	 *
	 * @return {Promise} a promise resolved when the PCM data is available in _audioData,
	 * 	and rejected if the data could not be read or decoded
	 */
	_decodeAudio()
	{
		this._psychoJS.logger.debug("request to decode the data of the audio clip");

		if (typeof this._decodingPromise === "undefined")
		{
			const decodingPromise = this._startDecoding();

			// some callers, e.g. the constructor, do not wait for the outcome: this no-op
			// handler keeps a failure from being reported as an unhandled rejection, while
			// callers that do await the promise still receive it
			decodingPromise.catch(() => {});

			this._decodingPromise = decodingPromise;
		}

		return this._decodingPromise;
	}

	/**
	 * Read the audio blob and decode it with a newly created audio context.
	 *
	 * @return {Promise} a promise settled with the outcome of the decoding
	 */
	_startDecoding()
	{
		this._status = AudioClip.Status.DECODING;
		this._audioData = null;
		this._source = null;
		this._gainNode = null;

		this._audioContext = new (window.AudioContext || window.webkitAudioContext)({
			sampleRate: this._sampleRateHz,
		});

		// read the blob into an ArrayBuffer:
		const reader = new window.FileReader();
		const encodedData = new Promise((resolve, reject) =>
		{
			reader.onload = () => resolve(reader.result);
			reader.onerror = () => reject(reader.error);
		});
		reader.readAsArrayBuffer(this._data);

		return encodedData
			// decode the ArrayBuffer containing the formatted audio data (e.g. webm)
			// into an audio buffer:
			.then((arrayBuffer) => this._audioContext.decodeAudioData(arrayBuffer))
			.then((audioBuffer) =>
			{
				this._audioBuffer = audioBuffer;

				// get the Float32Array containing the PCM data:
				this._audioData = audioBuffer.getChannelData(0);

				// we are now ready to translate and play:
				this._status = AudioClip.Status.READY;
			})
			.catch((error) =>
			{
				console.error(error);

				// the clip is no longer decoding, and the waiting callers must be told:
				this._status = AudioClip.Status.CREATED;
				throw error;
			});
	}

	/**
	 * Convert an array buffer to a base64 string.
	 *
	 * @note this is heavily inspired by the following post by @Grantlyk:
	 * https://gist.github.com/jonleighton/958841#gistcomment-1953137
	 * It is necessary since the following approach only works for small buffers:
	 * const dataAsString = String.fromCharCode.apply(null, new Uint8Array(buffer));
	 * base64Data = window.btoa(dataAsString);
	 *
	 * @param arrayBuffer - the input buffer
	 * @return {string} the base64 encoded input buffer
	 */
	_base64ArrayBuffer(arrayBuffer)
	{
		const encodings = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

		const bytes = new Uint8Array(arrayBuffer);
		const byteRemainder = bytes.byteLength % 3;
		const mainLength = bytes.byteLength - byteRemainder;

		let base64 = "";

		// Main loop deals with bytes in chunks of 3; mainLength is a multiple of 3, so
		// the index lands exactly on it
		for (let i = 0; i !== mainLength; i += 3)
		{
			// Combine the three bytes into a single 24-bit integer
			const chunk = bytes[i] * 65536 + bytes[i + 1] * 256 + bytes[i + 2];

			// Split it into four 6-bit segments (64 = 2^6), most significant first, and
			// convert each segment to the appropriate ASCII encoding
			base64 += encodings[chunk >> 18]
				+ encodings[(chunk >> 12) % 64]
				+ encodings[(chunk >> 6) % 64]
				+ encodings[chunk % 64];
		}

		// Deal with the remaining bytes and padding
		if (byteRemainder === 1)
		{
			const chunk = bytes[mainLength];

			// top 6 bits, then the 2 remaining bits followed by 4 zero bits
			const a = chunk >> 2;
			const b = (chunk % 4) * 16;

			base64 += `${encodings[a]}${encodings[b]}==`;
		}
		else if (byteRemainder === 2)
		{
			const chunk = bytes[mainLength] * 256 + bytes[mainLength + 1];

			// two full 6-bit segments, then the 4 remaining bits followed by 2 zero bits
			const a = chunk >> 10;
			const b = (chunk >> 4) % 64;
			const c = (chunk % 16) * 4;

			base64 += `${encodings[a]}${encodings[b]}${encodings[c]}=`;
		}

		return base64;
	}
}

/**
 * Recognition engines.
 *
 * The object is frozen, in keeping with its read-only status.
 *
 * @name module:sound.AudioClip#Engine
 * @enum {Symbol}
 * @readonly
 * @public
 */
AudioClip.Engine = Object.freeze({
	/**
	 * Google Cloud Speech-to-Text.
	 */
	GOOGLE: Symbol.for("GOOGLE"),
});

/**
 * AudioClip status.
 *
 * The object is frozen, in keeping with its read-only status.
 *
 * @name module:sound.AudioClip#Status
 * @enum {Symbol}
 * @readonly
 * @public
 */
AudioClip.Status = Object.freeze({
	/**
	 * The initial status, set by the constructor.
	 */
	CREATED: Symbol.for("CREATED"),

	/**
	 * The formatted audio data is being decoded.
	 */
	DECODING: Symbol.for("DECODING"),

	/**
	 * The decoding completed: the PCM audio data is available.
	 */
	READY: Symbol.for("READY"),
});
</code></pre>
        </article>
    </section>


</div>

<nav>
    <h2><a href="index.html">Home</a></h2><h3>Modules</h3><ul><li><a href="module-core.html">core</a></li><li><a href="module-data.html">data</a></li><li><a href="module-sound.html">sound</a></li><li><a href="module-util.html">util</a></li><li><a href="module-visual.html">visual</a></li></ul><h3>Classes</h3><ul><li><a href="FaceDetector_FaceDetector.html">FaceDetector</a></li><li><a href="module.data.MultiStairHandler.html">MultiStairHandler</a></li><li><a href="module.data.QuestHandler.html">QuestHandler</a></li><li><a href="module-core.BuilderKeyResponse.html">BuilderKeyResponse</a></li><li><a href="module-core.EventManager.html">EventManager</a></li><li><a href="module-core.GUI.html">GUI</a></li><li><a href="module-core.Keyboard.html">Keyboard</a></li><li><a href="module-core.KeyPress.html">KeyPress</a></li><li><a href="module-core.Logger.html">Logger</a></li><li><a href="module-core.MinimalStim.html">MinimalStim</a></li><li><a href="module-core.Mouse.html">Mouse</a></li><li><a href="module-core.PsychoJS.html">PsychoJS</a></li><li><a href="module-core.ServerManager.html">ServerManager</a></li><li><a href="module-core.Window.html">Window</a></li><li><a href="module-data.ExperimentHandler.html">ExperimentHandler</a></li><li><a href="module-data.MultiStairHandler.html">MultiStairHandler</a></li><li><a href="module-data.QuestHandler.html">QuestHandler</a></li><li><a href="module-data.Shelf.html">Shelf</a></li><li><a href="module-data.TrialHandler.html">TrialHandler</a></li><li><a href="module-hardware.Camera.html">Camera</a></li><li><a href="module-sound.AudioClip.html">AudioClip</a></li><li><a href="module-sound.AudioClipPlayer.html">AudioClipPlayer</a></li><li><a href="module-sound.Microphone.html">Microphone</a></li><li><a href="module-sound.Sound.html">Sound</a></li><li><a href="module-sound.TonePlayer.html">TonePlayer</a></li><li><a href="module-sound.TrackPlayer.html">TrackPlayer</a></li><li><a href="module-sound.Transcriber.html">Transcriber</a></li><li><a 
href="module-sound.Transcript.html">Transcript</a></li><li><a href="module-util.Clock.html">Clock</a></li><li><a href="module-util.Color.html">Color</a></li><li><a href="module-util.CountdownTimer.html">CountdownTimer</a></li><li><a href="module-util.EventEmitter.html">EventEmitter</a></li><li><a href="module-util.MixinBuilder.html">MixinBuilder</a></li><li><a href="module-util.MonotonicClock.html">MonotonicClock</a></li><li><a href="module-util.PsychObject.html">PsychObject</a></li><li><a href="module-util.Scheduler.html">Scheduler</a></li><li><a href="module-visual.ButtonStim.html">ButtonStim</a></li><li><a href="module-visual.FaceDetector.html">FaceDetector</a></li><li><a href="module-visual.Form.html">Form</a></li><li><a href="module-visual.GratingStim.html">GratingStim</a></li><li><a href="module-visual.ImageStim.html">ImageStim</a></li><li><a href="module-visual.MovieStim.html">MovieStim</a></li><li><a href="module-visual.Polygon.html">Polygon</a></li><li><a href="module-visual.Rect.html">Rect</a></li><li><a href="module-visual.ShapeStim.html">ShapeStim</a></li><li><a href="module-visual.Slider.html">Slider</a></li><li><a href="module-visual.TextBox.html">TextBox</a></li><li><a href="module-visual.TextStim.html">TextStim</a></li><li><a href="module-visual.VisualStim.html">VisualStim</a></li></ul><h3>Interfaces</h3><ul><li><a href="module-sound.SoundPlayer.html">SoundPlayer</a></li></ul><h3>Mixins</h3><ul><li><a href="module-core.WindowMixin.html">WindowMixin</a></li><li><a href="module-util.ColorMixin.html">ColorMixin</a></li></ul><h3>Global</h3><ul><li><a href="global.html#pad">pad</a></li></ul>
</nav>

<br class="clear">

<footer>
    Documentation generated by <a href="https://github.com/jsdoc/jsdoc">JSDoc 3.6.7</a> on Thu Jun 16 2022 12:47:14 GMT+0200 (Central European Summer Time)
</footer>

<script> prettyPrint(); </script>
<script src="scripts/linenumber.js"> </script>
</body>
</html>