author | Heiner Lohaus <hlohaus@users.noreply.github.com> | 2024-04-09 19:19:33 +0200
---|---|---
committer | Heiner Lohaus <hlohaus@users.noreply.github.com> | 2024-04-09 19:19:33 +0200
commit | 90715e702bbebcf2c3cfd39628c931bbadda28b0 (patch) |
tree | f574bae75b22ee59e5c60e2f6f4017338614fd8c | /projects/text_to_speech/worker.js
parent | Add async client docs (diff) |
Diffstat (limited to 'projects/text_to_speech/worker.js')
-rw-r--r-- | projects/text_to_speech/worker.js | 105
1 file changed, 105 insertions, 0 deletions
diff --git a/projects/text_to_speech/worker.js b/projects/text_to_speech/worker.js
new file mode 100644
index 00000000..249208d0
--- /dev/null
+++ b/projects/text_to_speech/worker.js
@@ -0,0 +1,105 @@
+import { env, Tensor, AutoTokenizer, SpeechT5ForTextToSpeech, SpeechT5HifiGan } from '@xenova/transformers';
+import { encodeWAV } from './utils';
+
+// Disable local model checks
+env.allowLocalModels = false;
+
+// Use the Singleton pattern to enable lazy construction of the pipeline.
+class MyTextToSpeechPipeline {
+
+    static BASE_URL = 'https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/';
+
+    static model_id = 'Xenova/speecht5_tts';
+    static vocoder_id = 'Xenova/speecht5_hifigan';
+
+    static tokenizer_instance = null;
+    static model_instance = null;
+    static vocoder_instance = null;
+
+    static async getInstance(progress_callback = null) {
+        if (this.tokenizer_instance === null) {
+            this.tokenizer_instance = AutoTokenizer.from_pretrained(this.model_id, { progress_callback });
+        }
+
+        if (this.model_instance === null) {
+            this.model_instance = SpeechT5ForTextToSpeech.from_pretrained(this.model_id, {
+                quantized: false,
+                progress_callback,
+            });
+        }
+
+        if (this.vocoder_instance === null) {
+            this.vocoder_instance = SpeechT5HifiGan.from_pretrained(this.vocoder_id, {
+                quantized: false,
+                progress_callback,
+            });
+        }
+
+        return new Promise(async (resolve, reject) => {
+            const result = await Promise.all([
+                this.tokenizer_instance,
+                this.model_instance,
+                this.vocoder_instance,
+            ]);
+            self.postMessage({
+                status: 'ready',
+            });
+            resolve(result);
+        });
+    }
+
+    static async getSpeakerEmbeddings(speaker_id) {
+        // e.g., `cmu_us_awb_arctic-wav-arctic_a0001`
+        const speaker_embeddings_url = `${this.BASE_URL}${speaker_id}.bin`;
+        const speaker_embeddings = new Tensor(
+            'float32',
+            new Float32Array(await (await fetch(speaker_embeddings_url)).arrayBuffer()),
+            [1, 512]
+        );
+        return speaker_embeddings;
+    }
+}
+
+// Mapping of cached speaker embeddings
+const speaker_embeddings_cache = new Map();
+
+// Listen for messages from the main thread
+self.addEventListener('message', async (event) => {
+    // Load the pipeline
+    const [tokenizer, model, vocoder] = await MyTextToSpeechPipeline.getInstance(x => {
+        // We also add a progress callback so that we can track model loading.
+        self.postMessage(x);
+    });
+
+    // Tokenize the input
+    const { input_ids } = tokenizer(event.data.text);
+
+    // Load the speaker embeddings
+    let speaker_embeddings = speaker_embeddings_cache.get(event.data.speaker_id);
+    if (speaker_embeddings === undefined) {
+        speaker_embeddings = await MyTextToSpeechPipeline.getSpeakerEmbeddings(event.data.speaker_id);
+        speaker_embeddings_cache.set(event.data.speaker_id, speaker_embeddings);
+    }
+
+    // Generate the waveform
+    let response;
+    try {
+        response = await model.generate_speech(input_ids, speaker_embeddings, { vocoder });
+    } catch(e) {
+        self.postMessage({
+            status: 'error',
+            exception: e,
+        });
+        throw e;
+    }
+    const { waveform } = response;
+
+    // Encode the waveform as a WAV file
+    const wav = encodeWAV(waveform.data);
+
+    // Send the output back to the main thread
+    self.postMessage({
+        status: 'complete',
+        output: new Blob([wav], { type: 'audio/wav' }),
+    });
+});
\ No newline at end of file
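
For orientation (not part of the commit): a minimal sketch of how a main-thread script might drive this worker. The worker file location, the module-worker setup, and the example text are assumptions for illustration; the message fields (`text`, `speaker_id`), the example speaker id, and the `ready`/`complete`/`error` statuses come from the worker code above.

```js
// main.js (hypothetical) - drives projects/text_to_speech/worker.js
const worker = new Worker(new URL('./worker.js', import.meta.url), { type: 'module' });

worker.addEventListener('message', (event) => {
    const data = event.data;
    if (data.status === 'ready') {
        console.log('Models loaded');
    } else if (data.status === 'complete') {
        // data.output is a Blob of type audio/wav; play it directly
        const audio = new Audio(URL.createObjectURL(data.output));
        audio.play();
    } else if (data.status === 'error') {
        console.error('TTS failed:', data.exception);
    } else {
        // Anything else is a model-loading progress event forwarded
        // by the worker's progress_callback
        console.log('Loading:', data);
    }
});

// Request speech; the speaker id must match a .bin file in the
// Xenova/cmu-arctic-xvectors-extracted dataset used by the worker
worker.postMessage({
    text: 'Hello from the text to speech worker.',
    speaker_id: 'cmu_us_awb_arctic-wav-arctic_a0001',
});
```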
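
The `encodeWAV` helper is imported from `./utils`, which is not included in this diff. Purely as a hedged stand-in (the repository's actual helper may differ), a self-contained encoder for the mono float32 samples produced above, assuming SpeechT5's 16 kHz output rate, could look like this:

```js
// Hypothetical stand-in for the ./utils encodeWAV helper - not the repository's implementation.
// Wraps a Float32Array in a 44-byte RIFF/WAVE header (format 3 = IEEE float, mono).
export function encodeWAV(samples, sampleRate = 16000) {
    const buffer = new ArrayBuffer(44 + samples.length * 4);
    const view = new DataView(buffer);

    const writeString = (offset, str) => {
        for (let i = 0; i < str.length; i++) view.setUint8(offset + i, str.charCodeAt(i));
    };

    writeString(0, 'RIFF');
    view.setUint32(4, 36 + samples.length * 4, true); // RIFF chunk size
    writeString(8, 'WAVE');
    writeString(12, 'fmt ');
    view.setUint32(16, 16, true);                     // fmt chunk size
    view.setUint16(20, 3, true);                      // audio format 3 = IEEE float
    view.setUint16(22, 1, true);                      // mono
    view.setUint32(24, sampleRate, true);             // sample rate
    view.setUint32(28, sampleRate * 4, true);         // byte rate
    view.setUint16(32, 4, true);                      // block align (4 bytes per frame)
    view.setUint16(34, 32, true);                     // bits per sample
    writeString(36, 'data');
    view.setUint32(40, samples.length * 4, true);     // data chunk size

    // Write samples as little-endian 32-bit floats
    for (let i = 0; i < samples.length; i++) {
        view.setFloat32(44 + i * 4, samples[i], true);
    }
    return buffer;
}
```

Strict WAV readers may also expect a `fact` chunk for float-format files, but most browsers and players accept this minimal header.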