From c1b4414fb9f7b2db819f0bea5cbce97be50af2de Mon Sep 17 00:00:00 2001 From: Roberts Slisans Date: Thu, 14 Nov 2024 12:55:55 +0200 Subject: [PATCH] feat: experimental deepspeed on windows (#419) * add experimental deepspeed wheel for win * add japanese and italian for bark voice * fix stable audio layout * create React UI proxy base * update README --- README.md | 4 ++ react-ui/src/pages/api/gradio/[name].tsx | 80 ++++++++++------------ requirements.txt | 1 + tts_webui/bark/clone/tab_voice_clone.py | 2 + tts_webui/stable_audio/stable_audio_tab.py | 2 +- 5 files changed, 45 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index d248f50..99e548d 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,10 @@ ## Changelog +Nov 14: +* Add experimental Windows deepspeed wheel. +* Add more languages to Bark voice clone. + Nov 11: * Switch to a fixed fairseq version for windows reducing installation conflicts and speeding up updates. diff --git a/react-ui/src/pages/api/gradio/[name].tsx b/react-ui/src/pages/api/gradio/[name].tsx index e3d081a..537c293 100644 --- a/react-ui/src/pages/api/gradio/[name].tsx +++ b/react-ui/src/pages/api/gradio/[name].tsx @@ -2,7 +2,12 @@ import { Client } from "@gradio/client"; import type { NextApiRequest, NextApiResponse } from "next"; import { getFile } from "../../../backend-utils/getFile"; import { GradioFile } from "../../../types/GradioFile"; -import { PayloadMessage, PredictFunction } from "@gradio/client/dist/types"; +import { + GradioEvent, + PayloadMessage, + PredictFunction, + SubmitIterable, +} from "@gradio/client/dist/types"; type Data = { data: any }; @@ -53,11 +58,40 @@ const extractChoicesTuple = ({ choices }: GradioChoices) => const getChoices = (result: { data: GradioChoices[] }) => extractChoices(result?.data[0]); +const proxyGradioFile = (data: any) => + // typeof data === "object" && data.__type__ === "file" + // // ? new GradioFile(data.url, data.name) + // : data; + data + +const proxyGradioFiles = (data: any[]) => + Array.isArray(data) + ? data.map(proxyGradioFile) + : // : typeof data === "object" + // ? Object.fromEntries( + // Object.entries(data).map(([key, value]) => [ + // key, + // proxyGradioFiles(value), + // ]) + // ) + data; + const gradioPredict = (...args: Parameters) => - getClient().then((app) => app.predict(...args)) as Promise<{ data: T }>; + // getClient().then((app) => app.predict(...args)) as Promise<{ data: T }>; + getClient() + .then((app) => app.predict(...args) as Promise<{ data: T }>) + .then((result: { data: T }) => ({ + ...result, + data: proxyGradioFiles(result?.data) as T, + })); const gradioSubmit = (...args: Parameters) => - getClient().then((app) => app.submit(...args)); + getClient().then( + (app) => + app.submit(...args) as SubmitIterable< + ({ data: T } & PayloadMessage) | GradioEvent + > + ); async function musicgen({ melody, model, ...params }) { const melodyBlob = await getFile(melody); @@ -155,11 +189,6 @@ async function bark({ }; } -const reload_old_generation_dropdown = () => - gradioPredict<[GradioChoices]>("/reload_old_generation_dropdown").then( - getChoices - ); - const bark_favorite = async ({ folder_root }) => gradioPredict<[Object]>("/bark_favorite", [folder_root]).then( (result) => result?.data @@ -237,15 +266,6 @@ async function tortoise({ return results.slice(0, -1); } -const tortoise_refresh_models = () => - gradioPredict<[GradioChoices]>("/tortoise_refresh_models").then(getChoices); - -const tortoise_refresh_voices = () => - gradioPredict<[GradioChoices]>("/tortoise_refresh_voices").then(getChoices); - -const tortoise_open_models = () => gradioPredict<[]>("/tortoise_open_models"); -const tortoise_open_voices = () => gradioPredict<[]>("/tortoise_open_voices"); - async function tortoise_apply_model_settings({ model, // string (Option from: ['Default']) in 'parameter_2488' Dropdown component kv_cache, // boolean in 'parameter_2493' Checkbox component @@ -308,32 +328,6 @@ async function rvc({ const delete_generation = ({ folder_root }) => gradioPredict<[]>("/delete_generation", [folder_root]); -const save_to_voices = ({ history_npz }) => - gradioPredict<[Object]>("/save_to_voices", [history_npz]); - -const save_config_bark = ({ - text_use_gpu, - text_use_small, - coarse_use_gpu, - coarse_use_small, - fine_use_gpu, - fine_use_small, - codec_use_gpu, - load_models_on_startup, -}) => - gradioPredict<[string]>("/save_config_bark", [ - text_use_gpu, // boolean in 'Use GPU' Checkbox component - text_use_small, // boolean in 'Use small model' Checkbox component - coarse_use_gpu, // boolean in 'Use GPU' Checkbox component - coarse_use_small, // boolean in 'Use small model' Checkbox component - fine_use_gpu, // boolean in 'Use GPU' Checkbox component - fine_use_small, // boolean in 'Use small model' Checkbox component - codec_use_gpu, // boolean in 'Use GPU for codec' Checkbox component - load_models_on_startup, // boolean in 'Load Bark models on startup' Checkbox component - ]).then((result) => result?.data[0]); - -// get_config_bark - async function get_config_bark() { const result = await gradioPredict< [ diff --git a/requirements.txt b/requirements.txt index 1a20536..85ff3f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ beartype>=0.16.1 # workaround for a bug # no longer required directly # transformers==4.36.1 # cross-compatibility iso639-lang==2.2.3 pillow==10.3.0 # for gradio, conda fix +deepspeed @ https://github.com/rsxdalv/DeepSpeed/releases/download/v0.15.5-test/deepspeed-0.15.5+unknown-cp310-cp310-win_amd64.whl ; sys_platform == 'win32' # Apache 2.0 diff --git a/tts_webui/bark/clone/tab_voice_clone.py b/tts_webui/bark/clone/tab_voice_clone.py index 0e3de01..c455ed1 100644 --- a/tts_webui/bark/clone/tab_voice_clone.py +++ b/tts_webui/bark/clone/tab_voice_clone.py @@ -184,6 +184,8 @@ def tab_voice_clone(): "es_tokenizer.pth @ Lancer1408/bark-es-tokenizer", "portuguese-HuBERT-quantizer_24_epoch.pth @ MadVoyager/bark-voice-cloning-portuguese-HuBERT-quantizer", "turkish_model_epoch_14.pth @ egeadam/bark-voice-cloning-turkish-HuBERT-quantizer", + "japanese-HuBERT-quantizer_24_epoch.pth @ junwchina/bark-voice-cloning-japanese-HuBERT-quantizer", + "it_tokenizer.pth @ gpwr/bark-it-tokenizer", ], value="quantifier_hubert_base_ls960_14.pth @ GitMylo/bark-voice-cloning", allow_custom_value=True, diff --git a/tts_webui/stable_audio/stable_audio_tab.py b/tts_webui/stable_audio/stable_audio_tab.py index 1c707fb..87cffe8 100644 --- a/tts_webui/stable_audio/stable_audio_tab.py +++ b/tts_webui/stable_audio/stable_audio_tab.py @@ -222,7 +222,7 @@ def model_select_ui(): outputs=[model_select], api_name="stable_audio_refresh_models", ) - load_model_button = gr.Button(value="Load model") + load_model_button = gr.Button(value="Load model") with gr.Column(): gr.Markdown(