remove moved func

2024-11-27 17:00:54 +01:00 · 2023-08-19 22:47:10 +09:00 · 2023-08-19 22:47:10 +09:00 · c054138f0f
commit c054138f0f
parent a8854a71c4
1 changed files with 18 additions and 447 deletions
--- a/infer-web.py
+++ b/infer-web.py
@ -4,7 +4,7 @@ import sys

 now_dir = os.getcwd()
 sys.path.append(now_dir)
-import traceback, pdb
+import traceback
 import warnings

 import numpy as np
@ -19,25 +19,19 @@ from subprocess import Popen
 from time import sleep

 import faiss
-import ffmpeg
 import gradio as gr
-import soundfile as sf
-from config import Config
+from configs.config import Config
 import fairseq
 from i18n import I18nAuto
-from lib.infer_pack.models import (
-    SynthesizerTrnMs256NSFsid,
-    SynthesizerTrnMs256NSFsid_nono,
-    SynthesizerTrnMs768NSFsid,
-    SynthesizerTrnMs768NSFsid_nono,
-)
-from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
-from infer_uvr5 import _audio_pre_, _audio_pre_new
-from lib.audio import load_audio
 from lib.train.process_ckpt import change_info, extract_small_model, merge, show_info
-from vc_infer_pipeline import VC
 from sklearn.cluster import MiniBatchKMeans

+from dotenv import load_dotenv
+
+from infer.modules.vc.modules import VC
+from infer.modules.uvr5.modules import uvr
+from infer.modules.onnx.export import export_onnx
+
 logging.getLogger("numba").setLevel(logging.WARNING)

 now_dir = os.getcwd()
@ -48,12 +42,13 @@ shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_error
 os.makedirs(tmp, exist_ok=True)
 os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
 os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
-os.environ["TEMP"] = tmp
 warnings.filterwarnings("ignore")
 torch.manual_seed(114514)

-
+load_dotenv()
 config = Config()
+vc = VC(config)
+
 if config.dml == True:

    def forward_dml(ctx, x, scale):
@ -127,27 +122,10 @@ class ToolButton(gr.Button, gr.components.FormComponent):
        return "button"


-hubert_model = None
-
-
-def load_hubert():
-    global hubert_model
-    models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
-        ["hubert_base.pt"],
-        suffix="",
-    )
-    hubert_model = models[0]
-    hubert_model = hubert_model.to(config.device)
-    if config.is_half:
-        hubert_model = hubert_model.half()
-    else:
-        hubert_model = hubert_model.float()
-    hubert_model.eval()
-
-
-weight_root = "weights"
-weight_uvr5_root = "uvr5_weights"
+weight_root = os.getenv("weight_root")
+weight_uvr5_root = os.getenv("weight_uvr5_root")
 index_root = "logs"
+
 names = []
 for name in os.listdir(weight_root):
    if name.endswith(".pth"):
@ -162,365 +140,6 @@ for name in os.listdir(weight_uvr5_root):
    if name.endswith(".pth") or "onnx" in name:
        uvr5_names.append(name.replace(".pth", ""))

-cpt = None
-
-
-def vc_single(
-    sid,
-    input_audio_path,
-    f0_up_key,
-    f0_file,
-    f0_method,
-    file_index,
-    file_index2,
-    # file_big_npy,
-    index_rate,
-    filter_radius,
-    resample_sr,
-    rms_mix_rate,
-    protect,
-):  # spk_item, input_audio0, vc_transform0,f0_file,f0method0
-    global tgt_sr, net_g, vc, hubert_model, version, cpt
-    if input_audio_path is None:
-        return "You need to upload an audio", None
-    f0_up_key = int(f0_up_key)
-    try:
-        audio = load_audio(input_audio_path, 16000)
-        audio_max = np.abs(audio).max() / 0.95
-        if audio_max > 1:
-            audio /= audio_max
-        times = [0, 0, 0]
-        if not hubert_model:
-            load_hubert()
-        if_f0 = cpt.get("f0", 1)
-        file_index = (
-            (
-                file_index.strip(" ")
-                .strip('"')
-                .strip("\n")
-                .strip('"')
-                .strip(" ")
-                .replace("trained", "added")
-            )
-            if file_index != ""
-            else file_index2
-        )  # 防止小白写错，自动帮他替换掉
-        # file_big_npy = (
-        #     file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-        # )
-        audio_opt = vc.pipeline(
-            hubert_model,
-            net_g,
-            sid,
-            audio,
-            input_audio_path,
-            times,
-            f0_up_key,
-            f0_method,
-            file_index,
-            # file_big_npy,
-            index_rate,
-            if_f0,
-            filter_radius,
-            tgt_sr,
-            resample_sr,
-            rms_mix_rate,
-            version,
-            protect,
-            f0_file=f0_file,
-        )
-        index_info = (
-            "Using index:%s." % file_index
-            if os.path.exists(file_index)
-            else "Index not used."
-        )
-        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
-            index_info,
-            times[0],
-            times[1],
-            times[2],
-        ), (
-            resample_sr if resample_sr >= 16000 and tgt_sr != resample_sr else tgt_sr,
-            audio_opt,
-        )
-    except:
-        info = traceback.format_exc()
-        print(info)
-        return info, (None, None)
-
-
-def vc_multi(
-    sid,
-    dir_path,
-    opt_root,
-    paths,
-    f0_up_key,
-    f0_method,
-    file_index,
-    file_index2,
-    # file_big_npy,
-    index_rate,
-    filter_radius,
-    resample_sr,
-    rms_mix_rate,
-    protect,
-    format1,
-):
-    try:
-        dir_path = (
-            dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-        )  # 防止小白拷路径头尾带了空格和"和回车
-        opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-        os.makedirs(opt_root, exist_ok=True)
-        try:
-            if dir_path != "":
-                paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
-            else:
-                paths = [path.name for path in paths]
-        except:
-            traceback.print_exc()
-            paths = [path.name for path in paths]
-        infos = []
-        for path in paths:
-            info, opt = vc_single(
-                sid,
-                path,
-                f0_up_key,
-                None,
-                f0_method,
-                file_index,
-                file_index2,
-                # file_big_npy,
-                index_rate,
-                filter_radius,
-                resample_sr,
-                rms_mix_rate,
-                protect,
-            )
-            if "Success" in info:
-                try:
-                    tgt_sr, audio_opt = opt
-                    if format1 in ["wav", "flac"]:
-                        sf.write(
-                            "%s/%s.%s" % (opt_root, os.path.basename(path), format1),
-                            audio_opt,
-                            tgt_sr,
-                        )
-                    else:
-                        path = "%s/%s.wav" % (opt_root, os.path.basename(path))
-                        sf.write(
-                            path,
-                            audio_opt,
-                            tgt_sr,
-                        )
-                        if os.path.exists(path):
-                            os.system(
-                                "ffmpeg -i %s -vn %s -q:a 2 -y"
-                                % (path, path[:-4] + ".%s" % format1)
-                            )
-                except:
-                    info += traceback.format_exc()
-            infos.append("%s->%s" % (os.path.basename(path), info))
-            yield "\n".join(infos)
-        yield "\n".join(infos)
-    except:
-        yield traceback.format_exc()
-
-
-def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
-    infos = []
-    try:
-        inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-        save_root_vocal = (
-            save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-        )
-        save_root_ins = (
-            save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-        )
-        if model_name == "onnx_dereverb_By_FoxJoy":
-            from MDXNet import MDXNetDereverb
-
-            pre_fun = MDXNetDereverb(15)
-        else:
-            func = _audio_pre_ if "DeEcho" not in model_name else _audio_pre_new
-            pre_fun = func(
-                agg=int(agg),
-                model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
-                device=config.device,
-                is_half=config.is_half,
-            )
-        if inp_root != "":
-            paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
-        else:
-            paths = [path.name for path in paths]
-        for path in paths:
-            inp_path = os.path.join(inp_root, path)
-            need_reformat = 1
-            done = 0
-            try:
-                info = ffmpeg.probe(inp_path, cmd="ffprobe")
-                if (
-                    info["streams"][0]["channels"] == 2
-                    and info["streams"][0]["sample_rate"] == "44100"
-                ):
-                    need_reformat = 0
-                    pre_fun._path_audio_(
-                        inp_path, save_root_ins, save_root_vocal, format0
-                    )
-                    done = 1
-            except:
-                need_reformat = 1
-                traceback.print_exc()
-            if need_reformat == 1:
-                tmp_path = "%s/%s.reformatted.wav" % (tmp, os.path.basename(inp_path))
-                os.system(
-                    "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y"
-                    % (inp_path, tmp_path)
-                )
-                inp_path = tmp_path
-            try:
-                if done == 0:
-                    pre_fun._path_audio_(
-                        inp_path, save_root_ins, save_root_vocal, format0
-                    )
-                infos.append("%s->Success" % (os.path.basename(inp_path)))
-                yield "\n".join(infos)
-            except:
-                infos.append(
-                    "%s->%s" % (os.path.basename(inp_path), traceback.format_exc())
-                )
-                yield "\n".join(infos)
-    except:
-        infos.append(traceback.format_exc())
-        yield "\n".join(infos)
-    finally:
-        try:
-            if model_name == "onnx_dereverb_By_FoxJoy":
-                del pre_fun.pred.model
-                del pre_fun.pred.model_
-            else:
-                del pre_fun.model
-                del pre_fun
-        except:
-            traceback.print_exc()
-        print("clean_empty_cache")
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-    yield "\n".join(infos)
-
-
-def get_index_path_from_model(sid):
-    sel_index_path = ""
-    name = os.path.join("logs", sid.split(".")[0], "")
-    # print(name)
-    for f in index_paths:
-        if name in f:
-            # print("selected index path:", f)
-            sel_index_path = f
-            break
-    return sel_index_path
-
-
-# 一个选项卡全局只能有一个音色
-def get_vc(sid, to_return_protect0, to_return_protect1):
-    global n_spk, tgt_sr, net_g, vc, cpt, version
-    if sid == "" or sid == []:
-        global hubert_model
-        if hubert_model is not None:  # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
-            print("clean_empty_cache")
-            del net_g, n_spk, vc, hubert_model, tgt_sr  # ,cpt
-            hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-            ###楼下不这么折腾清理不干净
-            if_f0 = cpt.get("f0", 1)
-            version = cpt.get("version", "v1")
-            if version == "v1":
-                if if_f0 == 1:
-                    net_g = SynthesizerTrnMs256NSFsid(
-                        *cpt["config"], is_half=config.is_half
-                    )
-                else:
-                    net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
-            elif version == "v2":
-                if if_f0 == 1:
-                    net_g = SynthesizerTrnMs768NSFsid(
-                        *cpt["config"], is_half=config.is_half
-                    )
-                else:
-                    net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
-            del net_g, cpt
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-        return (
-            {"visible": False, "__type__": "update"},
-            {
-                "visible": True,
-                "value": to_return_protect0,
-                "__type__": "update",
-            },
-            {
-                "visible": True,
-                "value": to_return_protect1,
-                "__type__": "update",
-            },
-            "",
-            "",
-        )
-    person = "%s/%s" % (weight_root, sid)
-    print("loading %s" % person)
-
-    cpt = torch.load(person, map_location="cpu")
-    tgt_sr = cpt["config"][-1]
-    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
-    if_f0 = cpt.get("f0", 1)
-    if if_f0 == 0:
-        to_return_protect0 = to_return_protect1 = {
-            "visible": False,
-            "value": 0.33,
-            "__type__": "update",
-        }
-    else:
-        to_return_protect0 = {
-            "visible": True,
-            "value": to_return_protect0,
-            "__type__": "update",
-        }
-        to_return_protect1 = {
-            "visible": True,
-            "value": to_return_protect1,
-            "__type__": "update",
-        }
-    version = cpt.get("version", "v1")
-    if version == "v1":
-        if if_f0 == 1:
-            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
-        else:
-            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
-    elif version == "v2":
-        if if_f0 == 1:
-            net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
-        else:
-            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
-    del net_g.enc_q
-    print(net_g.load_state_dict(cpt["weight"], strict=False))
-    net_g.eval().to(config.device)
-    if config.is_half:
-        net_g = net_g.half()
-    else:
-        net_g = net_g.float()
-    vc = VC(tgt_sr, config)
-    n_spk = cpt["config"][-3]
-    index = {"value": get_index_path_from_model(sid), "__type__": "update"}
-    return (
-        {"visible": True, "maximum": n_spk, "__type__": "update"},
-        to_return_protect0,
-        to_return_protect1,
-        index,
-        index,
-    )
-
-
 def change_choices():
    names = []
    for name in os.listdir(weight_root):
@ -1385,54 +1004,6 @@ def change_f0_method(f0method8):
    return {"visible": visible, "__type__": "update"}


-def export_onnx(ModelPath, ExportedPath):
-    global cpt
-    cpt = torch.load(ModelPath, map_location="cpu")
-    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
-    vec_channels = 256 if cpt.get("version", "v1") == "v1" else 768
-
-    test_phone = torch.rand(1, 200, vec_channels)  # hidden unit
-    test_phone_lengths = torch.tensor([200]).long()  # hidden unit 长度（貌似没啥用）
-    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # 基频（单位赫兹）
-    test_pitchf = torch.rand(1, 200)  # nsf基频
-    test_ds = torch.LongTensor([0])  # 说话人ID
-    test_rnd = torch.rand(1, 192, 200)  # 噪声（加入随机因子）
-
-    device = "cpu"  # 导出时设备（不影响使用模型）
-
-    net_g = SynthesizerTrnMsNSFsidM(
-        *cpt["config"], is_half=False, version=cpt.get("version", "v1")
-    )  # fp32导出（C++要支持fp16必须手动将内存重新排列所以暂时不用fp16）
-    net_g.load_state_dict(cpt["weight"], strict=False)
-    input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
-    output_names = [
-        "audio",
-    ]
-    # net_g.construct_spkmixmap(n_speaker) 多角色混合轨道导出
-    torch.onnx.export(
-        net_g,
-        (
-            test_phone.to(device),
-            test_phone_lengths.to(device),
-            test_pitch.to(device),
-            test_pitchf.to(device),
-            test_ds.to(device),
-            test_rnd.to(device),
-        ),
-        ExportedPath,
-        dynamic_axes={
-            "phone": [1],
-            "pitch": [1],
-            "pitchf": [1],
-            "rnd": [2],
-        },
-        do_constant_folding=False,
-        opset_version=13,
-        verbose=False,
-        input_names=input_names,
-        output_names=output_names,
-    )
-    return "Finished"


 with gr.Blocks(title="RVC WebUI") as app:
@ -1551,7 +1122,7 @@ with gr.Blocks(title="RVC WebUI") as app:
                        vc_output1 = gr.Textbox(label=i18n("输出信息"))
                        vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
                    but0.click(
-                        vc_single,
+                        vc.vc_single,
                        [
                            spk_item,
                            input_audio0,
@ -1671,7 +1242,7 @@ with gr.Blocks(title="RVC WebUI") as app:
                        but1 = gr.Button(i18n("转换"), variant="primary")
                        vc_output3 = gr.Textbox(label=i18n("输出信息"))
                    but1.click(
-                        vc_multi,
+                        vc.vc_multi,
                        [
                            spk_item,
                            dir_input,
@ -1693,7 +1264,7 @@ with gr.Blocks(title="RVC WebUI") as app:
                        api_name="infer_convert_batch",
                    )
            sid0.change(
-                fn=get_vc,
+                fn=vc.get_vc,
                inputs=[sid0, protect0, protect1],
                outputs=[spk_item, protect0, protect1, file_index2, file_index4],
            )
@ -1967,7 +1538,7 @@ with gr.Blocks(title="RVC WebUI") as app:
                        info3,
                        api_name="train_start",
                    )
-                    but4.click(train_index, [exp_dir1, version19], info3)
+                    but4.click(train_index, [exp_dir1, version19, config.n_cpu], info3)
                    but5.click(
                        train1key,
                        [