Add files via upload

2024-11-23 23:21:03 +01:00 · 2023-04-27 23:34:03 +08:00 · 2023-04-27 23:34:03 +08:00 · af208d5210
commit af208d5210
parent a149107c5a
5 changed files with 352 additions and 326 deletions
--- a/go-web.bat
+++ b/go-web.bat
@@ -1,2 +1,2 @@
-runtime\python.exe infer-web.py --pycmd runtime\python.exe
+runtime\python.exe infer-web.py --pycmd runtime\python.exe --port 7897
 pause
--- a/infer-web.py
+++ b/infer-web.py
@@ -1,5 +1,5 @@
 from multiprocessing import cpu_count
-import threading
+import threading,pdb,librosa
 from time import sleep
 from subprocess import Popen
 from time import sleep
@@ -17,6 +17,7 @@ os.environ["TEMP"] = tmp
 warnings.filterwarnings("ignore")
 torch.manual_seed(114514)
 from i18n import I18nAuto
 import ffmpeg
 i18n = I18nAuto()
 # 判断是否有能用来训练和加速推理的N卡
@@ -235,7 +236,7 @@ def vc_multi(
        yield traceback.format_exc()
-def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins):
+def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins,agg):
    infos = []
    try:
        inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
@@ -246,6 +247,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins):
            save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        )
        pre_fun = _audio_pre_(
+            agg=int(agg),
            model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
            device=device,
            is_half=is_half,
@@ -254,10 +256,25 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins):
            paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
        else:
            paths = [path.name for path in paths]
-        for name in paths:
+        for path in paths:
-            inp_path = os.path.join(inp_root, name)
+            inp_path = os.path.join(inp_root, path)
+            need_reformat=1
+            done=0
            try:
-                pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
+                info = ffmpeg.probe(inp_path, cmd="ffprobe")
+                if(info["streams"][0]["channels"]==2 and info["streams"][0]["sample_rate"]=="44100"):
+                    need_reformat=0
+                    pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
+                    done=1
+            except:
+                need_reformat = 1
+                traceback.print_exc()
+            if(need_reformat==1):
+                tmp_path="%s/%s.reformatted.wav"%(tmp,os.path.basename(inp_path))
+                os.system("ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y"%(inp_path,tmp_path))
+                inp_path=tmp_path
+            try:
+                if(done==0):pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
                infos.append("%s->Success" % (os.path.basename(inp_path)))
                yield "\n".join(infos)
            except:
@@ -1147,6 +1164,15 @@ with gr.Blocks() as app:
                        )
                    with gr.Column():
                        model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
+                        agg = gr.Slider(
+                            minimum=0,
+                            maximum=20,
+                            step=1,
+                            label="人声提取激进程度",
+                            value=10,
+                            interactive=True,
+                            visible=False#先不开放调整
+                        )
                        opt_vocal_root = gr.Textbox(
                            label=i18n("指定输出人声文件夹"), value="opt"
                        )
@@ -1161,6 +1187,7 @@ with gr.Blocks() as app:
                            opt_vocal_root,
                            wav_inputs,
                            opt_ins_root,
+                            agg
                        ],
                        [vc_output4],
                    )
@@ -1246,7 +1273,7 @@ with gr.Blocks() as app:
                with gr.Row():
                    save_epoch10 = gr.Slider(
                        minimum=0,
-                        maximum=200,
+                        maximum=50,
                        step=1,
                        label=i18n("保存频率save_every_epoch"),
                        value=5,
--- a/infer_uvr5.py
+++ b/infer_uvr5.py
@@ -13,7 +13,7 @@ from scipy.io import wavfile
 class _audio_pre_:
-    def __init__(self, model_path, device, is_half):
+    def __init__(self, agg,model_path, device, is_half):
        self.model_path = model_path
        self.device = device
        self.data = {
@@ -22,7 +22,7 @@ class _audio_pre_:
            "tta": False,
            # Constants
            "window_size": 512,
-            "agg": 10,
+            "agg": agg,
            "high_end_process": "mirroring",
        }
        nn_arch_sizes = [
@@ -139,7 +139,7 @@ class _audio_pre_:
                wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
            print("%s instruments done" % name)
            wavfile.write(
-                os.path.join(ins_root, "instrument_{}.wav".format(name)),
+                os.path.join(ins_root, "instrument_{}_{}.wav".format(name,self.data["agg"])),
                self.mp.param["sr"],
                (np.array(wav_instrument) * 32768).astype("int16"),
            )  #
@@ -155,7 +155,7 @@ class _audio_pre_:
                wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
            print("%s vocals done" % name)
            wavfile.write(
-                os.path.join(vocal_root, "vocal_{}.wav".format(name)),
+                os.path.join(vocal_root, "vocal_{}_{}.wav".format(name,self.data["agg"])),
                self.mp.param["sr"],
                (np.array(wav_vocals) * 32768).astype("int16"),
            )
--- a/train_nsf_sim_cache_sid_load_pretrain.py
+++ b/train_nsf_sim_cache_sid_load_pretrain.py
@@ -45,7 +45,7 @@ global_step = 0
 def main():
    # n_gpus = torch.cuda.device_count()
    os.environ["MASTER_ADDR"] = "localhost"
-    os.environ["MASTER_PORT"] = "51515"
+    os.environ["MASTER_PORT"] = "51545"
    mp.spawn(
        run,
--- a/vc_infer_pipeline.py
+++ b/vc_infer_pipeline.py
@@ -123,7 +123,6 @@ class VC(object):
            # _, I = index.search(npy, 1)
            # npy = big_npy[I.squeeze()]
            #by github @nadare881
            score, ix = index.search(npy, k=8)
            weight = np.square(1 / score)
            weight /= weight.sum(axis=1, keepdims=True)
`@ -1,2 +1,2 @@`
	`runtime\python.exe infer-web.py --pycmd runtime\python.exe`	`runtime\python.exe infer-web.py --pycmd runtime\python.exe --port 7897`
	`pause`	`pause`