Retrieval-based-Voice-Conve.../rvc_for_realtime.py

import os, sys
import faiss, torch, traceback, parselmouth, numpy as np, torchcrepe, torch.nn as nn, pyworld
import fairseq
from lib.infer_pack.models import (
    SynthesizerTrnMs256NSFsid,
    SynthesizerTrnMs256NSFsid_nono,
    SynthesizerTrnMs768NSFsid,
    SynthesizerTrnMs768NSFsid_nono,
)
from time import time as ttime
import torch.nn.functional as F
import scipy.signal as signal

now_dir = os.getcwd()
sys.path.append(now_dir)
from config import defaultconfig as config
from multiprocessing import Manager as M

# Shared multiprocessing manager; RVC.get_f0 uses it to create the dict that
# collects per-slice harvest results.
mm = M()
if config.dml == True:

    def forward_dml(ctx, x, scale):
        """Forward pass installed on fairseq's GradMultiply when DML is enabled.

        Stores ``scale`` on the context and returns a detached copy of ``x``.
        """
        ctx.scale = scale
        return x.clone().detach()

    fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml


# config.device=torch.device("cpu")########强制cpu测试
# config.is_half=False########强制cpu测试
class RVC:
    """Realtime voice-conversion pipeline.

    Wraps a HuBERT feature extractor, an optional faiss feature index and a
    synthesizer network (``net_g``), and offers several f0 (pitch) estimators.
    """

    def __init__(
        self, key, pth_path, index_path, index_rate, n_cpu, inp_q, opt_q, device
    ) -> None:
        """Load the HuBERT model, the synthesizer checkpoint and the optional index.

        Args:
            key: Pitch shift in semitones; f0 is scaled by ``2 ** (key / 12)``.
            pth_path: Synthesizer checkpoint; read with ``torch.load`` and
                expected to hold ``"config"`` and ``"weight"`` (optionally
                ``"f0"`` and ``"version"``).
            index_path: faiss index file, only read when ``index_rate != 0``.
            index_rate: Blend factor between retrieved and extracted features
                in ``infer``; ``0`` disables retrieval.
            n_cpu: Number of slices the harvest f0 estimation is split into.
            inp_q: Queue on which ``get_f0`` puts harvest work items.
            opt_q: Queue from which ``get_f0`` reads completion timestamps.
            device: Torch device for the synthesizer and the inference tensors.

        Any exception raised while loading is printed and swallowed, so a
        failed load leaves the instance only partially initialised.
        """
        try:
            self.inp_q = inp_q
            self.opt_q = opt_q
            self.device = device
            self.f0_up_key = key
            self.time_step = 160 / 16000 * 1000
            self.f0_min = 50
            self.f0_max = 1100
            self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
            self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
            self.sr = 16000
            self.window = 160
            self.n_cpu = n_cpu
            if index_rate != 0:
                self.index = faiss.read_index(index_path)
                # Keep every indexed vector in memory so search hits can be
                # turned back into feature vectors in infer().
                self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
                print("index search enabled")
            self.index_rate = index_rate
            models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
                ["hubert_base.pt"],
                suffix="",
            )
            hubert_model = models[0]
            # NOTE(review): HuBERT lives on config.device while net_g and the
            # inference inputs use `device`; confirm both are always the same.
            hubert_model = hubert_model.to(config.device)
            if config.is_half:
                hubert_model = hubert_model.half()
            else:
                hubert_model = hubert_model.float()
            hubert_model.eval()
            self.model = hubert_model
            # NOTE: torch.load unpickles the file; only load trusted checkpoints.
            cpt = torch.load(pth_path, map_location="cpu")
            self.tgt_sr = cpt["config"][-1]
            # Take the speaker count from the embedding table actually stored.
            cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
            self.if_f0 = cpt.get("f0", 1)
            self.version = cpt.get("version", "v1")
            if self.version == "v1":
                if self.if_f0 == 1:
                    self.net_g = SynthesizerTrnMs256NSFsid(
                        *cpt["config"], is_half=config.is_half
                    )
                else:
                    self.net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
            elif self.version == "v2":
                if self.if_f0 == 1:
                    self.net_g = SynthesizerTrnMs768NSFsid(
                        *cpt["config"], is_half=config.is_half
                    )
                else:
                    self.net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
            else:
                # Fail with a clear message instead of an AttributeError on
                # the missing net_g below.
                raise ValueError("unsupported model version: %r" % (self.version,))
            del self.net_g.enc_q
            print(self.net_g.load_state_dict(cpt["weight"], strict=False))
            self.net_g.eval().to(device)
            if config.is_half:
                self.net_g = self.net_g.half()
            else:
                self.net_g = self.net_g.float()
            self.is_half = config.is_half
        except Exception:
            # Best-effort construction: report the failure but do not raise,
            # matching the behaviour callers already rely on.
            print(traceback.format_exc())

    def get_f0_post(self, f0):
        """Quantise an f0 contour onto the 1..255 coarse mel-pitch scale.

        Args:
            f0: NumPy array of f0 values in Hz; it is not modified.

        Returns:
            ``(f0_coarse, f0bak)``: the int32 coarse pitch (1 for anything at
            or below ``self.f0_min``, including 0, and 255 at or above
            ``self.f0_max``) and a copy of the input contour.
        """
        f0_min = self.f0_min
        f0_max = self.f0_max
        f0_mel_min = 1127 * np.log(1 + f0_min / 700)
        f0_mel_max = 1127 * np.log(1 + f0_max / 700)
        f0bak = f0.copy()
        f0_mel = 1127 * np.log(1 + f0 / 700)
        voiced = f0_mel > 0
        # Map [f0_mel_min, f0_mel_max] linearly onto [1, 255] for voiced frames.
        f0_mel[voiced] = (f0_mel[voiced] - f0_mel_min) * 254 / (
            f0_mel_max - f0_mel_min
        ) + 1
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > 255] = 255
        f0_coarse = np.rint(f0_mel).astype(np.int32)
        return f0_coarse, f0bak

    def get_f0(self, x, f0_up_key, n_cpu, method="harvest"):
        """Estimate f0 for ``x`` with the requested method and quantise it.

        Args:
            x: 1-D NumPy audio array (processed as 16 kHz with a 160-sample hop).
            f0_up_key: Pitch shift in semitones applied to the estimate.
            n_cpu: Number of slices for harvest; values <= 1 run harvest in
                this process.
            method: ``"crepe"``, ``"rmvpe"`` or ``"pm"``; any other value
                selects harvest.

        Returns:
            The ``(f0_coarse, f0)`` pair produced by ``get_f0_post``.
        """
        n_cpu = int(n_cpu)
        if method == "crepe":
            return self.get_f0_crepe(x, f0_up_key)
        if method == "rmvpe":
            return self.get_f0_rmvpe(x, f0_up_key)
        if method == "pm":
            p_len = x.shape[0] // 160
            f0 = (
                parselmouth.Sound(x, 16000)
                .to_pitch_ac(
                    time_step=0.01,
                    voicing_threshold=0.6,
                    pitch_floor=50,
                    pitch_ceiling=1100,
                )
                .selected_array["frequency"]
            )

            # Zero-pad both ends so the contour has p_len frames.
            pad_size = (p_len - len(f0) + 1) // 2
            if pad_size > 0 or p_len - len(f0) - pad_size > 0:
                f0 = np.pad(
                    f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
                )

            f0 *= pow(2, f0_up_key / 12)
            return self.get_f0_post(f0)
        # n_cpu == 0 used to divide by zero below and a negative n_cpu queued
        # no work and then blocked forever; both now run in this process.
        if n_cpu <= 1:
            f0, t = pyworld.harvest(
                x.astype(np.double),
                fs=16000,
                f0_ceil=1100,
                f0_floor=50,
                frame_period=10,
            )
            f0 = signal.medfilt(f0, 3)
            f0 *= pow(2, f0_up_key / 12)
            return self.get_f0_post(f0)
        f0bak = np.zeros(x.shape[0] // 160, dtype=np.float64)
        length = len(x)
        # Slice length per worker, rounded down to a whole number of hops.
        part_length = int(length / n_cpu / 160) * 160
        # The timestamp doubles as the token identifying this batch of work.
        ts = ttime()
        res_f0 = mm.dict()
        for idx in range(n_cpu):
            # Neighbouring slices overlap by 320 samples on each side.
            tail = part_length * (idx + 1) + 320
            if idx == 0:
                self.inp_q.put((idx, x[:tail], res_f0, n_cpu, ts))
            else:
                self.inp_q.put(
                    (idx, x[part_length * idx - 320 : tail], res_f0, n_cpu, ts)
                )
        # Discard messages until the one carrying this call's timestamp arrives.
        while 1:
            res_ts = self.opt_q.get()
            if res_ts == ts:
                break
        # presumably the workers store each slice's f0 under its idx - verify
        # against the worker implementation.
        f0s = [item[1] for item in sorted(res_f0.items(), key=lambda item: item[0])]
        for idx, f0 in enumerate(f0s):
            # Trim the frames that come from the overlapping samples.
            if idx == 0:
                f0 = f0[:-3]
            elif idx != n_cpu - 1:
                f0 = f0[2:-3]
            else:
                f0 = f0[2:-1]
            start = part_length * idx // 160
            f0bak[start : start + f0.shape[0]] = f0
        f0bak = signal.medfilt(f0bak, 3)
        f0bak *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0bak)

    def get_f0_crepe(self, x, f0_up_key):
        """Estimate f0 with torchcrepe (falls back to "pm" on DML devices).

        Args:
            x: 1-D NumPy audio array.
            f0_up_key: Pitch shift in semitones applied to the estimate.

        Returns:
            The ``(f0_coarse, f0)`` pair produced by ``get_f0_post``.
        """
        if self.device.type == "privateuseone":
            # DML is not supported here and the CPU is too slow to be usable,
            # so "pm" stands in for crepe.
            return self.get_f0(x, f0_up_key, 1, "pm")
        audio = torch.tensor(np.copy(x))[None].float()
        f0, pd = torchcrepe.predict(
            audio,
            self.sr,
            160,
            self.f0_min,
            self.f0_max,
            "full",
            batch_size=512,
            device=self.device,
            return_periodicity=True,
        )
        pd = torchcrepe.filter.median(pd, 3)
        f0 = torchcrepe.filter.mean(f0, 3)
        # Frames with low periodicity are marked unvoiced.
        f0[pd < 0.1] = 0
        f0 = f0[0].cpu().numpy()
        f0 *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0)

    def get_f0_rmvpe(self, x, f0_up_key):
        """Estimate f0 with RMVPE, loading the model on first use.

        Args:
            x: Audio passed to ``RMVPE.infer_from_audio``.
            f0_up_key: Pitch shift in semitones applied to the estimate.

        Returns:
            The ``(f0_coarse, f0)`` pair produced by ``get_f0_post``.
        """
        if not hasattr(self, "model_rmvpe"):
            from lib.rmvpe import RMVPE

            print("loading rmvpe model")
            # Default setup: same precision and device as the rest of the
            # pipeline (earlier DML/CPU-only variants were left disabled).
            self.model_rmvpe = RMVPE(
                "rmvpe.pt",
                is_half=self.is_half,
                device=self.device,
            )
        f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
        f0 *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0)

    @staticmethod
    def _neighbor_weights(score, eps=1e-9):
        """Turn index distances into row-normalised inverse-square weights.

        Distances are clamped to ``eps`` so that an exact hit (distance 0)
        yields a dominant finite weight instead of inf/NaN.
        """
        weight = np.square(1 / np.maximum(score, eps))
        weight /= weight.sum(axis=1, keepdims=True)
        return weight

    def infer(
        self,
        feats: torch.Tensor,
        indata: np.ndarray,
        rate1,
        rate2,
        cache_pitch,
        cache_pitchf,
        f0method,
    ) -> torch.Tensor:
        """Convert one chunk of audio and return the synthesised waveform.

        Args:
            feats: Input audio tensor; flattened to shape ``(1, -1)`` and fed
                to HuBERT as ``"source"``.
            indata: NumPy audio used for f0 estimation.
            rate1: Fraction of the trailing feature frames blended with index
                retrieval results.
            rate2: Forwarded unchanged as the last argument of ``net_g.infer``.
            cache_pitch: Rolling coarse-pitch buffer; updated in place when the
                model uses f0.
            cache_pitchf: Rolling f0 buffer; updated in place when the model
                uses f0.
            f0method: f0 method name passed to ``get_f0``.

        Returns:
            A float tensor on the CPU holding the generated audio.
        """
        feats = feats.view(1, -1)
        if config.is_half:
            feats = feats.half()
        else:
            feats = feats.float()
        feats = feats.to(self.device)
        t1 = ttime()
        with torch.no_grad():
            padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
            inputs = {
                "source": feats,
                "padding_mask": padding_mask,
                "output_layer": 9 if self.version == "v1" else 12,
            }
            logits = self.model.extract_features(**inputs)
            feats = (
                self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
            )
        t2 = ttime()
        try:
            if hasattr(self, "index") and self.index_rate != 0:
                leng_replace_head = int(rate1 * feats[0].shape[0])
                npy = feats[0][-leng_replace_head:].cpu().numpy().astype("float32")
                score, ix = self.index.search(npy, k=8)
                weight = self._neighbor_weights(score)
                # Weighted average of the 8 nearest indexed vectors per frame.
                npy = np.sum(self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
                if config.is_half:
                    npy = npy.astype("float16")
                feats[0][-leng_replace_head:] = (
                    torch.from_numpy(npy).unsqueeze(0).to(self.device) * self.index_rate
                    + (1 - self.index_rate) * feats[0][-leng_replace_head:]
                )
            else:
                print("index search FAIL or disabled")
        except Exception:
            # Retrieval is optional: log the failure and continue with the
            # unmodified HuBERT features.
            traceback.print_exc()
            print("index search FAIL")
        # Double the number of feature frames along the time axis.
        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
        t3 = ttime()
        if self.if_f0 == 1:
            pitch, pitchf = self.get_f0(indata, self.f0_up_key, self.n_cpu, f0method)
            # Shift the rolling buffers left and append the new frames (the
            # last estimated frame is dropped).
            cache_pitch[:] = np.append(cache_pitch[pitch[:-1].shape[0] :], pitch[:-1])
            cache_pitchf[:] = np.append(
                cache_pitchf[pitchf[:-1].shape[0] :], pitchf[:-1]
            )
            p_len = min(feats.shape[1], 13000, cache_pitch.shape[0])
        else:
            cache_pitch, cache_pitchf = None, None
            p_len = min(feats.shape[1], 13000)
        t4 = ttime()
        feats = feats[:, :p_len, :]
        if self.if_f0 == 1:
            cache_pitch = cache_pitch[:p_len]
            cache_pitchf = cache_pitchf[:p_len]
            cache_pitch = torch.LongTensor(cache_pitch).unsqueeze(0).to(self.device)
            cache_pitchf = torch.FloatTensor(cache_pitchf).unsqueeze(0).to(self.device)
        p_len = torch.LongTensor([p_len]).to(self.device)
        ii = 0  # sid
        sid = torch.LongTensor([ii]).to(self.device)
        with torch.no_grad():
            if self.if_f0 == 1:
                infered_audio = (
                    self.net_g.infer(
                        feats, p_len, cache_pitch, cache_pitchf, sid, rate2
                    )[0][0, 0]
                    .data.cpu()
                    .float()
                )
            else:
                infered_audio = (
                    self.net_g.infer(feats, p_len, sid, rate2)[0][0, 0]
                    .data.cpu()
                    .float()
                )
        t5 = ttime()
        print("time->fea-index-f0-model:", t2 - t1, t3 - t2, t4 - t3, t5 - t4)
        return infered_audio
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								import os, sys
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								import faiss, torch, traceback, parselmouth, numpy as np, torchcrepe, torch.nn as nn, pyworld
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								import fairseq
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								from lib.infer_pack.models import (
 								    SynthesizerTrnMs256NSFsid,
 								    SynthesizerTrnMs256NSFsid_nono,
 								    SynthesizerTrnMs768NSFsid,
 								    SynthesizerTrnMs768NSFsid_nono,
 								)
 								from time import time as ttime
 								import torch.nn.functional as F
 								import scipy.signal as signal
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								now_dir = os.getcwd()
 								sys.path.append(now_dir)
-												optimize: config

											
										
										
											2023-08-26 18:35:39 +02:00
+								from config import defaultconfig as config
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								from multiprocessing import Manager as M
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								mm = M()
-												Format code (#989)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-08-13 05:52:51 +02:00
+								if config.dml == True:
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								    def forward_dml(ctx, x, scale):
 								        ctx.scale = scale
 								        res = x.clone().detach()
 								        return res
-												Format code (#989)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-08-13 05:52:51 +02:00
 								    fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								# config.device=torch.device("cpu")########强制cpu测试
 								# config.is_half=False########强制cpu测试
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								class RVC:
 								    def __init__(
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								        self, key, pth_path, index_path, index_rate, n_cpu, inp_q, opt_q, device
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								    ) -> None:
 								        """
 								        初始化
 								        """
 								        try:
 								            global config
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            self.inp_q = inp_q
 								            self.opt_q = opt_q
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								            # device="cpu"########强制cpu测试
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            self.device = device
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								            self.f0_up_key = key
 								            self.time_step = 160 / 16000 * 1000
 								            self.f0_min = 50
 								            self.f0_max = 1100
 								            self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
 								            self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
 								            self.sr = 16000
 								            self.window = 160
 								            self.n_cpu = n_cpu
 								            if index_rate != 0:
 								                self.index = faiss.read_index(index_path)
 								                self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
 								                print("index search enabled")
 								            self.index_rate = index_rate
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								            models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								                ["hubert_base.pt"],
 								                suffix="",
 								            )
 								            hubert_model = models[0]
 								            hubert_model = hubert_model.to(config.device)
 								            if config.is_half:
 								                hubert_model = hubert_model.half()
 								            else:
 								                hubert_model = hubert_model.float()
 								            hubert_model.eval()
 								            self.model = hubert_model
 								            cpt = torch.load(pth_path, map_location="cpu")
 								            self.tgt_sr = cpt["config"][-1]
 								            cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
 								            self.if_f0 = cpt.get("f0", 1)
 								            self.version = cpt.get("version", "v1")
 								            if self.version == "v1":
 								                if self.if_f0 == 1:
 								                    self.net_g = SynthesizerTrnMs256NSFsid(
 								                        *cpt["config"], is_half=config.is_half
 								                    )
 								                else:
 								                    self.net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
 								            elif self.version == "v2":
 								                if self.if_f0 == 1:
 								                    self.net_g = SynthesizerTrnMs768NSFsid(
 								                        *cpt["config"], is_half=config.is_half
 								                    )
 								                else:
 								                    self.net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
 								            del self.net_g.enc_q
 								            print(self.net_g.load_state_dict(cpt["weight"], strict=False))
 								            self.net_g.eval().to(device)
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								            # print(2333333333,device,config.device,self.device)#net_g是device，hubert是config.device
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								            if config.is_half:
 								                self.net_g = self.net_g.half()
 								            else:
 								                self.net_g = self.net_g.float()
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            self.is_half = config.is_half
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								        except:
 								            print(traceback.format_exc())
 								    def get_f0_post(self, f0):
 								        f0_min = self.f0_min
 								        f0_max = self.f0_max
 								        f0_mel_min = 1127 * np.log(1 + f0_min / 700)
 								        f0_mel_max = 1127 * np.log(1 + f0_max / 700)
 								        f0bak = f0.copy()
 								        f0_mel = 1127 * np.log(1 + f0 / 700)
 								        f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
 								            f0_mel_max - f0_mel_min
 								        ) + 1
 								        f0_mel[f0_mel <= 1] = 1
 								        f0_mel[f0_mel > 255] = 255
-												replace np.int with np.int32 (#948)

ref: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
											
										
										
											2023-08-10 04:28:30 +02:00
+								        f0_coarse = np.rint(f0_mel).astype(np.int32)
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								        return f0_coarse, f0bak
 								    def get_f0(self, x, f0_up_key, n_cpu, method="harvest"):
 								        n_cpu = int(n_cpu)
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								        if method == "crepe":
 								            return self.get_f0_crepe(x, f0_up_key)
 								        if method == "rmvpe":
 								            return self.get_f0_rmvpe(x, f0_up_key)
 								        if method == "pm":
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								            p_len = x.shape[0] // 160
 								            f0 = (
 								                parselmouth.Sound(x, 16000)
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								                .to_pitch_ac(
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								                    time_step=0.01,
 								                    voicing_threshold=0.6,
 								                    pitch_floor=50,
 								                    pitch_ceiling=1100,
 								                )
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								                .selected_array["frequency"]
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								            )
 								            pad_size = (p_len - len(f0) + 1) // 2
 								            if pad_size > 0 or p_len - len(f0) - pad_size > 0:
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								                # print(pad_size, p_len - len(f0) - pad_size)
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								                f0 = np.pad(
 								                    f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
 								                )
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
 								            f0 *= pow(2, f0_up_key / 12)
 								            return self.get_f0_post(f0)
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								        if n_cpu == 1:
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								            f0, t = pyworld.harvest(
 								                x.astype(np.double),
 								                fs=16000,
 								                f0_ceil=1100,
 								                f0_floor=50,
 								                frame_period=10,
 								            )
 								            f0 = signal.medfilt(f0, 3)
 								            f0 *= pow(2, f0_up_key / 12)
 								            return self.get_f0_post(f0)
 								        f0bak = np.zeros(x.shape[0] // 160, dtype=np.float64)
 								        length = len(x)
 								        part_length = int(length / n_cpu / 160) * 160
 								        ts = ttime()
 								        res_f0 = mm.dict()
 								        for idx in range(n_cpu):
 								            tail = part_length * (idx + 1) + 320
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            if idx == 0:
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								                self.inp_q.put((idx, x[:tail], res_f0, n_cpu, ts))
 								            else:
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								                self.inp_q.put(
 								                    (idx, x[part_length * idx - 320 : tail], res_f0, n_cpu, ts)
 								                )
 								        while 1:
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								            res_ts = self.opt_q.get()
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            if res_ts == ts:
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								                break
 								        f0s = [i[1] for i in sorted(res_f0.items(), key=lambda x: x[0])]
 								        for idx, f0 in enumerate(f0s):
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            if idx == 0:
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								                f0 = f0[:-3]
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            elif idx != n_cpu - 1:
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								                f0 = f0[2:-3]
 								            else:
 								                f0 = f0[2:-1]
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            f0bak[
 								                part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]
 								            ] = f0
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								        f0bak = signal.medfilt(f0bak, 3)
 								        f0bak *= pow(2, f0_up_key / 12)
 								        return self.get_f0_post(f0bak)
 								    def get_f0_crepe(self, x, f0_up_key):
-												Format code (#989)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-08-13 05:52:51 +02:00
+								        if self.device.type == "privateuseone":  ###不支持dml，cpu又太慢用不成，拿pm顶替
 								            return self.get_f0(x, f0_up_key, 1, "pm")
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								        audio = torch.tensor(np.copy(x))[None].float()
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								        # print("using crepe,device:%s"%self.device)
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								        f0, pd = torchcrepe.predict(
 								            audio,
 								            self.sr,
 ,
 								            self.f0_min,
 								            self.f0_max,
 								            "full",
 								            batch_size=512,
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								            # device=self.device if self.device.type!="privateuseone" else "cpu",###crepe不用半精度全部是全精度所以不愁###cpu延迟高到没法用
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								            device=self.device,
 								            return_periodicity=True,
 								        )
 								        pd = torchcrepe.filter.median(pd, 3)
 								        f0 = torchcrepe.filter.mean(f0, 3)
 								        f0[pd < 0.1] = 0
 								        f0 = f0[0].cpu().numpy()
 								        f0 *= pow(2, f0_up_key / 12)
 								        return self.get_f0_post(f0)
 								    def get_f0_rmvpe(self, x, f0_up_key):
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								        if hasattr(self, "model_rmvpe") == False:
-												optimize: move slicer2, rmvpe, my_utils to lib

											
										
										
											2023-07-24 08:16:58 +02:00
+								            from lib.rmvpe import RMVPE
-												Format code (#989)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-08-13 05:52:51 +02:00
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								            print("loading rmvpe model")
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            self.model_rmvpe = RMVPE(
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								                # "rmvpe.pt", is_half=self.is_half if self.device.type!="privateuseone" else False, device=self.device if self.device.type!="privateuseone"else "cpu"####dml时强制对rmvpe用cpu跑
 								                #  "rmvpe.pt", is_half=False, device=self.device####dml配置
-												Format code (#989)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-08-13 05:52:51 +02:00
+								                # "rmvpe.pt", is_half=False, device="cpu"####锁定cpu配置
 								                "rmvpe.pt",
 								                is_half=self.is_half,
 								                device=self.device,  ####正常逻辑
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            )
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								            # self.model_rmvpe = RMVPE("aug2_58000_half.pt", is_half=self.is_half, device=self.device)
 								        f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
 								        f0 *= pow(2, f0_up_key / 12)
 								        return self.get_f0_post(f0)
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								    def infer(
 								        self,
 								        feats: torch.Tensor,
 								        indata: np.ndarray,
 								        rate1,
 								        rate2,
 								        cache_pitch,
 								        cache_pitchf,
 								        f0method,
 								    ) -> np.ndarray:
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								        feats = feats.view(1, -1)
 								        if config.is_half:
 								            feats = feats.half()
 								        else:
 								            feats = feats.float()
 								        feats = feats.to(self.device)
 								        t1 = ttime()
 								        with torch.no_grad():
 								            padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
 								            inputs = {
 								                "source": feats,
 								                "padding_mask": padding_mask,
 								                "output_layer": 9 if self.version == "v1" else 12,
 								            }
 								            logits = self.model.extract_features(**inputs)
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            feats = (
 								                self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
 								            )
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								        t2 = ttime()
 								        try:
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            if hasattr(self, "index") and self.index_rate != 0:
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								                leng_replace_head = int(rate1 * feats[0].shape[0])
 								                npy = feats[0][-leng_replace_head:].cpu().numpy().astype("float32")
 								                score, ix = self.index.search(npy, k=8)
 								                weight = np.square(1 / score)
 								                weight /= weight.sum(axis=1, keepdims=True)
 								                npy = np.sum(self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
 								                if config.is_half:
 								                    npy = npy.astype("float16")
 								                feats[0][-leng_replace_head:] = (
 								                    torch.from_numpy(npy).unsqueeze(0).to(self.device) * self.index_rate
 								                    + (1 - self.index_rate) * feats[0][-leng_replace_head:]
 								                )
 								            else:
 								                print("index search FAIL or disabled")
 								        except:
 								            traceback.print_exc()
 								            print("index search FAIL")
 								        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
 								        t3 = ttime()
 								        if self.if_f0 == 1:
 								            pitch, pitchf = self.get_f0(indata, self.f0_up_key, self.n_cpu, f0method)
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								            cache_pitch[:] = np.append(cache_pitch[pitch[:-1].shape[0] :], pitch[:-1])
 								            cache_pitchf[:] = np.append(
 								                cache_pitchf[pitchf[:-1].shape[0] :], pitchf[:-1]
 								            )
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								            p_len = min(feats.shape[1], 13000, cache_pitch.shape[0])
 								        else:
 								            cache_pitch, cache_pitchf = None, None
 								            p_len = min(feats.shape[1], 13000)
 								        t4 = ttime()
 								        feats = feats[:, :p_len, :]
 								        if self.if_f0 == 1:
 								            cache_pitch = cache_pitch[:p_len]
 								            cache_pitchf = cache_pitchf[:p_len]
 								            cache_pitch = torch.LongTensor(cache_pitch).unsqueeze(0).to(self.device)
 								            cache_pitchf = torch.FloatTensor(cache_pitchf).unsqueeze(0).to(self.device)
 								        p_len = torch.LongTensor([p_len]).to(self.device)
 								        ii = 0  # sid
 								        sid = torch.LongTensor([ii]).to(self.device)
 								        with torch.no_grad():
 								            if self.if_f0 == 1:
-												Add files via upload
											
										
										
											2023-08-12 19:05:58 +02:00
+								                # print(12222222222,feats.device,p_len.device,cache_pitch.device,cache_pitchf.device,sid.device,rate2)
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								                infered_audio = (
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								                    self.net_g.infer(
 								                        feats, p_len, cache_pitch, cache_pitchf, sid, rate2
 								                    )[0][0, 0]
 								                    .data.cpu()
 								                    .float()
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								                )
 								            else:
 								                infered_audio = (
-												Format code (#727)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
											
										
										
											2023-07-13 08:35:24 +02:00
+								                    self.net_g.infer(feats, p_len, sid, rate2)[0][0, 0]
 								                    .data.cpu()
 								                    .float()
-												Update rvc_for_realtime.py
											
										
										
											2023-07-11 16:09:41 +02:00
+								                )
 								        t5 = ttime()
 								        print("time->fea-index-f0-model:", t2 - t1, t3 - t2, t4 - t3, t5 - t4)
 								        return infered_audio