
Add files via upload

RVC-Boss 2023-05-14 15:05:42 +08:00 committed by GitHub
parent 3909ce4a7b
commit 3b5a2298d7
6 changed files with 305 additions and 218 deletions

View File

@ -1,5 +1,7 @@
import os, traceback, sys, parselmouth
import librosa
now_dir = os.getcwd()
sys.path.append(now_dir)
from my_utils import load_audio
import pyworld
from scipy.io import wavfile
import numpy as np, logging
@ -33,17 +35,14 @@ class FeatureInput(object):
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
def compute_f0(self, path, f0_method):
# default resample type of librosa.resample is "soxr_hq".
# Quality: soxr_vhq > soxr_hq
x, sr = librosa.load(path, self.fs) # , res_type='soxr_vhq'
x=load_audio(path,self.fs)
p_len = x.shape[0] // self.hop
assert sr == self.fs
if f0_method == "pm":
time_step = 160 / 16000 * 1000
f0_min = 50
f0_max = 1100
f0 = (
parselmouth.Sound(x, sr)
parselmouth.Sound(x, self.fs)
.to_pitch_ac(
time_step=time_step / 1000,
voicing_threshold=0.6,
@ -60,19 +59,19 @@ class FeatureInput(object):
elif f0_method == "harvest":
f0, t = pyworld.harvest(
x.astype(np.double),
fs=sr,
fs=self.fs,
f0_ceil=self.f0_max,
f0_floor=self.f0_min,
frame_period=1000 * self.hop / sr,
frame_period=1000 * self.hop / self.fs,
)
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs)
elif f0_method == "dio":
f0, t = pyworld.dio(
x.astype(np.double),
fs=sr,
fs=self.fs,
f0_ceil=self.f0_max,
f0_floor=self.f0_min,
frame_period=1000 * self.hop / sr,
frame_period=1000 * self.hop / self.fs,
)
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs)
return f0
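
Editor's note: the hunk above drops librosa.load in favor of the project's load_audio helper (so the audio is always read at self.fs and the sr assertion becomes unnecessary) and keeps three f0 back ends: parselmouth "pm", pyworld "harvest", and pyworld "dio", each refined with stonemask. A minimal standalone sketch of the harvest path shown here; the function name, the 16 kHz rate, the hop of 160, and the use of librosa for loading are illustrative assumptions, not part of the commit.

```python
import numpy as np
import pyworld
import librosa  # only used to load audio in this sketch; the commit uses load_audio(path, fs)

def harvest_f0(path, fs=16000, hop=160, f0_min=50.0, f0_max=1100.0):
    """Estimate f0 with pyworld.harvest, then refine it with stonemask,
    mirroring the "harvest" branch in the diff above."""
    x, _ = librosa.load(path, sr=fs)
    x = x.astype(np.double)                  # pyworld expects float64
    f0, t = pyworld.harvest(
        x,
        fs=fs,
        f0_floor=f0_min,
        f0_ceil=f0_max,
        frame_period=1000 * hop / fs,        # one f0 value per hop (10 ms here)
    )
    return pyworld.stonemask(x, f0, t, fs)
```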

View File

@ -9,7 +9,7 @@ else:
i_gpu = sys.argv[4]
exp_dir = sys.argv[5]
os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
version = sys.argv[6]
import torch
import torch.nn.functional as F
import soundfile as sf
@ -18,12 +18,9 @@ from fairseq import checkpoint_utils
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
device = "cuda"
elif torch.backends.mps.is_available():
device = "mps"
else:
device = "cpu"
if torch.cuda.is_available():device="cuda"
elif torch.backends.mps.is_available():device="mps"
else:device="cpu"
f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
@ -39,7 +36,7 @@ model_path = "hubert_base.pt"
printt(exp_dir)
wavPath = "%s/1_16k_wavs" % exp_dir
outPath = "%s/3_feature256" % exp_dir
outPath = "%s/3_feature256" % exp_dir if version=="v1"else "%s/3_feature768" % exp_dir
os.makedirs(outPath, exist_ok=True)
@ -67,7 +64,7 @@ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
model = models[0]
model = model.to(device)
printt("move model to %s" % device)
if device not in ["mps", "cpu"]:
if device not in ["mps","cpu"]:
model = model.half()
model.eval()
@ -93,11 +90,11 @@ else:
if device not in ["mps", "cpu"]
else feats.to(device),
"padding_mask": padding_mask.to(device),
"output_layer": 9, # layer 9
"output_layer": 9 if version=="v1"else 12, # layer 9
}
with torch.no_grad():
logits = model.extract_features(**inputs)
feats = model.final_proj(logits[0])
feats = model.final_proj(logits[0])if version=="v1"else logits[0]
feats = feats.squeeze(0).float().cpu().numpy()
if np.isnan(feats).sum() == 0:
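
Editor's note: the feature extractor now takes the model version from argv and branches in two places: v1 reads HuBERT layer 9 and applies final_proj to get 256-dim features (written to 3_feature256), while v2 takes the raw layer-12 output at 768 dims (written to 3_feature768). A minimal sketch of that branch, assuming a fairseq HuBERT model loaded as in the script and feats shaped (1, T); extract_hubert_feats is an illustrative name.

```python
import torch

def extract_hubert_feats(model, feats, padding_mask, version="v1"):
    """v1: layer 9 + final_proj (256-dim). v2: raw layer-12 output (768-dim)."""
    inputs = {
        "source": feats,
        "padding_mask": padding_mask,
        "output_layer": 9 if version == "v1" else 12,
    }
    with torch.no_grad():
        logits = model.extract_features(**inputs)  # (hidden_states, padding_mask)
        out = model.final_proj(logits[0]) if version == "v1" else logits[0]
    return out.squeeze(0).float().cpu().numpy()    # (T, 256) for v1, (T, 768) for v2
```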

View File

@ -1,9 +1,9 @@
import torch, os, traceback, sys, warnings, shutil, numpy as np
os.environ["no_proxy"]="localhost, 127.0.0.1, ::1"
from multiprocessing import cpu_count
import threading
from time import sleep
from subprocess import Popen
from time import sleep
import torch, os, traceback, sys, warnings, shutil, numpy as np
import faiss
from random import shuffle
@ -11,8 +11,8 @@ now_dir = os.getcwd()
sys.path.append(now_dir)
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack"%(now_dir), ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack"%(now_dir) , ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
@ -70,7 +70,7 @@ else:
gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
default_batch_size = 1
gpus = "-".join([i[0] for i in gpu_infos])
from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono,SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono
from scipy.io import wavfile
from fairseq import checkpoint_utils
import gradio as gr
@ -121,11 +121,11 @@ names = []
for name in os.listdir(weight_root):
if name.endswith(".pth"):
names.append(name)
index_paths = []
index_paths=[]
for root, dirs, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append("%s/%s" % (root, name))
index_paths.append("%s/%s"%(root,name))
uvr5_names = []
for name in os.listdir(weight_uvr5_root):
if name.endswith(".pth"):
@ -144,29 +144,29 @@ def vc_single(
index_rate,
filter_radius,
resample_sr,
rms_mix_rate
): # spk_item, input_audio0, vc_transform0,f0_file,f0method0
global tgt_sr, net_g, vc, hubert_model
global tgt_sr, net_g, vc, hubert_model,version
if input_audio_path is None:
return "You need to upload an audio", None
f0_up_key = int(f0_up_key)
try:
audio = load_audio(input_audio_path, 16000)
audio_max=np.abs(audio).max()/0.95
if(audio_max>1):
audio/=audio_max
times = [0, 0, 0]
if hubert_model == None:
load_hubert()
if_f0 = cpt.get("f0", 1)
file_index = (
(
file_index.strip(" ")
.strip('"')
.strip("\n")
.strip('"')
.strip(" ")
.replace("trained", "added")
)
if file_index != ""
else file_index2
) # 防止小白写错,自动帮他替换掉
file_index.strip(" ")
.strip('"')
.strip("\n")
.strip('"')
.strip(" ")
.replace("trained", "added")
)if file_index!=""else file_index2 # 防止小白写错,自动帮他替换掉
# file_big_npy = (
# file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
# )
@ -186,21 +186,14 @@ def vc_single(
filter_radius,
tgt_sr,
resample_sr,
rms_mix_rate,
version,
f0_file=f0_file,
)
if resample_sr >= 16000 and tgt_sr != resample_sr:
tgt_sr = resample_sr
index_info = (
"Using index:%s." % file_index
if os.path.exists(file_index)
else "Index not used."
)
return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
index_info,
times[0],
times[1],
times[2],
), (tgt_sr, audio_opt)
if(resample_sr>=16000 and tgt_sr!=resample_sr):
tgt_sr=resample_sr
index_info="Using index:%s."%file_index if os.path.exists(file_index)else"Index not used."
return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss"%(index_info,times[0],times[1],times[2]), (tgt_sr, audio_opt)
except:
info = traceback.format_exc()
print(info)
@ -220,6 +213,7 @@ def vc_multi(
index_rate,
filter_radius,
resample_sr,
rms_mix_rate
):
try:
dir_path = (
@ -249,8 +243,9 @@ def vc_multi(
index_rate,
filter_radius,
resample_sr,
rms_mix_rate
)
if "Success" in info:
if "Success"in info:
try:
tgt_sr, audio_opt = opt
wavfile.write(
@ -335,8 +330,8 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg):
# 一个选项卡全局只能有一个音色
def get_vc(sid):
global n_spk, tgt_sr, net_g, vc, cpt
if sid == "" or sid == []:
global n_spk, tgt_sr, net_g, vc, cpt,version
if sid == ""or sid==[]:
global hubert_model
if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
print("clean_empty_cache")
@ -346,12 +341,17 @@ def get_vc(sid):
torch.cuda.empty_cache()
###楼下不这么折腾清理不干净
if_f0 = cpt.get("f0", 1)
if if_f0 == 1:
net_g = SynthesizerTrnMs256NSFsid(
*cpt["config"], is_half=config.is_half
)
else:
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
version = cpt.get("version", "v1")
if (version == "v1"):
if if_f0 == 1:
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
else:
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
elif (version == "v2"):
if if_f0 == 1:
net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
else:
net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
del net_g, cpt
if torch.cuda.is_available():
torch.cuda.empty_cache()
@ -363,12 +363,19 @@ def get_vc(sid):
tgt_sr = cpt["config"][-1]
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
if_f0 = cpt.get("f0", 1)
if if_f0 == 1:
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
else:
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
version = cpt.get("version", "v1")
if(version=="v1"):
if if_f0 == 1:
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
else:
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
elif(version=="v2"):
if if_f0 == 1:
net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
else:
net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
del net_g.enc_q
print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
print(net_g.load_state_dict(cpt["weight"], strict=False))
net_g.eval().to(config.device)
if config.is_half:
net_g = net_g.half()
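
Editor's note: get_vc (both the unload branch and the load branch) now reads cpt.get("version", "v1"), so checkpoints without a version key still load as v1, and picks the 256-dim or 768-dim synthesizer accordingly. The selection reduces to a small lookup; pick_synthesizer below is an illustrative helper, not code from the commit.

```python
from infer_pack.models import (
    SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono,
    SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono,
)

def pick_synthesizer(cpt, is_half):
    """Choose the generator class from the checkpoint's version and f0 flag."""
    version = cpt.get("version", "v1")   # older checkpoints carry no version key
    if_f0 = cpt.get("f0", 1)
    classes = {
        ("v1", 1): SynthesizerTrnMs256NSFsid,
        ("v1", 0): SynthesizerTrnMs256NSFsid_nono,
        ("v2", 1): SynthesizerTrnMs768NSFsid,
        ("v2", 0): SynthesizerTrnMs768NSFsid_nono,
    }
    cls = classes[(version, if_f0)]
    # the f0 variants take is_half, the _nono variants do not, as in the diff
    return cls(*cpt["config"], is_half=is_half) if if_f0 == 1 else cls(*cpt["config"])
```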
@ -384,37 +391,17 @@ def change_choices():
for name in os.listdir(weight_root):
if name.endswith(".pth"):
names.append(name)
index_paths = []
index_paths=[]
for root, dirs, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append("%s/%s" % (root, name))
return {"choices": sorted(names), "__type__": "update"}, {
"choices": sorted(index_paths),
"__type__": "update",
}
return {"choices": sorted(names), "__type__": "update"},{"choices": sorted(index_paths), "__type__": "update"}
def clean():
return {"value": "", "__type__": "update"}
def change_f0(if_f0_3, sr2): # np7, f0method8,pretrained_G14,pretrained_D15
if if_f0_3:
return (
{"visible": True, "__type__": "update"},
{"visible": True, "__type__": "update"},
"pretrained/f0G%s.pth" % sr2,
"pretrained/f0D%s.pth" % sr2,
)
return (
{"visible": False, "__type__": "update"},
{"visible": False, "__type__": "update"},
"pretrained/G%s.pth" % sr2,
"pretrained/D%s.pth" % sr2,
)
sr_dict = {
"32k": 32000,
"40k": 40000,
@ -481,7 +468,7 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
# but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir):
def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir,version19):
gpus = gpus.split("-")
os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
@ -527,13 +514,14 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir):
leng = len(gpus)
ps = []
for idx, n_g in enumerate(gpus):
cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s/logs/%s" % (
cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s/logs/%s %s" % (
config.device,
leng,
idx,
n_g,
now_dir,
exp_dir,
version19,
)
print(cmd)
p = Popen(
@ -561,12 +549,33 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir):
yield log
def change_sr2(sr2, if_f0_3):
if if_f0_3:
return "pretrained/f0G%s.pth" % sr2, "pretrained/f0D%s.pth" % sr2
else:
return "pretrained/G%s.pth" % sr2, "pretrained/D%s.pth" % sr2
def change_sr2(sr2, if_f0_3,version19):
vis_v=True if sr2=="40k"else False
if(sr2!="40k"):version19="v1"
path_str=""if version19=="v1"else "_v2"
version_state={"visible": vis_v, "__type__": "update"}
if(vis_v==False):version_state["value"]="v1"
f0_str="f0"if if_f0_3 else""
return "pretrained%s/%sG%s.pth" % (path_str,f0_str,sr2), "pretrained%s/%sD%s.pth" % (path_str,f0_str,sr2),version_state
def change_version19(sr2,if_f0_3,version19):
path_str=""if version19=="v1"else "_v2"
f0_str="f0"if if_f0_3 else""
return "pretrained%s/%sG%s.pth" % (path_str,f0_str,sr2), "pretrained%s/%sD%s.pth" % (path_str,f0_str,sr2)
def change_f0(if_f0_3, sr2,version19): # f0method8,pretrained_G14,pretrained_D15
path_str=""if version19=="v1"else "_v2"
if if_f0_3:
return (
{"visible": True, "__type__": "update"},
"pretrained%s/f0G%s.pth" % (path_str,sr2),
"pretrained%s/f0D%s.pth" % (path_str,sr2),
)
return (
{"visible": False, "__type__": "update"},
"pretrained%s/G%s.pth" % (path_str,sr2),
"pretrained%s/D%s.pth" % (path_str,sr2),
)
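
Editor's note: change_sr2, change_version19, and change_f0 all derive the pretrained generator/discriminator paths from the same three inputs: sample rate, f0 flag, and version (v2 adds a _v2 suffix to the pretrained directory, and change_sr2 forces v1 whenever the sample rate is not 40k). A restatement of that path logic; pretrained_paths is an illustrative name only.

```python
def pretrained_paths(sr2, if_f0_3, version19):
    """Paths assembled by the callbacks above, e.g. ("pretrained_v2/f0G40k.pth",
    "pretrained_v2/f0D40k.pth") for sr2="40k", if_f0_3=True, version19="v2"."""
    path_str = "" if version19 == "v1" else "_v2"
    f0_str = "f0" if if_f0_3 else ""
    return (
        "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
        "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
    )
```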
# but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
def click_train(
@ -582,24 +591,26 @@ def click_train(
pretrained_D15,
gpus16,
if_cache_gpu17,
if_save_every_weights18,
version19,
):
# 生成filelist
exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
os.makedirs(exp_dir, exist_ok=True)
gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
co256_dir = "%s/3_feature256" % (exp_dir)
feature_dir = "%s/3_feature256" % (exp_dir)if version19=="v1"else "%s/3_feature768" % (exp_dir)
if if_f0_3:
f0_dir = "%s/2a_f0" % (exp_dir)
f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
names = (
set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
& set([name.split(".")[0] for name in os.listdir(co256_dir)])
& set([name.split(".")[0] for name in os.listdir(feature_dir)])
& set([name.split(".")[0] for name in os.listdir(f0_dir)])
& set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
)
else:
names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
[name.split(".")[0] for name in os.listdir(co256_dir)]
[name.split(".")[0] for name in os.listdir(feature_dir)]
)
opt = []
for name in names:
@ -609,7 +620,7 @@ def click_train(
% (
gt_wavs_dir.replace("\\", "\\\\"),
name,
co256_dir.replace("\\", "\\\\"),
feature_dir.replace("\\", "\\\\"),
name,
f0_dir.replace("\\", "\\\\"),
name,
@ -624,22 +635,23 @@ def click_train(
% (
gt_wavs_dir.replace("\\", "\\\\"),
name,
co256_dir.replace("\\", "\\\\"),
feature_dir.replace("\\", "\\\\"),
name,
spk_id5,
)
)
fea_dim = 256 if version19 == "v1"else 768
if if_f0_3:
for _ in range(2):
opt.append(
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature256/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
% (now_dir, sr2, now_dir, now_dir, now_dir, spk_id5)
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
% (now_dir, sr2, now_dir,fea_dim, now_dir, now_dir, spk_id5)
)
else:
for _ in range(2):
opt.append(
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature256/mute.npy|%s"
% (now_dir, sr2, now_dir, spk_id5)
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
% (now_dir, sr2, now_dir,fea_dim, spk_id5)
)
shuffle(opt)
with open("%s/filelist.txt" % exp_dir, "w") as f:
@ -651,7 +663,7 @@ def click_train(
if gpus16:
cmd = (
config.python_cmd
+ " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s"
+ " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s -sw %s -v %s"
% (
exp_dir1,
sr2,
@ -664,12 +676,14 @@ def click_train(
pretrained_D15,
1 if if_save_latest13 == i18n("") else 0,
1 if if_cache_gpu17 == i18n("") else 0,
1 if if_save_every_weights18 == i18n("") else 0,
version19,
)
)
else:
cmd = (
config.python_cmd
+ " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s"
+ " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s -sw %s -v %s"
% (
exp_dir1,
sr2,
@ -681,6 +695,8 @@ def click_train(
pretrained_D15,
1 if if_save_latest13 == i18n("") else 0,
1 if if_cache_gpu17 == i18n("") else 0,
1 if if_save_every_weights18 == i18n("") else 0,
version19,
)
)
print(cmd)
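
Editor's note: click_train appends two new flags to the training command, -sw (export a small weights model at every save point) and -v (model version), alongside the feature-directory switch above. Roughly, the assembled command looks like the line below; the flag values are made up for illustration, and the argparse definitions live in train_nsf_sim_cache_sid_load_pretrain.py outside this diff.

```python
# Illustrative only: the kind of command click_train builds for a v2, 40k, f0 run on GPU 0
cmd = (
    "python train_nsf_sim_cache_sid_load_pretrain.py "
    "-e mi-test -sr 40k -f0 1 -bs 8 -g 0 -te 20 -se 5 "
    "-pg pretrained_v2/f0G40k.pth -pd pretrained_v2/f0D40k.pth "
    "-l 0 -c 0 -sw 1 -v v2"
)
```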
@ -690,10 +706,10 @@ def click_train(
# but4.click(train_index, [exp_dir1], info3)
def train_index(exp_dir1):
def train_index(exp_dir1,version19):
exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
os.makedirs(exp_dir, exist_ok=True)
feature_dir = "%s/3_feature256" % (exp_dir)
feature_dir = "%s/3_feature256" % (exp_dir)if version19=="v1"else "%s/3_feature768" % (exp_dir)
if os.path.exists(feature_dir) == False:
return "请先进行特征提取!"
listdir_res = list(os.listdir(feature_dir))
@ -713,8 +729,8 @@ def train_index(exp_dir1):
infos = []
infos.append("%s,%s" % (big_npy.shape, n_ivf))
yield "\n".join(infos)
index = faiss.index_factory(256, "IVF%s,Flat" % n_ivf)
# index = faiss.index_factory(256, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,Flat" % n_ivf)
# index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
infos.append("training")
yield "\n".join(infos)
index_ivf = faiss.extract_index_ivf(index) #
@ -722,9 +738,9 @@ def train_index(exp_dir1):
index.train(big_npy)
faiss.write_index(
index,
"%s/trained_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe),
"%s/trained_IVF%s_Flat_nprobe_%s_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe,version19),
)
# faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan.index'%(exp_dir,n_ivf))
# faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
infos.append("adding")
yield "\n".join(infos)
batch_size_add = 8192
@ -734,9 +750,9 @@ def train_index(exp_dir1):
index,
"%s/added_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe),
)
infos.append("成功构建索引added_IVF%s_Flat_nprobe_%s.index" % (n_ivf, index_ivf.nprobe))
# faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan.index'%(exp_dir,n_ivf))
# infos.append("成功构建索引added_IVF%s_Flat_FastScan.index"%(n_ivf))
infos.append("成功构建索引added_IVF%s_Flat_nprobe_%s_%s.index" % (n_ivf, index_ivf.nprobe,version19))
# faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
# infos.append("成功构建索引added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19))
yield "\n".join(infos)
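
Editor's note: train_index now sizes the IVF,Flat index to the feature dimension (256 for v1, 768 for v2) and stamps the version into the trained/added index filenames. A condensed sketch of the index-building steps above, assuming big_npy is the (N, dim) float32 feature matrix; build_index is an illustrative wrapper, not the commit's function.

```python
import faiss
import numpy as np

def build_index(big_npy, version, out_path):
    """IVF,Flat index over extracted features, as in train_index above."""
    dim = 256 if version == "v1" else 768
    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
    index = faiss.index_factory(dim, "IVF%s,Flat" % n_ivf)
    index_ivf = faiss.extract_index_ivf(index)
    index_ivf.nprobe = 1
    index.train(big_npy)
    for i in range(0, big_npy.shape[0], 8192):   # add in batches of 8192, as above
        index.add(big_npy[i : i + 8192])
    faiss.write_index(index, out_path)           # e.g. added_IVF%s_Flat_nprobe_1_%s.index
    return index
```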
@ -757,6 +773,8 @@ def train1key(
pretrained_D15,
gpus16,
if_cache_gpu17,
if_save_every_weights18,
version19,
):
infos = []
@ -768,7 +786,7 @@ def train1key(
preprocess_log_path = "%s/preprocess.log" % model_log_dir
extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
feature256_dir = "%s/3_feature256" % model_log_dir
feature_dir = "%s/3_feature256" % model_log_dir if version19=="v1"else "%s/3_feature768" % model_log_dir
os.makedirs(model_log_dir, exist_ok=True)
#########step1:处理数据
@ -807,12 +825,12 @@ def train1key(
leng = len(gpus)
ps = []
for idx, n_g in enumerate(gpus):
cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s" % (
cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % (
config.device,
leng,
idx,
n_g,
model_log_dir,
model_log_dir,version19,
)
yield get_info_str(cmd)
p = Popen(
@ -831,13 +849,13 @@ def train1key(
f0nsf_dir = "%s/2b-f0nsf" % model_log_dir
names = (
set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
& set([name.split(".")[0] for name in os.listdir(feature256_dir)])
& set([name.split(".")[0] for name in os.listdir(feature_dir)])
& set([name.split(".")[0] for name in os.listdir(f0_dir)])
& set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
)
else:
names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
[name.split(".")[0] for name in os.listdir(feature256_dir)]
[name.split(".")[0] for name in os.listdir(feature_dir)]
)
opt = []
for name in names:
@ -847,7 +865,7 @@ def train1key(
% (
gt_wavs_dir.replace("\\", "\\\\"),
name,
feature256_dir.replace("\\", "\\\\"),
feature_dir.replace("\\", "\\\\"),
name,
f0_dir.replace("\\", "\\\\"),
name,
@ -862,22 +880,23 @@ def train1key(
% (
gt_wavs_dir.replace("\\", "\\\\"),
name,
feature256_dir.replace("\\", "\\\\"),
feature_dir.replace("\\", "\\\\"),
name,
spk_id5,
)
)
fea_dim=256 if version19=="v1"else 768
if if_f0_3:
for _ in range(2):
opt.append(
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature256/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
% (now_dir, sr2, now_dir, now_dir, now_dir, spk_id5)
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
% (now_dir, sr2, now_dir,fea_dim, now_dir, now_dir, spk_id5)
)
else:
for _ in range(2):
opt.append(
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature256/mute.npy|%s"
% (now_dir, sr2, now_dir, spk_id5)
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
% (now_dir, sr2, now_dir,fea_dim, spk_id5)
)
shuffle(opt)
with open("%s/filelist.txt" % model_log_dir, "w") as f:
@ -886,7 +905,7 @@ def train1key(
if gpus16:
cmd = (
config.python_cmd
+ " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s"
+ " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s -sw %s -v %s"
% (
exp_dir1,
sr2,
@ -899,12 +918,14 @@ def train1key(
pretrained_D15,
1 if if_save_latest13 == i18n("") else 0,
1 if if_cache_gpu17 == i18n("") else 0,
1 if if_save_every_weights18 == i18n("") else 0,
version19,
)
)
else:
cmd = (
config.python_cmd
+ " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s"
+ " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s -sw %s -v %s"
% (
exp_dir1,
sr2,
@ -916,6 +937,8 @@ def train1key(
pretrained_D15,
1 if if_save_latest13 == i18n("") else 0,
1 if if_cache_gpu17 == i18n("") else 0,
1 if if_save_every_weights18 == i18n("") else 0,
version19,
)
)
yield get_info_str(cmd)
@ -924,9 +947,9 @@ def train1key(
yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
#######step3b:训练索引
npys = []
listdir_res = list(os.listdir(feature256_dir))
listdir_res = list(os.listdir(feature_dir))
for name in sorted(listdir_res):
phone = np.load("%s/%s" % (feature256_dir, name))
phone = np.load("%s/%s" % (feature_dir, name))
npys.append(phone)
big_npy = np.concatenate(npys, 0)
@ -938,15 +961,15 @@ def train1key(
# n_ivf = big_npy.shape[0] // 39
n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
index = faiss.index_factory(256, "IVF%s,Flat" % n_ivf)
index = faiss.index_factory(256 if version19=="v1"else 768, "IVF%s,Flat" % n_ivf)
yield get_info_str("training index")
index_ivf = faiss.extract_index_ivf(index) #
index_ivf.nprobe = 1
index.train(big_npy)
faiss.write_index(
index,
"%s/trained_IVF%s_Flat_nprobe_%s.index"
% (model_log_dir, n_ivf, index_ivf.nprobe),
"%s/trained_IVF%s_Flat_nprobe_%s_%s.index"
% (model_log_dir, n_ivf, index_ivf.nprobe,version19),
)
yield get_info_str("adding index")
batch_size_add = 8192
@ -954,11 +977,11 @@ def train1key(
index.add(big_npy[i : i + batch_size_add])
faiss.write_index(
index,
"%s/added_IVF%s_Flat_nprobe_%s.index"
% (model_log_dir, n_ivf, index_ivf.nprobe),
"%s/added_IVF%s_Flat_nprobe_%s_%s.index"
% (model_log_dir, n_ivf, index_ivf.nprobe,version19),
)
yield get_info_str(
"成功构建索引, added_IVF%s_Flat_nprobe_%s.index" % (n_ivf, index_ivf.nprobe)
"成功构建索引, added_IVF%s_Flat_nprobe_%s_%s.index" % (n_ivf, index_ivf.nprobe,version19)
)
yield get_info_str(i18n("全流程结束!"))
@ -969,17 +992,18 @@ def change_info_(ckpt_path):
os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log"))
== False
):
return {"__type__": "update"}, {"__type__": "update"}
return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
try:
with open(
ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r"
) as f:
info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1])
sr, f0 = info["sample_rate"], info["if_f0"]
return sr, str(f0)
version="v2"if("version"in info and info["version"]=="v2")else"v1"
return sr, str(f0),version
except:
traceback.print_exc()
return {"__type__": "update"}, {"__type__": "update"}
return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
@ -1112,7 +1136,7 @@ with gr.Blocks() as app:
value="pm",
interactive=True,
)
filter_radius0 = gr.Slider(
filter_radius0=gr.Slider(
minimum=0,
maximum=7,
label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
@ -1131,9 +1155,7 @@ with gr.Blocks() as app:
choices=sorted(index_paths),
interactive=True,
)
refresh_button.click(
fn=change_choices, inputs=[], outputs=[sid0, file_index2]
)
refresh_button.click(fn=change_choices, inputs=[], outputs=[sid0, file_index2])
# file_big_npy1 = gr.Textbox(
# label=i18n("特征文件路径"),
# value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
@ -1146,7 +1168,7 @@ with gr.Blocks() as app:
value=0.76,
interactive=True,
)
resample_sr0 = gr.Slider(
resample_sr0=gr.Slider(
minimum=0,
maximum=48000,
label=i18n("后处理重采样至最终采样率0为不进行重采样"),
@ -1154,6 +1176,13 @@ with gr.Blocks() as app:
step=1,
interactive=True,
)
rms_mix_rate0 = gr.Slider(
minimum=0,
maximum=1,
label=i18n("输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"),
value=1,
interactive=True,
)
f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
but0 = gr.Button(i18n("转换"), variant="primary")
with gr.Column():
@ -1173,6 +1202,7 @@ with gr.Blocks() as app:
index_rate1,
filter_radius0,
resample_sr0,
rms_mix_rate0
],
[vc_output1, vc_output2],
)
@ -1192,7 +1222,7 @@ with gr.Blocks() as app:
value="pm",
interactive=True,
)
filter_radius1 = gr.Slider(
filter_radius1=gr.Slider(
minimum=0,
maximum=7,
label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
@ -1223,7 +1253,7 @@ with gr.Blocks() as app:
value=1,
interactive=True,
)
resample_sr1 = gr.Slider(
resample_sr1=gr.Slider(
minimum=0,
maximum=48000,
label=i18n("后处理重采样至最终采样率0为不进行重采样"),
@ -1231,6 +1261,13 @@ with gr.Blocks() as app:
step=1,
interactive=True,
)
rms_mix_rate1 = gr.Slider(
minimum=0,
maximum=1,
label=i18n("输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"),
value=1,
interactive=True,
)
with gr.Column():
dir_input = gr.Textbox(
label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
@ -1256,6 +1293,7 @@ with gr.Blocks() as app:
index_rate2,
filter_radius1,
resample_sr1,
rms_mix_rate1
],
[vc_output3],
)
@ -1324,6 +1362,13 @@ with gr.Blocks() as app:
value=True,
interactive=True,
)
version19 = gr.Radio(
label=i18n("版本(目前仅40k支持了v2)"),
choices=["v1", "v2"],
value="v1",
interactive=True,
visible=True,
)
np7 = gr.Slider(
minimum=0,
maximum=ncpu,
@ -1353,7 +1398,7 @@ with gr.Blocks() as app:
but1 = gr.Button(i18n("处理数据"), variant="primary")
info1 = gr.Textbox(label=i18n("输出信息"), value="")
but1.click(
preprocess_dataset, [trainset_dir4, exp_dir1, sr2, np7], [info1]
preprocess_dataset, [trainset_dir4, exp_dir1, sr2,np7], [info1]
)
with gr.Group():
gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"))
@ -1378,7 +1423,7 @@ with gr.Blocks() as app:
info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
but2.click(
extract_f0_feature,
[gpus6, np7, f0method8, if_f0_3, exp_dir1],
[gpus6, np7, f0method8, if_f0_3, exp_dir1,version19],
[info2],
)
with gr.Group():
@ -1422,6 +1467,14 @@ with gr.Blocks() as app:
value=i18n(""),
interactive=True,
)
if_save_every_weights18 = gr.Radio(
label=i18n(
"是否在每次保存时间点将最终小模型保存至weights文件夹"
),
choices=[i18n(""), i18n("")],
value=i18n(""),
interactive=True,
)
with gr.Row():
pretrained_G14 = gr.Textbox(
label=i18n("加载预训练底模G路径"),
@ -1434,12 +1487,15 @@ with gr.Blocks() as app:
interactive=True,
)
sr2.change(
change_sr2, [sr2, if_f0_3], [pretrained_G14, pretrained_D15]
change_sr2, [sr2, if_f0_3,version19], [pretrained_G14, pretrained_D15,version19]
)
version19.change(
change_version19, [sr2, if_f0_3,version19], [pretrained_G14, pretrained_D15]
)
if_f0_3.change(
change_f0,
[if_f0_3, sr2],
[np7, f0method8, pretrained_G14, pretrained_D15],
[if_f0_3, sr2,version19],
[f0method8, pretrained_G14, pretrained_D15],
)
gpus16 = gr.Textbox(
label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
@ -1465,10 +1521,12 @@ with gr.Blocks() as app:
pretrained_D15,
gpus16,
if_cache_gpu17,
if_save_every_weights18,
version19,
],
info3,
)
but4.click(train_index, [exp_dir1], info3)
but4.click(train_index, [exp_dir1,version19], info3)
but5.click(
train1key,
[
@ -1487,6 +1545,8 @@ with gr.Blocks() as app:
pretrained_D15,
gpus16,
if_cache_gpu17,
if_save_every_weights18,
version19,
],
info3,
)
@ -1526,12 +1586,18 @@ with gr.Blocks() as app:
max_lines=1,
interactive=True,
)
version_2=gr.Radio(
label=i18n("模型版本型号"),
choices=["v1", "v2"],
value="v1",
interactive=True,
)
with gr.Row():
but6 = gr.Button(i18n("融合"), variant="primary")
info4 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
but6.click(
merge,
[ckpt_a, ckpt_b, alpha_a, sr_, if_f0_, info__, name_to_save0],
[ckpt_a, ckpt_b, alpha_a, sr_, if_f0_, info__, name_to_save0,version_2],
info4,
) # def merge(path1,path2,alpha1,sr,f0,info):
with gr.Group():
@ -1589,15 +1655,21 @@ with gr.Blocks() as app:
value="1",
interactive=True,
)
version_1=gr.Radio(
label=i18n("模型版本型号"),
choices=["v1", "v2"],
value="v1",
interactive=True,
)
info___ = gr.Textbox(
label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True
)
but9 = gr.Button(i18n("提取"), variant="primary")
info7 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
ckpt_path2.change(change_info_, [ckpt_path2], [sr__, if_f0__])
ckpt_path2.change(change_info_, [ckpt_path2], [sr__, if_f0__,version_1])
but9.click(
extract_small_model,
[ckpt_path2, save_name, sr__, if_f0__, info___],
[ckpt_path2, save_name, sr__, if_f0__, info___,version_1],
info7,
)
@ -1615,16 +1687,16 @@ with gr.Blocks() as app:
butOnnx = gr.Button(i18n("导出Onnx模型"), variant="primary")
butOnnx.click(export_onnx, [ckpt_dir, onnx_dir, moevs], infoOnnx)
tab_faq = i18n("常见问题解答")
tab_faq=i18n("常见问题解答")
with gr.TabItem(tab_faq):
try:
if tab_faq == "常见问题解答":
with open("docs/faq.md", "r", encoding="utf8") as f:
info = f.read()
if(tab_faq=="常见问题解答"):
with open("docs/faq.md","r",encoding="utf8")as f:info=f.read()
else:
with open("docs/faq_en.md", "r") as f:
info = f.read()
gr.Markdown(value=info)
with open("docs/faq_en.md", "r")as f:info = f.read()
gr.Markdown(
value=info
)
except:
gr.Markdown(traceback.format_exc())

View File

@ -31,14 +31,21 @@ from data_utils import (
TextAudioCollate,
DistributedBucketSampler,
)
from infer_pack.models import (
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono,
MultiPeriodDiscriminator,
)
if(hps.version=="v1"):
from infer_pack.models import (
SynthesizerTrnMs256NSFsid as RVC_Model_f0,
SynthesizerTrnMs256NSFsid_nono as RVC_Model_nof0,
MultiPeriodDiscriminator,
)
else:
from infer_pack.models import (
SynthesizerTrnMs768NSFsid as RVC_Model_f0,
SynthesizerTrnMs768NSFsid_nono as RVC_Model_nof0,
MultiPeriodDiscriminatorV2 as MultiPeriodDiscriminator,
)
from losses import generator_loss, discriminator_loss, feature_loss, kl_loss
from mel_processing import mel_spectrogram_torch, spec_to_mel_torch
from process_ckpt import savee
global_step = 0
@ -63,7 +70,7 @@ def run(rank, n_gpus, hps):
if rank == 0:
logger = utils.get_logger(hps.model_dir)
logger.info(hps)
utils.check_git_hash(hps.model_dir)
# utils.check_git_hash(hps.model_dir)
writer = SummaryWriter(log_dir=hps.model_dir)
writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
@ -104,7 +111,7 @@ def run(rank, n_gpus, hps):
prefetch_factor=8,
)
if hps.if_f0 == 1:
net_g = SynthesizerTrnMs256NSFsid(
net_g = RVC_Model_f0(
hps.data.filter_length // 2 + 1,
hps.train.segment_size // hps.data.hop_length,
**hps.model,
@ -112,7 +119,7 @@ def run(rank, n_gpus, hps):
sr=hps.sample_rate,
)
else:
net_g = SynthesizerTrnMs256NSFsid_nono(
net_g = RVC_Model_nof0(
hps.data.filter_length // 2 + 1,
hps.train.segment_size // hps.data.hop_length,
**hps.model,
@ -343,7 +350,7 @@ def train_and_evaluate(
spec = spec.cuda(rank, non_blocking=True)
spec_lengths = spec_lengths.cuda(rank, non_blocking=True)
wave = wave.cuda(rank, non_blocking=True)
wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
# wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
# Calculate
with autocast(enabled=hps.train.fp16_run):
@ -428,10 +435,10 @@ def train_and_evaluate(
)
)
# Amor For Tensorboard display
if loss_mel > 50:
loss_mel = 50
if loss_kl > 5:
loss_kl = 5
if loss_mel > 75:
loss_mel = 75
if loss_kl > 9:
loss_kl = 9
logger.info([global_step, lr])
logger.info(
@ -512,12 +519,20 @@ def train_and_evaluate(
epoch,
os.path.join(hps.model_dir, "D_{}.pth".format(2333333)),
)
if(rank==0 and hps.save_every_weights=="1"):
if hasattr(net_g, "module"):
ckpt = net_g.module.state_dict()
else:
ckpt = net_g.state_dict()
logger.info(
"saving ckpt %s_e%s:%s"
% (hps.name,epoch,savee(ckpt, hps.sample_rate, hps.if_f0, hps.name+"_e%s"%epoch, epoch,hps.version))
)
if rank == 0:
logger.info("====> Epoch: {}".format(epoch))
if epoch >= hps.total_epoch and rank == 0:
logger.info("Training is done. The program is closed.")
from process_ckpt import savee # def savee(ckpt,sr,if_f0,name,epoch):
if hasattr(net_g, "module"):
ckpt = net_g.module.state_dict()
@ -525,7 +540,7 @@ def train_and_evaluate(
ckpt = net_g.state_dict()
logger.info(
"saving final ckpt:%s"
% (savee(ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch))
% (savee(ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch,hps.version))
)
sleep(1)
os._exit(2333333)
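
Editor's note: besides routing hps.version into savee, the training loop gains a per-save-point export: when the new -sw flag is "1", rank 0 unwraps DistributedDataParallel (if present) and writes a small inference checkpoint named <name>_e<epoch>, instead of exporting only once at the end of training. A sketch of that export pattern, assuming savee keeps the argument order used in the calls above; export_small_model is an illustrative name.

```python
from process_ckpt import savee  # called above as savee(ckpt, sr, if_f0, name, epoch, version)

def export_small_model(net_g, hps, epoch, suffix=""):
    """Unwrap DDP if needed, then save a small inference model via savee."""
    ckpt = net_g.module.state_dict() if hasattr(net_g, "module") else net_g.state_dict()
    return savee(
        ckpt,
        hps.sample_rate,
        hps.if_f0,
        hps.name + suffix,   # e.g. "%s_e%s" % (hps.name, epoch) at intermediate save points
        epoch,
        hps.version,
    )
```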

View File

@ -32,19 +32,19 @@ class PreProcess:
def __init__(self, sr, exp_dir):
self.slicer = Slicer(
sr=sr,
threshold=-40,
min_length=800,
threshold=-42,
min_length=1500,
min_interval=400,
hop_size=15,
max_sil_kept=150,
max_sil_kept=500,
)
self.sr = sr
self.bh, self.ah = signal.butter(N=5, Wn=48, btype="high", fs=self.sr)
self.per = 3.7
self.overlap = 0.3
self.tail = self.per + self.overlap
self.max = 0.95
self.alpha = 0.8
self.max = 0.9
self.alpha = 0.75
self.exp_dir = exp_dir
self.gt_wavs_dir = "%s/0_gt_wavs" % exp_dir
self.wavs16k_dir = "%s/1_16k_wavs" % exp_dir
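
Editor's note: the preprocessing change is pure retuning: the slicer becomes more conservative (threshold -42, min_length 1500, max_sil_kept 500) and the loudness targets drop (max 0.9, alpha 0.75), while the 5th-order 48 Hz Butterworth high-pass stays. For reference, this is how that high-pass is applied to an audio array with SciPy; the filtering call itself sits outside the shown hunk, so treat it as a sketch rather than the commit's exact code.

```python
import numpy as np
from scipy import signal

sr = 40000
# 5th-order Butterworth high-pass at 48 Hz, matching the __init__ above
bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=sr)

def highpass(audio: np.ndarray) -> np.ndarray:
    """Strip DC offset and sub-bass rumble before slicing and resampling."""
    return signal.lfilter(bh, ah, audio)
```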

View File

@ -2,18 +2,16 @@ import numpy as np, parselmouth, torch, pdb
from time import time as ttime
import torch.nn.functional as F
import scipy.signal as signal
import pyworld, os, traceback, faiss, librosa
import pyworld, os, traceback, faiss,librosa
from scipy import signal
from functools import lru_cache
bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
input_audio_path2wav = {}
input_audio_path2wav={}
@lru_cache
def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
audio = input_audio_path2wav[input_audio_path]
def cache_harvest_f0(input_audio_path,fs,f0max,f0min,frame_period):
audio=input_audio_path2wav[input_audio_path]
f0, t = pyworld.harvest(
audio,
fs=fs,
@ -24,6 +22,17 @@ def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
f0 = pyworld.stonemask(audio, f0, t, fs)
return f0
def change_rms(data1,sr1,data2,sr2,rate):#1是输入音频2是输出音频,rate是2的占比
# print(data1.max(),data2.max())
rms1 = librosa.feature.rms(y=data1, frame_length=sr1//2*2, hop_length=sr1//2)#每半秒一个点
rms2 = librosa.feature.rms(y=data2, frame_length=sr2//2*2, hop_length=sr2//2)
rms1=torch.from_numpy(rms1)
rms1=F.interpolate(rms1.unsqueeze(0), size=data2.shape[0],mode='linear').squeeze()
rms2=torch.from_numpy(rms2)
rms2=F.interpolate(rms2.unsqueeze(0), size=data2.shape[0],mode='linear').squeeze()
rms2=torch.max(rms2,torch.zeros_like(rms2)+1e-6)
data2*=(torch.pow(rms1,torch.tensor(1-rate))*torch.pow(rms2,torch.tensor(rate-1))).numpy()
return data2
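
Editor's note: change_rms is the helper behind the new rms_mix_rate option (its comment reads: 1 is the input audio, 2 is the output audio, rate is the share of 2). It measures both RMS envelopes at roughly two points per second, interpolates them to sample resolution, and scales the output by rms1^(1-rate) * rms2^(rate-1), i.e. (rms1/rms2)^(1-rate): rate=1 keeps the converted audio's envelope, rate=0 fully imposes the source envelope. A readable restatement with English comments; mix_rms is an illustrative name, not the commit's function.

```python
import numpy as np
import torch
import torch.nn.functional as F
import librosa

def mix_rms(source, sr_src, converted, sr_out, rate):
    """Blend the converted audio's volume envelope toward the source's."""
    # RMS every half second, then upsample to one value per output sample
    rms1 = librosa.feature.rms(y=source, frame_length=sr_src // 2 * 2, hop_length=sr_src // 2)
    rms2 = librosa.feature.rms(y=converted, frame_length=sr_out // 2 * 2, hop_length=sr_out // 2)
    rms1 = F.interpolate(torch.from_numpy(rms1).unsqueeze(0), size=converted.shape[0], mode="linear").squeeze()
    rms2 = F.interpolate(torch.from_numpy(rms2).unsqueeze(0), size=converted.shape[0], mode="linear").squeeze()
    rms2 = torch.clamp(rms2, min=1e-6)   # avoid blow-ups on silent stretches
    gain = torch.pow(rms1, 1 - rate) * torch.pow(rms2, rate - 1)
    return converted * gain.numpy()
```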
class VC(object):
def __init__(self, tgt_sr, config):
@ -44,16 +53,7 @@ class VC(object):
self.t_max = self.sr * self.x_max # 免查询时长阈值
self.device = config.device
def get_f0(
self,
input_audio_path,
x,
p_len,
f0_up_key,
f0_method,
filter_radius,
inp_f0=None,
):
def get_f0(self, input_audio_path,x, p_len, f0_up_key, f0_method,filter_radius, inp_f0=None):
global input_audio_path2wav
time_step = self.window / self.sr * 1000
f0_min = 50
@ -77,9 +77,9 @@ class VC(object):
f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
)
elif f0_method == "harvest":
input_audio_path2wav[input_audio_path] = x.astype(np.double)
f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
if filter_radius > 2:
input_audio_path2wav[input_audio_path]=x.astype(np.double)
f0=cache_harvest_f0(input_audio_path,self.sr,f0_max,f0_min,10)
if(filter_radius>2):
f0 = signal.medfilt(f0, 3)
f0 *= pow(2, f0_up_key / 12)
# with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
@ -118,6 +118,7 @@ class VC(object):
index,
big_npy,
index_rate,
version,
): # ,file_index,file_big_npy
feats = torch.from_numpy(audio0)
if self.is_half:
@ -133,12 +134,12 @@ class VC(object):
inputs = {
"source": feats.to(self.device),
"padding_mask": padding_mask,
"output_layer": 9, # layer 9
"output_layer": 9if version=="v1"else 12,
}
t0 = ttime()
with torch.no_grad():
logits = model.extract_features(**inputs)
feats = model.final_proj(logits[0])
feats = model.final_proj(logits[0])if version=="v1"else logits[0]
if (
isinstance(index, type(None)) == False
@ -176,14 +177,14 @@ class VC(object):
with torch.no_grad():
if pitch != None and pitchf != None:
audio1 = (
(net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768)
(net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0])
.data.cpu()
.float()
.numpy()
)
else:
audio1 = (
(net_g.infer(feats, p_len, sid)[0][0, 0] * 32768)
(net_g.infer(feats, p_len, sid)[0][0, 0])
.data.cpu()
.float()
.numpy()
@ -213,6 +214,8 @@ class VC(object):
filter_radius,
tgt_sr,
resample_sr,
rms_mix_rate,
version,
f0_file=None,
):
if (
@ -267,15 +270,7 @@ class VC(object):
sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
pitch, pitchf = None, None
if if_f0 == 1:
pitch, pitchf = self.get_f0(
input_audio_path,
audio_pad,
p_len,
f0_up_key,
f0_method,
filter_radius,
inp_f0,
)
pitch, pitchf = self.get_f0(input_audio_path,audio_pad, p_len, f0_up_key, f0_method,filter_radius, inp_f0)
pitch = pitch[:p_len]
pitchf = pitchf[:p_len]
if self.device == "mps":
@ -299,6 +294,7 @@ class VC(object):
index,
big_npy,
index_rate,
version,
)[self.t_pad_tgt : -self.t_pad_tgt]
)
else:
@ -314,6 +310,7 @@ class VC(object):
index,
big_npy,
index_rate,
version,
)[self.t_pad_tgt : -self.t_pad_tgt]
)
s = t
@ -330,6 +327,7 @@ class VC(object):
index,
big_npy,
index_rate,
version,
)[self.t_pad_tgt : -self.t_pad_tgt]
)
else:
@ -345,14 +343,20 @@ class VC(object):
index,
big_npy,
index_rate,
version,
)[self.t_pad_tgt : -self.t_pad_tgt]
)
audio_opt = np.concatenate(audio_opt)
if resample_sr >= 16000 and tgt_sr != resample_sr:
if(rms_mix_rate!=1):
audio_opt=change_rms(audio,16000,audio_opt,tgt_sr,rms_mix_rate)
if(resample_sr>=16000 and tgt_sr!=resample_sr):
audio_opt = librosa.resample(
audio_opt, orig_sr=tgt_sr, target_sr=resample_sr
)
audio_opt = audio_opt.astype(np.int16)
audio_max=np.abs(audio_opt).max()/0.99
max_int16=32768
if(audio_max>1):max_int16/=audio_max
audio_opt=(audio_opt * max_int16).astype(np.int16)
del pitch, pitchf, sid
if torch.cuda.is_available():
torch.cuda.empty_cache()
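
Editor's note: two output-stage changes close the pipeline: the 32768 gain is removed from the net_g.infer calls so the signal stays in float through the new RMS mixing and the optional resample, and the int16 conversion moves to the very end, where the gain is reduced only if the waveform would exceed 0.99 of full scale. A sketch of that final quantization step; float_to_int16 is an illustrative name.

```python
import numpy as np

def float_to_int16(audio_opt: np.ndarray) -> np.ndarray:
    """Scale float audio into int16 range, shrinking the gain only when needed."""
    audio_max = np.abs(audio_opt).max() / 0.99
    max_int16 = 32768
    if audio_max > 1:
        max_int16 /= audio_max   # leave ~1% headroom instead of hard clipping
    return (audio_opt * max_int16).astype(np.int16)
```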