1
0
mirror of synced 2024-11-23 23:21:03 +01:00

train 1-2b

This commit is contained in:
Ftps 2023-08-21 20:53:11 +09:00
parent cd924f9eec
commit ed7b11eb49
11 changed files with 214 additions and 80 deletions

2
assets/hubert/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

2
assets/rmvpe/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

28
i18n.py
View File

@ -1,28 +0,0 @@
import locale
import json
import os
def load_language_list(language):
    """Return the parsed JSON translation table for ``language``.

    The table is read from ``./i18n/locale/<language>.json`` (relative to the
    current working directory) as UTF-8. Any error raised by ``open`` or
    ``json.load`` propagates to the caller.
    """
    locale_file = f"./i18n/locale/{language}.json"
    with open(locale_file, "r", encoding="utf-8") as handle:
        return json.load(handle)
class I18nAuto:
    """Callable translation lookup backed by a per-language JSON table.

    Calling an instance with a key returns the translated string, or the key
    itself when the table has no entry for it.
    """

    def __init__(self, language=None):
        """Select a language and load its translation table.

        ``language`` may be a locale name such as ``"en_US"``; ``"Auto"`` or
        ``None`` means "use the system default locale". When no translation
        file exists for the selected language, ``"en_US"`` is used instead.
        """
        if language in ["Auto", None]:
            # getlocale can't identify the system's language ((None, None)).
            # NOTE(review): locale.getdefaultlocale() is deprecated in recent
            # Python versions -- confirm before upgrading the interpreter.
            language = locale.getdefaultlocale()[0]
        # Probe the same path that load_language_list() opens; checking a
        # different directory would make the en_US fallback fire (or not fire)
        # independently of whether the file to be loaded actually exists.
        if not os.path.exists(f"./i18n/locale/{language}.json"):
            language = "en_US"
        self.language = language
        # print("Use Language:", language)
        self.language_map = load_language_list(language)

    def __call__(self, key):
        """Return the translation for ``key``, or ``key`` if none exists."""
        return self.language_map.get(key, key)

    def print(self):
        """Print the language currently in use."""
        print("Use Language:", self.language)

View File

@ -20,8 +20,13 @@ import faiss
import gradio as gr
from configs.config import Config
import fairseq
from i18n import I18nAuto
from lib.train.process_ckpt import change_info, extract_small_model, merge, show_info
from i18n.i18n import I18nAuto
from infer.lib.train.process_ckpt import (
change_info,
extract_small_model,
merge,
show_info,
)
from sklearn.cluster import MiniBatchKMeans
from dotenv import load_dotenv
@ -197,7 +202,7 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
f.close()
cmd = (
config.python_cmd
+ ' trainset_preprocess_pipeline_print.py "%s" %s %s "%s/logs/%s" '
+ ' infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" '
% (trainset_dir, sr, n_p, now_dir, exp_dir)
+ str(config.noparallel)
)
@ -232,11 +237,15 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
f.close()
if if_f0:
if f0method != "rmvpe_gpu":
cmd = config.python_cmd + ' extract_f0_print.py "%s/logs/%s" %s %s' % (
now_dir,
exp_dir,
n_p,
f0method,
cmd = (
config.python_cmd
+ ' infer/modules/train/extract/extract_f0_print.py "%s/logs/%s" %s %s'
% (
now_dir,
exp_dir,
n_p,
f0method,
)
)
print(cmd)
p = Popen(
@ -259,7 +268,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
for idx, n_g in enumerate(gpus_rmvpe):
cmd = (
config.python_cmd
+ ' extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s '
+ ' infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s '
% (leng, idx, n_g, now_dir, exp_dir, config.is_half)
)
print(cmd)
@ -277,9 +286,13 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
),
).start()
else:
cmd = config.python_cmd + ' extract_f0_rmvpe_dml.py "%s/logs/%s" ' % (
now_dir,
exp_dir,
cmd = (
config.python_cmd
+ ' infer/modules/train/extract/extract_f0_rmvpe_dml.py "%s/logs/%s" '
% (
now_dir,
exp_dir,
)
)
print(cmd)
p = Popen(
@ -312,7 +325,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
for idx, n_g in enumerate(gpus):
cmd = (
config.python_cmd
+ ' extract_feature_print.py %s %s %s %s "%s/logs/%s" %s'
+ ' infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s'
% (
config.device,
leng,
@ -353,26 +366,26 @@ def change_sr2(sr2, if_f0_3, version19):
path_str = "" if version19 == "v1" else "_v2"
f0_str = "f0" if if_f0_3 else ""
if_pretrained_generator_exist = os.access(
"pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
)
if_pretrained_discriminator_exist = os.access(
"pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
)
if not if_pretrained_generator_exist:
print(
"pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
"not exist, will not use pretrained model",
)
if not if_pretrained_discriminator_exist:
print(
"pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
"not exist, will not use pretrained model",
)
return (
"pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
if if_pretrained_generator_exist
else "",
"pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
if if_pretrained_discriminator_exist
else "",
)
@ -389,26 +402,26 @@ def change_version19(sr2, if_f0_3, version19):
)
f0_str = "f0" if if_f0_3 else ""
if_pretrained_generator_exist = os.access(
"pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
)
if_pretrained_discriminator_exist = os.access(
"pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
)
if not if_pretrained_generator_exist:
print(
"pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
"not exist, will not use pretrained model",
)
if not if_pretrained_discriminator_exist:
print(
"pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
"not exist, will not use pretrained model",
)
return (
"pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
if if_pretrained_generator_exist
else "",
"pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
if if_pretrained_discriminator_exist
else "",
to_return_sr2,
@ -418,37 +431,37 @@ def change_version19(sr2, if_f0_3, version19):
def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15
path_str = "" if version19 == "v1" else "_v2"
if_pretrained_generator_exist = os.access(
"pretrained%s/f0G%s.pth" % (path_str, sr2), os.F_OK
"assets/pretrained%s/f0G%s.pth" % (path_str, sr2), os.F_OK
)
if_pretrained_discriminator_exist = os.access(
"pretrained%s/f0D%s.pth" % (path_str, sr2), os.F_OK
"assets/pretrained%s/f0D%s.pth" % (path_str, sr2), os.F_OK
)
if not if_pretrained_generator_exist:
print(
"pretrained%s/f0G%s.pth" % (path_str, sr2),
"assets/pretrained%s/f0G%s.pth" % (path_str, sr2),
"not exist, will not use pretrained model",
)
if not if_pretrained_discriminator_exist:
print(
"pretrained%s/f0D%s.pth" % (path_str, sr2),
"assets/pretrained%s/f0D%s.pth" % (path_str, sr2),
"not exist, will not use pretrained model",
)
if if_f0_3:
return (
{"visible": True, "__type__": "update"},
"pretrained%s/f0G%s.pth" % (path_str, sr2)
"assets/pretrained%s/f0G%s.pth" % (path_str, sr2)
if if_pretrained_generator_exist
else "",
"pretrained%s/f0D%s.pth" % (path_str, sr2)
"assets/pretrained%s/f0D%s.pth" % (path_str, sr2)
if if_pretrained_discriminator_exist
else "",
)
return (
{"visible": False, "__type__": "update"},
("pretrained%s/G%s.pth" % (path_str, sr2))
("assets/pretrained%s/G%s.pth" % (path_str, sr2))
if if_pretrained_generator_exist
else "",
("pretrained%s/D%s.pth" % (path_str, sr2))
("assets/pretrained%s/D%s.pth" % (path_str, sr2))
if if_pretrained_discriminator_exist
else "",
)
@ -548,7 +561,7 @@ def click_train(
if gpus16:
cmd = (
config.python_cmd
+ ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
+ ' infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
% (
exp_dir1,
sr2,
@ -568,7 +581,7 @@ def click_train(
else:
cmd = (
config.python_cmd
+ ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
+ ' infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
% (
exp_dir1,
sr2,
@ -1482,12 +1495,12 @@ with gr.Blocks(title="RVC WebUI") as app:
with gr.Row():
pretrained_G14 = gr.Textbox(
label=i18n("加载预训练底模G路径"),
value="pretrained_v2/f0G40k.pth",
value="assets/pretrained_v2/f0G40k.pth",
interactive=True,
)
pretrained_D15 = gr.Textbox(
label=i18n("加载预训练底模D路径"),
value="pretrained_v2/f0D40k.pth",
value="assets/pretrained_v2/f0D40k.pth",
interactive=True,
)
sr2.change(

View File

@ -1,7 +1,6 @@
import torch, traceback, os, sys
now_dir = os.getcwd()
sys.path.append(now_dir)
from collections import OrderedDict
from i18n.i18n import I18nAuto

View File

@ -362,9 +362,9 @@ def get_hparams(init=True):
os.makedirs(experiment_dir)
if args.version == "v1" or args.sample_rate == "40k":
config_path = "configs/%s.json" % args.sample_rate
config_path = "configs/v1/%s.json" % args.sample_rate
else:
config_path = "configs/%s_v2.json" % args.sample_rate
config_path = "configs/v2/%s.json" % args.sample_rate
config_save_path = os.path.join(experiment_dir, "config.json")
if init:
with open(config_path, "r") as f:

View File

@ -79,7 +79,9 @@ class FeatureInput(object):
from lib.rmvpe import RMVPE
print("loading rmvpe model")
self.model_rmvpe = RMVPE("rmvpe.pt", is_half=False, device="cpu")
self.model_rmvpe = RMVPE(
"assets/rmvpe/rmvpe.pt", is_half=False, device="cpu"
)
f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
return f0

View File

@ -42,7 +42,9 @@ class FeatureInput(object):
from lib.rmvpe import RMVPE
print("loading rmvpe model")
self.model_rmvpe = RMVPE("rmvpe.pt", is_half=is_half, device="cuda")
self.model_rmvpe = RMVPE(
"assets/rmvpe/rmvpe.pt", is_half=is_half, device="cuda"
)
f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
return f0

View File

@ -40,7 +40,9 @@ class FeatureInput(object):
from lib.rmvpe import RMVPE
print("loading rmvpe model")
self.model_rmvpe = RMVPE("rmvpe.pt", is_half=False, device=device)
self.model_rmvpe = RMVPE(
"assets/rmvpe/rmvpe.pt", is_half=False, device=device
)
f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
return f0

View File

@ -0,0 +1,135 @@
import os, sys, traceback
# MPS-related PyTorch settings; presumably they must be in the environment
# before torch is imported further down -- TODO confirm.
os.environ.update(
    {
        "PYTORCH_ENABLE_MPS_FALLBACK": "1",
        "PYTORCH_MPS_HIGH_WATERMARK_RATIO": "0.0",
    }
)

# Positional command-line arguments (script name excluded):
#   device n_part i_part exp_dir version
#   device n_part i_part i_gpu exp_dir version
cli_args = sys.argv[1:]
device = cli_args[0]
n_part = int(cli_args[1])
i_part = int(cli_args[2])
if len(cli_args) == 5:
    exp_dir, version = cli_args[3], cli_args[4]
else:
    i_gpu, exp_dir = cli_args[3], cli_args[4]
    # Restrict this process to the GPU index given on the command line.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
    version = cli_args[5]
import torch
import torch.nn.functional as F
import soundfile as sf
import numpy as np
import fairseq
if "privateuseone" not in device:
device = "cpu"
if torch.cuda.is_available():
device = "cuda"
elif torch.backends.mps.is_available():
device = "mps"
else:
import torch_directml
device = torch_directml.device(torch_directml.default_device())
def forward_dml(ctx, x, scale):
ctx.scale = scale
res = x.clone().detach()
return res
fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
# Log file shared by printt(); opened in append mode inside the experiment dir.
f = open("%s/extract_f0_feature.log" % exp_dir, "a+")


def printt(strr):
    """Print ``strr`` to stdout and append it as one line to the log file."""
    print(strr)
    log_line = "%s\n" % strr
    f.write(log_line)
    # Flush right away so the line reaches the file without waiting for close.
    f.flush()
printt(sys.argv)
model_path = "assets/hubert/hubert_base.pt"
printt(exp_dir)

# Input wavs and the version-dependent output directory for the features.
wavPath = "%s/1_16k_wavs" % exp_dir
if version == "v1":
    outPath = "%s/3_feature256" % exp_dir
else:
    outPath = "%s/3_feature768" % exp_dir
os.makedirs(outPath, exist_ok=True)
# wave must be 16k, hop_size=320
def readwave(wav_path, normalize=False):
    """Read a 16 kHz wav file and return it as a ``(1, n)`` float tensor.

    Two-dimensional input is averaged over its last axis so a single channel
    remains. When ``normalize`` is true, layer normalisation over the whole
    signal is applied under ``torch.no_grad()``.

    Raises ``AssertionError`` when the sample rate is not 16000 or when the
    signal is not one-dimensional after the optional down-mix.
    """
    samples, rate = sf.read(wav_path)
    assert rate == 16000
    signal = torch.from_numpy(samples).float()
    if signal.dim() == 2:  # double channels
        signal = signal.mean(-1)
    assert signal.dim() == 1, signal.dim()
    if normalize:
        with torch.no_grad():
            signal = F.layer_norm(signal, signal.shape)
    return signal.view(1, -1)
# HuBERT model
printt("load model(s) from {}".format(model_path))
# Stop early when the HuBERT checkpoint is missing.
if not os.access(model_path, os.F_OK):
    printt(
        "Error: Extracting is shut down because %s does not exist, you may download it from https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main"
        % model_path
    )
    # sys.exit instead of the site-provided exit(); the exit status is kept
    # at 0 so whatever launches this script sees the same result as before.
    sys.exit(0)
models, saved_cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
    [model_path],
    suffix="",
)
model = models[0]
model = model.to(device)
printt("move model to %s" % device)
# Half precision is used on every device other than "mps" and "cpu".
use_half = device not in ["mps", "cpu"]
if use_half:
    model = model.half()
model.eval()

# This worker handles every n_part-th file, starting at offset i_part.
todo = sorted(os.listdir(wavPath))[i_part::n_part]
n = max(1, len(todo) // 10)  # print at most about ten progress lines
if not todo:
    printt("no-feature-todo")
else:
    printt("all-feature-%s" % len(todo))
    for idx, file in enumerate(todo):
        try:
            if file.endswith(".wav"):
                wav_path = "%s/%s" % (wavPath, file)
                # Swap only the extension; str.replace("wav", "npy") would
                # also rewrite any "wav" occurring inside the file stem.
                out_path = "%s/%s" % (outPath, os.path.splitext(file)[0] + ".npy")

                # Features already extracted for this file: nothing to do.
                if os.path.exists(out_path):
                    continue

                feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
                padding_mask = torch.BoolTensor(feats.shape).fill_(False)
                inputs = {
                    "source": feats.half().to(device)
                    if use_half
                    else feats.to(device),
                    "padding_mask": padding_mask.to(device),
                    "output_layer": 9 if version == "v1" else 12,  # layer 9 (v1) / 12
                }
                with torch.no_grad():
                    logits = model.extract_features(**inputs)
                    # v1 applies the model's final projection; other versions
                    # keep the extracted features as they are.
                    feats = (
                        model.final_proj(logits[0]) if version == "v1" else logits[0]
                    )

                feats = feats.squeeze(0).float().cpu().numpy()
                # Only save features that contain no NaN values.
                if np.isnan(feats).sum() == 0:
                    np.save(out_path, feats, allow_pickle=False)
                else:
                    printt("%s-contains nan" % file)
                if idx % n == 0:
                    # "now" is the current index, "all" the total file count.
                    printt("now-%s,all-%s,%s,%s" % (idx, len(todo), file, feats.shape))
        except Exception:
            # Log the failure and carry on with the next file. Unlike a bare
            # except, KeyboardInterrupt and SystemExit still stop the run.
            printt(traceback.format_exc())
    printt("all-feature-done")

View File

@ -3,7 +3,7 @@ import os, sys
now_dir = os.getcwd()
sys.path.append(os.path.join(now_dir))
from lib.train import utils
from infer.lib.train import utils
import datetime
hps = utils.get_hparams()
@ -22,10 +22,10 @@ import torch.multiprocessing as mp
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.cuda.amp import autocast, GradScaler
from lib.infer_pack import commons
from infer.lib.infer_pack import commons
from time import sleep
from time import time as ttime
from lib.train.data_utils import (
from infer.lib.train.data_utils import (
TextAudioLoaderMultiNSFsid,
TextAudioLoader,
TextAudioCollateMultiNSFsid,
@ -34,20 +34,25 @@ from lib.train.data_utils import (
)
if hps.version == "v1":
from lib.infer_pack.models import (
from infer.lib.infer_pack.models import (
SynthesizerTrnMs256NSFsid as RVC_Model_f0,
SynthesizerTrnMs256NSFsid_nono as RVC_Model_nof0,
MultiPeriodDiscriminator,
)
else:
from lib.infer_pack.models import (
from infer.lib.infer_pack.models import (
SynthesizerTrnMs768NSFsid as RVC_Model_f0,
SynthesizerTrnMs768NSFsid_nono as RVC_Model_nof0,
MultiPeriodDiscriminatorV2 as MultiPeriodDiscriminator,
)
from lib.train.losses import generator_loss, discriminator_loss, feature_loss, kl_loss
from lib.train.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
from lib.train.process_ckpt import savee
from infer.lib.train.losses import (
generator_loss,
discriminator_loss,
feature_loss,
kl_loss,
)
from infer.lib.train.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
from infer.lib.train.process_ckpt import savee
global_step = 0