1
0
mirror of the upstream repository, synced 2024-11-23 23:21:03 +01:00

Merge pull request #1768 from RVC-Project/formatter-main

chore(format): run black on main
This commit is contained in:
RVC-Boss 2024-01-26 16:10:24 +08:00 committed by GitHub
commit 8c0cec1c9e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 264 additions and 146 deletions

View File

@ -92,7 +92,9 @@ if torch.cuda.is_available() or ngpu != 0:
"90",
"M4",
"T4",
"TITAN","4060", "L",
"TITAN",
"4060",
"L",
"6000",
]
):
@ -405,12 +407,16 @@ def get_pretrained_models(path_str, f0_str, sr2):
sr2,
)
return (
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
if if_pretrained_generator_exist
else "",
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
if if_pretrained_discriminator_exist
else "",
(
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
if if_pretrained_generator_exist
else ""
),
(
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
if if_pretrained_discriminator_exist
else ""
),
)
@ -735,7 +741,9 @@ def train1key(
if_save_every_weights18,
version19,
)
yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
yield get_info_str(
i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log")
)
# step3b:训练索引
[get_info_str(_) for _ in train_index(exp_dir1, version19)]
@ -782,7 +790,9 @@ with gr.Blocks(title="RVC WebUI") as app:
with gr.Row():
sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names))
with gr.Column():
refresh_button = gr.Button(i18n("刷新音色列表和索引路径"), variant="primary")
refresh_button = gr.Button(
i18n("刷新音色列表和索引路径"), variant="primary"
)
clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
spk_item = gr.Slider(
minimum=0,
@ -801,14 +811,19 @@ with gr.Blocks(title="RVC WebUI") as app:
with gr.Row():
with gr.Column():
vc_transform0 = gr.Number(
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
value=0,
)
input_audio0 = gr.Textbox(
label=i18n("输入待处理音频文件路径(默认是正确格式示例)"),
label=i18n(
"输入待处理音频文件路径(默认是正确格式示例)"
),
placeholder="C:\\Users\\Desktop\\audio_example.wav",
)
file_index1 = gr.Textbox(
label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
label=i18n(
"特征检索库文件路径,为空则使用下拉的选择结果"
),
placeholder="C:\\Users\\Desktop\\model_example.index",
interactive=True,
)
@ -821,9 +836,11 @@ with gr.Blocks(title="RVC WebUI") as app:
label=i18n(
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
),
choices=["pm", "harvest", "crepe", "rmvpe"]
if config.dml == False
else ["pm", "harvest", "rmvpe"],
choices=(
["pm", "harvest", "crepe", "rmvpe"]
if config.dml == False
else ["pm", "harvest", "rmvpe"]
),
value="rmvpe",
interactive=True,
)
@ -840,7 +857,9 @@ with gr.Blocks(title="RVC WebUI") as app:
rms_mix_rate0 = gr.Slider(
minimum=0,
maximum=1,
label=i18n("输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"),
label=i18n(
"输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"
),
value=0.25,
interactive=True,
)
@ -872,7 +891,9 @@ with gr.Blocks(title="RVC WebUI") as app:
interactive=True,
)
f0_file = gr.File(
label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"),
label=i18n(
"F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"
),
visible=False,
)
@ -892,7 +913,9 @@ with gr.Blocks(title="RVC WebUI") as app:
but0 = gr.Button(i18n("转换"), variant="primary")
with gr.Row():
vc_output1 = gr.Textbox(label=i18n("输出信息"))
vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
vc_output2 = gr.Audio(
label=i18n("输出音频(右下角三个点,点了可以下载)")
)
but0.click(
vc.vc_single,
@ -916,14 +939,19 @@ with gr.Blocks(title="RVC WebUI") as app:
)
with gr.TabItem(i18n("批量推理")):
gr.Markdown(
value=i18n("批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ")
value=i18n(
"批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. "
)
)
with gr.Row():
with gr.Column():
vc_transform1 = gr.Number(
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
value=0,
)
opt_input = gr.Textbox(
label=i18n("指定输出文件夹"), value="opt"
)
opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt")
file_index3 = gr.Textbox(
label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
value="",
@ -938,9 +966,11 @@ with gr.Blocks(title="RVC WebUI") as app:
label=i18n(
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
),
choices=["pm", "harvest", "crepe", "rmvpe"]
if config.dml == False
else ["pm", "harvest", "rmvpe"],
choices=(
["pm", "harvest", "crepe", "rmvpe"]
if config.dml == False
else ["pm", "harvest", "rmvpe"]
),
value="rmvpe",
interactive=True,
)
@ -975,7 +1005,9 @@ with gr.Blocks(title="RVC WebUI") as app:
rms_mix_rate1 = gr.Slider(
minimum=0,
maximum=1,
label=i18n("输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"),
label=i18n(
"输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"
),
value=1,
interactive=True,
)
@ -992,7 +1024,9 @@ with gr.Blocks(title="RVC WebUI") as app:
filter_radius1 = gr.Slider(
minimum=0,
maximum=7,
label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
label=i18n(
">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"
),
value=3,
step=1,
interactive=True,
@ -1006,11 +1040,14 @@ with gr.Blocks(title="RVC WebUI") as app:
)
with gr.Row():
dir_input = gr.Textbox(
label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
label=i18n(
"输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"
),
placeholder="C:\\Users\\Desktop\\input_vocal_dir",
)
inputs = gr.File(
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
file_count="multiple",
label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),
)
with gr.Row():
@ -1059,10 +1096,13 @@ with gr.Blocks(title="RVC WebUI") as app:
placeholder="C:\\Users\\Desktop\\todo-songs",
)
wav_inputs = gr.File(
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
file_count="multiple",
label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),
)
with gr.Column():
model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
model_choose = gr.Dropdown(
label=i18n("模型"), choices=uvr5_names
)
agg = gr.Slider(
minimum=0,
maximum=20,
@ -1143,7 +1183,8 @@ with gr.Blocks(title="RVC WebUI") as app:
)
with gr.Row():
trainset_dir4 = gr.Textbox(
label=i18n("输入训练文件夹路径"), value=i18n("E:\\语音音频+标注\\米津玄师\\src")
label=i18n("输入训练文件夹路径"),
value=i18n("E:\\语音音频+标注\\米津玄师\\src"),
)
spk_id5 = gr.Slider(
minimum=0,
@ -1162,11 +1203,17 @@ with gr.Blocks(title="RVC WebUI") as app:
api_name="train_preprocess",
)
with gr.Group():
gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"))
gr.Markdown(
value=i18n(
"step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"
)
)
with gr.Row():
with gr.Column():
gpus6 = gr.Textbox(
label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
label=i18n(
"以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"
),
value=gpus,
interactive=True,
visible=F0GPUVisible,
@ -1254,7 +1301,9 @@ with gr.Blocks(title="RVC WebUI") as app:
interactive=True,
)
if_save_every_weights18 = gr.Radio(
label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"),
label=i18n(
"是否在每次保存时间点将最终小模型保存至weights文件夹"
),
choices=[i18n(""), i18n("")],
value=i18n(""),
interactive=True,
@ -1286,7 +1335,9 @@ with gr.Blocks(title="RVC WebUI") as app:
[f0method8, gpus_rmvpe, pretrained_G14, pretrained_D15],
)
gpus16 = gr.Textbox(
label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
label=i18n(
"以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"
),
value=gpus,
interactive=True,
)
@ -1346,8 +1397,12 @@ with gr.Blocks(title="RVC WebUI") as app:
with gr.Group():
gr.Markdown(value=i18n("模型融合, 可用于测试音色融合"))
with gr.Row():
ckpt_a = gr.Textbox(label=i18n("A模型路径"), value="", interactive=True)
ckpt_b = gr.Textbox(label=i18n("B模型路径"), value="", interactive=True)
ckpt_a = gr.Textbox(
label=i18n("A模型路径"), value="", interactive=True
)
ckpt_b = gr.Textbox(
label=i18n("B模型路径"), value="", interactive=True
)
alpha_a = gr.Slider(
minimum=0,
maximum=1,
@ -1369,7 +1424,10 @@ with gr.Blocks(title="RVC WebUI") as app:
interactive=True,
)
info__ = gr.Textbox(
label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True
label=i18n("要置入的模型信息"),
value="",
max_lines=8,
interactive=True,
)
name_to_save0 = gr.Textbox(
label=i18n("保存的模型名不带后缀"),
@ -1402,13 +1460,18 @@ with gr.Blocks(title="RVC WebUI") as app:
api_name="ckpt_merge",
) # def merge(path1,path2,alpha1,sr,f0,info):
with gr.Group():
gr.Markdown(value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)"))
gr.Markdown(
value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)")
)
with gr.Row():
ckpt_path0 = gr.Textbox(
label=i18n("模型路径"), value="", interactive=True
)
info_ = gr.Textbox(
label=i18n("要改的模型信息"), value="", max_lines=8, interactive=True
label=i18n("要改的模型信息"),
value="",
max_lines=8,
interactive=True,
)
name_to_save1 = gr.Textbox(
label=i18n("保存的文件名, 默认空为和源文件同名"),
@ -1426,7 +1489,9 @@ with gr.Blocks(title="RVC WebUI") as app:
api_name="ckpt_modify",
)
with gr.Group():
gr.Markdown(value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)"))
gr.Markdown(
value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)")
)
with gr.Row():
ckpt_path1 = gr.Textbox(
label=i18n("模型路径"), value="", interactive=True
@ -1468,7 +1533,10 @@ with gr.Blocks(title="RVC WebUI") as app:
interactive=True,
)
info___ = gr.Textbox(
label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True
label=i18n("要置入的模型信息"),
value="",
max_lines=8,
interactive=True,
)
but9 = gr.Button(i18n("提取"), variant="primary")
info7 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
@ -1484,7 +1552,9 @@ with gr.Blocks(title="RVC WebUI") as app:
with gr.TabItem(i18n("Onnx导出")):
with gr.Row():
ckpt_dir = gr.Textbox(label=i18n("RVC模型路径"), value="", interactive=True)
ckpt_dir = gr.Textbox(
label=i18n("RVC模型路径"), value="", interactive=True
)
with gr.Row():
onnx_dir = gr.Textbox(
label=i18n("Onnx输出路径"), value="", interactive=True

View File

@ -1,4 +1,4 @@
import platform,os
import platform, os
import ffmpeg
import numpy as np
import av
@ -46,6 +46,6 @@ def load_audio(file, sr):
def clean_path(path_str):
if platform.system() == 'Windows':
path_str = path_str.replace('/', '\\')
if platform.system() == "Windows":
path_str = path_str.replace("/", "\\")
return path_str.strip(" ").strip('"').strip("\n").strip('"').strip(" ")

View File

@ -400,13 +400,17 @@ class SineGen(torch.nn.Module):
f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * (
idx + 2
) # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic
rad_values = (f0_buf / self.sampling_rate) % 1 ###%1意味着n_har的乘积无法后处理优化
rad_values = (
f0_buf / self.sampling_rate
) % 1 ###%1意味着n_har的乘积无法后处理优化
rand_ini = torch.rand(
f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device
)
rand_ini[:, 0] = 0
rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
tmp_over_one = torch.cumsum(rad_values, 1) # % 1 #####%1意味着后面的cumsum无法再优化
tmp_over_one = torch.cumsum(
rad_values, 1
) # % 1 #####%1意味着后面的cumsum无法再优化
tmp_over_one *= upp
tmp_over_one = F.interpolate(
tmp_over_one.transpose(2, 1),

View File

@ -333,13 +333,17 @@ class SineGen(torch.nn.Module):
f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * (
idx + 2
) # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic
rad_values = (f0_buf / self.sampling_rate) % 1 ###%1意味着n_har的乘积无法后处理优化
rad_values = (
f0_buf / self.sampling_rate
) % 1 ###%1意味着n_har的乘积无法后处理优化
rand_ini = torch.rand(
f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device
)
rand_ini[:, 0] = 0
rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
tmp_over_one = torch.cumsum(rad_values, 1) # % 1 #####%1意味着后面的cumsum无法再优化
tmp_over_one = torch.cumsum(
rad_values, 1
) # % 1 #####%1意味着后面的cumsum无法再优化
tmp_over_one *= upp
tmp_over_one = F.interpolate(
tmp_over_one.transpose(2, 1),

View File

@ -62,12 +62,12 @@ def torch_bmm(input, mat2, *, out=None):
): # pylint: disable=invalid-name
start_idx_2 = i2 * split_2_slice_size
end_idx_2 = (i2 + 1) * split_2_slice_size
hidden_states[
start_idx:end_idx, start_idx_2:end_idx_2
] = original_torch_bmm(
input[start_idx:end_idx, start_idx_2:end_idx_2],
mat2[start_idx:end_idx, start_idx_2:end_idx_2],
out=out,
hidden_states[start_idx:end_idx, start_idx_2:end_idx_2] = (
original_torch_bmm(
input[start_idx:end_idx, start_idx_2:end_idx_2],
mat2[start_idx:end_idx, start_idx_2:end_idx_2],
out=out,
)
)
else:
hidden_states[start_idx:end_idx] = original_torch_bmm(
@ -138,61 +138,67 @@ def scaled_dot_product_attention(
start_idx_2 = i2 * split_2_slice_size
end_idx_2 = (i2 + 1) * split_2_slice_size
if no_shape_one:
hidden_states[
start_idx:end_idx, start_idx_2:end_idx_2
] = original_scaled_dot_product_attention(
query[start_idx:end_idx, start_idx_2:end_idx_2],
key[start_idx:end_idx, start_idx_2:end_idx_2],
value[start_idx:end_idx, start_idx_2:end_idx_2],
attn_mask=attn_mask[
start_idx:end_idx, start_idx_2:end_idx_2
]
if attn_mask is not None
else attn_mask,
dropout_p=dropout_p,
is_causal=is_causal,
hidden_states[start_idx:end_idx, start_idx_2:end_idx_2] = (
original_scaled_dot_product_attention(
query[start_idx:end_idx, start_idx_2:end_idx_2],
key[start_idx:end_idx, start_idx_2:end_idx_2],
value[start_idx:end_idx, start_idx_2:end_idx_2],
attn_mask=(
attn_mask[start_idx:end_idx, start_idx_2:end_idx_2]
if attn_mask is not None
else attn_mask
),
dropout_p=dropout_p,
is_causal=is_causal,
)
)
else:
hidden_states[
:, start_idx:end_idx, start_idx_2:end_idx_2
] = original_scaled_dot_product_attention(
query[:, start_idx:end_idx, start_idx_2:end_idx_2],
key[:, start_idx:end_idx, start_idx_2:end_idx_2],
value[:, start_idx:end_idx, start_idx_2:end_idx_2],
attn_mask=attn_mask[
:, start_idx:end_idx, start_idx_2:end_idx_2
]
if attn_mask is not None
else attn_mask,
dropout_p=dropout_p,
is_causal=is_causal,
hidden_states[:, start_idx:end_idx, start_idx_2:end_idx_2] = (
original_scaled_dot_product_attention(
query[:, start_idx:end_idx, start_idx_2:end_idx_2],
key[:, start_idx:end_idx, start_idx_2:end_idx_2],
value[:, start_idx:end_idx, start_idx_2:end_idx_2],
attn_mask=(
attn_mask[
:, start_idx:end_idx, start_idx_2:end_idx_2
]
if attn_mask is not None
else attn_mask
),
dropout_p=dropout_p,
is_causal=is_causal,
)
)
else:
if no_shape_one:
hidden_states[
start_idx:end_idx
] = original_scaled_dot_product_attention(
query[start_idx:end_idx],
key[start_idx:end_idx],
value[start_idx:end_idx],
attn_mask=attn_mask[start_idx:end_idx]
if attn_mask is not None
else attn_mask,
dropout_p=dropout_p,
is_causal=is_causal,
hidden_states[start_idx:end_idx] = (
original_scaled_dot_product_attention(
query[start_idx:end_idx],
key[start_idx:end_idx],
value[start_idx:end_idx],
attn_mask=(
attn_mask[start_idx:end_idx]
if attn_mask is not None
else attn_mask
),
dropout_p=dropout_p,
is_causal=is_causal,
)
)
else:
hidden_states[
:, start_idx:end_idx
] = original_scaled_dot_product_attention(
query[:, start_idx:end_idx],
key[:, start_idx:end_idx],
value[:, start_idx:end_idx],
attn_mask=attn_mask[:, start_idx:end_idx]
if attn_mask is not None
else attn_mask,
dropout_p=dropout_p,
is_causal=is_causal,
hidden_states[:, start_idx:end_idx] = (
original_scaled_dot_product_attention(
query[:, start_idx:end_idx],
key[:, start_idx:end_idx],
value[:, start_idx:end_idx],
attn_mask=(
attn_mask[:, start_idx:end_idx]
if attn_mask is not None
else attn_mask
),
dropout_p=dropout_p,
is_causal=is_causal,
)
)
else:
return original_scaled_dot_product_attention(

View File

@ -104,11 +104,11 @@ def return_xpu(device):
return (
f"xpu:{device[-1]}"
if isinstance(device, str) and ":" in device
else f"xpu:{device}"
if isinstance(device, int)
else torch.device("xpu")
if isinstance(device, torch.device)
else "xpu"
else (
f"xpu:{device}"
if isinstance(device, int)
else torch.device("xpu") if isinstance(device, torch.device) else "xpu"
)
)
@ -271,12 +271,16 @@ def ipex_hijacks():
"torch.batch_norm",
lambda orig_func, input, weight, bias, *args, **kwargs: orig_func(
input,
weight
if weight is not None
else torch.ones(input.size()[1], device=input.device),
bias
if bias is not None
else torch.zeros(input.size()[1], device=input.device),
(
weight
if weight is not None
else torch.ones(input.size()[1], device=input.device)
),
(
bias
if bias is not None
else torch.zeros(input.size()[1], device=input.device)
),
*args,
**kwargs,
),
@ -286,12 +290,16 @@ def ipex_hijacks():
"torch.instance_norm",
lambda orig_func, input, weight, bias, *args, **kwargs: orig_func(
input,
weight
if weight is not None
else torch.ones(input.size()[1], device=input.device),
bias
if bias is not None
else torch.zeros(input.size()[1], device=input.device),
(
weight
if weight is not None
else torch.ones(input.size()[1], device=input.device)
),
(
bias
if bias is not None
else torch.zeros(input.size()[1], device=input.device)
),
*args,
**kwargs,
),

View File

@ -113,9 +113,11 @@ else:
feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
padding_mask = torch.BoolTensor(feats.shape).fill_(False)
inputs = {
"source": feats.half().to(device)
if device not in ["mps", "cpu"]
else feats.to(device),
"source": (
feats.half().to(device)
if device not in ["mps", "cpu"]
else feats.to(device)
),
"padding_mask": padding_mask.to(device),
"output_layer": 9 if version == "v1" else 12, # layer 9
}

View File

@ -38,26 +38,28 @@ class VC:
to_return_protect0 = {
"visible": self.if_f0 != 0,
"value": to_return_protect[0]
if self.if_f0 != 0 and to_return_protect
else 0.5,
"value": (
to_return_protect[0] if self.if_f0 != 0 and to_return_protect else 0.5
),
"__type__": "update",
}
to_return_protect1 = {
"visible": self.if_f0 != 0,
"value": to_return_protect[1]
if self.if_f0 != 0 and to_return_protect
else 0.33,
"value": (
to_return_protect[1] if self.if_f0 != 0 and to_return_protect else 0.33
),
"__type__": "update",
}
if sid == "" or sid == []:
if self.hubert_model is not None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
if (
self.hubert_model is not None
): # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
logger.info("Clean model cache")
del (self.net_g, self.n_spk, self.hubert_model, self.tgt_sr) # ,cpt
self.hubert_model = (
self.net_g
) = self.n_spk = self.hubert_model = self.tgt_sr = None
self.hubert_model = self.net_g = self.n_spk = self.hubert_model = (
self.tgt_sr
) = None
if torch.cuda.is_available():
torch.cuda.empty_cache()
###楼下不这么折腾清理不干净

View File

@ -59,12 +59,18 @@ with app:
)
sid.change(fn=vc.get_vc, inputs=[sid], outputs=[spk_item])
gr.Markdown(
value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ")
value=i18n(
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. "
)
)
vc_input3 = gr.Audio(label="上传音频长度小于90秒")
vc_transform0 = gr.Number(label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0)
vc_transform0 = gr.Number(
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
)
f0method0 = gr.Radio(
label=i18n("选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"),
label=i18n(
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
),
choices=["pm", "harvest", "crepe", "rmvpe"],
value="pm",
interactive=True,
@ -72,7 +78,9 @@ with app:
filter_radius0 = gr.Slider(
minimum=0,
maximum=7,
label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
label=i18n(
">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"
),
value=3,
step=1,
interactive=True,
@ -107,19 +115,25 @@ with app:
rms_mix_rate0 = gr.Slider(
minimum=0,
maximum=1,
label=i18n("输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"),
label=i18n(
"输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"
),
value=1,
interactive=True,
)
protect0 = gr.Slider(
minimum=0,
maximum=0.5,
label=i18n("保护清辅音和呼吸声防止电音撕裂等artifact拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
label=i18n(
"保护清辅音和呼吸声防止电音撕裂等artifact拉满0.5不开启,调低加大保护力度但可能降低索引效果"
),
value=0.33,
step=0.01,
interactive=True,
)
f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
f0_file = gr.File(
label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调")
)
but0 = gr.Button(i18n("转换"), variant="primary")
vc_output1 = gr.Textbox(label=i18n("输出信息"))
vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))

View File

@ -2,6 +2,7 @@
对源特征进行检索
"""
import os
import logging

View File

@ -1,6 +1,7 @@
"""
格式直接cid为自带的index位aid放不下了通过字典来查反正就5w个
"""
import os
import traceback
import logging

View File

@ -1,6 +1,7 @@
"""
格式直接cid为自带的index位aid放不下了通过字典来查反正就5w个
"""
import os
import logging

View File

@ -8,7 +8,9 @@ f0_up_key = 0 # 升降调
sid = 0 # 角色ID
f0_method = "dio" # F0提取算法
model_path = "ShirohaRVC.onnx" # 模型的完整路径
vec_name = "vec-256-layer-9" # 内部自动补齐为 f"pretrained/{vec_name}.onnx" 需要onnx的vec模型
vec_name = (
"vec-256-layer-9" # 内部自动补齐为 f"pretrained/{vec_name}.onnx" 需要onnx的vec模型
)
wav_path = "123.wav" # 输入路径或ByteIO实例
out_path = "out.wav" # 输出路径或ByteIO实例

View File

@ -273,15 +273,17 @@ class RVC:
f0 = f0[2:-3]
else:
f0 = f0[2:]
f0bak[
part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]
] = f0
f0bak[part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]] = (
f0
)
f0bak = signal.medfilt(f0bak, 3)
f0bak *= pow(2, f0_up_key / 12)
return self.get_f0_post(f0bak)
def get_f0_crepe(self, x, f0_up_key):
if "privateuseone" in str(self.device): ###不支持dmlcpu又太慢用不成拿fcpe顶替
if "privateuseone" in str(
self.device
): ###不支持dmlcpu又太慢用不成拿fcpe顶替
return self.get_f0(x, f0_up_key, 1, "fcpe")
# printt("using crepe,device:%s"%self.device)
f0, pd = torchcrepe.predict(

View File

@ -9,4 +9,5 @@ torchgate imports all the functions from PyTorch, and in addition provides:
TorchGating --- A PyTorch module that applies a spectral gate to an input signal
"""
from .torchgate import TorchGate