优化笔记本、翻译并重新引入自动push (#48)

* optimize: 精简未用到的配置项并在特征提取初步引入mps * add cmd argument: --noautoopen * fix: i18n * fix * fix * add genlocale workflow * add unitest * fix * fix * fix * 优化笔记本 * reintroduce Push changes * disable genlocale on non-main branch * 将笔记本checkout改为stable
2024-11-23 23:21:03 +01:00 · 2023-04-13 21:32:08 +08:00 · 2023-04-13 21:32:08 +08:00 · 99996fbe8a
commit 99996fbe8a
parent 55135e0a0e
6 changed files with 90 additions and 25 deletions
--- a/.github/workflows/genlocale.yml
+++ b/.github/workflows/genlocale.yml
@@ -1,5 +1,8 @@
 name: genlocale
-on: [ push ]
+on:
+  push:
+    branches:
+      - main
 jobs:
  golangci:
    name: genlocale
@@ -22,3 +25,10 @@ jobs:
          git config --local user.email '41898282+github-actions[bot]@users.noreply.github.com'
          git add --all
          git commit -m "🎨 同步 locale"
+
+      - name: Push changes
+        if: ${{ !github.head_ref }}
+        uses: ad-m/github-push-action@master
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          branch: main
--- a/Retrieval_based_Voice_Conversion_WebUI.ipynb
+++ b/Retrieval_based_Voice_Conversion_WebUI.ipynb
@@ -58,7 +58,7 @@
      "source": [
        "#@title 克隆仓库\n",
        "\n",
-        "!git clone --depth=1 https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
+        "!git clone --depth=1 -b stable https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
        "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
        "!mkdir -p pretrained uvr5_weights"
      ],
@@ -80,11 +80,22 @@
      "execution_count": null,
      "outputs": []
    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title 安装aria2\n",
+        "!apt -y install -qq aria2"
+      ],
+      "metadata": {
+        "id": "pqE0PrnuRqI2"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
    {
      "cell_type": "code",
      "source": [
        "#@title 下载底模\n",
-        "!apt -y install -qq aria2\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
@@ -96,12 +107,7 @@
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
-        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n",
-        "\n",
-        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
-        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\n",
-        "\n",
-        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth"
      ],
      "metadata": {
        "id": "UG3XpUwEomUz"
@@ -109,6 +115,31 @@
      "execution_count": null,
      "outputs": []
    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title 下载人声分离模型\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth"
+      ],
+      "metadata": {
+        "id": "HugjmZqZRuiF"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title 下载hubert_base\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
+      ],
+      "metadata": {
+        "id": "2RCaT9FTR0ej"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
    {
      "cell_type": "code",
      "source": [
@@ -223,8 +254,12 @@
        "#@title 手动预处理（不推荐）\n",
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
+        "#@markdown 采样率\n",
+        "BITRATE = 48000  #@param {type:\"integer\"}\n",
+        "#@markdown 使用的进程数\n",
+        "THREADCOUNT = 8  #@param {type:\"integer\"}\n",
        "\n",
-        "!python3 trainset_preprocess_pipeline_print.py /content/dataset 48000 8 logs/{MODELNAME} True\n"
+        "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True\n"
      ],
      "metadata": {
        "id": "ZKAyuKb9J6dz"
@@ -238,8 +273,14 @@
        "#@title 手动提取特征（不推荐）\n",
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
+        "#@markdown 使用的进程数\n",
+        "THREADCOUNT = 8  #@param {type:\"integer\"}\n",
+        "#@markdown 音高提取算法\n",
+        "ALGO = \"harvest\"  #@param {type:\"string\"}\n",
        "\n",
-        "!python3 extract_feature_print.py 1 0 0 logs/{MODELNAME}\n"
+        "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n",
+        "\n",
+        "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME}\n"
      ],
      "metadata": {
        "id": "CrxJqzAUKmPJ"
@@ -253,14 +294,22 @@
        "#@title 手动训练（不推荐）\n",
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
+        "#@markdown 使用的GPU\n",
+        "USEGPU = \"0\"  #@param {type:\"string\"}\n",
+        "#@markdown 批大小\n",
+        "BATCHSIZE = 32  #@param {type:\"integer\"}\n",
        "#@markdown 停止的epoch\n",
        "MODELEPOCH = 3200  #@param {type:\"integer\"}\n",
        "#@markdown 保存epoch间隔\n",
        "EPOCHSAVE = 100  #@param {type:\"integer\"}\n",
        "#@markdown 采样率\n",
        "MODELSAMPLE = \"48k\"  #@param {type:\"string\"}\n",
+        "#@markdown 是否缓存训练集\n",
+        "CACHEDATA = 1  #@param {type:\"integer\"}\n",
+        "#@markdown 是否仅保存最新的ckpt文件\n",
+        "ONLYLATEST = 0  #@param {type:\"integer\"}\n",
        "\n",
-        "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs 32 -g 0 -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l 0 -c 1\n"
+        "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}\n"
      ],
      "metadata": {
        "id": "IMLPLKOaKj58"
--- a/gui.py
+++ b/gui.py
@@ -165,7 +165,7 @@ class GUI:
        layout=[
            [
                sg.Frame(title=i18n('加载模型'),layout=[
-                    [sg.Input(default_text='TEMP\\hubert_base.pt',key='hubert_path'),sg.FileBrowse(i18n('Hubert File'))],
+                    [sg.Input(default_text='TEMP\\hubert_base.pt',key='hubert_path'),sg.FileBrowse(i18n('Hubert模型'))],
                    [sg.Input(default_text='TEMP\\atri.pth',key='pth_path'),sg.FileBrowse(i18n('选择.pth文件'))],
                    [sg.Input(default_text='TEMP\\added_IVF512_Flat_atri_baseline_src_feat.index',key='index_path'),sg.FileBrowse(i18n('选择.index文件'))],
                    [sg.Input(default_text='TEMP\\big_src_feature_atri.npy',key='npy_path'),sg.FileBrowse(i18n('选择.npy文件'))]
@@ -187,10 +187,10 @@ class GUI:
                    [sg.Text(i18n("采样长度")),sg.Slider(range=(0.1,3.0),key='block_time',resolution=0.1,orientation='h',default_value=1.0)],
                    [sg.Text(i18n("淡入淡出长度")),sg.Slider(range=(0.01,0.15),key='crossfade_length',resolution=0.01,orientation='h',default_value=0.08)],
                    [sg.Text(i18n("额外推理时长")),sg.Slider(range=(0.05,3.00),key='extra_time',resolution=0.01,orientation='h',default_value=0.05)],
-                    [sg.Checkbox(i18n('Input Noisereduce'),key='I_noise_reduce'),sg.Checkbox(i18n('Output Noisereduce'),key='O_noise_reduce')]
+                    [sg.Checkbox(i18n('输入降噪'),key='I_noise_reduce'),sg.Checkbox(i18n('输出降噪'),key='O_noise_reduce')]
                ],title=i18n("性能设置"))
            ],
-            [sg.Button(i18n("开始音频转换"),key='start_vc'),sg.Button(i18n("停止音频转换"),key='stop_vc'),sg.Text(i18n("Infer Time(ms):")),sg.Text("0",key='infer_time')]
+            [sg.Button(i18n("开始音频转换"),key='start_vc'),sg.Button(i18n("停止音频转换"),key='stop_vc'),sg.Text(i18n("推理时间(ms):")),sg.Text("0",key='infer_time')]
        ]
        
        self.window=sg.Window("RVC - GUI",layout=layout)
--- a/locale/en_US.json
+++ b/locale/en_US.json
@@ -76,6 +76,7 @@
    "点击查看交流、问题反馈群号": "Click to view the communication and problem feedback group number",
    "xxxxx": "xxxxx",
    "加载模型": "加载模型",
+    "Hubert模型": "Hubert File",
    "选择.pth文件": "选择.pth文件",
    "选择.index文件": "选择.index文件",
    "选择.npy文件": "选择.npy文件",
@@ -88,8 +89,10 @@
    "采样长度": "采样长度",
    "淡入淡出长度": "淡入淡出长度",
    "额外推理时长": "额外推理时长",
-    "输出降噪/Output Noisereduce": "输出降噪/Output Noisereduce",
+    "输入降噪": "Input Noisereduce",
+    "输出降噪": "Output Noisereduce",
    "性能设置": "性能设置",
    "开始音频转换": "开始音频转换",
-    "停止音频转换": "停止音频转换"
+    "停止音频转换": "停止音频转换",
+    "推理时间(ms):": "Infer Time(ms):"
 }
--- a/locale/ja_JP.json
+++ b/locale/ja_JP.json
@@ -36,7 +36,7 @@
    "请指定说话人id": "話者IDを指定してください",
    "处理数据": "データ処理",
    "step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)": "ステップ2b: CPUを使用して音高を抽出する(モデルに音高がある場合)、GPUを使用して特徴を抽出する(カード番号を選択する)",
-    "以-分隔输入使用的卡号, 例如   0-1-2   使用卡0和卡1和卡2": "ハイフンで区切って使用するカード番号を入力します。例えば0-1-2はカード0、カード1、カード2を使用します" ,
+    "以-分隔输入使用的卡号, 例如   0-1-2   使用卡0和卡1和卡2": "ハイフンで区切って使用するカード番号を入力します。例えば0-1-2はカード0、カード1、カード2を使用します",
    "显卡信息": "カード情報",
    "提取音高使用的CPU进程数": "抽出に使用するCPUプロセス数",
    "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢": "音高抽出アルゴリズムの選択:歌声を入力する場合は、pmを使用して速度を上げることができます。CPUが低い場合はdioを使用して速度を上げることができます。harvestは品質が高く、精度が高いですが、遅いです。",
@@ -72,10 +72,11 @@
    "模型是否带音高指导,1是0否": "モデルに音高ガイドを付けるかどうか、1は付ける、0は付けない",
    "提取": "抽出",
    "招募音高曲线前端编辑器": "音高曲線フロントエンドエディターを募集",
-    "加开发群联系我xxxxx": "開発グループに参加して私に連絡してくださいxxxxx" ,
+    "加开发群联系我xxxxx": "開発グループに参加して私に連絡してくださいxxxxx",
    "点击查看交流、问题反馈群号": "クリックして交流、問題フィードバックグループ番号を表示",
    "xxxxx": "xxxxx",
    "加载模型": "モデルをロードする",
+    "Hubert模型": "Hubert模型",
    "选择.pth文件": ".pthファイルを選択する",
    "选择.index文件": ".indexファイルを選択する",
    "选择.npy文件": ".npyファイルを選択する",
@@ -88,8 +89,10 @@
    "采样长度": "サンプル長",
    "淡入淡出长度": "フェードイン/フェードアウト長",
    "额外推理时长": "追加推論時間",
-    "输出降噪/Output Noisereduce": "出力ノイズリダクション",
+    "输入降噪": "输入降噪",
+    "输出降噪": "输出降噪",
    "性能设置": "パフォーマンス設定",
    "开始音频转换": "音声変換を開始する",
-    "停止音频转换": "音声変換を停止する"
+    "停止音频转换": "音声変換を停止する",
+    "推理时间(ms):": "推理时间(ms):"
 }
--- a/locale/zh_CN.json
+++ b/locale/zh_CN.json
@@ -76,7 +76,7 @@
    "点击查看交流、问题反馈群号": "点击查看交流、问题反馈群号",
    "xxxxx": "xxxxx",
    "加载模型": "加载模型",
-    "Hubert File":"Hubert模型",
+    "Hubert模型": "Hubert模型",
    "选择.pth文件": "选择.pth文件",
    "选择.index文件": "选择.index文件",
    "选择.npy文件": "选择.npy文件",
@@ -89,10 +89,10 @@
    "采样长度": "采样长度",
    "淡入淡出长度": "淡入淡出长度",
    "额外推理时长": "额外推理时长",
-    "Input Noisereduce":"输入降噪",
-    "Output Noisereduce": "输出降噪",
+    "输入降噪": "输入降噪",
+    "输出降噪": "输出降噪",
    "性能设置": "性能设置",
    "开始音频转换": "开始音频转换",
    "停止音频转换": "停止音频转换",
-    "Infer Time(ms):":"推理时间(ms):"
+    "推理时间(ms):": "推理时间(ms):"
 }