Merge pull request #1534 from RVC-Project/dev

chore(sync): merge dev into main
2024-11-27 17:00:54 +01:00 · 2023-11-12 17:51:55 +08:00 · 2023-11-12 17:51:55 +08:00 · ff093ad88f
commit ff093ad88f
parent f431f8fb3f e2d494d1a7
5 changed files with 24 additions and 37 deletions
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -18,13 +18,3 @@
 # Screenshot

 - Please include a screenshot if applicable
-
-# Localhost url to test on
-
- Please include a url on localhost to test.
-
-# Jira Link
-
- Please include a link to the ticket if applicable.
-
-[Ticket]()
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@

 [![Discord](https://img.shields.io/badge/RVC%20Developers-Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HcsmBBGyVk)

-[**更新日志**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_CN.md) | [**常见问题解答**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98%E8%A7%A3%E7%AD%94) | [**AutoDL·5毛钱训练AI歌手**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/Autodl%E8%AE%AD%E7%BB%83RVC%C2%B7AI%E6%AD%8C%E6%89%8B%E6%95%99%E7%A8%8B) | [**对照实验记录**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/Autodl%E8%AE%AD%E7%BB%83RVC%C2%B7AI%E6%AD%8C%E6%89%8B%E6%95%99%E7%A8%8B](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/%E5%AF%B9%E7%85%A7%E5%AE%9E%E9%AA%8C%C2%B7%E5%AE%9E%E9%AA%8C%E8%AE%B0%E5%BD%95)) | [**在线演示**](https://huggingface.co/spaces/Ricecake123/RVC-demo)
+[**更新日志**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_CN.md) | [**常见问题解答**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98%E8%A7%A3%E7%AD%94) | [**AutoDL·5毛钱训练AI歌手**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/Autodl%E8%AE%AD%E7%BB%83RVC%C2%B7AI%E6%AD%8C%E6%89%8B%E6%95%99%E7%A8%8B) | [**对照实验记录**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/Autodl%E8%AE%AD%E7%BB%83RVC%C2%B7AI%E6%AD%8C%E6%89%8B%E6%95%99%E7%A8%8B](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/%E5%AF%B9%E7%85%A7%E5%AE%9E%E9%AA%8C%C2%B7%E5%AE%9E%E9%AA%8C%E8%AE%B0%E5%BD%95)) | [**在线演示**](https://modelscope.cn/studios/FlowerCry/RVCv2demo)

 </div>

--- a/i18n/locale/fr_FR.json
+++ b/i18n/locale/fr_FR.json
@@ -88,7 +88,7 @@
    "特征检索库文件路径,为空则使用下拉的选择结果": "Chemin d'accès au fichier d'index des caractéristiques. Laisser vide pour utiliser le résultat sélectionné dans la liste déroulante :",
    "男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "Il est recommandé d'utiliser la clé +12 pour la conversion homme-femme et la clé -12 pour la conversion femme-homme. Si la plage sonore est trop large et que la voix est déformée, vous pouvez également l'ajuster vous-même à la plage appropriée.",
    "目标采样率": "Taux d'échantillonnage cible :",
-    "算法延迟(ms):": "算法延迟(ms):",
+    "算法延迟(ms):": "Délais algorithmiques (ms):",
    "自动检测index路径,下拉式选择(dropdown)": "Détecter automatiquement le chemin d'accès à l'index et le sélectionner dans la liste déroulante :",
    "融合": "Fusion",
    "要改的模型信息": "Informations sur le modèle à modifier :",
@@ -107,12 +107,12 @@
    "输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)": "Entrez le chemin du dossier audio à traiter (copiez-le depuis la barre d'adresse du gestionnaire de fichiers) :",
    "输入待处理音频文件路径(默认是正确格式示例)": "Entrez le chemin d'accès du fichier audio à traiter (par défaut, l'exemple de format correct) :",
    "输入源音量包络替换输出音量包络融合比例，越靠近1越使用输出包络": "Ajustez l'échelle de l'enveloppe de volume. Plus il est proche de 0, plus il imite le volume des voix originales. Cela peut aider à masquer les bruits et à rendre le volume plus naturel lorsqu'il est réglé relativement bas. Plus le volume est proche de 1, plus le volume sera fort et constant :",
-    "输入监听": "输入监听",
+    "输入监听": "Moniteur vocal d'entrée",
    "输入训练文件夹路径": "Indiquez le chemin d'accès au dossier d'entraînement :",
    "输入设备": "Dispositif d'entrée",
    "输入降噪": "Réduction du bruit d'entrée",
    "输出信息": "Informations sur la sortie",
-    "输出变声": "输出变声",
+    "输出变声": "Sortie voix convertie",
    "输出设备": "Dispositif de sortie",
    "输出降噪": "Réduction du bruit de sortie",
    "输出音频(右下角三个点,点了可以下载)": "Exporter l'audio (cliquer sur les trois points dans le coin inférieur droit pour télécharger)",
--- a/infer/modules/vc/modules.py
+++ b/infer/modules/vc/modules.py
@@ -169,8 +169,8 @@ class VC:
            if self.hubert_model is None:
                self.hubert_model = load_hubert(self.config)

-            file_index = (
-                (
+            if file_index:
+                file_index = (
                    file_index.strip(" ")
                    .strip('"')
                    .strip("\n")
@@ -178,9 +178,10 @@ class VC:
                    .strip(" ")
                    .replace("trained", "added")
                )
-                if file_index != ""
-                else file_index2
-            )  # 防止小白写错，自动帮他替换掉
+            elif file_index2:
+                file_index = file_index2
+            else:
+                file_index = ""  # 防止小白写错，自动帮他替换掉

            audio_opt = self.pipeline.pipeline(
                self.hubert_model,
--- a/tools/rvc_for_realtime.py
+++ b/tools/rvc_for_realtime.py
@@ -219,24 +219,20 @@ class RVC:
            return self.get_f0_rmvpe(x, f0_up_key)
        if method == "pm":
            p_len = x.shape[0] // 160 + 1
-            f0 = (
-                parselmouth.Sound(x, 16000)
-                .to_pitch_ac(
-                    time_step=0.01,
-                    voicing_threshold=0.6,
-                    pitch_floor=50,
-                    pitch_ceiling=1100,
-                )
-                .selected_array["frequency"]
+            f0_min = 65
+            l_pad = int(np.ceil(1.5 / f0_min * 16000))
+            r_pad = l_pad + 1
+            s = parselmouth.Sound(np.pad(x, (l_pad, r_pad)), 16000).to_pitch_ac(
+                time_step=0.01,
+                voicing_threshold=0.6,
+                pitch_floor=f0_min,
+                pitch_ceiling=1100,
            )
-
-            pad_size = (p_len - len(f0) + 1) // 2
-            if pad_size > 0 or p_len - len(f0) - pad_size > 0:
-                # printt(pad_size, p_len - len(f0) - pad_size)
-                f0 = np.pad(
-                    f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
-                )
-
+            assert np.abs(s.t1 - 1.5 / f0_min) < 0.001
+            f0 = s.selected_array["frequency"]
+            if len(f0) < p_len:
+                f0 = np.pad(f0, (0, p_len - len(f0)))
+            f0 = f0[:p_len]
            f0 *= pow(2, f0_up_key / 12)
            return self.get_f0_post(f0)
        if n_cpu == 1:
@@ -354,7 +350,7 @@ class RVC:
            feats = (
                self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
            )
-            feats = F.pad(feats, (0, 0, 1, 0))
+            feats = torch.cat((feats, feats[:, -1:, :]), 1)
        t2 = ttime()
        try:
            if hasattr(self, "index") and self.index_rate != 0: