From 4303c5b18db3bf15794fac2ada15b1d844877f41 Mon Sep 17 00:00:00 2001 From: yxlllc Date: Sun, 12 Nov 2023 17:21:25 +0800 Subject: [PATCH] fix alignments of f0 and hubert --- tools/rvc_for_realtime.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/tools/rvc_for_realtime.py b/tools/rvc_for_realtime.py index e71c3f2..8f7fcb7 100644 --- a/tools/rvc_for_realtime.py +++ b/tools/rvc_for_realtime.py @@ -219,24 +219,20 @@ class RVC: return self.get_f0_rmvpe(x, f0_up_key) if method == "pm": p_len = x.shape[0] // 160 + 1 - f0 = ( - parselmouth.Sound(x, 16000) - .to_pitch_ac( + f0_min = 65 + l_pad = int(np.ceil(1.5 / f0_min * 16000)) + r_pad = l_pad + 1 + s = parselmouth.Sound(np.pad(x, (l_pad, r_pad)), 16000).to_pitch_ac( time_step=0.01, voicing_threshold=0.6, - pitch_floor=50, + pitch_floor=f0_min, pitch_ceiling=1100, - ) - .selected_array["frequency"] - ) - - pad_size = (p_len - len(f0) + 1) // 2 - if pad_size > 0 or p_len - len(f0) - pad_size > 0: - # printt(pad_size, p_len - len(f0) - pad_size) - f0 = np.pad( - f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" - ) - + ) + assert np.abs(s.t1 - 1.5 / f0_min) < 0.001 + f0 = s.selected_array['frequency'] + if len(f0) < p_len: + f0 = np.pad(f0, (0, p_len - len(f0))) + f0 = f0[: p_len] f0 *= pow(2, f0_up_key / 12) return self.get_f0_post(f0) if n_cpu == 1: @@ -354,7 +350,7 @@ class RVC: feats = ( self.model.final_proj(logits[0]) if self.version == "v1" else logits[0] ) - feats = F.pad(feats, (0, 0, 1, 0)) + feats = torch.cat((feats, feats[:,-1:,:]), 1) t2 = ttime() try: if hasattr(self, "index") and self.index_rate != 0: