From 5449f84f06d48094ede804e09c3bda325a014d4f Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 26 Dec 2023 22:03:02 +0900
Subject: [PATCH] chore(format): run black on dev (#1638)

Co-authored-by: github-actions[bot]
---
 gui_v1.py                         | 67 +++++++++++++++++----------
 infer/lib/infer_pack/models.py    | 20 ++++----
 infer/lib/train/mel_processing.py | 10 ++--
 tools/rvc_for_realtime.py         | 76 +++++++++++++++++--------------
 4 files changed, 102 insertions(+), 71 deletions(-)

diff --git a/gui_v1.py b/gui_v1.py
index e5c6757..86b52d3 100644
--- a/gui_v1.py
+++ b/gui_v1.py
@@ -38,10 +38,14 @@ def phase_vocoder(a, b, fade_out, fade_in):
     deltaphase = deltaphase - 2 * np.pi * torch.floor(deltaphase / 2 / np.pi + 0.5)
     w = 2 * np.pi * torch.arange(n // 2 + 1).to(a) + deltaphase
     t = torch.arange(n).unsqueeze(-1).to(a) / n
-    result = a * (fade_out ** 2) + b * (fade_in ** 2) + torch.sum(absab * torch.cos(w * t + phia), -1) * window / n
+    result = (
+        a * (fade_out**2)
+        + b * (fade_in**2)
+        + torch.sum(absab * torch.cos(w * t + phia), -1) * window / n
+    )
     return result
-
+
 
 class Harvest(multiprocessing.Process):
     def __init__(self, inp_q, opt_q):
         multiprocessing.Process.__init__(self)
@@ -592,11 +596,11 @@ if __name__ == "__main__":
             self.gui_config.pth_path = values["pth_path"]
             self.gui_config.index_path = values["index_path"]
             self.gui_config.sr_type = ["sr_model", "sr_device"][
-                    [
-                        values["sr_model"],
-                        values["sr_device"],
-                    ].index(True)
-                    ]
+                [
+                    values["sr_model"],
+                    values["sr_device"],
+                ].index(True)
+            ]
             self.gui_config.threhold = values["threhold"]
             self.gui_config.pitch = values["pitch"]
             self.gui_config.block_time = values["block_time"]
@@ -633,7 +637,11 @@ if __name__ == "__main__":
                 self.config,
                 self.rvc if hasattr(self, "rvc") else None,
             )
-            self.gui_config.samplerate = self.rvc.tgt_sr if self.gui_config.sr_type == "sr_model" else self.get_device_samplerate()
+            self.gui_config.samplerate = (
+                self.rvc.tgt_sr
+                if self.gui_config.sr_type == "sr_model"
+                else self.get_device_samplerate()
+            )
             self.zc = self.gui_config.samplerate // 100
             self.block_frame = (
                 int(
@@ -690,7 +698,9 @@ if __name__ == "__main__":
                 2 * self.zc, device=self.config.device, dtype=torch.float32
             )
             self.skip_head = self.extra_frame // self.zc
-            self.return_length = (self.block_frame + self.sola_buffer_frame + self.sola_search_frame) // self.zc
+            self.return_length = (
+                self.block_frame + self.sola_buffer_frame + self.sola_search_frame
+            ) // self.zc
             self.fade_in_window: torch.Tensor = (
                 torch.sin(
                     0.5
@@ -824,7 +834,11 @@ if __name__ == "__main__":
             # volume envelop mixing
             if self.gui_config.rms_mix_rate < 1 and self.function == "vc":
                 rms1 = librosa.feature.rms(
-                    y=self.input_wav_res[160 * self.skip_head : 160 * (self.skip_head + self.return_length)]
+                    y=self.input_wav_res[
+                        160
+                        * self.skip_head : 160
+                        * (self.skip_head + self.return_length)
+                    ]
                     .cpu()
                     .numpy(),
                     frame_length=640,
@@ -871,21 +885,24 @@ if __name__ == "__main__":
             else:
                 sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
             printt("sola_offset = %d", int(sola_offset))
-            infer_wav = infer_wav[sola_offset :]
+            infer_wav = infer_wav[sola_offset:]
             if "privateuseone" in str(self.config.device) or not self.gui_config.use_pv:
                 infer_wav[: self.sola_buffer_frame] *= self.fade_in_window
-                infer_wav[: self.sola_buffer_frame] += self.sola_buffer * self.fade_out_window
+                infer_wav[: self.sola_buffer_frame] += (
+                    self.sola_buffer * self.fade_out_window
+                )
             else:
                 infer_wav[: self.sola_buffer_frame] = phase_vocoder(
-                        self.sola_buffer,
-                        infer_wav[: self.sola_buffer_frame],
-                        self.fade_out_window,
-                        self.fade_in_window)
-            self.sola_buffer[:] = infer_wav[self.block_frame : self.block_frame + self.sola_buffer_frame]
-            if sys.platform == "darwin":
-                outdata[:] = (
-                    infer_wav[: self.block_frame].cpu().numpy()[:, np.newaxis]
+                    self.sola_buffer,
+                    infer_wav[: self.sola_buffer_frame],
+                    self.fade_out_window,
+                    self.fade_in_window,
                 )
+            self.sola_buffer[:] = infer_wav[
+                self.block_frame : self.block_frame + self.sola_buffer_frame
+            ]
+            if sys.platform == "darwin":
+                outdata[:] = infer_wav[: self.block_frame].cpu().numpy()[:, np.newaxis]
             else:
                 outdata[:] = (
                     infer_wav[: self.block_frame].repeat(2, 1).t().cpu().numpy()
@@ -930,7 +947,7 @@ if __name__ == "__main__":
                 input_devices_indices,
                 output_devices_indices,
             )
-
+
         def set_devices(self, input_device, output_device):
             """Set output devices"""
             (
@@ -947,8 +964,10 @@ if __name__ == "__main__":
             ]
             printt("Input device: %s:%s", str(sd.default.device[0]), input_device)
             printt("Output device: %s:%s", str(sd.default.device[1]), output_device)
-
+
         def get_device_samplerate(self):
-            return int(sd.query_devices(device=sd.default.device[0])['default_samplerate'])
-
+            return int(
+                sd.query_devices(device=sd.default.device[0])["default_samplerate"]
+            )
+
     gui = GUI()
diff --git a/infer/lib/infer_pack/models.py b/infer/lib/infer_pack/models.py
index a81c1de..e489634 100644
--- a/infer/lib/infer_pack/models.py
+++ b/infer/lib/infer_pack/models.py
@@ -795,9 +795,9 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
         assert isinstance(return_length, torch.Tensor)
         head = int(skip_head.item())
         length = int(return_length.item())
-        z_p = z_p[:, :, head: head + length]
-        x_mask = x_mask[:, :, head: head + length]
-        nsff0 = nsff0[:, head: head + length]
+        z_p = z_p[:, :, head : head + length]
+        x_mask = x_mask[:, :, head : head + length]
+        nsff0 = nsff0[:, head : head + length]
         z = self.flow(z_p, x_mask, g=g, reverse=True)
         o = self.dec(z * x_mask, nsff0, g=g)
         return o, x_mask, (z, z_p, m_p, logs_p)
@@ -957,9 +957,9 @@ class SynthesizerTrnMs768NSFsid(nn.Module):
         assert isinstance(return_length, torch.Tensor)
         head = int(skip_head.item())
         length = int(return_length.item())
-        z_p = z_p[:, :, head: head + length]
-        x_mask = x_mask[:, :, head: head + length]
-        nsff0 = nsff0[:, head: head + length]
+        z_p = z_p[:, :, head : head + length]
+        x_mask = x_mask[:, :, head : head + length]
+        nsff0 = nsff0[:, head : head + length]
         z = self.flow(z_p, x_mask, g=g, reverse=True)
         o = self.dec(z * x_mask, nsff0, g=g)
         return o, x_mask, (z, z_p, m_p, logs_p)
@@ -1108,8 +1108,8 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
         assert isinstance(return_length, torch.Tensor)
         head = int(skip_head.item())
         length = int(return_length.item())
-        z_p = z_p[:, :, head: head + length]
-        x_mask = x_mask[:, :, head: head + length]
+        z_p = z_p[:, :, head : head + length]
+        x_mask = x_mask[:, :, head : head + length]
         z = self.flow(z_p, x_mask, g=g, reverse=True)
         o = self.dec(z * x_mask, g=g)
         return o, x_mask, (z, z_p, m_p, logs_p)
@@ -1258,8 +1258,8 @@ class SynthesizerTrnMs768NSFsid_nono(nn.Module):
         assert isinstance(return_length, torch.Tensor)
         head = int(skip_head.item())
         length = int(return_length.item())
-        z_p = z_p[:, :, head: head + length]
-        x_mask = x_mask[:, :, head: head + length]
+        z_p = z_p[:, :, head : head + length]
+        x_mask = x_mask[:, :, head : head + length]
         z = self.flow(z_p, x_mask, g=g, reverse=True)
         o = self.dec(z * x_mask, g=g)
         return o, x_mask, (z, z_p, m_p, logs_p)
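Note: the models.py hunks above are whitespace-only. Black normalizes PEP 8 slice spacing (`head: head + length` -> `head : head + length`) in the crop each synthesizer applies before the flow and decoder, which keeps `return_length` frames starting at `skip_head` along the time axis. A minimal standalone sketch of that slicing, with toy tensors whose shapes are illustrative only and not the model's real dimensions:

    import torch

    # Toy stand-ins for the latent, mask, and f0 tensors: (B, C, T), (B, 1, T), (B, T).
    z_p = torch.randn(1, 4, 10)
    x_mask = torch.ones(1, 1, 10)
    nsff0 = torch.rand(1, 10)

    head, length = 3, 5  # int(skip_head.item()), int(return_length.item())

    # The same crop the hunks above re-space: keep `length` frames starting at `head`.
    z_p = z_p[:, :, head : head + length]
    x_mask = x_mask[:, :, head : head + length]
    nsff0 = nsff0[:, head : head + length]
    assert z_p.shape[-1] == x_mask.shape[-1] == nsff0.shape[-1] == length

Behavior is identical before and after the reformat; only the spacing changes.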
diff --git a/infer/lib/train/mel_processing.py b/infer/lib/train/mel_processing.py
index 14a960f..3751f1e 100644
--- a/infer/lib/train/mel_processing.py
+++ b/infer/lib/train/mel_processing.py
@@ -38,6 +38,7 @@ def spectral_de_normalize_torch(magnitudes):
 
 mel_basis = {}
 hann_window = {}
 
+
 def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False):
     """Convert waveform into Linear-frequency Linear-amplitude spectrogram.
@@ -51,7 +52,7 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
     Returns:
         :: (B, Freq, Frame) - Linear-frequency Linear-amplitude spectrogram
     """
-    
+
     # Window - Cache if needed
     global hann_window
     dtype_device = str(y.dtype) + "_" + str(y.device)
@@ -60,7 +61,7 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
         hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(
             dtype=y.dtype, device=y.device
         )
-    
+
     # Padding
     y = torch.nn.functional.pad(
         y.unsqueeze(1),
@@ -68,7 +69,7 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
         mode="reflect",
     )
     y = y.squeeze(1)
-    
+
     # Complex Spectrogram :: (B, T) -> (B, Freq, Frame, RealComplex=2)
     spec = torch.stft(
         y,
@@ -82,11 +83,12 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
         onesided=True,
         return_complex=True,
     )
-    
+
     # Linear-frequency Linear-amplitude spectrogram :: (B, Freq, Frame, RealComplex=2) -> (B, Freq, Frame)
     spec = torch.sqrt(spec.real.pow(2) + spec.imag.pow(2) + 1e-6)
     return spec
+
 
 def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax):
     # MelBasis - Cache if needed
     global mel_basis
diff --git a/tools/rvc_for_realtime.py b/tools/rvc_for_realtime.py
index 257c44d..ff1ea88 100644
--- a/tools/rvc_for_realtime.py
+++ b/tools/rvc_for_realtime.py
@@ -46,22 +46,23 @@ def printt(strr, *args):
 # config.is_half=False######## force CPU for testing
 class RVC:
     def __init__(
-            self,
-            key,
-            pth_path,
-            index_path,
-            index_rate,
-            n_cpu,
-            inp_q,
-            opt_q,
-            config: Config,
-            last_rvc=None,
+        self,
+        key,
+        pth_path,
+        index_path,
+        index_rate,
+        n_cpu,
+        inp_q,
+        opt_q,
+        config: Config,
+        last_rvc=None,
     ) -> None:
         """
         Initialize
         """
         try:
             if config.dml == True:
+
                 def forward_dml(ctx, x, scale):
                     ctx.scale = scale
                     res = x.clone().detach()
@@ -92,7 +93,7 @@ class RVC:
             self.index_rate = index_rate
             self.cache_pitch: np.ndarray = np.zeros(1024, dtype="int32")
             self.cache_pitchf = np.zeros(1024, dtype="float32")
-    
+
             if last_rvc is None:
                 models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
                     ["assets/hubert/hubert_base.pt"],
@@ -201,7 +202,7 @@ class RVC:
         f0bak = f0.copy()
         f0_mel = 1127 * np.log(1 + f0 / 700)
         f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * 254 / (
-                self.f0_mel_max - self.f0_mel_min
+            self.f0_mel_max - self.f0_mel_min
         ) + 1
         f0_mel[f0_mel <= 1] = 1
         f0_mel[f0_mel > 255] = 255
@@ -258,7 +259,7 @@ class RVC:
                     self.inp_q.put((idx, x[:tail], res_f0, n_cpu, ts))
                 else:
                     self.inp_q.put(
-                        (idx, x[part_length * idx - 320: tail], res_f0, n_cpu, ts)
+                        (idx, x[part_length * idx - 320 : tail], res_f0, n_cpu, ts)
                     )
             while 1:
                 res_ts = self.opt_q.get()
@@ -273,7 +274,7 @@ class RVC:
                 else:
                     f0 = f0[2:]
                 f0bak[
-                    part_length * idx // 160: part_length * idx // 160 + f0.shape[0]
+                    part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]
                 ] = f0
             f0bak = signal.medfilt(f0bak, 3)
             f0bak *= pow(2, f0_up_key / 12)
@@ -320,6 +321,7 @@ class RVC:
     def get_f0_fcpe(self, x, f0_up_key):
         if hasattr(self, "model_fcpe") == False:
             from torchfcpe import spawn_bundled_infer_model
+
printt("Loading fcpe model") if "privateuseone" in str(self.device): self.device_fcpe = "cpu" @@ -329,7 +331,7 @@ class RVC: f0 = self.model_fcpe.infer( x.to(self.device_fcpe).unsqueeze(0).float(), sr=16000, - decoder_mode='local_argmax', + decoder_mode="local_argmax", threshold=0.006, ) f0 *= pow(2, f0_up_key / 12) @@ -337,12 +339,12 @@ class RVC: return self.get_f0_post(f0) def infer( - self, - input_wav: torch.Tensor, - block_frame_16k, - skip_head, - return_length, - f0method, + self, + input_wav: torch.Tensor, + block_frame_16k, + skip_head, + return_length, + f0method, ) -> np.ndarray: t1 = ttime() with torch.no_grad(): @@ -364,16 +366,16 @@ class RVC: t2 = ttime() try: if hasattr(self, "index") and self.index_rate != 0: - npy = feats[0][skip_head // 2:].cpu().numpy().astype("float32") + npy = feats[0][skip_head // 2 :].cpu().numpy().astype("float32") score, ix = self.index.search(npy, k=8) weight = np.square(1 / score) weight /= weight.sum(axis=1, keepdims=True) npy = np.sum(self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1) if self.config.is_half: npy = npy.astype("float16") - feats[0][skip_head // 2:] = ( - torch.from_numpy(npy).unsqueeze(0).to(self.device) * self.index_rate - + (1 - self.index_rate) * feats[0][skip_head // 2:] + feats[0][skip_head // 2 :] = ( + torch.from_numpy(npy).unsqueeze(0).to(self.device) * self.index_rate + + (1 - self.index_rate) * feats[0][skip_head // 2 :] ) else: printt("Index search FAILED or disabled") @@ -384,21 +386,29 @@ class RVC: if self.if_f0 == 1: f0_extractor_frame = block_frame_16k + 800 if f0method == "rmvpe": - f0_extractor_frame = ( - 5120 * ((f0_extractor_frame - 1) // 5120 + 1) - 160 - ) - pitch, pitchf = self.get_f0(input_wav[-f0_extractor_frame: ], self.f0_up_key, self.n_cpu, f0method) + f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1) - 160 + pitch, pitchf = self.get_f0( + input_wav[-f0_extractor_frame:], self.f0_up_key, self.n_cpu, f0method + ) start_frame = block_frame_16k // 160 end_frame = len(self.cache_pitch) - (pitch.shape[0] - 4) + start_frame - self.cache_pitch[:] = np.append(self.cache_pitch[start_frame: end_frame], pitch[3:-1]) + self.cache_pitch[:] = np.append( + self.cache_pitch[start_frame:end_frame], pitch[3:-1] + ) self.cache_pitchf[:] = np.append( - self.cache_pitchf[start_frame: end_frame], pitchf[3:-1] + self.cache_pitchf[start_frame:end_frame], pitchf[3:-1] ) t4 = ttime() p_len = input_wav.shape[0] // 160 if self.if_f0 == 1: - cache_pitch = torch.LongTensor(self.cache_pitch[-p_len: ]).to(self.device).unsqueeze(0) - cache_pitchf = torch.FloatTensor(self.cache_pitchf[-p_len: ]).to(self.device).unsqueeze(0) + cache_pitch = ( + torch.LongTensor(self.cache_pitch[-p_len:]).to(self.device).unsqueeze(0) + ) + cache_pitchf = ( + torch.FloatTensor(self.cache_pitchf[-p_len:]) + .to(self.device) + .unsqueeze(0) + ) feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) feats = feats[:, :p_len, :] p_len = torch.LongTensor([p_len]).to(self.device)