From 5dbdbab2c1e405aeb39af2c06f19817a2d64a478 Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Tue, 23 Jan 2024 15:15:11 +0800
Subject: [PATCH] Update audio.py

---
Reviewer notes (this area, between the three-dash line and the diff, is
not part of the commit message):

* Removes the PyAV-based audio2() helper. load_audio() now decodes by
  running the ffmpeg CLI through the ffmpeg-python package, requesting
  mono (ac=1) pcm_f32le output at sample rate `sr`, and returns the
  captured stdout as a flat float32 numpy array.
* load_audio() no longer checks os.path.exists(file) up front, and the
  old `except AttributeError` branch (which resampled file[1] with
  librosa) is gone. Any exception raised inside the try block is
  re-raised as RuntimeError("Failed to load audio: ...").
* Adds clean_path(): on Windows it replaces '/' with '\', then strips
  spaces, double quotes and newlines from both ends of the path.
* The line structure of this patch was restored from a copy whose
  newlines and indentation had been collapsed. Indentation inside the
  hunks was reconstructed as 4-space steps; verify against the target
  file before applying.

 infer/lib/audio.py | 60 +++++++++++++++-------------------------------
 1 file changed, 19 insertions(+), 41 deletions(-)

diff --git a/infer/lib/audio.py b/infer/lib/audio.py
index 13c12f5..6c886e1 100644
--- a/infer/lib/audio.py
+++ b/infer/lib/audio.py
@@ -1,7 +1,5 @@
-import os
-import traceback
-
-import librosa
+import platform,os
+import ffmpeg
 import numpy as np
 import av
 from io import BytesIO
@@ -30,44 +28,24 @@ def wav2(i, o, format):
     inp.close()
 
 
-def audio2(i, o, format, sr):
-    inp = av.open(i, "rb")
-    out = av.open(o, "wb", format=format)
-    if format == "ogg":
-        format = "libvorbis"
-    if format == "f32le":
-        format = "pcm_f32le"
-
-    ostream = out.add_stream(format, channels=1)
-    ostream.sample_rate = sr
-
-    for frame in inp.decode(audio=0):
-        for p in ostream.encode(frame):
-            out.mux(p)
-
-    out.close()
-    inp.close()
-
-
 def load_audio(file, sr):
-    file = (
-        file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-    )  # 防止小白拷路径头尾带了空格和"和回车
-    if os.path.exists(file) == False:
-        raise RuntimeError(
-            "You input a wrong audio path that does not exists, please fix it!"
-        )
     try:
-        with open(file, "rb") as f:
-            with BytesIO() as out:
-                audio2(f, out, "f32le", sr)
-                return np.frombuffer(out.getvalue(), np.float32).flatten()
+        # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
+        # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
+        # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
+        file = clean_path(file)  # 防止小白拷路径头尾带了空格和"和回车
+        out, _ = (
+            ffmpeg.input(file, threads=0)
+            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
+            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+        )
+    except Exception as e:
+        raise RuntimeError(f"Failed to load audio: {e}")
 
-    except AttributeError:
-        audio = file[1] / 32768.0
-        if len(audio.shape) == 2:
-            audio = np.mean(audio, -1)
-        return librosa.resample(audio, orig_sr=file[0], target_sr=16000)
+    return np.frombuffer(out, np.float32).flatten()
 
-    except:
-        raise RuntimeError(traceback.format_exc())
+
+def clean_path(path_str):
+    if platform.system() == 'Windows':
+        path_str = path_str.replace('/', '\\')
+    return path_str.strip(" ").strip('"').strip("\n").strip('"').strip(" ")