Improve precision of audio processing (#94)
* Improve precision of audio processing * Fix clipping problem in resampling: resampling in general sometimes causes signal clipping, not just librosa.resample * Fix error
This commit is contained in:
parent
c423f77a16
commit
297d92bf5d
@ -33,7 +33,9 @@ class FeatureInput(object):
|
|||||||
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
|
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
|
||||||
|
|
||||||
def compute_f0(self, path, f0_method):
|
def compute_f0(self, path, f0_method):
|
||||||
x, sr = librosa.load(path, self.fs)
|
# default resample type of librosa.resample is "soxr_hq".
|
||||||
|
# Quality: soxr_vhq > soxr_hq
|
||||||
|
x, sr = librosa.load(path, self.fs, res_type='soxr_vhq')
|
||||||
p_len = x.shape[0] // self.hop
|
p_len = x.shape[0] // self.hop
|
||||||
assert sr == self.fs
|
assert sr == self.fs
|
||||||
if f0_method == "pm":
|
if f0_method == "pm":
|
||||||
|
@ -12,10 +12,10 @@ def load_audio(file, sr):
|
|||||||
) # 防止小白拷路径头尾带了空格和"和回车
|
) # 防止小白拷路径头尾带了空格和"和回车
|
||||||
out, _ = (
|
out, _ = (
|
||||||
ffmpeg.input(file, threads=0)
|
ffmpeg.input(file, threads=0)
|
||||||
.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
|
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
||||||
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"Failed to load audio: {e}")
|
raise RuntimeError(f"Failed to load audio: {e}")
|
||||||
|
|
||||||
return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
|
return np.frombuffer(out, np.float32).flatten()
|
@ -98,7 +98,10 @@ class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
|
|||||||
sampling_rate, self.sampling_rate
|
sampling_rate, self.sampling_rate
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
audio_norm = audio / self.max_wav_value
|
audio_norm = audio
|
||||||
|
# audio_norm = audio / self.max_wav_value
|
||||||
|
# audio_norm = audio / np.abs(audio).max()
|
||||||
|
|
||||||
audio_norm = audio_norm.unsqueeze(0)
|
audio_norm = audio_norm.unsqueeze(0)
|
||||||
spec_filename = filename.replace(".wav", ".spec.pt")
|
spec_filename = filename.replace(".wav", ".spec.pt")
|
||||||
if os.path.exists(spec_filename):
|
if os.path.exists(spec_filename):
|
||||||
@ -287,7 +290,10 @@ class TextAudioLoader(torch.utils.data.Dataset):
|
|||||||
sampling_rate, self.sampling_rate
|
sampling_rate, self.sampling_rate
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
audio_norm = audio / self.max_wav_value
|
audio_norm = audio
|
||||||
|
# audio_norm = audio / self.max_wav_value
|
||||||
|
# audio_norm = audio / np.abs(audio).max()
|
||||||
|
|
||||||
audio_norm = audio_norm.unsqueeze(0)
|
audio_norm = audio_norm.unsqueeze(0)
|
||||||
spec_filename = filename.replace(".wav", ".spec.pt")
|
spec_filename = filename.replace(".wav", ".spec.pt")
|
||||||
if os.path.exists(spec_filename):
|
if os.path.exists(spec_filename):
|
||||||
|
@ -59,19 +59,34 @@ class PreProcess:
|
|||||||
wavfile.write(
|
wavfile.write(
|
||||||
"%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1),
|
"%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1),
|
||||||
self.sr,
|
self.sr,
|
||||||
(tmp_audio * 32768).astype(np.int16),
|
(tmp_audio * 1).astype(np.float32),
|
||||||
)
|
)
|
||||||
tmp_audio = librosa.resample(tmp_audio, orig_sr=self.sr, target_sr=16000)
|
|
||||||
|
# default resample type of librosa.resample is "soxr_hq".
|
||||||
|
# Quality: soxr_vhq > soxr_hq
|
||||||
|
tmp_audio = librosa.resample(tmp_audio, orig_sr=self.sr, target_sr=16000, res_type="soxr_vhq")
|
||||||
|
tmp_audio = (tmp_audio / np.abs(tmp_audio).max() * (self.max * self.alpha)) + (
|
||||||
|
1 - self.alpha
|
||||||
|
) * tmp_audio
|
||||||
|
wavfile.write(
|
||||||
|
"%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1),
|
||||||
|
self.sr,
|
||||||
|
(tmp_audio * 1).astype(np.float32),
|
||||||
|
)
|
||||||
|
|
||||||
wavfile.write(
|
wavfile.write(
|
||||||
"%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1),
|
"%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1),
|
||||||
16000,
|
16000,
|
||||||
(tmp_audio * 32768).astype(np.int16),
|
(tmp_audio * 1).astype(np.float32),
|
||||||
)
|
)
|
||||||
|
|
||||||
def pipeline(self, path, idx0):
|
def pipeline(self, path, idx0):
|
||||||
try:
|
try:
|
||||||
audio = load_audio(path, self.sr)
|
audio = load_audio(path, self.sr)
|
||||||
audio = signal.filtfilt(self.bh, self.ah, audio)
|
# zero-phase digital filtering causes pre-ringing noise...
|
||||||
|
# audio = signal.filtfilt(self.bh, self.ah, audio)
|
||||||
|
audio = signal.lfilter(self.bh, self.ah, audio)
|
||||||
|
|
||||||
idx1 = 0
|
idx1 = 0
|
||||||
for audio in self.slicer.slice(audio):
|
for audio in self.slicer.slice(audio):
|
||||||
i = 0
|
i = 0
|
||||||
|
Loading…
Reference in New Issue
Block a user