Add files via upload

This commit is contained in:
Anjok07 2023-04-12 03:26:00 -05:00 committed by GitHub
parent 6ffd7a244e
commit 459b38b3b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -5,8 +5,9 @@ import math
import random
import math
import platform
import traceback
from . import pyrb
#cur
OPERATING_SYSTEM = platform.system()
SYSTEM_ARCH = platform.platform()
SYSTEM_PROC = platform.processor()
@ -18,7 +19,7 @@ else:
from . import pyrb
if OPERATING_SYSTEM == 'Darwin':
wav_resolution = "polyphase" if SYSTEM_PROC == ARM or ARM in SYSTEM_ARCH else 'sinc_fastest'
wav_resolution = "polyphase" if SYSTEM_PROC == ARM or ARM in SYSTEM_ARCH else "sinc_fastest"
else:
wav_resolution = "sinc_fastest"
@ -35,8 +36,6 @@ def crop_center(h1, h2):
elif h1_shape[3] < h2_shape[3]:
raise ValueError('h1_shape[3] must be greater than h2_shape[3]')
# s_freq = (h2_shape[2] - h1_shape[2]) // 2
# e_freq = s_freq + h1_shape[2]
s_time = (h1_shape[3] - h2_shape[3]) // 2
e_time = s_time + h2_shape[3]
h1 = h1[:, :, :, s_time:e_time]
@ -116,6 +115,8 @@ def normalize(wave, is_normalize=False):
if is_normalize:
print(f"The result was normalized.")
wave /= maxv
else:
print(f"The result was not normalized.")
else:
print(f"\nNormalization Set {is_normalize}: Input not above threshold for clipping. Max:{maxv}")
@ -128,11 +129,14 @@ def normalize_two_stem(wave, mix, is_normalize=False):
max_mix = np.abs(mix).max()
if maxv > 1.0:
print(f"\nNormalization Set {is_normalize}: Primary source above threshold for clipping. The result was normalized. Max:{maxv}")
print(f"\nNormalization Set {is_normalize}: Mixture above threshold for clipping. The result was normalized. Max:{max_mix}")
print(f"\nNormalization Set {is_normalize}: Primary source above threshold for clipping. Max:{maxv}")
print(f"\nNormalization Set {is_normalize}: Mixture above threshold for clipping. Max:{max_mix}")
if is_normalize:
print(f"The result was normalized.")
wave /= maxv
mix /= maxv
else:
print(f"The result was not normalized.")
else:
print(f"\nNormalization Set {is_normalize}: Input not above threshold for clipping. Max:{maxv}")
@ -205,75 +209,51 @@ def reduce_vocal_aggressively(X, y, softmask):
return y_mag * np.exp(1.j * np.angle(y))
def merge_artifacts(y_mask, thres=0.01, min_range=64, fade_size=32):
if min_range < fade_size * 2:
raise ValueError('min_range must be >= fade_size * 2')
idx = np.where(y_mask.min(axis=(0, 1)) > thres)[0]
start_idx = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
end_idx = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
artifact_idx = np.where(end_idx - start_idx > min_range)[0]
weight = np.zeros_like(y_mask)
if len(artifact_idx) > 0:
start_idx = start_idx[artifact_idx]
end_idx = end_idx[artifact_idx]
old_e = None
for s, e in zip(start_idx, end_idx):
if old_e is not None and s - old_e < fade_size:
s = old_e - fade_size * 2
if s != 0:
weight[:, :, s:s + fade_size] = np.linspace(0, 1, fade_size)
else:
s -= fade_size
if e != y_mask.shape[2]:
weight[:, :, e - fade_size:e] = np.linspace(1, 0, fade_size)
else:
e += fade_size
weight[:, :, s + fade_size:e - fade_size] = 1
old_e = e
v_mask = 1 - y_mask
y_mask += weight * v_mask
return y_mask
def mask_silence(mag, ref, thres=0.1, min_range=64, fade_size=32):
if min_range < fade_size * 2:
raise ValueError('min_range must be >= fade_area * 2')
mag = mag.copy()
idx = np.where(ref.mean(axis=(0, 1)) < thres)[0]
starts = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
ends = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
uninformative = np.where(ends - starts > min_range)[0]
if len(uninformative) > 0:
starts = starts[uninformative]
ends = ends[uninformative]
old_e = None
for s, e in zip(starts, ends):
if old_e is not None and s - old_e < fade_size:
s = old_e - fade_size * 2
if s != 0:
weight = np.linspace(0, 1, fade_size)
mag[:, :, s:s + fade_size] += weight * ref[:, :, s:s + fade_size]
else:
s -= fade_size
if e != mag.shape[2]:
weight = np.linspace(1, 0, fade_size)
mag[:, :, e - fade_size:e] += weight * ref[:, :, e - fade_size:e]
else:
e += fade_size
mag[:, :, s + fade_size:e - fade_size] += ref[:, :, s + fade_size:e - fade_size]
old_e = e
return mag
mask = y_mask
try:
if min_range < fade_size * 2:
raise ValueError('min_range must be >= fade_size * 2')
idx = np.where(y_mask.min(axis=(0, 1)) > thres)[0]
start_idx = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
end_idx = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
artifact_idx = np.where(end_idx - start_idx > min_range)[0]
weight = np.zeros_like(y_mask)
if len(artifact_idx) > 0:
start_idx = start_idx[artifact_idx]
end_idx = end_idx[artifact_idx]
old_e = None
for s, e in zip(start_idx, end_idx):
if old_e is not None and s - old_e < fade_size:
s = old_e - fade_size * 2
if s != 0:
weight[:, :, s:s + fade_size] = np.linspace(0, 1, fade_size)
else:
s -= fade_size
if e != y_mask.shape[2]:
weight[:, :, e - fade_size:e] = np.linspace(1, 0, fade_size)
else:
e += fade_size
weight[:, :, s + fade_size:e - fade_size] = 1
old_e = e
v_mask = 1 - y_mask
y_mask += weight * v_mask
mask = y_mask
except Exception as e:
error_name = f'{type(e).__name__}'
traceback_text = ''.join(traceback.format_tb(e.__traceback__))
message = f'{error_name}: "{e}"\n{traceback_text}"'
print('Post Process Failed: ', message)
return mask
def align_wave_head_and_tail(a, b):
l = min([a[0].size, b[0].size])
@ -386,11 +366,11 @@ def mirroring(a, spec_m, input_high_end, mp):
return np.where(np.abs(input_high_end) <= np.abs(mi), input_high_end, mi)
def adjust_aggr(mask, is_vocal_model, aggressiveness):
aggr = aggressiveness.get('value', 0.0) * 4
def adjust_aggr(mask, is_non_accom_stem, aggressiveness):
aggr = aggressiveness['value']
if aggr != 0:
if is_vocal_model:
if is_non_accom_stem:
aggr = 1 - aggr
aggr = [aggr, aggr]
@ -403,6 +383,9 @@ def adjust_aggr(mask, is_vocal_model, aggressiveness):
mask[ch, :aggressiveness['split_bin']] = np.power(mask[ch, :aggressiveness['split_bin']], 1 + aggr[ch] / 3)
mask[ch, aggressiveness['split_bin']:] = np.power(mask[ch, aggressiveness['split_bin']:], 1 + aggr[ch])
# if is_non_accom_stem:
# mask = (1.0 - mask)
return mask
def stft(wave, nfft, hl):
@ -442,36 +425,20 @@ def spec_effects(wave, algorithm='Default', value=None):
return wave
def spectrogram_to_wave_bare(spec, hop_length=1024):
spec_left = np.asfortranarray(spec[0])
spec_right = np.asfortranarray(spec[1])
wave_left = librosa.istft(spec_left, hop_length=hop_length)
wave_right = librosa.istft(spec_right, hop_length=hop_length)
wave = np.asfortranarray([wave_left, wave_right])
return wave
def spectrogram_to_wave_no_mp(spec, hop_length=1024):
if spec.ndim == 2:
wave = librosa.istft(spec, hop_length=hop_length)
elif spec.ndim == 3:
spec_left = np.asfortranarray(spec[0])
spec_right = np.asfortranarray(spec[1])
wave_left = librosa.istft(spec_left, hop_length=hop_length)
wave_right = librosa.istft(spec_right, hop_length=hop_length)
wave = np.asfortranarray([wave_left, wave_right])
def spectrogram_to_wave_no_mp(spec, n_fft=2048, hop_length=1024):
wave = librosa.istft(spec, n_fft=n_fft, hop_length=hop_length)
if wave.ndim == 1:
wave = np.asfortranarray([wave,wave])
return wave
def wave_to_spectrogram_no_mp(wave):
wave_left = np.asfortranarray(wave[0])
wave_right = np.asfortranarray(wave[1])
spec_left = librosa.stft(wave_left, n_fft=2048, hop_length=1024)
spec_right = librosa.stft(wave_right, n_fft=2048, hop_length=1024)
spec = np.asfortranarray([spec_left, spec_right])
spec = librosa.stft(wave, n_fft=2048, hop_length=1024)
if spec.ndim == 1:
spec = np.asfortranarray([spec,spec])
return spec
@ -519,6 +486,8 @@ def ensembling(a, specs):
return spec
def ensemble_inputs(audio_input, algorithm, is_normalization, wav_type_set, save_path):
wavs_ = []
if algorithm == AVERAGE:
output = average_audio(audio_input)
@ -528,10 +497,15 @@ def ensemble_inputs(audio_input, algorithm, is_normalization, wav_type_set, save
for i in range(len(audio_input)):
wave, samplerate = librosa.load(audio_input[i], mono=False, sr=44100)
wavs_.append(wave)
spec = wave_to_spectrogram_no_mp(wave)
specs.append(spec)
wave_shapes = [w.shape[1] for w in wavs_]
target_shape = wavs_[wave_shapes.index(max(wave_shapes))]
output = spectrogram_to_wave_no_mp(ensembling(algorithm, specs))
output = to_shape(output, target_shape.shape)
sf.write(save_path, normalize(output.T, is_normalization), samplerate, subtype=wav_type_set)
@ -555,7 +529,7 @@ def to_shape_minimize(x: np.ndarray, target_shape):
return np.pad(x, tuple(padding_list), mode='constant')
def augment_audio(export_path, audio_file, rate, is_normalization, wav_type_set, save_format=None, is_pitch=False):
print('Rate: ', rate)
wav, sr = librosa.load(audio_file, sr=44100, mono=False)
if wav.ndim == 1: