From 4a0b899b15bdb017c2a368f01044d093f9e4eef8 Mon Sep 17 00:00:00 2001 From: Anjok07 <68268275+Anjok07@users.noreply.github.com> Date: Wed, 6 Jul 2022 02:57:56 -0500 Subject: [PATCH] Add files via upload --- UVR.py | 82 +++++-- inference_MDX.py | 104 ++++++--- inference_demucs.py | 93 +++++--- inference_v5.py | 75 ++++++- inference_v5_ensemble.py | 456 ++++++++++++++++++++++++++++----------- 5 files changed, 602 insertions(+), 208 deletions(-) diff --git a/UVR.py b/UVR.py index c3992fa..9f79473 100644 --- a/UVR.py +++ b/UVR.py @@ -21,6 +21,7 @@ from PIL import Image from PIL import ImageTk import pickle # Save Data from pathlib import Path + # Other Modules # Pathfinding @@ -39,7 +40,6 @@ import inference_MDX import inference_v5 import inference_v5_ensemble import inference_demucs -# Version from __version__ import VERSION from win32api import GetSystemMetrics @@ -112,6 +112,7 @@ DEFAULT_DATA = { 'appendensem': False, 'demucs_only': False, 'split_mode': True, + 'normalize': False, #MDX-Net 'demucsmodel': False, 'demucsmodelVR': False, @@ -130,6 +131,9 @@ DEFAULT_DATA = { 'segment': 'None', 'dim_f': 2048, 'noise_pro_select': 'Auto Select', + 'wavtype': 'PCM_16', + 'flactype': 'PCM_16', + 'mp3bit': '320k', 'overlap': 0.25, 'shifts': 2, 'overlap_b': 0.25, @@ -144,6 +148,7 @@ DEFAULT_DATA = { 'DemucsModel': 'mdx_extra', 'DemucsModel_MDX': 'UVR_Demucs_Model_1', 'ModelParams': 'Auto', + 'settest': False, } def open_image(path: str, size: tuple = None, keep_aspect: bool = True, rotate: int = 0) -> ImageTk.PhotoImage: @@ -416,6 +421,7 @@ class MainWindow(TkinterDnD.Tk): self.appendensem_var = tk.BooleanVar(value=data['appendensem']) self.demucs_only_var = tk.BooleanVar(value=data['demucs_only']) self.split_mode_var = tk.BooleanVar(value=data['split_mode']) + self.normalize_var = tk.BooleanVar(value=data['normalize']) # Processing Options self.gpuConversion_var = tk.BooleanVar(value=data['gpu']) self.postprocessing_var = tk.BooleanVar(value=data['postprocess']) @@ -443,6 
+449,9 @@ class MainWindow(TkinterDnD.Tk): self.segment_var = tk.StringVar(value=data['segment']) self.dim_f_var = tk.StringVar(value=data['dim_f']) self.noise_pro_select_var = tk.StringVar(value=data['noise_pro_select']) + self.wavtype_var = tk.StringVar(value=data['wavtype']) + self.flactype_var = tk.StringVar(value=data['flactype']) + self.mp3bit_var = tk.StringVar(value=data['mp3bit']) self.overlap_var = tk.StringVar(value=data['overlap']) self.shifts_var = tk.StringVar(value=data['shifts']) self.overlap_b_var = tk.StringVar(value=data['overlap_b']) @@ -459,6 +468,7 @@ class MainWindow(TkinterDnD.Tk): self.inst_only_b_var = tk.BooleanVar(value=data['inst_only_b']) self.audfile_var = tk.BooleanVar(value=data['audfile']) self.autocompensate_var = tk.BooleanVar(value=data['autocompensate']) + self.settest_var = tk.BooleanVar(value=data['settest']) # Choose Conversion Method self.aiModel_var = tk.StringVar(value=data['aiModel']) self.last_aiModel = self.aiModel_var.get() @@ -530,8 +540,10 @@ class MainWindow(TkinterDnD.Tk): self.command_Text = ThreadSafeConsole(master=self, background='#0e0e0f',fg='#898b8e', font=('Century Gothic', 11),borderwidth=0) + #self.command_Text.write(f'Ultimate Vocal Remover [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]\n') self.command_Text.write(f'Ultimate Vocal Remover v{VERSION} [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]\n') - + + def configure_widgets(self): """Change widget styling and appearance""" @@ -1223,6 +1235,7 @@ class MainWindow(TkinterDnD.Tk): 'appendensem': self.appendensem_var.get(), 'demucs_only': self.demucs_only_var.get(), 'split_mode': self.split_mode_var.get(), + 'normalize': self.normalize_var.get(), 'tta': self.tta_var.get(), 'save': self.save_var.get(), 'output_image': self.outputImage_var.get(), @@ -1261,6 +1274,7 @@ class MainWindow(TkinterDnD.Tk): 'inst_only_b': self.inst_only_b_var.get(), 'audfile': self.audfile_var.get(), 'autocompensate': self.autocompensate_var.get(), + 'settest': 
self.settest_var.get(), 'chunks': chunks, 'chunks_d': self.chunks_d_var.get(), 'noisereduc_s': noisereduc_s, @@ -1269,6 +1283,9 @@ class MainWindow(TkinterDnD.Tk): 'segment': self.segment_var.get(), 'dim_f': self.dim_f_var.get(), 'noise_pro_select': self.noise_pro_select_var.get(), + 'wavtype': self.wavtype_var.get(), + 'flactype': self.flactype_var.get(), + 'mp3bit': self.mp3bit_var.get(), 'overlap': self.overlap_var.get(), 'shifts': self.shifts_var.get(), 'overlap_b': self.overlap_b_var.get(), @@ -2009,14 +2026,13 @@ class MainWindow(TkinterDnD.Tk): if self.autocompensate_var.get() == True: - self.compensate_var.set('Auto') try: self.options_compensate.configure(state=tk.DISABLED) except: pass if self.autocompensate_var.get() == False: - self.compensate_var.set(1.03597672895) + #self.compensate_var.set() try: self.options_compensate.configure(state=tk.NORMAL) except: @@ -2365,25 +2381,19 @@ class MainWindow(TkinterDnD.Tk): l0.grid(row=10,column=0,padx=0,pady=0) l0=ttk.Checkbutton(frame0, text='Save Stems to Model & Track Name Directory', variable=self.audfile_var) - l0.grid(row=11,column=0,padx=0,pady=0) - - l0=ttk.Checkbutton(frame0, text='Settings Test Mode', variable=self.modelFolder_var) - l0.grid(row=12,column=0,padx=0,pady=0) - - # l0=ttk.Checkbutton(frame0, text='Basic Prediction', variable=self.audfile_var) - # l0.grid(row=10,column=0,padx=0,pady=0) + l0.grid(row=11,column=0,padx=0,pady=5) l0=ttk.Button(frame0,text='Open Demucs Model Folder', command=self.open_Modelfolder_de) - l0.grid(row=13,column=0,padx=0,pady=0) + l0.grid(row=12,column=0,padx=0,pady=0) l0=ttk.Button(frame0,text='Back to Main Menu', command=close_win) - l0.grid(row=14,column=0,padx=0,pady=10) + l0.grid(row=13,column=0,padx=0,pady=10) def close_win_self(): top.destroy() l0=ttk.Button(frame0,text='Close Window', command=close_win_self) - l0.grid(row=15,column=0,padx=0,pady=0) + l0.grid(row=14,column=0,padx=0,pady=0) def advanced_mdx_options(self): @@ -2467,13 +2477,13 @@ class 
MainWindow(TkinterDnD.Tk): l0.grid(row=8,column=0,padx=0,pady=0) l0=ttk.Checkbutton(frame0, text='Autoset Volume Compensation', variable=self.autocompensate_var) - l0.grid(row=9,column=0,padx=0,pady=10) + l0.grid(row=9,column=0,padx=0,pady=5) l0=ttk.Checkbutton(frame0, text='Reduce Instrumental Noise Separately', variable=self.nophaseinst_var) l0.grid(row=10,column=0,padx=0,pady=0) l0=tk.Label(frame0, text='Noise Profile', font=("Century Gothic", "9"), foreground='#13a4c9') - l0.grid(row=11,column=0,padx=0,pady=10) + l0.grid(row=11,column=0,padx=0,pady=5) l0=ttk.OptionMenu(frame0, self.noise_pro_select_var, None, 'Auto Select', 'MDX-NET_Noise_Profile_14_kHz', 'MDX-NET_Noise_Profile_17_kHz', 'MDX-NET_Noise_Profile_Full_Band') l0.grid(row=12,column=0,padx=0,pady=0) @@ -3242,13 +3252,18 @@ class MainWindow(TkinterDnD.Tk): tabControl = ttk.Notebook(top) tab1 = ttk.Frame(tabControl) + tab2 = ttk.Frame(tabControl) tabControl.add(tab1, text ='Settings Guide') + tabControl.add(tab2, text ='Audio Format Settings') tabControl.pack(expand = 1, fill ="both") tab1.grid_rowconfigure(0, weight=1) tab1.grid_columnconfigure(0, weight=1) + + tab2.grid_rowconfigure(0, weight=1) + tab2.grid_columnconfigure(0, weight=1) frame0=Frame(tab1,highlightbackground='red',highlightthicknes=0) frame0.grid(row=0,column=0,padx=0,pady=0) @@ -3277,11 +3292,35 @@ class MainWindow(TkinterDnD.Tk): l0=Label(frame0,text="Additional Options",font=("Century Gothic", "13", "bold", "underline"), justify="center", fg="#13a4c9") l0.grid(row=7,column=0,padx=0,pady=10) + l0=ttk.Checkbutton(frame0, text='Settings Test Mode', variable=self.settest_var) + l0.grid(row=8,column=0,padx=0,pady=0) + l0=ttk.Button(frame0,text='Open Application Directory', command=self.open_appdir_filedialog) - l0.grid(row=8,column=0,padx=20,pady=5) + l0.grid(row=9,column=0,padx=20,pady=5) l0=ttk.Button(frame0,text='Close Window', command=close_win) - l0.grid(row=9,column=0,padx=20,pady=5) + l0.grid(row=10,column=0,padx=20,pady=5) + + 
frame0=Frame(tab2,highlightbackground='red',highlightthicknes=0) + frame0.grid(row=0,column=0,padx=0,pady=0) + + l0=Label(frame0,text="Audio Format Settings",font=("Century Gothic", "13", "bold", "underline"), justify="center", fg="#13a4c9") + l0.grid(row=0,column=0,padx=0,pady=10) + + l0=tk.Label(frame0, text='Wav Type', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=1,column=0,padx=0,pady=10) + + l0=ttk.OptionMenu(frame0, self.wavtype_var, None, 'PCM_U8', 'PCM_16', 'PCM_24', 'PCM_32', '32-bit Float', '64-bit Float') + l0.grid(row=2,column=0,padx=20,pady=0) + + l0=tk.Label(frame0, text='Mp3 Bitrate', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=5,column=0,padx=0,pady=10) + + l0=ttk.OptionMenu(frame0, self.mp3bit_var, None, '96k', '128k', '160k', '224k', '256k', '320k') + l0.grid(row=6,column=0,padx=20,pady=0) + + l0=ttk.Checkbutton(frame0, text='Normalize Outputs\n(Prevents clipping)', variable=self.normalize_var) + l0.grid(row=7,column=0,padx=0,pady=10) def error_log(self): @@ -3443,6 +3482,7 @@ class MainWindow(TkinterDnD.Tk): 'appendensem': self.appendensem_var.get(), 'demucs_only': self.demucs_only_var.get(), 'split_mode': self.split_mode_var.get(), + 'normalize': self.normalize_var.get(), 'postprocess': self.postprocessing_var.get(), 'tta': self.tta_var.get(), 'save': self.save_var.get(), @@ -3473,12 +3513,16 @@ class MainWindow(TkinterDnD.Tk): 'inst_only_b': self.inst_only_b_var.get(), 'audfile': self.audfile_var.get(), 'autocompensate': self.autocompensate_var.get(), + 'settest': self.settest_var.get(), 'chunks': chunks, 'chunks_d': self.chunks_d_var.get(), 'n_fft_scale': self.n_fft_scale_var.get(), 'segment': self.segment_var.get(), 'dim_f': self.dim_f_var.get(), 'noise_pro_select': self.noise_pro_select_var.get(), + 'wavtype': self.wavtype_var.get(), + 'flactype': self.flactype_var.get(), + 'mp3bit': self.mp3bit_var.get(), 'overlap': self.overlap_var.get(), 'shifts': self.shifts_var.get(), 'overlap_b': 
self.overlap_b_var.get(), @@ -3512,4 +3556,4 @@ if __name__ == "__main__": def callback(url): webbrowser.open_new_tab(url) - root.mainloop() + root.mainloop() \ No newline at end of file diff --git a/inference_MDX.py b/inference_MDX.py index f5038e7..90b1671 100644 --- a/inference_MDX.py +++ b/inference_MDX.py @@ -38,6 +38,7 @@ import torch import tkinter as tk import traceback # Error Message Recent Calls import time # Timer +from random import randrange from typing import Literal @@ -213,7 +214,7 @@ class Predictor(): non_reduced_Instrumental_path = '{save_path}/{file_name}.wav'.format( save_path=save_path, file_name = f'{os.path.basename(_basename)}_{Instrumental_name}_{model_set_name}_No_Reduction',) - non_reduced_path_mp3 = '{save_path}/{file_name}.mp3'.format( + non_reduced_Instrumental_path_mp3 = '{save_path}/{file_name}.mp3'.format( save_path=save_path, file_name = f'{os.path.basename(_basename)}_{Instrumental_name}_{model_set_name}_No_Reduction',) non_reduced_Instrumental_path_flac = '{save_path}/{file_name}.flac'.format( @@ -257,7 +258,7 @@ class Predictor(): else: widget_text.write(base_text + 'Saving vocals... ') - sf.write(non_reduced_vocal_path, sources[c].T, samplerate) + sf.write(non_reduced_vocal_path, sources[c].T, samplerate, subtype=wav_type_set) update_progress(**progress_kwargs, step=(0.9)) widget_text.write('Done!\n') @@ -279,9 +280,9 @@ class Predictor(): if data['demucs_only']: if 'UVR' in demucs_model_set: - sf.write(non_reduced_vocal_path, sources[1].T, samplerate) + sf.write(non_reduced_vocal_path, sources[1].T, samplerate, subtype=wav_type_set) else: - sf.write(non_reduced_vocal_path, sources[source_val].T, samplerate) + sf.write(non_reduced_vocal_path, sources[source_val].T, samplerate, subtype=wav_type_set) update_progress(**progress_kwargs, step=(0.9)) widget_text.write('Done!\n') @@ -304,7 +305,7 @@ class Predictor(): widget_text.write(base_text + 'Preparing Instrumental...') else: widget_text.write(base_text + 'Saving Vocals... 
') - sf.write(vocal_path, sources[c].T, samplerate) + sf.write(vocal_path, sources[c].T, samplerate, subtype=wav_type_set) update_progress(**progress_kwargs, step=(0.9)) widget_text.write('Done!\n') @@ -316,11 +317,11 @@ class Predictor(): if data['demucs_only']: if 'UVR' in demucs_model_set: - sf.write(vocal_path, sources[1].T, samplerate) + sf.write(vocal_path, sources[1].T, samplerate, subtype=wav_type_set) else: - sf.write(vocal_path, sources[source_val].T, samplerate) + sf.write(vocal_path, sources[source_val].T, samplerate, subtype=wav_type_set) else: - sf.write(vocal_path, sources[source_val].T, samplerate) + sf.write(vocal_path, sources[source_val].T, samplerate, subtype=wav_type_set) update_progress(**progress_kwargs, step=(0.9)) @@ -373,7 +374,7 @@ class Predictor(): wave[d] = np.array([wave[d], wave[d]]) else: # lower bands wave[d] = librosa.resample(wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type']) - + spec[d] = spec_utils.wave_to_spectrogram(wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse']) specs[i] = spec_utils.combine_spectrograms(spec, mp) @@ -387,12 +388,13 @@ class Predictor(): y_mag = np.abs(specs[1]) max_mag = np.where(X_mag >= y_mag, X_mag, y_mag) v_spec = specs[1] - max_mag * np.exp(1.j * np.angle(specs[0])) + update_progress(**progress_kwargs, step=(1)) if not data['noisereduc_s'] == 'None': if data['nophaseinst']: - sf.write(non_reduced_Instrumental_path, spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr']) + sf.write(non_reduced_Instrumental_path, normalization_set(spec_utils.cmb_spectrogram_to_wave(-v_spec, mp)), mp.param['sr'], subtype=wav_type_set) reduction_sen = float(data['noisereduc_s'])/10 print(noise_pro_set) @@ -403,9 +405,9 @@ class Predictor(): shell=True, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE) else: - sf.write(Instrumental_path, spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr']) + 
sf.write(Instrumental_path, normalization_set(spec_utils.cmb_spectrogram_to_wave(-v_spec, mp)), mp.param['sr'], subtype=wav_type_set) else: - sf.write(Instrumental_path, spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr']) + sf.write(Instrumental_path, normalization_set(spec_utils.cmb_spectrogram_to_wave(-v_spec, mp)), mp.param['sr'], subtype=wav_type_set) if data['inst_only']: if file_exists_v == 'there': @@ -427,7 +429,7 @@ class Predictor(): pass else: musfile = pydub.AudioSegment.from_wav(non_reduced_Instrumental_path) - musfile.export(non_reduced_Instrumental_path_mp3, format="mp3", bitrate="320k") + musfile.export(non_reduced_Instrumental_path_mp3, format="mp3", bitrate=mp3_bit_set) try: os.remove(non_reduced_Instrumental_path) except: @@ -435,7 +437,7 @@ class Predictor(): pass else: musfile = pydub.AudioSegment.from_wav(vocal_path) - musfile.export(vocal_path_mp3, format="mp3", bitrate="320k") + musfile.export(vocal_path_mp3, format="mp3", bitrate=mp3_bit_set) if file_exists_v == 'there': pass else: @@ -451,7 +453,7 @@ class Predictor(): pass else: musfile = pydub.AudioSegment.from_wav(non_reduced_Instrumental_path) - musfile.export(non_reduced_Instrumental_path_mp3, format="mp3", bitrate="320k") + musfile.export(non_reduced_Instrumental_path_mp3, format="mp3", bitrate=mp3_bit_set) if file_exists_n == 'there': pass else: @@ -462,7 +464,7 @@ class Predictor(): if data['voc_only'] == True: if data['non_red'] == True: musfile = pydub.AudioSegment.from_wav(non_reduced_vocal_path) - musfile.export(non_reduced_vocal_path_mp3, format="mp3", bitrate="320k") + musfile.export(non_reduced_vocal_path_mp3, format="mp3", bitrate=mp3_bit_set) try: os.remove(non_reduced_vocal_path) except: @@ -470,7 +472,7 @@ class Predictor(): pass else: musfile = pydub.AudioSegment.from_wav(Instrumental_path) - musfile.export(Instrumental_path_mp3, format="mp3", bitrate="320k") + musfile.export(Instrumental_path_mp3, format="mp3", bitrate=mp3_bit_set) if file_exists_i == 
'there': pass else: @@ -483,7 +485,7 @@ class Predictor(): pass else: musfile = pydub.AudioSegment.from_wav(non_reduced_vocal_path) - musfile.export(non_reduced_vocal_path_mp3, format="mp3", bitrate="320k") + musfile.export(non_reduced_vocal_path_mp3, format="mp3", bitrate=mp3_bit_set) if file_exists_n == 'there': pass else: @@ -713,7 +715,6 @@ class Predictor(): if not data['demucsmodel']: sources = self.demix_base(segmented_mix, margin_size=margin) - #value=float(0.9)*float(compensate) elif data['demucs_only']: if split_mode == True: sources = self.demix_demucs_split(mix) @@ -742,7 +743,11 @@ class Predictor(): sources[source_val] = (spec_effects(wave=[demucs_out[source_val],base_out[0]], algorithm=data['mixing'], value=b[source_val])*float(compensate)) # compensation - return sources + + if not data['demucsmodel']: + return sources*float(compensate) + else: + return sources def demix_base(self, mixes, margin_size): chunked_sources = [] @@ -882,10 +887,14 @@ data = { 'shifts': 0, 'margin': 44100, 'split_mode': False, + 'normalize': False, 'nophaseinst': True, 'compensate': 1.03597672895, 'autocompensate': True, 'demucs_only': False, + 'wavtype': 'PCM_16', + 'flactype': 'PCM_16', + 'mp3bit': '320k', 'mixing': 'Default', 'DemucsModel_MDX': 'UVR_Demucs_Model_1', # Choose Model @@ -941,10 +950,8 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global stemset_n global noise_pro_set global demucs_model_set - global autocompensate global compensate - global channel_set global margin_set global overlap_set @@ -952,10 +959,13 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global source_val global split_mode global demucs_model_set + global wav_type_set + global flac_type_set + global mp3_bit_set + global normalization_set global demucs_switch - autocompensate = data['autocompensate'] # Update default settings default_chunks = data['chunks'] @@ -987,6 +997,8 @@ def main(window: tk.Wm, text_widget: 
tk.Text, button_widget: tk.Button, progress data.update(kwargs) + autocompensate = data['autocompensate'] + if data['mdxnetModeltype'] == 'Vocals (Custom)': stemset = 'v' source_val_set = 3 @@ -1156,7 +1168,22 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress else: noise_pro_set = data['noise_pro_select'] - + if data['wavtype'] == '32-bit Float': + wav_type_set = 'FLOAT' + elif data['wavtype'] == '64-bit Float': + wav_type_set = 'DOUBLE' + else: + wav_type_set = data['wavtype'] + + flac_type_set = data['flactype'] + mp3_bit_set = data['mp3bit'] + + if data['normalize'] == True: + normalization_set = spec_utils.normalize + print('normalization on') + else: + normalization_set = spec_utils.nonormalize + print('normalization off') print(n_fft_scale_set) print(dim_f_set) @@ -1179,6 +1206,22 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress split_mode = data['split_mode'] demucs_switch = data['demucsmodel'] + if data['wavtype'] == '64-bit Float': + if data['saveFormat'] == 'Flac': + text_widget.write('Please select \"WAV\" as your save format to use 64-bit Float.\n') + text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') + progress_var.set(0) + button_widget.configure(state=tk.NORMAL) # Enable Button + return + + if data['wavtype'] == '64-bit Float': + if data['saveFormat'] == 'Mp3': + text_widget.write('Please select \"WAV\" as your save format to use 64-bit Float.\n') + text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') + progress_var.set(0) + button_widget.configure(state=tk.NORMAL) # Enable Button + return + if stemset_n == '(Bass)': if 'UVR' in demucs_model_set: text_widget.write('The selected Demucs model can only be used with vocal stems.\n') @@ -1211,8 +1254,17 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress pass _mixture = 
f'{data["input_paths"]}' - _basename = f'{data["export_path"]}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' - + + timestampnum = round(datetime.utcnow().timestamp()) + randomnum = randrange(100000, 1000000) + + if data['settest']: + try: + _basename = f'{data["export_path"]}/{str(timestampnum)}_{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' + except: + _basename = f'{data["export_path"]}/{str(randomnum)}_{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' + else: + _basename = f'{data["export_path"]}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' # -Get text and update progress- base_text = get_baseText(total_files=len(data['input_paths']), file_num=file_num) diff --git a/inference_demucs.py b/inference_demucs.py index b1dc6a7..0be705c 100644 --- a/inference_demucs.py +++ b/inference_demucs.py @@ -245,22 +245,22 @@ class Predictor(): pass if 'UVR' in model_set_name: - sf.write(Instrumental_path, sources[0].T, samplerate) - sf.write(vocals_path, sources[1].T, samplerate) + sf.write(Instrumental_path, normalization_set(sources[0]).T, samplerate, subtype=wav_type_set) + sf.write(vocals_path, normalization_set(sources[1]).T, samplerate, subtype=wav_type_set) else: - sf.write(bass_path, sources[0].T, samplerate) - sf.write(drums_path, sources[1].T, samplerate) - sf.write(other_path, sources[2].T, samplerate) - sf.write(vocals_path, sources[3].T, samplerate) + sf.write(bass_path, normalization_set(sources[0]).T, samplerate, subtype=wav_type_set) + sf.write(drums_path, normalization_set(sources[1]).T, samplerate, subtype=wav_type_set) + sf.write(other_path, normalization_set(sources[2]).T, samplerate, subtype=wav_type_set) + sf.write(vocals_path, normalization_set(sources[3]).T, samplerate, subtype=wav_type_set) if data['saveFormat'] == 'Mp3': try: if 'UVR' in model_set_name: widget_text.write(base_text + 'Saving Stem(s) as Mp3(s)... 
') musfile = pydub.AudioSegment.from_wav(vocals_path) - musfile.export(vocals_path_mp3, format="mp3", bitrate="320k") + musfile.export(vocals_path_mp3, format="mp3", bitrate=mp3_bit_set) musfile = pydub.AudioSegment.from_wav(Instrumental_path) - musfile.export(Instrumental_path_mp3, format="mp3", bitrate="320k") + musfile.export(Instrumental_path_mp3, format="mp3", bitrate=mp3_bit_set) try: os.remove(Instrumental_path) os.remove(vocals_path) @@ -269,13 +269,13 @@ class Predictor(): else: widget_text.write(base_text + 'Saving Stem(s) as Mp3(s)... ') musfile = pydub.AudioSegment.from_wav(drums_path) - musfile.export(drums_path_mp3, format="mp3", bitrate="320k") + musfile.export(drums_path_mp3, format="mp3", bitrate=mp3_bit_set) musfile = pydub.AudioSegment.from_wav(bass_path) - musfile.export(bass_path_mp3, format="mp3", bitrate="320k") + musfile.export(bass_path_mp3, format="mp3", bitrate=mp3_bit_set) musfile = pydub.AudioSegment.from_wav(other_path) - musfile.export(other_path_mp3, format="mp3", bitrate="320k") + musfile.export(other_path_mp3, format="mp3", bitrate=mp3_bit_set) musfile = pydub.AudioSegment.from_wav(vocals_path) - musfile.export(vocals_path_mp3, format="mp3", bitrate="320k") + musfile.export(vocals_path_mp3, format="mp3", bitrate=mp3_bit_set) try: os.remove(drums_path) os.remove(bass_path) @@ -364,11 +364,11 @@ class Predictor(): else: if 'UVR' in model_set_name: if stemset_n == '(Vocals)': - sf.write(vocal_path, sources[1].T, samplerate) + sf.write(vocal_path, sources[1].T, samplerate, subtype=wav_type_set) else: - sf.write(vocal_path, sources[source_val].T, samplerate) + sf.write(vocal_path, sources[source_val].T, samplerate, subtype=wav_type_set) else: - sf.write(vocal_path, sources[source_val].T, samplerate) + sf.write(vocal_path, sources[source_val].T, samplerate, subtype=wav_type_set) widget_text.write('Done!\n') @@ -426,7 +426,7 @@ class Predictor(): update_progress(**progress_kwargs, step=(1)) - sf.write(Instrumental_path, 
spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr']) + sf.write(Instrumental_path, normalization_set(spec_utils.cmb_spectrogram_to_wave(-v_spec, mp)), mp.param['sr'], subtype=wav_type_set) if data['inst_only_b']: @@ -449,7 +449,7 @@ class Predictor(): pass else: musfile = pydub.AudioSegment.from_wav(vocal_path) - musfile.export(vocal_path_mp3, format="mp3", bitrate="320k") + musfile.export(vocal_path_mp3, format="mp3", bitrate=mp3_bit_set) if file_exists_v == 'there': pass else: @@ -461,7 +461,7 @@ class Predictor(): pass else: musfile = pydub.AudioSegment.from_wav(Instrumental_path) - musfile.export(Instrumental_path_mp3, format="mp3", bitrate="320k") + musfile.export(Instrumental_path_mp3, format="mp3", bitrate=mp3_bit_set) if file_exists_i == 'there': pass else: @@ -693,7 +693,7 @@ data = { 'demucsmodel': True, 'gpu': -1, 'chunks_d': 'Full', - 'modelFolder': False, + 'settest': False, 'voc_only_b': False, 'inst_only_b': False, 'overlap_b': 0.25, @@ -701,10 +701,13 @@ data = { 'segment': 'None', 'margin': 44100, 'split_mode': False, + 'normalize': False, 'compensate': 1.03597672895, 'demucs_stems': 'All Stems', 'DemucsModel': 'mdx_extra', 'audfile': True, + 'wavtype': 'PCM_16', + 'mp3bit': '320k', } default_chunks = data['chunks_d'] @@ -753,6 +756,13 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global shift_set global source_val global split_mode + + global wav_type_set + global flac_type_set + global mp3_bit_set + global normalization_set + + wav_type_set = data['wavtype'] # Update default settings default_chunks = data['chunks_d'] @@ -786,6 +796,23 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress data.update(kwargs) + if data['wavtype'] == '32-bit Float': + wav_type_set = 'FLOAT' + elif data['wavtype'] == '64-bit Float': + wav_type_set = 'DOUBLE' + else: + wav_type_set = data['wavtype'] + + flac_type_set = data['flactype'] + mp3_bit_set = data['mp3bit'] + + if 
data['normalize'] == True: + normalization_set = spec_utils.normalize + print('normalization on') + else: + normalization_set = spec_utils.nonormalize + print('normalization off') + stime = time.perf_counter() progress_var.set(0) text_widget.clear() @@ -794,6 +821,22 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress try: #Load File(s) for file_num, music_file in tqdm(enumerate(data['input_paths'], start=1)): + if data['wavtype'] == '64-bit Float': + if data['saveFormat'] == 'Flac': + text_widget.write('Please select \"WAV\" as your save format to use 64-bit Float.\n') + text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') + progress_var.set(0) + button_widget.configure(state=tk.NORMAL) # Enable Button + return + + if data['wavtype'] == '64-bit Float': + if data['saveFormat'] == 'Mp3': + text_widget.write('Please select \"WAV\" as your save format to use 64-bit Float.\n') + text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') + progress_var.set(0) + button_widget.configure(state=tk.NORMAL) # Enable Button + return + model_set_name = data['DemucsModel'] if data['demucs_stems'] == 'Vocals': @@ -889,20 +932,20 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress os.mkdir(folder_path) _mixture = f'{data["input_paths"]}' - if data['modelFolder']: + if data['settest']: try: - _basename = f'{data["export_path"]}{modelFolderName}{songFolderName}/{file_num}_{str(timestampnum)}_{os.path.splitext(os.path.basename(music_file))[0]}' + _basename = f'{data["export_path"]}{modelFolderName}{songFolderName}/{str(timestampnum)}_{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' except: - _basename = f'{data["export_path"]}{modelFolderName}{songFolderName}/{file_num}_{str(randomnum)}_{os.path.splitext(os.path.basename(music_file))[0]}' + _basename = 
f'{data["export_path"]}{modelFolderName}{songFolderName}/{str(randomnum)}_{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' else: _basename = f'{data["export_path"]}{modelFolderName}{songFolderName}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' else: _mixture = f'{data["input_paths"]}' - if data['modelFolder']: + if data['settest']: try: - _basename = f'{data["export_path"]}/{file_num}_{str(timestampnum)}_{model_set_name}_{os.path.splitext(os.path.basename(music_file))[0]}' + _basename = f'{data["export_path"]}/{str(timestampnum)}_{file_num}_{model_set_name}_{os.path.splitext(os.path.basename(music_file))[0]}' except: - _basename = f'{data["export_path"]}/{file_num}_{str(randomnum)}_{model_set_name}_{os.path.splitext(os.path.basename(music_file))[0]}' + _basename = f'{data["export_path"]}/{str(randomnum)}_{file_num}_{model_set_name}_{os.path.splitext(os.path.basename(music_file))[0]}' else: _basename = f'{data["export_path"]}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' diff --git a/inference_v5.py b/inference_v5.py index 065c355..3d3a791 100644 --- a/inference_v5.py +++ b/inference_v5.py @@ -28,6 +28,7 @@ from collections import defaultdict import tkinter as tk import traceback # Error Message Recent Calls import time # Timer +from random import randrange class VocalRemover(object): @@ -63,7 +64,11 @@ data = { 'shifts': 0, 'segment': 'None', 'split_mode': False, + 'normalize': False, 'demucsmodelVR': True, + 'wavtype': 'PCM_16', + 'mp3bit': '320k', + 'settest': False, } default_window_size = data['window_size'] @@ -113,6 +118,12 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global shift_set global split_mode global demucs_model_set + global wav_type_set + + global flac_type_set + global mp3_bit_set + + wav_type_set = data['wavtype'] #Error Handling @@ -158,13 +169,13 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress # and for vocal the
instrumental is the temp file due # to reversement if data['demucsmodelVR']: - sameplerate = 44100 + samplerate = 44100 else: - sameplerate = mp.param['sr'] + samplerate = mp.param['sr'] sf.write(f'temp.wav', - wav_instrument.T, sameplerate) + normalization_set(wav_instrument).T, samplerate, subtype=wav_type_set) appendModelFolderName = modelFolderName.replace('/', '_') @@ -199,14 +210,14 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress if VModel in model_name and data['voc_only']: sf.write(instrumental_path, - wav_instrument.T, sameplerate) + normalization_set(wav_instrument).T, samplerate, subtype=wav_type_set) elif VModel in model_name and data['inst_only']: pass elif data['voc_only']: pass else: sf.write(instrumental_path, - wav_instrument.T, sameplerate) + normalization_set(wav_instrument).T, samplerate, subtype=wav_type_set) # Vocal if vocal_name is not None: @@ -238,14 +249,14 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress if VModel in model_name and data['inst_only']: sf.write(vocal_path, - wav_vocals.T, sameplerate) + normalization_set(wav_vocals).T, samplerate, subtype=wav_type_set) elif VModel in model_name and data['voc_only']: pass elif data['inst_only']: pass else: sf.write(vocal_path, - wav_vocals.T, sameplerate) + normalization_set(wav_vocals).T, samplerate, subtype=wav_type_set) if data['saveFormat'] == 'Mp3': try: @@ -253,7 +264,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress pass else: musfile = pydub.AudioSegment.from_wav(vocal_path) - musfile.export(vocal_path_mp3, format="mp3", bitrate="320k") + musfile.export(vocal_path_mp3, format="mp3", bitrate=mp3_bit_set) if file_exists_v == 'there': pass else: @@ -265,7 +276,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress pass else: musfile = pydub.AudioSegment.from_wav(instrumental_path) - musfile.export(instrumental_path_mp3, format="mp3", bitrate="320k") + 
musfile.export(instrumental_path_mp3, format="mp3", bitrate=mp3_bit_set) if file_exists_i == 'there': pass else: @@ -377,6 +388,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress # Update default settings global default_window_size global default_agg + global normalization_set default_window_size = data['window_size'] default_agg = data['agg'] @@ -389,16 +401,43 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress shift_set = int(data['shifts']) demucs_model_set = data['demucsmodel_sel_VR'] split_mode = data['split_mode'] + + if data['wavtype'] == '32-bit Float': + wav_type_set = 'FLOAT' + elif data['wavtype'] == '64-bit Float': + wav_type_set = 'DOUBLE' + else: + wav_type_set = data['wavtype'] + + flac_type_set = data['flactype'] + mp3_bit_set = data['mp3bit'] + + if data['normalize'] == True: + normalization_set = spec_utils.normalize + print('normalization on') + else: + normalization_set = spec_utils.nonormalize + print('normalization off') vocal_remover = VocalRemover(data, text_widget) modelFolderName = determineModelFolderName() + timestampnum = round(datetime.utcnow().timestamp()) + randomnum = randrange(100000, 1000000) + # Separation Preperation try: #Load File(s) for file_num, music_file in enumerate(data['input_paths'], start=1): # Determine File Name m=music_file - base_name = f'{data["export_path"]}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' + + if data['settest']: + try: + base_name = f'{data["export_path"]}/{str(timestampnum)}_{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' + except: + base_name = f'{data["export_path"]}/{str(randomnum)}_{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' + else: + base_name = f'{data["export_path"]}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' model_name = os.path.basename(data[f'{data["useModel"]}Model']) model = vocal_remover.models[data['useModel']] @@ -435,6 +474,22 @@ def 
main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress text_widget.write('Detected Free Space: ' + str(free_space) + ' GB' + '\n\n') except: pass + + if data['wavtype'] == '64-bit Float': + if data['saveFormat'] == 'Flac': + text_widget.write('Please select \"WAV\" as your save format to use 64-bit Float.\n') + text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') + progress_var.set(0) + button_widget.configure(state=tk.NORMAL) # Enable Button + return + + if data['wavtype'] == '64-bit Float': + if data['saveFormat'] == 'Mp3': + text_widget.write('Please select \"WAV\" as your save format to use 64-bit Float.\n') + text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') + progress_var.set(0) + button_widget.configure(state=tk.NORMAL) # Enable Button + return #Load Model text_widget.write(base_text + 'Loading models...') diff --git a/inference_v5_ensemble.py b/inference_v5_ensemble.py index 9b7ab8f..54a203e 100644 --- a/inference_v5_ensemble.py +++ b/inference_v5_ensemble.py @@ -6,6 +6,7 @@ from pathlib import Path import pydub import hashlib from random import randrange +import re import subprocess import soundfile as sf @@ -172,7 +173,7 @@ class Predictor(): widget_text.write(base_text + 'Preparing to save Instrumental...') else: widget_text.write(base_text + 'Saving vocals... ') - sf.write(non_reduced_vocal_path, sources[c].T, samplerate) + sf.write(non_reduced_vocal_path, sources[c].T, samplerate, subtype=wav_type_set) update_progress(**progress_kwargs, step=(0.9)) widget_text.write('Done!\n') @@ -193,17 +194,17 @@ class Predictor(): widget_text.write(base_text + 'Saving Vocals... 
') if demucs_only == 'on': if 'UVR' in model_set_name: - sf.write(vocal_path, sources[1].T, samplerate) + sf.write(vocal_path, sources[1].T, samplerate, subtype=wav_type_set) update_progress(**progress_kwargs, step=(0.95)) widget_text.write('Done!\n') if 'extra' in model_set_name: - sf.write(vocal_path, sources[3].T, samplerate) + sf.write(vocal_path, sources[3].T, samplerate, subtype=wav_type_set) update_progress(**progress_kwargs, step=(0.95)) widget_text.write('Done!\n') else: - sf.write(non_reduced_vocal_path, sources[3].T, samplerate) + sf.write(non_reduced_vocal_path, sources[3].T, samplerate, subtype=wav_type_set) update_progress(**progress_kwargs, step=(0.9)) widget_text.write('Done!\n') @@ -221,7 +222,7 @@ class Predictor(): c += 1 if demucs_switch == 'off': widget_text.write(base_text + 'Saving Vocals..') - sf.write(vocal_path, sources[c].T, samplerate) + sf.write(vocal_path, sources[c].T, samplerate, subtype=wav_type_set) update_progress(**progress_kwargs, step=(0.9)) widget_text.write('Done!\n') @@ -229,11 +230,11 @@ class Predictor(): widget_text.write(base_text + 'Saving Vocals... 
') if demucs_only == 'on': if 'UVR' in model_set_name: - sf.write(vocal_path, sources[1].T, samplerate) + sf.write(vocal_path, sources[1].T, samplerate, subtype=wav_type_set) if 'extra' in model_set_name: - sf.write(vocal_path, sources[3].T, samplerate) + sf.write(vocal_path, sources[3].T, samplerate, subtype=wav_type_set) else: - sf.write(vocal_path, sources[3].T, samplerate) + sf.write(vocal_path, sources[3].T, samplerate, subtype=wav_type_set) update_progress(**progress_kwargs, step=(0.9)) widget_text.write('Done!\n') @@ -284,7 +285,7 @@ class Predictor(): v_spec = specs[1] - max_mag * np.exp(1.j * np.angle(specs[0])) update_progress(**progress_kwargs, step=(0.95)) - sf.write(Instrumental_path, spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr']) + sf.write(Instrumental_path, normalization_set(spec_utils.cmb_spectrogram_to_wave(-v_spec, mp)), mp.param['sr'], subtype=wav_type_set) if data['inst_only']: if file_exists == 'there': pass @@ -413,7 +414,10 @@ class Predictor(): algorithm=data['mixing'], value=b[3])*float(compensate)) # compensation - return sources + if demucs_switch == 'off': + return sources*float(compensate) + else: + return sources def demix_base(self, mixes, margin_size): chunked_sources = [] @@ -642,11 +646,15 @@ data = { 'shifts': 0, 'margin': 44100, 'split_mode': False, + 'normalize': False, 'compensate': 1.03597672895, 'autocompensate': True, 'demucs_only': False, 'mixing': 'Default', 'DemucsModel_MDX': 'UVR_Demucs_Model_1', + 'wavtype': 'PCM_16', + 'mp3bit': '320k', + 'settest': False, # Models 'instrumentalModel': None, @@ -694,21 +702,22 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global ModelName_2 global compensate global autocompensate - global demucs_model_set - global channel_set global margin_set global overlap_set global shift_set - global noise_pro_set global n_fft_scale_set global dim_f_set - global split_mode global demucs_switch global demucs_only + global wav_type_set + global 
flac_type_set + global mp3_bit_set + + wav_type_set = data['wavtype'] # Update default settings default_chunks = data['chunks'] @@ -768,7 +777,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress # to reversement sf.write(f'temp.wav', - wav_instrument, mp.param['sr']) + normalization_set(wav_instrument), mp.param['sr'], subtype=wav_type_set) # -Save files- # Instrumental @@ -780,14 +789,14 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress if VModel in ModelName_1 and data['voc_only']: sf.write(instrumental_path, - wav_instrument, mp.param['sr']) + normalization_set(wav_instrument), mp.param['sr'], subtype=wav_type_set) elif VModel in ModelName_1 and data['inst_only']: pass elif data['voc_only']: pass else: sf.write(instrumental_path, - wav_instrument, mp.param['sr']) + normalization_set(wav_instrument), mp.param['sr'], subtype=wav_type_set) # Vocal if vocal_name is not None: @@ -798,23 +807,42 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress if VModel in ModelName_1 and data['inst_only']: sf.write(vocal_path, - wav_vocals, mp.param['sr']) + normalization_set(wav_vocals), mp.param['sr'], subtype=wav_type_set) elif VModel in ModelName_1 and data['voc_only']: pass elif data['inst_only']: pass else: sf.write(vocal_path, - wav_vocals, mp.param['sr']) + normalization_set(wav_vocals), mp.param['sr'], subtype=wav_type_set) data.update(kwargs) # Update default settings global default_window_size global default_agg + global normalization_set + default_window_size = data['window_size'] default_agg = data['agg'] + if data['wavtype'] == '32-bit Float': + wav_type_set = 'FLOAT' + elif data['wavtype'] == '64-bit Float': + wav_type_set = 'DOUBLE' + else: + wav_type_set = data['wavtype'] + + flac_type_set = data['flactype'] + mp3_bit_set = data['mp3bit'] + + if data['normalize'] == True: + normalization_set = spec_utils.normalize + print('normalization on') + else: + 
normalization_set = spec_utils.nonormalize + print('normalization off') + stime = time.perf_counter() progress_var.set(0) text_widget.clear() @@ -853,6 +881,21 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress else: demucs_only = 'off' + if data['wavtype'] == '64-bit Float': + if data['saveFormat'] == 'Flac': + text_widget.write('Please select \"WAV\" as your save format to use 64-bit Float.\n') + text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') + progress_var.set(0) + button_widget.configure(state=tk.NORMAL) # Enable Button + return + + if data['wavtype'] == '64-bit Float': + if data['saveFormat'] == 'Mp3': + text_widget.write('Please select \"WAV\" as your save format to use 64-bit Float.\n') + text_widget.write(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}') + progress_var.set(0) + button_widget.configure(state=tk.NORMAL) # Enable Button + return if not data['ensChoose'] == 'Manual Ensemble': @@ -1706,10 +1749,17 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress os.mkdir(folder_path) # Determine File Name + base_name = f'{data["export_path"]}{enseFolderName}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' + enseExport = f'{data["export_path"]}{enseFolderName}/' trackname = f'{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' + def get_numbers_from_filename(filename): + return re.search(r'\d+', filename).group(0) + + foldernum = get_numbers_from_filename(enseFolderName) + if c['model_location'] == 'pass': pass @@ -2249,79 +2299,156 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress return [f"{folder}{i}" for i in os.listdir(folder) if i.startswith(prefix) if i.endswith(suffix)] if data['appendensem'] == False: - voc_inst = [ - { - 'algorithm':'min_mag', - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 
'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), - 'output':'{}_(Instrumental)'.format(trackname), - 'type': 'Instrumentals' - }, - { - 'algorithm':'max_mag', - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), - 'output': '{}_(Vocals)'.format(trackname), - 'type': 'Vocals' - } - ] + if data['settest']: + voc_inst = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), + 'output':'{}_{}_(Instrumental)'.format(foldernum, trackname), + 'type': 'Instrumentals' + }, + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), + 'output': '{}_{}_(Vocals)'.format(foldernum, trackname), + 'type': 'Vocals' + } + ] - inst = [ - { - 'algorithm':'min_mag', - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), - 'output':'{}_(Instrumental)'.format(trackname), - 'type': 'Instrumentals' - } - ] + inst = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), + 'output':'{}_{}_(Instrumental)'.format(foldernum, trackname), + 'type': 'Instrumentals' + } + ] - vocal = [ - { - 'algorithm':'max_mag', - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), - 'output': '{}_(Vocals)'.format(trackname), - 'type': 'Vocals' - } - ] + vocal = [ + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), 
+ 'output': '{}_{}_(Vocals)'.format(foldernum, trackname), + 'type': 'Vocals' + } + ] + else: + voc_inst = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), + 'output':'{}_(Instrumental)'.format(trackname), + 'type': 'Instrumentals' + }, + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), + 'output': '{}_(Vocals)'.format(trackname), + 'type': 'Vocals' + } + ] + + inst = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), + 'output':'{}_(Instrumental)'.format(trackname), + 'type': 'Instrumentals' + } + ] + + vocal = [ + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), + 'output': '{}_(Vocals)'.format(trackname), + 'type': 'Vocals' + } + ] + else: - voc_inst = [ - { - 'algorithm':'min_mag', - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), - 'output':'{}_Ensembled_{}_(Instrumental)'.format(trackname, ensemode), - 'type': 'Instrumentals' - }, - { - 'algorithm':'max_mag', - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), - 'output': '{}_Ensembled_{}_(Vocals)'.format(trackname, ensemode), - 'type': 'Vocals' - } - ] + if data['settest']: + voc_inst = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), + 
'output':'{}_{}_Ensembled_{}_(Instrumental)'.format(foldernum, trackname, ensemode), + 'type': 'Instrumentals' + }, + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), + 'output': '{}_{}_Ensembled_{}_(Vocals)'.format(foldernum, trackname, ensemode), + 'type': 'Vocals' + } + ] - inst = [ - { - 'algorithm':'min_mag', - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), - 'output':'{}_Ensembled_{}_(Instrumental)'.format(trackname, ensemode), - 'type': 'Instrumentals' - } - ] + inst = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), + 'output':'{}_{}_Ensembled_{}_(Instrumental)'.format(foldernum, trackname, ensemode), + 'type': 'Instrumentals' + } + ] - vocal = [ - { - 'algorithm':'max_mag', - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), - 'output': '{}_Ensembled_{}_(Vocals)'.format(trackname, ensemode), - 'type': 'Vocals' - } - ] + vocal = [ + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), + 'output': '{}_{}_Ensembled_{}_(Vocals)'.format(foldernum, trackname, ensemode), + 'type': 'Vocals' + } + ] + else: + voc_inst = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), + 'output':'{}_Ensembled_{}_(Instrumental)'.format(trackname, ensemode), + 'type': 'Instrumentals' + }, + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 
'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), + 'output': '{}_Ensembled_{}_(Vocals)'.format(trackname, ensemode), + 'type': 'Vocals' + } + ] + + inst = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Instrumental).wav"), + 'output':'{}_Ensembled_{}_(Instrumental)'.format(trackname, ensemode), + 'type': 'Instrumentals' + } + ] + + vocal = [ + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav"), + 'output': '{}_Ensembled_{}_(Vocals)'.format(trackname, ensemode), + 'type': 'Vocals' + } + ] if data['voc_only']: ensembles = vocal @@ -2362,13 +2489,13 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress del wave sf.write(os.path.join('{}'.format(data['export_path']),'{}.wav'.format(e['output'])), - spec_utils.cmb_spectrogram_to_wave(spec_utils.ensembling(e['algorithm'], - specs), mp), mp.param['sr']) + normalization_set(spec_utils.cmb_spectrogram_to_wave(spec_utils.ensembling(e['algorithm'], + specs), mp)), mp.param['sr'], subtype=wav_type_set) if data['saveFormat'] == 'Mp3': try: musfile = pydub.AudioSegment.from_wav(os.path.join('{}'.format(data['export_path']),'{}.wav'.format(e['output']))) - musfile.export((os.path.join('{}'.format(data['export_path']),'{}.mp3'.format(e['output']))), format="mp3", bitrate="320k") + musfile.export((os.path.join('{}'.format(data['export_path']),'{}.mp3'.format(e['output']))), format="mp3", bitrate=mp3_bit_set) os.remove((os.path.join('{}'.format(data['export_path']),'{}.wav'.format(e['output'])))) except Exception as e: traceback_text = ''.join(traceback.format_tb(e.__traceback__)) @@ -2456,7 +2583,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress if trackname in file: musfile = 
pydub.AudioSegment.from_wav(file) #rename them using the old name + ".wav" - musfile.export("{0}.mp3".format(name), format="mp3", bitrate="320k") + musfile.export("{0}.mp3".format(name), format="mp3", bitrate=mp3_bit_set) try: files = get_files(folder=enseExport, prefix=trackname, suffix="_(Vocals).wav") for file in files: @@ -2607,39 +2734,112 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress savefilename = (data['input_paths'][0]) trackname1 = f'{os.path.splitext(os.path.basename(savefilename))[0]}' - insts = [ - { - 'algorithm':'min_mag', - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'output':'{}_Manual_Ensemble_(Min Spec)'.format(trackname1), - 'type': 'Instrumentals' - } - ] + timestampnum = round(datetime.utcnow().timestamp()) + randomnum = randrange(100000, 1000000) + + if data['settest']: + try: + insts = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output':'{}_{}_Manual_Ensemble_(Min Spec)'.format(timestampnum, trackname1), + 'type': 'Instrumentals' + } + ] - vocals = [ - { - 'algorithm':'max_mag', - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'output': '{}_Manual_Ensemble_(Max Spec)'.format(trackname1), - 'type': 'Vocals' - } - ] - - invert_spec = [ - { - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'output': '{}_diff_si'.format(trackname1), - 'type': 'Spectral Inversion' - } - ] - - invert_nor = [ - { - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'output': '{}_diff_ni'.format(trackname1), - 'type': 'Normal Inversion' - } - ] + vocals = [ + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output': '{}_{}_Manual_Ensemble_(Max Spec)'.format(timestampnum, trackname1), + 'type': 'Vocals' + } + ] + + invert_spec = [ + { + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output': '{}_{}_diff_si'.format(timestampnum, trackname1), + 'type': 
'Spectral Inversion' + } + ] + + invert_nor = [ + { + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output': '{}_{}_diff_ni'.format(timestampnum, trackname1), + 'type': 'Normal Inversion' + } + ] + except: + insts = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output':'{}_{}_Manual_Ensemble_(Min Spec)'.format(randomnum, trackname1), + 'type': 'Instrumentals' + } + ] + + vocals = [ + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output': '{}_{}_Manual_Ensemble_(Max Spec)'.format(randomnum, trackname1), + 'type': 'Vocals' + } + ] + + invert_spec = [ + { + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output': '{}_{}_diff_si'.format(randomnum, trackname1), + 'type': 'Spectral Inversion' + } + ] + + invert_nor = [ + { + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output': '{}_{}_diff_ni'.format(randomnum, trackname1), + 'type': 'Normal Inversion' + } + ] + else: + insts = [ + { + 'algorithm':'min_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output':'{}_Manual_Ensemble_(Min Spec)'.format(trackname1), + 'type': 'Instrumentals' + } + ] + + vocals = [ + { + 'algorithm':'max_mag', + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output': '{}_Manual_Ensemble_(Max Spec)'.format(trackname1), + 'type': 'Vocals' + } + ] + + invert_spec = [ + { + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output': '{}_diff_si'.format(trackname1), + 'type': 'Spectral Inversion' + } + ] + + invert_nor = [ + { + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'output': '{}_diff_ni'.format(trackname1), + 'type': 'Normal Inversion' + } + ] if data['algo'] == 'Instrumentals (Min Spec)': ensem = insts @@ -2681,13 +2881,13 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress del wave 
sf.write(os.path.join('{}'.format(data['export_path']),'{}.wav'.format(e['output'])), - spec_utils.cmb_spectrogram_to_wave(spec_utils.ensembling(e['algorithm'], - specs), mp), mp.param['sr']) + normalization_set(spec_utils.cmb_spectrogram_to_wave(spec_utils.ensembling(e['algorithm'], + specs), mp)), mp.param['sr'], subtype=wav_type_set) if data['saveFormat'] == 'Mp3': try: musfile = pydub.AudioSegment.from_wav(os.path.join('{}'.format(data['export_path']),'{}.wav'.format(e['output']))) - musfile.export((os.path.join('{}'.format(data['export_path']),'{}.mp3'.format(e['output']))), format="mp3", bitrate="320k") + musfile.export((os.path.join('{}'.format(data['export_path']),'{}.mp3'.format(e['output']))), format="mp3", bitrate=mp3_bit_set) os.remove((os.path.join('{}'.format(data['export_path']),'{}.wav'.format(e['output'])))) except Exception as e: text_widget.write('\n' + base_text + 'Failed to save output(s) as Mp3.') @@ -2782,11 +2982,11 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress max_mag = np.where(X_mag >= y_mag, X_mag, y_mag) v_spec = specs[1] - max_mag * np.exp(1.j * np.angle(specs[0])) sf.write(os.path.join('{}'.format(data['export_path']),'{}.wav'.format(e['output'])), - spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr']) + spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr'], subtype=wav_type_set) if data['algo'] == 'Invert (Normal)': v_spec = specs[0] - specs[1] sf.write(os.path.join('{}'.format(data['export_path']),'{}.wav'.format(e['output'])), - spec_utils.cmb_spectrogram_to_wave(v_spec, mp), mp.param['sr']) + spec_utils.cmb_spectrogram_to_wave(v_spec, mp), mp.param['sr'], subtype=wav_type_set) text_widget.write("Done!\n")