From 04f29d3bb7d9331151b7fcc49a230cc994ccf077 Mon Sep 17 00:00:00 2001 From: Anjok07 <68268275+Anjok07@users.noreply.github.com> Date: Sun, 3 Jul 2022 18:47:33 -0500 Subject: [PATCH] Add files via upload --- UVR.py | 172 +++++++++++++++++++++++++--------- inference_MDX.py | 237 ++++++++++++++++++++++++++++++++++++++++++----- inference_v5.py | 157 ++++++++++++++++++++++++++++--- 3 files changed, 489 insertions(+), 77 deletions(-) diff --git a/UVR.py b/UVR.py index 26bce50..81e6e09 100644 --- a/UVR.py +++ b/UVR.py @@ -39,8 +39,6 @@ import inference_MDX import inference_v5 import inference_v5_ensemble import inference_demucs -# Version -from __version__ import VERSION from win32api import GetSystemMetrics @@ -90,8 +88,9 @@ DEFAULT_DATA = { 'vr_ensem_mdx_a': 'No Model', 'vr_ensem_mdx_b': 'No Model', 'vr_ensem_mdx_c': 'No Model', - 'mdx_ensem': 'UVR-MDX-NET 1', + 'mdx_ensem': 'UVR-MDX-NET Main', 'mdx_ensem_b': 'No Model', + 'demucsmodel_sel_VR': 'UVR_Demucs_Model_1', 'gpu': False, 'postprocess': False, 'tta': False, @@ -114,15 +113,19 @@ DEFAULT_DATA = { 'split_mode': True, #MDX-Net 'demucsmodel': True, + 'demucsmodelVR': False, 'non_red': False, 'noise_reduc': True, + 'nophaseinst': False, 'voc_only': False, 'inst_only': False, 'voc_only_b': False, 'inst_only_b': False, 'audfile': True, + 'autocompensate': True, 'chunks': 'Auto', 'n_fft_scale': 6144, + 'segment': 'None', 'dim_f': 2048, 'noise_pro_select': 'Auto Select', 'overlap': 0.25, @@ -135,7 +138,7 @@ DEFAULT_DATA = { 'mdxnetModeltype': 'Vocals (Custom)', 'noisereduc_s': '3', 'mixing': 'Default', - 'mdxnetModel': 'UVR-MDX-NET 1', + 'mdxnetModel': 'UVR-MDX-NET Main', 'DemucsModel': 'mdx_extra', 'DemucsModel_MDX': 'UVR_Demucs_Model_1', 'ModelParams': 'Auto', @@ -406,6 +409,7 @@ class MainWindow(TkinterDnD.Tk): self.vrensemchoose_mdx_c_var = tk.StringVar(value=data['vr_ensem_mdx_c']) self.mdxensemchoose_var = tk.StringVar(value=data['mdx_ensem']) self.mdxensemchoose_b_var = tk.StringVar(value=data['mdx_ensem_b']) + self.demucsmodel_sel_VR_var = tk.StringVar(value=data['demucsmodel_sel_VR']) #Advanced Options self.appendensem_var = tk.BooleanVar(value=data['appendensem']) self.demucs_only_var = tk.BooleanVar(value=data['demucs_only']) @@ -418,7 +422,9 @@ class MainWindow(TkinterDnD.Tk): self.outputImage_var = tk.BooleanVar(value=data['output_image']) # MDX-NET Specific Processing Options self.demucsmodel_var = tk.BooleanVar(value=data['demucsmodel']) + self.demucsmodelVR_var = tk.BooleanVar(value=data['demucsmodelVR']) self.non_red_var = tk.BooleanVar(value=data['non_red']) + self.nophaseinst_var = tk.BooleanVar(value=data['nophaseinst']) self.noisereduc_var = tk.BooleanVar(value=data['noise_reduc']) self.chunks_var = tk.StringVar(value=data['chunks']) self.noisereduc_s_var = tk.StringVar(value=data['noisereduc_s']) @@ -431,6 +437,7 @@ class MainWindow(TkinterDnD.Tk): self.winSize_var = tk.StringVar(value=data['window_size']) self.agg_var = tk.StringVar(value=data['agg']) self.n_fft_scale_var = tk.StringVar(value=data['n_fft_scale']) + self.segment_var = tk.StringVar(value=data['segment']) self.dim_f_var = tk.StringVar(value=data['dim_f']) self.noise_pro_select_var = tk.StringVar(value=data['noise_pro_select']) self.overlap_var = tk.StringVar(value=data['overlap']) @@ -448,6 +455,7 @@ class MainWindow(TkinterDnD.Tk): self.voc_only_b_var = tk.BooleanVar(value=data['voc_only_b']) self.inst_only_b_var = tk.BooleanVar(value=data['inst_only_b']) self.audfile_var = tk.BooleanVar(value=data['audfile']) + self.autocompensate_var = tk.BooleanVar(value=data['autocompensate']) # Choose Conversion Method self.aiModel_var = tk.StringVar(value=data['aiModel']) self.last_aiModel = self.aiModel_var.get() @@ -519,7 +527,7 @@ class MainWindow(TkinterDnD.Tk): self.command_Text = ThreadSafeConsole(master=self, background='#0e0e0f',fg='#898b8e', font=('Century Gothic', 11),borderwidth=0) - self.command_Text.write(f'Ultimate Vocal Remover v{VERSION} [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]\n') + self.command_Text.write(f'Ultimate Vocal Remover [{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}]\n') def configure_widgets(self): """Change widget styling and appearance""" @@ -773,14 +781,14 @@ class MainWindow(TkinterDnD.Tk): # MDX-Auto-Chunk self.options_non_red_Checkbutton = ttk.Checkbutton(master=self.options_Frame, - text='Save Noisey Vocal', + text='Save Noisey Output', variable=self.non_red_var, ) - # Postprocessing - self.options_post_Checkbutton = ttk.Checkbutton(master=self.options_Frame, - text='Post-Process', - variable=self.postprocessing_var, + # Demucs Model VR + self.options_demucsmodelVR_Checkbutton = ttk.Checkbutton(master=self.options_Frame, + text='Demucs Model', + variable=self.demucsmodelVR_var, ) # Split Mode @@ -974,7 +982,7 @@ class MainWindow(TkinterDnD.Tk): #---VR Architecture Specific--- #Post-Process - self.options_post_Checkbutton.place(x=35, y=21, width=0, height=5, + self.options_demucsmodelVR_Checkbutton.place(x=35, y=21, width=0, height=5, relx=2/3, rely=6/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) #Save Image # self.options_image_Checkbutton.place(x=35, y=21, width=0, height=5, @@ -1035,6 +1043,12 @@ class MainWindow(TkinterDnD.Tk): self.chunks_var.trace_add('write', lambda *args: self.update_states()) + self.autocompensate_var.trace_add('write', + lambda *args: self.update_states()) + + self.compensate_var.trace_add('write', + lambda *args: self.update_states()) + # Opening filedialogs def open_file_filedialog(self): """Make user select music files""" @@ -1176,14 +1190,13 @@ class MainWindow(TkinterDnD.Tk): 'vr_ensem_b': self.vrensemchoose_b_var.get(), 'vr_ensem_c': self.vrensemchoose_c_var.get(), 'vr_ensem_d': self.vrensemchoose_d_var.get(), - 'vr_ensem_e': self.vrensemchoose_e_var.get(), 'vr_ensem_mdx_a': self.vrensemchoose_mdx_a_var.get(), 'vr_ensem_mdx_b': self.vrensemchoose_mdx_b_var.get(), 'vr_ensem_mdx_c': self.vrensemchoose_mdx_c_var.get(), - 'mdx_ensem': self.mdxensemchoose_var.get(), 'mdx_ensem_b': self.mdxensemchoose_b_var.get(), + 'demucsmodel_sel_VR': self.demucsmodel_sel_VR_var.get(), # Processing Options 'gpu': 0 if self.gpuConversion_var.get() else -1, 'postprocess': self.postprocessing_var.get(), @@ -1218,17 +1231,21 @@ class MainWindow(TkinterDnD.Tk): 'progress_var': self.progress_var, # MDX-Net Specific 'demucsmodel': self.demucsmodel_var.get(), + 'demucsmodelVR': self.demucsmodelVR_var.get(), 'non_red': self.non_red_var.get(), + 'nophaseinst': self.nophaseinst_var.get(), 'noise_reduc': self.noisereduc_var.get(), 'voc_only': self.voc_only_var.get(), 'inst_only': self.inst_only_var.get(), 'voc_only_b': self.voc_only_b_var.get(), 'inst_only_b': self.inst_only_b_var.get(), 'audfile': self.audfile_var.get(), + 'autocompensate': self.autocompensate_var.get(), 'chunks': chunks, 'noisereduc_s': noisereduc_s, 'mixing': mixing, 'n_fft_scale': self.n_fft_scale_var.get(), + 'segment': self.segment_var.get(), 'dim_f': self.dim_f_var.get(), 'noise_pro_select': self.noise_pro_select_var.get(), 'overlap': self.overlap_var.get(), @@ -1331,6 +1348,10 @@ class MainWindow(TkinterDnD.Tk): i = ["UVR_MDXNET_KARA"] for char in i: file_name_1 = file_name_1.replace(char, "UVR-MDX-NET Karaoke") + + i = ["UVR_MDXNET_Main"] + for char in i: + file_name_1 = file_name_1.replace(char, "UVR-MDX-NET Main") self.options_mdxnetModel_Optionmenu['menu'].add_radiobutton(label=file_name_1, command=tk._setit(self.mdxnetModel_var, file_name_1)) @@ -1464,8 +1485,8 @@ class MainWindow(TkinterDnD.Tk): self.options_instrumentalModel_Optionmenu.place_forget() self.options_save_Checkbutton.configure(state=tk.DISABLED) self.options_save_Checkbutton.place_forget() - self.options_post_Checkbutton.configure(state=tk.DISABLED) - self.options_post_Checkbutton.place_forget() + self.options_demucsmodelVR_Checkbutton.configure(state=tk.DISABLED) + self.options_demucsmodelVR_Checkbutton.place_forget() self.options_tta_Checkbutton.configure(state=tk.DISABLED) self.options_tta_Checkbutton.place_forget() # self.options_image_Checkbutton.configure(state=tk.DISABLED) @@ -1527,8 +1548,8 @@ class MainWindow(TkinterDnD.Tk): self.options_tta_Checkbutton.place(x=35, y=21, width=0, height=5, relx=2/3, rely=5/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) #Post-Process - self.options_post_Checkbutton.configure(state=tk.NORMAL) - self.options_post_Checkbutton.place(x=35, y=21, width=0, height=5, + self.options_demucsmodelVR_Checkbutton.configure(state=tk.NORMAL) + self.options_demucsmodelVR_Checkbutton.place(x=35, y=21, width=0, height=5, relx=2/3, rely=6/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) #Save Image # self.options_image_Checkbutton.configure(state=tk.NORMAL) @@ -1627,8 +1648,8 @@ class MainWindow(TkinterDnD.Tk): # Forget Widgets self.options_save_Checkbutton.configure(state=tk.DISABLED) self.options_save_Checkbutton.place_forget() - self.options_post_Checkbutton.configure(state=tk.DISABLED) - self.options_post_Checkbutton.place_forget() + self.options_demucsmodelVR_Checkbutton.configure(state=tk.DISABLED) + self.options_demucsmodelVR_Checkbutton.place_forget() self.options_tta_Checkbutton.configure(state=tk.DISABLED) self.options_tta_Checkbutton.place_forget() # self.options_image_Checkbutton.configure(state=tk.DISABLED) @@ -1674,8 +1695,8 @@ class MainWindow(TkinterDnD.Tk): # Forget Widgets self.options_save_Checkbutton.configure(state=tk.DISABLED) self.options_save_Checkbutton.place_forget() - self.options_post_Checkbutton.configure(state=tk.DISABLED) - self.options_post_Checkbutton.place_forget() + self.options_demucsmodelVR_Checkbutton.configure(state=tk.DISABLED) + self.options_demucsmodelVR_Checkbutton.place_forget() self.options_tta_Checkbutton.configure(state=tk.DISABLED) self.options_tta_Checkbutton.place_forget() self.options_modelFolder_Checkbutton.configure(state=tk.DISABLED) @@ -1772,8 +1793,8 @@ class MainWindow(TkinterDnD.Tk): self.options_save_Checkbutton.place(x=35, y=3, width=0, height=5, relx=2/3, rely=9/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) # Forget Widgets - self.options_post_Checkbutton.configure(state=tk.DISABLED) - self.options_post_Checkbutton.place_forget() + self.options_demucsmodelVR_Checkbutton.configure(state=tk.DISABLED) + self.options_demucsmodelVR_Checkbutton.place_forget() self.options_modelFolder_Checkbutton.configure(state=tk.DISABLED) self.options_modelFolder_Checkbutton.place_forget() # self.options_image_Checkbutton.configure(state=tk.DISABLED) @@ -1832,8 +1853,8 @@ class MainWindow(TkinterDnD.Tk): self.options_tta_Checkbutton.place(x=35, y=21, width=0, height=5, relx=2/3, rely=5/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) #Post-Process - self.options_post_Checkbutton.configure(state=tk.NORMAL) - self.options_post_Checkbutton.place(x=35, y=21, width=0, height=5, + self.options_demucsmodelVR_Checkbutton.configure(state=tk.NORMAL) + self.options_demucsmodelVR_Checkbutton.place(x=35, y=21, width=0, height=5, relx=2/3, rely=6/self.COL2_ROWS, relwidth=1/3, relheight=1/self.COL2_ROWS) #Save Image # self.options_image_Checkbutton.configure(state=tk.NORMAL) @@ -1882,7 +1903,7 @@ class MainWindow(TkinterDnD.Tk): if self.inst_only_var.get() == True: self.options_voc_only_Checkbutton.configure(state=tk.DISABLED) self.voc_only_var.set(False) - self.non_red_var.set(False) + #self.non_red_var.set(False) elif self.inst_only_var.get() == False: self.options_non_red_Checkbutton.configure(state=tk.NORMAL) self.options_voc_only_Checkbutton.configure(state=tk.NORMAL) @@ -1954,6 +1975,20 @@ class MainWindow(TkinterDnD.Tk): self.options_non_red_Checkbutton.configure(state=tk.NORMAL) + if self.autocompensate_var.get() == True: + self.compensate_var.set('Auto') + try: + self.options_compensate.configure(state=tk.DISABLED) + except: + pass + + if self.autocompensate_var.get() == False: + self.compensate_var.set(1.03597672895) + try: + self.options_compensate.configure(state=tk.NORMAL) + except: + pass + if self.mdxnetModeltype_var.get() == 'Vocals (Default)': self.n_fft_scale_var.set('6144') self.dim_f_var.set('2048') @@ -2125,13 +2160,17 @@ class MainWindow(TkinterDnD.Tk): tabControl = ttk.Notebook(top) tab1 = ttk.Frame(tabControl) + tab2 = ttk.Frame(tabControl) tabControl.add(tab1, text ='Advanced Settings') + tabControl.add(tab2, text ='Demucs Settings') tabControl.pack(expand = 1, fill ="both") tab1.grid_rowconfigure(0, weight=1) tab1.grid_columnconfigure(0, weight=1) + tab2.grid_rowconfigure(0, weight=1) + tab2.grid_columnconfigure(0, weight=1) frame0=Frame(tab1, highlightbackground='red',highlightthicknes=0) frame0.grid(row=0,column=0,padx=0,pady=30) @@ -2160,23 +2199,59 @@ class MainWindow(TkinterDnD.Tk): l0.grid(row=7,column=0,padx=0,pady=0) l0=ttk.Checkbutton(frame0, text='Save Output Image(s) of Spectrogram(s)', variable=self.outputImage_var) - l0.grid(row=8,column=0,padx=0,pady=10) - - l0=ttk.Button(frame0,text='Open VR Models Folder', command=self.open_Modelfolder_vr) + l0.grid(row=8,column=0,padx=0,pady=0) + + l0=ttk.Checkbutton(frame0, text='Post-Process', variable=self.postprocessing_var) l0.grid(row=9,column=0,padx=0,pady=0) - l0=ttk.Button(frame0,text='Back to Main Menu', command=close_win) + l0=ttk.Button(frame0,text='Open VR Models Folder', command=self.open_Modelfolder_vr) l0.grid(row=10,column=0,padx=0,pady=10) + l0=ttk.Button(frame0,text='Back to Main Menu', command=close_win) + l0.grid(row=11,column=0,padx=0,pady=0) + def close_win_self(): top.destroy() l0=ttk.Button(frame0,text='Close Window', command=close_win_self) - l0.grid(row=11,column=0,padx=0,pady=0) + l0.grid(row=12,column=0,padx=0,pady=10) self.ModelParamsLabel_to_path = defaultdict(lambda: '') self.lastModelParams = [] + frame0=Frame(tab2, highlightbackground='red',highlightthicknes=0) + frame0.grid(row=0,column=0,padx=0,pady=30) + + l0=tk.Label(frame0,text='\nDemucs Model\n',font=("Century Gothic", "9"), justify="center", foreground='#13a4c9') + l0.grid(row=1,column=0,padx=0,pady=0) + + l0=ttk.OptionMenu(frame0, self.demucsmodel_sel_VR_var, None, 'UVR_Demucs_Model_1', 'UVR_Demucs_Model_2', 'UVR_Demucs_Model_Bag') + l0.grid(row=2,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Shifts\n(Higher values use more resources and increase processing times)', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=3,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.shifts_var, justify='center') + l0.grid(row=4,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Overlap', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=5,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.overlap_var, justify='center') + l0.grid(row=6,column=0,padx=0,pady=0) + + l0=tk.Label(frame0, text='Segment', font=("Century Gothic", "9"), foreground='#13a4c9') + l0.grid(row=7,column=0,padx=0,pady=10) + + l0=ttk.Entry(frame0, textvariable=self.segment_var, justify='center') + l0.grid(row=8,column=0,padx=0,pady=0) + + l0=ttk.Checkbutton(frame0, text='Split Mode', variable=self.split_mode_var) + l0.grid(row=9,column=0,padx=0,pady=10) + + self.DemucsLabel_MDX_to_path = defaultdict(lambda: '') + self.lastDemucsModels_MDX = [] + self.update_states() @@ -2278,8 +2353,8 @@ class MainWindow(TkinterDnD.Tk): """ top= Toplevel(self) - top.geometry("670x550") - window_height = 670 + top.geometry("740x550") + window_height = 740 window_width = 550 top.title("Advanced MDX-Net Options") @@ -2347,26 +2422,34 @@ class MainWindow(TkinterDnD.Tk): l0=tk.Label(frame0, text='Volume Compensation', font=("Century Gothic", "9"), foreground='#13a4c9') l0.grid(row=7,column=0,padx=0,pady=10) - l0=ttk.Entry(frame0, textvariable=self.compensate_var, justify='center') + self.options_compensate = l0=ttk.Entry(frame0, textvariable=self.compensate_var, justify='center') + + self.options_compensate l0.grid(row=8,column=0,padx=0,pady=0) - l0=tk.Label(frame0, text='Noise Profile', font=("Century Gothic", "9"), foreground='#13a4c9') + l0=ttk.Checkbutton(frame0, text='Autoset Volume Compensation', variable=self.autocompensate_var) l0.grid(row=9,column=0,padx=0,pady=10) - l0=ttk.OptionMenu(frame0, self.noise_pro_select_var, None, 'Auto Select', 'MDX-NET_Noise_Profile_14_kHz', 'MDX-NET_Noise_Profile_17_kHz', 'MDX-NET_Noise_Profile_Full_Band') + l0=ttk.Checkbutton(frame0, text='Reduce Instrumental Noise Separately', variable=self.nophaseinst_var) l0.grid(row=10,column=0,padx=0,pady=0) - l0=ttk.Button(frame0,text='Open MDX-Net Models Folder', command=self.open_newModel_filedialog) + l0=tk.Label(frame0, text='Noise Profile', font=("Century Gothic", "9"), foreground='#13a4c9') l0.grid(row=11,column=0,padx=0,pady=10) - l0=ttk.Button(frame0,text='Back to Main Menu', command=close_win) + l0=ttk.OptionMenu(frame0, self.noise_pro_select_var, None, 'Auto Select', 'MDX-NET_Noise_Profile_14_kHz', 'MDX-NET_Noise_Profile_17_kHz', 'MDX-NET_Noise_Profile_Full_Band') l0.grid(row=12,column=0,padx=0,pady=0) + l0=ttk.Button(frame0,text='Open MDX-Net Models Folder', command=self.open_newModel_filedialog) + l0.grid(row=13,column=0,padx=0,pady=10) + + l0=ttk.Button(frame0,text='Back to Main Menu', command=close_win) + l0.grid(row=14,column=0,padx=0,pady=0) + def close_win_self(): top.destroy() l0=ttk.Button(frame0,text='Close Window', command=close_win_self) - l0.grid(row=13,column=0,padx=0,pady=10) + l0.grid(row=15,column=0,padx=0,pady=10) frame0=Frame(tab2, highlightbackground='red',highlightthicknes=0) frame0.grid(row=0,column=0,padx=0,pady=30) @@ -2501,14 +2584,14 @@ class MainWindow(TkinterDnD.Tk): l0=tk.Label(frame0,text='MDX-Net or Demucs Model 1\n',font=("Century Gothic", "9"), justify="center", foreground='#13a4c9') l0.grid(row=2,column=0,padx=0,pady=0) - l0=ttk.OptionMenu(frame0, self.mdxensemchoose_var, None, 'UVR-MDX-NET 1', 'UVR-MDX-NET 2', 'UVR-MDX-NET 3', + l0=ttk.OptionMenu(frame0, self.mdxensemchoose_var, None, 'UVR-MDX-NET Main', 'UVR-MDX-NET 1', 'UVR-MDX-NET 2', 'UVR-MDX-NET 3', 'UVR-MDX-NET Karaoke', 'Demucs UVR Model 1', 'Demucs UVR Model 2', 'Demucs mdx_extra', 'Demucs mdx_extra_q') l0.grid(row=3,column=0,padx=0,pady=0) l0=tk.Label(frame0,text='\nMDX-Net or Demucs Model 2\n',font=("Century Gothic", "9"), justify="center", foreground='#13a4c9') l0.grid(row=4,column=0,padx=0,pady=0) - l0=ttk.OptionMenu(frame0, self.mdxensemchoose_b_var, None, 'No Model', 'UVR-MDX-NET 1', 'UVR-MDX-NET 2', 'UVR-MDX-NET 3', + l0=ttk.OptionMenu(frame0, self.mdxensemchoose_b_var, None, 'No Model', 'UVR-MDX-NET Main', 'UVR-MDX-NET 1', 'UVR-MDX-NET 2', 'UVR-MDX-NET 3', 'UVR-MDX-NET Karaoke', 'Demucs UVR Model 1', 'Demucs UVR Model 2', 'Demucs mdx_extra', 'Demucs mdx_extra_q') l0.grid(row=5,column=0,padx=0,pady=0) @@ -3316,6 +3399,7 @@ class MainWindow(TkinterDnD.Tk): 'vr_ensem_mdx_c': self.vrensemchoose_mdx_c_var.get(), 'mdx_ensem': self.mdxensemchoose_var.get(), 'mdx_ensem_b': self.mdxensemchoose_b_var.get(), + 'demucsmodel_sel_VR': self.demucsmodel_sel_VR_var.get(), 'gpu': self.gpuConversion_var.get(), 'appendensem': self.appendensem_var.get(), 'demucs_only': self.demucs_only_var.get(), @@ -3340,15 +3424,19 @@ class MainWindow(TkinterDnD.Tk): 'ModelParams': self.ModelParams_var.get(), #MDX-Net 'demucsmodel': self.demucsmodel_var.get(), + 'demucsmodelVR': self.demucsmodelVR_var.get(), 'non_red': self.non_red_var.get(), + 'nophaseinst': self.nophaseinst_var.get(), 'noise_reduc': self.noisereduc_var.get(), 'voc_only': self.voc_only_var.get(), 'inst_only': self.inst_only_var.get(), 'voc_only_b': self.voc_only_b_var.get(), 'inst_only_b': self.inst_only_b_var.get(), 'audfile': self.audfile_var.get(), + 'autocompensate': self.autocompensate_var.get(), 'chunks': chunks, 'n_fft_scale': self.n_fft_scale_var.get(), + 'segment': self.segment_var.get(), 'dim_f': self.dim_f_var.get(), 'noise_pro_select': self.noise_pro_select_var.get(), 'overlap': self.overlap_var.get(), @@ -3384,4 +3472,4 @@ if __name__ == "__main__": def callback(url): webbrowser.open_new_tab(url) - root.mainloop() + root.mainloop() \ No newline at end of file diff --git a/inference_MDX.py b/inference_MDX.py index dde6a60..60b6fb7 100644 --- a/inference_MDX.py +++ b/inference_MDX.py @@ -101,6 +101,7 @@ class Predictor(): def prediction(self, m): mix, samplerate = librosa.load(m, mono=False, sr=44100) + print('print mix: ', mix) if mix.ndim == 1: mix = np.asfortranarray([mix,mix]) samplerate = samplerate @@ -208,6 +209,27 @@ class Predictor(): save_path=save_path, file_name = f'{os.path.basename(_basename)}_{vocal_name}_No_Reduction',) + if data['modelFolder']: + non_reduced_Instrumental_path = '{save_path}/{file_name}.wav'.format( + save_path=save_path, + file_name = f'{os.path.basename(_basename)}_{Instrumental_name}_{model_set_name}_No_Reduction',) + non_reduced_path_mp3 = '{save_path}/{file_name}.mp3'.format( + save_path=save_path, + file_name = f'{os.path.basename(_basename)}_{Instrumental_name}_{model_set_name}_No_Reduction',) + non_reduced_Instrumental_path_flac = '{save_path}/{file_name}.flac'.format( + save_path=save_path, + file_name = f'{os.path.basename(_basename)}_{Instrumental_name}_{model_set_name}_No_Reduction',) + else: + non_reduced_Instrumental_path = '{save_path}/{file_name}.wav'.format( + save_path=save_path, + file_name = f'{os.path.basename(_basename)}_{Instrumental_name}_No_Reduction',) + non_reduced_Instrumental_path_mp3 = '{save_path}/{file_name}.mp3'.format( + save_path=save_path, + file_name = f'{os.path.basename(_basename)}_{Instrumental_name}_No_Reduction',) + non_reduced_Instrumental_path_flac = '{save_path}/{file_name}.flac'.format( + save_path=save_path, + file_name = f'{os.path.basename(_basename)}_{Instrumental_name}_No_Reduction',) + if os.path.isfile(non_reduced_vocal_path): file_exists_n = 'there' @@ -306,14 +328,30 @@ class Predictor(): if data['voc_only'] and not data['inst_only']: pass - else: - finalfiles = [ - { - 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', - 'files':[str(music_file), vocal_path], - } - ] + if not data['noisereduc_s'] == 'None': + if data['nophaseinst']: + finalfiles = [ + { + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':[str(music_file), non_reduced_vocal_path], + } + ] + else: + finalfiles = [ + { + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':[str(music_file), vocal_path], + } + ] + else: + finalfiles = [ + { + 'model_params':'lib_v5/modelparams/1band_sr44100_hl512.json', + 'files':[str(music_file), vocal_path], + } + ] + widget_text.write(base_text + 'Saving Instrumental... ') for i, e in tqdm(enumerate(finalfiles)): @@ -351,9 +389,24 @@ class Predictor(): v_spec = specs[1] - max_mag * np.exp(1.j * np.angle(specs[0])) update_progress(**progress_kwargs, step=(1)) - sf.write(Instrumental_path, spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr']) + if not data['noisereduc_s'] == 'None': + if data['nophaseinst']: + sf.write(non_reduced_Instrumental_path, spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr']) + reduction_sen = float(data['noisereduc_s'])/10 + print(noise_pro_set) + + subprocess.call("lib_v5\\sox\\sox.exe" + ' "' + + f"{str(non_reduced_Instrumental_path)}" + '" "' + f"{str(Instrumental_path)}" + '" ' + + "noisered lib_v5\\sox\\" + noise_pro_set + ".prof " + f"{reduction_sen}", + shell=True, stdout=subprocess.PIPE, + stdin=subprocess.PIPE, stderr=subprocess.PIPE) + else: + sf.write(Instrumental_path, spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr']) + else: + sf.write(Instrumental_path, spec_utils.cmb_spectrogram_to_wave(-v_spec, mp), mp.param['sr']) + if data['inst_only']: if file_exists_v == 'there': pass @@ -365,14 +418,24 @@ class Predictor(): widget_text.write('Done!\n') - if data['saveFormat'] == 'Mp3': try: + if data['inst_only'] == True: + if data['non_red'] == True: + if not data['nophaseinst']: + pass + else: + musfile = pydub.AudioSegment.from_wav(non_reduced_Instrumental_path) + musfile.export(non_reduced_Instrumental_path_mp3, format="mp3", bitrate="320k") + try: + os.remove(non_reduced_Instrumental_path) + except: + pass pass else: musfile = pydub.AudioSegment.from_wav(vocal_path) - musfile.export(vocal_path_mp3, format="mp3", bitrate="320k") + musfile.export(vocal_path_mp3, format="mp3", bitrate="320k") if file_exists_v == 'there': pass else: @@ -380,21 +443,47 @@ class Predictor(): os.remove(vocal_path) except: pass + if data['non_red'] == True: + if not data['nophaseinst']: + pass + else: + if data['voc_only'] == True: + pass + else: + musfile = pydub.AudioSegment.from_wav(non_reduced_Instrumental_path) + musfile.export(non_reduced_Instrumental_path_mp3, format="mp3", bitrate="320k") + if file_exists_n == 'there': + pass + else: + try: + os.remove(non_reduced_Instrumental_path) + except: + pass if data['voc_only'] == True: + if data['non_red'] == True: + musfile = pydub.AudioSegment.from_wav(non_reduced_vocal_path) + musfile.export(non_reduced_vocal_path_mp3, format="mp3", bitrate="320k") + try: + os.remove(non_reduced_vocal_path) + except: + pass pass else: musfile = pydub.AudioSegment.from_wav(Instrumental_path) - musfile.export(Instrumental_path_mp3, format="mp3", bitrate="320k") + musfile.export(Instrumental_path_mp3, format="mp3", bitrate="320k") if file_exists_i == 'there': pass else: try: os.remove(Instrumental_path) except: - pass + pass if data['non_red'] == True: - musfile = pydub.AudioSegment.from_wav(non_reduced_vocal_path) - musfile.export(non_reduced_vocal_path_mp3, format="mp3", bitrate="320k") + if data['inst_only'] == True: + pass + else: + musfile = pydub.AudioSegment.from_wav(non_reduced_vocal_path) + musfile.export(non_reduced_vocal_path_mp3, format="mp3", bitrate="320k") if file_exists_n == 'there': pass else: @@ -429,6 +518,16 @@ class Predictor(): if data['saveFormat'] == 'Flac': try: if data['inst_only'] == True: + if data['non_red'] == True: + if not data['nophaseinst']: + pass + else: + musfile = pydub.AudioSegment.from_wav(non_reduced_Instrumental_path) + musfile.export(non_reduced_Instrumental_path_flac, format="flac") + try: + os.remove(non_reduced_Instrumental_path) + except: + pass pass else: musfile = pydub.AudioSegment.from_wav(vocal_path) @@ -440,7 +539,30 @@ class Predictor(): os.remove(vocal_path) except: pass + if data['non_red'] == True: + if not data['nophaseinst']: + pass + else: + if data['voc_only'] == True: + pass + else: + musfile = pydub.AudioSegment.from_wav(non_reduced_Instrumental_path) + musfile.export(non_reduced_Instrumental_path_flac, format="flac") + if file_exists_n == 'there': + pass + else: + try: + os.remove(non_reduced_Instrumental_path) + except: + pass if data['voc_only'] == True: + if data['non_red'] == True: + musfile = pydub.AudioSegment.from_wav(non_reduced_vocal_path) + musfile.export(non_reduced_vocal_path_flac, format="flac") + try: + os.remove(non_reduced_vocal_path) + except: + pass pass else: musfile = pydub.AudioSegment.from_wav(Instrumental_path) @@ -453,8 +575,11 @@ class Predictor(): except: pass if data['non_red'] == True: - musfile = pydub.AudioSegment.from_wav(non_reduced_vocal_path) - musfile.export(non_reduced_vocal_path_flac, format="flac") + if data['inst_only'] == True: + pass + else: + musfile = pydub.AudioSegment.from_wav(non_reduced_vocal_path) + musfile.export(non_reduced_vocal_path_flac, format="flac") if file_exists_n == 'there': pass else: @@ -489,6 +614,14 @@ class Predictor(): if data['noisereduc_s'] == 'None': pass elif data['non_red'] == True: + if data['inst_only']: + if file_exists_n == 'there': + pass + else: + try: + os.remove(non_reduced_vocal_path) + except: + pass pass elif data['inst_only']: if file_exists_n == 'there': @@ -501,6 +634,7 @@ class Predictor(): else: try: os.remove(non_reduced_vocal_path) + os.remove(non_reduced_Instrumental_path) except: pass @@ -579,6 +713,7 @@ class Predictor(): if not data['demucsmodel']: sources = self.demix_base(segmented_mix, margin_size=margin) + #value=float(0.9)*float(compensate) elif data['demucs_only']: if split_mode == True: sources = self.demix_demucs_split(mix) @@ -599,13 +734,14 @@ class Predictor(): print(data['mixing']) if 'UVR' in demucs_model_set: + sources[source_val] = (spec_effects(wave=[demucs_out[1],base_out[0]], algorithm=data['mixing'], - value=b[source_val])*float(data['compensate'])) # compensation + value=b[source_val])*float(compensate)) # compensation else: sources[source_val] = (spec_effects(wave=[demucs_out[source_val],base_out[0]], algorithm=data['mixing'], - value=b[source_val])*float(data['compensate'])) # compensation + value=b[source_val])*float(compensate)) # compensation return sources def demix_base(self, mixes, margin_size): @@ -697,6 +833,8 @@ class Predictor(): sources = list(processed.values()) sources = np.concatenate(sources, axis=-1) widget_text.write('Done!\n') + print('the demucs model is done running') + return sources def demix_demucs_split(self, mix): @@ -718,6 +856,9 @@ class Predictor(): sources = (sources * ref.std() + ref.mean()).cpu().numpy() sources[[0,1]] = sources[[1,0]] + + print('the demucs model is done running') + return sources data = { @@ -741,12 +882,14 @@ data = { 'shifts': 0, 'margin': 44100, 'split_mode': False, + 'nophaseinst': True, 'compensate': 1.03597672895, + 'autocompensate': True, 'demucs_only': False, 'mixing': 'Default', 'DemucsModel_MDX': 'UVR_Demucs_Model_1', # Choose Model - 'mdxnetModel': 'UVR-MDX-NET 1', + 'mdxnetModel': 'UVR-MDX-NET Main', 'mdxnetModeltype': 'Vocals (Custom)', } default_chunks = data['chunks'] @@ -799,7 +942,8 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global noise_pro_set global demucs_model_set - global mdx_model_hash + global autocompensate + global compensate global channel_set global margin_set @@ -807,9 +951,12 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global shift_set global source_val global split_mode + global demucs_model_set global demucs_switch + autocompensate = data['autocompensate'] + # Update default settings default_chunks = data['chunks'] default_noisereduc_s = data['noisereduc_s'] @@ -883,6 +1030,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress modeltype = 'v' noise_pro = 'MDX-NET_Noise_Profile_14_kHz' stemset_n = '(Vocals)' + if autocompensate == True: + compensate = 1.03597672895 + else: + compensate = data['compensate'] source_val = 3 n_fft_scale_set=6144 dim_f_set=2048 @@ -896,6 +1047,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress modeltype = 'v' noise_pro = 'MDX-NET_Noise_Profile_14_kHz' stemset_n = '(Vocals)' + if autocompensate == True: + compensate = 1.03597672895 + else: + compensate = data['compensate'] source_val = 3 n_fft_scale_set=6144 dim_f_set=2048 @@ -909,6 +1064,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress modeltype = 'v' noise_pro = 'MDX-NET_Noise_Profile_14_kHz' stemset_n = '(Vocals)' + if autocompensate == True: + compensate = 1.03597672895 + else: + compensate = data['compensate'] source_val = 3 n_fft_scale_set=6144 dim_f_set=2048 @@ -918,15 +1077,36 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress modeltype = 'v' noise_pro = 'MDX-NET_Noise_Profile_14_kHz' stemset_n = '(Vocals)' + if autocompensate == True: + compensate = 1.03597672895 + else: + compensate = data['compensate'] source_val = 3 n_fft_scale_set=6144 dim_f_set=2048 + elif data['mdxnetModel'] == 'UVR-MDX-NET Main': + model_set = 'UVR_MDXNET_Main' + model_set_name = 'UVR_MDXNET_Main' + modeltype = 'v' + noise_pro = 'MDX-NET_Noise_Profile_17_kHz' + stemset_n = '(Vocals)' + if autocompensate == True: + compensate = 1.08 + else: + compensate = data['compensate'] + source_val = 3 + n_fft_scale_set=7680 + dim_f_set=3072 elif 'other' in data['mdxnetModel']: model_set = 'other' model_set_name = 'other' modeltype = 'o' noise_pro = 'MDX-NET_Noise_Profile_Full_Band' stemset_n = '(Other)' + if autocompensate == True: + compensate = 1.03597672895 + else: + compensate = data['compensate'] source_val = 2 n_fft_scale_set=8192 dim_f_set=2048 @@ -936,6 +1116,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress modeltype = 'd' noise_pro = 'MDX-NET_Noise_Profile_Full_Band' stemset_n = '(Drums)' + if autocompensate == True: + compensate = 1.03597672895 + else: + compensate = data['compensate'] source_val = 1 n_fft_scale_set=4096 dim_f_set=2048 @@ -945,6 +1129,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress modeltype = 'b' noise_pro = 'MDX-NET_Noise_Profile_Full_Band' stemset_n = '(Bass)' + if autocompensate == True: + compensate = 1.03597672895 + else: + compensate = data['compensate'] source_val = 0 n_fft_scale_set=16384 dim_f_set=2048 @@ -954,6 +1142,10 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress modeltype = stemset noise_pro = 'MDX-NET_Noise_Profile_Full_Band' stemset_n = stem_name + if autocompensate == True: + compensate = 1.03597672895 + else: + compensate = data['compensate'] source_val = source_val_set n_fft_scale_set=int(data['n_fft_scale']) dim_f_set=int(data['dim_f']) @@ -963,7 +1155,8 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress noise_pro_set = noise_pro else: noise_pro_set = data['noise_pro_select'] - + + print(n_fft_scale_set) print(dim_f_set) @@ -1031,7 +1224,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress try: - if float(data['noisereduc_s']) >= 10: + if float(data['noisereduc_s']) >= 11: text_widget.write('Error: Noise Reduction only supports values between 0-10.\nPlease set a value between 0-10 (with or without decimals) and try again.') progress_var.set(0) button_widget.configure(state=tk.NORMAL) # Enable Button diff --git a/inference_v5.py b/inference_v5.py index 338060d..065c355 100644 --- a/inference_v5.py +++ b/inference_v5.py @@ -11,6 +11,12 @@ import numpy as np import soundfile as sf from tqdm import tqdm +from demucs.pretrained import get_model as _gm +from demucs.hdemucs import HDemucs +from demucs.apply import BagOfModels, apply_model +from pathlib import Path +from models import stft, istft + from lib_v5 import dataset from lib_v5 import spec_utils from lib_v5.model_param_init import ModelParameters @@ -51,7 +57,13 @@ data = { 'window_size': 512, 'agg': 10, 'high_end_process': 'mirroring', - 'ModelParams': 'Auto' + 'ModelParams': 'Auto', + 'demucsmodel_sel_VR': 'UVR_Demucs_Model_1', + 'overlap': 0.5, + 'shifts': 0, + 'segment': 'None', + 'split_mode': False, + 'demucsmodelVR': True, } default_window_size = data['window_size'] @@ -97,6 +109,11 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress global nn_arch_sizes global nn_architecture + global overlap_set + global shift_set + global split_mode + global demucs_model_set + #Error Handling runtimeerr = "CUDNN error executing cudnnSetTensorNdDescriptor" @@ -140,8 +157,14 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress # For instrumental the instrumental is the temp file # and for vocal the instrumental is the temp file due # to reversement + if data['demucsmodelVR']: + sameplerate = 44100 + else: + sameplerate = mp.param['sr'] + + sf.write(f'temp.wav', - wav_instrument, mp.param['sr']) + wav_instrument.T, sameplerate) appendModelFolderName = modelFolderName.replace('/', '_') @@ -176,14 +199,14 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress if VModel in model_name and data['voc_only']: sf.write(instrumental_path, - wav_instrument, mp.param['sr']) + wav_instrument.T, sameplerate) elif VModel in model_name and data['inst_only']: pass elif data['voc_only']: pass else: sf.write(instrumental_path, - wav_instrument, mp.param['sr']) + wav_instrument.T, sameplerate) # Vocal if vocal_name is not None: @@ -215,14 +238,14 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress if VModel in model_name and data['inst_only']: sf.write(vocal_path, - wav_vocals, mp.param['sr']) + wav_vocals.T, sameplerate) elif VModel in model_name and data['voc_only']: pass elif data['inst_only']: pass else: sf.write(vocal_path, - wav_vocals, mp.param['sr']) + wav_vocals.T, sameplerate) if data['saveFormat'] == 'Mp3': try: @@ -362,6 +385,11 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress text_widget.clear() button_widget.configure(state=tk.DISABLED) # Disable Button + overlap_set = float(data['overlap']) + shift_set = int(data['shifts']) + demucs_model_set = data['demucsmodel_sel_VR'] + split_mode = data['split_mode'] + vocal_remover = VocalRemover(data, text_widget) modelFolderName = determineModelFolderName() @@ -369,6 +397,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress try: #Load File(s) for file_num, music_file in enumerate(data['input_paths'], start=1): # Determine File Name + m=music_file base_name = f'{data["export_path"]}/{file_num}_{os.path.splitext(os.path.basename(music_file))[0]}' model_name = os.path.basename(data[f'{data["useModel"]}Model']) @@ -802,6 +831,85 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress y_spec_m = pred * X_phase v_spec_m = X_spec_m - y_spec_m + def demix_demucs(mix): + #print('shift_set ', shift_set) + text_widget.write(base_text + "Running Demucs Inference...\n") + text_widget.write(base_text + "Processing... ") + print(' Running Demucs Inference...') + + mix = torch.tensor(mix, dtype=torch.float32) + ref = mix.mean(0) + mix = (mix - ref.mean()) / ref.std() + + with torch.no_grad(): + sources = apply_model(demucs, mix[None], split=split_mode, device=device, overlap=overlap_set, shifts=shift_set, progress=False)[0] + + text_widget.write('Done!\n') + + sources = (sources * ref.std() + ref.mean()).cpu().numpy() + sources[[0,1]] = sources[[1,0]] + + return sources + + def demucs_prediction(m): + global demucs_sources + mix, samplerate = librosa.load(m, mono=False, sr=44100) + if mix.ndim == 1: + mix = np.asfortranarray([mix,mix]) + + mix = mix.T + + demucs_sources = demix_demucs(mix.T) + + if data['demucsmodelVR']: + demucs = HDemucs(sources=["other", "vocals"]) + text_widget.write(base_text + 'Loading Demucs model... ') + update_progress(**progress_kwargs, + step=0.95) + path_d = Path('models/Demucs_Models') + print('What Demucs model was chosen? ', demucs_model_set) + demucs = _gm(name=demucs_model_set, repo=path_d) + text_widget.write('Done!\n') + + print('segment: ', data['segment']) + + if data['segment'] == 'None': + segment = None + if isinstance(demucs, BagOfModels): + if segment is not None: + for sub in demucs.models: + sub.segment = segment + else: + if segment is not None: + sub.segment = segment + else: + try: + segment = int(data['segment']) + if isinstance(demucs, BagOfModels): + if segment is not None: + for sub in demucs.models: + sub.segment = segment + else: + if segment is not None: + sub.segment = segment + text_widget.write(base_text + "Segments set to "f"{segment}.\n") + except: + segment = None + if isinstance(demucs, BagOfModels): + if segment is not None: + for sub in demucs.models: + sub.segment = segment + else: + if segment is not None: + sub.segment = segment + + print('segment port-process: ', segment) + + demucs.cpu() + demucs.eval() + + demucs_prediction(m) + if data['voc_only'] and not data['inst_only']: pass else: @@ -809,13 +917,25 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress if data['high_end_process'].startswith('mirroring'): input_high_end_ = spec_utils.mirroring(data['high_end_process'], y_spec_m, input_high_end, mp) - wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp, input_high_end_h, input_high_end_) + if data['demucsmodelVR']: + wav_instrument = spec_utils.cmb_spectrogram_to_wave_d(y_spec_m, mp, input_high_end_h, input_high_end_, demucs=True) + demucs_inst = demucs_sources[0] + sources = [wav_instrument,demucs_inst] + spec = [stft(sources[0],2048,1024),stft(sources[1],2048,1024)] + ln = min([spec[0].shape[2], spec[1].shape[2]]) + spec[0] = spec[0][:,:,:ln] + spec[1] = spec[1][:,:,:ln] + v_spec_c = np.where(np.abs(spec[1]) <= np.abs(spec[0]), spec[1], spec[0]) + wav_instrument = istft(v_spec_c,1024) + else: + wav_instrument = spec_utils.cmb_spectrogram_to_wave_d(y_spec_m, mp, input_high_end_h, input_high_end_, demucs=False) + if data['voc_only'] and not data['inst_only']: pass else: text_widget.write('Done!\n') else: - wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, mp) + wav_instrument = spec_utils.cmb_spectrogram_to_wave_d(y_spec_m, mp) if data['voc_only'] and not data['inst_only']: pass else: @@ -828,14 +948,25 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress if data['high_end_process'].startswith('mirroring'): input_high_end_ = spec_utils.mirroring(data['high_end_process'], v_spec_m, input_high_end, mp) - - wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp, input_high_end_h, input_high_end_) + if data['demucsmodelVR']: + wav_vocals = spec_utils.cmb_spectrogram_to_wave_d(v_spec_m, mp, input_high_end_h, input_high_end_, demucs=True) + demucs_voc = demucs_sources[1] + sources = [wav_vocals,demucs_voc] + spec = [stft(sources[0],2048,1024),stft(sources[1],2048,1024)] + ln = min([spec[0].shape[2], spec[1].shape[2]]) + spec[0] = spec[0][:,:,:ln] + spec[1] = spec[1][:,:,:ln] + v_spec_c = np.where(np.abs(spec[1]) >= np.abs(spec[0]), spec[1], spec[0]) + wav_vocals = istft(v_spec_c,1024) + else: + wav_vocals = spec_utils.cmb_spectrogram_to_wave_d(v_spec_m, mp, input_high_end_h, input_high_end_, demucs=False) + if data['inst_only'] and not data['voc_only']: pass else: text_widget.write('Done!\n') - else: - wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, mp) + else: + wav_vocals = spec_utils.cmb_spectrogram_to_wave_d(v_spec_m, mp, demucs=False) if data['inst_only'] and not data['voc_only']: pass else: @@ -843,7 +974,7 @@ def main(window: tk.Wm, text_widget: tk.Text, button_widget: tk.Button, progress update_progress(**progress_kwargs, step=1) - + # Save output music files save_files(wav_instrument, wav_vocals)