Add files via upload

Anjok07 2023-10-10 21:48:02 -05:00 committed by GitHub
parent 3630c32124
commit 274ca43b3a
2 changed files with 39 additions and 23 deletions

UVR.py

@@ -53,6 +53,9 @@ import sys
import yaml
from ml_collections import ConfigDict
from collections import Counter
import torch_directml
is_gpu_available = torch.backends.mps.is_available() if (OPERATING_SYSTEM == 'Darwin') else (torch_directml.is_available() or torch.cuda.is_available())
# Change the current working directory to the directory
# this file sits in
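
A standalone sketch of the availability probe added in this hunk. Unlike the committed line it guards the torch_directml import (an assumption for machines without the package), so the same check still runs where only CUDA or the CPU is present:

import platform

import torch

try:
    import torch_directml
    directml_available = torch_directml.is_available()
except ImportError:
    directml_available = False

# Mirrors the committed logic: MPS on macOS, otherwise DirectML or CUDA.
if platform.system() == 'Darwin':
    is_gpu_available = torch.backends.mps.is_available()
else:
    is_gpu_available = directml_available or torch.cuda.is_available()

print(f"GPU backend available: {is_gpu_available}")
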
@@ -109,6 +112,8 @@ elif OPERATING_SYSTEM=="Windows":
right_click_button = '<Button-3>'
application_extension = ".exe"
clear_gpu_cache = torch.mps.empty_cache if is_macos else torch.cuda.empty_cache
def right_click_release_linux(window, top_win=None):
if OPERATING_SYSTEM=="Linux":
root.bind('<Button-1>', lambda e:window.destroy())
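
A minimal standalone version of the clear_gpu_cache dispatch added above. The callable is chosen once at startup, so later call sites stay backend-agnostic; only the selected branch of the conditional is evaluated, so torch.mps is never touched off macOS:

import platform
import torch

is_macos = platform.system() == 'Darwin'
clear_gpu_cache = torch.mps.empty_cache if is_macos else torch.cuda.empty_cache

def after_run_cleanup():
    # Returns cached allocator blocks to the backend once a processing run ends.
    clear_gpu_cache()
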
@@ -1437,7 +1442,7 @@ class MainWindow(TkinterDnD.Tk if is_dnd_compatible else tk.Tk):
self.mdx_hash_MAPPER = load_model_hash_data(MDX_HASH_JSON)
self.mdx_name_select_MAPPER = load_model_hash_data(MDX_MODEL_NAME_SELECT)
self.demucs_name_select_MAPPER = load_model_hash_data(DEMUCS_MODEL_NAME_SELECT)
self.is_gpu_available = torch.cuda.is_available() if not OPERATING_SYSTEM == 'Darwin' else torch.backends.mps.is_available()
self.is_gpu_available = is_gpu_available
self.is_process_stopped = False
self.inputs_from_dir = []
self.iteration = 0
@@ -5479,7 +5484,7 @@ class MainWindow(TkinterDnD.Tk if is_dnd_compatible else tk.Tk):
if self.clear_cache_torch:
#self.set_app_font(is_chosen_font=True)
torch.cuda.empty_cache()
clear_gpu_cache()
self.clear_cache_torch = False
if self.is_process_stopped:
@@ -5492,7 +5497,7 @@ class MainWindow(TkinterDnD.Tk if is_dnd_compatible else tk.Tk):
self.conversion_Button_Text_var.set(START_PROCESSING)
self.conversion_Button.configure(state=tk.NORMAL)
self.progress_bar_main_var.set(0)
torch.cuda.empty_cache()
clear_gpu_cache()
self.is_process_stopped = False
if self.is_confirm_error_var.get():
@@ -6589,7 +6594,7 @@ class MainWindow(TkinterDnD.Tk if is_dnd_compatible else tk.Tk):
if os.path.isfile(audio_file):
os.remove(audio_file)
torch.cuda.empty_cache()
clear_gpu_cache()
shutil.rmtree(export_path) if is_ensemble and len(os.listdir(export_path)) == 0 else None


@@ -31,6 +31,10 @@ import math
#import random
from onnx import load
from onnx2pytorch import ConvertModel
import torch_directml
import gc
cpu = 'cpu'
if TYPE_CHECKING:
from UVR import ModelData
@@ -38,6 +42,8 @@ if TYPE_CHECKING:
warnings.filterwarnings("ignore")
cpu = torch.device('cpu')
clear_gpu_cache = torch.mps.empty_cache if is_macos else torch.cuda.empty_cache
class SeperateAttributes:
def __init__(self, model_data: ModelData,
process_data: dict,
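
The separation module also imports gc next to the same per-backend cache hook and pins a single cpu = torch.device('cpu') object that later fallbacks reuse. A plausible pairing (an assumption about intent, not code from this commit) is to run Python's collector before asking the backend to release its cache:

import gc
import platform

import torch

is_macos = platform.system() == 'Darwin'
clear_gpu_cache = torch.mps.empty_cache if is_macos else torch.cuda.empty_cache

def release_memory():
    gc.collect()        # drop unreachable tensors first
    clear_gpu_cache()   # then let the backend free its cached blocks
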
@@ -137,8 +143,9 @@ class SeperateAttributes:
self.stem_path_init = os.path.join(self.export_path, f'{self.audio_file_base}_({self.secondary_stem}).wav')
self.deverb_vocal_opt = model_data.deverb_vocal_opt
self.is_save_vocal_only = model_data.is_save_vocal_only
self.device = 'cpu'
self.device = cpu
self.run_type = ['CPUExecutionProvider']
self.is_opencl = False
if self.is_inst_only_voc_splitter or self.is_sec_bv_rebalance:
self.is_primary_stem_only = False
@@ -149,11 +156,11 @@
if self.is_gpu_conversion >= 0:
if OPERATING_SYSTEM == 'Darwin' and torch.backends.mps.is_available():
self.device = 'mps'
self.is_mps = True
self.device, self.is_mps = 'mps', True
elif torch_directml.is_available():
self.device, self.is_mps, self.is_opencl = torch_directml.device(), True, True
elif torch.cuda.is_available():
self.device = 'cuda:0'
self.run_type = ['CUDAExecutionProvider']
self.device, self.run_type = 'cuda:0', ['CUDAExecutionProvider']
if model_data.process_method == MDX_ARCH_TYPE:
self.is_mdx_ckpt = model_data.is_mdx_ckpt
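
A self-contained sketch of the selection order this rewritten block implements: MPS on macOS, then DirectML, then CUDA, with a CPU fallback. As in the commit, only the CUDA branch switches the ONNX Runtime provider list, and the DirectML path sets the is_mps flag as well, apparently reusing it as a generic non-CUDA marker; torch_directml is treated as optional here:

import platform

import torch

try:
    import torch_directml
except ImportError:
    torch_directml = None

def select_device(use_gpu=True):
    device, run_type = torch.device('cpu'), ['CPUExecutionProvider']
    is_mps = is_opencl = False
    if use_gpu:
        if platform.system() == 'Darwin' and torch.backends.mps.is_available():
            device, is_mps = 'mps', True
        elif torch_directml is not None and torch_directml.is_available():
            device, is_mps, is_opencl = torch_directml.device(), True, True
        elif torch.cuda.is_available():
            device, run_type = 'cuda:0', ['CUDAExecutionProvider']
    return device, run_type, is_mps, is_opencl

device, run_type, is_mps, is_opencl = select_device()
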
@@ -166,6 +173,7 @@
self.mdx_segment_size = model_data.mdx_segment_size
if self.is_mdx_c:
self.device = cpu if self.is_opencl else self.device
if not self.is_4_stem_ensemble:
self.primary_stem = model_data.ensemble_primary_stem if process_data['is_ensemble_master'] else model_data.primary_stem
self.secondary_stem = model_data.ensemble_secondary_stem if process_data['is_ensemble_master'] else model_data.secondary_stem
@@ -192,7 +200,7 @@
self.is_demucs_combine_stems = model_data.is_demucs_combine_stems
self.demucs_stem_count = model_data.demucs_stem_count
self.pre_proc_model = model_data.pre_proc_model
self.device = 'cpu' if self.is_mps and not self.demucs_version == DEMUCS_V4 else self.device
self.device = cpu if self.is_mps and not self.demucs_version == DEMUCS_V4 else self.device
self.primary_stem = model_data.ensemble_primary_stem if process_data['is_ensemble_master'] else model_data.primary_stem
self.secondary_stem = model_data.ensemble_secondary_stem if process_data['is_ensemble_master'] else model_data.secondary_stem
@@ -458,6 +466,7 @@ class SeperateMDX(SeperateAttributes):
self.running_inference_console_write()
mix = prepare_mix(self.audio_file)
source = self.demix(mix)
if not self.is_vocal_split_model:
@@ -484,8 +493,8 @@
self.primary_source = source.T
self.primary_source_map = self.final_process(primary_stem_path, self.primary_source, self.secondary_source_primary, self.primary_stem, samplerate)
torch.cuda.empty_cache()
clear_gpu_cache()
secondary_sources = {**self.primary_source_map, **self.secondary_source_map}
@@ -499,7 +508,7 @@
self.trim = self.n_fft//2
self.chunk_size = self.hop * (self.mdx_segment_size-1)
self.gen_size = self.chunk_size-2*self.trim
self.stft = STFT(self.n_fft, self.hop, self.dim_f)
self.stft = STFT(self.n_fft, self.hop, self.dim_f, self.device)
def demix(self, mix, is_match_mix=False):
self.initialize_model_settings()
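
The STFT helper now receives the target device at construction time. A hedged stand-in (not the repo's actual STFT class) showing why that matters: the analysis window must live on the same device as the audio batch, or torch.stft raises a device-mismatch error on CUDA, MPS, or DirectML tensors:

import torch

class SimpleSTFT:
    def __init__(self, n_fft, hop_length, dim_f, device='cpu'):
        self.n_fft, self.hop_length, self.dim_f = n_fft, hop_length, dim_f
        # Created once on the target device so it always matches the input tensors.
        self.window = torch.hann_window(n_fft, device=device)

    def __call__(self, wave):
        # wave: (channels, samples) tensor already on the same device as self.window.
        spec = torch.stft(wave, n_fft=self.n_fft, hop_length=self.hop_length,
                          window=self.window, center=True, return_complex=True)
        return spec[..., :self.dim_f, :]   # keep the first dim_f frequency bins
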
@@ -548,13 +557,13 @@
mix_part = torch.tensor([mix_part_], dtype=torch.float32).to(self.device)
mix_waves = mix_part.split(self.mdx_batch_size)
with torch.no_grad():
for mix_wave in mix_waves:
self.running_inference_progress_bar(total_chunks, is_match_mix=is_match_mix)
tar_waves = self.run_model(mix_wave, is_match_mix=is_match_mix)
if window is not None:
tar_waves[..., :chunk_size_actual] *= window
divider[..., start:end] += window
@@ -562,7 +571,7 @@
divider[..., start:end] += 1
result[..., start:end] += tar_waves[..., :end-start]
tar_waves = result / divider
tar_waves_.append(tar_waves)
@@ -685,7 +694,7 @@ class SeperateMDXC(SeperateAttributes):
self.primary_source_map = self.final_process(primary_stem_path, self.primary_source, self.secondary_source_primary, self.primary_stem, samplerate)
torch.cuda.empty_cache()
clear_gpu_cache()
secondary_sources = {**self.primary_source_map, **self.secondary_source_map}
self.process_vocal_split_chain(secondary_sources)
@@ -698,9 +707,9 @@
org_mix = mix
if self.is_pitch_change:
mix, sr_pitched = spec_utils.change_pitch_semitones(mix, 44100, semitone_shift=-self.semitone_shift)
model = TFC_TDF_net(self.mdx_c_configs).eval().to(self.device)
model.load_state_dict(torch.load(self.model_path, map_location=self.device))
model = TFC_TDF_net(self.mdx_c_configs, device=self.device).eval().to(self.device)
model.load_state_dict(torch.load(self.model_path, map_location=cpu))
mix = torch.tensor(mix, dtype=torch.float32)
try:
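
The MDX-C checkpoint is now deserialized on the CPU and then copied into a model that already sits on the compute device; load_state_dict moves the weights onto whichever device the parameters occupy, which avoids backend-specific map_location handling. A minimal sketch of the same pattern (TinyNet and checkpoint.ckpt are placeholders, not UVR names):

import torch
import torch.nn as nn

class TinyNet(nn.Module):            # placeholder for TFC_TDF_net
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(8, 8)

cpu = torch.device('cpu')
device = 'cuda:0' if torch.cuda.is_available() else cpu

torch.save(TinyNet().state_dict(), 'checkpoint.ckpt')      # stand-in checkpoint file

model = TinyNet().eval().to(device)
state = torch.load('checkpoint.ckpt', map_location=cpu)    # always deserialize on CPU
model.load_state_dict(state)    # copies the CPU tensors into the on-device parameters
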
@@ -830,7 +839,7 @@ class SeperateDemucs(SeperateAttributes):
self.write_to_console(DONE, base_text='')
del self.demucs
torch.cuda.empty_cache()
clear_gpu_cache()
if isinstance(inst_source, np.ndarray):
source_reshape = spec_utils.reshape_sources(inst_source[self.demucs_source_map[VOCAL_STEM]], source[self.demucs_source_map[VOCAL_STEM]])
@@ -1050,7 +1059,7 @@ class SeperateVR(SeperateAttributes):
self.secondary_source_map = self.final_process(secondary_stem_path, self.secondary_source, self.secondary_source_secondary, self.secondary_stem, 44100)
torch.cuda.empty_cache()
clear_gpu_cache()
secondary_sources = {**self.primary_source_map, **self.secondary_source_map}
self.process_vocal_split_chain(secondary_sources)
@@ -1330,8 +1339,10 @@ def vr_denoiser(X, device, hop_length=1024, n_fft=2048, cropsize=256, is_deverbe
hop_length=1024
nout, nout_lstm = 16, 128
device = cpu if not device in ['cuda', 'cpu'] else device
model = nets_new.CascadedNet(n_fft, nout=nout, nout_lstm=nout_lstm)
model.load_state_dict(torch.load(model_path, map_location=device))
model.load_state_dict(torch.load(model_path, map_location=cpu))
model.to(device)
if mp is None:
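
The denoiser applies the same load-on-CPU pattern together with a guard that collapses any backend other than plain 'cuda' or 'cpu' (for example an MPS or DirectML handle) back to the CPU. A small sketch of that guard:

import torch

cpu = torch.device('cpu')

def coerce_denoiser_device(device):
    # Only 'cuda' and 'cpu' are passed through; everything else falls back to CPU.
    return device if device in ('cuda', 'cpu') else cpu

print(coerce_denoiser_device('mps'))   # prints: cpu
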