Add files via upload

2025-03-03 08:36:33 +01:00 · 2023-10-10 21:48:02 -05:00 · 2023-10-10 21:48:02 -05:00 · 274ca43b3a
commit 274ca43b3a
parent 3630c32124
2 changed files with 39 additions and 23 deletions
--- a/UVR.py
+++ b/UVR.py
@ -53,6 +53,9 @@ import sys
 import yaml
 from ml_collections import ConfigDict
 from collections import Counter
 import torch_directml
 is_gpu_available = torch.backends.mps.is_available() if (OPERATING_SYSTEM == 'Darwin') else (torch_directml.is_available() or torch.cuda.is_available())
 # Change the current working directory to the directory
 # this file sits in
@ -109,6 +112,8 @@ elif OPERATING_SYSTEM=="Windows":
    right_click_button = '<Button-3>'
    application_extension = ".exe"
 clear_gpu_cache = torch.mps.empty_cache if is_macos else torch.cuda.empty_cache
 def right_click_release_linux(window, top_win=None):
    if OPERATING_SYSTEM=="Linux":
        root.bind('<Button-1>', lambda e:window.destroy())
@ -1437,7 +1442,7 @@ class MainWindow(TkinterDnD.Tk if is_dnd_compatible else tk.Tk):
        self.mdx_hash_MAPPER = load_model_hash_data(MDX_HASH_JSON)
        self.mdx_name_select_MAPPER = load_model_hash_data(MDX_MODEL_NAME_SELECT)
        self.demucs_name_select_MAPPER = load_model_hash_data(DEMUCS_MODEL_NAME_SELECT)
-        self.is_gpu_available = torch.cuda.is_available() if not OPERATING_SYSTEM == 'Darwin' else torch.backends.mps.is_available()
+        self.is_gpu_available = is_gpu_available
        self.is_process_stopped = False
        self.inputs_from_dir = []
        self.iteration = 0
@ -5479,7 +5484,7 @@ class MainWindow(TkinterDnD.Tk if is_dnd_compatible else tk.Tk):
        if self.clear_cache_torch:
            #self.set_app_font(is_chosen_font=True)
-            torch.cuda.empty_cache()
+            clear_gpu_cache()
            self.clear_cache_torch = False
        if self.is_process_stopped:
@ -5492,7 +5497,7 @@ class MainWindow(TkinterDnD.Tk if is_dnd_compatible else tk.Tk):
                self.conversion_Button_Text_var.set(START_PROCESSING)
                self.conversion_Button.configure(state=tk.NORMAL)
                self.progress_bar_main_var.set(0)
-                torch.cuda.empty_cache()
+                clear_gpu_cache()
                self.is_process_stopped = False
        if self.is_confirm_error_var.get():
@ -6589,7 +6594,7 @@ class MainWindow(TkinterDnD.Tk if is_dnd_compatible else tk.Tk):
                    if os.path.isfile(audio_file):
                        os.remove(audio_file)
-                torch.cuda.empty_cache()
+                clear_gpu_cache()
            shutil.rmtree(export_path) if is_ensemble and len(os.listdir(export_path)) == 0 else None
--- a/separate.py
+++ b/separate.py
@ -31,6 +31,10 @@ import math
 #import random
 from onnx import load
 from onnx2pytorch import ConvertModel
 import torch_directml
 import gc
 cpu = 'cpu'
 if TYPE_CHECKING:
    from UVR import ModelData
@ -38,6 +42,8 @@ if TYPE_CHECKING:
 warnings.filterwarnings("ignore")
 cpu = torch.device('cpu')
 clear_gpu_cache = torch.mps.empty_cache if is_macos else torch.cuda.empty_cache
 class SeperateAttributes:
    def __init__(self, model_data: ModelData, 
                 process_data: dict, 
@ -137,8 +143,9 @@ class SeperateAttributes:
        self.stem_path_init = os.path.join(self.export_path, f'{self.audio_file_base}_({self.secondary_stem}).wav')
        self.deverb_vocal_opt = model_data.deverb_vocal_opt
        self.is_save_vocal_only = model_data.is_save_vocal_only
-        self.device = 'cpu'
+        self.device = cpu
        self.run_type = ['CPUExecutionProvider']
        self.is_opencl = False
        if self.is_inst_only_voc_splitter or self.is_sec_bv_rebalance:
            self.is_primary_stem_only = False
@ -149,11 +156,11 @@ class SeperateAttributes:
        if self.is_gpu_conversion >= 0:
            if OPERATING_SYSTEM == 'Darwin' and torch.backends.mps.is_available():
-                self.device = 'mps'
+                self.device, self.is_mps = 'mps', True
-                self.is_mps = True
+            elif torch_directml.is_available():
                self.device, self.is_mps, self.is_opencl = torch_directml.device(), True, True
            elif torch.cuda.is_available():
-                self.device = 'cuda:0'
+                self.device, self.run_type = 'cuda:0', ['CUDAExecutionProvider']
                self.run_type = ['CUDAExecutionProvider']
        if model_data.process_method == MDX_ARCH_TYPE:
            self.is_mdx_ckpt = model_data.is_mdx_ckpt
@ -166,6 +173,7 @@ class SeperateAttributes:
            self.mdx_segment_size = model_data.mdx_segment_size
            if self.is_mdx_c:
                self.device = cpu if self.is_opencl else self.device
                if not self.is_4_stem_ensemble:
                    self.primary_stem = model_data.ensemble_primary_stem if process_data['is_ensemble_master'] else model_data.primary_stem
                    self.secondary_stem = model_data.ensemble_secondary_stem if process_data['is_ensemble_master'] else model_data.secondary_stem
@ -192,7 +200,7 @@ class SeperateAttributes:
            self.is_demucs_combine_stems = model_data.is_demucs_combine_stems
            self.demucs_stem_count = model_data.demucs_stem_count
            self.pre_proc_model = model_data.pre_proc_model
-            self.device = 'cpu' if self.is_mps and not self.demucs_version == DEMUCS_V4 else self.device
+            self.device = cpu if self.is_mps and not self.demucs_version == DEMUCS_V4 else self.device
            self.primary_stem = model_data.ensemble_primary_stem if process_data['is_ensemble_master'] else model_data.primary_stem
            self.secondary_stem = model_data.ensemble_secondary_stem if process_data['is_ensemble_master'] else model_data.secondary_stem
@ -458,6 +466,7 @@ class SeperateMDX(SeperateAttributes):
            self.running_inference_console_write()
            mix = prepare_mix(self.audio_file)
            source = self.demix(mix)
            if not self.is_vocal_split_model:
@ -485,7 +494,7 @@ class SeperateMDX(SeperateAttributes):
            self.primary_source_map = self.final_process(primary_stem_path, self.primary_source, self.secondary_source_primary, self.primary_stem, samplerate)
-        torch.cuda.empty_cache()
+        clear_gpu_cache()
        secondary_sources = {**self.primary_source_map, **self.secondary_source_map}
@ -499,7 +508,7 @@ class SeperateMDX(SeperateAttributes):
        self.trim = self.n_fft//2
        self.chunk_size = self.hop * (self.mdx_segment_size-1)
        self.gen_size = self.chunk_size-2*self.trim
-        self.stft = STFT(self.n_fft, self.hop, self.dim_f)
+        self.stft = STFT(self.n_fft, self.hop, self.dim_f, self.device)
    def demix(self, mix, is_match_mix=False):
        self.initialize_model_settings()
@ -685,7 +694,7 @@ class SeperateMDXC(SeperateAttributes):
                self.primary_source_map = self.final_process(primary_stem_path, self.primary_source, self.secondary_source_primary, self.primary_stem, samplerate)
-        torch.cuda.empty_cache()
+        clear_gpu_cache()
        secondary_sources = {**self.primary_source_map, **self.secondary_source_map}
        self.process_vocal_split_chain(secondary_sources)
@ -699,8 +708,8 @@ class SeperateMDXC(SeperateAttributes):
        if self.is_pitch_change:
            mix, sr_pitched = spec_utils.change_pitch_semitones(mix, 44100, semitone_shift=-self.semitone_shift)
-        model = TFC_TDF_net(self.mdx_c_configs).eval().to(self.device)
+        model = TFC_TDF_net(self.mdx_c_configs, device=self.device).eval().to(self.device)
-        model.load_state_dict(torch.load(self.model_path, map_location=self.device))
+        model.load_state_dict(torch.load(self.model_path, map_location=cpu))
        mix = torch.tensor(mix, dtype=torch.float32)
        try:
@ -830,7 +839,7 @@ class SeperateDemucs(SeperateAttributes):
            self.write_to_console(DONE, base_text='')
            del self.demucs
-            torch.cuda.empty_cache()
+            clear_gpu_cache()
        if isinstance(inst_source, np.ndarray):
            source_reshape = spec_utils.reshape_sources(inst_source[self.demucs_source_map[VOCAL_STEM]], source[self.demucs_source_map[VOCAL_STEM]])
@ -1050,7 +1059,7 @@ class SeperateVR(SeperateAttributes):
            self.secondary_source_map = self.final_process(secondary_stem_path, self.secondary_source, self.secondary_source_secondary, self.secondary_stem, 44100)
-        torch.cuda.empty_cache()
+        clear_gpu_cache()
        secondary_sources = {**self.primary_source_map, **self.secondary_source_map}
        self.process_vocal_split_chain(secondary_sources)
@ -1330,8 +1339,10 @@ def vr_denoiser(X, device, hop_length=1024, n_fft=2048, cropsize=256, is_deverbe
        hop_length=1024
        nout, nout_lstm = 16, 128
    device = cpu if not device in ['cuda', 'cpu'] else device
    model = nets_new.CascadedNet(n_fft, nout=nout, nout_lstm=nout_lstm)
-    model.load_state_dict(torch.load(model_path, map_location=device))
+    model.load_state_dict(torch.load(model_path, map_location=cpu))
    model.to(device)
    if mp is None: