mirror of
https://github.com/Anjok07/ultimatevocalremovergui.git
synced 2024-11-28 09:21:03 +01:00
187 lines
7.4 KiB
Python
187 lines
7.4 KiB
Python
# Copyright (c) Facebook, Inc. and its affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
import subprocess
|
|
|
|
from dora.log import fatal
|
|
import torch as th
|
|
import torchaudio as ta
|
|
|
|
from .apply import apply_model, BagOfModels
|
|
from .audio import AudioFile, convert_audio, save_audio
|
|
from .pretrained import get_model_from_args, add_model_flags, ModelLoadingError
|
|
|
|
|
|
def load_track(track, audio_channels, samplerate):
|
|
errors = {}
|
|
wav = None
|
|
|
|
try:
|
|
wav = AudioFile(track).read(
|
|
streams=0,
|
|
samplerate=samplerate,
|
|
channels=audio_channels)
|
|
except FileNotFoundError:
|
|
errors['ffmpeg'] = 'Ffmpeg is not installed.'
|
|
except subprocess.CalledProcessError:
|
|
errors['ffmpeg'] = 'FFmpeg could not read the file.'
|
|
|
|
if wav is None:
|
|
try:
|
|
wav, sr = ta.load(str(track))
|
|
except RuntimeError as err:
|
|
errors['torchaudio'] = err.args[0]
|
|
else:
|
|
wav = convert_audio(wav, sr, samplerate, audio_channels)
|
|
|
|
if wav is None:
|
|
print(f"Could not load file {track}. "
|
|
"Maybe it is not a supported file format? ")
|
|
for backend, error in errors.items():
|
|
print(f"When trying to load using {backend}, got the following error: {error}")
|
|
sys.exit(1)
|
|
return wav
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser("demucs.separate",
|
|
description="Separate the sources for the given tracks")
|
|
parser.add_argument("tracks", nargs='+', type=Path, default=[], help='Path to tracks')
|
|
add_model_flags(parser)
|
|
parser.add_argument("-v", "--verbose", action="store_true")
|
|
parser.add_argument("-o",
|
|
"--out",
|
|
type=Path,
|
|
default=Path("separated"),
|
|
help="Folder where to put extracted tracks. A subfolder "
|
|
"with the model name will be created.")
|
|
parser.add_argument("-d",
|
|
"--device",
|
|
default="cuda" if th.cuda.is_available() else "cpu",
|
|
help="Device to use, default is cuda if available else cpu")
|
|
parser.add_argument("--shifts",
|
|
default=1,
|
|
type=int,
|
|
help="Number of random shifts for equivariant stabilization."
|
|
"Increase separation time but improves quality for Demucs. 10 was used "
|
|
"in the original paper.")
|
|
parser.add_argument("--overlap",
|
|
default=0.25,
|
|
type=float,
|
|
help="Overlap between the splits.")
|
|
split_group = parser.add_mutually_exclusive_group()
|
|
split_group.add_argument("--no-split",
|
|
action="store_false",
|
|
dest="split",
|
|
default=True,
|
|
help="Doesn't split audio in chunks. "
|
|
"This can use large amounts of memory.")
|
|
split_group.add_argument("--segment", type=int,
|
|
help="Set split size of each chunk. "
|
|
"This can help save memory of graphic card. ")
|
|
parser.add_argument("--two-stems",
|
|
dest="stem", metavar="STEM",
|
|
help="Only separate audio into {STEM} and no_{STEM}. ")
|
|
group = parser.add_mutually_exclusive_group()
|
|
group.add_argument("--int24", action="store_true",
|
|
help="Save wav output as 24 bits wav.")
|
|
group.add_argument("--float32", action="store_true",
|
|
help="Save wav output as float32 (2x bigger).")
|
|
parser.add_argument("--clip-mode", default="rescale", choices=["rescale", "clamp"],
|
|
help="Strategy for avoiding clipping: rescaling entire signal "
|
|
"if necessary (rescale) or hard clipping (clamp).")
|
|
parser.add_argument("--mp3", action="store_true",
|
|
help="Convert the output wavs to mp3.")
|
|
parser.add_argument("--mp3-bitrate",
|
|
default=320,
|
|
type=int,
|
|
help="Bitrate of converted mp3.")
|
|
parser.add_argument("-j", "--jobs",
|
|
default=0,
|
|
type=int,
|
|
help="Number of jobs. This can increase memory usage but will "
|
|
"be much faster when multiple cores are available.")
|
|
|
|
args = parser.parse_args()
|
|
|
|
try:
|
|
model = get_model_from_args(args)
|
|
except ModelLoadingError as error:
|
|
fatal(error.args[0])
|
|
|
|
if args.segment is not None and args.segment < 8:
|
|
fatal('Segment must greater than 8. ')
|
|
|
|
if isinstance(model, BagOfModels):
|
|
if args.segment is not None:
|
|
for sub in model.models:
|
|
sub.segment = args.segment
|
|
else:
|
|
if args.segment is not None:
|
|
sub.segment = args.segment
|
|
|
|
model.cpu()
|
|
model.eval()
|
|
|
|
if args.stem is not None and args.stem not in model.sources:
|
|
fatal(
|
|
'error: stem "{stem}" is not in selected model. STEM must be one of {sources}.'.format(
|
|
stem=args.stem, sources=', '.join(model.sources)))
|
|
out = args.out / args.name
|
|
out.mkdir(parents=True, exist_ok=True)
|
|
print(f"Separated tracks will be stored in {out.resolve()}")
|
|
for track in args.tracks:
|
|
if not track.exists():
|
|
print(
|
|
f"File {track} does not exist. If the path contains spaces, "
|
|
"please try again after surrounding the entire path with quotes \"\".",
|
|
file=sys.stderr)
|
|
continue
|
|
print(f"Separating track {track}")
|
|
wav = load_track(track, model.audio_channels, model.samplerate)
|
|
|
|
ref = wav.mean(0)
|
|
wav = (wav - ref.mean()) / ref.std()
|
|
sources = apply_model(model, wav[None], device=args.device, shifts=args.shifts,
|
|
split=args.split, overlap=args.overlap, progress=True,
|
|
num_workers=args.jobs)[0]
|
|
sources = sources * ref.std() + ref.mean()
|
|
|
|
track_folder = out / track.name.rsplit(".", 1)[0]
|
|
track_folder.mkdir(exist_ok=True)
|
|
if args.mp3:
|
|
ext = ".mp3"
|
|
else:
|
|
ext = ".wav"
|
|
kwargs = {
|
|
'samplerate': model.samplerate,
|
|
'bitrate': args.mp3_bitrate,
|
|
'clip': args.clip_mode,
|
|
'as_float': args.float32,
|
|
'bits_per_sample': 24 if args.int24 else 16,
|
|
}
|
|
if args.stem is None:
|
|
for source, name in zip(sources, model.sources):
|
|
stem = str(track_folder / (name + ext))
|
|
save_audio(source, stem, **kwargs)
|
|
else:
|
|
sources = list(sources)
|
|
stem = str(track_folder / (args.stem + ext))
|
|
save_audio(sources.pop(model.sources.index(args.stem)), stem, **kwargs)
|
|
# Warning : after poping the stem, selected stem is no longer in the list 'sources'
|
|
other_stem = th.zeros_like(sources[0])
|
|
for i in sources:
|
|
other_stem += i
|
|
stem = str(track_folder / ("no_" + args.stem + ext))
|
|
save_audio(other_stem, stem, **kwargs)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|