diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 2e60a8331e..662839ff87 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -154,6 +154,8 @@ add_library(core STATIC
     hle/kernel/hle_ipc.h
     hle/kernel/k_affinity_mask.h
     hle/kernel/k_priority_queue.h
+    hle/kernel/k_scheduler.cpp
+    hle/kernel/k_scheduler.h
     hle/kernel/k_scheduler_lock.h
     hle/kernel/kernel.cpp
     hle/kernel/kernel.h
@@ -189,8 +191,6 @@ add_library(core STATIC
     hle/kernel/readable_event.h
     hle/kernel/resource_limit.cpp
     hle/kernel/resource_limit.h
-    hle/kernel/scheduler.cpp
-    hle/kernel/scheduler.h
     hle/kernel/server_port.cpp
     hle/kernel/server_port.h
     hle/kernel/server_session.cpp
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index b63f79915f..7a4eb88a24 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -15,8 +15,8 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/hardware_properties.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/svc.h"
 #include "core/memory.h"
 #include "core/settings.h"
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 01e4faac8f..77d21d41c5 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -27,10 +27,10 @@
 #include "core/file_sys/vfs_real.h"
 #include "core/hardware_interrupt_manager.h"
 #include "core/hle/kernel/client_port.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/physical_core.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/service/am/applets/applets.h"
 #include "core/hle/service/apm/controller.h"
@@ -508,14 +508,6 @@ std::size_t System::CurrentCoreIndex() const {
     return core;
 }
 
-Kernel::Scheduler& System::CurrentScheduler() {
-    return impl->kernel.CurrentScheduler();
-}
-
-const Kernel::Scheduler& System::CurrentScheduler() const {
-    return impl->kernel.CurrentScheduler();
-}
-
 Kernel::PhysicalCore& System::CurrentPhysicalCore() {
     return impl->kernel.CurrentPhysicalCore();
 }
@@ -524,22 +516,14 @@ const Kernel::PhysicalCore& System::CurrentPhysicalCore() const {
     return impl->kernel.CurrentPhysicalCore();
 }
 
-Kernel::Scheduler& System::Scheduler(std::size_t core_index) {
-    return impl->kernel.Scheduler(core_index);
-}
-
-const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const {
-    return impl->kernel.Scheduler(core_index);
+/// Gets the global scheduler
+Kernel::GlobalSchedulerContext& System::GlobalSchedulerContext() {
+    return impl->kernel.GlobalSchedulerContext();
 }
 
 /// Gets the global scheduler
-Kernel::GlobalScheduler& System::GlobalScheduler() {
-    return impl->kernel.GlobalScheduler();
-}
-
-/// Gets the global scheduler
-const Kernel::GlobalScheduler& System::GlobalScheduler() const {
-    return impl->kernel.GlobalScheduler();
+const Kernel::GlobalSchedulerContext& System::GlobalSchedulerContext() const {
+    return impl->kernel.GlobalSchedulerContext();
 }
 
 Kernel::Process* System::CurrentProcess() {
diff --git a/src/core/core.h b/src/core/core.h
index 29b8fb92a1..579a774e4f 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -26,11 +26,11 @@ class VfsFilesystem;
 } // namespace FileSys
 
 namespace Kernel {
-class GlobalScheduler;
+class GlobalSchedulerContext;
+class KScheduler;
 class KernelCore;
 class PhysicalCore;
 class Process;
-class Scheduler;
 } // namespace Kernel
 
 namespace Loader {
@@ -213,12 +213,6 @@ public:
     /// Gets the index of the currently running CPU core
     [[nodiscard]] std::size_t CurrentCoreIndex() const;
 
-    /// Gets the scheduler for the CPU core that is currently running
-    [[nodiscard]] Kernel::Scheduler& CurrentScheduler();
-
-    /// Gets the scheduler for the CPU core that is currently running
-    [[nodiscard]] const Kernel::Scheduler& CurrentScheduler() const;
-
     /// Gets the physical core for the CPU core that is currently running
     [[nodiscard]] Kernel::PhysicalCore& CurrentPhysicalCore();
 
@@ -261,17 +255,11 @@ public:
     /// Gets an immutable reference to the renderer.
     [[nodiscard]] const VideoCore::RendererBase& Renderer() const;
 
-    /// Gets the scheduler for the CPU core with the specified index
-    [[nodiscard]] Kernel::Scheduler& Scheduler(std::size_t core_index);
-
-    /// Gets the scheduler for the CPU core with the specified index
-    [[nodiscard]] const Kernel::Scheduler& Scheduler(std::size_t core_index) const;
+    /// Gets the global scheduler
+    [[nodiscard]] Kernel::GlobalSchedulerContext& GlobalSchedulerContext();
 
     /// Gets the global scheduler
-    [[nodiscard]] Kernel::GlobalScheduler& GlobalScheduler();
-
-    /// Gets the global scheduler
-    [[nodiscard]] const Kernel::GlobalScheduler& GlobalScheduler() const;
+    [[nodiscard]] const Kernel::GlobalSchedulerContext& GlobalSchedulerContext() const;
 
     /// Gets the manager for the guest device memory
     [[nodiscard]] Core::DeviceMemory& DeviceMemory();
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 0cff985e9a..1791543483 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -10,9 +10,9 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/cpu_manager.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/physical_core.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "video_core/gpu.h"
 
@@ -109,11 +109,8 @@ void* CpuManager::GetStartFuncParamater() {
 
 void CpuManager::MultiCoreRunGuestThread() {
     auto& kernel = system.Kernel();
-    {
-        auto& sched = kernel.CurrentScheduler();
-        sched.OnThreadStart();
-    }
-    auto* thread = kernel.CurrentScheduler().GetCurrentThread();
+    kernel.CurrentScheduler()->OnThreadStart();
+    auto* thread = kernel.CurrentScheduler()->GetCurrentThread();
     auto& host_context = thread->GetHostContext();
     host_context->SetRewindPoint(GuestRewindFunction, this);
     MultiCoreRunGuestLoop();
@@ -130,8 +127,8 @@ void CpuManager::MultiCoreRunGuestLoop() {
             physical_core = &kernel.CurrentPhysicalCore();
         }
         system.ExitDynarmicProfile();
-        auto& scheduler = kernel.CurrentScheduler();
-        scheduler.TryDoContextSwitch();
+        physical_core->ArmInterface().ClearExclusiveState();
+        kernel.CurrentScheduler()->RescheduleCurrentCore();
     }
 }
 
@@ -140,25 +137,21 @@ void CpuManager::MultiCoreRunIdleThread() {
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
         physical_core.Idle();
-        auto& scheduler = kernel.CurrentScheduler();
-        scheduler.TryDoContextSwitch();
+        kernel.CurrentScheduler()->RescheduleCurrentCore();
     }
 }
 
 void CpuManager::MultiCoreRunSuspendThread() {
     auto& kernel = system.Kernel();
-    {
-        auto& sched = kernel.CurrentScheduler();
-        sched.OnThreadStart();
-    }
+    kernel.CurrentScheduler()->OnThreadStart();
     while (true) {
         auto core = kernel.GetCurrentHostThreadID();
-        auto& scheduler = kernel.CurrentScheduler();
+        auto& scheduler = *kernel.CurrentScheduler();
         Kernel::Thread* current_thread = scheduler.GetCurrentThread();
         Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[core].host_context);
         ASSERT(scheduler.ContextSwitchPending());
         ASSERT(core == kernel.GetCurrentHostThreadID());
-        scheduler.TryDoContextSwitch();
+        scheduler.RescheduleCurrentCore();
     }
 }
 
@@ -206,11 +199,8 @@ void CpuManager::MultiCorePause(bool paused) {
 
 void CpuManager::SingleCoreRunGuestThread() {
     auto& kernel = system.Kernel();
-    {
-        auto& sched = kernel.CurrentScheduler();
-        sched.OnThreadStart();
-    }
-    auto* thread = kernel.CurrentScheduler().GetCurrentThread();
+    kernel.CurrentScheduler()->OnThreadStart();
+    auto* thread = kernel.CurrentScheduler()->GetCurrentThread();
     auto& host_context = thread->GetHostContext();
     host_context->SetRewindPoint(GuestRewindFunction, this);
     SingleCoreRunGuestLoop();
@@ -218,7 +208,7 @@ void CpuManager::SingleCoreRunGuestThread() {
 
 void CpuManager::SingleCoreRunGuestLoop() {
     auto& kernel = system.Kernel();
-    auto* thread = kernel.CurrentScheduler().GetCurrentThread();
+    auto* thread = kernel.CurrentScheduler()->GetCurrentThread();
     while (true) {
         auto* physical_core = &kernel.CurrentPhysicalCore();
         system.EnterDynarmicProfile();
@@ -230,9 +220,10 @@ void CpuManager::SingleCoreRunGuestLoop() {
         thread->SetPhantomMode(true);
         system.CoreTiming().Advance();
         thread->SetPhantomMode(false);
+        physical_core->ArmInterface().ClearExclusiveState();
         PreemptSingleCore();
         auto& scheduler = kernel.Scheduler(current_core);
-        scheduler.TryDoContextSwitch();
+        scheduler.RescheduleCurrentCore();
     }
 }
 
@@ -244,24 +235,21 @@ void CpuManager::SingleCoreRunIdleThread() {
         system.CoreTiming().AddTicks(1000U);
         idle_count++;
         auto& scheduler = physical_core.Scheduler();
-        scheduler.TryDoContextSwitch();
+        scheduler.RescheduleCurrentCore();
     }
 }
 
 void CpuManager::SingleCoreRunSuspendThread() {
     auto& kernel = system.Kernel();
-    {
-        auto& sched = kernel.CurrentScheduler();
-        sched.OnThreadStart();
-    }
+    kernel.CurrentScheduler()->OnThreadStart();
     while (true) {
         auto core = kernel.GetCurrentHostThreadID();
-        auto& scheduler = kernel.CurrentScheduler();
+        auto& scheduler = *kernel.CurrentScheduler();
         Kernel::Thread* current_thread = scheduler.GetCurrentThread();
         Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[0].host_context);
         ASSERT(scheduler.ContextSwitchPending());
         ASSERT(core == kernel.GetCurrentHostThreadID());
-        scheduler.TryDoContextSwitch();
+        scheduler.RescheduleCurrentCore();
     }
 }
 
@@ -280,12 +268,12 @@ void CpuManager::PreemptSingleCore(bool from_running_enviroment) {
     }
     current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES);
     system.CoreTiming().ResetTicks();
-    scheduler.Unload();
+    scheduler.Unload(scheduler.GetCurrentThread());
     auto& next_scheduler = system.Kernel().Scheduler(current_core);
     Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext());
     /// May have changed scheduler
     auto& current_scheduler = system.Kernel().Scheduler(current_core);
-    current_scheduler.Reload();
+    current_scheduler.Reload(scheduler.GetCurrentThread());
     auto* currrent_thread2 = current_scheduler.GetCurrentThread();
     if (!currrent_thread2->IsIdleThread()) {
         idle_count = 0;
@@ -369,8 +357,7 @@ void CpuManager::RunThread(std::size_t core) {
             return;
         }
 
-        auto& scheduler = system.Kernel().CurrentScheduler();
-        Kernel::Thread* current_thread = scheduler.GetCurrentThread();
+        auto* current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
         data.is_running = true;
         Common::Fiber::YieldTo(data.host_context, current_thread->GetHostContext());
         data.is_running = false;
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 048acd30e9..bc32be18b6 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -12,8 +12,8 @@
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/result.h"
@@ -153,7 +153,7 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6
                                                     bool should_decrement) {
     auto& memory = system.Memory();
     auto& kernel = system.Kernel();
-    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
+    Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
 
     Handle event_handle = InvalidHandle;
     {
@@ -223,7 +223,7 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6
 ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
     auto& memory = system.Memory();
     auto& kernel = system.Kernel();
-    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
+    Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
 
     Handle event_handle = InvalidHandle;
     {
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index 3e745c18bc..40988b0fd4 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -8,9 +8,9 @@
 #include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 
 namespace Kernel {
@@ -105,7 +105,7 @@ bool HandleTable::IsValid(Handle handle) const {
 
 std::shared_ptr<Object> HandleTable::GetGeneric(Handle handle) const {
     if (handle == CurrentThread) {
-        return SharedFrom(kernel.CurrentScheduler().GetCurrentThread());
+        return SharedFrom(kernel.CurrentScheduler()->GetCurrentThread());
     } else if (handle == CurrentProcess) {
         return SharedFrom(kernel.CurrentProcess());
     }
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 81f85643b5..7eda89786e 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -17,11 +17,11 @@
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/hle_ipc.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/time_manager.h"
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp
new file mode 100644
index 0000000000..7f7da610d2
--- /dev/null
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -0,0 +1,873 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file references various implementation details from Atmosphere, an open-source firmware for
+// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
+
+#include <algorithm>
+#include <mutex>
+#include <set>
+#include <unordered_set>
+#include <utility>
+
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/fiber.h"
+#include "common/logging/log.h"
+#include "core/arm/arm_interface.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/cpu_manager.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/physical_core.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
+
+namespace Kernel {
+
+static void IncrementScheduledCount(Thread* thread) {
+    if (auto process = thread->GetOwnerProcess(); process) {
+        process->IncrementScheduledCount();
+    }
+}
+
+GlobalSchedulerContext::GlobalSchedulerContext(KernelCore& kernel)
+    : kernel{kernel}, scheduler_lock{kernel} {}
+
+GlobalSchedulerContext::~GlobalSchedulerContext() = default;
+
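+// Interrupts every core whose bit is set in cores_pending_reschedule (one bit per core, so a
+// mask of 0b0101 covers cores 0 and 2) and, when invoked from a guest thread, performs the
+// context switch on the current core directly instead of interrupting it.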
+/*static*/ void KScheduler::RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedule,
+                                            Core::EmuThreadHandle global_thread) {
+    u32 current_core = global_thread.host_handle;
+    bool must_context_switch = global_thread.guest_handle != InvalidHandle &&
+                               (current_core < Core::Hardware::NUM_CPU_CORES);
+
+    while (cores_pending_reschedule != 0) {
+        u32 core = Common::CountTrailingZeroes64(cores_pending_reschedule);
+        ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+        if (!must_context_switch || core != current_core) {
+            auto& phys_core = kernel.PhysicalCore(core);
+            phys_core.Interrupt();
+        } else {
+            must_context_switch = true;
+        }
+        cores_pending_reschedule &= ~(1ULL << core);
+    }
+    if (must_context_switch) {
+        auto core_scheduler = kernel.CurrentScheduler();
+        kernel.ExitSVCProfile();
+        core_scheduler->RescheduleCurrentCore();
+        kernel.EnterSVCProfile();
+    }
+}
+
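+// Records the new highest-priority thread for this core. Returns (1ULL << core_id) when the
+// pick changed, so callers can accumulate a mask of cores needing rescheduling, or 0 otherwise.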
+u64 KScheduler::UpdateHighestPriorityThread(Thread* highest_thread) {
+    std::scoped_lock lock{guard};
+    if (Thread* prev_highest_thread = this->state.highest_priority_thread;
+        prev_highest_thread != highest_thread) {
+        if (prev_highest_thread != nullptr) {
+            IncrementScheduledCount(prev_highest_thread);
+            prev_highest_thread->SetLastScheduledTick(system.CoreTiming().GetCPUTicks());
+        }
+        if (this->state.should_count_idle) {
+            if (highest_thread != nullptr) {
+                // if (Process* process = highest_thread->GetOwnerProcess(); process != nullptr) {
+                //    process->SetRunningThread(this->core_id, highest_thread,
+                //                              this->state.idle_count);
+                //}
+            } else {
+                this->state.idle_count++;
+            }
+        }
+
+        this->state.highest_priority_thread = highest_thread;
+        this->state.needs_scheduling = true;
+        return (1ULL << this->core_id);
+    } else {
+        return 0;
+    }
+}
+
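+// Recomputes the scheduled thread for every core: first each core takes the front of its
+// priority queue, then suggested threads are migrated onto any cores that would otherwise idle.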
+/*static*/ u64 KScheduler::UpdateHighestPriorityThreadsImpl(KernelCore& kernel) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    /* Clear that we need to update. */
+    ClearSchedulerUpdateNeeded(kernel);
+
+    u64 cores_needing_scheduling = 0, idle_cores = 0;
+    Thread* top_threads[Core::Hardware::NUM_CPU_CORES];
+    auto& priority_queue = GetPriorityQueue(kernel);
+
+    /* We want to go over all cores, finding the highest priority thread and determining if
+     * scheduling is needed for that core. */
+    for (size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
+        Thread* top_thread = priority_queue.GetScheduledFront(static_cast<s32>(core_id));
+        if (top_thread != nullptr) {
+            // If the thread has no waiters, we need to check if the process has a thread pinned.
+            // if (top_thread->GetNumKernelWaiters() == 0) {
+            //     if (Process* parent = top_thread->GetOwnerProcess(); parent != nullptr) {
+            //         if (Thread* pinned = parent->GetPinnedThread(core_id);
+            //             pinned != nullptr && pinned != top_thread) {
+            //             // We prefer our parent's pinned thread if possible. However, we also
+            //             // don't want to schedule un-runnable threads.
+            //             if (pinned->GetRawState() == Thread::ThreadState_Runnable) {
+            //                 top_thread = pinned;
+            //             } else {
+            //                 top_thread = nullptr;
+            //             }
+            //         }
+            //     }
+            // }
+        } else {
+            idle_cores |= (1ULL << core_id);
+        }
+
+        top_threads[core_id] = top_thread;
+        cores_needing_scheduling |=
+            kernel.Scheduler(core_id).UpdateHighestPriorityThread(top_threads[core_id]);
+    }
+
+    /* Idle cores are bad. We're going to try to migrate threads to each idle core in turn. */
+    while (idle_cores != 0) {
+        u32 core_id = Common::CountTrailingZeroes64(idle_cores);
+        if (Thread* suggested = priority_queue.GetSuggestedFront(core_id); suggested != nullptr) {
+            s32 migration_candidates[Core::Hardware::NUM_CPU_CORES];
+            size_t num_candidates = 0;
+
+            /* While we have a suggested thread, try to migrate it! */
+            while (suggested != nullptr) {
+                /* Check if the suggested thread is the top thread on its core. */
+                const s32 suggested_core = suggested->GetActiveCore();
+                if (Thread* top_thread =
+                        (suggested_core >= 0) ? top_threads[suggested_core] : nullptr;
+                    top_thread != suggested) {
+                    /* Make sure we're not dealing with threads too high priority for migration. */
+                    if (top_thread != nullptr &&
+                        top_thread->GetPriority() < HighestCoreMigrationAllowedPriority) {
+                        break;
+                    }
+
+                    /* The suggested thread isn't bound to its core, so we can migrate it! */
+                    suggested->SetActiveCore(core_id);
+                    priority_queue.ChangeCore(suggested_core, suggested);
+
+                    top_threads[core_id] = suggested;
+                    cores_needing_scheduling |=
+                        kernel.Scheduler(core_id).UpdateHighestPriorityThread(top_threads[core_id]);
+                    break;
+                }
+
+                /* Note this core as a candidate for migration. */
+                ASSERT(num_candidates < Core::Hardware::NUM_CPU_CORES);
+                migration_candidates[num_candidates++] = suggested_core;
+                suggested = priority_queue.GetSuggestedNext(core_id, suggested);
+            }
+
+            /* If suggested is nullptr, we failed to migrate a specific thread. So let's try all our
+             * candidate cores' top threads. */
+            if (suggested == nullptr) {
+                for (size_t i = 0; i < num_candidates; i++) {
+                    /* Check if there's some other thread that can run on the candidate core. */
+                    const s32 candidate_core = migration_candidates[i];
+                    suggested = top_threads[candidate_core];
+                    if (Thread* next_on_candidate_core =
+                            priority_queue.GetScheduledNext(candidate_core, suggested);
+                        next_on_candidate_core != nullptr) {
+                        /* The candidate core can run some other thread! We'll migrate its current
+                         * top thread to us. */
+                        top_threads[candidate_core] = next_on_candidate_core;
+                        cores_needing_scheduling |=
+                            kernel.Scheduler(candidate_core)
+                                .UpdateHighestPriorityThread(top_threads[candidate_core]);
+
+                        /* Perform the migration. */
+                        suggested->SetActiveCore(core_id);
+                        priority_queue.ChangeCore(candidate_core, suggested);
+
+                        top_threads[core_id] = suggested;
+                        cores_needing_scheduling |=
+                            kernel.Scheduler(core_id).UpdateHighestPriorityThread(
+                                top_threads[core_id]);
+                        break;
+                    }
+                }
+            }
+        }
+
+        idle_cores &= ~(1ULL << core_id);
+    }
+
+    return cores_needing_scheduling;
+}
+
+void GlobalSchedulerContext::AddThread(std::shared_ptr<Thread> thread) {
+    std::scoped_lock lock{global_list_guard};
+    thread_list.push_back(std::move(thread));
+}
+
+void GlobalSchedulerContext::RemoveThread(std::shared_ptr<Thread> thread) {
+    std::scoped_lock lock{global_list_guard};
+    thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
+                      thread_list.end());
+}
+
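+// Note that core 3 preempts at the lowest priority (63); on real hardware that core mostly
+// runs system services rather than application threads.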
+void GlobalSchedulerContext::PreemptThreads() {
+    // The priority levels at which the global scheduler preempts threads every 10 ms. They are
+    // ordered from Core 0 to Core 3.
+    std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 63};
+
+    ASSERT(IsLocked());
+    for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
+        const u32 priority = preemption_priorities[core_id];
+        kernel.Scheduler(core_id).RotateScheduledQueue(core_id, priority);
+    }
+}
+
+bool GlobalSchedulerContext::IsLocked() const {
+    return scheduler_lock.IsLockedByCurrentThread();
+}
+
+/*static*/ void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread,
+                                                 u32 old_state) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    /* Check if the state has changed, because if it hasn't there's nothing to do. */
+    const auto cur_state = thread->scheduling_state;
+    if (cur_state == old_state) {
+        return;
+    }
+
+    /* Update the priority queues. */
+    if (old_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+        /* If we were previously runnable, then we're not runnable now, and we should remove. */
+        GetPriorityQueue(kernel).Remove(thread);
+        IncrementScheduledCount(thread);
+        SetSchedulerUpdateNeeded(kernel);
+    } else if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+        /* If we're now runnable, then we weren't previously, and we should add. */
+        GetPriorityQueue(kernel).PushBack(thread);
+        IncrementScheduledCount(thread);
+        SetSchedulerUpdateNeeded(kernel);
+    }
+}
+
+/*static*/ void KScheduler::OnThreadPriorityChanged(KernelCore& kernel, Thread* thread,
+                                                    Thread* current_thread, u32 old_priority) {
+
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    /* If the thread is runnable, we want to change its priority in the queue. */
+    if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+        GetPriorityQueue(kernel).ChangePriority(
+            old_priority, thread == kernel.CurrentScheduler()->GetCurrentThread(), thread);
+        IncrementScheduledCount(thread);
+        SetSchedulerUpdateNeeded(kernel);
+    }
+}
+
+/*static*/ void KScheduler::OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread,
+                                                        const KAffinityMask& old_affinity,
+                                                        s32 old_core) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    /* If the thread is runnable, we want to change its affinity in the queue. */
+    if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+        GetPriorityQueue(kernel).ChangeAffinityMask(old_core, old_affinity, thread);
+        IncrementScheduledCount(thread);
+        SetSchedulerUpdateNeeded(kernel);
+    }
+}
+
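+// Rotates one priority level of this core's queue (the front thread moves to the back) and
+// then attempts the suggestion migrations described below; used by PreemptThreads above.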
+void KScheduler::RotateScheduledQueue(s32 core_id, s32 priority) {
+    ASSERT(system.GlobalSchedulerContext().IsLocked());
+
+    /* Get a reference to the priority queue. */
+    auto& kernel = system.Kernel();
+    auto& priority_queue = GetPriorityQueue(kernel);
+
+    /* Rotate the front of the queue to the end. */
+    Thread* top_thread = priority_queue.GetScheduledFront(core_id, priority);
+    Thread* next_thread = nullptr;
+    if (top_thread != nullptr) {
+        next_thread = priority_queue.MoveToScheduledBack(top_thread);
+        if (next_thread != top_thread) {
+            IncrementScheduledCount(top_thread);
+            IncrementScheduledCount(next_thread);
+        }
+    }
+
+    /* While we have a suggested thread, try to migrate it! */
+    {
+        Thread* suggested = priority_queue.GetSuggestedFront(core_id, priority);
+        while (suggested != nullptr) {
+            /* Check if the suggested thread is the top thread on its core. */
+            const s32 suggested_core = suggested->GetActiveCore();
+            if (Thread* top_on_suggested_core =
+                    (suggested_core >= 0) ? priority_queue.GetScheduledFront(suggested_core)
+                                          : nullptr;
+                top_on_suggested_core != suggested) {
+                /* If the next thread is a new thread that has been waiting longer than our
+                 * suggestion, we prefer it to our suggestion. */
+                if (top_thread != next_thread && next_thread != nullptr &&
+                    next_thread->GetLastScheduledTick() < suggested->GetLastScheduledTick()) {
+                    suggested = nullptr;
+                    break;
+                }
+
+                /* If we're allowed to do a migration, do one. */
+                /* NOTE: Unlike migrations in UpdateHighestPriorityThread, this moves the suggestion
+                 * to the front of the queue. */
+                if (top_on_suggested_core == nullptr ||
+                    top_on_suggested_core->GetPriority() >= HighestCoreMigrationAllowedPriority) {
+                    suggested->SetActiveCore(core_id);
+                    priority_queue.ChangeCore(suggested_core, suggested, true);
+                    IncrementScheduledCount(suggested);
+                    break;
+                }
+            }
+
+            /* Get the next suggestion. */
+            suggested = priority_queue.GetSamePriorityNext(core_id, suggested);
+        }
+    }
+
+    /* Now that we might have migrated a thread with the same priority, check if we can do better.
+     */
+    {
+        Thread* best_thread = priority_queue.GetScheduledFront(core_id);
+        if (best_thread == GetCurrentThread()) {
+            best_thread = priority_queue.GetScheduledNext(core_id, best_thread);
+        }
+
+        /* If the best thread we can choose has a priority the same or worse than ours, try to
+         * migrate a higher priority thread. */
+        if (best_thread != nullptr && best_thread->GetPriority() >= static_cast<u32>(priority)) {
+            Thread* suggested = priority_queue.GetSuggestedFront(core_id);
+            while (suggested != nullptr) {
+                /* If the suggestion's priority is the same as ours, don't bother. */
+                if (suggested->GetPriority() >= best_thread->GetPriority()) {
+                    break;
+                }
+
+                /* Check if the suggested thread is the top thread on its core. */
+                const s32 suggested_core = suggested->GetActiveCore();
+                if (Thread* top_on_suggested_core =
+                        (suggested_core >= 0) ? priority_queue.GetScheduledFront(suggested_core)
+                                              : nullptr;
+                    top_on_suggested_core != suggested) {
+                    /* If we're allowed to do a migration, do one. */
+                    /* NOTE: Unlike migrations in UpdateHighestPriorityThread, this moves the
+                     * suggestion to the front of the queue. */
+                    if (top_on_suggested_core == nullptr ||
+                        top_on_suggested_core->GetPriority() >=
+                            HighestCoreMigrationAllowedPriority) {
+                        suggested->SetActiveCore(core_id);
+                        priority_queue.ChangeCore(suggested_core, suggested, true);
+                        IncrementScheduledCount(suggested);
+                        break;
+                    }
+                }
+
+                /* Get the next suggestion. */
+                suggested = priority_queue.GetSuggestedNext(core_id, suggested);
+            }
+        }
+    }
+
+    /* After a rotation, we need a scheduler update. */
+    SetSchedulerUpdateNeeded(kernel);
+}
+
+/*static*/ bool KScheduler::CanSchedule(KernelCore& kernel) {
+    return kernel.CurrentScheduler()->GetCurrentThread()->GetDisableDispatchCount() <= 1;
+}
+
+/*static*/ bool KScheduler::IsSchedulerUpdateNeeded(const KernelCore& kernel) {
+    return kernel.GlobalSchedulerContext().scheduler_update_needed.load(std::memory_order_acquire);
+}
+
+/*static*/ void KScheduler::SetSchedulerUpdateNeeded(KernelCore& kernel) {
+    kernel.GlobalSchedulerContext().scheduler_update_needed.store(true, std::memory_order_release);
+}
+
+/*static*/ void KScheduler::ClearSchedulerUpdateNeeded(KernelCore& kernel) {
+    kernel.GlobalSchedulerContext().scheduler_update_needed.store(false, std::memory_order_release);
+}
+
+/*static*/ void KScheduler::DisableScheduling(KernelCore& kernel) {
+    if (auto* scheduler = kernel.CurrentScheduler(); scheduler) {
+        ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 0);
+        scheduler->GetCurrentThread()->DisableDispatch();
+    }
+}
+
+/*static*/ void KScheduler::EnableScheduling(KernelCore& kernel, u64 cores_needing_scheduling,
+                                             Core::EmuThreadHandle global_thread) {
+    if (auto* scheduler = kernel.CurrentScheduler(); scheduler) {
+        scheduler->GetCurrentThread()->EnableDispatch();
+    }
+    RescheduleCores(kernel, cores_needing_scheduling, global_thread);
+}
+
+/*static*/ u64 KScheduler::UpdateHighestPriorityThreads(KernelCore& kernel) {
+    if (IsSchedulerUpdateNeeded(kernel)) {
+        return UpdateHighestPriorityThreadsImpl(kernel);
+    } else {
+        return 0;
+    }
+}
+
+/*static*/ KSchedulerPriorityQueue& KScheduler::GetPriorityQueue(KernelCore& kernel) {
+    return kernel.GlobalSchedulerContext().priority_queue;
+}
+
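+// This presumably corresponds to svcSleepThread with a yield value of 0: the current thread is
+// moved to the back of its priority level on its own core only.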
+void KScheduler::YieldWithoutCoreMigration() {
+    auto& kernel = system.Kernel();
+
+    /* Validate preconditions. */
+    ASSERT(CanSchedule(kernel));
+    ASSERT(kernel.CurrentProcess() != nullptr);
+
+    /* Get the current thread and process. */
+    Thread& cur_thread = *GetCurrentThread();
+    Process& cur_process = *kernel.CurrentProcess();
+
+    /* If the thread's yield count matches, there's nothing for us to do. */
+    if (cur_thread.GetYieldScheduleCount() == cur_process.GetScheduledCount()) {
+        return;
+    }
+
+    /* Get a reference to the priority queue. */
+    auto& priority_queue = GetPriorityQueue(kernel);
+
+    /* Perform the yield. */
+    {
+        SchedulerLock lock(kernel);
+
+        const auto cur_state = cur_thread.scheduling_state;
+        if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+            /* Put the current thread at the back of the queue. */
+            Thread* next_thread = priority_queue.MoveToScheduledBack(std::addressof(cur_thread));
+            IncrementScheduledCount(std::addressof(cur_thread));
+
+            /* If the next thread is different, we have an update to perform. */
+            if (next_thread != std::addressof(cur_thread)) {
+                SetSchedulerUpdateNeeded(kernel);
+            } else {
+                /* Otherwise, set the thread's yield count so that we won't waste work until the
+                 * process is scheduled again. */
+                cur_thread.SetYieldScheduleCount(cur_process.GetScheduledCount());
+            }
+        }
+    }
+}
+
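+// This presumably corresponds to svcSleepThread with a yield value of -1: besides rotating the
+// queue, a suggested thread from another core may be migrated here when profitable.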
+void KScheduler::YieldWithCoreMigration() {
+    auto& kernel = system.Kernel();
+
+    /* Validate preconditions. */
+    ASSERT(CanSchedule(kernel));
+    ASSERT(kernel.CurrentProcess() != nullptr);
+
+    /* Get the current thread and process. */
+    Thread& cur_thread = *GetCurrentThread();
+    Process& cur_process = *kernel.CurrentProcess();
+
+    /* If the thread's yield count matches, there's nothing for us to do. */
+    if (cur_thread.GetYieldScheduleCount() == cur_process.GetScheduledCount()) {
+        return;
+    }
+
+    /* Get a reference to the priority queue. */
+    auto& priority_queue = GetPriorityQueue(kernel);
+
+    /* Perform the yield. */
+    {
+        SchedulerLock lock(kernel);
+
+        const auto cur_state = cur_thread.scheduling_state;
+        if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+            /* Get the current active core. */
+            const s32 core_id = cur_thread.GetActiveCore();
+
+            /* Put the current thread at the back of the queue. */
+            Thread* next_thread = priority_queue.MoveToScheduledBack(std::addressof(cur_thread));
+            IncrementScheduledCount(std::addressof(cur_thread));
+
+            /* While we have a suggested thread, try to migrate it! */
+            bool recheck = false;
+            Thread* suggested = priority_queue.GetSuggestedFront(core_id);
+            while (suggested != nullptr) {
+                /* Check if the suggested thread is the thread running on its core. */
+                const s32 suggested_core = suggested->GetActiveCore();
+
+                if (Thread* running_on_suggested_core =
+                        (suggested_core >= 0)
+                            ? kernel.Scheduler(suggested_core).state.highest_priority_thread
+                            : nullptr;
+                    running_on_suggested_core != suggested) {
+                    /* If the current thread's priority is higher than our suggestion's we prefer
+                     * the next thread to the suggestion. */
+                    /* We also prefer the next thread when the current thread's priority is equal to
+                     * the suggestions, but the next thread has been waiting longer. */
+                    if ((suggested->GetPriority() > cur_thread.GetPriority()) ||
+                        (suggested->GetPriority() == cur_thread.GetPriority() &&
+                         next_thread != std::addressof(cur_thread) &&
+                         next_thread->GetLastScheduledTick() < suggested->GetLastScheduledTick())) {
+                        suggested = nullptr;
+                        break;
+                    }
+
+                    /* If we're allowed to do a migration, do one. */
+                    /* NOTE: Unlike migrations in UpdateHighestPriorityThread, this moves the
+                     * suggestion to the front of the queue. */
+                    if (running_on_suggested_core == nullptr ||
+                        running_on_suggested_core->GetPriority() >=
+                            HighestCoreMigrationAllowedPriority) {
+                        suggested->SetActiveCore(core_id);
+                        priority_queue.ChangeCore(suggested_core, suggested, true);
+                        IncrementScheduledCount(suggested);
+                        break;
+                    } else {
+                        /* We couldn't perform a migration, but we should check again on a future
+                         * yield. */
+                        recheck = true;
+                    }
+                }
+
+                /* Get the next suggestion. */
+                suggested = priority_queue.GetSuggestedNext(core_id, suggested);
+            }
+
+            /* If we still have a suggestion or the next thread is different, we have an update to
+             * perform. */
+            if (suggested != nullptr || next_thread != std::addressof(cur_thread)) {
+                SetSchedulerUpdateNeeded(kernel);
+            } else if (!recheck) {
+                /* Otherwise if we don't need to re-check, set the thread's yield count so that we
+                 * won't waste work until the process is scheduled again. */
+                cur_thread.SetYieldScheduleCount(cur_process.GetScheduledCount());
+            }
+        }
+    }
+}
+
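+// This presumably corresponds to svcSleepThread with a yield value of -2: the thread gives up
+// its core entirely (active core -1) so that any suggested thread can take its place.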
+void KScheduler::YieldToAnyThread() {
+    auto& kernel = system.Kernel();
+
+    /* Validate preconditions. */
+    ASSERT(CanSchedule(kernel));
+    ASSERT(kernel.CurrentProcess() != nullptr);
+
+    /* Get the current thread and process. */
+    Thread& cur_thread = *GetCurrentThread();
+    Process& cur_process = *kernel.CurrentProcess();
+
+    /* If the thread's yield count matches, there's nothing for us to do. */
+    if (cur_thread.GetYieldScheduleCount() == cur_process.GetScheduledCount()) {
+        return;
+    }
+
+    /* Get a reference to the priority queue. */
+    auto& priority_queue = GetPriorityQueue(kernel);
+
+    /* Perform the yield. */
+    {
+        SchedulerLock lock(kernel);
+
+        const auto cur_state = cur_thread.scheduling_state;
+        if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+            /* Get the current active core. */
+            const s32 core_id = cur_thread.GetActiveCore();
+
+            /* Migrate the current thread to core -1. */
+            cur_thread.SetActiveCore(-1);
+            priority_queue.ChangeCore(core_id, std::addressof(cur_thread));
+            IncrementScheduledCount(std::addressof(cur_thread));
+
+            /* If there's nothing scheduled, we can try to perform a migration. */
+            if (priority_queue.GetScheduledFront(core_id) == nullptr) {
+                /* While we have a suggested thread, try to migrate it! */
+                Thread* suggested = priority_queue.GetSuggestedFront(core_id);
+                while (suggested != nullptr) {
+                    /* Check if the suggested thread is the top thread on its core. */
+                    const s32 suggested_core = suggested->GetActiveCore();
+                    if (Thread* top_on_suggested_core =
+                            (suggested_core >= 0) ? priority_queue.GetScheduledFront(suggested_core)
+                                                  : nullptr;
+                        top_on_suggested_core != suggested) {
+                        /* If we're allowed to do a migration, do one. */
+                        if (top_on_suggested_core == nullptr ||
+                            top_on_suggested_core->GetPriority() >=
+                                HighestCoreMigrationAllowedPriority) {
+                            suggested->SetActiveCore(core_id);
+                            priority_queue.ChangeCore(suggested_core, suggested);
+                            IncrementScheduledCount(suggested);
+                        }
+
+                        /* Regardless of whether we migrated, we had a candidate, so we're done. */
+                        break;
+                    }
+
+                    /* Get the next suggestion. */
+                    suggested = priority_queue.GetSuggestedNext(core_id, suggested);
+                }
+
+                /* If the suggestion is different from the current thread, we need to perform an
+                 * update. */
+                if (suggested != std::addressof(cur_thread)) {
+                    SetSchedulerUpdateNeeded(kernel);
+                } else {
+                    /* Otherwise, set the thread's yield count so that we won't waste work until the
+                     * process is scheduled again. */
+                    cur_thread.SetYieldScheduleCount(cur_process.GetScheduledCount());
+                }
+            } else {
+                /* Otherwise, we have an update to perform. */
+                SetSchedulerUpdateNeeded(kernel);
+            }
+        }
+    }
+}
+
+void GlobalSchedulerContext::Lock() {
+    scheduler_lock.Lock();
+}
+
+void GlobalSchedulerContext::Unlock() {
+    scheduler_lock.Unlock();
+}
+
+KScheduler::KScheduler(Core::System& system, std::size_t core_id)
+    : system(system), core_id(core_id) {
+    switch_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>(OnSwitch), this);
+    this->state.needs_scheduling = true;
+    this->state.interrupt_task_thread_runnable = false;
+    this->state.should_count_idle = false;
+    this->state.idle_count = 0;
+    this->state.idle_thread_stack = nullptr;
+    this->state.highest_priority_thread = nullptr;
+}
+
+KScheduler::~KScheduler() = default;
+
+Thread* KScheduler::GetCurrentThread() const {
+    if (current_thread) {
+        return current_thread;
+    }
+    return idle_thread;
+}
+
+u64 KScheduler::GetLastContextSwitchTicks() const {
+    return last_context_switch_time;
+}
+
+void KScheduler::RescheduleCurrentCore() {
+    ASSERT(GetCurrentThread()->GetDisableDispatchCount() == 1);
+
+    auto& phys_core = system.Kernel().PhysicalCore(core_id);
+    if (phys_core.IsInterrupted()) {
+        phys_core.ClearInterrupt();
+    }
+    guard.lock();
+    if (this->state.needs_scheduling) {
+        Schedule();
+    } else {
+        guard.unlock();
+    }
+}
+
+void KScheduler::OnThreadStart() {
+    SwitchContextStep2();
+}
+
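+// Saves the outgoing thread's guest CPU context (including TPIDR_EL0) back into its Thread
+// object so another core can resume it later, then releases the thread's context guard.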
+void KScheduler::Unload(Thread* thread) {
+    if (thread) {
+        thread->SetIsRunning(false);
+        if (thread->IsContinuousOnSVC() && !thread->IsHLEThread()) {
+            system.ArmInterface(core_id).ExceptionalExit();
+            thread->SetContinuousOnSVC(false);
+        }
+        if (!thread->IsHLEThread() && !thread->HasExited()) {
+            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
+            cpu_core.SaveContext(thread->GetContext32());
+            cpu_core.SaveContext(thread->GetContext64());
+            // Save the TPIDR_EL0 system register in case it was modified.
+            thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
+            cpu_core.ClearExclusiveState();
+        }
+        thread->context_guard.unlock();
+    }
+}
+
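+// Counterpart to Unload: restores the incoming thread's guest context into this core's ARM
+// interface and switches the current process if the thread belongs to a different one.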
+void KScheduler::Reload(Thread* thread) {
+    if (thread) {
+        ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
+                   "Thread must be runnable.");
+
+        // Mark the thread as running; it is being placed back onto this core.
+        thread->SetIsRunning(true);
+        thread->SetWasRunning(false);
+
+        auto* const thread_owner_process = thread->GetOwnerProcess();
+        if (thread_owner_process != nullptr) {
+            system.Kernel().MakeCurrentProcess(thread_owner_process);
+        }
+        if (!thread->IsHLEThread()) {
+            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
+            cpu_core.LoadContext(thread->GetContext32());
+            cpu_core.LoadContext(thread->GetContext64());
+            cpu_core.SetTlsAddress(thread->GetTLSAddress());
+            cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
+            cpu_core.ClearExclusiveState();
+        }
+    }
+}
+
+void KScheduler::SwitchContextStep2() {
+    // Load context of new thread
+    Reload(current_thread);
+
+    RescheduleCurrentCore();
+}
+
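+// yuzu runs each guest thread on a host fiber, so switching threads means yielding from the
+// outgoing thread's fiber to this core's switch_fiber, which then resumes the chosen thread.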
+void KScheduler::ScheduleImpl() {
+    Thread* previous_thread = current_thread;
+    current_thread = state.highest_priority_thread;
+
+    this->state.needs_scheduling = false;
+
+    if (current_thread == previous_thread) {
+        guard.unlock();
+        return;
+    }
+
+    Process* const previous_process = system.Kernel().CurrentProcess();
+
+    UpdateLastContextSwitchTime(previous_thread, previous_process);
+
+    // Save context for previous thread
+    Unload(previous_thread);
+
+    std::shared_ptr<Common::Fiber>* old_context;
+    if (previous_thread != nullptr) {
+        old_context = &previous_thread->GetHostContext();
+    } else {
+        old_context = &idle_thread->GetHostContext();
+    }
+    guard.unlock();
+
+    Common::Fiber::YieldTo(*old_context, switch_fiber);
+    // When this thread resumes, it may have been moved to a different core's scheduler.
+    auto& next_scheduler = *system.Kernel().CurrentScheduler();
+    next_scheduler.SwitchContextStep2();
+}
+
+void KScheduler::OnSwitch(void* this_scheduler) {
+    KScheduler* sched = static_cast<KScheduler*>(this_scheduler);
+    sched->SwitchToCurrent();
+}
+
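+// Body of switch_fiber: repeatedly picks state.highest_priority_thread, yields into it (or the
+// idle thread), and loops again whenever a new schedule becomes pending for this core.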
+void KScheduler::SwitchToCurrent() {
+    while (true) {
+        {
+            std::scoped_lock lock{guard};
+            current_thread = state.highest_priority_thread;
+            this->state.needs_scheduling = false;
+        }
+        const auto is_switch_pending = [this] {
+            std::scoped_lock lock{guard};
+            return !!this->state.needs_scheduling;
+        };
+        do {
+            if (current_thread != nullptr && !current_thread->IsHLEThread()) {
+                current_thread->context_guard.lock();
+                if (!current_thread->IsRunnable()) {
+                    current_thread->context_guard.unlock();
+                    break;
+                }
+                if (static_cast<u32>(current_thread->GetProcessorID()) != core_id) {
+                    current_thread->context_guard.unlock();
+                    break;
+                }
+            }
+            std::shared_ptr<Common::Fiber>* next_context;
+            if (current_thread != nullptr) {
+                next_context = &current_thread->GetHostContext();
+            } else {
+                next_context = &idle_thread->GetHostContext();
+            }
+            Common::Fiber::YieldTo(switch_fiber, *next_context);
+        } while (!is_switch_pending());
+    }
+}
+
+void KScheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
+    const u64 prev_switch_ticks = last_context_switch_time;
+    const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks();
+    const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
+
+    if (thread != nullptr) {
+        thread->UpdateCPUTimeTicks(update_ticks);
+    }
+
+    if (process != nullptr) {
+        process->UpdateCPUTimeTicks(update_ticks);
+    }
+
+    last_context_switch_time = most_recent_switch_ticks;
+}
+
+void KScheduler::Initialize() {
+    std::string name = "Idle Thread Id:" + std::to_string(core_id);
+    std::function<void(void*)> init_func = Core::CpuManager::GetIdleThreadStartFunc();
+    void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater();
+    ThreadType type = static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE);
+    auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast<u32>(core_id), 0,
+                                     nullptr, std::move(init_func), init_func_parameter);
+    idle_thread = thread_res.Unwrap().get();
+
+    {
+        KScopedSchedulerLock lock{system.Kernel()};
+        idle_thread->SetStatus(ThreadStatus::Ready);
+    }
+}
+
+SchedulerLock::SchedulerLock(KernelCore& kernel) : kernel{kernel} {
+    kernel.GlobalSchedulerContext().Lock();
+}
+
+SchedulerLock::~SchedulerLock() {
+    kernel.GlobalSchedulerContext().Unlock();
+}
+
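+// Sleep variant of the scheduler lock: unless the sleep is cancelled first, releasing or
+// destroying it schedules the timeout event for time_task with the time manager.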
+SchedulerLockAndSleep::SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle,
+                                             Thread* time_task, s64 nanoseconds)
+    : SchedulerLock{kernel}, event_handle{event_handle}, time_task{time_task},
+      nanoseconds{nanoseconds} {
+    event_handle = InvalidHandle;
+}
+
+SchedulerLockAndSleep::~SchedulerLockAndSleep() {
+    if (sleep_cancelled) {
+        return;
+    }
+    auto& time_manager = kernel.TimeManager();
+    time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
+}
+
+void SchedulerLockAndSleep::Release() {
+    if (sleep_cancelled) {
+        return;
+    }
+    auto& time_manager = kernel.TimeManager();
+    time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
+    sleep_cancelled = true;
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/k_scheduler.h
similarity index 57%
rename from src/core/hle/kernel/scheduler.h
rename to src/core/hle/kernel/k_scheduler.h
index 68db4a5efe..535ee34b98 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/k_scheduler.h
@@ -1,7 +1,10 @@
-// Copyright 2018 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+// This file references various implementation details from Atmosphere, an open-source firmware for
+// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
+
 #pragma once
 
 #include <atomic>
@@ -11,8 +14,12 @@
 
 #include "common/common_types.h"
 #include "common/multi_level_queue.h"
+#include "common/scope_exit.h"
 #include "common/spin_lock.h"
+#include "core/core_timing.h"
 #include "core/hardware_properties.h"
+#include "core/hle/kernel/k_priority_queue.h"
+#include "core/hle/kernel/k_scheduler_lock.h"
 #include "core/hle/kernel/thread.h"
 
 namespace Common {
@@ -30,10 +37,16 @@ class KernelCore;
 class Process;
 class SchedulerLock;
 
-class GlobalScheduler final {
+using KSchedulerPriorityQueue =
+    KPriorityQueue<Thread, Core::Hardware::NUM_CPU_CORES, THREADPRIO_LOWEST, THREADPRIO_HIGHEST>;
+static constexpr s32 HighestCoreMigrationAllowedPriority = 2;
+
+class GlobalSchedulerContext final {
+    friend class KScheduler;
+
 public:
-    explicit GlobalScheduler(KernelCore& kernel);
-    ~GlobalScheduler();
+    explicit GlobalSchedulerContext(KernelCore& kernel);
+    ~GlobalSchedulerContext();
 
     /// Adds a new thread to the scheduler
     void AddThread(std::shared_ptr<Thread> thread);
@@ -46,60 +59,6 @@ public:
         return thread_list;
     }
 
-    /// Notify the scheduler a thread's status has changed.
-    void AdjustSchedulingOnStatus(Thread* thread, u32 old_flags);
-
-    /// Notify the scheduler a thread's priority has changed.
-    void AdjustSchedulingOnPriority(Thread* thread, u32 old_priority);
-
-    /// Notify the scheduler a thread's core and/or affinity mask has changed.
-    void AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, s32 old_core);
-
-    /**
-     * Takes care of selecting the new scheduled threads in three steps:
-     *
-     * 1. First a thread is selected from the top of the priority queue. If no thread
-     *    is obtained then we move to step two, else we are done.
-     *
-     * 2. Second we try to get a suggested thread that's not assigned to any core or
-     *    that is not the top thread in that core.
-     *
-     * 3. Third is no suggested thread is found, we do a second pass and pick a running
-     *    thread in another core and swap it with its current thread.
-     *
-     * returns the cores needing scheduling.
-     */
-    u32 SelectThreads();
-
-    bool HaveReadyThreads(std::size_t core_id) const {
-        return !scheduled_queue[core_id].empty();
-    }
-
-    /**
-     * Takes a thread and moves it to the back of the it's priority list.
-     *
-     * @note This operation can be redundant and no scheduling is changed if marked as so.
-     */
-    bool YieldThread(Thread* thread);
-
-    /**
-     * Takes a thread and moves it to the back of the it's priority list.
-     * Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or
-     * a better priority than the next thread in the core.
-     *
-     * @note This operation can be redundant and no scheduling is changed if marked as so.
-     */
-    bool YieldThreadAndBalanceLoad(Thread* thread);
-
-    /**
-     * Takes a thread and moves it out of the scheduling queue.
-     * and into the suggested queue. If no thread can be scheduled afterwards in that core,
-     * a suggested thread is obtained instead.
-     *
-     * @note This operation can be redundant and no scheduling is changed if marked as so.
-     */
-    bool YieldThreadAndWaitForLoadBalancing(Thread* thread);
-
     /**
      * Rotates the scheduling queues of threads at a preemption priority and then does
      * some core rebalancing. Preemption priorities can be found in the array
@@ -113,15 +72,7 @@ public:
         return Core::Hardware::NUM_CPU_CORES;
     }
 
-    void SetReselectionPending() {
-        is_reselection_pending.store(true, std::memory_order_release);
-    }
-
-    bool IsReselectionPending() const {
-        return is_reselection_pending.load(std::memory_order_acquire);
-    }
-
-    void Shutdown();
+    bool IsLocked() const;
 
 private:
     friend class SchedulerLock;
@@ -133,109 +84,50 @@ private:
     /// and reschedules current core if needed.
     void Unlock();
 
-    void EnableInterruptAndSchedule(u32 cores_pending_reschedule,
-                                    Core::EmuThreadHandle global_thread);
+    using LockType = KAbstractSchedulerLock<KScheduler>;
 
-    /**
-     * Add a thread to the suggested queue of a cpu core. Suggested threads may be
-     * picked if no thread is scheduled to run on the core.
-     */
-    void Suggest(u32 priority, std::size_t core, Thread* thread);
+    KernelCore& kernel;
 
-    /**
-     * Remove a thread to the suggested queue of a cpu core. Suggested threads may be
-     * picked if no thread is scheduled to run on the core.
-     */
-    void Unsuggest(u32 priority, std::size_t core, Thread* thread);
-
-    /**
-     * Add a thread to the scheduling queue of a cpu core. The thread is added at the
-     * back the queue in its priority level.
-     */
-    void Schedule(u32 priority, std::size_t core, Thread* thread);
-
-    /**
-     * Add a thread to the scheduling queue of a cpu core. The thread is added at the
-     * front the queue in its priority level.
-     */
-    void SchedulePrepend(u32 priority, std::size_t core, Thread* thread);
-
-    /// Reschedule an already scheduled thread based on a new priority
-    void Reschedule(u32 priority, std::size_t core, Thread* thread);
-
-    /// Unschedules a thread.
-    void Unschedule(u32 priority, std::size_t core, Thread* thread);
-
-    /**
-     * Transfers a thread into an specific core. If the destination_core is -1
-     * it will be unscheduled from its source code and added into its suggested
-     * queue.
-     */
-    void TransferToCore(u32 priority, s32 destination_core, Thread* thread);
-
-    bool AskForReselectionOrMarkRedundant(Thread* current_thread, const Thread* winner);
-
-    static constexpr u32 min_regular_priority = 2;
-    std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES>
-        scheduled_queue;
-    std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES>
-        suggested_queue;
-    std::atomic<bool> is_reselection_pending{false};
-
-    // The priority levels at which the global scheduler preempts threads every 10 ms. They are
-    // ordered from Core 0 to Core 3.
-    std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
-
-    /// Scheduler lock mechanisms.
-    bool is_locked{};
-    std::mutex inner_lock;
-    std::atomic<s64> scope_lock{};
-    Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()};
-
-    Common::SpinLock global_list_guard{};
+    std::atomic_bool scheduler_update_needed{};
+    KSchedulerPriorityQueue priority_queue;
+    LockType scheduler_lock;
 
     /// Lists all threads that haven't been deleted.
     std::vector<std::shared_ptr<Thread>> thread_list;
-    KernelCore& kernel;
+    Common::SpinLock global_list_guard{};
 };
 
-class Scheduler final {
+class KScheduler final {
 public:
-    explicit Scheduler(Core::System& system, std::size_t core_id);
-    ~Scheduler();
-
-    /// Returns whether there are any threads that are ready to run.
-    bool HaveReadyThreads() const;
+    explicit KScheduler(Core::System& system, std::size_t core_id);
+    ~KScheduler();
 
     /// Reschedules to the next available thread (call after current thread is suspended)
-    void TryDoContextSwitch();
+    void RescheduleCurrentCore();
+
+    /// Reschedules the cores that are pending a reschedule; to be called from EnableScheduling.
+    static void RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedule,
+                                Core::EmuThreadHandle global_thread);
 
     /// The next two are for SingleCore only.
     /// Unload current thread before preempting core.
     void Unload(Thread* thread);
-    void Unload();
+
     /// Reload current thread after core preemption.
     void Reload(Thread* thread);
-    void Reload();
 
     /// Gets the current running thread
     Thread* GetCurrentThread() const;
 
-    /// Gets the currently selected thread from the top of the multilevel queue
-    Thread* GetSelectedThread() const;
-
     /// Gets the timestamp for the last context switch in ticks.
     u64 GetLastContextSwitchTicks() const;
 
     bool ContextSwitchPending() const {
-        return is_context_switch_pending;
+        return this->state.needs_scheduling;
     }
 
     void Initialize();
 
-    /// Shutdowns the scheduler.
-    void Shutdown();
-
     void OnThreadStart();
 
     std::shared_ptr<Common::Fiber>& ControlContext() {
@@ -246,11 +138,90 @@ public:
         return switch_fiber;
     }
 
+    std::size_t CurrentCoreId() const {
+        return core_id;
+    }
+
+    u64 UpdateHighestPriorityThread(Thread* highest_thread);
+
+    /**
+     * Takes a thread and moves it to the back of its priority list.
+     *
+     * @note This operation can be redundant, in which case no scheduling is changed.
+     */
+    void YieldWithoutCoreMigration();
+
+    /**
+     * Takes a thread and moves it to the back of its priority list.
+     * Afterwards, tries to pick a suggested thread from the suggested queue that has a worse time
+     * or a better priority than the next thread in the core.
+     *
+     * @note This operation can be redundant, in which case no scheduling is changed.
+     */
+    void YieldWithCoreMigration();
+
+    /**
+     * Takes a thread and moves it out of the scheduling queue
+     * and into the suggested queue. If no thread can be scheduled afterwards on that core,
+     * a suggested thread is obtained instead.
+     *
+     * @note This operation can be redundant, in which case no scheduling is changed.
+     */
+    void YieldToAnyThread();
+
+    /// Notify the scheduler that a thread's status has changed.
+    static void OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state);
+
+    /// Notify the scheduler that a thread's priority has changed.
+    static void OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread,
+                                        u32 old_priority);
+
+    /// Notify the scheduler that a thread's core and/or affinity mask has changed.
+    static void OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread,
+                                            const KAffinityMask& old_affinity, s32 old_core);
+
 private:
-    friend class GlobalScheduler;
+    /**
+     * Takes care of selecting the new scheduled threads in three steps:
+     *
+     * 1. First, a thread is selected from the top of the priority queue. If no thread
+     *    is obtained then we move to step two; otherwise we are done.
+     *
+     * 2. Second, we try to get a suggested thread that's not assigned to any core or
+     *    that is not the top thread on its core.
+     *
+     * 3. Third, if no suggested thread is found, we do a second pass and pick a running
+     *    thread on another core and swap it with its current thread.
+     *
+     * Returns the cores needing scheduling.
+     */
+    static u64 UpdateHighestPriorityThreadsImpl(KernelCore& kernel);
+
+    void RotateScheduledQueue(s32 core_id, s32 priority);
+
+public:
+    static bool CanSchedule(KernelCore& kernel);
+    static bool IsSchedulerUpdateNeeded(const KernelCore& kernel);
+    static void SetSchedulerUpdateNeeded(KernelCore& kernel);
+    static void ClearSchedulerUpdateNeeded(KernelCore& kernel);
+    static void DisableScheduling(KernelCore& kernel);
+    static void EnableScheduling(KernelCore& kernel, u64 cores_needing_scheduling,
+                                 Core::EmuThreadHandle global_thread);
+    static u64 UpdateHighestPriorityThreads(KernelCore& kernel);
+
+private:
+    friend class GlobalSchedulerContext;
+
+    static KSchedulerPriorityQueue& GetPriorityQueue(KernelCore& kernel);
+
+    void Schedule() {
+        ASSERT(GetCurrentThread()->GetDisableDispatchCount() == 1);
+        this->ScheduleImpl();
+    }
 
     /// Switches the CPU's active thread context to that of the specified thread
-    void SwitchContext();
+    void ScheduleImpl();
+    void SwitchThread(Thread* next_thread);
 
     /// When a thread wakes up, it must run this through its new scheduler
     void SwitchContextStep2();
@@ -271,22 +242,28 @@ private:
     static void OnSwitch(void* this_scheduler);
     void SwitchToCurrent();
 
-    std::shared_ptr<Thread> current_thread = nullptr;
-    std::shared_ptr<Thread> selected_thread = nullptr;
-    std::shared_ptr<Thread> current_thread_prev = nullptr;
-    std::shared_ptr<Thread> selected_thread_set = nullptr;
-    std::shared_ptr<Thread> idle_thread = nullptr;
+private:
+    Thread* current_thread{};
+    Thread* idle_thread{};
 
-    std::shared_ptr<Common::Fiber> switch_fiber = nullptr;
+    std::shared_ptr<Common::Fiber> switch_fiber{};
+
+    struct SchedulingState {
+        std::atomic<bool> needs_scheduling;
+        bool interrupt_task_thread_runnable{};
+        bool should_count_idle{};
+        u64 idle_count{};
+        Thread* highest_priority_thread{};
+        void* idle_thread_stack{};
+    };
+
+    SchedulingState state;
 
     Core::System& system;
-    u64 last_context_switch_time = 0;
-    u64 idle_selection_count = 0;
+    u64 last_context_switch_time{};
     const std::size_t core_id;
 
     Common::SpinLock guard{};
-
-    bool is_context_switch_pending = false;
 };
 
 class SchedulerLock {
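
For readers following the three-step selection documented in the comment above, here is a minimal, self-contained sketch of steps 1 and 2 (the step-3 migration swap is omitted for brevity). Every name and container shape below is an illustrative stand-in, not the actual KSchedulerPriorityQueue API:

    #include <array>
    #include <cstdint>
    #include <vector>

    namespace selection_sketch {

    constexpr std::size_t NumCores = 4;

    struct FakeThread {
        std::int32_t core; // -1: not currently assigned to any core
    };

    // Step 1: take the front of each core's scheduled queue.
    // Step 2: for an idle core, adopt a suggested thread that is not the
    //         selected (top) thread anywhere else.
    // Returns one bit per core whose selection changed, mirroring the u64
    // "cores needing scheduling" described in the doc comment.
    inline std::uint64_t SelectTopThreads(
        std::array<std::vector<FakeThread*>, NumCores>& scheduled,
        std::vector<FakeThread*>& suggested,
        std::array<FakeThread*, NumCores>& selected) {
        std::uint64_t cores_needing_scheduling = 0;
        for (std::size_t core = 0; core < NumCores; ++core) {
            FakeThread* next = scheduled[core].empty() ? nullptr : scheduled[core].front();
            if (next == nullptr) {
                for (FakeThread* candidate : suggested) {
                    const bool top_elsewhere =
                        candidate->core >= 0 &&
                        selected[static_cast<std::size_t>(candidate->core)] == candidate;
                    if (!top_elsewhere) {
                        candidate->core = static_cast<std::int32_t>(core);
                        next = candidate;
                        break;
                    }
                }
            }
            if (selected[core] != next) {
                selected[core] = next;
                cores_needing_scheduling |= std::uint64_t{1} << core;
            }
        }
        return cores_needing_scheduling;
    }

    } // namespace selection_sketch
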
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 929db696d3..b74e34c40d 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -27,6 +27,7 @@
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/memory_layout.h"
 #include "core/hle/kernel/memory/memory_manager.h"
@@ -34,7 +35,6 @@
 #include "core/hle/kernel/physical_core.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/resource_limit.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/shared_memory.h"
 #include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/thread.h"
@@ -49,17 +49,18 @@ namespace Kernel {
 
 struct KernelCore::Impl {
     explicit Impl(Core::System& system, KernelCore& kernel)
-        : global_scheduler{kernel}, synchronization{system}, time_manager{system},
-          global_handle_table{kernel}, system{system} {}
+        : synchronization{system}, time_manager{system}, global_handle_table{kernel}, system{
+                                                                                          system} {}
 
     void SetMulticore(bool is_multicore) {
         this->is_multicore = is_multicore;
     }
 
     void Initialize(KernelCore& kernel) {
-        Shutdown();
         RegisterHostThread();
 
+        global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
+
         InitializePhysicalCores();
         InitializeSystemResourceLimit(kernel);
         InitializeMemoryLayout();
@@ -86,29 +87,20 @@ struct KernelCore::Impl {
             }
         }
 
-        for (std::size_t i = 0; i < cores.size(); i++) {
-            cores[i].Shutdown();
-            schedulers[i].reset();
-        }
         cores.clear();
 
         process_list.clear();
+
         current_process = nullptr;
 
         system_resource_limit = nullptr;
 
         global_handle_table.Clear();
+
         preemption_event = nullptr;
 
-        global_scheduler.Shutdown();
-
         named_ports.clear();
 
-        for (auto& core : cores) {
-            core.Shutdown();
-        }
-        cores.clear();
-
         exclusive_monitor.reset();
 
         num_host_threads = 0;
@@ -121,7 +113,7 @@ struct KernelCore::Impl {
         exclusive_monitor =
             Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES);
         for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
-            schedulers[i] = std::make_unique<Kernel::Scheduler>(system, i);
+            schedulers[i] = std::make_unique<Kernel::KScheduler>(system, i);
             cores.emplace_back(i, system, *schedulers[i], interrupts);
         }
     }
@@ -155,7 +147,7 @@ struct KernelCore::Impl {
             "PreemptionCallback", [this, &kernel](std::uintptr_t, std::chrono::nanoseconds) {
                 {
                     SchedulerLock lock(kernel);
-                    global_scheduler.PreemptThreads();
+                    global_scheduler_context->PreemptThreads();
                 }
                 const auto time_interval = std::chrono::nanoseconds{
                     Core::Timing::msToCycles(std::chrono::milliseconds(10))};
@@ -245,7 +237,7 @@ struct KernelCore::Impl {
         if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) {
             return result;
         }
-        const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler();
+        const Kernel::KScheduler& sched = cores[result.host_handle].Scheduler();
         const Kernel::Thread* current = sched.GetCurrentThread();
         if (current != nullptr && !current->IsPhantomMode()) {
             result.guest_handle = current->GetGlobalHandle();
@@ -314,7 +306,7 @@ struct KernelCore::Impl {
     // Lists all processes that exist in the current session.
     std::vector<std::shared_ptr<Process>> process_list;
     Process* current_process = nullptr;
-    Kernel::GlobalScheduler global_scheduler;
+    std::unique_ptr<Kernel::GlobalSchedulerContext> global_scheduler_context;
     Kernel::Synchronization synchronization;
     Kernel::TimeManager time_manager;
 
@@ -355,7 +347,7 @@ struct KernelCore::Impl {
 
     std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
     std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
-    std::array<std::unique_ptr<Kernel::Scheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};
+    std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};
 
     bool is_multicore{};
     std::thread::id single_core_thread_id{};
@@ -415,19 +407,19 @@ const std::vector<std::shared_ptr<Process>>& KernelCore::GetProcessList() const
     return impl->process_list;
 }
 
-Kernel::GlobalScheduler& KernelCore::GlobalScheduler() {
-    return impl->global_scheduler;
+Kernel::GlobalSchedulerContext& KernelCore::GlobalSchedulerContext() {
+    return *impl->global_scheduler_context;
 }
 
-const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const {
-    return impl->global_scheduler;
+const Kernel::GlobalSchedulerContext& KernelCore::GlobalSchedulerContext() const {
+    return *impl->global_scheduler_context;
 }
 
-Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) {
+Kernel::KScheduler& KernelCore::Scheduler(std::size_t id) {
     return *impl->schedulers[id];
 }
 
-const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const {
+const Kernel::KScheduler& KernelCore::Scheduler(std::size_t id) const {
     return *impl->schedulers[id];
 }
 
@@ -451,16 +443,13 @@ const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const {
     return impl->cores[core_id];
 }
 
-Kernel::Scheduler& KernelCore::CurrentScheduler() {
+Kernel::KScheduler* KernelCore::CurrentScheduler() {
     u32 core_id = impl->GetCurrentHostThreadID();
-    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
-    return *impl->schedulers[core_id];
-}
-
-const Kernel::Scheduler& KernelCore::CurrentScheduler() const {
-    u32 core_id = impl->GetCurrentHostThreadID();
-    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
-    return *impl->schedulers[core_id];
+    if (core_id >= Core::Hardware::NUM_CPU_CORES) {
+        // This is expected when called from a host (non-guest) thread
+        return {};
+    }
+    return impl->schedulers[core_id].get();
 }
 
 std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() {
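
Because KernelCore::CurrentScheduler() now returns a pointer (null when the caller is a host thread rather than a guest core), call sites that can run outside a guest context need a guard. A minimal sketch, where TryGetCurrentGuestThread is a hypothetical helper rather than part of this change:

    #include "core/hle/kernel/k_scheduler.h"
    #include "core/hle/kernel/kernel.h"
    #include "core/hle/kernel/thread.h"

    namespace Kernel {

    // Hypothetical helper: yields the running guest thread, or nullptr when
    // invoked from a host thread, for which CurrentScheduler() returns null.
    inline Thread* TryGetCurrentGuestThread(KernelCore& kernel) {
        KScheduler* const scheduler = kernel.CurrentScheduler();
        return scheduler != nullptr ? scheduler->GetCurrentThread() : nullptr;
    }

    } // namespace Kernel

Call sites known to run on guest threads, such as the mutex and SVC handlers later in this diff, keep dereferencing the pointer directly.
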
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index a73a930396..5846c3f39c 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -35,12 +35,12 @@ class SlabHeap;
 
 class AddressArbiter;
 class ClientPort;
-class GlobalScheduler;
+class GlobalSchedulerContext;
 class HandleTable;
 class PhysicalCore;
 class Process;
 class ResourceLimit;
-class Scheduler;
+class KScheduler;
 class SharedMemory;
 class Synchronization;
 class Thread;
@@ -102,16 +102,16 @@ public:
     const std::vector<std::shared_ptr<Process>>& GetProcessList() const;
 
     /// Gets the sole instance of the global scheduler
-    Kernel::GlobalScheduler& GlobalScheduler();
+    Kernel::GlobalSchedulerContext& GlobalSchedulerContext();
 
     /// Gets the sole instance of the global scheduler
-    const Kernel::GlobalScheduler& GlobalScheduler() const;
+    const Kernel::GlobalSchedulerContext& GlobalSchedulerContext() const;
 
     /// Gets the sole instance of the Scheduler associated with CPU core 'id'
-    Kernel::Scheduler& Scheduler(std::size_t id);
+    Kernel::KScheduler& Scheduler(std::size_t id);
 
     /// Gets the sole instance of the Scheduler associated with CPU core 'id'
-    const Kernel::Scheduler& Scheduler(std::size_t id) const;
+    const Kernel::KScheduler& Scheduler(std::size_t id) const;
 
     /// Gets an instance of the respective physical CPU core.
     Kernel::PhysicalCore& PhysicalCore(std::size_t id);
@@ -120,10 +120,7 @@ public:
     const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const;
 
     /// Gets the sole instance of the Scheduler at the current running core.
-    Kernel::Scheduler& CurrentScheduler();
-
-    /// Gets the sole instance of the Scheduler at the current running core.
-    const Kernel::Scheduler& CurrentScheduler() const;
+    Kernel::KScheduler* CurrentScheduler();
 
     /// Gets an instance of the current physical CPU core.
     Kernel::PhysicalCore& CurrentPhysicalCore();
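
The mutex and SVC changes below keep the existing SchedulerLock RAII pattern around the renamed types. A usage sketch, assuming only what the k_scheduler.h header above declares; the function name and body are placeholders for whatever state mutation a caller performs:

    #include "core/hle/kernel/k_scheduler.h"
    #include "core/hle/kernel/kernel.h"

    namespace Kernel {

    // Illustrative only: stands in for operations such as Mutex::TryAcquire
    // or Mutex::Release that must hold the scheduler lock.
    inline void DoScheduledStateChange(KernelCore& kernel) {
        SchedulerLock lock(kernel); // Lock() on construction
        // ... mutate thread/scheduling state while serialized ...
    }                               // ~SchedulerLock() unlocks and may reschedule

    } // namespace Kernel
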
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 8f6c944d17..6299b1342a 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -11,11 +11,11 @@
 #include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
@@ -73,7 +73,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
 
     auto& kernel = system.Kernel();
     std::shared_ptr<Thread> current_thread =
-        SharedFrom(kernel.CurrentScheduler().GetCurrentThread());
+        SharedFrom(kernel.CurrentScheduler()->GetCurrentThread());
     {
         SchedulerLock lock(kernel);
         // The mutex address must be 4-byte aligned
@@ -156,7 +156,7 @@ ResultCode Mutex::Release(VAddr address) {
     SchedulerLock lock(kernel);
 
     std::shared_ptr<Thread> current_thread =
-        SharedFrom(kernel.CurrentScheduler().GetCurrentThread());
+        SharedFrom(kernel.CurrentScheduler()->GetCurrentThread());
 
     auto [result, new_owner] = Unlock(current_thread, address);
 
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index d6a5742bd3..7fea45f96c 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -7,14 +7,14 @@
 #include "core/arm/dynarmic/arm_dynarmic_32.h"
 #include "core/arm/dynarmic/arm_dynarmic_64.h"
 #include "core/core.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/physical_core.h"
-#include "core/hle/kernel/scheduler.h"
 
 namespace Kernel {
 
 PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system,
-                           Kernel::Scheduler& scheduler, Core::CPUInterrupts& interrupts)
+                           Kernel::KScheduler& scheduler, Core::CPUInterrupts& interrupts)
     : core_index{core_index}, system{system}, scheduler{scheduler},
       interrupts{interrupts}, guard{std::make_unique<Common::SpinLock>()} {}
 
@@ -37,17 +37,12 @@ void PhysicalCore::Initialize([[maybe_unused]] bool is_64_bit) {
 
 void PhysicalCore::Run() {
     arm_interface->Run();
-    arm_interface->ClearExclusiveState();
 }
 
 void PhysicalCore::Idle() {
     interrupts[core_index].AwaitInterrupt();
 }
 
-void PhysicalCore::Shutdown() {
-    scheduler.Shutdown();
-}
-
 bool PhysicalCore::IsInterrupted() const {
     return interrupts[core_index].IsInterrupted();
 }
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index 37513130a8..b4d3c15de9 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -15,7 +15,7 @@ class SpinLock;
 }
 
 namespace Kernel {
-class Scheduler;
+class KScheduler;
 } // namespace Kernel
 
 namespace Core {
@@ -28,7 +28,7 @@ namespace Kernel {
 
 class PhysicalCore {
 public:
-    PhysicalCore(std::size_t core_index, Core::System& system, Kernel::Scheduler& scheduler,
+    PhysicalCore(std::size_t core_index, Core::System& system, Kernel::KScheduler& scheduler,
                  Core::CPUInterrupts& interrupts);
     ~PhysicalCore();
 
@@ -55,9 +55,6 @@ public:
     /// Check if this core is interrupted
     bool IsInterrupted() const;
 
-    // Shutdown this physical core.
-    void Shutdown();
-
     bool IsInitialized() const {
         return arm_interface != nullptr;
     }
@@ -82,18 +79,18 @@ public:
         return core_index;
     }
 
-    Kernel::Scheduler& Scheduler() {
+    Kernel::KScheduler& Scheduler() {
         return scheduler;
     }
 
-    const Kernel::Scheduler& Scheduler() const {
+    const Kernel::KScheduler& Scheduler() const {
         return scheduler;
     }
 
 private:
     const std::size_t core_index;
     Core::System& system;
-    Kernel::Scheduler& scheduler;
+    Kernel::KScheduler& scheduler;
     Core::CPUInterrupts& interrupts;
     std::unique_ptr<Common::SpinLock> guard;
     std::unique_ptr<Core::ARM_Interface> arm_interface;
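
When cores are flagged in a reschedule mask, the old EnableInterruptAndSchedule (in the deleted scheduler.cpp further below) peeled off set bits and interrupted each PhysicalCore; the new static KScheduler::RescheduleCores takes the same kind of mask as a u64. A sketch of that walk, assuming the Common::CountTrailingZeroes32 helper the old code used; InterruptPendingCores is an illustrative name:

    #include "common/bit_util.h"
    #include "common/common_types.h"
    #include "core/hle/kernel/kernel.h"
    #include "core/hle/kernel/physical_core.h"

    namespace Kernel {

    // Interrupt every physical core whose bit is set in the pending mask.
    // Mirrors the loop shape of the removed EnableInterruptAndSchedule.
    inline void InterruptPendingCores(KernelCore& kernel, u32 cores_pending) {
        while (cores_pending != 0) {
            const u32 core = Common::CountTrailingZeroes32(cores_pending);
            kernel.PhysicalCore(core).Interrupt();
            cores_pending &= ~(1U << core);
        }
    }

    } // namespace Kernel
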
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index b17529dee5..238c03a13f 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -15,13 +15,13 @@
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/memory_block_manager.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/memory/slab_heap.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/resource_limit.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/lock.h"
 #include "core/memory.h"
@@ -314,7 +314,7 @@ void Process::PrepareForTermination() {
             if (thread->GetOwnerProcess() != this)
                 continue;
 
-            if (thread.get() == system.CurrentScheduler().GetCurrentThread())
+            if (thread.get() == kernel.CurrentScheduler()->GetCurrentThread())
                 continue;
 
             // TODO(Subv): When are the other running/ready threads terminated?
@@ -325,7 +325,7 @@ void Process::PrepareForTermination() {
         }
     };
 
-    stop_threads(system.GlobalScheduler().GetThreadList());
+    stop_threads(system.GlobalSchedulerContext().GetThreadList());
 
     FreeTLSRegion(tls_region_address);
     tls_region_address = 0;
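
PrepareForTermination above shows the renamed access paths in context; condensed, the traversal looks like the sketch below. The loop body is a placeholder for the actual stop/terminate logic, and ForEachOwnedThread is an illustrative name:

    #include "core/hle/kernel/k_scheduler.h"
    #include "core/hle/kernel/kernel.h"
    #include "core/hle/kernel/process.h"
    #include "core/hle/kernel/thread.h"

    namespace Kernel {

    // Visit every live thread owned by `process`, skipping the thread that is
    // currently executing on this core (safe here: termination runs on a guest
    // thread, so CurrentScheduler() is non-null).
    inline void ForEachOwnedThread(KernelCore& kernel, Process* process) {
        for (const auto& thread : kernel.GlobalSchedulerContext().GetThreadList()) {
            if (thread->GetOwnerProcess() != process) {
                continue;
            }
            if (thread.get() == kernel.CurrentScheduler()->GetCurrentThread()) {
                continue;
            }
            // ... stop/terminate `thread` here ...
        }
    }

    } // namespace Kernel
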
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index 6e286419e9..927f88fed9 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -6,10 +6,10 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/readable_event.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 
 namespace Kernel {
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
deleted file mode 100644
index 9a969fdb55..0000000000
--- a/src/core/hle/kernel/scheduler.cpp
+++ /dev/null
@@ -1,819 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-//
-// SelectThreads, Yield functions originally by TuxSH.
-// licensed under GPLv2 or later under exception provided by the author.
-
-#include <algorithm>
-#include <mutex>
-#include <set>
-#include <unordered_set>
-#include <utility>
-
-#include "common/assert.h"
-#include "common/bit_util.h"
-#include "common/fiber.h"
-#include "common/logging/log.h"
-#include "core/arm/arm_interface.h"
-#include "core/core.h"
-#include "core/core_timing.h"
-#include "core/cpu_manager.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/physical_core.h"
-#include "core/hle/kernel/process.h"
-#include "core/hle/kernel/scheduler.h"
-#include "core/hle/kernel/time_manager.h"
-
-namespace Kernel {
-
-GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {}
-
-GlobalScheduler::~GlobalScheduler() = default;
-
-void GlobalScheduler::AddThread(std::shared_ptr<Thread> thread) {
-    std::scoped_lock lock{global_list_guard};
-    thread_list.push_back(std::move(thread));
-}
-
-void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) {
-    std::scoped_lock lock{global_list_guard};
-    thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
-                      thread_list.end());
-}
-
-u32 GlobalScheduler::SelectThreads() {
-    ASSERT(is_locked);
-    const auto update_thread = [](Thread* thread, Scheduler& sched) {
-        std::scoped_lock lock{sched.guard};
-        if (thread != sched.selected_thread_set.get()) {
-            if (thread == nullptr) {
-                ++sched.idle_selection_count;
-            }
-            sched.selected_thread_set = SharedFrom(thread);
-        }
-        const bool reschedule_pending =
-            sched.is_context_switch_pending || (sched.selected_thread_set != sched.current_thread);
-        sched.is_context_switch_pending = reschedule_pending;
-        std::atomic_thread_fence(std::memory_order_seq_cst);
-        return reschedule_pending;
-    };
-    if (!is_reselection_pending.load()) {
-        return 0;
-    }
-    std::array<Thread*, Core::Hardware::NUM_CPU_CORES> top_threads{};
-
-    u32 idle_cores{};
-
-    // Step 1: Get top thread in schedule queue.
-    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        Thread* top_thread =
-            scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
-        if (top_thread != nullptr) {
-            // TODO(Blinkhawk): Implement Thread Pinning
-        } else {
-            idle_cores |= (1U << core);
-        }
-        top_threads[core] = top_thread;
-    }
-
-    while (idle_cores != 0) {
-        u32 core_id = Common::CountTrailingZeroes32(idle_cores);
-
-        if (!suggested_queue[core_id].empty()) {
-            std::array<s32, Core::Hardware::NUM_CPU_CORES> migration_candidates{};
-            std::size_t num_candidates = 0;
-            auto iter = suggested_queue[core_id].begin();
-            Thread* suggested = nullptr;
-            // Step 2: Try selecting a suggested thread.
-            while (iter != suggested_queue[core_id].end()) {
-                suggested = *iter;
-                iter++;
-                s32 suggested_core_id = suggested->GetProcessorID();
-                Thread* top_thread =
-                    suggested_core_id >= 0 ? top_threads[suggested_core_id] : nullptr;
-                if (top_thread != suggested) {
-                    if (top_thread != nullptr &&
-                        top_thread->GetPriority() < THREADPRIO_MAX_CORE_MIGRATION) {
-                        suggested = nullptr;
-                        break;
-                        // There's a too high thread to do core migration, cancel
-                    }
-                    TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), suggested);
-                    break;
-                }
-                suggested = nullptr;
-                migration_candidates[num_candidates++] = suggested_core_id;
-            }
-            // Step 3: Select a suggested thread from another core
-            if (suggested == nullptr) {
-                for (std::size_t i = 0; i < num_candidates; i++) {
-                    s32 candidate_core = migration_candidates[i];
-                    suggested = top_threads[candidate_core];
-                    auto it = scheduled_queue[candidate_core].begin();
-                    it++;
-                    Thread* next = it != scheduled_queue[candidate_core].end() ? *it : nullptr;
-                    if (next != nullptr) {
-                        TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id),
-                                       suggested);
-                        top_threads[candidate_core] = next;
-                        break;
-                    } else {
-                        suggested = nullptr;
-                    }
-                }
-            }
-            top_threads[core_id] = suggested;
-        }
-
-        idle_cores &= ~(1U << core_id);
-    }
-    u32 cores_needing_context_switch{};
-    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        Scheduler& sched = kernel.Scheduler(core);
-        ASSERT(top_threads[core] == nullptr ||
-               static_cast<u32>(top_threads[core]->GetProcessorID()) == core);
-        if (update_thread(top_threads[core], sched)) {
-            cores_needing_context_switch |= (1U << core);
-        }
-    }
-    return cores_needing_context_switch;
-}
-
-bool GlobalScheduler::YieldThread(Thread* yielding_thread) {
-    ASSERT(is_locked);
-    // Note: caller should use critical section, etc.
-    if (!yielding_thread->IsRunnable()) {
-        // Normally this case shouldn't happen except for SetThreadActivity.
-        is_reselection_pending.store(true, std::memory_order_release);
-        return false;
-    }
-    const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
-    const u32 priority = yielding_thread->GetPriority();
-
-    // Yield the thread
-    Reschedule(priority, core_id, yielding_thread);
-    const Thread* const winner = scheduled_queue[core_id].front();
-    if (kernel.GetCurrentHostThreadID() != core_id) {
-        is_reselection_pending.store(true, std::memory_order_release);
-    }
-
-    return AskForReselectionOrMarkRedundant(yielding_thread, winner);
-}
-
-bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
-    ASSERT(is_locked);
-    // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
-    // etc.
-    if (!yielding_thread->IsRunnable()) {
-        // Normally this case shouldn't happen except for SetThreadActivity.
-        is_reselection_pending.store(true, std::memory_order_release);
-        return false;
-    }
-    const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
-    const u32 priority = yielding_thread->GetPriority();
-
-    // Yield the thread
-    Reschedule(priority, core_id, yielding_thread);
-
-    std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
-    for (std::size_t i = 0; i < current_threads.size(); i++) {
-        current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
-    }
-
-    Thread* next_thread = scheduled_queue[core_id].front(priority);
-    Thread* winner = nullptr;
-    for (auto& thread : suggested_queue[core_id]) {
-        const s32 source_core = thread->GetProcessorID();
-        if (source_core >= 0) {
-            if (current_threads[source_core] != nullptr) {
-                if (thread == current_threads[source_core] ||
-                    current_threads[source_core]->GetPriority() < min_regular_priority) {
-                    continue;
-                }
-            }
-        }
-        if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() ||
-            next_thread->GetPriority() < thread->GetPriority()) {
-            if (thread->GetPriority() <= priority) {
-                winner = thread;
-                break;
-            }
-        }
-    }
-
-    if (winner != nullptr) {
-        if (winner != yielding_thread) {
-            TransferToCore(winner->GetPriority(), s32(core_id), winner);
-        }
-    } else {
-        winner = next_thread;
-    }
-
-    if (kernel.GetCurrentHostThreadID() != core_id) {
-        is_reselection_pending.store(true, std::memory_order_release);
-    }
-
-    return AskForReselectionOrMarkRedundant(yielding_thread, winner);
-}
-
-bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) {
-    ASSERT(is_locked);
-    // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
-    // etc.
-    if (!yielding_thread->IsRunnable()) {
-        // Normally this case shouldn't happen except for SetThreadActivity.
-        is_reselection_pending.store(true, std::memory_order_release);
-        return false;
-    }
-    Thread* winner = nullptr;
-    const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
-
-    // Remove the thread from its scheduled mlq, put it on the corresponding "suggested" one instead
-    TransferToCore(yielding_thread->GetPriority(), -1, yielding_thread);
-
-    // If the core is idle, perform load balancing, excluding the threads that have just used this
-    // function...
-    if (scheduled_queue[core_id].empty()) {
-        // Here, "current_threads" is calculated after the ""yield"", unlike yield -1
-        std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
-        for (std::size_t i = 0; i < current_threads.size(); i++) {
-            current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
-        }
-        for (auto& thread : suggested_queue[core_id]) {
-            const s32 source_core = thread->GetProcessorID();
-            if (source_core < 0 || thread == current_threads[source_core]) {
-                continue;
-            }
-            if (current_threads[source_core] == nullptr ||
-                current_threads[source_core]->GetPriority() >= min_regular_priority) {
-                winner = thread;
-            }
-            break;
-        }
-        if (winner != nullptr) {
-            if (winner != yielding_thread) {
-                TransferToCore(winner->GetPriority(), static_cast<s32>(core_id), winner);
-            }
-        } else {
-            winner = yielding_thread;
-        }
-    } else {
-        winner = scheduled_queue[core_id].front();
-    }
-
-    if (kernel.GetCurrentHostThreadID() != core_id) {
-        is_reselection_pending.store(true, std::memory_order_release);
-    }
-
-    return AskForReselectionOrMarkRedundant(yielding_thread, winner);
-}
-
-void GlobalScheduler::PreemptThreads() {
-    ASSERT(is_locked);
-    for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
-        const u32 priority = preemption_priorities[core_id];
-
-        if (scheduled_queue[core_id].size(priority) > 0) {
-            if (scheduled_queue[core_id].size(priority) > 1) {
-                scheduled_queue[core_id].front(priority)->IncrementYieldCount();
-            }
-            scheduled_queue[core_id].yield(priority);
-            if (scheduled_queue[core_id].size(priority) > 1) {
-                scheduled_queue[core_id].front(priority)->IncrementYieldCount();
-            }
-        }
-
-        Thread* current_thread =
-            scheduled_queue[core_id].empty() ? nullptr : scheduled_queue[core_id].front();
-        Thread* winner = nullptr;
-        for (auto& thread : suggested_queue[core_id]) {
-            const s32 source_core = thread->GetProcessorID();
-            if (thread->GetPriority() != priority) {
-                continue;
-            }
-            if (source_core >= 0) {
-                Thread* next_thread = scheduled_queue[source_core].empty()
-                                          ? nullptr
-                                          : scheduled_queue[source_core].front();
-                if (next_thread != nullptr && next_thread->GetPriority() < 2) {
-                    break;
-                }
-                if (next_thread == thread) {
-                    continue;
-                }
-            }
-            if (current_thread != nullptr &&
-                current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) {
-                winner = thread;
-                break;
-            }
-        }
-
-        if (winner != nullptr) {
-            TransferToCore(winner->GetPriority(), s32(core_id), winner);
-            current_thread =
-                winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread;
-        }
-
-        if (current_thread != nullptr && current_thread->GetPriority() > priority) {
-            for (auto& thread : suggested_queue[core_id]) {
-                const s32 source_core = thread->GetProcessorID();
-                if (thread->GetPriority() < priority) {
-                    continue;
-                }
-                if (source_core >= 0) {
-                    Thread* next_thread = scheduled_queue[source_core].empty()
-                                              ? nullptr
-                                              : scheduled_queue[source_core].front();
-                    if (next_thread != nullptr && next_thread->GetPriority() < 2) {
-                        break;
-                    }
-                    if (next_thread == thread) {
-                        continue;
-                    }
-                }
-                if (current_thread != nullptr &&
-                    current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) {
-                    winner = thread;
-                    break;
-                }
-            }
-
-            if (winner != nullptr) {
-                TransferToCore(winner->GetPriority(), s32(core_id), winner);
-                current_thread = winner;
-            }
-        }
-
-        is_reselection_pending.store(true, std::memory_order_release);
-    }
-}
-
-void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule,
-                                                 Core::EmuThreadHandle global_thread) {
-    u32 current_core = global_thread.host_handle;
-    bool must_context_switch = global_thread.guest_handle != InvalidHandle &&
-                               (current_core < Core::Hardware::NUM_CPU_CORES);
-    while (cores_pending_reschedule != 0) {
-        u32 core = Common::CountTrailingZeroes32(cores_pending_reschedule);
-        ASSERT(core < Core::Hardware::NUM_CPU_CORES);
-        if (!must_context_switch || core != current_core) {
-            auto& phys_core = kernel.PhysicalCore(core);
-            phys_core.Interrupt();
-        } else {
-            must_context_switch = true;
-        }
-        cores_pending_reschedule &= ~(1U << core);
-    }
-    if (must_context_switch) {
-        auto& core_scheduler = kernel.CurrentScheduler();
-        kernel.ExitSVCProfile();
-        core_scheduler.TryDoContextSwitch();
-        kernel.EnterSVCProfile();
-    }
-}
-
-void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) {
-    ASSERT(is_locked);
-    suggested_queue[core].add(thread, priority);
-}
-
-void GlobalScheduler::Unsuggest(u32 priority, std::size_t core, Thread* thread) {
-    ASSERT(is_locked);
-    suggested_queue[core].remove(thread, priority);
-}
-
-void GlobalScheduler::Schedule(u32 priority, std::size_t core, Thread* thread) {
-    ASSERT(is_locked);
-    ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
-    scheduled_queue[core].add(thread, priority);
-}
-
-void GlobalScheduler::SchedulePrepend(u32 priority, std::size_t core, Thread* thread) {
-    ASSERT(is_locked);
-    ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
-    scheduled_queue[core].add(thread, priority, false);
-}
-
-void GlobalScheduler::Reschedule(u32 priority, std::size_t core, Thread* thread) {
-    ASSERT(is_locked);
-    scheduled_queue[core].remove(thread, priority);
-    scheduled_queue[core].add(thread, priority);
-}
-
-void GlobalScheduler::Unschedule(u32 priority, std::size_t core, Thread* thread) {
-    ASSERT(is_locked);
-    scheduled_queue[core].remove(thread, priority);
-}
-
-void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) {
-    ASSERT(is_locked);
-    const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT;
-    const s32 source_core = thread->GetProcessorID();
-    if (source_core == destination_core || !schedulable) {
-        return;
-    }
-    thread->SetProcessorID(destination_core);
-    if (source_core >= 0) {
-        Unschedule(priority, static_cast<u32>(source_core), thread);
-    }
-    if (destination_core >= 0) {
-        Unsuggest(priority, static_cast<u32>(destination_core), thread);
-        Schedule(priority, static_cast<u32>(destination_core), thread);
-    }
-    if (source_core >= 0) {
-        Suggest(priority, static_cast<u32>(source_core), thread);
-    }
-}
-
-bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread,
-                                                       const Thread* winner) {
-    if (current_thread == winner) {
-        current_thread->IncrementYieldCount();
-        return true;
-    } else {
-        is_reselection_pending.store(true, std::memory_order_release);
-        return false;
-    }
-}
-
-void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) {
-    if (old_flags == thread->scheduling_state) {
-        return;
-    }
-    ASSERT(is_locked);
-
-    if (old_flags == static_cast<u32>(ThreadSchedStatus::Runnable)) {
-        // In this case the thread was running, now it's pausing/exitting
-        if (thread->processor_id >= 0) {
-            Unschedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
-        }
-
-        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-            if (core != static_cast<u32>(thread->processor_id) &&
-                thread->affinity_mask.GetAffinity(core)) {
-                Unsuggest(thread->current_priority, core, thread);
-            }
-        }
-    } else if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
-        // The thread is now set to running from being stopped
-        if (thread->processor_id >= 0) {
-            Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
-        }
-
-        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-            if (core != static_cast<u32>(thread->processor_id) &&
-                thread->affinity_mask.GetAffinity(core)) {
-                Suggest(thread->current_priority, core, thread);
-            }
-        }
-    }
-
-    SetReselectionPending();
-}
-
-void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priority) {
-    if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable)) {
-        return;
-    }
-    ASSERT(is_locked);
-    if (thread->processor_id >= 0) {
-        Unschedule(old_priority, static_cast<u32>(thread->processor_id), thread);
-    }
-
-    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        if (core != static_cast<u32>(thread->processor_id) &&
-            thread->affinity_mask.GetAffinity(core)) {
-            Unsuggest(old_priority, core, thread);
-        }
-    }
-
-    if (thread->processor_id >= 0) {
-        if (thread == kernel.CurrentScheduler().GetCurrentThread()) {
-            SchedulePrepend(thread->current_priority, static_cast<u32>(thread->processor_id),
-                            thread);
-        } else {
-            Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
-        }
-    }
-
-    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        if (core != static_cast<u32>(thread->processor_id) &&
-            thread->affinity_mask.GetAffinity(core)) {
-            Suggest(thread->current_priority, core, thread);
-        }
-    }
-    thread->IncrementYieldCount();
-    SetReselectionPending();
-}
-
-void GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask,
-                                                 s32 old_core) {
-    if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable) ||
-        thread->current_priority >= THREADPRIO_COUNT) {
-        return;
-    }
-    ASSERT(is_locked);
-
-    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        if (((old_affinity_mask >> core) & 1) != 0) {
-            if (core == static_cast<u32>(old_core)) {
-                Unschedule(thread->current_priority, core, thread);
-            } else {
-                Unsuggest(thread->current_priority, core, thread);
-            }
-        }
-    }
-
-    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        if (thread->affinity_mask.GetAffinity(core)) {
-            if (core == static_cast<u32>(thread->processor_id)) {
-                Schedule(thread->current_priority, core, thread);
-            } else {
-                Suggest(thread->current_priority, core, thread);
-            }
-        }
-    }
-
-    thread->IncrementYieldCount();
-    SetReselectionPending();
-}
-
-void GlobalScheduler::Shutdown() {
-    for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        scheduled_queue[core].clear();
-        suggested_queue[core].clear();
-    }
-    thread_list.clear();
-}
-
-void GlobalScheduler::Lock() {
-    Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID();
-    ASSERT(!current_thread.IsInvalid());
-    if (current_thread == current_owner) {
-        ++scope_lock;
-    } else {
-        inner_lock.lock();
-        is_locked = true;
-        current_owner = current_thread;
-        ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle());
-        scope_lock = 1;
-    }
-}
-
-void GlobalScheduler::Unlock() {
-    if (--scope_lock != 0) {
-        ASSERT(scope_lock > 0);
-        return;
-    }
-    u32 cores_pending_reschedule = SelectThreads();
-    Core::EmuThreadHandle leaving_thread = current_owner;
-    current_owner = Core::EmuThreadHandle::InvalidHandle();
-    scope_lock = 1;
-    is_locked = false;
-    inner_lock.unlock();
-    EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread);
-}
-
-Scheduler::Scheduler(Core::System& system, std::size_t core_id) : system(system), core_id(core_id) {
-    switch_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>(OnSwitch), this);
-}
-
-Scheduler::~Scheduler() = default;
-
-bool Scheduler::HaveReadyThreads() const {
-    return system.GlobalScheduler().HaveReadyThreads(core_id);
-}
-
-Thread* Scheduler::GetCurrentThread() const {
-    if (current_thread) {
-        return current_thread.get();
-    }
-    return idle_thread.get();
-}
-
-Thread* Scheduler::GetSelectedThread() const {
-    return selected_thread.get();
-}
-
-u64 Scheduler::GetLastContextSwitchTicks() const {
-    return last_context_switch_time;
-}
-
-void Scheduler::TryDoContextSwitch() {
-    auto& phys_core = system.Kernel().CurrentPhysicalCore();
-    if (phys_core.IsInterrupted()) {
-        phys_core.ClearInterrupt();
-    }
-    guard.lock();
-    if (is_context_switch_pending) {
-        SwitchContext();
-    } else {
-        guard.unlock();
-    }
-}
-
-void Scheduler::OnThreadStart() {
-    SwitchContextStep2();
-}
-
-void Scheduler::Unload(Thread* thread) {
-    if (thread) {
-        thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
-        thread->SetIsRunning(false);
-        if (thread->IsContinuousOnSVC() && !thread->IsHLEThread()) {
-            system.ArmInterface(core_id).ExceptionalExit();
-            thread->SetContinuousOnSVC(false);
-        }
-        if (!thread->IsHLEThread() && !thread->HasExited()) {
-            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
-            cpu_core.SaveContext(thread->GetContext32());
-            cpu_core.SaveContext(thread->GetContext64());
-            // Save the TPIDR_EL0 system register in case it was modified.
-            thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
-            cpu_core.ClearExclusiveState();
-        }
-        thread->context_guard.unlock();
-    }
-}
-
-void Scheduler::Unload() {
-    Unload(current_thread.get());
-}
-
-void Scheduler::Reload(Thread* thread) {
-    if (thread) {
-        ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
-                   "Thread must be runnable.");
-
-        // Cancel any outstanding wakeup events for this thread
-        thread->SetIsRunning(true);
-        thread->SetWasRunning(false);
-        thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
-
-        auto* const thread_owner_process = thread->GetOwnerProcess();
-        if (thread_owner_process != nullptr) {
-            system.Kernel().MakeCurrentProcess(thread_owner_process);
-        }
-        if (!thread->IsHLEThread()) {
-            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
-            cpu_core.LoadContext(thread->GetContext32());
-            cpu_core.LoadContext(thread->GetContext64());
-            cpu_core.SetTlsAddress(thread->GetTLSAddress());
-            cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
-            cpu_core.ClearExclusiveState();
-        }
-    }
-}
-
-void Scheduler::Reload() {
-    Reload(current_thread.get());
-}
-
-void Scheduler::SwitchContextStep2() {
-    // Load context of new thread
-    Reload(selected_thread.get());
-
-    TryDoContextSwitch();
-}
-
-void Scheduler::SwitchContext() {
-    current_thread_prev = current_thread;
-    selected_thread = selected_thread_set;
-    Thread* previous_thread = current_thread_prev.get();
-    Thread* new_thread = selected_thread.get();
-    current_thread = selected_thread;
-
-    is_context_switch_pending = false;
-
-    if (new_thread == previous_thread) {
-        guard.unlock();
-        return;
-    }
-
-    Process* const previous_process = system.Kernel().CurrentProcess();
-
-    UpdateLastContextSwitchTime(previous_thread, previous_process);
-
-    // Save context for previous thread
-    Unload(previous_thread);
-
-    std::shared_ptr<Common::Fiber>* old_context;
-    if (previous_thread != nullptr) {
-        old_context = &previous_thread->GetHostContext();
-    } else {
-        old_context = &idle_thread->GetHostContext();
-    }
-    guard.unlock();
-
-    Common::Fiber::YieldTo(*old_context, switch_fiber);
-    /// When a thread wakes up, the scheduler may have changed to other in another core.
-    auto& next_scheduler = system.Kernel().CurrentScheduler();
-    next_scheduler.SwitchContextStep2();
-}
-
-void Scheduler::OnSwitch(void* this_scheduler) {
-    Scheduler* sched = static_cast<Scheduler*>(this_scheduler);
-    sched->SwitchToCurrent();
-}
-
-void Scheduler::SwitchToCurrent() {
-    while (true) {
-        {
-            std::scoped_lock lock{guard};
-            selected_thread = selected_thread_set;
-            current_thread = selected_thread;
-            is_context_switch_pending = false;
-        }
-        const auto is_switch_pending = [this] {
-            std::scoped_lock lock{guard};
-            return is_context_switch_pending;
-        };
-        do {
-            if (current_thread != nullptr && !current_thread->IsHLEThread()) {
-                current_thread->context_guard.lock();
-                if (!current_thread->IsRunnable()) {
-                    current_thread->context_guard.unlock();
-                    break;
-                }
-                if (static_cast<u32>(current_thread->GetProcessorID()) != core_id) {
-                    current_thread->context_guard.unlock();
-                    break;
-                }
-            }
-            std::shared_ptr<Common::Fiber>* next_context;
-            if (current_thread != nullptr) {
-                next_context = &current_thread->GetHostContext();
-            } else {
-                next_context = &idle_thread->GetHostContext();
-            }
-            Common::Fiber::YieldTo(switch_fiber, *next_context);
-        } while (!is_switch_pending());
-    }
-}
-
-void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
-    const u64 prev_switch_ticks = last_context_switch_time;
-    const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks();
-    const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
-
-    if (thread != nullptr) {
-        thread->UpdateCPUTimeTicks(update_ticks);
-    }
-
-    if (process != nullptr) {
-        process->UpdateCPUTimeTicks(update_ticks);
-    }
-
-    last_context_switch_time = most_recent_switch_ticks;
-}
-
-void Scheduler::Initialize() {
-    std::string name = "Idle Thread Id:" + std::to_string(core_id);
-    std::function<void(void*)> init_func = Core::CpuManager::GetIdleThreadStartFunc();
-    void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater();
-    ThreadType type = static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE);
-    auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast<u32>(core_id), 0,
-                                     nullptr, std::move(init_func), init_func_parameter);
-    idle_thread = std::move(thread_res).Unwrap();
-}
-
-void Scheduler::Shutdown() {
-    current_thread = nullptr;
-    selected_thread = nullptr;
-}
-
-SchedulerLock::SchedulerLock(KernelCore& kernel) : kernel{kernel} {
-    kernel.GlobalScheduler().Lock();
-}
-
-SchedulerLock::~SchedulerLock() {
-    kernel.GlobalScheduler().Unlock();
-}
-
-SchedulerLockAndSleep::SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle,
-                                             Thread* time_task, s64 nanoseconds)
-    : SchedulerLock{kernel}, event_handle{event_handle}, time_task{time_task}, nanoseconds{
-                                                                                   nanoseconds} {
-    event_handle = InvalidHandle;
-}
-
-SchedulerLockAndSleep::~SchedulerLockAndSleep() {
-    if (sleep_cancelled) {
-        return;
-    }
-    auto& time_manager = kernel.TimeManager();
-    time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
-}
-
-void SchedulerLockAndSleep::Release() {
-    if (sleep_cancelled) {
-        return;
-    }
-    auto& time_manager = kernel.TimeManager();
-    time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
-    sleep_cancelled = true;
-}
-
-} // namespace Kernel
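
The deleted Lock()/Unlock() pair above implemented a re-entrant lock keyed on the owning EmuThreadHandle, with the outermost unlock doing thread selection and rescheduling; KAbstractSchedulerLock (k_scheduler_lock.h in the CMake list) replaces it along the same lines. A minimal generic sketch of that re-entrancy, using std types instead of the kernel's handle and interrupt plumbing:

    #include <atomic>
    #include <cassert>
    #include <mutex>
    #include <thread>

    // Minimal re-entrant lock in the style of the removed Lock()/Unlock():
    // the owner re-locks by bumping a depth counter; the outermost Unlock()
    // is where the real code selects threads and triggers rescheduling.
    class ReentrantSchedulerLockSketch {
    public:
        void Lock() {
            const auto self = std::this_thread::get_id();
            if (owner.load(std::memory_order_acquire) == self) {
                ++depth; // same owner: just extend the scope, like ++scope_lock
                return;
            }
            inner.lock();
            owner.store(self, std::memory_order_release);
            depth = 1;
        }

        void Unlock() {
            assert(owner.load(std::memory_order_acquire) == std::this_thread::get_id());
            if (--depth != 0) {
                return; // still held by an enclosing scope
            }
            // The real Unlock() ran SelectThreads() and interrupted cores here.
            owner.store(std::thread::id{}, std::memory_order_release);
            inner.unlock();
        }

    private:
        std::mutex inner;
        std::atomic<std::thread::id> owner{};
        int depth = 0;
    };

The real lock additionally tracks an EmuThreadHandle rather than a host thread id, since guest threads can migrate between host cores.
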
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 8c19f25341..bf2c900280 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -14,9 +14,9 @@
 #include "core/hle/kernel/client_session.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/hle_ipc.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
 #include "core/hle/kernel/thread.h"
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 9742aaf4c4..2612a6b0d7 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -24,6 +24,7 @@
 #include "core/hle/kernel/client_session.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/memory_block.h"
 #include "core/hle/kernel/memory/page_table.h"
@@ -32,7 +33,6 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/resource_limit.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/shared_memory.h"
 #include "core/hle/kernel/svc.h"
 #include "core/hle/kernel/svc_types.h"
@@ -332,7 +332,8 @@ static ResultCode ConnectToNamedPort32(Core::System& system, Handle* out_handle,
 
 /// Makes a blocking IPC call to an OS service.
 static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
-    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    auto& kernel = system.Kernel();
+    const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
     std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle);
     if (!session) {
         LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
@@ -341,9 +342,9 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
 
     LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
 
-    auto thread = system.CurrentScheduler().GetCurrentThread();
+    auto thread = kernel.CurrentScheduler()->GetCurrentThread();
     {
-        SchedulerLock lock(system.Kernel());
+        SchedulerLock lock(kernel);
         thread->InvalidateHLECallback();
         thread->SetStatus(ThreadStatus::WaitIPC);
         session->SendSyncRequest(SharedFrom(thread), system.Memory(), system.CoreTiming());
@@ -352,12 +353,12 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
     if (thread->HasHLECallback()) {
         Handle event_handle = thread->GetHLETimeEvent();
         if (event_handle != InvalidHandle) {
-            auto& time_manager = system.Kernel().TimeManager();
+            auto& time_manager = kernel.TimeManager();
             time_manager.UnscheduleTimeEvent(event_handle);
         }
 
         {
-            SchedulerLock lock(system.Kernel());
+            SchedulerLock lock(kernel);
             auto* sync_object = thread->GetHLESyncObject();
             sync_object->RemoveWaitingThread(SharedFrom(thread));
         }
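A pattern repeated throughout this file: the kernel reference is cached once, and KernelCore::CurrentScheduler(), which now appears to return a KScheduler pointer rather than a Scheduler reference, is dereferenced at each call site. A reduced, compilable illustration with stub types (assumed shapes, not the real declarations from k_scheduler.h):

```cpp
// Stub kernel objects; only the accessor shapes matter here.
struct Thread {};

struct KScheduler {
    Thread* GetCurrentThread() { return &current; }
    Thread current;
};

struct KernelCore {
    // Assumed new signature: a pointer per core, where the old API
    // returned a Scheduler&.
    KScheduler* CurrentScheduler() { return &core_scheduler; }
    KScheduler core_scheduler;
};

Thread* CurrentThreadOf(KernelCore& kernel) {
    return kernel.CurrentScheduler()->GetCurrentThread();
}
```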
@@ -665,7 +666,7 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
 
         handle_debug_buffer(info1, info2);
 
-        auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+        auto* const current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
         const auto thread_processor_id = current_thread->GetProcessorID();
         system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace();
     }
@@ -917,7 +918,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         }
 
         const auto& core_timing = system.CoreTiming();
-        const auto& scheduler = system.CurrentScheduler();
+        const auto& scheduler = *system.Kernel().CurrentScheduler();
         const auto* const current_thread = scheduler.GetCurrentThread();
         const bool same_thread = current_thread == thread.get();
 
@@ -1085,7 +1086,7 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act
         return ERR_INVALID_HANDLE;
     }
 
-    if (thread.get() == system.CurrentScheduler().GetCurrentThread()) {
+    if (thread.get() == system.Kernel().CurrentScheduler()->GetCurrentThread()) {
         LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread");
         return ERR_BUSY;
     }
@@ -1118,7 +1119,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H
         return ERR_INVALID_HANDLE;
     }
 
-    if (thread.get() == system.CurrentScheduler().GetCurrentThread()) {
+    if (thread.get() == system.Kernel().CurrentScheduler()->GetCurrentThread()) {
         LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread");
         return ERR_BUSY;
     }
@@ -1475,7 +1476,7 @@ static void ExitProcess(Core::System& system) {
     current_process->PrepareForTermination();
 
     // Kill the current thread
-    system.CurrentScheduler().GetCurrentThread()->Stop();
+    system.Kernel().CurrentScheduler()->GetCurrentThread()->Stop();
 }
 
 static void ExitProcess32(Core::System& system) {
@@ -1576,8 +1577,8 @@ static ResultCode StartThread32(Core::System& system, Handle thread_handle) {
 static void ExitThread(Core::System& system) {
     LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
 
-    auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
-    system.GlobalScheduler().RemoveThread(SharedFrom(current_thread));
+    auto* const current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
+    system.GlobalSchedulerContext().RemoveThread(SharedFrom(current_thread));
     current_thread->Stop();
 }
 
@@ -1590,37 +1591,31 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
     LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds);
 
     enum class SleepType : s64 {
-        YieldWithoutLoadBalancing = 0,
-        YieldWithLoadBalancing = -1,
+        YieldWithoutCoreMigration = 0,
+        YieldWithCoreMigration = -1,
         YieldAndWaitForLoadBalancing = -2,
     };
 
-    auto& scheduler = system.CurrentScheduler();
-    auto* const current_thread = scheduler.GetCurrentThread();
-    bool is_redundant = false;
-
+    auto& scheduler = *system.Kernel().CurrentScheduler();
     if (nanoseconds <= 0) {
         switch (static_cast<SleepType>(nanoseconds)) {
-        case SleepType::YieldWithoutLoadBalancing: {
-            auto pair = current_thread->YieldSimple();
-            is_redundant = pair.second;
+        case SleepType::YieldWithoutCoreMigration: {
+            scheduler.YieldWithoutCoreMigration();
             break;
         }
-        case SleepType::YieldWithLoadBalancing: {
-            auto pair = current_thread->YieldAndBalanceLoad();
-            is_redundant = pair.second;
+        case SleepType::YieldWithCoreMigration: {
+            scheduler.YieldWithCoreMigration();
             break;
         }
         case SleepType::YieldAndWaitForLoadBalancing: {
-            auto pair = current_thread->YieldAndWaitForLoadBalancing();
-            is_redundant = pair.second;
+            scheduler.YieldToAnyThread();
             break;
         }
         default:
             UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
         }
     } else {
-        current_thread->Sleep(nanoseconds);
+        scheduler.GetCurrentThread()->Sleep(nanoseconds);
     }
 }
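With the yield logic moved onto the scheduler, SleepThread no longer threads an is_redundant flag back out of the thread object; the sentinel values in the nanoseconds argument simply dispatch to the renamed KScheduler entry points. A reduced, compilable sketch of that dispatch (stub scheduler, not the real class):

```cpp
#include <cstdint>

enum class SleepType : std::int64_t {
    YieldWithoutCoreMigration = 0,
    YieldWithCoreMigration = -1,
    YieldAndWaitForLoadBalancing = -2,
};

struct SchedulerStub {
    void YieldWithoutCoreMigration() {}  // stay on the current core
    void YieldWithCoreMigration() {}     // may migrate to another core
    void YieldToAnyThread() {}           // give up the core entirely
};

void SleepThreadSketch(SchedulerStub& scheduler, std::int64_t nanoseconds) {
    if (nanoseconds > 0) {
        // A positive value is a real timed sleep; elided in this sketch.
        return;
    }
    switch (static_cast<SleepType>(nanoseconds)) {
    case SleepType::YieldWithoutCoreMigration:
        scheduler.YieldWithoutCoreMigration();
        break;
    case SleepType::YieldWithCoreMigration:
        scheduler.YieldWithCoreMigration();
        break;
    case SleepType::YieldAndWaitForLoadBalancing:
        scheduler.YieldToAnyThread();
        break;
    default:
        break;  // the real code hits UNREACHABLE_MSG here
    }
}
```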
 
@@ -1656,8 +1651,8 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
     ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
     auto& kernel = system.Kernel();
     Handle event_handle;
-    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
-    auto* const current_process = system.Kernel().CurrentProcess();
+    Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
+    auto* const current_process = kernel.CurrentProcess();
     {
         SchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds);
         const auto& handle_table = current_process->GetHandleTable();
@@ -2627,7 +2622,7 @@ void Call(Core::System& system, u32 immediate) {
     auto& kernel = system.Kernel();
     kernel.EnterSVCProfile();
 
-    auto* thread = system.CurrentScheduler().GetCurrentThread();
+    auto* thread = kernel.CurrentScheduler()->GetCurrentThread();
     thread->SetContinuousOnSVC(true);
 
     const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index 8b875d853c..342fb4516a 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -5,8 +5,8 @@
 #include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/kernel/thread.h"
@@ -37,7 +37,7 @@ void Synchronization::SignalObject(SynchronizationObject& obj) const {
 std::pair<ResultCode, Handle> Synchronization::WaitFor(
     std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) {
     auto& kernel = system.Kernel();
-    auto* const thread = system.CurrentScheduler().GetCurrentThread();
+    auto* const thread = kernel.CurrentScheduler()->GetCurrentThread();
     Handle event_handle = InvalidHandle;
     {
         SchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds);
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 38b4a09876..804e07f2bd 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -17,10 +17,10 @@
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/result.h"
@@ -186,9 +186,11 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
     thread->status = ThreadStatus::Dormant;
     thread->entry_point = entry_point;
     thread->stack_top = stack_top;
+    thread->disable_count = 1;
     thread->tpidr_el0 = 0;
     thread->nominal_priority = thread->current_priority = priority;
-    thread->last_running_ticks = 0;
+    thread->schedule_count = -1;
+    thread->last_scheduled_tick = 0;
     thread->processor_id = processor_id;
     thread->ideal_core = processor_id;
     thread->affinity_mask.SetAffinity(processor_id, true);
@@ -201,7 +203,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
     thread->owner_process = owner_process;
     thread->type = type_flags;
     if ((type_flags & THREADTYPE_IDLE) == 0) {
-        auto& scheduler = kernel.GlobalScheduler();
+        auto& scheduler = kernel.GlobalSchedulerContext();
         scheduler.AddThread(thread);
     }
     if (owner_process) {
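Two of the new initializations are worth noting: disable_count starts at 1, suggesting dispatch stays disabled until the thread is actually started, and schedule_count starts at -1, meaning never scheduled. Reading disable_count as a nested disable/enable guard is an assumption based on the DisableDispatch/EnableDispatch accessors added to thread.h below; a reduced sketch of that mechanism:

```cpp
#include <atomic>
#include <cassert>

struct DispatchGuardSketch {
    // Threads are created with dispatch disabled, as in Thread::Create above.
    std::atomic_int disable_count{1};

    void DisableDispatch() {
        assert(disable_count.load() >= 0);
        ++disable_count;
    }
    void EnableDispatch() {
        assert(disable_count.load() > 0);
        --disable_count;
    }
    bool CanBeScheduled() const {
        return disable_count.load() == 0;  // assumption: zero means dispatchable
    }
};
```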
@@ -402,39 +404,12 @@ ResultCode Thread::Sleep(s64 nanoseconds) {
     return RESULT_SUCCESS;
 }
 
-std::pair<ResultCode, bool> Thread::YieldSimple() {
-    bool is_redundant = false;
-    {
-        SchedulerLock lock(kernel);
-        is_redundant = kernel.GlobalScheduler().YieldThread(this);
-    }
-    return {RESULT_SUCCESS, is_redundant};
-}
-
-std::pair<ResultCode, bool> Thread::YieldAndBalanceLoad() {
-    bool is_redundant = false;
-    {
-        SchedulerLock lock(kernel);
-        is_redundant = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this);
-    }
-    return {RESULT_SUCCESS, is_redundant};
-}
-
-std::pair<ResultCode, bool> Thread::YieldAndWaitForLoadBalancing() {
-    bool is_redundant = false;
-    {
-        SchedulerLock lock(kernel);
-        is_redundant = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this);
-    }
-    return {RESULT_SUCCESS, is_redundant};
-}
-
 void Thread::AddSchedulingFlag(ThreadSchedFlags flag) {
     const u32 old_state = scheduling_state;
     pausing_state |= static_cast<u32>(flag);
     const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
     scheduling_state = base_scheduling | pausing_state;
-    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
+    KScheduler::OnThreadStateChanged(kernel, this, old_state);
 }
 
 void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) {
@@ -442,19 +417,20 @@ void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) {
     pausing_state &= ~static_cast<u32>(flag);
     const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
     scheduling_state = base_scheduling | pausing_state;
-    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
+    KScheduler::OnThreadStateChanged(kernel, this, old_state);
 }
 
 void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
     const u32 old_state = scheduling_state;
     scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) |
                        static_cast<u32>(new_status);
-    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
+    KScheduler::OnThreadStateChanged(kernel, this, old_state);
 }
 
 void Thread::SetCurrentPriority(u32 new_priority) {
     const u32 old_priority = std::exchange(current_priority, new_priority);
-    kernel.GlobalScheduler().AdjustSchedulingOnPriority(this, old_priority);
+    KScheduler::OnThreadPriorityChanged(kernel, this, kernel.CurrentScheduler()->GetCurrentThread(),
+                                        old_priority);
 }
 
 ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
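The scheduling-state setters now notify the scheduler through static KScheduler hooks that take the kernel explicitly, instead of calling methods on a global scheduler instance. Declarations as inferred from the call sites in this file; the real ones live in k_scheduler.h:

```cpp
#include <cstdint>
using u32 = std::uint32_t;
using s32 = std::int32_t;

struct KernelCore;
class Thread;
class KAffinityMask;

class KScheduler {
public:
    static void OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state);
    static void OnThreadPriorityChanged(KernelCore& kernel, Thread* thread,
                                        Thread* current_thread, u32 old_priority);
    static void OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread,
                                            const KAffinityMask& old_affinity,
                                            s32 old_core);
};
```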
@@ -480,10 +456,10 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
     if (use_override) {
         ideal_core_override = new_core;
     } else {
-        const auto old_affinity_mask = affinity_mask.GetAffinityMask();
+        const auto old_affinity_mask = affinity_mask;
         affinity_mask.SetAffinityMask(new_affinity_mask);
         ideal_core = new_core;
-        if (old_affinity_mask != new_affinity_mask) {
+        if (old_affinity_mask.GetAffinityMask() != new_affinity_mask) {
             const s32 old_core = processor_id;
             if (processor_id >= 0 && !affinity_mask.GetAffinity(processor_id)) {
                 if (static_cast<s32>(ideal_core) < 0) {
@@ -493,7 +469,7 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
                     processor_id = ideal_core;
                 }
             }
-            kernel.GlobalScheduler().AdjustSchedulingOnAffinity(this, old_affinity_mask, old_core);
+            KScheduler::OnThreadAffinityMaskChanged(kernel, this, old_affinity_mask, old_core);
         }
     }
     return RESULT_SUCCESS;
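This hunk snapshots the whole KAffinityMask object rather than just its raw u64 before mutating it, so the notification hook receives the old per-core bits intact. A reduced illustration of that copy-then-compare shape (stub mask type):

```cpp
#include <cstdint>

struct MaskStub {
    std::uint64_t raw = 0;
    std::uint64_t GetAffinityMask() const { return raw; }
    void SetAffinityMask(std::uint64_t m) { raw = m; }
    bool GetAffinity(int core) const { return (raw >> core) & 1; }
};

void UpdateMask(MaskStub& mask, std::uint64_t new_mask) {
    const MaskStub old_mask = mask;  // copy the object, not just the integer
    mask.SetAffinityMask(new_mask);
    if (old_mask.GetAffinityMask() != new_mask) {
        // ...re-pick the core if the thread can no longer run where it was,
        // then notify the scheduler with the full old mask, as above.
    }
}
```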
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 5192ecff12..f1aa358a4e 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -28,10 +28,10 @@ class System;
 
 namespace Kernel {
 
-class GlobalScheduler;
+class GlobalSchedulerContext;
 class KernelCore;
 class Process;
-class Scheduler;
+class KScheduler;
 
 enum ThreadPriority : u32 {
     THREADPRIO_HIGHEST = 0,            ///< Highest thread priority
@@ -346,8 +346,11 @@ public:
 
     void SetStatus(ThreadStatus new_status);
 
-    u64 GetLastRunningTicks() const {
-        return last_running_ticks;
+    constexpr s64 GetLastScheduledTick() const {
+        return this->last_scheduled_tick;
+    }
+    constexpr void SetLastScheduledTick(s64 tick) {
+        this->last_scheduled_tick = tick;
     }
 
     u64 GetTotalCPUTimeTicks() const {
@@ -362,10 +365,18 @@ public:
         return processor_id;
     }
 
+    s32 GetActiveCore() const {
+        return GetProcessorID();
+    }
+
     void SetProcessorID(s32 new_core) {
         processor_id = new_core;
     }
 
+    void SetActiveCore(s32 new_core) {
+        processor_id = new_core;
+    }
+
     Process* GetOwnerProcess() {
         return owner_process;
     }
@@ -479,21 +490,11 @@ public:
     /// Sleeps this thread for the given amount of nanoseconds.
     ResultCode Sleep(s64 nanoseconds);
 
-    /// Yields this thread without rebalancing loads.
-    std::pair<ResultCode, bool> YieldSimple();
-
-    /// Yields this thread and does a load rebalancing.
-    std::pair<ResultCode, bool> YieldAndBalanceLoad();
-
-    /// Yields this thread and if the core is left idle, loads are rebalanced
-    std::pair<ResultCode, bool> YieldAndWaitForLoadBalancing();
-
-    void IncrementYieldCount() {
-        yield_count++;
+    constexpr s64 GetYieldScheduleCount() const {
+        return this->schedule_count;
     }
-
-    u64 GetYieldCount() const {
-        return yield_count;
+    constexpr void SetYieldScheduleCount(s64 count) {
+        this->schedule_count = count;
     }
 
     ThreadSchedStatus GetSchedulingStatus() const {
@@ -569,9 +570,62 @@ public:
         return has_exited;
     }
 
+    struct QueueEntry {
+    private:
+        Thread* prev;
+        Thread* next;
+
+    public:
+        constexpr QueueEntry() : prev(nullptr), next(nullptr) { /* ... */
+        }
+
+        constexpr void Initialize() {
+            this->prev = nullptr;
+            this->next = nullptr;
+        }
+
+        constexpr Thread* GetPrev() const {
+            return this->prev;
+        }
+        constexpr Thread* GetNext() const {
+            return this->next;
+        }
+        constexpr void SetPrev(Thread* t) {
+            this->prev = t;
+        }
+        constexpr void SetNext(Thread* t) {
+            this->next = t;
+        }
+    };
+
+    constexpr QueueEntry& GetPriorityQueueEntry(s32 core) {
+        return this->per_core_priority_queue_entry[core];
+    }
+    constexpr const QueueEntry& GetPriorityQueueEntry(s32 core) const {
+        return this->per_core_priority_queue_entry[core];
+    }
+
+    s32 GetDisableDispatchCount() const {
+        return disable_count;
+    }
+
+    void DisableDispatch() {
+        ASSERT(GetDisableDispatchCount() >= 0);
+        disable_count++;
+    }
+
+    void EnableDispatch() {
+        ASSERT(GetDisableDispatchCount() > 0);
+        disable_count--;
+    }
+
+    ThreadStatus status = ThreadStatus::Dormant;
+    u32 scheduling_state = 0;
+
 private:
-    friend class GlobalScheduler;
-    friend class Scheduler;
+    friend class GlobalSchedulerContext;
+    friend class KScheduler;
+    friend class Process;
 
     void SetSchedulingStatus(ThreadSchedStatus new_status);
     void AddSchedulingFlag(ThreadSchedFlags flag);
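QueueEntry gives every thread one prev/next link pair per core, so the scheduler's per-core priority queues can be intrusive: enqueueing never allocates and unlinking is O(1). A minimal sketch of pushing onto such a list (illustrative types; the real queues sit behind GetPriorityQueueEntry and k_priority_queue.h):

```cpp
constexpr int NumCores = 4;  // stand-in for Core::Hardware::NUM_CPU_CORES

struct ThreadStub {
    struct Entry {
        ThreadStub* prev = nullptr;
        ThreadStub* next = nullptr;
    };
    Entry entry[NumCores];  // like per_core_priority_queue_entry
};

// Append to the back of core's list; head/tail are owned by the scheduler.
void PushBack(ThreadStub*& head, ThreadStub*& tail, ThreadStub* t, int core) {
    t->entry[core].prev = tail;
    t->entry[core].next = nullptr;
    if (tail != nullptr) {
        tail->entry[core].next = t;
    } else {
        head = t;
    }
    tail = t;
}
```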
@@ -586,10 +640,9 @@ private:
 
     u64 thread_id = 0;
 
-    ThreadStatus status = ThreadStatus::Dormant;
-
     VAddr entry_point = 0;
     VAddr stack_top = 0;
+    std::atomic_int disable_count = 0;
 
     ThreadType type;
 
@@ -603,9 +656,8 @@ private:
     u32 current_priority = 0;
 
     u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
-    u64 last_running_ticks = 0;   ///< CPU tick when thread was last running
-    u64 yield_count = 0;          ///< Number of redundant yields carried by this thread.
-                                  ///< a redundant yield is one where no scheduling is changed
+    s64 schedule_count{};
+    s64 last_scheduled_tick{};
 
     s32 processor_id = 0;
 
@@ -647,7 +699,9 @@ private:
     Handle hle_time_event;
     SynchronizationObject* hle_object;
 
-    Scheduler* scheduler = nullptr;
+    KScheduler* scheduler = nullptr;
+
+    QueueEntry per_core_priority_queue_entry[Core::Hardware::NUM_CPU_CORES]{};
 
     u32 ideal_core{0xFFFFFFFF};
     KAffinityMask affinity_mask{};
@@ -655,7 +709,6 @@ private:
     s32 ideal_core_override = -1;
     u32 affinity_override_count = 0;
 
-    u32 scheduling_state = 0;
     u32 pausing_state = 0;
     bool is_running = false;
     bool is_waiting_on_sync = false;
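These fields trade the old yield_count redundancy counter for a schedule_count/last_scheduled_tick pair, exposed above through GetYieldScheduleCount/SetYieldScheduleCount. One plausible use of the pair, an assumption rather than something this patch spells out, is detecting a redundant yield by comparing the count recorded at yield time with the current one:

```cpp
#include <cstdint>

struct YieldTrackerSketch {
    std::int64_t schedule_count = -1;  // -1: never scheduled, as in Thread::Create
    std::int64_t yield_snapshot = -2;  // sentinel distinct from schedule_count

    void OnScheduled() { ++schedule_count; }
    void OnYield() { yield_snapshot = schedule_count; }

    // True when no reschedule has happened since the previous yield.
    bool WasRedundantYield() const { return yield_snapshot == schedule_count; }
};
```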
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index caf329bfb3..8e47696946 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -7,8 +7,8 @@
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/time_manager.h"
 
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index 7b7ac282dd..abc753d5d1 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -10,8 +10,8 @@
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/service/time/interface.h"
 #include "core/hle/service/time/time.h"
 #include "core/hle/service/time/time_sharedmemory.h"
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index c4ae1d61fd..546a2cd4d6 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -13,10 +13,10 @@
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/kernel/thread.h"
 #include "core/memory.h"
@@ -101,7 +101,7 @@ std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList()
     };
 
     const auto& system = Core::System::GetInstance();
-    add_threads(system.GlobalScheduler().GetThreadList());
+    add_threads(system.GlobalSchedulerContext().GetThreadList());
 
     return item_list;
 }
@@ -356,7 +356,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
                                                       .arg(thread.GetPriority())
                                                       .arg(thread.GetNominalPriority())));
     list.push_back(std::make_unique<WaitTreeText>(
-        tr("last running ticks = %1").arg(thread.GetLastRunningTicks())));
+        tr("last running ticks = %1").arg(thread.GetLastScheduledTick())));
 
     const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
     if (mutex_wait_address != 0) {
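One cosmetic nit to close on: the wait-tree label still says "last running ticks" while the value behind it is now the last scheduled tick. If the wording should follow the accessor rename, the adjusted line would read (hypothetical wording, not part of this patch):

```cpp
list.push_back(std::make_unique<WaitTreeText>(
    tr("last scheduled tick = %1").arg(thread.GetLastScheduledTick())));
```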