Interrupt GPU command processing when a frame's fence is reached. (#1741)

* Interrupt GPU command processing when a frame's fence is reached.

* Accumulate times rather than %s

* Accurate timer for vsync

Spin wait for the last 0.667ms of a frame. Avoids issues caused by signalling vsync at 16ms (periodic stutters in SMO).

* Use event wait for better timing.

* Fix lazy wait

Windows doesn't seem to want to do 1ms consistently, so force a spin if we're less than 2ms.

* A bit more efficiency on frame waits.

Should now sometimes wait only the remaining 0.6667ms instead of 1.6667ms (odd waits above 1ms are reliable, unlike 1ms waits).

* Better swap interval 0 solution

737 fps without breaking a sweat. Downside: vsync can no longer be disabled on games that use the event heavily (Link's Awakening - which is OK since it breaks anyway).

* Fix comment.

* Address Comments.
This commit is contained in:
riperiperi 2020-12-17 18:39:52 +00:00 committed by GitHub
parent eae39f80e7
commit 10aa11ce13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 146 additions and 30 deletions

View File

@ -66,6 +66,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
private readonly AutoResetEvent _event;
private readonly GPFifoProcessor _processor;
private bool _interrupt;
/// <summary>
/// Creates a new instance of the GPU General Purpose FIFO device.
/// </summary>
@ -163,7 +165,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <returns>True if commands were received, false if wait timed out</returns>
public bool WaitForCommands()
{
    // Fast path: if command buffers are already queued, report them immediately
    // instead of blocking on the event.
    if (!_commandBufferQueue.IsEmpty)
    {
        return true;
    }

    // Otherwise wait up to 8ms for the event to be set, then re-check the queue
    // before reporting that commands were received.
    return _event.WaitOne(8) && !_commandBufferQueue.IsEmpty;
}
/// <summary>
@ -171,13 +173,23 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// </summary>
public void DispatchCalls()
{
    // Drain queued command buffers until the queue is empty, _ibEnable is false,
    // or Interrupt() has been requested. _interrupt is tested before TryDequeue so
    // an interrupted loop never dequeues an entry that it will not process.
    while (_ibEnable && !_interrupt && _commandBufferQueue.TryDequeue(out CommandBuffer entry))
    {
        _currentCommandBuffer = entry;
        _currentCommandBuffer.Fetch(_context);
        _processor.Process(_currentCommandBuffer.Words);
    }

    // Clear the interrupt request so the next call processes commands again.
    _interrupt = false;
}
/// <summary>
/// Requests that command processing stop. The DispatchCalls loop checks this flag and exits once it is set.
/// </summary>
public void Interrupt() => _interrupt = true;
/// <summary>

View File

@ -2,6 +2,7 @@ using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Image;
using System;
using System.Collections.Concurrent;
using System.Threading;
namespace Ryujinx.Graphics.Gpu
{
@ -69,6 +70,8 @@ namespace Ryujinx.Graphics.Gpu
private readonly ConcurrentQueue<PresentationTexture> _frameQueue;
private int _framesAvailable;
/// <summary>
/// Creates a new instance of the GPU presentation window.
/// </summary>
@ -157,5 +160,29 @@ namespace Ryujinx.Graphics.Gpu
pt.ReleaseCallback(pt.UserObj);
}
}
/// <summary>
/// Atomically increments the count of queued frames that are ready to be acquired.
/// </summary>
public void SignalFrameReady() => Interlocked.Increment(ref _framesAvailable);
/// <summary>
/// Determine if any frames are available, and atomically decrement the available count if there are.
/// </summary>
/// <remarks>
/// The check and the decrement are performed as a single compare-and-swap, so two concurrent
/// callers can never both consume the same frame or drive the count below zero.
/// </remarks>
/// <returns>True if a frame was available and has been consumed, false otherwise</returns>
public bool ConsumeFrameAvailable()
{
    int available = Volatile.Read(ref _framesAvailable);

    while (available > 0)
    {
        // Only decrement if the count is still the value we observed; otherwise retry with the fresh value.
        int observed = Interlocked.CompareExchange(ref _framesAvailable, available - 1, available);

        if (observed == available)
        {
            return true;
        }

        available = observed;
    }

    return false;
}
}
}

View File

@ -46,6 +46,8 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
public const int BufferHistoryArraySize = 8;
public event Action BufferQueued;
public BufferQueueCore(Switch device, long pid)
{
Slots = new BufferSlotArray();
@ -197,6 +199,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
WaitForLock();
}
/// <summary>
/// Raises the BufferQueued event if at least one handler is subscribed.
/// </summary>
public void SignalQueueEvent()
{
    // Copy the delegate first so the null check and the invocation see the same value.
    Action handler = BufferQueued;

    if (handler != null)
    {
        handler();
    }
}
private void WaitForLock()
{
if (Active)

View File

@ -486,6 +486,8 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
Monitor.PulseAll(_callbackLock);
}
Core.SignalQueueEvent();
return Status.Success;
}

View File

@ -25,8 +25,12 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
private Stopwatch _chrono;
private ManualResetEvent _event = new ManualResetEvent(false);
private AutoResetEvent _nextFrameEvent = new AutoResetEvent(true);
private long _ticks;
private long _ticksPerFrame;
private long _spinTicks;
private long _1msTicks;
private int _swapInterval;
@ -61,8 +65,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
};
_chrono = new Stopwatch();
_chrono.Start();
_ticks = 0;
_spinTicks = Stopwatch.Frequency / 500;
_1msTicks = Stopwatch.Frequency / 1000;
UpdateSwapInterval(1);
@ -76,6 +83,7 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
// If the swap interval is 0, Game VSync is disabled.
if (_swapInterval == 0)
{
_nextFrameEvent.Set();
_ticksPerFrame = 1;
}
else
@ -129,6 +137,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
BufferQueueCore core = BufferQueue.CreateBufferQueue(_device, pid, out BufferQueueProducer producer, out BufferQueueConsumer consumer);
core.BufferQueued += () =>
{
_nextFrameEvent.Set();
};
_layers.Add(layerId, new Layer
{
ProducerBinderId = HOSBinderDriverServer.RegisterBinderObject(producer),
@ -189,11 +202,25 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
{
_isRunning = true;
long lastTicks = _chrono.ElapsedTicks;
while (_isRunning)
{
_ticks += _chrono.ElapsedTicks;
long ticks = _chrono.ElapsedTicks;
_chrono.Restart();
if (_swapInterval == 0)
{
Compose();
_device.System?.SignalVsync();
_nextFrameEvent.WaitOne(17);
lastTicks = ticks;
}
else
{
_ticks += ticks - lastTicks;
lastTicks = ticks;
if (_ticks >= _ticksPerFrame)
{
@ -201,11 +228,33 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
_device.System?.SignalVsync();
_ticks = Math.Min(_ticks - _ticksPerFrame, _ticksPerFrame);
// Apply a maximum bound of 3 frames to the tick remainder, in case some event causes Ryujinx to pause for a long time or messes with the timer.
_ticks = Math.Min(_ticks - _ticksPerFrame, _ticksPerFrame * 3);
}
// Sleep the minimal amount of time to avoid being too expensive.
Thread.Sleep(1);
// Sleep if possible. If the time until the next frame is too short, spin wait instead.
long diff = _ticksPerFrame - (_ticks + _chrono.ElapsedTicks - ticks);
if (diff > 0)
{
if (diff < _spinTicks)
{
do
{
// SpinWait is a little more HT/SMT friendly than aggressively updating/checking ticks.
// The value of 5 still gives us quite a bit of precision (~0.0003ms variance at worst) while waiting a reasonable amount of time.
Thread.SpinWait(5);
ticks = _chrono.ElapsedTicks;
_ticks += ticks - lastTicks;
lastTicks = ticks;
} while (_ticks < _ticksPerFrame);
}
else
{
_event.WaitOne((int)(diff / _1msTicks));
}
}
}
}
}
@ -299,6 +348,12 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
Item = item,
};
item.Fence.RegisterCallback(_device.Gpu, () =>
{
_device.Gpu.Window.SignalFrameReady();
_device.Gpu.GPFifo.Interrupt();
});
_device.Gpu.Window.EnqueueFrameThreadSafe(
frameBufferAddress,
frameBufferWidth,

View File

@ -66,6 +66,13 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
return false;
}
/// <summary>
/// Registers a callback on the syncpoint of the last fence in NvFences (index FenceCount - 1).
/// </summary>
/// <param name="gpuContext">GPU context whose Synchronization object receives the registration</param>
/// <param name="callback">Action handed to RegisterCallbackOnSyncpoint together with the fence's Id and Value</param>
public void RegisterCallback(GpuContext gpuContext, Action callback)
{
    // NOTE(review): a FenceCount of 0 would index -1 here — confirm callers always supply at least one fence.
    var lastIndex = FenceCount - 1;
    ref NvFence lastFence = ref NvFences[lastIndex];

    gpuContext.Synchronization.RegisterCallbackOnSyncpoint(lastFence.Id, lastFence.Value, callback);
}
public uint GetFlattenedSize()
{
return (uint)Unsafe.SizeOf<AndroidFence>();

View File

@ -16,12 +16,12 @@ namespace Ryujinx.HLE
private double[] _previousFrameTime;
private double[] _averagePercent;
private double[] _accumulatedPercent;
private double[] _accumulatedActiveTime;
private double[] _percentLastEndTime;
private double[] _percentStartTime;
private long[] _framesRendered;
private long[] _percentCount;
private double[] _percentTime;
private object[] _frameLock;
private object[] _percentLock;
@ -37,12 +37,12 @@ namespace Ryujinx.HLE
_previousFrameTime = new double[1];
_averagePercent = new double[1];
_accumulatedPercent = new double[1];
_accumulatedActiveTime = new double[1];
_percentLastEndTime = new double[1];
_percentStartTime = new double[1];
_framesRendered = new long[1];
_percentCount = new long[1];
_percentTime = new double[1];
_frameLock = new object[] { new object() };
_percentLock = new object[] { new object() };
@ -91,16 +91,16 @@ namespace Ryujinx.HLE
lock (_percentLock[percentType])
{
if (_percentCount[percentType] > 0)
if (_percentTime[percentType] > 0)
{
percent = _accumulatedPercent[percentType] / _percentCount[percentType];
percent = (_accumulatedActiveTime[percentType] / _percentTime[percentType]) * 100;
}
_averagePercent[percentType] = percent;
_percentCount[percentType] = 0;
_percentTime[percentType] = 0;
_accumulatedPercent[percentType] = 0;
_accumulatedActiveTime[percentType] = 0;
}
}
@ -138,13 +138,11 @@ namespace Ryujinx.HLE
double elapsedTime = currentTime - _percentLastEndTime[percentType];
double elapsedActiveTime = currentTime - _percentStartTime[percentType];
double percentActive = (elapsedActiveTime / elapsedTime) * 100;
lock (_percentLock[percentType])
{
_accumulatedPercent[percentType] += percentActive;
_accumulatedActiveTime[percentType] += elapsedActiveTime;
_percentCount[percentType]++;
_percentTime[percentType] += elapsedTime;
}
_percentLastEndTime[percentType] = currentTime;

View File

@ -177,6 +177,11 @@ namespace Ryujinx.HLE
Gpu.GPFifo.DispatchCalls();
}
/// <summary>
/// Forwards to the GPU window's ConsumeFrameAvailable and returns its result.
/// </summary>
/// <returns>The value returned by Gpu.Window.ConsumeFrameAvailable()</returns>
public bool ConsumeFrameAvailable() => Gpu.Window.ConsumeFrameAvailable();
public void PresentFrame(Action swapBuffersCallback)
{
Gpu.Window.Present(swapBuffersCallback);

View File

@ -404,6 +404,13 @@ namespace Ryujinx.Ui
_device.Statistics.RecordFifoEnd();
}
while (_device.ConsumeFrameAvailable())
{
_device.PresentFrame(SwapBuffers);
}
if (_ticks >= _ticksPerFrame)
{
string dockedMode = ConfigurationState.Instance.System.EnableDockedMode ? "Docked" : "Handheld";
float scale = Graphics.Gpu.GraphicsConfig.ResScale;
if (scale != 1)
@ -411,10 +418,6 @@ namespace Ryujinx.Ui
dockedMode += $" ({scale}x)";
}
if (_ticks >= _ticksPerFrame)
{
_device.PresentFrame(SwapBuffers);
StatusUpdatedEvent?.Invoke(this, new StatusUpdatedEventArgs(
_device.EnableDeviceVsync,
dockedMode,