HLE: Add OS-specific precise sleep methods to reduce spinwaiting (#5948)

* feat: add nanosleep for linux and macos

* Add Windows 0.5ms sleep

- Imprecise waits for longer waits with clock alignment
- 1/4 the spin time on vsync timer

* Remove old experiment

* Fix event leak

* Tweaking for MacOS

* Linux tweaks, nanosleep vsync improvement

* Fix overbias

* Cleanup

* Fix realignment

* Add some docs and some cleanup

NanosleepPool needs more documentation, and Nanosleep has some benchmark code that needs to be removed.

* Rename "Microsleep" to "PreciseSleep"

The old name might have been confused with "microseconds", a unit in which no measurement is performed here.

* Remove nanosleep measurement

* Remove unused debug logging

* Nanosleep Pool Documentation

* More cleanup

* Whitespace

* Formatting

* Address Feedback

* Allow SleepUntilTimePoint to take EventWaitHandle

* Remove `_chrono` stopwatch in SurfaceFlinger

* Move spinwaiting logic to PreciseSleepHelper

Technically, these achieve different things, but having them here makes them easier to reuse or tune.
This commit is contained in:
riperiperi 2023-11-30 10:39:42 -08:00 committed by GitHub
parent 21cd4c0c00
commit 1be668e68a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 1000 additions and 49 deletions

View file

@ -0,0 +1,38 @@
using System;
namespace Ryujinx.Common.PreciseSleep
{
/// <summary>
/// An event which works similarly to an AutoResetEvent, but is backed by a
/// more precise timer that allows waits of less than a millisecond.
/// </summary>
/// <remarks>
/// Timepoints passed to this interface are host tick values, on the same scale as
/// <c>PerformanceCounter.ElapsedTicks</c>, which is what the implementations in this
/// namespace compare them against.
/// </remarks>
public interface IPreciseSleepEvent : IDisposable
{
/// <summary>
/// Adjust a timepoint to better fit the host clock.
/// When no adjustment is made, the input timepoint will be returned.
/// </summary>
/// <param name="timePoint">Timepoint to adjust, in host ticks</param>
/// <param name="timeoutNs">Requested timeout in nanoseconds</param>
/// <returns>Adjusted timepoint, in host ticks</returns>
long AdjustTimePoint(long timePoint, long timeoutNs);
/// <summary>
/// Sleep until a timepoint, or a signal is received.
/// Given no signal, may wake considerably before, or slightly after the timeout.
/// </summary>
/// <remarks>
/// A false result means no sleep took place, so the caller has to cover the
/// remaining time itself (for example by spin-waiting).
/// </remarks>
/// <param name="timePoint">Timepoint to sleep until, in host ticks</param>
/// <returns>True if signalled or waited, false if a wait could not be performed</returns>
bool SleepUntil(long timePoint);
/// <summary>
/// Sleep until a signal is received. There is no timeout.
/// </summary>
void Sleep();
/// <summary>
/// Signal the event, waking any sleeping thread or the next attempted sleep.
/// </summary>
void Signal();
}
}

View file

@ -0,0 +1,160 @@
using System;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;
namespace Ryujinx.Common.PreciseSleep
{
/// <summary>
/// Wrapper around the libc <c>nanosleep</c> call on Linux/macOS family platforms.
/// Requested durations are shortened by a platform specific bias to improve precision.
/// </summary>
[SupportedOSPlatform("macos")]
[SupportedOSPlatform("linux")]
[SupportedOSPlatform("android")]
[SupportedOSPlatform("ios")]
internal static partial class Nanosleep
{
    private const long LinuxBaseNanosleepBias = 50000; // 0.05ms

    // Penalty for max allowed sleep duration
    private const long LinuxNanosleepAccuracyPenaltyThreshold = 200000; // 0.2ms
    private const long LinuxNanosleepAccuracyPenalty = 30000; // 0.03ms

    // Penalty for base sleep duration
    private const long LinuxNanosleepBasePenaltyThreshold = 500000; // 0.5ms
    private const long LinuxNanosleepBasePenalty = 30000; // 0.03ms
    private const long LinuxNanosleepPenaltyPerMillisecond = 18000; // 0.018ms
    private const long LinuxNanosleepPenaltyCap = 18000; // 0.018ms

    private const long LinuxStrictBiasOffset = 150_000; // 0.15ms

    // Nanosleep duration is biased depending on the requested timeout on MacOS.
    // These match the results when measuring on an M1 processor at AboveNormal priority.
    private const long MacosBaseNanosleepBias = 5000; // 0.005ms
    private const long MacosBiasPerMillisecond = 140000; // 0.14ms
    private const long MacosBiasMaxNanoseconds = 20_000_000; // 20ms
    private const long MacosStrictBiasOffset = 150_000; // 0.15ms

    /// <summary>
    /// The bias for a zero nanosecond timeout, i.e. the smallest bias <see cref="GetBias"/> reports
    /// for non-negative timeouts on the current platform.
    /// </summary>
    public static long Bias { get; }

    static Nanosleep()
    {
        Bias = GetBias(0);
    }

    /// <summary>
    /// Work out how much a nanosleep of the given length should be shortened by.
    /// The formula differs between the macOS/iOS and the Linux style platforms.
    /// </summary>
    /// <param name="timeoutNs">Nanosecond timeout</param>
    /// <returns>Bias in nanoseconds</returns>
    public static long GetBias(long timeoutNs)
    {
        if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS())
        {
            // The bias grows linearly with the timeout, up to a maximum timeout.
            long clampedNs = Math.Min(timeoutNs, MacosBiasMaxNanoseconds);

            return MacosBaseNanosleepBias + clampedNs * MacosBiasPerMillisecond / 1_000_000;
        }

        if (timeoutNs <= LinuxNanosleepBasePenaltyThreshold)
        {
            return LinuxBaseNanosleepBias;
        }

        // Past the threshold, a flat penalty plus a capped, duration scaled penalty are added.
        long scaledPenalty = (timeoutNs - LinuxNanosleepBasePenaltyThreshold) * LinuxNanosleepPenaltyPerMillisecond / 1_000_000;

        return LinuxBaseNanosleepBias + LinuxNanosleepBasePenalty + Math.Min(LinuxNanosleepPenaltyCap, scaledPenalty);
    }

    /// <summary>
    /// Work out a larger bias than <see cref="GetBias"/> for the given timeout,
    /// improving the chances that the sleep completes before the timeout.
    /// </summary>
    /// <param name="timeoutNs">Nanosecond timeout</param>
    /// <returns>Strict bias in nanoseconds</returns>
    public static long GetStrictBias(long timeoutNs)
    {
        long strictBias = GetBias(timeoutNs);

        if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS())
        {
            return strictBias + MacosStrictBiasOffset;
        }

        strictBias += LinuxStrictBiasOffset;

        return timeoutNs > LinuxNanosleepAccuracyPenaltyThreshold
            ? strictBias + LinuxNanosleepAccuracyPenalty
            : strictBias;
    }

    [StructLayout(LayoutKind.Sequential)]
    private struct Timespec
    {
        public long tv_sec; // Seconds
        public long tv_nsec; // Nanoseconds
    }

    [LibraryImport("libc", SetLastError = true)]
    private static partial int nanosleep(ref Timespec req, ref Timespec rem);

    /// <summary>
    /// Split a nanosecond count into the seconds/nanoseconds pair nanosleep expects.
    /// </summary>
    /// <param name="nanoseconds">Timeout in nanoseconds</param>
    /// <returns>Timespec for nanosleep</returns>
    private static Timespec GetTimespecFromNanoseconds(ulong nanoseconds)
    {
        Timespec result;

        result.tv_sec = (long)(nanoseconds / 1_000_000_000);
        result.tv_nsec = (long)(nanoseconds % 1_000_000_000);

        return result;
    }

    /// <summary>
    /// Perform the nanosleep for an already biased duration.
    /// Negative durations (bias larger than the request) are skipped entirely.
    /// </summary>
    /// <param name="biasedNanoseconds">Duration to sleep, with the bias already subtracted</param>
    private static void SleepBiased(long biasedNanoseconds)
    {
        if (biasedNanoseconds < 0)
        {
            return;
        }

        Timespec request = GetTimespecFromNanoseconds((ulong)biasedNanoseconds);
        Timespec remaining = new();

        nanosleep(ref request, ref remaining);
    }

    /// <summary>
    /// Sleep for approximately a given time period in nanoseconds.
    /// </summary>
    /// <param name="nanoseconds">Time to sleep for in nanoseconds</param>
    public static void Sleep(long nanoseconds)
    {
        SleepBiased(nanoseconds - GetBias(nanoseconds));
    }

    /// <summary>
    /// Sleep for at most a given time period in nanoseconds.
    /// The stricter bias is used so that the wake-up tends to land before the requested duration elapses.
    /// </summary>
    /// <remarks>
    /// Due to OS scheduling behaviour, this timeframe may still be missed.
    /// </remarks>
    /// <param name="nanoseconds">Maximum allowed time for sleep</param>
    public static void SleepAtMost(long nanoseconds)
    {
        SleepBiased(nanoseconds - GetStrictBias(nanoseconds));
    }
}
}

View file

@ -0,0 +1,84 @@
using System;
using System.Runtime.Versioning;
using System.Threading;
namespace Ryujinx.Common.PreciseSleep
{
/// <summary>
/// Precise sleep event for linux and macos style platforms.
/// Waits of a millisecond or more go straight to the underlying event, shorter waits are
/// timed by a nanosleep running on a <see cref="NanosleepPool"/> thread.
/// </summary>
[SupportedOSPlatform("macos")]
[SupportedOSPlatform("linux")]
[SupportedOSPlatform("android")]
[SupportedOSPlatform("ios")]
internal class NanosleepEvent : IPreciseSleepEvent
{
    private readonly AutoResetEvent _waitEvent = new(false);
    private readonly NanosleepPool _pool;

    public NanosleepEvent()
    {
        // The pool signals the same event that external callers signal.
        _pool = new NanosleepPool(_waitEvent);
    }

    /// <summary>
    /// Timepoints are never adjusted by this implementation.
    /// </summary>
    /// <param name="timePoint">Timepoint in host ticks</param>
    /// <param name="timeoutNs">Requested timeout in nanoseconds (unused)</param>
    /// <returns>The input timepoint</returns>
    public long AdjustTimePoint(long timePoint, long timeoutNs) => timePoint;

    /// <summary>
    /// Sleep until the timepoint or a signal, whichever comes first.
    /// </summary>
    /// <param name="timePoint">Timepoint in host ticks</param>
    /// <returns>
    /// True if a wait was performed or a signal was already pending, false if the remaining time
    /// is too short to sleep for or the pool has no thread available.
    /// </returns>
    public bool SleepUntil(long timePoint)
    {
        long remainingTicks = timePoint - PerformanceCounter.ElapsedTicks;
        long wholeMs = Math.Min(remainingTicks / PerformanceCounter.TicksPerMillisecond, int.MaxValue);
        long remainingNs = (remainingTicks * 1_000_000) / PerformanceCounter.TicksPerMillisecond;

        if (wholeMs > 0)
        {
            // At least a millisecond left: a regular timed wait is used.
            _waitEvent.WaitOne((int)wholeMs);

            return true;
        }

        if (remainingNs - Nanosleep.Bias <= 0)
        {
            // Less time remaining than the nanosleep bias.
            return false;
        }

        // Don't bother starting a sleep if there's already a signal active.
        if (_waitEvent.WaitOne(0))
        {
            return true;
        }

        if (!_pool.SleepAndSignal(remainingNs, timePoint))
        {
            // Too many threads on the pool.
            return false;
        }

        // The 1ms wait will be interrupted by the nanosleep timeout if it completes.
        _waitEvent.WaitOne(1);

        // Whatever woke us, the nanosleep that was just dispatched must not signal from here on.
        _pool.IgnoreSignal();

        return true;
    }

    /// <summary>
    /// Block until the event is signalled.
    /// </summary>
    public void Sleep() => _waitEvent.WaitOne();

    /// <summary>
    /// Signal the underlying event.
    /// </summary>
    public void Signal() => _waitEvent.Set();

    /// <summary>
    /// Dispose the pool first, then the event the pool signals.
    /// </summary>
    public void Dispose()
    {
        GC.SuppressFinalize(this);

        _pool.Dispose();
        _waitEvent.Dispose();
    }
}
}

View file

@ -0,0 +1,228 @@
using System;
using System.Collections.Generic;
using System.Runtime.Versioning;
using System.Threading;
namespace Ryujinx.Common.PreciseSleep
{
/// <summary>
/// A pool of threads used to allow "interruptable" nanosleep for a single target event.
/// </summary>
/// <remarks>
/// Every dispatched sleep is tagged with a signal ID. When a worker wakes, it only signals the
/// target event if its ID still matches the pool's latest one, so bumping the pool's ID
/// (see <see cref="IgnoreSignal"/>) stops an in-flight sleep from signalling.
/// </remarks>
[SupportedOSPlatform("macos")]
[SupportedOSPlatform("linux")]
[SupportedOSPlatform("android")]
[SupportedOSPlatform("ios")]
internal class NanosleepPool : IDisposable
{
    public const int MaxThreads = 8;

    /// <summary>
    /// A thread that nanosleeps and may signal an event on wake.
    /// When a thread is assigned a nanosleep to perform, it also gets a signal ID.
    /// The pool's target event is only signalled if this ID matches the latest dispatched one.
    /// </summary>
    private class NanosleepThread : IDisposable
    {
        // Two timepoints closer than this are treated as the same target by Resurrect.
        private static readonly long _timePointEpsilon;

        static NanosleepThread()
        {
            _timePointEpsilon = PerformanceCounter.TicksPerMillisecond / 100; // 0.01ms
        }

        private readonly Thread _thread;
        private readonly NanosleepPool _parent;
        private readonly AutoResetEvent _newWaitEvent;
        private bool _running = true;

        private long _signalId;
        private long _nanoseconds;
        private long _timePoint;

        public long SignalId => _signalId;

        /// <summary>
        /// Creates a new NanosleepThread for a parent pool, with a specified thread ID.
        /// The worker thread is started immediately and waits for its first request.
        /// </summary>
        /// <param name="parent">Parent NanosleepPool</param>
        /// <param name="id">Thread ID, only used to name the thread</param>
        public NanosleepThread(NanosleepPool parent, int id)
        {
            _parent = parent;
            _newWaitEvent = new(false);

            _thread = new Thread(Loop)
            {
                Name = $"Common.Nanosleep.{id}",
                Priority = ThreadPriority.AboveNormal,
                IsBackground = true
            };

            _thread.Start();
        }

        /// <summary>
        /// Service requests to perform a nanosleep, signal parent pool when complete.
        /// The request event is disposed by this thread when the loop exits.
        /// </summary>
        private void Loop()
        {
            _newWaitEvent.WaitOne();

            while (_running)
            {
                Nanosleep.Sleep(_nanoseconds);

                _parent.Signal(this);
                _newWaitEvent.WaitOne();
            }

            _newWaitEvent.Dispose();
        }

        /// <summary>
        /// Assign a nanosleep for this thread to perform, then signal at the end.
        /// </summary>
        /// <param name="nanoseconds">Nanoseconds to sleep</param>
        /// <param name="signalId">Signal ID</param>
        /// <param name="timePoint">Target timepoint</param>
        public void SleepAndSignal(long nanoseconds, long signalId, long timePoint)
        {
            _signalId = signalId;
            _nanoseconds = nanoseconds;
            _timePoint = timePoint;

            // Wake the worker loop only after the request fields have been written.
            _newWaitEvent.Set();
        }

        /// <summary>
        /// Resurrect an active nanosleep's signal if its target timepoint is a close enough match.
        /// </summary>
        /// <param name="signalId">New signal id to assign the nanosleep</param>
        /// <param name="timePoint">Target timepoint</param>
        /// <returns>True if resurrected, false otherwise</returns>
        public bool Resurrect(long signalId, long timePoint)
        {
            if (Math.Abs(timePoint - _timePoint) < _timePointEpsilon)
            {
                _signalId = signalId;

                return true;
            }

            return false;
        }

        /// <summary>
        /// Dispose the NanosleepThread, interrupting its worker loop.
        /// </summary>
        public void Dispose()
        {
            if (_running)
            {
                _running = false;

                // Release the worker from its wait so that it observes the cleared flag.
                _newWaitEvent.Set();
            }
        }
    }

    private readonly object _lock = new();
    private readonly List<NanosleepThread> _threads = new();
    private readonly List<NanosleepThread> _active = new();
    private readonly Stack<NanosleepThread> _free = new();
    private readonly AutoResetEvent _signalTarget;

    private long _signalId;

    /// <summary>
    /// Creates a new NanosleepPool with a target event to signal when a nanosleep completes.
    /// </summary>
    /// <param name="signalTarget">Event to signal when nanosleeps complete</param>
    public NanosleepPool(AutoResetEvent signalTarget)
    {
        _signalTarget = signalTarget;
    }

    /// <summary>
    /// Signal the target event (if the source sleep has not been superseded)
    /// and free the nanosleep thread.
    /// </summary>
    /// <param name="thread">Nanosleep thread that completed</param>
    private void Signal(NanosleepThread thread)
    {
        lock (_lock)
        {
            _active.Remove(thread);
            _free.Push(thread);

            if (thread.SignalId == _signalId)
            {
                _signalTarget.Set();
            }
        }
    }

    /// <summary>
    /// Sleep for the given number of nanoseconds and signal the target event.
    /// This does not block the caller thread.
    /// </summary>
    /// <param name="nanoseconds">Nanoseconds to sleep</param>
    /// <param name="timePoint">Target timepoint</param>
    /// <returns>True if the signal will be set, false if all <see cref="MaxThreads"/> threads are busy</returns>
    public bool SleepAndSignal(long nanoseconds, long timePoint)
    {
        lock (_lock)
        {
            // A new ID supersedes every sleep dispatched before this call.
            _signalId++;

            // Check active sleeps, if any line up with the requested timepoint then resurrect that nanosleep.
            foreach (NanosleepThread existing in _active)
            {
                if (existing.Resurrect(_signalId, timePoint))
                {
                    return true;
                }
            }

            if (!_free.TryPop(out NanosleepThread thread))
            {
                if (_threads.Count >= MaxThreads)
                {
                    return false;
                }

                thread = new NanosleepThread(this, _threads.Count);

                _threads.Add(thread);
            }

            _active.Add(thread);

            thread.SleepAndSignal(nanoseconds, _signalId, timePoint);

            return true;
        }
    }

    /// <summary>
    /// Ignore the latest nanosleep.
    /// </summary>
    /// <remarks>
    /// The ID is bumped under the pool lock. <see cref="Signal"/> compares IDs and sets the target
    /// event while holding the same lock, so once this method returns no previously dispatched sleep
    /// can signal the target any more (which matters when the target is about to be disposed).
    /// </remarks>
    public void IgnoreSignal()
    {
        lock (_lock)
        {
            _signalId++;
        }
    }

    /// <summary>
    /// Dispose the NanosleepPool, disposing all of its threads.
    /// </summary>
    public void Dispose()
    {
        GC.SuppressFinalize(this);

        foreach (NanosleepThread thread in _threads)
        {
            thread.Dispose();
        }

        _threads.Clear();
    }
}
}

View file

@ -0,0 +1,104 @@
using Ryujinx.Common.SystemInterop;
using System;
using System.Threading;
namespace Ryujinx.Common.PreciseSleep
{
/// <summary>
/// Entry points for precise sleeping and spin-waiting against host tick timepoints.
/// </summary>
public static class PreciseSleepHelper
{
    /// <summary>
    /// Create the precise sleep event implementation that fits the current platform.
    /// </summary>
    /// <returns>A precise sleep event</returns>
    public static IPreciseSleepEvent CreateEvent()
    {
        if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS() || OperatingSystem.IsIOS() || OperatingSystem.IsAndroid())
        {
            return new NanosleepEvent();
        }

        if (OperatingSystem.IsWindows())
        {
            return new WindowsSleepEvent();
        }

        // Anything else gets the millisecond granularity fallback.
        return new SleepEvent();
    }

    /// <summary>
    /// Sleeps up to the closest point to the timepoint that the OS reasonably allows.
    /// The provided event is used by the timer to wake the current thread, and should not be signalled from any other source.
    /// </summary>
    /// <param name="evt">Event used to wake this thread</param>
    /// <param name="timePoint">Target timepoint in host ticks</param>
    public static void SleepUntilTimePoint(EventWaitHandle evt, long timePoint)
    {
        if (OperatingSystem.IsWindows())
        {
            WindowsGranularTimer.Instance.SleepUntilTimePointWithoutExternalSignal(evt, timePoint);

            return;
        }

        // Events might oversleep by a little, depending on OS.
        // We don't want to miss the timepoint, so "now" is pushed half a millisecond forward,
        // which shortens the millisecond wait. Nanosleep can possibly handle the rest better, too.
        long halfMillisecondTicks = PerformanceCounter.TicksPerMillisecond / 2;
        long biasedNow = PerformanceCounter.ElapsedTicks + halfMillisecondTicks;
        long wholeMs = Math.Min((timePoint - biasedNow) / PerformanceCounter.TicksPerMillisecond, int.MaxValue);

        if (wholeMs > 0)
        {
            evt.WaitOne((int)wholeMs);
        }

        if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS() || OperatingSystem.IsIOS() || OperatingSystem.IsAndroid())
        {
            // Cover what is left with a nanosleep, measured from the real (unbiased) current time.
            long remainingTicks = timePoint - PerformanceCounter.ElapsedTicks;
            long remainingNs = (remainingTicks * 1_000_000) / PerformanceCounter.TicksPerMillisecond;

            Nanosleep.SleepAtMost(remainingNs);
        }
    }

    /// <summary>
    /// Spinwait until the given timepoint. If wakeSignal is or becomes 1, return early.
    /// Thread is allowed to yield.
    /// </summary>
    /// <param name="timePoint">Target timepoint in host ticks</param>
    /// <param name="wakeSignal">Returns early if this is set to 1</param>
    public static void SpinWaitUntilTimePoint(long timePoint, ref long wakeSignal)
    {
        SpinWait spinner = new();

        while (true)
        {
            // The wake signal is checked first; the clock is only read when it is not set.
            if (Interlocked.Read(ref wakeSignal) == 1 || PerformanceCounter.ElapsedTicks >= timePoint)
            {
                return;
            }

            if (!spinner.NextSpinWillYield)
            {
                spinner.SpinOnce();

                continue;
            }

            // Our time is close - don't let SpinWait go off and potentially Thread.Sleep().
            Thread.Yield();
            spinner.Reset();
        }
    }

    /// <summary>
    /// Spinwait until the given timepoint, with no opportunity to wake early.
    /// </summary>
    /// <param name="timePoint">Target timepoint in host ticks</param>
    public static void SpinWaitUntilTimePoint(long timePoint)
    {
        while (true)
        {
            if (PerformanceCounter.ElapsedTicks >= timePoint)
            {
                return;
            }

            Thread.SpinWait(5);
        }
    }
}
}

View file

@ -0,0 +1,51 @@
using System;
using System.Threading;
namespace Ryujinx.Common.PreciseSleep
{
/// <summary>
/// Cross-platform fallback sleep event. It only performs timed waits of whole milliseconds.
/// </summary>
internal class SleepEvent : IPreciseSleepEvent
{
    private readonly AutoResetEvent _waitEvent = new(false);

    /// <summary>
    /// Timepoints are never adjusted by this implementation.
    /// </summary>
    /// <param name="timePoint">Timepoint in host ticks</param>
    /// <param name="timeoutNs">Requested timeout in nanoseconds (unused)</param>
    /// <returns>The input timepoint</returns>
    public long AdjustTimePoint(long timePoint, long timeoutNs) => timePoint;

    /// <summary>
    /// Wait on the event for the whole milliseconds remaining until the timepoint.
    /// </summary>
    /// <param name="timePoint">Timepoint in host ticks</param>
    /// <returns>True if a wait was performed, false if less than a millisecond remains</returns>
    public bool SleepUntil(long timePoint)
    {
        long remainingTicks = timePoint - PerformanceCounter.ElapsedTicks;
        long wholeMs = Math.Min(remainingTicks / PerformanceCounter.TicksPerMillisecond, int.MaxValue);

        if (wholeMs <= 0)
        {
            return false;
        }

        _waitEvent.WaitOne((int)wholeMs);

        return true;
    }

    /// <summary>
    /// Block until the event is signalled.
    /// </summary>
    public void Sleep() => _waitEvent.WaitOne();

    /// <summary>
    /// Signal the underlying event.
    /// </summary>
    public void Signal() => _waitEvent.Set();

    /// <summary>
    /// Dispose the underlying event.
    /// </summary>
    public void Dispose()
    {
        GC.SuppressFinalize(this);

        _waitEvent.Dispose();
    }
}
}

View file

@ -0,0 +1,220 @@
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;
using System.Threading;
namespace Ryujinx.Common.SystemInterop
{
/// <summary>
/// Timer that attempts to align with the hardware timer interrupt,
/// and can alert listeners on ticks.
/// </summary>
[SupportedOSPlatform("windows")]
internal partial class WindowsGranularTimer
{
    // Finest granularity that will be requested, in 100ns units (0.5ms).
    private const int MinimumGranularity = 5000;

    private static readonly WindowsGranularTimer _instance = new();
    public static WindowsGranularTimer Instance => _instance;

    /// <summary>
    /// An event registered to be signalled on the last clock tick before its timepoint.
    /// </summary>
    private readonly struct WaitingObject
    {
        public readonly long Id;
        public readonly EventWaitHandle Signal;
        public readonly long TimePoint;

        public WaitingObject(long id, EventWaitHandle signal, long timePoint)
        {
            Id = id;
            Signal = signal;
            TimePoint = timePoint;
        }
    }

    [LibraryImport("ntdll.dll", SetLastError = true)]
    private static partial int NtSetTimerResolution(int DesiredResolution, [MarshalAs(UnmanagedType.Bool)] bool SetResolution, out int CurrentResolution);

    [LibraryImport("ntdll.dll", SetLastError = true)]
    private static partial int NtQueryTimerResolution(out int MaximumResolution, out int MinimumResolution, out int CurrentResolution);

    [LibraryImport("ntdll.dll", SetLastError = true)]
    private static partial uint NtDelayExecution([MarshalAs(UnmanagedType.Bool)] bool Alertable, ref long DelayInterval);

    public long GranularityNs => _granularityNs;
    public long GranularityTicks => _granularityTicks;

    private readonly Thread _timerThread;
    private long _granularityNs = MinimumGranularity * 100L;
    private long _granularityTicks;
    private long _lastTicks = PerformanceCounter.ElapsedTicks;
    private long _lastId;

    private readonly object _lock = new();
    private readonly List<WaitingObject> _waitingObjects = new();

    private WindowsGranularTimer()
    {
        // Seed the tick granularity from the default nanosecond granularity before the timer
        // thread exists. Initialize runs on that thread, and callers racing with it would
        // otherwise see 0 here, which ReturnNearestTicks divides by.
        _granularityTicks = NanosecondsToTicks(_granularityNs);

        _timerThread = new Thread(Loop)
        {
            IsBackground = true,
            Name = "Common.WindowsTimer",
            Priority = ThreadPriority.Highest
        };

        _timerThread.Start();
    }

    /// <summary>
    /// Convert a duration in nanoseconds to host ticks.
    /// </summary>
    /// <param name="nanoseconds">Duration in nanoseconds</param>
    /// <returns>Duration in host ticks</returns>
    private static long NanosecondsToTicks(long nanoseconds)
    {
        return (nanoseconds * PerformanceCounter.TicksPerMillisecond) / 1_000_000;
    }

    /// <summary>
    /// Measure and initialize the timer's target granularity.
    /// </summary>
    private void Initialize()
    {
        NtQueryTimerResolution(out _, out int min, out int curr);

        if (min > 0)
        {
            // Never request anything finer than MinimumGranularity.
            min = Math.Max(min, MinimumGranularity);

            _granularityNs = min * 100L;
            NtSetTimerResolution(min, true, out _);
        }
        else if (curr > 0)
        {
            _granularityNs = curr * 100L;
        }

        // When the query reports nothing usable, the default granularity is kept so that
        // the granularity can never become zero.
        _granularityTicks = NanosecondsToTicks(_granularityNs);
    }

    /// <summary>
    /// Main loop for the timer thread. Wakes every clock tick and signals any listeners,
    /// as well as keeping track of clock alignment.
    /// </summary>
    private void Loop()
    {
        Initialize();

        while (true)
        {
            long delayInterval = -1; // Next tick

            // The resolution request is repeated on every iteration before waiting for the tick.
            NtSetTimerResolution((int)(_granularityNs / 100), true, out _);
            NtDelayExecution(false, ref delayInterval);

            long newTicks = PerformanceCounter.ElapsedTicks;
            long nextTicks = newTicks + _granularityTicks;

            lock (_lock)
            {
                for (int i = 0; i < _waitingObjects.Count; i++)
                {
                    if (nextTicks > _waitingObjects[i].TimePoint)
                    {
                        // The next clock tick will be after the timepoint, we need to signal now.
                        _waitingObjects[i].Signal.Set();

                        _waitingObjects.RemoveAt(i--);
                    }
                }

                _lastTicks = newTicks;
            }
        }
    }

    /// <summary>
    /// Register an event to be signalled by the timer thread, unless the next clock tick
    /// is already expected to land after the requested timepoint.
    /// </summary>
    /// <param name="evt">Event for the timer thread to signal</param>
    /// <param name="timePoint">Target timepoint</param>
    /// <param name="id">ID of the registration, only meaningful when true is returned</param>
    /// <returns>True if registered, false if the timepoint is too close to wait for</returns>
    private bool TryRegisterWaiter(EventWaitHandle evt, long timePoint, out long id)
    {
        lock (_lock)
        {
            // Return immediately if the next tick is after the requested timepoint.
            long nextTicks = _lastTicks + _granularityTicks;

            if (nextTicks > timePoint)
            {
                id = 0;

                return false;
            }

            id = ++_lastId;

            _waitingObjects.Add(new WaitingObject(id, evt, timePoint));

            return true;
        }
    }

    /// <summary>
    /// Sleep until a timepoint.
    /// </summary>
    /// <param name="evt">Reset event to use to be awoken by the clock tick, or an external signal</param>
    /// <param name="timePoint">Target timepoint</param>
    /// <returns>True if waited or signalled, false otherwise</returns>
    public bool SleepUntilTimePoint(AutoResetEvent evt, long timePoint)
    {
        // A signal that is already pending counts as having been woken.
        if (evt.WaitOne(0))
        {
            return true;
        }

        if (!TryRegisterWaiter(evt, timePoint, out long id))
        {
            return false;
        }

        evt.WaitOne();

        lock (_lock)
        {
            // If the wake-up came from an external signal, the registration is still queued
            // and has to be removed so the timer does not signal the event later on.
            for (int i = 0; i < _waitingObjects.Count; i++)
            {
                if (id == _waitingObjects[i].Id)
                {
                    _waitingObjects.RemoveAt(i);

                    break;
                }
            }
        }

        return true;
    }

    /// <summary>
    /// Sleep until a timepoint, but don't expect any external signals.
    /// </summary>
    /// <remarks>
    /// Saves some effort compared to the sleep that expects to be signalled:
    /// no pending-signal check is made, and the registration is left for the timer thread to remove.
    /// </remarks>
    /// <param name="evt">Reset event to use to be awoken by the clock tick</param>
    /// <param name="timePoint">Target timepoint</param>
    /// <returns>True if waited, false otherwise</returns>
    public bool SleepUntilTimePointWithoutExternalSignal(EventWaitHandle evt, long timePoint)
    {
        if (!TryRegisterWaiter(evt, timePoint, out _))
        {
            return false;
        }

        evt.WaitOne();

        return true;
    }

    /// <summary>
    /// Returns the two nearest clock ticks for a given timepoint.
    /// </summary>
    /// <param name="timePoint">Target timepoint</param>
    /// <returns>The nearest clock ticks before and after the given timepoint</returns>
    public (long, long) ReturnNearestTicks(long timePoint)
    {
        long last = _lastTicks;
        long delta = timePoint - last;

        // Whole granularity steps from the last observed tick, rounded down and rounded up.
        long lowTicks = delta / _granularityTicks;
        long highTicks = (delta + _granularityTicks - 1) / _granularityTicks;

        return (last + lowTicks * _granularityTicks, last + highTicks * _granularityTicks);
    }
}
}

View file

@ -0,0 +1,92 @@
using Ryujinx.Common.SystemInterop;
using System;
using System.Runtime.Versioning;
using System.Threading;
namespace Ryujinx.Common.PreciseSleep
{
/// <summary>
/// A precise sleep event that uses Windows specific methods to increase clock resolution beyond 1ms,
/// use the clock's phase for more precise waits, and potentially align timepoints with it.
/// </summary>
[SupportedOSPlatform("windows")]
internal class WindowsSleepEvent : IPreciseSleepEvent
{
    /// <summary>
    /// The clock can drift a bit, so add this to encourage the clock to still wait if the next tick is forecasted slightly before it.
    /// In nanoseconds (0.05ms).
    /// </summary>
    private const long ErrorBias = 50000;

    /// <summary>
    /// A timeout within this many nanoseconds (0.05ms) of a multiple of the clock granularity
    /// is treated as clock aligned, and its timepoint is moved onto a clock tick.
    /// </summary>
    private const long ClockAlignedBias = 50000;

    /// <summary>
    /// The fraction of clock granularity above the timepoint that will align it down to the lower timepoint.
    /// Currently set to the lower 1/4, so for 0.5ms granularity: 0.1ms would be rounded down, 0.2 ms would be rounded up.
    /// </summary>
    private const long ReverseTimePointFraction = 4;

    private readonly AutoResetEvent _waitEvent = new(false);
    private readonly WindowsGranularTimer _timer = WindowsGranularTimer.Instance;

    /// <summary>
    /// Set to true to disable timepoint realignment.
    /// </summary>
    public bool Precise { get; set; } = false;

    /// <summary>
    /// Move a timepoint onto one of its neighbouring clock ticks when the timeout lines up with the clock granularity.
    /// </summary>
    /// <param name="timePoint">Timepoint to adjust, in host ticks</param>
    /// <param name="timeoutNs">Requested timeout in nanoseconds</param>
    /// <returns>
    /// The adjusted timepoint. The input is returned unchanged when <see cref="Precise"/> is set,
    /// when the timepoint is <see cref="long.MaxValue"/>, or when the timeout is not clock aligned.
    /// </returns>
    public long AdjustTimePoint(long timePoint, long timeoutNs)
    {
        if (Precise || timePoint == long.MaxValue)
        {
            return timePoint;
        }

        // Does the timeout align with the host clock?
        long granularity = _timer.GranularityNs;
        long misalignment = timeoutNs % granularity;

        if ((misalignment < ClockAlignedBias || misalignment > granularity - ClockAlignedBias) && timeoutNs > ClockAlignedBias)
        {
            // Inaccurate sleep for 0.5ms increments, typically.

            (long low, long high) = _timer.ReturnNearestTicks(timePoint);

            if (timePoint - low < _timer.GranularityTicks / ReverseTimePointFraction)
            {
                timePoint = low;
            }
            else
            {
                timePoint = high;
            }
        }

        return timePoint;
    }

    /// <summary>
    /// Sleep until the timepoint (plus a small error bias), or until the event is signalled.
    /// </summary>
    /// <param name="timePoint">Timepoint in host ticks</param>
    /// <returns>True if signalled or waited, false if a wait could not be performed</returns>
    public bool SleepUntil(long timePoint)
    {
        long biasTicks = (ErrorBias * PerformanceCounter.TicksPerMillisecond) / 1_000_000;

        // Saturate rather than overflow: a timepoint at (or near) long.MaxValue would otherwise
        // wrap around to a negative value, which the timer treats as already elapsed and
        // therefore refuses to wait for.
        long biasedTimePoint = timePoint > long.MaxValue - biasTicks
            ? long.MaxValue
            : timePoint + biasTicks;

        return _timer.SleepUntilTimePoint(_waitEvent, biasedTimePoint);
    }

    /// <summary>
    /// Block until the event is signalled.
    /// </summary>
    public void Sleep()
    {
        _waitEvent.WaitOne();
    }

    /// <summary>
    /// Signal the underlying event.
    /// </summary>
    public void Signal()
    {
        _waitEvent.Set();
    }

    /// <summary>
    /// Dispose the underlying event. The shared timer instance is left alone.
    /// </summary>
    public void Dispose()
    {
        GC.SuppressFinalize(this);

        _waitEvent.Dispose();
    }
}
}

View file

@ -1,4 +1,5 @@
using Ryujinx.Common;
using Ryujinx.Common.PreciseSleep;
using System;
using System.Collections.Generic;
using System.Threading;
@ -23,7 +24,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
private readonly KernelContext _context;
private readonly List<WaitingObject> _waitingObjects;
private AutoResetEvent _waitEvent;
private IPreciseSleepEvent _waitEvent;
private bool _keepRunning;
private long _enforceWakeupFromSpinWait;
@ -54,6 +55,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
timePoint = long.MaxValue;
}
timePoint = _waitEvent.AdjustTimePoint(timePoint, timeout);
lock (_context.CriticalSection.Lock)
{
_waitingObjects.Add(new WaitingObject(schedulerObj, timePoint));
@ -64,7 +67,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
}
}
_waitEvent.Set();
_waitEvent.Signal();
}
public void UnscheduleFutureInvocation(IKFutureSchedulerObject schedulerObj)
@ -83,10 +86,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
private void WaitAndCheckScheduledObjects()
{
SpinWait spinWait = new();
WaitingObject next;
using (_waitEvent = new AutoResetEvent(false))
using (_waitEvent = PreciseSleepHelper.CreateEvent())
{
while (_keepRunning)
{
@ -103,30 +105,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
if (next.TimePoint > timePoint)
{
long ms = Math.Min((next.TimePoint - timePoint) / PerformanceCounter.TicksPerMillisecond, int.MaxValue);
if (ms > 0)
if (!_waitEvent.SleepUntil(next.TimePoint))
{
_waitEvent.WaitOne((int)ms);
}
else
{
while (Interlocked.Read(ref _enforceWakeupFromSpinWait) != 1 && PerformanceCounter.ElapsedTicks < next.TimePoint)
{
// Our time is close - don't let SpinWait go off and potentially Thread.Sleep().
if (spinWait.NextSpinWillYield)
{
Thread.Yield();
spinWait.Reset();
}
else
{
spinWait.SpinOnce();
}
}
spinWait.Reset();
PreciseSleepHelper.SpinWaitUntilTimePoint(next.TimePoint, ref _enforceWakeupFromSpinWait);
}
}
@ -145,7 +126,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
}
else
{
_waitEvent.WaitOne();
_waitEvent.Sleep();
}
}
}
@ -212,7 +193,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
public void Dispose()
{
_keepRunning = false;
_waitEvent?.Set();
_waitEvent?.Signal();
}
}
}

View file

@ -1,5 +1,7 @@
using Ryujinx.Common.Configuration;
using Ryujinx.Common;
using Ryujinx.Common.Configuration;
using Ryujinx.Common.Logging;
using Ryujinx.Common.PreciseSleep;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu;
using Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvMap;
@ -23,9 +25,7 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
private readonly Thread _composerThread;
private readonly Stopwatch _chrono;
private readonly ManualResetEvent _event = new(false);
private readonly AutoResetEvent _event = new(false);
private readonly AutoResetEvent _nextFrameEvent = new(true);
private long _ticks;
private long _ticksPerFrame;
@ -64,11 +64,9 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
_composerThread = new Thread(HandleComposition)
{
Name = "SurfaceFlinger.Composer",
Priority = ThreadPriority.AboveNormal
};
_chrono = new Stopwatch();
_chrono.Start();
_ticks = 0;
_spinTicks = Stopwatch.Frequency / 500;
_1msTicks = Stopwatch.Frequency / 1000;
@ -299,11 +297,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
{
_isRunning = true;
long lastTicks = _chrono.ElapsedTicks;
long lastTicks = PerformanceCounter.ElapsedTicks;
while (_isRunning)
{
long ticks = _chrono.ElapsedTicks;
long ticks = PerformanceCounter.ElapsedTicks;
if (_swapInterval == 0)
{
@ -336,21 +334,16 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
}
// Sleep if possible. If the time til the next frame is too low, spin wait instead.
long diff = _ticksPerFrame - (_ticks + _chrono.ElapsedTicks - ticks);
long diff = _ticksPerFrame - (_ticks + PerformanceCounter.ElapsedTicks - ticks);
if (diff > 0)
{
PreciseSleepHelper.SleepUntilTimePoint(_event, PerformanceCounter.ElapsedTicks + diff);
diff = _ticksPerFrame - (_ticks + PerformanceCounter.ElapsedTicks - ticks);
if (diff < _spinTicks)
{
do
{
// SpinWait is a little more HT/SMT friendly than aggressively updating/checking ticks.
// The value of 5 still gives us quite a bit of precision (~0.0003ms variance at worst) while waiting a reasonable amount of time.
Thread.SpinWait(5);
ticks = _chrono.ElapsedTicks;
_ticks += ticks - lastTicks;
lastTicks = ticks;
} while (_ticks < _ticksPerFrame);
PreciseSleepHelper.SpinWaitUntilTimePoint(PerformanceCounter.ElapsedTicks + diff);
}
else
{