diff --git a/.gitignore b/.gitignore index 04c1f722d..c69bc38ba 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,4 @@ thirdparty/SDL/SDL thirdparty/SDL/libSDL2-2.0.0.dylib macos-xcode/ .swiftpm -*.xcodeproj \ No newline at end of file +*.xcodeproj diff --git a/desktop-ui/GNUmakefile b/desktop-ui/GNUmakefile index 06ec125c1..30be91f1b 100644 --- a/desktop-ui/GNUmakefile +++ b/desktop-ui/GNUmakefile @@ -129,6 +129,10 @@ endif cp resource/$(name).plist $(output.path)/$(name).app/Contents/Info.plist $(call mkdir,$(output.path)/$(name).app/Contents/Resources/Shaders/) $(call mkdir,$(output.path)/$(name).app/Contents/Resources/Database/) +ifeq ($(build),debug) +# Try to copy the debug .metallib into the .app bundle; if we don't succeed, user will see runtime log message + -cp ../ruby/video/metal/shaders.metallib $(output.path)/$(name).app/Contents/Resources/Shaders/shaders.metallib +endif cp ../ruby/video/metal/Shaders.metal $(output.path)/$(name).app/Contents/Resources/Shaders/Shaders.metal $(call rcopy,$(thirdparty.path)/slang-shaders/*,$(output.path)/$(name).app/Contents/Resources/Shaders/) $(call rcopy,$(mia.path)/Database/*,$(output.path)/$(name).app/Contents/Resources/Database/) diff --git a/desktop-ui/resource/Shaders.metal b/desktop-ui/resource/Shaders.metal deleted file mode 100644 index 094dc99cc..000000000 --- a/desktop-ui/resource/Shaders.metal +++ /dev/null @@ -1,82 +0,0 @@ -// -// Shaders.metal -// ares -// -// Created by jcm on 2024-03-07. -// - -#include - -#include "ShaderTypes.h" - -using namespace metal; - -// Vertex shader outputs and fragment shader inputs -struct RasterizerData -{ - // The [[position]] attribute of this member indicates that this value - // is the clip space position of the vertex when this structure is - // returned from the vertex function. 
- float4 position [[position]]; - - // Since this member does not have a special attribute, the rasterizer - // interpolates its value with the values of the other triangle vertices - // and then passes the interpolated value to the fragment shader for each - // fragment in the triangle. - float2 textureCoordinate; -}; - -vertex RasterizerData -vertexShader(uint vertexID [[vertex_id]], - constant AAPLVertex *vertices [[buffer(AAPLVertexInputIndexVertices)]], - constant vector_uint2 *viewportSizePointer [[buffer(AAPLVertexInputIndexViewportSize)]]) -{ - RasterizerData out; - - // Index into the array of positions to get the current vertex. - // The positions are specified in pixel dimensions (i.e. a value of 100 - // is 100 pixels from the origin). - float2 pixelSpacePosition = vertices[vertexID].position.xy; - - // Get the viewport size and cast to float. - vector_float2 viewportSize = vector_float2(*viewportSizePointer); - - - // To convert from positions in pixel space to positions in clip-space, - // divide the pixel coordinates by half the size of the viewport. - out.position = vector_float4(0.0, 0.0, 0.0, 1.0); - out.position.xy = pixelSpacePosition / (viewportSize / 2.0); - - // Pass the input color directly to the rasterizer. 
- out.textureCoordinate = vertices[vertexID].textureCoordinate; - - return out; -} - -fragment float4 -samplingShader(RasterizerData in [[stage_in]], - texture2d colorTexture [[ texture(AAPLTextureIndexBaseColor) ]]) -{ - constexpr sampler textureSampler (mag_filter::nearest, - min_filter::nearest); - - // Sample the texture to obtain a color - const half4 colorSample = colorTexture.sample(textureSampler, in.textureCoordinate); - - // return the color of the texture - return float4(colorSample); -} - -fragment float4 -drawableSamplingShader(RasterizerData in [[stage_in]], - texture2d colorTexture [[ texture(AAPLTextureIndexBaseColor) ]]) -{ - constexpr sampler textureSampler (mag_filter::linear, - min_filter::linear); - - // Sample the texture to obtain a color - const half4 colorSample = colorTexture.sample(textureSampler, in.textureCoordinate); - - // return the color of the texture - return float4(colorSample); -} diff --git a/desktop-ui/settings/drivers.cpp b/desktop-ui/settings/drivers.cpp index 790b55dee..370bff6db 100644 --- a/desktop-ui/settings/drivers.cpp +++ b/desktop-ui/settings/drivers.cpp @@ -44,6 +44,10 @@ auto DriverSettings::construct() -> void { settings.video.forceSRGB = videoColorSpaceToggle.checked(); ruby::video.setForceSRGB(settings.video.forceSRGB); }); + videoThreadedRendererToggle.setText("Threaded").onToggle([&] { + settings.video.threadedRenderer = videoThreadedRendererToggle.checked(); + ruby::video.setThreadedRenderer(settings.video.threadedRenderer); + }); #endif audioLabel.setText("Audio").setFont(Font().setBold()); @@ -155,6 +159,7 @@ auto DriverSettings::videoRefresh() -> void { videoBlockingToggle.setChecked(ruby::video.blocking()).setEnabled(ruby::video.hasBlocking()); #if defined(PLATFORM_MACOS) videoColorSpaceToggle.setChecked(ruby::video.forceSRGB()).setEnabled(ruby::video.hasForceSRGB()); + videoThreadedRendererToggle.setChecked(ruby::video.threadedRenderer()).setEnabled(ruby::video.hasThreadedRenderer()); #endif 
videoFlushToggle.setChecked(ruby::video.flush()).setEnabled(ruby::video.hasFlush()); VerticalLayout::resize(); diff --git a/desktop-ui/settings/settings.cpp b/desktop-ui/settings/settings.cpp index b67373df3..b0293d3fc 100644 --- a/desktop-ui/settings/settings.cpp +++ b/desktop-ui/settings/settings.cpp @@ -57,6 +57,7 @@ auto Settings::process(bool load) -> void { bind(boolean, "Video/Exclusive", video.exclusive); bind(boolean, "Video/Blocking", video.blocking); bind(boolean, "Video/PresentSRGB", video.forceSRGB); + bind(boolean, "Video/ThreadedRenderer", video.threadedRenderer); bind(boolean, "Video/Flush", video.flush); bind(string, "Video/Shader", video.shader); bind(natural, "Video/Multiplier", video.multiplier); diff --git a/desktop-ui/settings/settings.hpp b/desktop-ui/settings/settings.hpp index 4ce602a0e..ea12038ca 100644 --- a/desktop-ui/settings/settings.hpp +++ b/desktop-ui/settings/settings.hpp @@ -12,6 +12,7 @@ struct Settings : Markup::Node { bool exclusive = false; bool blocking = false; bool forceSRGB = false; + bool threadedRenderer = true; bool flush = false; string shader = "None"; u32 multiplier = 2; @@ -337,6 +338,7 @@ struct DriverSettings : VerticalLayout { CheckLabel videoFlushToggle{&videoToggleLayout, Size{0, 0}}; #if defined(PLATFORM_MACOS) CheckLabel videoColorSpaceToggle{&videoToggleLayout, Size{0, 0}}; + CheckLabel videoThreadedRendererToggle{&videoToggleLayout, Size{0, 0}}; #endif // Label audioLabel{this, Size{~0, 0}, 5}; diff --git a/ruby/video/metal/metal.cpp b/ruby/video/metal/metal.cpp index 6952367bd..52f5d46c8 100644 --- a/ruby/video/metal/metal.cpp +++ b/ruby/video/metal/metal.cpp @@ -18,15 +18,6 @@ struct VideoMetal; -(BOOL) acceptsFirstResponder; @end -@interface RubyWindowMetal : NSWindow { -@public - VideoMetal* video; -} --(id) initWith:(VideoMetal*)video; --(BOOL) canBecomeKeyWindow; --(BOOL) canBecomeMainWindow; -@end - struct VideoMetal : VideoDriver, Metal { VideoMetal& self = *this; VideoMetal(Video& super) : 
VideoDriver(super) {} @@ -39,21 +30,43 @@ struct VideoMetal : VideoDriver, Metal { auto driver() -> string override { return "Metal"; } auto ready() -> bool override { return _ready; } - auto hasFullScreen() -> bool override { return false; } + auto hasFullScreen() -> bool override { return true; } auto hasContext() -> bool override { return true; } auto hasBlocking() -> bool override { if (@available(macOS 10.15.4, *)) { - return !isVRRSupported(); + return true; } else { return false; } } auto hasForceSRGB() -> bool override { return true; } + auto hasThreadedRenderer() -> bool override { return true; } auto hasFlush() -> bool override { return true; } auto hasShader() -> bool override { return true; } auto setFullScreen(bool fullScreen) -> bool override { - return initialize(); + /// This function implements non-idiomatic macOS fullscreen behavior that sets the window frame equal to the display's + /// frame size and hides the cursor. Idiomatic fullscreen is still available via the normal stoplight window controls. This + /// version of fullscreen is desirable because it allows us to render around the camera housing on newer Macs + /// (important for bezel-style shaders), has snappier entrance/exit and tabbing behavior, and functions better with + /// recording and capture software such as OBS and screen recorders. Hiding the mouse cursor is also essential to + /// rendering with appropriate frame pacing in Metal's 'direct' presentation mode. 
+ + // todo: unify with cursor auto-hide in hiro, ideally ares-wide fullscreen mode option + + if (fullScreen) { + frameBeforeFullScreen = view.window.frame; + [NSApp setPresentationOptions:(NSApplicationPresentationAutoHideDock | NSApplicationPresentationAutoHideMenuBar)]; + [view.window setStyleMask:NSWindowStyleMaskBorderless]; + [view.window setFrame:view.window.screen.frame display:YES]; + [NSCursor setHiddenUntilMouseMoves:YES]; + } else { + [NSApp setPresentationOptions:NSApplicationPresentationDefault]; + [view.window setStyleMask:NSWindowStyleMaskTitled]; + [view.window setFrame:frameBeforeFullScreen display:YES]; + } + [view.window makeFirstResponder:view]; + return true; } auto setContext(uintptr context) -> bool override { @@ -72,6 +85,12 @@ struct VideoMetal : VideoDriver, Metal { } else { view.colorspace = view.window.screen.colorSpace.CGColorSpace; } + return true; + } + + auto setThreadedRenderer(bool threadedRenderer) -> bool override { + _threaded = threadedRenderer; + return true; } auto setFlush(bool flush) -> bool override { @@ -79,7 +98,8 @@ struct VideoMetal : VideoDriver, Metal { return true; } - auto refreshRateHint(double refreshRate) -> void { + auto refreshRateHint(double refreshRate) -> void override { + if (refreshRate == _refreshRateHint) return; _refreshRateHint = refreshRate; updatePresentInterval(); } @@ -88,7 +108,8 @@ struct VideoMetal : VideoDriver, Metal { if (@available(macOS 12.0, *)) { NSTimeInterval minInterval = view.window.screen.minimumRefreshInterval; NSTimeInterval maxInterval = view.window.screen.maximumRefreshInterval; - return minInterval != maxInterval; + _vrrIsSupported = minInterval != maxInterval; + return _vrrIsSupported; } else { return false; } @@ -105,6 +126,8 @@ struct VideoMetal : VideoDriver, Metal { CFTimeInterval minimumInterval = view.window.screen.minimumRefreshInterval; if (_refreshRateHint != 0) { _presentInterval = (1.0 / _refreshRateHint); + NSLog(@"Refresh rate hint changed to %lf", 
_refreshRateHint); + averagePresentDuration = _presentInterval; } else { _presentInterval = minimumInterval; } @@ -116,20 +139,24 @@ struct VideoMetal : VideoDriver, Metal { if (_filterChain != NULL) { _libra.mtl_filter_chain_free(&_filterChain); } - + if (_preset != NULL) { _libra.preset_free(&_preset); } - - if (_libra.preset_create(pathname.data(), &_preset) != NULL) { - print(string{"Metal: Failed to load shader: ", pathname, "\n"}); + + if(file::exists(pathname)) { + if (_libra.preset_create(pathname.data(), &_preset) != NULL) { + print(string{"Metal: Failed to load shader: ", pathname, "\n"}); + return false; + } + + if (_libra.mtl_filter_chain_create(&_preset, _commandQueue, nil, &_filterChain) != NULL) { + print(string{"Metal: Failed to create filter chain for: ", pathname, "\n"}); + return false; + }; + } else { return false; } - - if (_libra.mtl_filter_chain_create(&_preset, _commandQueue, nil, &_filterChain) != NULL) { - print(string{"Metal: Failed to create filter chain for: ", pathname, "\n"}); - return false; - }; return true; } @@ -137,7 +164,10 @@ struct VideoMetal : VideoDriver, Metal { return true; } - auto clear() -> void override {} + auto clear() -> void override { + //force a resize of the framebuffer to clear it, then output one pixel + output(1, 1); + } auto size(u32& width, u32& height) -> void override { if ((_viewWidth == width && _viewHeight == height) && (_viewWidth != 0 && _viewHeight != 0)) { return; } @@ -168,16 +198,22 @@ struct VideoMetal : VideoDriver, Metal { buffer = new u32[width * height](); - MTLTextureDescriptor *textureDescriptor = [MTLTextureDescriptor new]; - textureDescriptor.pixelFormat = MTLPixelFormatBGRA8Unorm; - textureDescriptor.width = sourceWidth; - textureDescriptor.height = sourceHeight; - textureDescriptor.usage = MTLTextureUsageRenderTarget|MTLTextureUsageShaderRead; - - _sourceTexture = [_device newTextureWithDescriptor:textureDescriptor]; - bytesPerRow = sourceWidth * sizeof(u32); if (bytesPerRow < 16) 
bytesPerRow = 16; + + for (int i = 0; i < kMaxSourceBuffersInFlight; i++) { + if (sourceWidth < 1 || sourceHeight < 1) { + _sourceTextures[i] = nullptr; + continue; + } + MTLTextureDescriptor *textureDescriptor = [MTLTextureDescriptor new]; + textureDescriptor.pixelFormat = MTLPixelFormatBGRA8Unorm; + textureDescriptor.width = sourceWidth; + textureDescriptor.height = sourceHeight; + textureDescriptor.usage = MTLTextureUsageRenderTarget|MTLTextureUsageShaderRead; + + _sourceTextures[i] = [_device newTextureWithDescriptor:textureDescriptor]; + } } pitch = sourceWidth * sizeof(u32); @@ -187,6 +223,7 @@ struct VideoMetal : VideoDriver, Metal { auto release() -> void override {} auto resizeOutputBuffers(u32 width, u32 height) { + NSLog(@"Resizing output buffers to %i, %i", width, height); outputWidth = width; outputHeight = height; @@ -229,6 +266,52 @@ struct VideoMetal : VideoDriver, Metal { } auto output(u32 width, u32 height) -> void override { + /// Synchronously copy the current framebuffer to a Metal texture, then call into the render dispatch queue + /// either synchronously or asynchronously depending on whether blocking is on and VRR is supported. + + if (depth >= kMaxSourceBuffersInFlight) { + //if we are running very behind, drop this frame + return; + } + + //can we do this outside of the output function? + //currently no, because in theory framebuffer size can change during runtime + if (width != outputWidth || height != outputHeight) { + resizeOutputBuffers(width, height); + } + + @autoreleasepool { + + frameCount++; + + auto index = frameCount % kMaxSourceBuffersInFlight; + + auto sourceTexture = _sourceTextures[index]; + + [sourceTexture replaceRegion:MTLRegionMake2D(0, 0, sourceWidth, sourceHeight) mipmapLevel:0 withBytes:buffer bytesPerRow:bytesPerRow]; + + if (@available(macOS 10.15.4, *)) { + depth++; + } + + /// Only block with `dispatch_sync` if blocking enabled and VRR not supported, or if the threaded renderer + /// is explicitly disabled. 
if VRR is supported, we should try to not _literally_ block, because we'll be making a best + /// effort to synchronize to the guest and host refresh rate at the same time. It's easier to do that if we have + /// assurances that we won't block the emulation thread in the worst case system conditions. + if ((_blocking && !_vrrIsSupported) || !_threaded) { + dispatch_sync(_renderQueue, ^{ + outputHelper(width, height, sourceTexture); + }); + } else { + dispatch_async(_renderQueue, ^{ + outputHelper(width, height, sourceTexture); + }); + } + } + } + +private: + auto outputHelper(u32 width, u32 height, id sourceTexture) -> void { /// Uses two render passes (plus librashader's render passes). The first render pass samples the source texture, /// consisting of the pixel buffer from the emulator, onto a texture the same size as our eventual output, /// `_renderTargetTexture`. Then it calls into librashader, which performs postprocessing onto the same @@ -236,37 +319,59 @@ struct VideoMetal : VideoDriver, Metal { /// We need this last pass because librashader expects the viewport to be the same size as the output texture, /// which is not the case for ares. - //can we do this outside of the output function? 
- if (width != outputWidth || height != outputHeight) { - resizeOutputBuffers(width, height); - } + dispatch_semaphore_wait(_semaphore, DISPATCH_TIME_FOREVER); + + id commandBuffer = [_commandQueue commandBuffer]; - @autoreleasepool { + if (commandBuffer != nil) { + __block dispatch_semaphore_t block_sema = _semaphore; - dispatch_semaphore_wait(_semaphore, DISPATCH_TIME_FOREVER); + [commandBuffer addCompletedHandler:^(id buffer) { + dispatch_semaphore_signal(block_sema); + }]; - id commandBuffer = [_commandQueue commandBuffer]; + _renderToTextureRenderPassDescriptor.colorAttachments[0].texture = _renderTargetTexture; - if (commandBuffer != nil) { - __block dispatch_semaphore_t block_sema = _semaphore; + if (_renderToTextureRenderPassDescriptor != nil) { - [commandBuffer addCompletedHandler:^(id buffer) { - dispatch_semaphore_signal(block_sema); - }]; + id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:_renderToTextureRenderPassDescriptor]; - _renderToTextureRenderPassDescriptor.colorAttachments[0].texture = _renderTargetTexture; + _renderToTextureRenderPassDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore; - if (_renderToTextureRenderPassDescriptor != nil) { + [renderEncoder setRenderPipelineState:_renderToTextureRenderPipeline]; + + [renderEncoder setViewport:(MTLViewport){0, 0, (double)width, (double)height, -1.0, 1.0}]; + + [renderEncoder setVertexBuffer:_vertexBuffer + offset:0 + atIndex:0]; + + [renderEncoder setVertexBytes:&_viewportSize + length:sizeof(_viewportSize) + atIndex:MetalVertexInputIndexViewportSize]; + + [renderEncoder setFragmentTexture:sourceTexture atIndex:0]; + + [renderEncoder drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:6]; + + [renderEncoder endEncoding]; + + if (_filterChain) { + _libra.mtl_filter_chain_frame(&_filterChain, commandBuffer, frameCount, sourceTexture, _libraViewport, _renderTargetTexture, nil, nil); + } + + //this call will block the current thread/queue if a drawable is 
not yet available + MTLRenderPassDescriptor *drawableRenderPassDescriptor = view.currentRenderPassDescriptor; + + drawableRenderPassDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear; + + if (drawableRenderPassDescriptor != nil) { - id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:_renderToTextureRenderPassDescriptor]; + id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:drawableRenderPassDescriptor]; - _renderToTextureRenderPassDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore; + [renderEncoder setRenderPipelineState:_drawableRenderPipeline]; - [_sourceTexture replaceRegion:MTLRegionMake2D(0, 0, sourceWidth, sourceHeight) mipmapLevel:0 withBytes:buffer bytesPerRow:bytesPerRow]; - - [renderEncoder setRenderPipelineState:_renderToTextureRenderPipeline]; - - [renderEncoder setViewport:(MTLViewport){0, 0, (double)width, (double)height, -1.0, 1.0}]; + [renderEncoder setViewport:(MTLViewport){_outputX, _outputY, (double)width, (double)height, -1.0, 1.0}]; [renderEncoder setVertexBuffer:_vertexBuffer offset:0 @@ -276,85 +381,98 @@ struct VideoMetal : VideoDriver, Metal { length:sizeof(_viewportSize) atIndex:MetalVertexInputIndexViewportSize]; - [renderEncoder setFragmentTexture:_sourceTexture atIndex:0]; + [renderEncoder setFragmentTexture:_renderTargetTexture atIndex:0]; [renderEncoder drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:6]; [renderEncoder endEncoding]; - if (_filterChain) { - _libra.mtl_filter_chain_frame(&_filterChain, commandBuffer, frameCount++, _sourceTexture, _libraViewport, _renderTargetTexture, nil, nil); + id drawable = view.currentDrawable; + + if (@available(macOS 10.15.4, *)) { + [drawable addPresentedHandler:^(id drawable) { + self.drawableWasPresented(drawable); + depth--; + }]; } - MTLRenderPassDescriptor *drawableRenderPassDescriptor = view.currentRenderPassDescriptor; + auto targetPresentDuration = determineNextPresentDuration(); - 
drawableRenderPassDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear; - - if (drawableRenderPassDescriptor != nil) { - - id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:drawableRenderPassDescriptor]; - - [renderEncoder setRenderPipelineState:_drawableRenderPipeline]; - - [renderEncoder setViewport:(MTLViewport){_outputX, _outputY, (double)width, (double)height, -1.0, 1.0}]; - - [renderEncoder setVertexBuffer:_vertexBuffer - offset:0 - atIndex:0]; - - [renderEncoder setVertexBytes:&_viewportSize - length:sizeof(_viewportSize) - atIndex:MetalVertexInputIndexViewportSize]; - - [renderEncoder setFragmentTexture:_renderTargetTexture atIndex:0]; - - [renderEncoder drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:6]; - - [renderEncoder endEncoding]; - - id drawable = view.currentDrawable; - - if (drawable != nil) { - - if (_blocking) { - - [commandBuffer presentDrawable:drawable afterMinimumDuration:_presentInterval]; - - } else { - - [commandBuffer presentDrawable:drawable]; - - } - - [view draw]; - + if (drawable != nil) { + if (_blocking) { + //_blocking is not enabled unless 10.15.4 is available, so ignore availability warnings here +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunguarded-availability-new" + [commandBuffer presentDrawable:drawable afterMinimumDuration:targetPresentDuration]; +#pragma clang diagnostic pop + } else { + [commandBuffer presentDrawable:drawable]; } + [view draw]; } - - [commandBuffer commit]; - - if (_flush) { - [commandBuffer waitUntilCompleted]; - } + } + + [commandBuffer commit]; + + if (_flush) { + [commandBuffer waitUntilCompleted]; } } } } + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunguarded-availability-new" + auto drawableWasPresented(id drawable) -> void { + if (drawable.presentedTime == 0) { return; } + if (previousPresentedTime <= 0) { + previousPresentedTime = drawable.presentedTime; + } + CFTimeInterval presentationDuration = 
drawable.presentedTime - previousPresentedTime; + const double alpha = kPresentIntervalRollingAverageWeight; -private: + averagePresentDuration = (presentationDuration * alpha) + (averagePresentDuration * (1.0 - alpha)); + previousPresentedTime = drawable.presentedTime; + } +#pragma clang diagnostic pop + + auto determineNextPresentDuration() -> CFTimeInterval { + /// We use a rolling average of the last few seconds worth of frames to determine if we are running fast or slow. If + /// we are running ahead, we do nothing special; it's sufficient to present at the prescribed interval and eventually + /// we will fall behind. When we fall behind, we need to present earlier than the target present interval. The way VRR + /// works on macOS, we can request an earlier present interval, but we don't always get it. So what we do in this + /// function is "nudge" the system to display our frame early, but not immediately, in an attempt to correct for running + /// behind. If in this process we get more than 3 frames behind, we start requesting immediate presents. If the nudged duration would be negative (the average is more than 1/kVRRCorrectiveForce of an interval over target), we also request an immediate present rather than handing Metal a negative duration. + + CFTimeInterval targetPresentDuration = _presentInterval; + CFTimeInterval differenceFromTarget = _presentInterval - averagePresentDuration; + if (-differenceFromTarget >= (_presentInterval * kVRRCorrectiveTolerance)) { + targetPresentDuration = _presentInterval + (differenceFromTarget * kVRRCorrectiveForce); + } + if (depth > kVRRImmediatePresentThreshold || targetPresentDuration < 0) { + return 0; + } else { + return targetPresentDuration; + } + } + auto initialize() -> bool { terminate(); - if (!self.fullScreen && !self.context) return false; + if (!self.context) return false; - auto context = self.fullScreen ? [window contentView] : (__bridge NSView*)(void *)self.context; + auto context = (__bridge NSView*)(void *)self.context; auto size = [context frame].size; NSError *error = nil; + //Put renderer on a separate queue so we can choose whether or not to block the main thread (audio) waiting on a drawable.
+ dispatch_queue_attr_t queueAttributes = dispatch_queue_attr_make_with_qos_class(DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, -1); + _renderQueue = dispatch_queue_create("com.ares.metal-renderer", queueAttributes); + _device = MTLCreateSystemDefaultDevice(); _commandQueue = [_device newCommandQueue]; - _semaphore = dispatch_semaphore_create(kMaxBuffersInFlight); + _semaphore = dispatch_semaphore_create(kMaxOutputBuffersInFlight); _renderToTextureRenderPassDescriptor = [MTLRenderPassDescriptor new]; @@ -365,27 +483,40 @@ private: ///We compile shaders at runtime so we do not need to add the `xcrun` Metal compiler toolchain to the ares build process. ///Metal frame capture does not get along with runtime-compiled shaders in my testing, however. If you are debugging ares - ///and need GPU captures, you should compile shaders with debug symbols offline, then instantiate the shader library by - ///directly referencing a compiled `.metallib` file, rather than the following instantiation flow. You will also need to alter - ///the `desktop-ui` Makefile such that it copies the `.metallib` into the bundle, rather than only the `Shaders.metal` - ///source. + ///and need GPU captures, run `scripts/macos-metal-debug.sh` and then compile ares in debug mode. 
- NSString *bundleResourcePath = [NSBundle mainBundle].resourcePath; - const string& fileComponent = "/Shaders/Shaders.metal"; - NSString *shaderFilePath = [bundleResourcePath stringByAppendingString: [[NSString new] initWithUTF8String:fileComponent]]; + bool libraryCreated = false; - NSString *shaderLibrarySource = [NSString stringWithContentsOfFile:shaderFilePath encoding:NSUTF8StringEncoding error: &error]; - - if (shaderLibrarySource == nil) { - NSLog(@"%@",error); - return false; +#if defined(BUILD_DEBUG) + if (@available(macOS 10.13, *)) { + NSURL *shaderLibURL = [[NSBundle mainBundle] URLForResource:@"shaders" withExtension:@"metallib" subdirectory:@"Shaders"]; /* looked up inside the running bundle rather than relative to the working directory; nil when the debug .metallib was not copied in */ + if (shaderLibURL != nil) _library = [_device newLibraryWithURL: shaderLibURL error:&error]; } - - _library = [_device newLibraryWithSource: shaderLibrarySource options: [MTLCompileOptions alloc] error:&error]; - - if (_library == nil) { - NSLog(@"%@",error); - return false; + if (_library != nil) { + libraryCreated = true; + } else { + NSLog(@"Compiled in debug mode, but debug .metallib not found. 
If you require Metal debugging, ensure you are on macOS 10.13+ and compile debug Metal shaders with scripts/macos-metal-debug.sh."); + } +#endif + + if (!libraryCreated) { + NSString *bundleResourcePath = [NSBundle mainBundle].resourcePath; + const string& fileComponent = "/Shaders/Shaders.metal"; + NSString *shaderFilePath = [bundleResourcePath stringByAppendingString: [[NSString new] initWithUTF8String:fileComponent]]; + + NSString *shaderLibrarySource = [NSString stringWithContentsOfFile:shaderFilePath encoding:NSUTF8StringEncoding error: &error]; + + if (shaderLibrarySource == nil) { + NSLog(@"%@",error); + return false; + } + + _library = [_device newLibraryWithSource: shaderLibrarySource options: [MTLCompileOptions alloc] error:&error]; + + if (_library == nil) { + NSLog(@"%@",error); + return false; + } } MTLRenderPipelineDescriptor *pipelineStateDescriptor = [MTLRenderPipelineDescriptor new]; @@ -423,16 +554,13 @@ private: bool forceSRGB = self.forceSRGB; self.setForceSRGB(forceSRGB); view.autoresizingMask = NSViewWidthSizable|NSViewHeightSizable; - - _commandQueue = [_device newCommandQueue]; + _threaded = self.threadedRenderer; _libra = librashader_load_instance(); if (!_libra.instance_loaded) { print("Metal: Failed to load librashader: shaders will be disabled\n"); } - setShader(self.shader); - _blocking = self.blocking; initialized = true; @@ -446,7 +574,9 @@ private: _library = nullptr; _vertexBuffer = nullptr; - _sourceTexture = nullptr; + for (int i = 0; i < kMaxSourceBuffersInFlight; i++) { + _sourceTextures[i] = nullptr; + } _mtlVertexDescriptor = nullptr; _renderToTextureRenderPassDescriptor = nullptr; @@ -464,17 +594,9 @@ private: [view removeFromSuperview]; view = nil; } - - if (window) { - [window toggleFullScreen:nil]; - [window setCollectionBehavior:NSWindowCollectionBehaviorDefault]; - [window close]; - window = nil; - } } RubyVideoMetal* view = nullptr; - RubyWindowMetal* window = nullptr; bool _ready = false; std::recursive_mutex mutex; @@ 
-518,28 +640,3 @@ private: } @end - -@implementation RubyWindowMetal : NSWindow - --(id) initWith:(VideoMetal*)videoPointer { - auto primaryRect = [[[NSScreen screens] objectAtIndex:0] frame]; - if (self = [super initWithContentRect:primaryRect styleMask:0 backing:NSBackingStoreBuffered defer:YES]) { - video = videoPointer; - [self setDelegate:self]; - [self setReleasedWhenClosed:NO]; - [self setAcceptsMouseMovedEvents:YES]; - [self setTitle:@""]; - [self makeKeyAndOrderFront:nil]; - } - return self; -} - --(BOOL) canBecomeKeyWindow { - return YES; -} - --(BOOL) canBecomeMainWindow { - return YES; -} - -@end diff --git a/ruby/video/metal/metal.hpp b/ruby/video/metal/metal.hpp index d03b28706..a89aff229 100644 --- a/ruby/video/metal/metal.hpp +++ b/ruby/video/metal/metal.hpp @@ -13,7 +13,30 @@ struct Metal; -static const NSUInteger kMaxBuffersInFlight = 3; +//macOS likes triple buffering, so only use three output buffers. +static const NSUInteger kMaxOutputBuffersInFlight = 3; + +//If the frame queue gets above this depth, start discarding frames. +static const u32 kMaxSourceBuffersInFlight = 6; + +/// MARK: VRR constants +/// These constants were picked through trial and error testing; not any objective principles. It may be possible to +/// achieve slight improvements, especially if optimizing for a particular system. These were found to generally perform +/// well across all of ares's systems that were tested. Despite thorough testing, the immediate present mode together +/// with the lowest possible audio latency may still be a better choice for some systems, particularly WonderSwan. + +//How far back the rolling average of present intervals extends. +static const double kPresentIntervalRollingAverageWeight = 0.05; + +//The amount that we must be off of the target refresh rate before we start 'nudging' it toward earlier presents; here, 0.5%. 
+static const double kVRRCorrectiveTolerance = .005; + +//Amount that we "nudge" the present interval when we are behind the target refresh rate. +//In this way, we can sometimes achieve presents between 8ms and 16ms, leading to smoother pacing. +static const double kVRRCorrectiveForce = 5.0; + +//If the frame queue gets above this depth, start scheduling immediate (0ms) presents. +static const u32 kVRRImmediatePresentThreshold = 3; struct Metal { auto setShader(const string& pathname) -> void; @@ -37,17 +60,26 @@ struct Metal { u32 outputHeight = 0; double _outputX = 0; double _outputY = 0; + u32 depth = 0; + + dispatch_queue_t _renderQueue = nullptr; CGFloat _viewWidth = 0; CGFloat _viewHeight = 0; vector_uint2 _viewportSize; - double _presentInterval = .016; + CFTimeInterval _presentInterval = .016; + CFTimeInterval averagePresentDuration = .016; + CFTimeInterval previousPresentedTime = 0; u32 frameCount = 0; - double _refreshRateHint = 60; + CFTimeInterval _refreshRateHint = 60; bool _blocking = false; bool _flush = false; + bool _vrrIsSupported = false; + bool _threaded = true; + + NSRect frameBeforeFullScreen = NSMakeRect(0,0,0,0); id _device; id _commandQueue; @@ -55,7 +87,7 @@ struct Metal { dispatch_semaphore_t _semaphore; id _vertexBuffer; - id _sourceTexture; + id _sourceTextures[kMaxSourceBuffersInFlight]; MTLVertexDescriptor *_mtlVertexDescriptor; MTLRenderPassDescriptor *_renderToTextureRenderPassDescriptor; diff --git a/ruby/video/video.cpp b/ruby/video/video.cpp index d3e1ec862..6ecb62264 100644 --- a/ruby/video/video.cpp +++ b/ruby/video/video.cpp @@ -68,6 +68,14 @@ auto Video::setForceSRGB(bool forceSRGB) -> bool { return true; } +auto Video::setThreadedRenderer(bool threadedRenderer) -> bool { + lock_guard lock(mutex); + if(instance->threadedRenderer == threadedRenderer) return true; + if(!instance->hasThreadedRenderer()) return false; + if(!instance->setThreadedRenderer(instance->threadedRenderer = threadedRenderer)) return false; + return true; 
+} + auto Video::setFlush(bool flush) -> bool { lock_guard lock(mutex); if(instance->flush == flush) return true; diff --git a/ruby/video/video.hpp b/ruby/video/video.hpp index e1d8c50ec..d4c6c40c9 100644 --- a/ruby/video/video.hpp +++ b/ruby/video/video.hpp @@ -14,6 +14,7 @@ struct VideoDriver { virtual auto hasContext() -> bool { return false; } virtual auto hasBlocking() -> bool { return false; } virtual auto hasForceSRGB() -> bool { return false; } + virtual auto hasThreadedRenderer() -> bool { return false; } virtual auto hasFlush() -> bool { return false; } virtual auto hasFormats() -> vector { return {"ARGB24"}; } virtual auto hasShader() -> bool { return false; } @@ -26,6 +27,7 @@ struct VideoDriver { virtual auto setContext(uintptr context) -> bool { return true; } virtual auto setBlocking(bool blocking) -> bool { return true; } virtual auto setForceSRGB(bool forceSRGB) -> bool { return true; } + virtual auto setThreadedRenderer(bool threadedRenderer) -> bool { return true; } virtual auto setFlush(bool flush) -> bool { return true; } virtual auto setFormat(string format) -> bool { return true; } virtual auto setShader(string shader) -> bool { return true; } @@ -49,6 +51,7 @@ protected: uintptr context = 0; bool blocking = false; bool forceSRGB = false; + bool threadedRenderer = true; bool flush = false; string format = "ARGB24"; string shader = "None"; @@ -90,6 +93,7 @@ struct Video { auto hasContext() -> bool { return instance->hasContext(); } auto hasBlocking() -> bool { return instance->hasBlocking(); } auto hasForceSRGB() -> bool { return instance->hasForceSRGB(); } + auto hasThreadedRenderer() -> bool { return instance->hasThreadedRenderer(); } auto hasFlush() -> bool { return instance->hasFlush(); } auto hasFormats() -> vector { return instance->hasFormats(); } auto hasShader() -> bool { return instance->hasShader(); } @@ -102,6 +106,7 @@ struct Video { auto context() -> uintptr { return instance->context; } auto blocking() -> bool { return 
instance->blocking; } auto forceSRGB() -> bool { return instance->forceSRGB; } + auto threadedRenderer() -> bool { return instance->threadedRenderer; } auto flush() -> bool { return instance->flush; } auto format() -> string { return instance->format; } auto shader() -> string { return instance->shader; } @@ -112,6 +117,7 @@ struct Video { auto setContext(uintptr context) -> bool; auto setBlocking(bool blocking) -> bool; auto setForceSRGB(bool forceSRGB) -> bool; + auto setThreadedRenderer(bool threadedRenderer) -> bool; auto setFlush(bool flush) -> bool; auto setFormat(string format) -> bool; auto setShader(string shader) -> bool; diff --git a/scripts/macos-metal-debug.sh b/scripts/macos-metal-debug.sh new file mode 100755 index 000000000..f546d09df --- /dev/null +++ b/scripts/macos-metal-debug.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -euo pipefail + +pushd "$(dirname "${BASH_SOURCE[0]}")/../ruby/video/metal" # resolve relative to this script so it also works when run as scripts/macos-metal-debug.sh from the repo root +xcrun -sdk macosx metal -o shaders.ir -c -gline-tables-only -frecord-sources Shaders.metal +xcrun -sdk macosx metallib -o shaders.metallib shaders.ir +popd