//
// Copyright 2023 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
//
|
|
|
|
import Foundation
|
|
import MetalKit
|
|
import SignalServiceKit
|
|
|
|
/// Information required to render with metal, that can be loaded once
/// on startup. If loading fails, we can fall back to non-animated spoiler
/// rendering.
public struct SpoilerMetalConfiguration {
    /// The GPU used for all spoiler rendering.
    fileprivate let device: MTLDevice
    /// Queue from which per-frame command buffers are created.
    fileprivate let commandQueue: MTLCommandQueue
    /// Compiled pipeline for the `clear_pass_func` compute function.
    fileprivate let clearPipelineState: MTLComputePipelineState
    /// Compiled pipeline for the `draw_particles_func` compute function.
    fileprivate let drawParticlesPipelineState: MTLComputePipelineState

    /// Whether compute work can be dispatched with `dispatchThreads`, i.e. with
    /// a grid size that is not a multiple of the threadgroup size.
    fileprivate let supportsNonUniformThreadGroups: Bool

    /// The maximum width/height of a single texture (single ParticleView)
    /// supported by this device, in points.
    var maxTextureDimensionPoints: CGFloat

    /// Loads every Metal object needed for spoiler rendering.
    ///
    /// Returns `nil` (after logging a warning) if the device, command queue,
    /// default shader library, either compute function, or either compute
    /// pipeline cannot be created.
    public init?() {
        guard let device = MTLCreateSystemDefaultDevice() else {
            Logger.warn("Unable to instantiate metal device.")
            return nil
        }
        guard let commandQueue = device.makeCommandQueue() else {
            Logger.warn("Unable to instantiate metal command queue.")
            return nil
        }
        guard let library = device.makeDefaultLibrary() else {
            Logger.warn("Unable to instantiate metal library.")
            return nil
        }
        guard
            let clearFunc = library.makeFunction(name: "clear_pass_func"),
            let drawParticlesFunc = library.makeFunction(name: "draw_particles_func")
        else {
            Logger.warn("Unable to instantiate metal compute functions.")
            return nil
        }

        do {
            self.clearPipelineState = try device.makeComputePipelineState(function: clearFunc)
            self.drawParticlesPipelineState = try device.makeComputePipelineState(function: drawParticlesFunc)
        } catch {
            Logger.warn("Unable to instantiate metal compute pipelines: \(error)")
            return nil
        }

        self.device = device
        self.commandQueue = commandQueue
        self.supportsNonUniformThreadGroups = Self.getSupportsNonUniformThreadGroups(device)
        self.maxTextureDimensionPoints = Self.getMaxTextureDimensionPoints(device)
    }

    /// Returns true if `device` reports support for any GPU family that is
    /// known to support non-uniform threadgroup sizes.
    private static func getSupportsNonUniformThreadGroups(_ device: MTLDevice) -> Bool {
        // Taken from https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
        // This whole thing is extremely poorly documented. Hopefully if they release
        // a new GPU with e.g. .apple9, it will report itself as supporting .apple8
        // and such. MTLGPUFamily is not iterable, so there's not really anything
        // we can do here to guarantee forwards-compatibility.

        var knownGoodFamilies: [MTLGPUFamily] = [
            .common3,
            .apple4, .apple5, .apple6, .apple7, .apple8,
            .mac2
        ]
        if #available(iOS 16, *) {
            knownGoodFamilies.append(.metal3)
        }
        return knownGoodFamilies.contains { device.supportsFamily($0) }
    }

    /// Returns the largest texture width/height usable on `device`, converted
    /// from pixels to points using the main screen's scale.
    private static func getMaxTextureDimensionPoints(_ device: MTLDevice) -> CGFloat {
        // Taken from https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf

        // NOTE: if this ever exceeds 32,767 (max 16-bit int) we need to update
        // the types in SpoilerParticleShader and corresponding structs in this file
        // to use 32-bit ints for positions.
        let pixelSize: CGFloat
        if device.supportsFamily(.apple3) {
            pixelSize = 16384
        } else if #available(iOS 16, *), device.supportsFamily(.mac2) {
            pixelSize = 16384
        } else {
            pixelSize = 8192
        }
        return pixelSize / UIScreen.main.scale
    }
}
|
|
|
|
/// An `MTKView` that renders animated spoiler particles into one or more
/// rects using the compute pipelines held by `SpoilerMetalConfiguration`.
internal class SpoilerParticleView: MTKView {

    private let metalConfig: SpoilerMetalConfiguration
    private let renderer: SpoilerRenderer

    init(
        metalConfig: SpoilerMetalConfiguration,
        renderer: SpoilerRenderer
    ) {
        self.metalConfig = metalConfig
        self.renderer = renderer
        super.init(frame: .zero, device: metalConfig.device)

        // `draw(_:)` binds the drawable's texture to a compute encoder, so the
        // drawable must not be restricted to framebuffer-only usage.
        self.framebufferOnly = false
        self.preferredFramesPerSecond = Self.framesPerSecond(for: fidelity)
        layer.isOpaque = false
        layer.shouldRasterize = true
        // Start out paused until objects can be set.
        self.isPaused = true
    }

    required init(coder: NSCoder) {
        fatalError("init(coder:) has not been implemented")
    }

    // MARK: - Public API

    /// The spoiler frames (and their total surface area) to render.
    /// Call `commitChanges()` after changing this.
    public var spec: SpoilerRenderer.Spec?

    /// Must be called after setting specs and view.frame information
    /// to generate metadata and be ready for rendering.
    func commitChanges() {
        prepareDrawRectsForMetal()
        updateParticleCount()
        setNeedsDisplay()
    }

    /// Not read or written by this class; available for owners to track usage.
    public var isInUse = false

    /// Rendering quality level. Determines particle density, layer count and
    /// frame rate (see the `Fidelity` extension in this file).
    public enum Fidelity {
        case high
        case low

        private static let defaultParticlesPerUnitArea: CGFloat = 0.03

        /// Picks `.low` once the particle count at the default density would
        /// reach `Constants.particleCountEfficiencyThreshold`; otherwise `.high`.
        public static func forTotalSurfaceArea(_ surfaceArea: SurfaceArea) -> Fidelity {
            let defaultNumParticles = surfaceArea * defaultParticlesPerUnitArea
            if defaultNumParticles >= Constants.particleCountEfficiencyThreshold {
                return .low
            }
            return .high
        }
    }

    /// The current fidelity. Changing it updates the frame rate and
    /// recomputes particle counts.
    public var fidelity: Fidelity = .high {
        didSet {
            self.preferredFramesPerSecond = Self.framesPerSecond(for: fidelity)
            updateParticleCount()
        }
    }

    /// Converts a fidelity's frame delay to a whole frames-per-second value.
    /// Rounds rather than truncates so that floating point error in
    /// `1 / frameDelay` can never drop the result by a whole frame.
    private static func framesPerSecond(for fidelity: Fidelity) -> Int {
        return Int((1 / fidelity.frameDelay).rounded())
    }

    // MARK: - Swift<->C shared structs

    // IMPORTANT: these must be exactly identical to the values defined
    // in SpoilerParticleShader.metal, as they are both schemas for interpreting
    // the same shared memory across the CPU (swift) and GPU (metal).

    /// A rectangle to draw particles into, represented in
    /// the texture's coordinates.
    private struct DrawRect {
        var origin: SIMD2<UInt16>
        var size: SIMD2<UInt16>
        /// The color with which to draw particles in this rect.
        /// Values from 0 to 255. Note that textures use
        /// 0 to 1 half values for color; conversion is handled
        /// in the GPU.
        var particleRGB: SIMD3<UInt8>
        /// The base alpha value for particle colors in this rect,
        /// with 255 representing an alpha of 1.
        var particleBaseAlpha: UInt8
        /// Every layer of particles has this much less alpha than the previous,
        /// with 255 representing an alpha of 1.
        var particleAlphaDropoff: UInt8
        /// The size (in texture coordinates) of particles in this rect.
        var particleSizePixels: UInt8
    }

    /// "Uniforms" is a term of art of data that is the same (uniform) across all parallel threads.
    /// Contains information that applies to all particles we draw.
    private struct Uniforms {
        /// The amount of time passed since the animation started, in milliseconds.
        var elapsedTimeMs: UInt32
        /// The number of rects being drawn into.
        var numDrawRects: UInt32
        /// The density of particles per pixel, per layer.
        /// In other words, in the texture's coordinates.
        var particlesPerPixelPerLayer: Float32
        /// The number of layers of particles to draw.
        var numLayers: UInt8
        /// Divisor for max particle speed.
        var particleSpeedDivisor: UInt8
    }

    private static let uniformsSize = MemoryLayout<Uniforms>.stride

    // MARK: - Metal Inputs

    private var particlesPerPixelPerLayer: Float32 = 0
    private var numDrawRects: UInt32 = 0
    private var drawRectBuffer: MTLBuffer?
    private var totalNumParticlesPerLayer: Int = 0

    /// Drops the draw rects and pauses the view.
    private func resetMetalInputs() {
        numDrawRects = 0
        drawRectBuffer = nil
        isPaused = true
    }

    /// Recomputes particle density and per-layer particle count from the
    /// current `spec` and `fidelity`, capping the count at
    /// `Constants.maxParticleCountPerLayer`.
    private func updateParticleCount() {
        guard let spec else {
            particlesPerPixelPerLayer = 0
            // Don't leave a stale count behind from a previous spec.
            totalNumParticlesPerLayer = 0
            isPaused = true
            return
        }
        let scale = UIScreen.main.scale

        var particlesPerUnitArea = fidelity.particlesPerUnitAreaPerLayer
        var particleCount = particlesPerUnitArea * spec.totalSurfaceArea

        let maxParticleCount = Constants.maxParticleCountPerLayer
        if particleCount > maxParticleCount {
            // Scale the density down so the total lands exactly on the cap.
            particlesPerUnitArea *= maxParticleCount / particleCount
            particleCount = maxParticleCount
        }

        // Divide by scale squared because it's a 2d area.
        let particlesPerPixel: CGFloat = particlesPerUnitArea / (scale * scale)

        self.particlesPerPixelPerLayer = Float32(particlesPerPixel)
        self.totalNumParticlesPerLayer = Int(particleCount)
        self.isPaused = numDrawRects == 0
    }

    /// Converts a length in points to whole texture pixels (truncating),
    /// saturating to the `UInt16` range instead of trapping on negative,
    /// oversized or non-finite values.
    private static func texturePixels(_ points: CGFloat, scale: CGFloat) -> UInt16 {
        let pixels = points * scale
        guard pixels.isFinite, pixels > 0 else {
            return 0
        }
        return UInt16(min(pixels, CGFloat(UInt16.max)))
    }

    /// Converts the spec's spoiler frames into `DrawRect`s in texture
    /// coordinates, uploads them to a Metal buffer and sizes the drawable.
    /// Frames that are empty or not fully inside `bounds` are skipped.
    private func prepareDrawRectsForMetal() {
        guard bounds.width > 0, bounds.height > 0, let spec else {
            resetMetalInputs()
            return
        }

        let scale = UIScreen.main.scale

        owsAssertDebug(spec.spoilerFrames.count <= SpoilerAnimationManager.maxSpoilerFrameCount)

        var drawRects = [DrawRect]()
        for spoilerFrame in spec.spoilerFrames {
            let spoilerRect = spoilerFrame.frame
            guard
                spoilerRect.x >= bounds.x,
                spoilerRect.maxX <= bounds.maxX,
                spoilerRect.y >= bounds.y,
                spoilerRect.maxY <= bounds.maxY,
                spoilerRect.width > 0,
                spoilerRect.height > 0
            else {
                continue
            }
            // NOTE(review): origins are not offset by bounds.origin; this
            // presumably assumes bounds.origin == .zero. A negative origin
            // saturates to 0 below rather than crashing.
            let drawRect = DrawRect(
                origin: .init(
                    Self.texturePixels(spoilerRect.x, scale: scale),
                    Self.texturePixels(spoilerRect.y, scale: scale)
                ),
                size: .init(
                    Self.texturePixels(spoilerRect.width, scale: scale),
                    Self.texturePixels(spoilerRect.height, scale: scale)
                ),
                particleRGB: spoilerFrame.config.colorRGB,
                particleBaseAlpha: spoilerFrame.config.particleBaseAlpha,
                particleAlphaDropoff: spoilerFrame.config.particleAlphaDropoff,
                particleSizePixels: spoilerFrame.config.particleSizePixels
            )
            drawRects.append(drawRect)
        }

        guard !drawRects.isEmpty else {
            resetMetalInputs()
            return
        }

        self.numDrawRects = UInt32(clamping: drawRects.count)
        drawRectBuffer = metalConfig.device.makeBuffer(
            bytes: drawRects,
            length: MemoryLayout<DrawRect>.stride * drawRects.count
        )

        drawableSize = CGSize(
            width: bounds.width * scale,
            height: bounds.height * scale
        )
        self.isPaused = particlesPerPixelPerLayer == 0
    }

    /// Encodes one compute dispatch covering `gridSize` threads.
    ///
    /// If the device supports it, we just provide the threadgroup size and the
    /// total number of threads, and Metal handles the partial groups at the
    /// edges of the grid. Otherwise we dispatch a whole number of uniformly
    /// sized threadgroups, rounding up, so some threads may fall past the
    /// edge of the grid.
    private func dispatchCompute(
        _ encoder: MTLComputeCommandEncoder,
        gridSize: MTLSize,
        threadsPerThreadgroup: MTLSize
    ) {
        if metalConfig.supportsNonUniformThreadGroups {
            encoder.dispatchThreads(gridSize, threadsPerThreadgroup: threadsPerThreadgroup)
        } else {
            // Integer ceil division: enough groups to cover the whole grid.
            let threadgroupsPerGrid = MTLSize(
                width: (gridSize.width + threadsPerThreadgroup.width - 1) / threadsPerThreadgroup.width,
                height: (gridSize.height + threadsPerThreadgroup.height - 1) / threadsPerThreadgroup.height,
                depth: 1
            )
            encoder.dispatchThreadgroups(threadgroupsPerGrid, threadsPerThreadgroup: threadsPerThreadgroup)
        }
    }

    /// Renders one frame: clears the drawable's texture, then draws the
    /// particles for the current animation time, both as compute passes.
    override func draw(_ rect: CGRect) {
        guard bounds.width > 0, bounds.height > 0 else {
            return
        }
        // Need our state ready and committed before drawing.
        guard
            numDrawRects > 0,
            totalNumParticlesPerLayer > 0,
            let drawRectBuffer
        else {
            return
        }
        guard
            let drawable = self.currentDrawable,
            let commandBuffer = metalConfig.commandQueue.makeCommandBuffer(),
            let encoder = commandBuffer.makeComputeCommandEncoder()
        else {
            return
        }

        // First we apply the clear computation to the drawable's texture, wiping it
        // from whatever state it was in in the last draw cycle.
        let clearPipeline = metalConfig.clearPipelineState
        encoder.setComputePipelineState(clearPipeline)
        encoder.setTexture(drawable.texture, index: 0)

        // We have to figure out how to distribute the work on the GPU.
        // `threadExecutionWidth` is the number of threads the GPU executes
        // together for this pipeline; `maxTotalThreadsPerThreadgroup` is the
        // most threads a single threadgroup of this pipeline may contain.
        // Using width x (max / width) gives the largest allowed 2d threadgroup.
        let clearGroupWidth = clearPipeline.threadExecutionWidth
        let clearGroupHeight = clearPipeline.maxTotalThreadsPerThreadgroup / clearGroupWidth

        // For the clear computation, we want each computation (each "thread") to wipe
        // one pixel, so we give it as many threads as there are pixels in the texture.
        dispatchCompute(
            encoder,
            gridSize: MTLSize(width: drawable.texture.width, height: drawable.texture.height, depth: 1),
            threadsPerThreadgroup: MTLSize(width: clearGroupWidth, height: clearGroupHeight, depth: 1)
        )

        // Now we actually draw the particles.
        let particlePipeline = metalConfig.drawParticlesPipelineState
        encoder.setComputePipelineState(particlePipeline)

        // Set the inputs we need. The indexing is important and must match
        // SpoilerParticleShader.metal.
        encoder.setBuffer(drawRectBuffer, offset: 0, index: 0)

        // The uniforms value (the current duration) is small and changes
        // on every draw loop. It's more efficient in these cases to use
        // setBytes to directly copy bytes and let Metal manage the memory.
        var uniforms = Uniforms(
            elapsedTimeMs: renderer.getAnimationDuration(),
            numDrawRects: numDrawRects,
            particlesPerPixelPerLayer: particlesPerPixelPerLayer,
            numLayers: fidelity.numLayers,
            particleSpeedDivisor: fidelity.particleSpeedDivisor
        )
        encoder.setBytes(&uniforms, length: Self.uniformsSize, index: 1)

        // For this computation, each "thread" will draw a single particle,
        // so we need as many threads as there are particles.
        // Grouping is not super important; treat it as a 1xn "grid". The group
        // width comes from the particle pipeline itself, since the execution
        // width is a property of each pipeline state.
        dispatchCompute(
            encoder,
            gridSize: MTLSize(width: totalNumParticlesPerLayer, height: 1, depth: 1),
            threadsPerThreadgroup: MTLSize(width: particlePipeline.threadExecutionWidth, height: 1, depth: 1)
        )

        // Tell the encoder we are done, and have it render the drawable's texture.
        encoder.endEncoding()
        #if targetEnvironment(simulator)
        commandBuffer.present(drawable)
        #else
        commandBuffer.present(drawable, afterMinimumDuration: fidelity.frameDelay)
        #endif
        commandBuffer.commit()
    }

    fileprivate enum Constants {
        /// After this many particles, framerate, number of layers, and particle velocity degrade.
        static let particleCountEfficiencyThreshold: CGFloat = 1000
        /// Allow up to a maximum number of particles, to put an upper bound on compute.
        static let maxParticleCountPerLayer: CGFloat = 5000
    }
}
|
|
|
|
extension SpoilerParticleView.Fidelity {

    // Note that the particle max velocity and lifetime are
    // defined in `SpoilerParticleShader.metal`, so we avoid
    // copying values from the CPU to the GPU.

    /// Particle density, per unit of surface area, for a single layer.
    fileprivate var particlesPerUnitAreaPerLayer: CGFloat {
        switch self {
        case .high: return Self.defaultParticlesPerUnitArea
        case .low: return 0.015
        }
    }

    /// How many layers of particles are drawn.
    fileprivate var numLayers: UInt8 {
        switch self {
        case .high:
            return 4
        case .low:
            return 3
        }
    }

    /// Minimum time between presented frames, in seconds.
    fileprivate var frameDelay: TimeInterval {
        let framesPerSecond: TimeInterval
        switch self {
        case .high:
            framesPerSecond = 20
        case .low:
            framesPerSecond = 12
        }
        return 1 / framesPerSecond
    }

    /// Divisor applied to the maximum particle speed.
    fileprivate var particleSpeedDivisor: UInt8 {
        // Keeping in case we want to bring this back;
        // trying same speed at lower framerate.
        switch self {
        case .high, .low:
            return 1
        }
    }
}
|