TM-SGNL-iOS/SignalUI/Views/BodyRanges/SpoilerRendering/SpoilerParticleShader.metal
TeleMessage developers dde0620daf initial commit
2025-05-03 12:28:28 -07:00

252 lines
12 KiB
Metal

//
// Copyright 2023 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
//
#include <metal_stdlib>
using namespace metal;
// MARK: - Random number generation
// Generate a pseudo-random float in the range [0.0f, 1.0f] from three integer
// seeds (x, y, z) using a small integer hash.
// Don't really care about "true" randomness; this is just used to generate
// particles, so it only needs to be deterministic (same inputs always give the
// same output) and look "random" to the human eye.
float rand(uint x, uint y, uint z)
{
    // All hashing is done in unsigned arithmetic. Unsigned overflow and shifts
    // wrap modulo 2^32 by definition, whereas the equivalent signed operations
    // (shifting a negative value, overflowing a multiply) are undefined in the
    // C++14 base language. The resulting bit pattern is the same as two's
    // complement signed wraparound.
    uint seed = x + y * 57u + z * 241u;
    seed = (seed << 13) ^ seed;

    // Keep the low 31 bits so the hashed value is in [0, 2^31).
    uint hashed = (seed * (seed * seed * 15731u + 789221u) + 1376312589u) & 0x7fffffffu;

    // hashed / 2^30 lies in [0, 2]; (1 - that) lies in [-1, 1];
    // shifting by 1 and halving maps it onto [0, 1].
    float scaled = float(hashed) / 1073741824.0f;
    return ((1.0f - scaled) + 1.0f) / 2.0f;
}
// MARK: - Swift<->C shared structs
/// **IMPORTANT**: these must be exactly identical to the values defined
/// in SpoilerParticleView.swift, as they are both schemas for interpreting
/// the same shared memory across the CPU (swift) and GPU (metal).
/// A rectangle to draw particles into, represented in
/// the texture's coordinates.
///
/// An array of these is read by `draw_particles_func` from `buffer(0)`.
/// The field order and types define the byte layout of that buffer, so they
/// must not be changed without changing the Swift-side definition as well.
struct DrawRect {
// Note: these can be 16 bit because they are
// in the texture's coordinates, whose dimensions are
// presumably bounded well below 65536 — TODO confirm the exact
// maximum texture size for the supported devices.
/// The corner of the rect with the smallest x and y, in texture coordinates.
/// Also used as part of the random seed for the rect's particles, so rects
/// with distinct origins get distinct particle patterns.
ushort2 origin;
/// Width (x) and height (y) of the rect, in texture coordinates.
ushort2 size;
/// The color with which to draw particles in this rect.
/// Values from 0 to 255. Note that textures use
/// 0 to 1 half values for color; conversion is handled
/// in the GPU.
// NOTE(review): Metal 3-component vector types are believed to have the size
// and alignment of the corresponding 4-component type, so this field likely
// occupies 4 bytes — confirm the Swift struct pads identically.
uchar3 particleRGB;
/// The base alpha value for particle colors in this rect,
/// with 255 representing an alpha of 1.
uchar particleBaseAlpha;
/// Every layer of particles has this much less alpha than the previous,
/// with 255 representing an alpha of 1.
uchar particleAlphaDropoff;
/// The size (in texture coordinates) of particles in this rect.
// Note: the draw loops in `draw_particles_func` iterate offsets from 0 up to
// and including this value on each axis.
uchar particleSizePixels;
};
/// "Uniforms" is a term of art of data that is the same (uniform) across all parallel threads.
/// Contains information that applies to all particles we draw.
///
/// Read by `draw_particles_func` from `buffer(1)`. The field order and types
/// define the byte layout of that buffer, so they must not be changed without
/// changing the Swift-side definition as well.
struct Uniforms {
/// The amount of time passed since the animation started, in milliseconds.
/// Divided by each particle's lifetime to determine how many times the particle
/// has respawned and how far it is into its current lifetime.
uint elapsedTimeMs;
/// The number of rects being drawn into.
/// This is the number of `DrawRect` entries the kernel iterates over.
uint numDrawRects;
/// The density of particles per pixel, per layer.
/// In other words, in the texture's coordinates.
/// Multiplied by a rect's width and height (and truncated to an integer) to
/// get the number of particles in that rect.
float particlesPerPixelPerLayer;
/// The number of layers of particles to draw.
/// Each thread draws one particle per layer.
uchar numLayers;
/// Divisor for max particle speed.
/// The maximum particle velocity is 0.02 pixels per millisecond divided by this value.
uchar particleSpeedDivisor;
};
// MARK: - Computation
/**
 * Compute kernel that zeroes the texture, erasing whatever the previous draw loop wrote.
 * Each thread is responsible for the single pixel at its grid position.
 *
 * **IMPORTANT**: the name of this function must be the same as the name used in
 * `MTLLibrary.makeFunction(name:)` in SpoilerParticleView.swift.
 *
 * - parameter tex: The write-only texture to clear.
 * - parameter id: The thread's 2d position in the grid, used as the pixel coordinate.
 */
kernel void clear_pass_func(texture2d<half, access::write> tex [[ texture(0) ]],
                            uint2 id [[ thread_position_in_grid ]])
{
    // On devices without non-uniform thread group support the dispatched grid
    // may be larger than the texture, so some threads land past its edge.
    // Only threads that map onto a real pixel do any work.
    const bool insideTexture = id.x < tex.get_width() && id.y < tex.get_height();
    if (insideTexture) {
        // All four channels set to zero.
        tex.write(half4(0), id);
    }
}
/**
 * Runs every draw loop, once per particle index, in parallel on the GPU, drawing particles.
 *
 * **IMPORTANT**: the name of this function must be the same as the name used in
 * `MTLLibrary.makeFunction(name:)` in SpoilerParticleView.swift.
 *
 * Every particle index gets its own run of this method in one "thread". Reading apple documentation
 * [here](https://developer.apple.com/documentation/metal/compute_passes/creating_threads_and_threadgroups) is recommended.
 * Basically a "thread" is a single computation that can be done in parallel, a "thread group" is a
 * set of threads scheduled together, and the "grid" is the entire set of computation to run.
 *
 * We use one thread per particle index, with ordering independent of the particle's actual position.
 * So the "grid" here is a 1d line, each slot being a pass that computes and draws one particle
 * for every layer.
 *
 * Explaining the params' modifiers:
 *
 * `constant` places the data in an address space that is read-only from the shader.
 *
 * The "[[ foo ]]" stuff is annotation telling Metal where to get the parameter from.
 * `buffer(x)` refers to the input MTLBuffers sent from swift with
 * `MTLComputeCommandEncoder.setBuffer(..., index: x)` with the given index.
 * `texture(0)` is the texture being drawn to.
 * `thread_position_in_grid` is the index of the thread in the entire set of threads. In our usage,
 * this is the global index of the particle being drawn.
 *
 * And the params themselves:
 * - parameter drawRects: The rects to draw into. They are uniquely identified by their origin.
 * - parameter uniforms: Data that is the same (uniform) across all parallel threads.
 * - parameter tex: The write-only texture particles are drawn into.
 * - parameter id: The thread's position in the 1d grid; serves as the global particle index.
 */
kernel void draw_particles_func(constant DrawRect *drawRects [[ buffer(0) ]],
                                constant Uniforms &uniforms [[ buffer(1) ]],
                                texture2d<half, access::write> tex [[ texture(0) ]],
                                uint id [[ thread_position_in_grid ]])
{
    int numDrawRects = int(uniforms.numDrawRects);

    // We have to find which rect to draw this particle in.
    // To do so, iterate over the draw rects, accumulating the number
    // of particles each has, until the running total passes the id of this
    // particle, at which point we have found our target rect.
    DrawRect rect;
    uint particleCountSoFar = 0;
    int particleIndexInDrawRect = -1;
    for (int i = 0; i < numDrawRects; i++) {
        rect = drawRects[i];
        uint numParticlesInRect = uint(uniforms.particlesPerPixelPerLayer * float(rect.size.x) * float(rect.size.y));
        particleCountSoFar += numParticlesInRect;
        if (particleCountSoFar > id) {
            // Counted from the end of the rect's range, so the value runs from
            // 1 to numParticlesInRect. It is only used as a per-rect unique
            // seed, so the direction does not matter.
            particleIndexInDrawRect = particleCountSoFar - id;
            break;
        }
    }

    // If the device doesn't support non-uniform thread groups, we
    // may end up with more compute passes than we have particles.
    // Just early exit if we didn't find a rect, which means
    // this particle is "after" the last rect.
    if (particleIndexInDrawRect < 0) {
        return;
    }

    // We encode these constant values here to avoid copying the memory from
    // cpu to gpu constantly.
    // These are exclusive to the GPU though, and unused by the cpu.
    const uint minParticleLifetimeMs = 1000;
    const uint maxAdditionalParticleLifetimeMs = 2000;

    // Measured in pixels per ms. A divisor of 0 is treated as 1 so that we never
    // divide by zero and feed inf/NaN into the position computation below.
    const float speedDivisor = uniforms.particleSpeedDivisor == 0 ? 1.0f : float(uniforms.particleSpeedDivisor);
    const float maxParticleVelocity = 0.02f / speedDivisor;

    // Used to keep particle writes inside the texture.
    const uint texWidth = tex.get_width();
    const uint texHeight = tex.get_height();

    // The color inputs are stuffed into bytes (0 to 255) and the texture wants
    // 0 to 1 half values. These are the same for every layer, so convert once.
    const half r = half(rect.particleRGB.r) / 255;
    const half g = half(rect.particleRGB.g) / 255;
    const half b = half(rect.particleRGB.b) / 255;
    const half alphaDropoff = half(rect.particleAlphaDropoff) / 255;

    // Figure out how many pixels we need to draw into.
    // NOTE(review): the draw loops below use an inclusive upper bound, so a
    // particle covers (particleSize + 1) pixels per axis. Left unchanged.
    const uint particleSize = uint(rect.particleSizePixels);

    // Alpha is dependent on the layer: it starts at the base alpha and drops by
    // alphaDropoff once per layer. The decrement lives in the loop's increment
    // expression so it is applied even when a layer is skipped via `continue`.
    half layerAlpha = half(rect.particleBaseAlpha) / 255;

    // Draw one particle per layer.
    for (uchar layer = 0; layer < uniforms.numLayers; layer++, layerAlpha -= alphaDropoff) {
        // We are going to use some pseudo-random number generators to produce
        // the particle info (position, speed, etc) based on a seed for each particle.
        // The seed has three parts:
        // 1. The draw rect's origin. These are unique and ensure we don't
        //    repeat particle patterns across rects.
        //
        // 2. The particle's index in the draw rect, plus an offset for its layer index.
        //    Basically, a unique id so its rng differs from other particles in the rect.
        //
        // 3. The current "reincarnation". Particles have a lifetime (determined by rng).
        //    After it's reached, they die and "respawn" in a new random place. This is
        //    done by using the number of lifetimes as an input into the seed for position.
        //
        // The particle lifetime is itself rng; so we first generate it randomly using the
        // first two input seeds, then use that output to seed the rest.

        // First let's generate seed (2): we take the index in the rect, but offset by
        // the layer so each particle in each layer gets a unique seed.
        uint seedIndex = uint(particleIndexInDrawRect) * uint(uniforms.numLayers) + uint(layer);

        // Now we compute the lifetime and how many times we've reached it (seed (3)).
        // lifetimeMs is always at least minParticleLifetimeMs, so the division is safe.
        float lifetimeRel = rand(rect.origin.x, rect.origin.y, seedIndex);
        uint lifetimeMs = uint(lifetimeRel * maxAdditionalParticleLifetimeMs) + minParticleLifetimeMs;
        uint numReincarnations = uniforms.elapsedTimeMs / lifetimeMs;
        uint durationInCurrentLifetime = uniforms.elapsedTimeMs - (lifetimeMs * numReincarnations);

        // Now we know the number of "reincarnations", and can seed the position
        // and velocity info.
        // We generate 4 numbers (x/y, position/velocity) for each particle,
        // so space out the "seed index" space by 4.
        // Then offset by 7 (can be any number, just not a multiple of 4)
        // for each lifetime so we get a new seed that doesn't overlap.
        seedIndex = seedIndex * 4 + (numReincarnations * 7);
        float xPosRel = rand(rect.origin.x, rect.origin.y, seedIndex);
        float yPosRel = rand(rect.origin.x, rect.origin.y, seedIndex + 1);
        float xVelRel = rand(rect.origin.x, rect.origin.y, seedIndex + 2);
        float yVelRel = rand(rect.origin.x, rect.origin.y, seedIndex + 3);

        // Positions are relative to the draw rect. Compute the starting position.
        int xPos = rect.origin.x + int(xPosRel * rect.size.x);
        int yPos = rect.origin.y + int(yPosRel * rect.size.y);

        // Velocities are relative to the provided max velocity,
        // since it should be the same velocity distribution across rects.
        // Subtracting 0.5 centers the random value on zero, so each axis gets
        // a positive or negative velocity of at most half the max.
        xPos += int((xVelRel - 0.5f) * maxParticleVelocity * durationInCurrentLifetime);
        yPos += int((yVelRel - 0.5f) * maxParticleVelocity * durationInCurrentLifetime);

        if (
            xPos < rect.origin.x
            || xPos > rect.origin.x + rect.size.x
            || yPos < rect.origin.y
            || yPos > rect.origin.y + rect.size.y
        ) {
            // This layer's particle has drifted out of its rect; do not draw it.
            // Only skip this layer: the other layers' particles have independent
            // positions and may still be visible.
            continue;
        }

        // Finally, draw the particle.
        half4 color = half4(r, g, b, layerAlpha);

        // xPos/yPos are known to be >= the (unsigned) rect origin here, so the
        // conversions to uint are safe.
        const uint baseX = uint(xPos);
        const uint baseY = uint(yPos);
        for (uint xOffset = 0; xOffset <= particleSize; xOffset++) {
            const uint px = baseX + xOffset;
            // The rect check above only covers the particle's anchor pixel; the
            // rest of the square could step past the texture's edge. Never
            // write outside the texture.
            if (px >= texWidth) {
                break;
            }
            for (uint yOffset = 0; yOffset <= particleSize; yOffset++) {
                const uint py = baseY + yOffset;
                if (py >= texHeight) {
                    break;
                }
                tex.write(color, uint2(px, py));
            }
        }
    }
}