// TM-SGNL-iOS/SignalServiceKit/Storage/Database/IncrementalMigrations/TSAttachment/TSAttachmentMigration+AudioWaveformManager.swift

//
// Copyright 2024 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
//

import Accelerate
import AVFoundation
import Foundation

extension TSAttachmentMigration {

    struct AudioWaveform {
        let decibelSamples: [Float]

        func archive() throws -> Data {
            return try NSKeyedArchiver.archivedData(withRootObject: decibelSamples, requiringSecureCoding: false)
        }
    }
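
    // A minimal sketch of the matching decode path (hypothetical here; the
    // actual reader lives elsewhere in the migration). NSKeyedUnarchiver
    // reverses `archive()`; the [Float] round-trips as an NSArray of NSNumber:
    //
    //     let samples = try NSKeyedUnarchiver.unarchivedObject(
    //         ofClasses: [NSArray.self, NSNumber.self],
    //         from: archivedData
    //     ) as? [Float]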

    class AudioWaveformManager {

        static func buildAudioWaveForm(
            unencryptedFilePath: String
        ) throws -> TSAttachmentMigration.AudioWaveform {
            let asset: AVAsset = try assetFromUnencryptedAudioFile(atAudioPath: unencryptedFilePath)

            guard asset.isReadable else {
                throw OWSAssertionError("unexpectedly encountered unreadable audio file.")
            }

            guard CMTimeGetSeconds(asset.duration) <= Self.maximumDuration else {
                throw OWSAssertionError("Audio too long")
            }

            return try sampleWaveform(asset: asset)
        }

        private static func assetFromUnencryptedAudioFile(
            atAudioPath audioPath: String
        ) throws -> AVAsset {
            let audioUrl = URL(fileURLWithPath: audioPath)

            var asset = AVURLAsset(url: audioUrl)

            if !asset.isReadable {
                // AVFoundation uses the file extension to decide how to read the
                // file. If the extension is misleading, retry through a symlink
                // that carries a more likely extension.
                if let extensionOverride = Self.alternativeAudioFileExtension(fileExtension: audioUrl.pathExtension) {
                    let symlinkPath = OWSFileSystem.temporaryFilePath(
                        fileExtension: extensionOverride,
                        isAvailableWhileDeviceLocked: true
                    )
                    do {
                        try FileManager.default.createSymbolicLink(
                            atPath: symlinkPath,
                            withDestinationPath: audioPath
                        )
                    } catch {
                        throw OWSAssertionError("Failed to create symlink")
                    }
                    asset = AVURLAsset(url: URL(fileURLWithPath: symlinkPath))
                }
            }

            return asset
        }

        private static func alternativeAudioFileExtension(fileExtension: String) -> String? {
            // In some cases, Android sends audio messages with the "audio/mpeg" mime type. This
            // makes our choice of file extension ambiguous: `.mp3` or `.m4a`? AVFoundation uses
            // the extension to read the file, and if the extension is wrong, it won't be readable.
            //
            // We "lie" about the extension to generate the waveform so that AVFoundation may read
            // it. This is brittle, but necessary to work around the buggy marriage of Android's
            // content type and AVFoundation's behavior.
            //
            // Note that we probably still want this code even if Android updates theirs, because
            // iOS users might have existing attachments.
            //
            // See: <https://github.com/signalapp/Signal-iOS/issues/3590>.
            switch fileExtension {
            case "m4a": return "aac"
            case "mp3": return "m4a"
            default: return nil
            }
        }

        // MARK: - Sampling

        /// The maximum duration asset that we will display waveforms for.
        /// It's too intensive to sample a waveform for really long audio files.
        private static let maximumDuration: TimeInterval = 15 * kMinuteInterval

        private static let sampleCount = 100

        private static func sampleWaveform(asset: AVAsset) throws -> TSAttachmentMigration.AudioWaveform {
            let assetReader = try AVAssetReader(asset: asset)

            // We just draw the waveform based on the first track.
            guard let audioTrack = assetReader.asset.tracks.first else {
                throw OWSAssertionError("audio file has no tracks")
            }

            // Decode everything to 16-bit signed little-endian interleaved PCM,
            // so the sampler can treat the raw bytes as a stream of Int16s.
            let trackOutput = AVAssetReaderTrackOutput(
                track: audioTrack,
                outputSettings: [
                    AVFormatIDKey: kAudioFormatLinearPCM,
                    AVLinearPCMBitDepthKey: 16,
                    AVLinearPCMIsBigEndianKey: false,
                    AVLinearPCMIsFloatKey: false,
                    AVLinearPCMIsNonInterleaved: false
                ]
            )
            assetReader.add(trackOutput)

            let decibelSamples = try readDecibels(from: assetReader)

            return TSAttachmentMigration.AudioWaveform(decibelSamples: decibelSamples)
        }

        private static func readDecibels(from assetReader: AVAssetReader) throws -> [Float] {
            let sampler = AudioWaveformSampler(
                inputCount: sampleCount(from: assetReader),
                outputCount: Self.sampleCount
            )

            assetReader.startReading()
            while assetReader.status == .reading {
                guard let trackOutput = assetReader.outputs.first else {
                    throw OWSAssertionError("track output unexpectedly missing")
                }

                // Process any newly read data.
                guard
                    let nextSampleBuffer = trackOutput.copyNextSampleBuffer(),
                    let blockBuffer = CMSampleBufferGetDataBuffer(nextSampleBuffer)
                else {
                    // There is no more data to read; break.
                    break
                }

                var lengthAtOffset = 0
                var dataPointer: UnsafeMutablePointer<Int8>?
                let result = CMBlockBufferGetDataPointer(
                    blockBuffer,
                    atOffset: 0,
                    lengthAtOffsetOut: &lengthAtOffset,
                    totalLengthOut: nil,
                    dataPointerOut: &dataPointer
                )
                guard result == kCMBlockBufferNoErr else {
                    owsFailDebug("track data unexpectedly inaccessible")
                    throw AudioWaveformError.invalidAudioFile
                }

                // Reinterpret the raw bytes as 16-bit PCM samples; rebinding
                // halves the element count (two bytes per Int16).
                let bufferPointer = UnsafeBufferPointer(start: dataPointer, count: lengthAtOffset)
                bufferPointer.withMemoryRebound(to: Int16.self) { sampler.update($0) }

                CMSampleBufferInvalidate(nextSampleBuffer)
            }

            return sampler.finalize()
        }

        private static func sampleCount(from assetReader: AVAssetReader) -> Int {
            let samplesPerChannel = Int(assetReader.asset.duration.value)

            // We will read in the samples from each channel, interleaved since
            // we only draw one waveform. This gives us an average of the channels
            // if it is, for example, a stereo audio file.
            return samplesPerChannel * channelCount(from: assetReader)
        }
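
        // CMTime stores `value` in units of `timescale`, and for audio assets the
        // timescale matches the sample rate, so `duration.value` approximates the
        // per-channel sample count. Worked example with illustrative numbers: a
        // 3 second stereo file at 44.1 kHz gives duration.value ≈ 132_300, for an
        // estimated interleaved input count of 132_300 * 2 = 264_600.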

        private static func channelCount(from assetReader: AVAssetReader) -> Int {
            guard
                let output = assetReader.outputs.first as? AVAssetReaderTrackOutput,
                let formatDescriptions = output.track.formatDescriptions as? [CMFormatDescription]
            else {
                return 0
            }

            var channelCount = 0
            for description in formatDescriptions {
                guard let basicDescription = CMAudioFormatDescriptionGetStreamBasicDescription(description) else {
                    continue
                }
                // If there are multiple format descriptions, the last one wins.
                channelCount = Int(basicDescription.pointee.mChannelsPerFrame)
            }
            return channelCount
        }
    }

    private class AudioWaveformSampler {
        private static let silenceThreshold: Float = -50

        private let inputCount: Int
        private let outputCount: Int

        /// The number of input samples that feed each output sample (rounded down).
        private let segmentLength: Int

        /// The number of samples that don't evenly divide into `outputCount`. These
        /// extra samples are spread across the output samples.
        private let segmentRemainder: Int

        /// The number of samples in this segment. Either `segmentLength` or
        /// `segmentLength + 1`.
        private var currentSegmentCount: Int

        /// The number of samples remaining in this segment.
        private var currentSegmentRemainingCount: Int

        /// Tracks the cumulative average when a segment spans multiple batches.
        private var currentSegmentAverage: Float

        /// Tracks when a segment needs an extra sample (because outputCount may not
        /// evenly divide inputCount).
        private var overflowCounter: Int

        private var buffer = [Float]()
        private var output = [Float]()

        init(inputCount: Int, outputCount: Int) {
            self.inputCount = inputCount
            self.outputCount = outputCount
            if inputCount < outputCount {
                // If we don't have enough samples, just use every sample that's provided.
                // This will result in fewer than outputCount samples, but that is fine.
                (self.segmentLength, self.segmentRemainder) = (1, 0)
            } else {
                (self.segmentLength, self.segmentRemainder) = inputCount.quotientAndRemainder(dividingBy: outputCount)
            }
            self.currentSegmentAverage = 0
            // The first segment is always segmentLength because segmentRemainder is
            // less than outputCount (it's the remainder when dividing by outputCount).
            self.currentSegmentCount = self.segmentLength
            self.currentSegmentRemainingCount = self.segmentLength
            self.overflowCounter = self.outputCount - self.segmentRemainder
        }
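
        // Worked example with illustrative numbers: inputCount = 1000 and
        // outputCount = 3 give segmentLength = 333 and segmentRemainder = 1,
        // with overflowCounter starting at 3 - 1 = 2. The counter drops by the
        // remainder after each finished segment and wraps once it reaches zero,
        // yielding segment sizes 333, 333, and 334 (summing to 1000).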

        func update(_ samples: UnsafeBufferPointer<Int16>) {
            let sampleCount = samples.count
            if self.buffer.count < sampleCount {
                self.buffer.append(contentsOf: Array(repeating: 0, count: sampleCount - self.buffer.count))
            }

            // convert Int16 amplitudes to Float representation
            vDSP_vflt16(samples.baseAddress!, 1, &self.buffer, 1, vDSP_Length(sampleCount))

            // take the absolute amplitude value
            vDSP_vabs(self.buffer, 1, &self.buffer, 1, vDSP_Length(sampleCount))

            // convert to dB
            // maximum amplitude storable in Int16 = 0 dB (loudest)
            // (remember decibels are often negative)
            var zeroDecibelEquivalent: Float = Float(Int16.max)
            vDSP_vdbcon(self.buffer, 1, &zeroDecibelEquivalent, &self.buffer, 1, vDSP_Length(sampleCount), 1)

            // clip between loudest + quietest
            var loudestClipValue: Float = 0.0
            var quietestClipValue = AudioWaveformSampler.silenceThreshold
            vDSP_vclip(self.buffer, 1, &quietestClipValue, &loudestClipValue, &self.buffer, 1, vDSP_Length(sampleCount))

            self.reduce(sampleCount: sampleCount)
        }
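
        // Worked example: vDSP_vdbcon with its flag set to 1 computes amplitude
        // decibels, 20 * log10(x / zeroDecibelEquivalent). A full-scale sample
        // (32767) maps to 0 dB, half scale to roughly -6.02 dB, and anything
        // quieter than -50 dB is clipped up to the silence threshold.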

        private func reduce(sampleCount: Int) {
            self.buffer.withUnsafeBufferPointer { bufferPtr in
                var remainingCount = sampleCount
                while remainingCount > 0 {
                    let chunkCount = min(remainingCount, self.currentSegmentRemainingCount)
                    assert(chunkCount > 0) // because currentSegmentRemainingCount starts > 0 and is checked on each iteration

                    var chunkAverage: Float = 0
                    vDSP_meanv(bufferPtr.baseAddress!.advanced(by: sampleCount - remainingCount), 1, &chunkAverage, vDSP_Length(chunkCount))
                    remainingCount -= chunkCount
                    self.currentSegmentRemainingCount -= chunkCount

                    // Add the new average to the running average for this segment.
                    let totalChunkCount = self.currentSegmentCount - self.currentSegmentRemainingCount
                    assert(totalChunkCount > 0) // because chunkCount > 0
                    let newChunkWeight = Float(chunkCount) / Float(totalChunkCount)
                    let oldChunkWeight = 1 - newChunkWeight
                    self.currentSegmentAverage *= oldChunkWeight
                    self.currentSegmentAverage += chunkAverage * newChunkWeight

                    // If we reached the end of the segment, add it to the output.
                    if self.currentSegmentRemainingCount <= 0 {
                        self.output.append(self.currentSegmentAverage)
                        self.currentSegmentAverage = 0 // technically redundant
                        self.currentSegmentCount = self.segmentLength
                        self.overflowCounter -= self.segmentRemainder
                        if self.overflowCounter <= 0 {
                            self.currentSegmentCount += 1
                            // Wrap the counter by outputCount so the remainder's
                            // extra samples are spread evenly across the output.
                            self.overflowCounter += self.outputCount
                        }
                        self.currentSegmentRemainingCount = self.currentSegmentCount
                    }
                }
            }
        }

        func finalize() -> [Float] {
            assert(self.output.count <= self.outputCount)
            return self.output
        }
    }
}
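
// A minimal usage sketch (illustrative; the real call site lives elsewhere in
// the TSAttachment migration). `path` is a hypothetical unencrypted audio file:
//
//     let waveform = try TSAttachmentMigration.AudioWaveformManager
//         .buildAudioWaveForm(unencryptedFilePath: path)
//     let archivedData = try waveform.archive()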