TM-SGNL-iOS/SignalServiceKit/Util/String+OWS.swift
TeleMessage developers dde0620daf initial commit
2025-05-03 12:28:28 -07:00

175 lines
6.2 KiB
Swift
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// Copyright 2024 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
//
import Foundation
private let bidiLeftToRightIsolate: Unicode.Scalar = "\u{2066}"
private let bidiRightToLeftIsolate: Unicode.Scalar = "\u{2067}"
private let bidiFirstStrongIsolate: Unicode.Scalar = "\u{2068}"
private let bidiLeftToRightEmbedding: Unicode.Scalar = "\u{202A}"
private let bidiRightToLeftEmbedding: Unicode.Scalar = "\u{202B}"
private let bidiLeftToRightOverride: Unicode.Scalar = "\u{202D}"
private let bidiRightToLeftOverride: Unicode.Scalar = "\u{202E}"
private let bidiPopDirectionalFormatting: Unicode.Scalar = "\u{202C}"
private let bidiPopDirectionalIsolate: Unicode.Scalar = "\u{2069}"
private let bidiControlCharacterSet: CharacterSet = [
bidiLeftToRightIsolate,
bidiRightToLeftIsolate,
bidiFirstStrongIsolate,
bidiLeftToRightEmbedding,
bidiRightToLeftEmbedding,
bidiLeftToRightOverride,
bidiRightToLeftOverride,
bidiPopDirectionalFormatting,
bidiPopDirectionalIsolate,
]
private let nonPrintingCharacterSet = {
var characterSet = CharacterSet.whitespacesAndNewlines
characterSet.formUnion(CharacterSet.controlCharacters)
characterSet.formUnion(bidiControlCharacterSet)
// Left-to-right and Right-to-left marks.
characterSet.insert(charactersIn: "\u{200E}\u{200F}")
return characterSet
}()
// 0x202D and 0x202E are the unicode ordering letters
// and can be used to control the rendering of text.
// They could be used to construct misleading attachment
// filenames that appear to have a different file extension,
// for example.
private let unsafeFilenameCharacterSet: CharacterSet = [bidiLeftToRightOverride, bidiRightToLeftOverride]
extension String {
private func sanitized() -> String {
// There was a sanitizer cache in the objc code. This should be linear in the length
// of the string so unless we're calling this over and over and over again for the
// same strings the cache doesn't seem very useful and the right place to fix things
// is to not call this over and over again.
StringSanitizer.sanitize(self)
}
/// - Warning: Only exposed for testing. Do not use.
internal func filterUnsafeFilenameCharacters() -> String {
StringSanitizer.sanitize(self) { c in
c.unicodeScalars.contains { s in
unsafeFilenameCharacterSet.contains(s)
}
}
}
public func ows_stripped() -> String {
if unicodeScalars.allSatisfy(nonPrintingCharacterSet.contains) {
// If string has no printing characters, consider it empty.
return ""
} else {
return trimmingCharacters(in: .whitespacesAndNewlines)
}
}
/// A version of the string that only contains digits.
///
/// Handles non-ASCII digits. If you only want ASCII digits, see `asciiDigitsOnly`.
///
/// ```
/// "1x2x3".digitsOnly
/// // => "123"
/// "١23".digitsOnly
/// // => "١23"
/// "123".digitsOnly
/// // => "123"
/// ```
public func digitsOnly() -> String {
String(unicodeScalars.filter { CharacterSet.decimalDigits.contains($0) })
}
public func hasAnyASCII() -> Bool {
contains(where: \.isASCII)
}
public func isOnlyASCII() -> Bool {
allSatisfy(\.isASCII)
}
/// Trims and filters a string for display
public func filterStringForDisplay() -> String {
ows_stripped().sanitized().ensureBalancedBidiControlCharacters()
}
public func filterFilename() -> String {
ows_stripped().sanitized().filterUnsafeFilenameCharacters()
}
public func withoutBidiControlCharacters() -> String {
// TODO: This may not be the right behavior. Investigate if it's supposed to remove all or just trim.
trimmingCharacters(in: bidiControlCharacterSet)
}
public func ensureBalancedBidiControlCharacters() -> String {
var isolateStartsCount = 0
var isolatePopCount = 0
var formattingStartsCount = 0
var formattingPopCount = 0
for c in unicodeScalars {
switch c {
case bidiLeftToRightIsolate, bidiRightToLeftIsolate, bidiFirstStrongIsolate:
isolateStartsCount += 1
case bidiPopDirectionalIsolate:
isolatePopCount += 1
case bidiLeftToRightEmbedding, bidiRightToLeftEmbedding, bidiLeftToRightOverride, bidiRightToLeftOverride:
formattingStartsCount += 1
case bidiPopDirectionalFormatting:
formattingPopCount += 1
default:
break
}
}
if (isolateStartsCount == isolatePopCount && formattingStartsCount == formattingPopCount) {
return self
}
var balancedString = ""
// If we have too many isolate pops, prepend FSI to balance
while isolatePopCount > isolateStartsCount {
balancedString += String(bidiFirstStrongIsolate)
isolateStartsCount += 1
}
// If we have too many formatting pops, prepend LRE to balance
while formattingPopCount > formattingStartsCount {
balancedString += String(bidiLeftToRightEmbedding)
formattingStartsCount += 1
}
balancedString += self
// If we have too many formatting starts, append PDF to balance
while formattingStartsCount > formattingPopCount {
balancedString += String(bidiPopDirectionalFormatting)
formattingPopCount += 1
}
// If we have too many isolate starts, append PDI to balance
while isolateStartsCount > isolatePopCount {
balancedString += String(bidiPopDirectionalIsolate)
isolatePopCount += 1
}
return balancedString
}
public func bidirectionallyBalancedAndIsolated() -> String {
// We're already isolated, nothing to do here.
if let first = unicodeScalars.first, let last = unicodeScalars.last, first == bidiFirstStrongIsolate && last == bidiPopDirectionalIsolate {
return self
}
return String(bidiFirstStrongIsolate) + ensureBalancedBidiControlCharacters() + String(bidiPopDirectionalIsolate)
}
}