TM-SGNL-iOS/SignalServiceKit/Messages/Interactions/LinkPreview/Manager/LinkPreviewHelper.swift
TeleMessage developers dde0620daf initial commit
2025-05-03 12:28:28 -07:00

169 lines
5.9 KiB
Swift

//
// Copyright 2024 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
//
import Foundation
public enum LinkPreviewHelper {
/// Returns the domain that should be displayed for a link preview of `url`.
///
/// - Sticker pack share links yield the result of `stickerPackShareDomain(forUrl:)`
///   (the lowercased host, or `nil` when the link has no host or no path).
/// - Possible group invite links are always displayed as `"signal.org"`.
/// - Every other URL yields `url.host` unchanged.
///
/// - Parameter url: The URL the preview was generated for.
/// - Returns: The domain to display, or `nil` if none could be determined.
public static func displayDomain(forUrl url: URL) -> String? {
    guard !StickerPackInfo.isStickerPackShare(url) else {
        return stickerPackShareDomain(forUrl: url)
    }
    return GroupManager.isPossibleGroupInviteLink(url) ? "signal.org" : url.host
}
/// Returns whether a link preview may be generated for `url`.
///
/// A URL is permitted when all of the following hold:
/// - its scheme is in `schemeAllowSet`,
/// - it carries no user or password component,
/// - its hostname passes `isValidHostname(_:)`,
/// - and, when `sourceString` is provided, the source text passes
///   `LinkValidator.isValidLink(linkText:)` and its path/query portion
///   contains only characters from `validURICharacters`.
///
/// - Parameters:
///   - url: The parsed URL to validate.
///   - sourceString: The raw string that `url` was parsed from. NSURL and
///     NSDataDetector automatically punycode the URLs they return, so the
///     original text is needed to verify that the hostname as typed consists
///     entirely of ASCII characters or entirely of non-ASCII characters
///     (mixing the two is how homograph URLs are built). If no source string
///     is provided, the validated host is whatever `url.host` returns, which
///     will always be ASCII.
/// - Returns: `true` if the URL is acceptable for link previews.
public static func isPermittedLinkPreviewUrl(
    _ url: URL,
    parsedFrom sourceString: String? = nil
) -> Bool {
    guard
        let scheme = url.scheme?.lowercased().nilIfEmpty,
        Self.schemeAllowSet.contains(scheme),
        url.user == nil,
        url.password == nil
    else {
        return false
    }

    guard let sourceString else {
        // Without the source text, fall back to the parsed host, which is
        // punycode and therefore all ASCII.
        guard let host = url.host else { return false }
        return Self.isValidHostname(host)
    }

    // Strip a leading "<scheme>://" if present; the source text is allowed
    // to omit it.
    var withoutScheme = sourceString
    if let schemeRange = withoutScheme.range(of: "\(scheme)://", options: [.anchored, .caseInsensitive]) {
        withoutScheme.removeSubrange(schemeRange)
    }

    // The hostname is everything up to the first URL delimiter.
    let hostEnd = withoutScheme.firstIndex(where: { Self.urlDelimeters.contains($0) }) ?? withoutScheme.endIndex
    let hostname = String(withoutScheme[..<hostEnd])

    guard LinkValidator.isValidLink(linkText: withoutScheme) else {
        return false
    }
    guard Self.isValidHostname(hostname) else {
        return false
    }

    // Check that the path and query params only have valid characters.
    // The URL we get here has, in practice, already gone through sanitization
    // and may already have percent-encoded the path and params, so we don't
    // want to use url.path.
    //
    // The scan runs over the scheme-stripped text (so it is correct whether
    // or not the source carried a scheme prefix) and begins at whichever of
    // "/" or "?" comes first, so a query that precedes any slash is covered.
    if
        let pathOrQueryStart = withoutScheme.firstIndex(where: { $0 == "/" || $0 == "?" }),
        withoutScheme[pathOrQueryStart...].rangeOfCharacter(from: Self.validURICharacters.inverted) != nil
    {
        return false
    }
    return true
}
/// Truncates `string` for use in a link preview and filters it for display.
///
/// The string is limited to its first `maxLines` lines (split on any
/// newline character and re-joined with `"\n"` when truncation occurs), then
/// to at most 2048 characters, and finally passed through
/// `filterStringForDisplay()`.
///
/// - Parameters:
///   - string: The text to normalize.
///   - maxLines: The maximum number of lines to keep. Negative values are
///     treated as zero rather than trapping.
/// - Returns: The truncated, display-filtered string.
public static func normalizeString(_ string: String, maxLines: Int) -> String {
    let maxCharacterCount = 2048
    // A negative limit would otherwise form an invalid range and crash.
    let lineLimit = max(0, maxLines)

    var result = string
    let lines = result.components(separatedBy: .newlines)
    if lines.count > lineLimit {
        result = lines.prefix(lineLimit).joined(separator: "\n")
    }
    if result.count > maxCharacterCount {
        result = String(result.prefix(maxCharacterCount))
    }
    return result.filterStringForDisplay()
}
/// Returns the lowercased host of a sticker pack share URL.
///
/// - Parameter url: The sticker pack share URL.
/// - Returns: The lowercased host, or `nil` if the URL has no host or its
///   path is empty (a bare `"/"` counts as empty).
private static func stickerPackShareDomain(forUrl url: URL) -> String? {
    // Url must have non-empty path, i.e. something beyond a single character.
    let hasNonEmptyPath = url.path.count > 1
    guard hasNonEmptyPath, let host = url.host else {
        return nil
    }
    return host.lowercased()
}
/// URL schemes (lowercased) for which link previews are permitted.
private static let schemeAllowSet: Set<String> = ["https"]

/// Lowercased "domain.tld" pairs that `isValidHostname(_:)` rejects.
private static let domainRejectSet: Set<String> = ["example.com", "example.org", "example.net"]

/// Lowercased top-level domains that `isValidHostname(_:)` rejects.
private static let tldRejectSet: Set<String> = ["example", "i2p", "invalid", "localhost", "onion", "test"]

/// Characters that terminate the hostname when it is extracted from a raw
/// source string.
private static let urlDelimeters: Set<Character> = [":", "/", "?", "#", "[", "]", "@"]
/// Characters accepted in the path/query portion of a link's source text:
/// "%" plus the gen-delims, sub-delims and unreserved characters.
///
/// Digits and letters are restricted to ASCII. `CharacterSet.decimalDigits`
/// is deliberately not used because it also contains non-ASCII decimal
/// digits from other scripts, which are not valid URI characters.
///
/// See <https://tools.ietf.org/html/rfc3986>.
private static let validURICharacters = CharacterSet([
    "%",
    // "gen-delims"
    ":",
    "/",
    "?",
    "#",
    "[",
    "]",
    "@",
    // "sub-delims"
    "!",
    "$",
    "&",
    "'",
    "(",
    ")",
    "*",
    "+",
    ",",
    ";",
    "=",
    // unreserved
    "-",
    ".",
    "_",
    "~",
]).union(.init(charactersIn: "0"..."9"))
    .union(.init(charactersIn: "a"..."z"))
    .union(.init(charactersIn: "A"..."Z"))
/// Validates a hostname for link preview purposes.
///
/// A hostname is accepted when:
/// - it has at least two non-empty, period-separated labels,
/// - its last label (the TLD) is not in `tldRejectSet`,
/// - its last two labels, joined by ".", are not in `domainRejectSet`,
/// - and, ignoring periods, its characters are either all ASCII or all
///   non-ASCII.
///
/// - Parameter hostname: The hostname to validate.
/// - Returns: `true` if the hostname passes every check above.
private static func isValidHostname(_ hostname: String) -> Bool {
    // Technically, a label separator can be something other than a period,
    // but it looks like NSURL/NSDataDetector won't even parse that, so
    // periods are required for now.
    let labels = hostname.split(separator: ".").map { $0.lowercased() }
    guard labels.count >= 2 else {
        return false
    }

    let tld = labels[labels.count - 1]
    if Self.tldRejectSet.contains(tld) {
        return false
    }

    let domainAndTLD = labels.suffix(2).joined(separator: ".")
    if Self.domainRejectSet.contains(domainAndTLD) {
        return false
    }

    // Periods are ASCII but are allowed alongside non-ASCII labels, so they
    // are excluded before checking that the rest is uniformly one or the other.
    let nonPeriodCharacters = hostname.filter { $0 != "." }
    let isAllASCII = nonPeriodCharacters.allSatisfy { $0.isASCII }
    let isAllNonASCII = nonPeriodCharacters.allSatisfy { !$0.isASCII }
    return isAllASCII || isAllNonASCII
}
}