|
1 | 1 | import Foundation |
2 | | -import WebKit |
| 2 | +import UIKit |
3 | 3 | internal import MapboxCommon_Private |
4 | 4 |
|
5 | 5 | struct Attribution: Hashable { |
@@ -60,7 +60,7 @@ struct Attribution: Hashable { |
60 | 60 | internal init(title: String, url: URL?) { |
61 | 61 | self.title = title |
62 | 62 |
|
63 | | - guard let url = url else { |
| 63 | + guard let url, Self.isWebScheme(url) else { |
64 | 64 | self.kind = .nonActionable |
65 | 65 | return |
66 | 66 | } |
@@ -95,86 +95,131 @@ struct Attribution: Hashable { |
95 | 95 | return NSAttributedString(string: attributionText, attributes: attributes) |
96 | 96 | } |
97 | 97 |
|
98 | | - /// Parse the raw attribution strings from sources asynchronously |
99 | | - /// - Parameter rawAttributions: Array of HTML strings |
100 | | - /// - Parameter completion: A block that will be passed the result of parsing. |
101 | | - internal static func parse(_ rawAttributions: [String], completion: @escaping ([Attribution]) -> Void) { |
102 | | -#if compiler(>=5.6.0) && canImport(_Concurrency) |
103 | | - Task { @MainActor in |
104 | | - let attributons = await parseAsync(rawAttributions) |
105 | | - completion(attributons) |
106 | | - } |
107 | | -#else |
108 | | - completion(parseSynchronously(rawAttributions)) |
109 | | -#endif |
110 | | - } |
111 | | - |
112 | | -#if compiler(>=5.6.0) && canImport(_Concurrency) |
113 | | - /// Parse the raw attribution strings from sources asynchronously |
114 | | - /// - Parameter rawAttributions: Array of HTML strings |
115 | | - /// - Returns: Array of Attribution structs |
116 | | - private static func parseAsync(_ rawAttributions: [String]) async -> [Attribution] { |
| 98 | + /// Parse the raw attribution strings from sources. |
| 99 | + /// |
| 100 | + /// Each string is treated as a restricted HTML fragment containing only |
| 101 | + /// `<a href="...">text</a>` anchors and surrounding plain text. No HTML |
| 102 | + /// importer is invoked — anchors are extracted with a regex and the |
| 103 | + /// remaining markup is stripped. This is deliberate: handing these |
| 104 | + /// strings (which originate from operator-controlled TileJSON |
| 105 | + /// `attribution` fields) to `NSAttributedString`'s HTML reader would |
| 106 | + /// cause the WebKit-backed importer to fetch any referenced subresource. |
| 107 | + /// |
| 108 | + /// Invariants this parser deliberately enforces (see MAPSIOS-2192): |
| 109 | + /// - Only anchors with **quoted** `href` (`"…"` or `'…'`) are |
| 110 | + /// recognised. Unquoted forms like `<a href=javascript:alert(1)>x</a>` |
| 111 | + /// do not match `anchorRegex` and fall through to the plain-text |
| 112 | + /// path, where the whole string becomes a single `.nonActionable` |
| 113 | + /// Attribution. Quoting is universal in real tileset attribution, so |
| 114 | + /// rejecting unquoted hrefs avoids a tolerant URL extractor that |
| 115 | + /// could be tricked into surfacing dangerous schemes as actionable. |
| 116 | + /// - Only `http`/`https` URLs become `.actionable`; every other scheme |
| 117 | + /// (`javascript:`, `data:`, `file:`, …) downgrades to `.nonActionable` |
| 118 | + /// in `init(title:url:)`. |
| 119 | + /// - `<img>`, `<link>`, `<style>` and other non-anchor markup never |
| 120 | + /// reaches an HTML parser, so no subresource fetching can originate |
| 121 | + /// from this code path. |
| 122 | + /// |
| 123 | + /// - Parameter rawAttributions: Array of attribution strings. |
| 124 | + /// - Returns: Deduplicated array of Attribution structs. |
| 125 | + internal static func parse(_ rawAttributions: [String]) -> [Attribution] { |
| 126 | + var seen: Set<Attribution> = [] |
117 | 127 | var result: [Attribution] = [] |
118 | 128 |
|
119 | | - for attributionString in rawAttributions { |
120 | | - guard let attributedString = try? await NSAttributedString.fromHTML(attributionString).0 else { |
121 | | - continue |
| 129 | + for raw in rawAttributions { |
| 130 | + for attribution in parseOne(raw) where seen.insert(attribution).inserted { |
| 131 | + result.append(attribution) |
122 | 132 | } |
123 | | - |
124 | | - result.append(contentsOf: attributedString.attributions) |
125 | 133 | } |
126 | 134 |
|
127 | | - // Disallow duplicates. |
128 | | - // swiftlint:disable:next force_cast |
129 | | - return NSOrderedSet(array: result).array as! [Attribution] |
| 135 | + return result |
130 | 136 | } |
131 | | -#endif |
132 | 137 |
|
133 | | - /// Parse the raw attribution strings from sources synchronously. |
134 | | - /// Known for intermittent crashes - https://developer.apple.com/forums/thread/115405?answerId=356326022#356326022 |
135 | | - /// |
136 | | - /// - Parameter rawAttributions: Array of HTML strings |
137 | | - /// - Returns: Array of Attribution structs |
138 | | - private static func parseSynchronously(_ rawAttributions: [String]) -> [Attribution] { |
139 | | - let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [ |
140 | | - .characterEncoding: NSNumber(value: String.Encoding.utf8.rawValue), |
141 | | - .documentType: NSAttributedString.DocumentType.html |
142 | | - ] |
| 138 | + // MARK: - Internals |
| 139 | + |
| 140 | + /// Defense-in-depth cap on attribution string size; real attribution |
| 141 | + /// strings are tens of characters. |
| 142 | + private static let maxInputLength = 16 * 1024 |
| 143 | + |
| 144 | + private static let anchorRegex: NSRegularExpression = { |
| 145 | + let pattern = #"<a\b[^>]*\bhref\s*=\s*(?:"([^"]*)"|'([^']*)')[^>]*>(.*?)</a>"# |
| 146 | + // swiftlint:disable:next force_try |
| 147 | + return try! NSRegularExpression(pattern: pattern, options: [.caseInsensitive, .dotMatchesLineSeparators]) |
| 148 | + }() |
| 149 | + |
| 150 | + private static let tagRegex: NSRegularExpression = { |
| 151 | + // swiftlint:disable:next force_try |
| 152 | + return try! NSRegularExpression(pattern: "<[^>]+>", options: [.dotMatchesLineSeparators]) |
| 153 | + }() |
| 154 | + |
| 155 | + private static let trimCharacterSet = CharacterSet(charactersIn: "©").union(.whitespacesAndNewlines) |
| 156 | + |
| 157 | + /// `&amp;` must be decoded last so we don't double-decode `&amp;copy;`. |
| 158 | + private static let htmlEntities: [(String, String)] = [ |
| 159 | + ("&copy;", "©"), |
| 160 | + ("&lt;", "<"), |
| 161 | + ("&gt;", ">"), |
| 162 | + ("&quot;", "\""), |
| 163 | + ("&#39;", "'"), |
| 164 | + ("&apos;", "'"), |
| 165 | + ("&nbsp;", " "), |
| 166 | + ("&amp;", "&") |
| 167 | + ] |
143 | 168 |
|
144 | | - let attributions = rawAttributions |
145 | | - .compactMap { $0.data(using: .utf8) } |
146 | | - .compactMap { try? NSAttributedString(data: $0, options: options, documentAttributes: nil) } |
147 | | - .flatMap(\.attributions) |
| 169 | + private static func parseOne(_ raw: String) -> [Attribution] { |
| 170 | + guard !raw.isEmpty else { return [] } |
148 | 171 |
|
149 | | - // Disallow duplicates. |
150 | | - // swiftlint:disable:next force_cast |
151 | | - return NSOrderedSet(array: attributions).array as! [Attribution] |
152 | | - } |
153 | | -} |
| 172 | + guard raw.utf8.count <= maxInputLength else { |
| 173 | + Log.warning( |
| 174 | + "Attribution string exceeds \(maxInputLength)-byte hard cap (\(raw.utf8.count) bytes); dropping. " + |
| 175 | + "Real tileset attribution strings are tens of characters — investigate the source.", |
| 176 | + category: "Attribution" |
| 177 | + ) |
| 178 | + return [] |
| 179 | + } |
154 | 180 |
|
155 | | -fileprivate extension NSAttributedString { |
156 | | - var attributions: [Attribution] { |
157 | | - let characterSet = CharacterSet(charactersIn: "©").union(.whitespacesAndNewlines) |
158 | | - var attributions: [Attribution] = [] |
| 181 | + let fullRange = NSRange(raw.startIndex..., in: raw) |
| 182 | + let matches = anchorRegex.matches(in: raw, options: [], range: fullRange) |
159 | 183 |
|
160 | | - enumerateAttribute(.link, |
161 | | - in: NSRange(location: 0, length: length), |
162 | | - options: []) { (value: Any?, range: NSRange, _: UnsafeMutablePointer<ObjCBool>) in |
163 | | - guard range.location != NSNotFound else { |
164 | | - return |
165 | | - } |
| 184 | + guard !matches.isEmpty else { |
| 185 | + let title = normalize(stripTags(raw)) |
| 186 | + return title.isEmpty ? [] : [Attribution(title: title, url: nil)] |
| 187 | + } |
166 | 188 |
|
167 | | - let substring = attributedSubstring(from: range).string |
168 | | - let trimmedString = substring.trimmingCharacters(in: characterSet) |
| 189 | + var attributions: [Attribution] = [] |
| 190 | + for match in matches { |
| 191 | + guard let innerRange = Range(match.range(at: 3), in: raw) else { continue } |
| 192 | + let title = normalize(stripTags(String(raw[innerRange]))) |
| 193 | + guard !title.isEmpty else { continue } |
| 194 | + |
| 195 | + let hrefRange = Range(match.range(at: 1), in: raw) ?? Range(match.range(at: 2), in: raw) |
| 196 | + let url = hrefRange |
| 197 | + .map { decodeEntities(String(raw[$0])) } |
| 198 | + .flatMap(URL.init(string:)) |
| 199 | + attributions.append(Attribution(title: title, url: url)) |
| 200 | + } |
| 201 | + return attributions |
| 202 | + } |
169 | 203 |
|
170 | | - guard !trimmedString.isEmpty else { |
171 | | - return |
172 | | - } |
| 204 | + private static func stripTags(_ s: String) -> String { |
| 205 | + let range = NSRange(s.startIndex..., in: s) |
| 206 | + return tagRegex.stringByReplacingMatches(in: s, options: [], range: range, withTemplate: "") |
| 207 | + } |
173 | 208 |
|
174 | | - let attribution = Attribution(title: trimmedString, url: value as? URL) |
175 | | - attributions.append(attribution) |
| 209 | + private static func decodeEntities(_ s: String) -> String { |
| 210 | + var out = s |
| 211 | + for (entity, replacement) in htmlEntities where out.contains(entity) { |
| 212 | + out = out.replacingOccurrences(of: entity, with: replacement) |
176 | 213 | } |
| 214 | + return out |
| 215 | + } |
177 | 216 |
|
178 | | - return attributions |
| 217 | + private static func normalize(_ s: String) -> String { |
| 218 | + decodeEntities(s).trimmingCharacters(in: trimCharacterSet) |
| 219 | + } |
| 220 | + |
| 221 | + private static func isWebScheme(_ url: URL) -> Bool { |
| 222 | + guard let scheme = url.scheme?.lowercased() else { return false } |
| 223 | + return scheme == "https" || scheme == "http" |
179 | 224 | } |
180 | 225 | } |
0 commit comments