Skip to content

Commit 511096c

Browse files
authored
Merge pull request #13 from josh/escaping-scalar
Configure escaping scalar
2 parents 124903b + 3e73f3d commit 511096c

7 files changed

Lines changed: 98 additions & 28 deletions

File tree

Sources/Active/Reader/Reader.swift

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,8 @@ extension CSVReader {
169169
}
170170

171171
// 4. If the unicode scalar retrieved is the escaping scalar (a double quote by default), an escaped field is awaiting parsing.
172-
if scalar == self.settings.escapingScalar {
173-
let field = try self.parseEscapedField(rowIndex: rowIndex)
172+
if let escapingScalar = self.settings.escapingScalar, scalar == escapingScalar {
173+
let field = try self.parseEscapedField(rowIndex: rowIndex, escaping: escapingScalar)
174174
result.append(field.value)
175175
if field.isAtEnd { break loop }
176176
// 5. If the field delimiter is encountered, an implicit empty field has been defined.
@@ -233,21 +233,22 @@ extension CSVReader {
233233
///
234234
/// When this function is executed, the quote opening the "escaped field" has already been read.
235235
/// - parameter rowIndex: The index of the row being parsed.
236+
/// - parameter escapingScalar: The unicode scalar escaping character to use.
236237
/// - throws: `CSVError<CSVReader>` exclusively.
237238
/// - returns: The parsed field and whether the row/file ending characters have been found.
238-
private func parseEscapedField(rowIndex: Int) throws -> (value: String, isAtEnd: Bool) {
239+
private func parseEscapedField(rowIndex: Int, escaping escapingScalar: Unicode.Scalar) throws -> (value: String, isAtEnd: Bool) {
239240
var field: String.UnicodeScalarView = .init()
240241
var reachedRowsEnd = false
241242

242243
fieldLoop: while true {
243244
// 1. Retrieve a scalar (if there is none, it means EOF). Reaching EOF is not allowed without closing the escaped field first.
244245
guard let scalar = try self.buffer.next() ?? self.decoder() else { throw Error.invalidEOF(rowIndex: rowIndex) }
245246
// 2. If the retrieved scalar is not a quote (i.e. "), just store it and continue parsing.
246-
guard scalar == self.settings.escapingScalar else { field.append(scalar); continue fieldLoop }
247+
guard scalar == escapingScalar else { field.append(scalar); continue fieldLoop }
247248
// 3. If the retrieved scalar was a quote, retrieve the following scalar and check if it is EOF. If so, the field has finished and also the row and the file.
248249
guard var followingScalar = try self.buffer.next() ?? self.decoder() else { reachedRowsEnd = true; break fieldLoop }
249250
// 4. If the second retrieved scalar is another quote, the data is escaping a single quote scalar (quotes are escaped with other quotes).
250-
guard followingScalar != self.settings.escapingScalar else { field.append(self.settings.escapingScalar); continue fieldLoop }
251+
guard followingScalar != escapingScalar else { field.append(escapingScalar); continue fieldLoop }
251252
// 5. Once this point is reached, the field has been properly escaped.
252253
if !self.settings.trimCharacters.isEmpty {
253254
// 6. Trim any character after the quote if necessary.

Sources/Active/Reader/ReaderConfiguration.swift

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ extension CSVReader {
99
public var headerStrategy: Strategy.Header
1010
/// Trims the given characters at the beginning and end of each row, and between fields.
1111
public var trimStrategry: CharacterSet
12+
/// The strategy for escaping quoted fields.
13+
public var escapingStrategy: Strategy.Escaping
1214
/// The encoding used to identify the underlying data or `nil` if you want the CSV reader to try to figure it out.
1315
///
1416
/// If no encoding is provided and the input data doesn't contain a Byte Order Marker (BOM), UTF8 is presumed.
@@ -24,6 +26,7 @@ extension CSVReader {
2426
self.delimiters = (field: ",", row: "\n")
2527
self.headerStrategy = .none
2628
self.trimStrategry = .init()
29+
self.escapingStrategy = .doubleQuote
2730
self.encoding = nil
2831
self.presample = false
2932
}
@@ -38,7 +41,7 @@ extension CSVReader {
3841
/// The characters set to be trimmed at the beginning and ending of each field.
3942
let trimCharacters: CharacterSet
4043
/// The unicode scalar used as encapsulator and escaping character (when printed two times).
41-
let escapingScalar: Unicode.Scalar = "\""
44+
let escapingScalar: Unicode.Scalar?
4245

4346
/// Creates the immutable reader settings from the user provided configuration values.
4447
/// - parameter configuration: The configuration values provided by the API user.
@@ -61,6 +64,12 @@ extension CSVReader {
6164
}
6265
// 2. Set the trim characters set.
6366
self.trimCharacters = configuration.trimStrategry
67+
// 3. Set the escaping scalar.
68+
self.escapingScalar = configuration.escapingStrategy.scalar
69+
// 4. Ensure the trim character set does not include the escaping scalar.
70+
if let escapingScalar = escapingScalar, trimCharacters.contains(escapingScalar) {
71+
throw Error.invalidTrimCharacter(escapingScalar: escapingScalar, trimCharacters: trimCharacters)
72+
}
6473
}
6574
}
6675
}
@@ -74,4 +83,11 @@ fileprivate extension CSVReader.Error {
7483
help: "Set different delimiters for field and rows.",
7584
userInfo: ["Delimiter": delimiter])
7685
}
86+
87+
static func invalidTrimCharacter(escapingScalar: Unicode.Scalar, trimCharacters: CharacterSet) -> CSVError<CSVReader> {
88+
.init(.invalidConfiguration,
89+
reason: "The trim characters set can not include the escaping scalar.",
90+
help: "Remove the escaping scalar from the trim characters set.",
91+
userInfo: ["Escaping scalar": escapingScalar, "Trim characters": trimCharacters])
92+
}
7793
}

Sources/Active/Writer/Writer.swift

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,28 +181,36 @@ extension CSVWriter {
181181
var result: [Unicode.Scalar]
182182

183183
if field.isEmpty {
184-
result = .init(repeating: escapingScalar, count: 2)
184+
if let escapingScalar = escapingScalar {
185+
result = .init(repeating: escapingScalar, count: 2)
186+
} else {
187+
result = []
188+
}
185189
} else {
186190
let input: [Unicode.Scalar] = .init(field.unicodeScalars)
187191
result = .init()
188192
result.reserveCapacity(input.count + 2)
189-
var (index, needsEscaping) = (0, false)
193+
var index = 0
194+
var needsEscaping: Unicode.Scalar?
190195

191196
while index < input.endIndex {
192197
let scalar = input[index]
193198

194199
if scalar == escapingScalar {
195-
needsEscaping = true
200+
needsEscaping = scalar
196201
} else if self.isFieldDelimiter(input, &index, &result) || self.isRowDelimiter(input, &index, &result) {
197-
needsEscaping = true
202+
needsEscaping = scalar
198203
continue
199204
}
200205

201206
index += 1
202207
result.append(scalar)
203208
}
204209

205-
if needsEscaping {
210+
if let needsEscaping = needsEscaping {
211+
guard let escapingScalar = escapingScalar else {
212+
throw Error.unescapedDelimiter(needsEscaping)
213+
}
206214
result.insert(escapingScalar, at: result.startIndex)
207215
result.append(escapingScalar)
208216
}
@@ -220,6 +228,13 @@ extension CSVWriter {
220228
}
221229

222230
fileprivate extension CSVWriter.Error {
231+
static func unescapedDelimiter(_ delimiter: Unicode.Scalar) -> CSVError<CSVWriter> {
232+
.init(.invalidInput,
233+
reason: "A field cannot include a delimiter if escaping strategy is disabled.",
234+
help: "Remove delimiter from field or set an escaping strategy.",
235+
userInfo: ["Invalid character": delimiter])
236+
237+
}
223238
/// Error raised when a field is about to be written and it overflows the expected number of fields per row.
224239
static func fieldOverflow(expectedFields: Int) -> CSVError<CSVWriter> {
225240
.init(.invalidOperation,

Sources/Active/Writer/WriterConfiguration.swift

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ extension CSVWriter {
33
public struct Configuration {
44
/// The field and row delimiters.
55
public var delimiters: Delimiter.Pair
6+
/// The strategy for escaping quoted fields.
7+
public var escapingStrategy: Strategy.Escaping
68
/// The row of headers to write at the beginning of the CSV data.
79
///
810
/// If empty, no row will be written.
@@ -19,6 +21,7 @@ extension CSVWriter {
1921
/// Designated initializer setting the default values.
2022
public init() {
2123
self.delimiters = (field: ",", row: "\n")
24+
self.escapingStrategy = .doubleQuote
2225
self.headers = .init()
2326
self.encoding = nil
2427
self.bomStrategy = .convention
@@ -53,7 +56,7 @@ extension CSVWriter {
5356
/// The row of headers to write at the beginning of the CSV data (if empty, no header row is written).
5457
let headers: [String]
5558
/// The unicode scalar used as encapsulator and escaping character (when printed two times).
56-
let escapingScalar: Unicode.Scalar = "\""
59+
let escapingScalar: Unicode.Scalar?
5760
/// The encoding used to identify the underlying data.
5861
let encoding: String.Encoding
5962

@@ -71,6 +74,7 @@ extension CSVWriter {
7174
self.delimiters = (.init(field), .init(row))
7275
}
7376
// 2. Copy all other values.
77+
self.escapingScalar = configuration.escapingStrategy.scalar
7478
self.headers = configuration.headers
7579
self.encoding = encoding
7680
}

Sources/Strategy.swift

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,32 @@ public enum Strategy {
1616
}
1717
}
1818

19+
/// The strategy for escaping quoted fields.
20+
public enum Escaping: ExpressibleByNilLiteral, ExpressibleByUnicodeScalarLiteral {
21+
/// CSV delimiters can not be escaped.
22+
case none
23+
/// Ignore delimiters found within a pair of the given scalar.
24+
case scalar(Unicode.Scalar)
25+
26+
/// Escape double quoted values.
27+
public static let doubleQuote: Self = "\""
28+
29+
public init(nilLiteral: ()) { self = .none }
30+
31+
public init(unicodeScalarLiteral value: Unicode.Scalar) {
32+
self = .scalar(value)
33+
}
34+
35+
var scalar: Unicode.Scalar? {
36+
switch self {
37+
case .none:
38+
return nil
39+
case .scalar(let scalar):
40+
return scalar
41+
}
42+
}
43+
}
44+
1945
/// The strategy to use for non-standard floating-point values (IEEE 754 infinity and NaN).
2046
public enum NonConformingFloat {
2147
/// Throw upon encountering non-conforming values. This is the default strategy.

Tests/CodableCSVTests/ActiveTests/ReaderTests.swift

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ extension ReaderTests {
8383
let fieldDelimiters: [Delimiter.Field] = [",", ";", "\t", "|", "||", "|-|"]
8484
let headerStrategy: [Strategy.Header] = [.none, .firstLine, /*.unknown*/]
8585
let trimStrategy: [CharacterSet] = [.init(), .whitespaces]
86+
let escapingStrategy: [Strategy.Escaping] = [.none, .doubleQuote]
8687
let presamples: [Bool] = [true, false]
8788
// The data used for testing.
8889
let (headers, content) = (TestData.headers, TestData.content)
@@ -126,15 +127,18 @@ extension ReaderTests {
126127
var toTrim = t
127128
if f.rawValue.count == 1, t.contains(f.rawValue.first!) { toTrim.remove(f.rawValue.first!) }
128129
if r.rawValue.count == 1, t.contains(r.rawValue.first!) { toTrim.remove(r.rawValue.first!) }
129-
130-
for p in presamples {
131-
var c = CSVReader.Configuration()
132-
c.delimiters = pair
133-
c.headerStrategy = h
134-
c.trimStrategry = toTrim
135-
c.presample = p
136-
137-
XCTAssertNoThrow(try work(c, encoded))
130+
131+
for e in escapingStrategy {
132+
for p in presamples {
133+
var c = CSVReader.Configuration()
134+
c.delimiters = pair
135+
c.headerStrategy = h
136+
c.trimStrategry = toTrim
137+
c.escapingStrategy = e
138+
c.presample = p
139+
140+
XCTAssertNoThrow(try work(c, encoded))
141+
}
138142
}
139143
}
140144
}

Tests/CodableCSVTests/ActiveTests/WriterTests.swift

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ extension WriterTests {
4949
// The configuration values to be tested.
5050
let rowDelimiters: [Delimiter.Row] = ["\n", "\r", "\r\n", "**~**"]
5151
let fieldDelimiters: [Delimiter.Field] = [",", ";", "\t", "|", "||", "|-|"]
52+
let escapingStrategy: [Strategy.Escaping] = [.none, .doubleQuote]
5253
let encodings: [String.Encoding] = [.utf8, .utf16LittleEndian, .utf16BigEndian, .utf16LittleEndian, .utf32BigEndian]
5354
// The data used for testing.
5455
let headers = TestData.headers
@@ -69,13 +70,16 @@ extension WriterTests {
6970
let pair: Delimiter.Pair = (f, r)
7071
let sample = TestData.toCSV(input, delimiters: pair)
7172

72-
for encoding in encodings {
73-
var c = CSVWriter.Configuration()
74-
c.delimiters = pair
75-
c.headers = headers
76-
c.encoding = encoding
77-
c.bomStrategy = .never
78-
try work(c, sample)
73+
for escaping in escapingStrategy {
74+
for encoding in encodings {
75+
var c = CSVWriter.Configuration()
76+
c.delimiters = pair
77+
c.escapingStrategy = escaping
78+
c.headers = headers
79+
c.encoding = encoding
80+
c.bomStrategy = .never
81+
try work(c, sample)
82+
}
7983
}
8084
}
8185
}

0 commit comments

Comments
 (0)