Skip to content

Commit 20e8db7

Browse files
committed
[Tests] Add a test for correct usage of CP437 in ZIP
1 parent 4fa84e0 commit 20e8db7

4 files changed

Lines changed: 39 additions & 1 deletion

File tree

SWCompression.xcodeproj/project.pbxproj

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@
263263
E6791E1B26FD0094003852A9 /* LZ4.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6791E1A26FD0094003852A9 /* LZ4.swift */; };
264264
E6791E3326FD05EC003852A9 /* LZ4Tests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6791E3226FD05EC003852A9 /* LZ4Tests.swift */; };
265265
E6974C5B2701AC2600E06C60 /* test_dict_B5_dictID.lz4 in Resources */ = {isa = PBXBuildFile; fileRef = E6974C5A2701AC2600E06C60 /* test_dict_B5_dictID.lz4 */; };
266+
E69FAC922729ACD900D3C406 /* test_dos_latin_us.zip in Resources */ = {isa = PBXBuildFile; fileRef = E69FAC912729ACD900D3C406 /* test_dos_latin_us.zip */; };
266267
E6B18E85270884E300F9AB99 /* LZ4CompressionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6B18E84270884E300F9AB99 /* LZ4CompressionTests.swift */; };
267268
E6C4150726FE230A00F9D36F /* XxHash32.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6C4150626FE230A00F9D36F /* XxHash32.swift */; };
268269
E6D86D2F26FE35C50032CFFA /* XxHash32Tests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6D86D2E26FE35C50032CFFA /* XxHash32Tests.swift */; };
@@ -541,6 +542,7 @@
541542
E6791E1A26FD0094003852A9 /* LZ4.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LZ4.swift; sourceTree = "<group>"; };
542543
E6791E3226FD05EC003852A9 /* LZ4Tests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LZ4Tests.swift; sourceTree = "<group>"; };
543544
E6974C5A2701AC2600E06C60 /* test_dict_B5_dictID.lz4 */ = {isa = PBXFileReference; lastKnownFileType = file; path = test_dict_B5_dictID.lz4; sourceTree = "<group>"; };
545+
E69FAC912729ACD900D3C406 /* test_dos_latin_us.zip */ = {isa = PBXFileReference; lastKnownFileType = archive.zip; path = test_dos_latin_us.zip; sourceTree = "<group>"; };
544546
E6B18E84270884E300F9AB99 /* LZ4CompressionTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LZ4CompressionTests.swift; sourceTree = "<group>"; };
545547
E6C4150626FE230A00F9D36F /* XxHash32.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = XxHash32.swift; sourceTree = "<group>"; };
546548
E6D86D2E26FE35C50032CFFA /* XxHash32Tests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = XxHash32Tests.swift; sourceTree = "<group>"; };
@@ -910,6 +912,7 @@
910912
06F065DD1FFB763300312A82 /* test_zip64.zip */,
911913
06D42DA42067B46800C1A98B /* test_custom_extra_field.zip */,
912914
064DF452245618A300D285F3 /* bad_cd_ext_ts.zip */,
915+
E69FAC912729ACD900D3C406 /* test_dos_latin_us.zip */,
913916
);
914917
path = ZIP;
915918
sourceTree = "<group>";
@@ -1247,6 +1250,7 @@
12471250
06F066391FFB763400312A82 /* test8.xz in Resources */,
12481251
06F0662F1FFB763400312A82 /* test9.gz in Resources */,
12491252
064DF453245618A300D285F3 /* bad_cd_ext_ts.zip in Resources */,
1253+
E69FAC922729ACD900D3C406 /* test_dos_latin_us.zip in Resources */,
12501254
E66F36242538726E00076A6E /* test_empty.lzma in Resources */,
12511255
06F066751FFB763400312A82 /* test1.bz2 in Resources */,
12521256
064D01A920FD071300CAE058 /* текстовый файл.answer in Resources */,

Sources/ZIP/LittleEndianByteReader+Zip.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ extension LittleEndianByteReader {
2828
fileprivate extension String {
2929

3030
// "Latin-US (DOS)" CP437-2147483120
31+
// Note that this encoding is interpreted differently "depending on the context". This concerns codes in the 0-31 range
32+
// and code 127: sometimes they are treated as normal characters (as defined by IBM) and sometimes as control
33+
// characters, matching corresponding ranges of ASCII/UTF-8. The second interpretation, however, doesn't make CP437
34+
// compatible with UTF-8, as CP437 contains codes in the 80-FF range which are not valid UTF-8 codes.
35+
// In any case, we are constrained to the implementation provided by Foundation, which currently treats them as
36+
// control characters.
3137
static let cp437Encoding = String.Encoding(rawValue: 0x80000400)
3238
static let cp437Available = String.availableStringEncodings.contains(cp437Encoding)
3339

Tests/Test Files

Tests/ZipTests.swift

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,4 +288,32 @@ class ZipTests: XCTestCase {
288288
XCTAssertEqual(entries[1].data, answerData)
289289
}
290290

291+
func testDosLatinUS() throws {
292+
// This test checks that CP437 encoding is correctly used when there is no indication the file name is in UTF-8.
293+
// We introduced several CP437-specific characters from the 0x80-0xFF range into "test1.answer" to test this.
294+
// Note that we didn't use normal characters from the 0x00-0x7F range that don't match the characters from
295+
// UTF-8 with the same codes, since they are interpreted as control characters by Foundation.
296+
let testData = try Constants.data(forTest: "test_dos_latin_us", withType: ZipTests.testType)
297+
let entries = try ZipContainer.open(container: testData)
298+
299+
XCTAssertEqual(entries.count, 1)
300+
XCTAssertEqual(entries[0].info.name, "teüë1.½n█wΩ±")
301+
XCTAssertEqual(entries[0].info.type, .regular)
302+
XCTAssertEqual(entries[0].info.fileSystemType, .unix)
303+
XCTAssertFalse(entries[0].info.isTextFile)
304+
XCTAssertEqual(entries[0].info.compressionMethod, .copy)
305+
XCTAssertEqual(entries[0].info.ownerID, 501)
306+
XCTAssertEqual(entries[0].info.groupID, 20)
307+
XCTAssertEqual(entries[0].info.permissions, Permissions(rawValue: 420))
308+
XCTAssertEqual(entries[0].info.dosAttributes, DosAttributes(rawValue: 0))
309+
XCTAssertEqual(entries[0].info.comment, "")
310+
// Checking times' values is a bit difficult since they are extremely precise.
311+
XCTAssertNotNil(entries[0].info.modificationTime)
312+
XCTAssertNotNil(entries[0].info.accessTime)
313+
XCTAssertNil(entries[0].info.creationTime)
314+
315+
let answerData = try Constants.data(forAnswer: "test1")
316+
XCTAssertEqual(entries[0].data, answerData)
317+
}
318+
291319
}

0 commit comments

Comments
 (0)