Skip to content

Commit 34b545d

Browse files
authored
feat: backspaceでtypo correctionを発動する機能を追加 (#294)
* feat: backspaceでtypo correctionを発動する機能を追加 * feat: use lm-based typo correction * feat: add flag guard for this feature * fix: revert ください-specific PoC impl * feat: n-gram based tc and on-demand weight downloading * fix: minor bug
1 parent 0ee8aed commit 34b545d

9 files changed

Lines changed: 573 additions & 16 deletions

File tree

Core/Package.swift

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ let package = Package(
2121
)
2222
],
2323
dependencies: [
24-
.package(url: "https://github.com/azooKey/AzooKeyKanaKanjiConverter", revision: "44429812ea2f6fe1b8a759dd994c6b29eafbc88f", traits: kanaKanjiConverterTraits)
24+
.package(url: "https://github.com/azooKey/AzooKeyKanaKanjiConverter", revision: "23544d6ea30822fd498caeff2dbc04d78b268134", traits: kanaKanjiConverterTraits),
25+
.package(url: "https://github.com/apple/swift-crypto.git", from: "3.0.0"),
26+
.package(url: "https://github.com/weichsel/ZIPFoundation.git", from: "0.9.0")
2527
],
2628
targets: [
2729
.executableTarget(
@@ -36,7 +38,9 @@ let package = Package(
3638
name: "Core",
3739
dependencies: [
3840
.product(name: "SwiftUtils", package: "AzooKeyKanaKanjiConverter"),
39-
.product(name: "KanaKanjiConverterModuleWithDefaultDictionary", package: "AzooKeyKanaKanjiConverter")
41+
.product(name: "KanaKanjiConverterModuleWithDefaultDictionary", package: "AzooKeyKanaKanjiConverter"),
42+
.product(name: "Crypto", package: "swift-crypto"),
43+
.product(name: "ZIPFoundation", package: "ZIPFoundation")
4044
],
4145
swiftSettings: [.interoperabilityMode(.Cxx)],
4246
plugins: [
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import Foundation
2+
3+
/// Namespace for the App Group identifier constants.
public enum AppGroup {
    /// Identifier string of the azooKeyMac group.
    public static let azooKeyMacIdentifier: String = "group.dev.ensan.inputmethod.azooKeyMac"
}

Core/Sources/Core/Configs/BoolConfigItem.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ extension Config {
3232
static let `default` = false
3333
public static let key: String = "dev.ensan.inputmethod.azooKeyMac.preference.debug.predictiveTyping"
3434
}
35+
/// Setting that enables the debug feature for typo (input) correction.
public struct DebugTypoCorrection: BoolConfigItem {
    /// Preference key under which this flag is stored.
    public static let key: String = "dev.ensan.inputmethod.azooKeyMac.preference.debug.typoCorrection"

    /// The feature is off unless explicitly enabled.
    static let `default`: Bool = false

    public init() {}
}
3541
/// ライブ変換を有効化する設定
3642
public struct LiveConversion: BoolConfigItem {
3743
public init() {}
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
import Crypto
2+
import Foundation
3+
#if canImport(FoundationNetworking)
4+
import FoundationNetworking
5+
#endif
6+
import ZIPFoundation
7+
8+
/// Result of checking the debug typo-correction weight files on disk.
///
/// Produced by `DebugTypoCorrectionWeights.state(modelDirectoryURL:)`.
public enum DebugTypoCorrectionState: Sendable, Equatable {
    /// Validation reported `false` (a required file is missing or its digest differs).
    case notDownloaded
    /// Every required file is present and passed validation.
    case downloaded
    /// Validation itself threw an error.
    case failed
}
13+
14+
/// Errors thrown while downloading and verifying the debug typo-correction weights.
public enum DebugTypoCorrectionWeightsError: LocalizedError, Sendable {
    /// The server answered with a status code outside `200 ... 299`.
    case invalidHTTPStatus(url: URL, statusCode: Int)
    /// The MD5 digest of a file did not match the expected value.
    case hashMismatch(fileName: String, expected: String, actual: String)
    /// The expected directory was not found after extracting the archive.
    case extractedFolderNotFound(path: String)

    /// Human-readable description of the failure.
    public var errorDescription: String? {
        let message: String
        switch self {
        case let .invalidHTTPStatus(url, statusCode):
            message = "Failed to download \(url.lastPathComponent) (HTTP \(statusCode))"
        case let .hashMismatch(fileName, expected, actual):
            message = "Hash mismatch for \(fileName). expected=\(expected), actual=\(actual)"
        case let .extractedFolderNotFound(path):
            message = "Extracted folder not found at \(path)"
        }
        return message
    }
}
30+
31+
/// Locates, validates, and downloads the weight files used by the debug typo-correction feature.
public enum DebugTypoCorrectionWeights {
    /// A weight file that must be present, together with its expected digest.
    public struct RequiredFile: Sendable, Equatable {
        /// File name inside the model directory.
        public let fileName: String
        /// Expected MD5 digest as a lowercase hexadecimal string.
        public let md5: String
    }

    /// Name of the directory holding the weights, both inside the archive and on disk.
    public static let bundleDirectoryName = "input_n5_lm_v1"

    /// Every file that must exist (with a matching digest) for the weights to count as downloaded.
    public static let requiredFiles: [RequiredFile] = [
        .init(fileName: "lm_c_abc.marisa", md5: "cb0c5c156eae8b16e9ddd0757d029263"),
        .init(fileName: "lm_c_bc.marisa", md5: "49a68be03c58d67fdf078bcb48bce4a2"),
        .init(fileName: "lm_r_xbx.marisa", md5: "d95157d1ff815b8d3e42b43660fdfa2f"),
        .init(fileName: "lm_u_abx.marisa", md5: "9d3d1be564f78e4f4ca2ec7629a2b80b"),
        .init(fileName: "lm_u_xbc.marisa", md5: "2c0f4652f78e8647cc70ab8eceba9b58")
    ]

    /// Location of the zip archive that contains `bundleDirectoryName`.
    private static let zipURL = URL(string: "https://huggingface.co/Miwa-Keita/input_n5_lm_v1/resolve/main/input_n5_lm_v1.zip")!

    /// File names of all entries in `requiredFiles`, in the same order.
    public static var requiredFileNames: [String] {
        Self.requiredFiles.map(\.fileName)
    }

    /// Returns `<application support>/downloaded/<bundleDirectoryName>`.
    ///
    /// - Parameter azooKeyApplicationSupportDirectoryURL: Base directory under which the weights are stored.
    public static func modelDirectoryURL(azooKeyApplicationSupportDirectoryURL: URL) -> URL {
        azooKeyApplicationSupportDirectoryURL
            .appendingPathComponent("downloaded", isDirectory: true)
            .appendingPathComponent(Self.bundleDirectoryName, isDirectory: true)
    }

    /// Returns `true` when every required file exists in `modelDirectoryURL`.
    ///
    /// Only existence is checked here; use `validateWeights(modelDirectoryURL:)` to also verify digests.
    public static func hasRequiredWeightFiles(modelDirectoryURL: URL) -> Bool {
        Self.requiredFiles.allSatisfy {
            FileManager.default.fileExists(atPath: modelDirectoryURL.appendingPathComponent($0.fileName).path)
        }
    }

    /// Summarizes the on-disk weights as a `DebugTypoCorrectionState`.
    ///
    /// - Returns: `.downloaded` when validation succeeds, `.notDownloaded` when it reports `false`,
    ///   and `.failed` when validation throws.
    public static func state(modelDirectoryURL: URL) -> DebugTypoCorrectionState {
        do {
            return try Self.validateWeights(modelDirectoryURL: modelDirectoryURL) ? .downloaded : .notDownloaded
        } catch {
            return .failed
        }
    }

    /// Checks that every required file exists and that its MD5 digest matches the expected value.
    ///
    /// - Returns: `false` as soon as a file is missing or its digest differs; `true` otherwise.
    /// - Throws: Any error raised while reading a file to compute its digest.
    public static func validateWeights(modelDirectoryURL: URL) throws -> Bool {
        for required in Self.requiredFiles {
            let fileURL = modelDirectoryURL.appendingPathComponent(required.fileName)
            guard FileManager.default.fileExists(atPath: fileURL.path) else {
                return false
            }
            guard try Self.fileMD5HexString(fileURL: fileURL) == required.md5 else {
                return false
            }
        }
        return true
    }

    /// Downloads the weight archive, verifies it, and installs it at `modelDirectoryURL`.
    ///
    /// The archive is unpacked and verified in a temporary directory, so a failed download or a
    /// digest mismatch never touches an existing installation.
    ///
    /// - Parameter modelDirectoryURL: Final location of the weights directory.
    /// - Throws: `DebugTypoCorrectionWeightsError.invalidHTTPStatus` for a non-2xx response,
    ///   `.extractedFolderNotFound` when the archive lacks `bundleDirectoryName`,
    ///   `.hashMismatch` when a file's digest differs, or any error from the network,
    ///   file system, or archive extraction.
    public static func downloadWeights(modelDirectoryURL: URL) async throws {
        let fileManager = FileManager.default
        let parentDirectoryURL = modelDirectoryURL.deletingLastPathComponent()
        try fileManager.createDirectory(at: parentDirectoryURL, withIntermediateDirectories: true)

        let temporaryRootURL = fileManager.temporaryDirectory
            .appendingPathComponent("azookey-debug-tc-\(UUID().uuidString)", isDirectory: true)
        try fileManager.createDirectory(at: temporaryRootURL, withIntermediateDirectories: true)
        defer {
            try? fileManager.removeItem(at: temporaryRootURL)
        }

        let downloadedZipTemporaryURL = temporaryRootURL.appendingPathComponent("input_n5_lm_v1.zip", isDirectory: false)
        let (temporaryFileURL, response) = try await URLSession.shared.download(from: Self.zipURL)
        // The async download API leaves the file on disk for the caller to dispose of.
        // Remove it on every exit path; after a successful move this is a harmless no-op.
        defer {
            try? fileManager.removeItem(at: temporaryFileURL)
        }
        if let httpResponse = response as? HTTPURLResponse, !(200 ... 299).contains(httpResponse.statusCode) {
            throw DebugTypoCorrectionWeightsError.invalidHTTPStatus(url: Self.zipURL, statusCode: httpResponse.statusCode)
        }
        try fileManager.moveItem(at: temporaryFileURL, to: downloadedZipTemporaryURL)

        let extractionRootURL = temporaryRootURL.appendingPathComponent("extracted", isDirectory: true)
        try fileManager.unzipItem(at: downloadedZipTemporaryURL, to: extractionRootURL)

        let stagingDirectoryURL = extractionRootURL.appendingPathComponent(Self.bundleDirectoryName, isDirectory: true)
        guard fileManager.fileExists(atPath: stagingDirectoryURL.path) else {
            throw DebugTypoCorrectionWeightsError.extractedFolderNotFound(path: stagingDirectoryURL.path)
        }

        // Verify every file before anything is installed.
        for required in Self.requiredFiles {
            let fileURL = stagingDirectoryURL.appendingPathComponent(required.fileName, isDirectory: false)
            let actualMD5 = try Self.fileMD5HexString(fileURL: fileURL)
            guard actualMD5 == required.md5 else {
                throw DebugTypoCorrectionWeightsError.hashMismatch(fileName: required.fileName, expected: required.md5, actual: actualMD5)
            }
        }

        try Self.installDirectory(from: stagingDirectoryURL, to: modelDirectoryURL, fileManager: fileManager)
    }

    /// Moves `sourceURL` to `destinationURL`, keeping any previous installation until the move succeeds.
    private static func installDirectory(from sourceURL: URL, to destinationURL: URL, fileManager: FileManager) throws {
        // Move an existing installation aside (same parent directory) instead of deleting it up front.
        var backupURL: URL?
        if fileManager.fileExists(atPath: destinationURL.path) {
            let candidateURL = destinationURL.deletingLastPathComponent()
                .appendingPathComponent("\(destinationURL.lastPathComponent).backup-\(UUID().uuidString)", isDirectory: true)
            try fileManager.moveItem(at: destinationURL, to: candidateURL)
            backupURL = candidateURL
        }

        do {
            try fileManager.moveItem(at: sourceURL, to: destinationURL)
        } catch {
            if let backupURL = backupURL {
                // Discard whatever was partially moved, then put the previous installation back.
                try? fileManager.removeItem(at: destinationURL)
                try? fileManager.moveItem(at: backupURL, to: destinationURL)
            }
            throw error
        }

        if let backupURL = backupURL {
            try? fileManager.removeItem(at: backupURL)
        }
    }

    /// Computes the MD5 digest of the file at `fileURL` as a lowercase hexadecimal string.
    ///
    /// The file is read in 1 MiB chunks so it is never loaded into memory all at once.
    private static func fileMD5HexString(fileURL: URL) throws -> String {
        let handle = try FileHandle(forReadingFrom: fileURL)
        defer {
            try? handle.close()
        }

        var md5 = Insecure.MD5()
        // `read(upToCount:)` yields `nil` or empty data at end of file.
        while let chunk = try handle.read(upToCount: 1_048_576), !chunk.isEmpty {
            md5.update(data: chunk)
        }
        return md5.finalize().map { String(format: "%02x", $0) }.joined()
    }
}

0 commit comments

Comments
 (0)