From 08c258e3129d797a4531e97415818dcadb1c25e7 Mon Sep 17 00:00:00 2001 From: CodeAlligator Date: Sat, 26 Apr 2025 12:01:17 +0530 Subject: [PATCH] Syntax Highlight revamp --- DevLint/DevLint.xcodeproj/project.pbxproj | 155 +++++++- .../SyntaxHighlighter.swift | 0 .../Tokenizer/LanguageRules.swift | 55 +++ .../Tokenizer/SwiftLanguageRules.swift | 81 +++++ .../SyntaxHighlighting/Tokenizer/Token.swift | 47 +++ .../Tokenizer/Tokenizer.swift | 333 ++++++++++++++++++ DevLint/DevLintTests/DevLintTests.swift | 35 ++ DevLint/DevLintTests/TokenizerTests.swift | 122 +++++++ 8 files changed, 827 insertions(+), 1 deletion(-) rename DevLint/DevLint/{Core => SyntaxHighlighting}/SyntaxHighlighter.swift (100%) create mode 100644 DevLint/DevLint/SyntaxHighlighting/Tokenizer/LanguageRules.swift create mode 100644 DevLint/DevLint/SyntaxHighlighting/Tokenizer/SwiftLanguageRules.swift create mode 100644 DevLint/DevLint/SyntaxHighlighting/Tokenizer/Token.swift create mode 100644 DevLint/DevLint/SyntaxHighlighting/Tokenizer/Tokenizer.swift create mode 100644 DevLint/DevLintTests/DevLintTests.swift create mode 100644 DevLint/DevLintTests/TokenizerTests.swift diff --git a/DevLint/DevLint.xcodeproj/project.pbxproj b/DevLint/DevLint.xcodeproj/project.pbxproj index 575c749..e9f0505 100644 --- a/DevLint/DevLint.xcodeproj/project.pbxproj +++ b/DevLint/DevLint.xcodeproj/project.pbxproj @@ -13,6 +13,12 @@ C81EF13C2D6B1D6A001B3B5E /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C81EF13B2D6B1D6A001B3B5E /* ContentView.swift */; }; C81EF13E2D6B1D6E001B3B5E /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C81EF13D2D6B1D6E001B3B5E /* Assets.xcassets */; }; C81EF1412D6B1D6E001B3B5E /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C81EF1402D6B1D6E001B3B5E /* Preview Assets.xcassets */; }; + C81FA9692D897B5B00A33FEC /* Tokenizer.swift in Sources */ = {isa = PBXBuildFile; fileRef = C81FA9682D897B5B00A33FEC /* Tokenizer.swift */; }; + 
C81FA96B2D897B6900A33FEC /* Token.swift in Sources */ = {isa = PBXBuildFile; fileRef = C81FA96A2D897B6900A33FEC /* Token.swift */; }; + C81FA9742D8989C000A33FEC /* DevLintTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C81FA9732D8989C000A33FEC /* DevLintTests.swift */; }; + C81FA97B2D898A0E00A33FEC /* TokenizerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C81FA97A2D898A0E00A33FEC /* TokenizerTests.swift */; }; + C81FA97E2D899CB100A33FEC /* LanguageRules.swift in Sources */ = {isa = PBXBuildFile; fileRef = C81FA97D2D899CB100A33FEC /* LanguageRules.swift */; }; + C81FA9802D899CD200A33FEC /* SwiftLanguageRules.swift in Sources */ = {isa = PBXBuildFile; fileRef = C81FA97F2D899CD200A33FEC /* SwiftLanguageRules.swift */; }; C8601D452D7EEEA60058AEAB /* CustomToolbar.swift in Sources */ = {isa = PBXBuildFile; fileRef = C8601D442D7EEEA60058AEAB /* CustomToolbar.swift */; }; C8601D472D7EFA770058AEAB /* ToolbarButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = C8601D462D7EFA770058AEAB /* ToolbarButton.swift */; }; C8763EBD2D7CCC1E00D535E6 /* LineNumberView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C8763EBC2D7CCC1E00D535E6 /* LineNumberView.swift */; }; @@ -30,6 +36,16 @@ C8F849BB2D75AF4A002090E5 /* SyntaxHighlighter.swift in Sources */ = {isa = PBXBuildFile; fileRef = C8F849BA2D75AF4A002090E5 /* SyntaxHighlighter.swift */; }; /* End PBXBuildFile section */ +/* Begin PBXContainerItemProxy section */ + C81FA9752D8989C000A33FEC /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = C81EF12E2D6B1D6A001B3B5E /* Project object */; + proxyType = 1; + remoteGlobalIDString = C81EF1352D6B1D6A001B3B5E; + remoteInfo = DevLint; + }; +/* End PBXContainerItemProxy section */ + /* Begin PBXFileReference section */ C813D9D72D7AED5500833098 /* CodeEditorModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CodeEditorModel.swift; sourceTree = ""; }; C813D9D92D7AED7600833098 /* 
ThemeModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ThemeModel.swift; sourceTree = ""; }; @@ -39,6 +55,13 @@ C81EF13D2D6B1D6E001B3B5E /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; C81EF1402D6B1D6E001B3B5E /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = ""; }; C81EF1422D6B1D6E001B3B5E /* DevLint.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = DevLint.entitlements; sourceTree = ""; }; + C81FA9682D897B5B00A33FEC /* Tokenizer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tokenizer.swift; sourceTree = ""; }; + C81FA96A2D897B6900A33FEC /* Token.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Token.swift; sourceTree = ""; }; + C81FA9712D8989C000A33FEC /* DevLintTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = DevLintTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + C81FA9732D8989C000A33FEC /* DevLintTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DevLintTests.swift; sourceTree = ""; }; + C81FA97A2D898A0E00A33FEC /* TokenizerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TokenizerTests.swift; sourceTree = ""; }; + C81FA97D2D899CB100A33FEC /* LanguageRules.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LanguageRules.swift; sourceTree = ""; }; + C81FA97F2D899CD200A33FEC /* SwiftLanguageRules.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SwiftLanguageRules.swift; sourceTree = ""; }; C8601D442D7EEEA60058AEAB /* CustomToolbar.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CustomToolbar.swift; sourceTree = 
""; }; C8601D462D7EFA770058AEAB /* ToolbarButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolbarButton.swift; sourceTree = ""; }; C8763EBC2D7CCC1E00D535E6 /* LineNumberView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LineNumberView.swift; sourceTree = ""; }; @@ -64,6 +87,13 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + C81FA96E2D8989C000A33FEC /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ @@ -71,6 +101,7 @@ isa = PBXGroup; children = ( C81EF1382D6B1D6A001B3B5E /* DevLint */, + C81FA9722D8989C000A33FEC /* DevLintTests */, C81EF1372D6B1D6A001B3B5E /* Products */, ); sourceTree = ""; @@ -79,6 +110,7 @@ isa = PBXGroup; children = ( C81EF1362D6B1D6A001B3B5E /* DevLint.app */, + C81FA9712D8989C000A33FEC /* DevLintTests.xctest */, ); name = Products; sourceTree = ""; @@ -93,6 +125,7 @@ C8F849B32D75AD81002090E5 /* Views */, C8EF75F52D7CC0370023A5DE /* Components */, C8F849B92D75AF32002090E5 /* Utilities */, + C81FA9672D897B4100A33FEC /* SyntaxHighlighting */, C8EF75F22D7CBEBD0023A5DE /* Resources */, C8EF75F32D7CBEE30023A5DE /* Supporting Files */, C81EF1422D6B1D6E001B3B5E /* DevLint.entitlements */, @@ -109,6 +142,35 @@ path = "Preview Content"; sourceTree = ""; }; + C81FA9672D897B4100A33FEC /* SyntaxHighlighting */ = { + isa = PBXGroup; + children = ( + C81FA9812D899F8D00A33FEC /* Tokenizer */, + C8F849BA2D75AF4A002090E5 /* SyntaxHighlighter.swift */, + ); + path = SyntaxHighlighting; + sourceTree = ""; + }; + C81FA9722D8989C000A33FEC /* DevLintTests */ = { + isa = PBXGroup; + children = ( + C81FA9732D8989C000A33FEC /* DevLintTests.swift */, + C81FA97A2D898A0E00A33FEC /* TokenizerTests.swift */, + ); + path = DevLintTests; + sourceTree = ""; + }; + C81FA9812D899F8D00A33FEC /* Tokenizer */ = { + isa = PBXGroup; + 
children = ( + C81FA96A2D897B6900A33FEC /* Token.swift */, + C81FA9682D897B5B00A33FEC /* Tokenizer.swift */, + C81FA97D2D899CB100A33FEC /* LanguageRules.swift */, + C81FA97F2D899CD200A33FEC /* SwiftLanguageRules.swift */, + ); + path = Tokenizer; + sourceTree = ""; + }; C887B66F2D77784A00AE1A03 /* Models */ = { isa = PBXGroup; children = ( @@ -161,7 +223,6 @@ C8EF75F42D7CC0250023A5DE /* Core */ = { isa = PBXGroup; children = ( - C8F849BA2D75AF4A002090E5 /* SyntaxHighlighter.swift */, C8AC32312D7AF2B7003F496D /* ThemeManager.swift */, ); path = Core; @@ -230,6 +291,24 @@ productReference = C81EF1362D6B1D6A001B3B5E /* DevLint.app */; productType = "com.apple.product-type.application"; }; + C81FA9702D8989C000A33FEC /* DevLintTests */ = { + isa = PBXNativeTarget; + buildConfigurationList = C81FA9772D8989C000A33FEC /* Build configuration list for PBXNativeTarget "DevLintTests" */; + buildPhases = ( + C81FA96D2D8989C000A33FEC /* Sources */, + C81FA96E2D8989C000A33FEC /* Frameworks */, + C81FA96F2D8989C000A33FEC /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + C81FA9762D8989C000A33FEC /* PBXTargetDependency */, + ); + name = DevLintTests; + productName = DevLintTests; + productReference = C81FA9712D8989C000A33FEC /* DevLintTests.xctest */; + productType = "com.apple.product-type.bundle.unit-test"; + }; /* End PBXNativeTarget section */ /* Begin PBXProject section */ @@ -243,6 +322,10 @@ C81EF1352D6B1D6A001B3B5E = { CreatedOnToolsVersion = 14.2; }; + C81FA9702D8989C000A33FEC = { + CreatedOnToolsVersion = 14.2; + TestTargetID = C81EF1352D6B1D6A001B3B5E; + }; }; }; buildConfigurationList = C81EF1312D6B1D6A001B3B5E /* Build configuration list for PBXProject "DevLint" */; @@ -262,6 +345,7 @@ projectRoot = ""; targets = ( C81EF1352D6B1D6A001B3B5E /* DevLint */, + C81FA9702D8989C000A33FEC /* DevLintTests */, ); }; /* End PBXProject section */ @@ -276,6 +360,13 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + C81FA96F2D8989C000A33FEC /* Resources */ = { + isa = 
PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; /* End PBXResourcesBuildPhase section */ /* Begin PBXSourcesBuildPhase section */ @@ -284,7 +375,9 @@ buildActionMask = 2147483647; files = ( C8F849BB2D75AF4A002090E5 /* SyntaxHighlighter.swift in Sources */, + C81FA9692D897B5B00A33FEC /* Tokenizer.swift in Sources */, C8601D472D7EFA770058AEAB /* ToolbarButton.swift in Sources */, + C81FA9802D899CD200A33FEC /* SwiftLanguageRules.swift in Sources */, C8AC32342D7AF3ED003F496D /* CodeEditorView.swift in Sources */, C8AC32302D7AF297003F496D /* CodeEditorViewModel.swift in Sources */, C81EF13C2D6B1D6A001B3B5E /* ContentView.swift in Sources */, @@ -293,6 +386,7 @@ C8763EBD2D7CCC1E00D535E6 /* LineNumberView.swift in Sources */, C8763EC12D7CCD1000D535E6 /* CodeInputView.swift in Sources */, C8763EC32D7CCD4000D535E6 /* CodeOutputView.swift in Sources */, + C81FA96B2D897B6900A33FEC /* Token.swift in Sources */, C8AC32322D7AF2B7003F496D /* ThemeManager.swift in Sources */, C8763EBF2D7CCCBE00D535E6 /* EditorSectionView.swift in Sources */, C8B705482D88415700034249 /* SwiftFormatAdapter.swift in Sources */, @@ -301,11 +395,29 @@ C8EF75F72D7CC0910023A5DE /* Extensions.swift in Sources */, C8B705452D88410500034249 /* CodeFormatter.swift in Sources */, C8F849B52D75AEF1002090E5 /* CodeEditorContainer.swift in Sources */, + C81FA97E2D899CB100A33FEC /* LanguageRules.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + C81FA96D2D8989C000A33FEC /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + C81FA9742D8989C000A33FEC /* DevLintTests.swift in Sources */, + C81FA97B2D898A0E00A33FEC /* TokenizerTests.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; /* End PBXSourcesBuildPhase section */ +/* Begin PBXTargetDependency section */ + C81FA9762D8989C000A33FEC /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 
C81EF1352D6B1D6A001B3B5E /* DevLint */; + targetProxy = C81FA9752D8989C000A33FEC /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + /* Begin XCBuildConfiguration section */ C81EF1432D6B1D6E001B3B5E /* Debug */ = { isa = XCBuildConfiguration; @@ -472,6 +584,38 @@ }; name = Release; }; + C81FA9782D8989C000A33FEC /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + BUNDLE_LOADER = "$(TEST_HOST)"; + CODE_SIGN_STYLE = Automatic; + CURRENT_PROJECT_VERSION = 1; + GENERATE_INFOPLIST_FILE = YES; + MARKETING_VERSION = 1.0; + PRODUCT_BUNDLE_IDENTIFIER = com.codeAlligator.DevLintTests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_EMIT_LOC_STRINGS = NO; + SWIFT_VERSION = 5.0; + TEST_HOST = "$(BUILT_PRODUCTS_DIR)/DevLint.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/DevLint"; + }; + name = Debug; + }; + C81FA9792D8989C000A33FEC /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + BUNDLE_LOADER = "$(TEST_HOST)"; + CODE_SIGN_STYLE = Automatic; + CURRENT_PROJECT_VERSION = 1; + GENERATE_INFOPLIST_FILE = YES; + MARKETING_VERSION = 1.0; + PRODUCT_BUNDLE_IDENTIFIER = com.codeAlligator.DevLintTests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_EMIT_LOC_STRINGS = NO; + SWIFT_VERSION = 5.0; + TEST_HOST = "$(BUILT_PRODUCTS_DIR)/DevLint.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/DevLint"; + }; + name = Release; + }; /* End XCBuildConfiguration section */ /* Begin XCConfigurationList section */ @@ -493,6 +637,15 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + C81FA9772D8989C000A33FEC /* Build configuration list for PBXNativeTarget "DevLintTests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + C81FA9782D8989C000A33FEC /* Debug */, + C81FA9792D8989C000A33FEC /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; /* End XCConfigurationList section */ /* Begin XCRemoteSwiftPackageReference section */ diff --git a/DevLint/DevLint/Core/SyntaxHighlighter.swift 
import Foundation

// MARK: - LanguageRules.swift

/// Describes the lexical vocabulary of one programming language as a collection of
/// string sets, one per token category, plus a lookup that classifies a lexeme.
///
/// Every requirement is a `Set<String>` of exact lexemes. The trailing comment on each
/// requirement gives examples of what the category is meant to hold.
protocol LanguageRules {
    var keywords: Set<String> { get }             // func, var, let, if, else
    var identifiers: Set<String> { get }          // Variable and function names (not predefined)
    var numbers: Set<String> { get }              // Integers, floats, etc.
    var decimalNumbers: Set<String> { get }       // Floating-point numbers (e.g., 3.14)
    var exponentNumbers: Set<String> { get }      // Scientific notation (e.g., 1.23e10)
    var strings: Set<String> { get }              // "Hello, World!"
    var rawStrings: Set<String> { get }           // #"Hello, World!"#
    var multiLineStrings: Set<String> { get }     // """Hello\nWorld!"""
    var interpolatedStrings: Set<String> { get }  // "Hello \(name)!"
    var characters: Set<String> { get }           // 'A', 'B', etc.
    var booleans: Set<String> { get }             // true, false
    var operatorSymbols: Set<String> { get }      // +, -, *, /, =, ==, !=, etc.
    var punctuation: Set<String> { get }          // { }, ( ), [ ], ; , . , ,
    var comments: Set<String> { get }             // //, /* */ (Single/Multi-line)
    var directives: Set<String> { get }           // #if, #endif, #define (Preprocessor directives)
    var escapeSequences: Set<String> { get }      // \n, \t, etc.
    var annotations: Set<String> { get }          // @available, @objc, etc.
    var attributes: Set<String> { get }           // Swift attributes (@available, @objc)
    var types: Set<String> { get }                // Int, String, Double, custom class names
    var accessModifiers: Set<String> { get }      // public, private, internal, protected
    var storageModifiers: Set<String> { get }     // static, final, override, mutating
    var functionCalls: Set<String> { get }        // print(), customFunction()
    var macros: Set<String> { get }               // #macro (Used in some languages)
    var nullLiterals: Set<String> { get }         // null, nil
    var escapedIdentifiers: Set<String> { get }   // Backtick-wrapped names (`reserved`)
    var hexNumbers: Set<String> { get }           // 0xFF, 0x1A
    var binaryNumbers: Set<String> { get }        // 0b1010, 0b1101
    var octalNumbers: Set<String> { get }         // 0o77, 0o123
    var regexLiterals: Set<String> { get }        // /pattern/ (Used in JS, Swift 5.7+)
    var genericTypes: Set<String> { get }         // Angle-bracketed generic clauses, e.g. <T>
    var shebang: Set<String> { get }              // #!/usr/bin/swift
    var parameterLabels: Set<String> { get }      // firstName in func greet(firstName name: String)
    var tupleTypes: Set<String> { get }           // (x: Int, y: Int)
    var metatypes: Set<String> { get }            // .Type, .Protocol
    var keyPaths: Set<String> { get }             // \Person.name
    var implicitParameters: Set<String> { get }   // $0, $1 in closures
    var dollarIdentifiers: Set<String> { get }    // $var (Swift internals)
    var patternMatching: Set<String> { get }      // case .some(_)
    var lazyStorage: Set<String> { get }          // _storage variables
    var dependencyMacros: Set<String> { get }     // #if canImport
    var moduleNames: Set<String> { get }          // import MyCustomModule
    var escapedNewlines: Set<String> { get }      // \ at EOL for line continuation

    /// Classifies a single lexeme.
    /// - Parameter value: The exact text of the lexeme.
    /// - Returns: The token category the rules assign to `value`.
    func getTokenType(for value: String) -> Token.TokenType
}

// MARK: - SwiftLanguageRules.swift

/// `LanguageRules` for Swift.
///
/// Only categories with a fixed vocabulary carry entries. The empty sets are categories
/// that, per their comments, are expected to be recognised by pattern or context rather
/// than by lookup; `getTokenType(for:)` does not consult them.
struct SwiftLanguageRules: LanguageRules {
    let keywords: Set<String> = [
        "class", "deinit", "enum", "extension", "func", "import", "init", "let", "protocol", "struct",
        "subscript", "typealias", "var", "break", "case", "continue", "default", "do", "else",
        "fallthrough", "for", "if", "in", "return", "switch", "where", "while", "as", "catch",
        "is", "rethrows", "super", "self", "throw", "throws", "try", "_", "associatedtype",
        "convenience", "didSet", "get", "guard", "indirect", "inout", "lazy", "nonmutating",
        "optional", "required", "set", "some", "unowned", "weak", "willSet", "actor", "async",
        "await"
    ]
    let identifiers: Set<String> = []          // Variable/function names identified dynamically
    let numbers: Set<String> = []              // Handled via regex (e.g., 123)
    let decimalNumbers: Set<String> = []       // Handled via regex (e.g., 45.6)
    let exponentNumbers: Set<String> = []      // Handled via regex (e.g., 1e-5)
    let strings: Set<String> = []              // Handled via regex (e.g., "Hello")
    let rawStrings: Set<String> = []           // Handled via regex (e.g., #"Hello"#)
    let multiLineStrings: Set<String> = []     // Handled via regex (e.g., """Hello""")
    let interpolatedStrings: Set<String> = []  // Handled via regex (e.g., "Hello \(name)")
    let characters: Set<String> = []           // Handled via regex (e.g., 'A')
    let booleans: Set<String> = ["true", "false"]
    let operatorSymbols: Set<String> = ["+", "-", "*", "/", "=", "==", "!=", ">", "<", ">=", "<=", "&&", "||", "!", "&", "|", "^", "%"]
    let punctuation: Set<String> = ["{", "}", "(", ")", "[", "]", ";", ".", ","]
    let comments: Set<String> = ["//", "/*", "*/"]
    let directives: Set<String> = [
        "#if", "#else", "#elseif", "#endif", "#error", "#warning", "#define", "#import",
        "#available", "#colorLiteral", "#column", "#dsohandle", "#file", "#fileID", "#fileLiteral",
        "#function", "#imageLiteral", "#keyPath", "#line", "#selector", "#sourceLocation"
    ]
    let escapeSequences: Set<String> = ["\\n", "\\t", "\\r", "\\\""]
    let annotations: Set<String> = ["@available", "@objc"]
    let attributes: Set<String> = ["@available", "@objc"] // Could expand with @MainActor, @escaping, etc.
    let types: Set<String> = ["Int", "String", "Double", "Bool", "Array", "Dictionary"]
    let accessModifiers: Set<String> = ["fileprivate", "internal", "private", "public", "open"]
    let storageModifiers: Set<String> = ["static", "final", "override", "mutating"]
    let functionCalls: Set<String> = []        // Identified dynamically
    let macros: Set<String> = ["#macro"]
    // Two separate entries: a single "nil, null" string would never match either literal.
    let nullLiterals: Set<String> = ["nil", "null"]
    let genericTypes: Set<String> = []         // Identified dynamically (e.g., <T>)
    let escapedIdentifiers: Set<String> = []   // Identified dynamically (e.g., `var`)
    let hexNumbers: Set<String> = []           // Handled via regex (e.g., 0xFF)
    let binaryNumbers: Set<String> = []        // Handled via regex (e.g., 0b1010)
    let octalNumbers: Set<String> = []         // Handled via regex (e.g., 0o77)
    let regexLiterals: Set<String> = []        // Handled via regex (e.g., /pattern/)
    let shebang: Set<String> = []              // Handled via regex (e.g., #!/usr/bin/swift)
    let parameterLabels: Set<String> = []      // Identified dynamically (e.g., for: in func foo(for: Int))
    let tupleTypes: Set<String> = []           // Identified dynamically (e.g., (Int, String))
    let metatypes: Set<String> = []            // Could include .Type, .Protocol dynamically
    let keyPaths: Set<String> = []             // Handled via regex (e.g., \Person.name)
    let implicitParameters: Set<String> = []   // Identified dynamically (e.g., $0, $1)
    let dollarIdentifiers: Set<String> = []    // Identified dynamically (e.g., $value)
    let patternMatching: Set<String> = []      // Could include ~=
    let lazyStorage: Set<String> = []          // Lazy handled in keywords, could expand
    let dependencyMacros: Set<String> = []     // For macro system dependencies
    let moduleNames: Set<String> = []          // Identified dynamically (e.g., Swift, Foundation)
    let escapedNewlines: Set<String> = []      // Handled via regex (e.g., line continuation)

    /// Classifies `value` by exact membership in the fixed vocabularies.
    ///
    /// The sets are checked in the order written below and the first match wins, so a
    /// lexeme present in several sets takes the earliest category. `attributes` and the
    /// empty sets are not consulted.
    /// - Parameter value: The exact text of the lexeme.
    /// - Returns: The matching category, or `.identifier` when no set contains `value`.
    func getTokenType(for value: String) -> Token.TokenType {
        if keywords.contains(value) { return .keyword }
        if booleans.contains(value) { return .boolean }
        if operatorSymbols.contains(value) { return .operatorSymbol }
        if punctuation.contains(value) { return .punctuation }
        if comments.contains(value) { return .comment }
        if directives.contains(value) { return .directive }
        if escapeSequences.contains(value) { return .escapeSequence }
        if annotations.contains(value) { return .annotation }
        if types.contains(value) { return .type }
        if accessModifiers.contains(value) { return .accessModifier }
        if storageModifiers.contains(value) { return .storageModifier }
        if macros.contains(value) { return .macro }
        if nullLiterals.contains(value) { return .nullLiteral }
        return .identifier
    }
}

// MARK: - Token.swift

/// Represents a single token in the source code.
struct Token {
    /// The lexical category of a token.
    enum TokenType {
        case keyword                // e.g., func, var, let, if, else
        case identifier             // Variable and function names
        case number                 // Integers, floats, etc.
        case string                 // "Hello, World!"
        case interpolatedString     // "Hello \(name)!"
        case character              // 'A', 'B', etc.
        case boolean                // true, false
        case operatorSymbol         // +, -, *, /, =, ==, !=, etc.
        case punctuation            // { }, ( ), [ ], ; , . , ,
        case comment                // // Single-line, /* Multi-line */
        case directive              // #if, #endif, #define (Preprocessor directives)
        case whitespace             // Spaces, tabs, newlines
        case newline                // Line breaks
        case escapeSequence         // \n, \t, etc.
        case annotation             // @available, @objc, etc.
        case type                   // Int, String, Double, custom class names
        case accessModifier         // public, private, internal, protected
        case storageModifier        // static, final, override, mutating
        case functionCall           // print(), customFunction()
        case macro                  // #macro (used in some languages)
        case unknown                // Fallback category for anything unrecognized
        case nullLiteral            // null, nil
        case preprocessorDirective  // Preprocessor directives like #define, #import
        case escapedIdentifier      // Backtick-wrapped names (`reserved`)
        case hexNumber              // 0xFF, 0x1A
        case binaryNumber           // 0b1010, 0b1101
        case octalNumber            // 0o77, 0o123
        case regexLiteral           // /pattern/ (used in JS, Swift 5.7+)
        case genericType            // Angle-bracketed generic clause, e.g. <T>
    }

    let type: TokenType  // The category of this token
    let value: String    // The actual text of the token
    let range: NSRange   // Position in the source code
}
{ + let currentChar = code[position] + + // Skip whitespace + if currentChar.isWhitespace { + advance() + return Token(type: .whitespace, value: String(currentChar), range: NSRange(location: 0, length: 1)) + } + + // Identify token types + if currentChar.isNumber { + return readNumber() + } else if currentChar.isLetter { + return readIdentifierOrKeyword() + } else if currentChar == "\"" { + return readString() + } else if currentChar == "'" { + return readCharacterLiteral() + } else if isOperator(currentChar) { + return readOperator() + } else if isPunctuation(currentChar) { + return readPunctuation() + } else if currentChar == "/" && peekNext() == "/" { + return readSingleLineComment() + } else if currentChar == "/" && peekNext() == "*" { + return readMultiLineComment() + } else if currentChar == "@" { + return readAnnotation() + } else if currentChar == "#" { + return readPreprocessorDirective() + } else if currentChar == "<" { + return readGenericType() + } else if currentChar == "/" { + return readRegexLiteral() + } + + // Handle unknown token + advance() + return Token(type: .unknown, value: String(currentChar), range: NSRange(location: 0, length: 1)) + } + + /// Reads numbers, including hexadecimal, binary, and octal. + private func readNumber() -> Token { + var value = "" + + if code[position] == "0" { + advance() + if position < code.endIndex { + let nextChar = code[position] + if nextChar == "x" { + return readHexNumber() + } else if nextChar == "b" { + return readBinaryNumber() + } else if nextChar == "o" { + return readOctalNumber() + } + } + } + + while position < code.endIndex, code[position].isNumber { + value.append(code[position]) + advance() + } + + return Token(type: .number, value: value, range: NSRange(location: 0, length: value.count)) + } + + + /// Reads interpolated strings like "Hello \(name)!". 
+ private func readString() -> Token { + var value = "\"" + advance() + + while position < code.endIndex, code[position] != "\"" { + if code[position] == "\\" && peekNext() == "(" { + value.append("\\(") + advance() + advance() + while position < code.endIndex, code[position] != ")" { + value.append(code[position]) + advance() + } + advance() + value.append(")") + } else { + value.append(code[position]) + advance() + } + } + advance() + value.append("\"") + + return Token(type: .string, value: value, range: NSRange(location: 0, length: value.count)) + } + + /// Reads character literals. + private func readCharacterLiteral() -> Token { + var value = "" + advance() // Skip opening quote + + while position < code.endIndex, code[position] != "'" { + value.append(code[position]) + advance() + } + advance() // Skip closing quote + + return Token(type: .character, value: value, range: NSRange(location: 0, length: value.count)) + } + + /// Reads regex literals /pattern/. + private func readRegexLiteral() -> Token { + var value = "" + advance() + + while position < code.endIndex, code[position] != "/" { + value.append(code[position]) + advance() + } + advance() + + return Token(type: .regexLiteral, value: value, range: NSRange(location: 0, length: value.count)) + } + + /// Reads generic types like . + private func readGenericType() -> Token { + var value = "" + + while position < code.endIndex, code[position] != ">" { + value.append(code[position]) + advance() + } + advance() + + return Token(type: .genericType, value: "<" + value + ">", range: NSRange(location: 0, length: value.count)) + } + + /// Reads identifiers, keywords, boolean literals, or null literals. 
+    private func readIdentifierOrKeyword() -> Token {
+        // A leading backtick starts an escaped identifier such as `class`.
+        if position < code.endIndex, code[position] == "`" {
+            return readEscapedIdentifier()
+        }
+
+        var value = ""
+        while position < code.endIndex, isIdentifierCharacter(code[position]) {
+            value.append(code[position])
+            advance()
+        }
+
+        // "true", "false", "null" and "nil" are not in the keyword set, so the
+        // order of these checks does not matter.
+        let type: Token.TokenType
+        switch value {
+        case "true", "false":
+            type = .boolean
+        case "null", "nil":
+            type = .nullLiteral
+        default:
+            type = Self.knownKeywords.contains(value) ? .keyword : .identifier
+        }
+        return makeToken(type, value: value)
+    }
+
+    /// Union of the Swift, Python, JavaScript and C++ keywords recognised by
+    /// `readIdentifierOrKeyword()`.
+    ///
+    /// Stored statically so the set is built once instead of on every call.
+    /// Each word is listed under the first language that introduces it.
+    private static let knownKeywords: Set<String> = [
+        // Swift
+        "func", "var", "let", "if", "else", "return", "class", "struct", "enum", "protocol",
+        "extension", "import", "guard", "do", "catch", "throw", "throws", "try", "defer", "break",
+        "continue", "switch", "case", "default", "fallthrough", "in", "repeat", "while", "for",
+        "where", "as", "is", "super", "self", "init", "deinit", "subscript", "operator",
+        "typealias", "associatedtype", "static", "final", "override", "private", "fileprivate",
+        "internal", "public", "open", "mutating", "nonmutating", "lazy", "weak", "unowned",
+        "indirect", "convenience", "required", "optional", "dynamic", "some", "any",
+
+        // Python (additions)
+        "def", "lambda", "elif", "except", "finally", "raise", "pass", "from", "global",
+        "nonlocal", "with", "assert", "yield", "del", "or", "and", "not", "async", "await",
+
+        // JavaScript (additions)
+        "function", "const", "extends", "constructor", "export", "this", "new", "typeof",
+        "instanceof", "void", "protected",
+
+        // C++ (additions)
+        "int", "double", "float", "char", "bool", "union", "namespace", "using", "typedef",
+        "template", "delete", "friend", "virtual", "explicit", "mutable", "constexpr",
+        "volatile", "extern", "inline", "goto", "sizeof"
+    ]
+
+    /// Returns `true` for characters that may appear inside an identifier:
+    /// letters, digits and the underscore.
+    private func isIdentifierCharacter(_ char: Character) -> Bool {
+        return char.isLetter || char.isNumber || char == "_"
+    }
+
+    /// Reads a backtick-delimited identifier starting at the opening backtick.
+    ///
+    /// Returns an `.escapedIdentifier` token including both backticks. If the
+    /// line or the source ends before the closing backtick, the consumed text is
+    /// returned as `.unknown`.
+    private func readEscapedIdentifier() -> Token {
+        var value = String(code[position]) // opening backtick
+        advance()
+
+        while position < code.endIndex, code[position] != "`", code[position] != "\n" {
+            value.append(code[position])
+            advance()
+        }
+
+        guard position < code.endIndex, code[position] == "`" else {
+            return makeToken(.unknown, value: value)
+        }
+        value.append(code[position]) // closing backtick
+        advance()
+        return makeToken(.escapedIdentifier, value: value)
+    }
+
+    /// Builds a token for `value`.
+    ///
+    /// NOTE(review): every token is created with location 0 and a length counted
+    /// in Characters, whereas NSRange consumers normally expect UTF-16 offsets
+    /// into the source. Presumably the caller rewrites the range; confirm.
+    private func makeToken(_ type: Token.TokenType, value: String) -> Token {
+        return Token(type: type, value: value, range: NSRange(location: 0, length: value.count))
+    }
+
+    /// Reads operators.
+    ///
+    /// Consumes exactly one character; `position` must be a valid index.
+    private func readOperator() -> Token {
+        let value = String(code[position])
+        advance()
+        return makeToken(.operatorSymbol, value: value)
+    }
+
+    /// Reads punctuation.
+    ///
+    /// Consumes exactly one character; `position` must be a valid index.
+    private func readPunctuation() -> Token {
+        let value = String(code[position])
+        advance()
+        return makeToken(.punctuation, value: value)
+    }
+
+    /// Reads single-line comments.
+    ///
+    /// Consumes up to, but not including, the next newline or the end of input.
+    private func readSingleLineComment() -> Token {
+        var value = ""
+        while position < code.endIndex, code[position] != "\n" {
+            value.append(code[position])
+            advance()
+        }
+        return makeToken(.comment, value: value)
+    }
+
+    /// Reads multi-line comments.
+    ///
+    /// Consumes text up to the closing `*/` and then skips the terminator. The
+    /// returned value does not include the closing `*/`. An unterminated comment
+    /// runs to the end of input.
+    private func readMultiLineComment() -> Token {
+        var value = ""
+        while position < code.endIndex, !(code[position] == "*" && peekNext() == "/") {
+            value.append(code[position])
+            advance()
+        }
+
+        // Only skip the terminator when one was found; stepping past endIndex
+        // would trap.
+        if position < code.endIndex {
+            advance() // Move past '*'
+            advance() // Move past '/'
+        }
+        return makeToken(.comment, value: value)
+    }
+
+    /// Moves to the next character in the source code.
+    ///
+    /// Does nothing once the end of the source has been reached.
+    private func advance() {
+        guard position < code.endIndex else { return }
+        position = code.index(after: position)
+    }
+
+    /// Peeks at the next character without consuming it.
+    ///
+    /// Returns `nil` when there is no character after the current one.
+    private func peekNext() -> Character? {
+        guard position < code.endIndex else { return nil }
+        let nextIndex = code.index(after: position)
+        return nextIndex < code.endIndex ? code[nextIndex] : nil
+    }
+
+    /// Checks if a character is an operator.
+    private func isOperator(_ char: Character) -> Bool {
+        return "+-*/=<>!&|^%".contains(char)
+    }
+
+    /// Checks if a character is punctuation.
+    private func isPunctuation(_ char: Character) -> Bool {
+        return ",;:.(){}[]".contains(char)
+    }
+
+    /// Reads an optional leading `sigil` followed by identifier characters.
+    ///
+    /// Only one sigil is consumed, so input such as `@a@b` yields `@a` and leaves
+    /// the rest for the next token.
+    private func readSigilWord(sigil: Character, type: Token.TokenType) -> Token {
+        var value = ""
+
+        if position < code.endIndex, code[position] == sigil {
+            value.append(sigil)
+            advance()
+        }
+        while position < code.endIndex, isIdentifierCharacter(code[position]) {
+            value.append(code[position])
+            advance()
+        }
+
+        return makeToken(type, value: value)
+    }
+
+    /// Reads annotations (e.g., @objc, @Test).
+    private func readAnnotation() -> Token {
+        return readSigilWord(sigil: "@", type: .annotation)
+    }
+
+    /// Reads preprocessor directives (e.g., #define, #import).
+    private func readPreprocessorDirective() -> Token {
+        return readSigilWord(sigil: "#", type: .preprocessorDirective)
+    }
+
+    /// Reads the digits of a prefixed integer literal.
+    ///
+    /// Skips one character (the radix letter) and then consumes characters while
+    /// `isDigit` accepts them. The token value starts with `prefix`; presumably
+    /// the caller has already consumed the leading "0" (confirm).
+    private func readRadixNumber(
+        prefix: String,
+        type: Token.TokenType,
+        isDigit: (Character) -> Bool
+    ) -> Token {
+        var value = prefix
+        advance() // Skip the radix letter
+
+        while position < code.endIndex, isDigit(code[position]) {
+            value.append(code[position])
+            advance()
+        }
+
+        return makeToken(type, value: value)
+    }
+
+    /// Reads hex number (e.g., 0xFF, 0x1A)
+    private func readHexNumber() -> Token {
+        return readRadixNumber(prefix: "0x", type: .hexNumber) { $0.isHexDigit }
+    }
+
+    /// Reads Binary number (e.g., 0b1010, 0b1101)
+    private func readBinaryNumber() -> Token {
+        return readRadixNumber(prefix: "0b", type: .binaryNumber) { "01".contains($0) }
+    }
+
+    /// Reads Octal number (e.g., 0o77, 0o123)
+    private func readOctalNumber() -> Token {
+        return readRadixNumber(prefix: "0o", type: .octalNumber) { "01234567".contains($0) }
+    }
+
+
+}
diff --git a/DevLint/DevLintTests/DevLintTests.swift b/DevLint/DevLintTests/DevLintTests.swift
new file mode 100644
index 0000000..8430fac
--- /dev/null
+++ b/DevLint/DevLintTests/DevLintTests.swift
@@ -0,0 +1,35 @@
+//
+//  DevLintTests.swift
+//  DevLintTests
+//
+//  Created by CS Prasad on 18/03/25.
+//
+
+import XCTest
+
+final class DevLintTests: XCTestCase {
+
+    override func setUpWithError() throws {
+        // Put setup code here. This method is called before the invocation of each test method in the class.
+    }
+
+    override func tearDownWithError() throws {
+        // Put teardown code here. This method is called after the invocation of each test method in the class.
+    }
+
+    func testExample() throws {
+        // This is an example of a functional test case.
+        // Use XCTAssert and related functions to verify your tests produce the correct results.
+        // Any test you write for XCTest can be annotated as throws and async.
+        // Mark your test throws to produce an unexpected failure when your test encounters an uncaught error.
+        // Mark your test async to allow awaiting for asynchronous code to complete. Check the results with assertions afterwards.
+    }
+
+    func testPerformanceExample() throws {
+        // This is an example of a performance test case.
+        measure {
+            // Put the code you want to measure the time of here.
+        }
+    }
+
+}
diff --git a/DevLint/DevLintTests/TokenizerTests.swift b/DevLint/DevLintTests/TokenizerTests.swift
new file mode 100644
index 0000000..29bc70e
--- /dev/null
+++ b/DevLint/DevLintTests/TokenizerTests.swift
@@ -0,0 +1,122 @@
+//
+//  TokenizerTests.swift
+//  DevLintTests
+//
+//  Created by CS Prasad on 18/03/25.
+//
+
+import XCTest
+@testable import DevLint
+
+/// Checks that `Tokenizer` classifies single-token inputs with the expected
+/// `Token.TokenType`.
+class TokenizerTests: XCTestCase {
+
+    /// Tokenizes `input` and asserts that it yields exactly one token of type
+    /// `expected`.
+    ///
+    /// - Parameters:
+    ///   - input: Source text handed to the tokenizer.
+    ///   - expected: Token type the single resulting token must have.
+    ///   - file: Call-site file, forwarded so failures point at the calling test.
+    ///   - line: Call-site line, forwarded so failures point at the calling test.
+    private func assertTokenization(
+        _ input: String,
+        expected: Token.TokenType,
+        file: StaticString = #filePath,
+        line: UInt = #line
+    ) {
+        let tokenizer = Tokenizer(code: input)
+        let tokens = tokenizer.tokenize()
+        XCTAssertEqual(
+            tokens.count,
+            1,
+            "Expected one token but got \(tokens.count)",
+            file: file,
+            line: line
+        )
+        XCTAssertEqual(
+            tokens.first?.type,
+            expected,
+            "Expected \(expected) but got \(tokens.first?.type ?? .unknown)",
+            file: file,
+            line: line
+        )
+    }
+
+
+    func testBooleanLiterals() {
+        assertTokenization("true", expected: .boolean)
+        assertTokenization("false", expected: .boolean)
+    }
+
+    func testNullLiterals() {
+        assertTokenization("nil", expected: .nullLiteral)
+        assertTokenization("null", expected: .nullLiteral)
+    }
+
+    func testAnnotations() {
+        assertTokenization("@objc", expected: .annotation)
+        assertTokenization("@available", expected: .annotation)
+    }
+
+    func testPreprocessorDirectives() {
+        assertTokenization("#define", expected: .preprocessorDirective)
+        assertTokenization("#import", expected: .preprocessorDirective)
+    }
+
+    func testEscapedIdentifiers() {
+        assertTokenization("`reserved`", expected: .escapedIdentifier)
+    }
+
+    func testHexBinaryOctalNumbers() {
+        assertTokenization("0xFF", expected: .hexNumber)
+        assertTokenization("0b1010", expected: .binaryNumber)
+        assertTokenization("0o77", expected: .octalNumber)
+    }
+
+    // New test cases for missing token types
+    func testKeywords() {
+        assertTokenization("func", expected: .keyword)
+        assertTokenization("if", expected: .keyword)
+    }
+
+    func testIdentifiers() {
+        assertTokenization("myVariable", expected: .identifier)
+        assertTokenization("someFunction", expected: .identifier)
+    }
+
+    func testNumbers() {
+        assertTokenization("123", expected: .number)
+        assertTokenization("45.6", expected: .number)
+    }
+
+    func testStrings() {
+        assertTokenization("\"Hello, World!\"", expected: .string)
+    }
+
+    func testCharacterLiterals() {
+        assertTokenization("'A'", expected: .character)
+    }
+
+    func testEscapeSequences() {
+        // Raw strings keep the backslash, so the tokenizer receives the two-character
+        // source text `\n` / `\t` rather than a real newline or tab character.
+        assertTokenization(#"\n"#, expected: .escapeSequence)
+        assertTokenization(#"\t"#, expected: .escapeSequence)
+    }
+
+    func testOperators() {
+        assertTokenization("+", expected: .operatorSymbol)
+        assertTokenization("!=", expected: .operatorSymbol)
+    }
+
+    func testPunctuation() {
+        assertTokenization("(", expected: .punctuation)
+        assertTokenization(")", expected: .punctuation)
+    }
+
+    func testComments() {
+        assertTokenization("// This is a comment", expected: .comment)
+        assertTokenization("/* Multi-line */", expected: .comment)
+    }
+
+    // NOTE(review): Tokenizer.readIdentifierOrKeyword lists "public", "private",
+    // "static" and "final" as keywords; unless tokenize() reclassifies them, the
+    // two modifier tests below will see `.keyword`. Confirm.
+    func testAccessModifiers() {
+        assertTokenization("public", expected: .accessModifier)
+        assertTokenization("private", expected: .accessModifier)
+    }
+
+    func testStorageModifiers() {
+        assertTokenization("static", expected: .storageModifier)
+        assertTokenization("final", expected: .storageModifier)
+    }
+
+    func testFunctionCalls() {
+        assertTokenization("print()", expected: .functionCall)
+        assertTokenization("myFunction()", expected: .functionCall)
+    }
+
+    func testGenericTypes() {
+        // NOTE(review): both inputs are empty strings, which cannot yield a token.
+        // Generic-parameter text such as `<T>` was presumably lost; restore and confirm.
+        assertTokenization("", expected: .genericType)
+        assertTokenization("", expected: .genericType)
+    }
+
+    func testInterpolatedStrings() {
+        // Raw string: a plain literal would interpolate XCTestCase's own `name`
+        // property instead of passing the `\(name)` syntax to the tokenizer.
+        assertTokenization(#""Hello \(name)!""#, expected: .interpolatedString)
+    }
+
+    func testRegexLiterals() {
+        assertTokenization("/pattern/", expected: .regexLiteral)
+    }
+}