Skip to content

Commit 91e7c51

Browse files
produce string array of potential matched phrases
- expose ContainmentEvaluator to produce string array from normal form - add PhraseCollectionConvertible to make Expressions return their phrase contents
1 parent 8f9921a commit 91e7c51

6 files changed

Lines changed: 259 additions & 0 deletions

File tree

SearchExpressionParser.xcodeproj/project.pbxproj

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@
3131
5048DFAC2078D0F900BAA4B0 /* AnyEquatable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5048DFAB2078D0F900BAA4B0 /* AnyEquatable.swift */; };
3232
5048DFAE2078D50500BAA4B0 /* TokenBuffer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5048DFAD2078D50500BAA4B0 /* TokenBuffer.swift */; };
3333
5048DFB02078D57D00BAA4B0 /* XCTAssertEqual+Expression.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5048DFAF2078D57D00BAA4B0 /* XCTAssertEqual+Expression.swift */; };
34+
50765461207DE07D0090D5D2 /* ContainmentEvaluator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50765460207DE07D0090D5D2 /* ContainmentEvaluator.swift */; };
35+
50765463207DE1670090D5D2 /* ContainmentEvaluatorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50765462207DE1670090D5D2 /* ContainmentEvaluatorTests.swift */; };
3436
50765465207DEBC00090D5D2 /* Descriptions.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50765464207DEBC00090D5D2 /* Descriptions.swift */; };
37+
50765467207DEF160090D5D2 /* PhraseCollectionConvertibleTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50765466207DEF160090D5D2 /* PhraseCollectionConvertibleTests.swift */; };
38+
50765469207DF05C0090D5D2 /* PhraseCollectionConvertible.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50765468207DF05C0090D5D2 /* PhraseCollectionConvertible.swift */; };
3539
50976269207937820002881A /* BalanceParenthesesTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50976268207937820002881A /* BalanceParenthesesTests.swift */; };
3640
5097626B207B44620002881A /* SearchExpressionParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5097626A207B44620002881A /* SearchExpressionParser.swift */; };
3741
50976270207B47760002881A /* ExpressionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5097626F207B47760002881A /* ExpressionTests.swift */; };
@@ -75,7 +79,11 @@
7579
5048DFAB2078D0F900BAA4B0 /* AnyEquatable.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AnyEquatable.swift; sourceTree = "<group>"; };
7680
5048DFAD2078D50500BAA4B0 /* TokenBuffer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TokenBuffer.swift; sourceTree = "<group>"; };
7781
5048DFAF2078D57D00BAA4B0 /* XCTAssertEqual+Expression.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "XCTAssertEqual+Expression.swift"; sourceTree = "<group>"; };
82+
50765460207DE07D0090D5D2 /* ContainmentEvaluator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContainmentEvaluator.swift; sourceTree = "<group>"; };
83+
50765462207DE1670090D5D2 /* ContainmentEvaluatorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContainmentEvaluatorTests.swift; sourceTree = "<group>"; };
7884
50765464207DEBC00090D5D2 /* Descriptions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Descriptions.swift; sourceTree = "<group>"; };
85+
50765466207DEF160090D5D2 /* PhraseCollectionConvertibleTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PhraseCollectionConvertibleTests.swift; sourceTree = "<group>"; };
86+
50765468207DF05C0090D5D2 /* PhraseCollectionConvertible.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PhraseCollectionConvertible.swift; sourceTree = "<group>"; };
7987
50976268207937820002881A /* BalanceParenthesesTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BalanceParenthesesTests.swift; sourceTree = "<group>"; };
8088
5097626A207B44620002881A /* SearchExpressionParser.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SearchExpressionParser.swift; sourceTree = "<group>"; };
8189
5097626F207B47760002881A /* ExpressionTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ExpressionTests.swift; sourceTree = "<group>"; };
@@ -125,6 +133,7 @@
125133
5097626A207B44620002881A /* SearchExpressionParser.swift */,
126134
5048DF8020789DC800BAA4B0 /* Tokenization */,
127135
5048DFA42078CFA400BAA4B0 /* Parsing */,
136+
5076545F207DE05A0090D5D2 /* NormalForm */,
128137
5048DF8320789E1C00BAA4B0 /* Either.swift */,
129138
5048DF622078985800BAA4B0 /* Info.plist */,
130139
);
@@ -138,6 +147,8 @@
138147
5048DFA92078D00F00BAA4B0 /* ParserTests.swift */,
139148
50976268207937820002881A /* BalanceParenthesesTests.swift */,
140149
5097626F207B47760002881A /* ExpressionTests.swift */,
150+
50765462207DE1670090D5D2 /* ContainmentEvaluatorTests.swift */,
151+
50765466207DEF160090D5D2 /* PhraseCollectionConvertibleTests.swift */,
141152
50765464207DEBC00090D5D2 /* Descriptions.swift */,
142153
5048DF8520789FD300BAA4B0 /* TestHelpers.swift */,
143154
5048DFAF2078D57D00BAA4B0 /* XCTAssertEqual+Expression.swift */,
@@ -193,6 +204,15 @@
193204
path = Parsing;
194205
sourceTree = "<group>";
195206
};
207+
5076545F207DE05A0090D5D2 /* NormalForm */ = {
208+
isa = PBXGroup;
209+
children = (
210+
50765460207DE07D0090D5D2 /* ContainmentEvaluator.swift */,
211+
50765468207DF05C0090D5D2 /* PhraseCollectionConvertible.swift */,
212+
);
213+
path = NormalForm;
214+
sourceTree = "<group>";
215+
};
196216
/* End PBXGroup section */
197217

198218
/* Begin PBXHeadersBuildPhase section */
@@ -308,11 +328,13 @@
308328
5048DF982078B1D700BAA4B0 /* PhraseExtractor.swift in Sources */,
309329
5048DF8220789DE400BAA4B0 /* TokenExtractor.swift in Sources */,
310330
5048DF7D20789B5000BAA4B0 /* TokenCharacterBuffer.swift in Sources */,
331+
50765461207DE07D0090D5D2 /* ContainmentEvaluator.swift in Sources */,
311332
5048DF9E2078BC3C00BAA4B0 /* NotExtractor.swift in Sources */,
312333
5048DFA62078CFB700BAA4B0 /* Parser.swift in Sources */,
313334
5048DFAE2078D50500BAA4B0 /* TokenBuffer.swift in Sources */,
314335
5048DF962078AB2A00BAA4B0 /* ParensExtractors.swift in Sources */,
315336
5048DF922078A61800BAA4B0 /* WordExtractor.swift in Sources */,
337+
50765469207DF05C0090D5D2 /* PhraseCollectionConvertible.swift in Sources */,
316338
5048DF9C2078B6F600BAA4B0 /* BangExtractor.swift in Sources */,
317339
5048DFA02078BF3600BAA4B0 /* AndExtractor.swift in Sources */,
318340
5048DFA22078C02400BAA4B0 /* OrExtractor.swift in Sources */,
@@ -326,9 +348,11 @@
326348
isa = PBXSourcesBuildPhase;
327349
buildActionMask = 2147483647;
328350
files = (
351+
50765463207DE1670090D5D2 /* ContainmentEvaluatorTests.swift in Sources */,
329352
50976269207937820002881A /* BalanceParenthesesTests.swift in Sources */,
330353
5048DFAA2078D00F00BAA4B0 /* ParserTests.swift in Sources */,
331354
5048DF6D2078985800BAA4B0 /* TokenizerTests.swift in Sources */,
355+
50765467207DEF160090D5D2 /* PhraseCollectionConvertibleTests.swift in Sources */,
332356
50765465207DEBC00090D5D2 /* Descriptions.swift in Sources */,
333357
50976270207B47760002881A /* ExpressionTests.swift in Sources */,
334358
5048DFB02078D57D00BAA4B0 /* XCTAssertEqual+Expression.swift in Sources */,
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Copyright © 2018 Christian Tietze. All rights reserved. Distributed under the MIT License.
2+
3+
/// Takes an `Expression` and returns an array of String objects that
4+
/// represent the positive values, i.e. the search string parts that are
5+
/// supposed to be part of the haystack.
6+
///
7+
/// Useful to highlight matches.
8+
///
9+
/// Example:
10+
///
11+
/// foo AND bar and NOT baz
12+
///
13+
/// Returns `["foo", "bar"]` because the NOT clause indicates that `"baz"` is
14+
/// not supposed to be part of the haystack.
15+
///
16+
/// Performs normalization.
17+
public struct ContainmentEvaluator {

    /// An expression that can also report the phrases it contains.
    public typealias Evaluable = Expression & PhraseCollectionConvertible

    /// Thrown when negation rewriting nests deeper than `maxRecursion`.
    public struct RecursionTooDeepError: Error {
        public init() {}
    }

    /// The expression tree that is normalized and queried for phrases.
    public let evaluable: Evaluable

    /// Upper bound for the number of nested negation rewrites
    /// (De Morgan steps and double-negation eliminations).
    public let maxRecursion: Int

    public init(evaluable: Evaluable, maxRecursion: Int = 50) {

        self.evaluable = evaluable
        self.maxRecursion = maxRecursion
    }

    /// Produces an array of phrases that may be contained in the
    /// haystack when evaluating `evaluable`.
    ///
    /// These are not the actual phrases that _must_ be contained. These
    /// are phrases that _may_ be contained. `x OR y` will
    /// produce a collection of all candidates: `["x","y"]`.
    ///
    /// See: `normalizedEvaluable()`.
    ///
    /// - returns: Empty array if normalization of `evaluable` recurses too deep.
    public func phrases() -> [String] {
        do {
            return try normalizedEvaluable().phrases
        } catch {
            // Deliberate best effort: callers get "no candidates" instead of an error.
            return []
        }
    }

    /// Negation normal form: negations are pushed inward until they no longer
    /// wrap an `AndNode`, `OrNode`, or another `NotNode`.
    /// - throws: `RecursionTooDeepError` if recursion is too deep. See `maxRecursion` to limit the depth of expressions.
    public func normalizedEvaluable() throws -> Evaluable {
        return try pushNegation(evaluable)
    }

    /// Rewrites `evaluable` so that negations are moved towards the leaves.
    ///
    /// - `!(a AND b)` becomes `!a OR !b`, `!(a OR b)` becomes `!a AND !b` (De Morgan).
    /// - `!!a` becomes `a`.
    /// - Positive `AndNode`/`OrNode` values are rebuilt from their normalized
    ///   operands, so negated groups nested below them are rewritten as well.
    ///
    /// - parameter level: Number of negation rewrites above this call. Walking
    ///   through positive nodes does not increase it, so long positive chains
    ///   are not rejected by `maxRecursion`.
    private func pushNegation(_ evaluable: Evaluable, level: Int = 0) throws -> Evaluable {

        guard level < maxRecursion else { throw RecursionTooDeepError() }

        guard let notNode = evaluable as? NotNode else {
            // Not negated itself, but negations may hide in the operands.
            switch evaluable {
            case let andNode as AndNode:
                return AndNode(
                    try normalizedOperand(andNode.lhs, level: level),
                    try normalizedOperand(andNode.rhs, level: level))

            case let orNode as OrNode:
                return OrNode(
                    try normalizedOperand(orNode.lhs, level: level),
                    try normalizedOperand(orNode.rhs, level: level))

            default:
                return evaluable
            }
        }

        switch notNode.expression {
        case let andSubNode as AndNode:
            return OrNode(
                try pushNegation(NotNode(andSubNode.lhs), level: level + 1),
                try pushNegation(NotNode(andSubNode.rhs), level: level + 1))

        case let orSubNode as OrNode:
            return AndNode(
                try pushNegation(NotNode(orSubNode.lhs), level: level + 1),
                try pushNegation(NotNode(orSubNode.rhs), level: level + 1))

        case let innerNotNode as NotNode:
            // Double negation cancels out. If the inner expression cannot report
            // phrases, keep the node untouched rather than lose it.
            guard let inner = innerNotNode.expression as? Evaluable else { return notNode }
            return try pushNegation(inner, level: level + 1)

        default:
            return notNode
        }
    }

    /// Normalizes an operand of a positive node; operands that are not
    /// `Evaluable` are passed through unchanged.
    private func normalizedOperand(_ expression: Expression, level: Int) throws -> Expression {
        guard let operand = expression as? Evaluable else { return expression }
        return try pushNegation(operand, level: level)
    }
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright © 2018 Christian Tietze. All rights reserved. Distributed under the MIT License.
2+
3+
/// - warning: Works in negation normal form (`ContainmentEvaluator.normalizedEvaluable()`) only.
4+
public protocol PhraseCollectionConvertible {
5+
/// The search phrases collected from this value. The conformances in this
/// file return an empty array, the node's own string, or the concatenation
/// of both operands' phrases.
var phrases: [String] { get }
6+
}
7+
8+
extension AnythingNode: PhraseCollectionConvertible {
    /// Always empty: this node carries no phrase of its own.
    public var phrases: [String] {
        return []
    }
}
11+
12+
extension NotNode: PhraseCollectionConvertible {
    /// Always empty. `NotNode` does not compute the negation of its wrapped
    /// expression; transform the tree to negation normal form first.
    public var phrases: [String] {
        return []
    }
}
16+
17+
extension ContainsNode: PhraseCollectionConvertible {
    /// A single-element array holding this node's `string`.
    public var phrases: [String] {
        return [string]
    }
}
20+
21+
extension AndNode: PhraseCollectionConvertible {
    /// Phrases of the left operand followed by those of the right operand.
    /// An operand that is not `PhraseCollectionConvertible` contributes nothing.
    public var phrases: [String] {
        let leftPhrases = (lhs as? PhraseCollectionConvertible)?.phrases ?? []
        let rightPhrases = (rhs as? PhraseCollectionConvertible)?.phrases ?? []
        return leftPhrases + rightPhrases
    }
}
29+
30+
extension OrNode: PhraseCollectionConvertible {
    /// Phrases of the left operand followed by those of the right operand.
    /// An operand that is not `PhraseCollectionConvertible` contributes nothing.
    public var phrases: [String] {
        let leftPhrases = (lhs as? PhraseCollectionConvertible)?.phrases ?? []
        let rightPhrases = (rhs as? PhraseCollectionConvertible)?.phrases ?? []
        return leftPhrases + rightPhrases
    }
}

SearchExpressionParser/Parsing/Expressions.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ public protocol Expression {
2424

2525
/// Wildcard that is satisfied by any string.
2626
public struct AnythingNode: Expression {
27+
2728
public func isSatisfied(by satisfiable: StringExpressionSatisfiable) -> Bool {
2829
return true
2930
}
@@ -34,6 +35,7 @@ public struct AnythingNode: Expression {
3435
}
3536

3637
public struct ContainsNode: Expression {
38+
3739
public typealias CString = [CChar]
3840
public let string: String
3941
public let cString: CString
@@ -65,6 +67,7 @@ public struct ContainsNode: Expression {
6567
}
6668

6769
public struct NotNode: Expression {
70+
6871
public let expression: Expression
6972

7073
public init(_ expression: Expression) {
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright © 2018 Christian Tietze. All rights reserved. Distributed under the MIT License.
2+
3+
import XCTest
4+
@testable import SearchExpressionParser
5+
6+
class ContainmentEvaluatorTests: XCTestCase {

    // MARK: - Normalization

    /// Runs `evaluable` through a `ContainmentEvaluator` with default settings
    /// and returns its negation normal form.
    func normalForm(_ evaluable: ContainmentEvaluator.Evaluable) throws -> Expression {
        let evaluator = ContainmentEvaluator(evaluable: evaluable)
        return try evaluator.normalizedEvaluable()
    }

    func testNormalized_Anything() {
        guard let result = XCTAssertNoThrows(try normalForm(AnythingNode())) else { return }

        XCTAssertEqual(result, AnythingNode())
    }

    func testNormalized_Contains() {
        guard let result = XCTAssertNoThrows(try normalForm(ContainsNode("something"))) else { return }

        XCTAssertEqual(result, ContainsNode("something"))
    }

    func testNormalized_Not_1LevelDeep_Contains() {
        let negated = NotNode(ContainsNode("x"))

        guard let result = XCTAssertNoThrows(try normalForm(negated)) else { return }

        XCTAssertEqual(result, NotNode(ContainsNode("x")))
    }

    func testNormalized_Not_1LevelDeep_And() {
        let negated = NotNode(AndNode(ContainsNode("x"), ContainsNode("y")))

        guard let result = XCTAssertNoThrows(try normalForm(negated)) else { return }

        // !(x AND y) == !x OR !y
        XCTAssertEqual(
            result,
            OrNode(NotNode(ContainsNode("x")), NotNode(ContainsNode("y"))))
    }

    func testNormalized_Not_1LevelDeep_Or() {
        let negated = NotNode(OrNode(ContainsNode("x"), ContainsNode("y")))

        guard let result = XCTAssertNoThrows(try normalForm(negated)) else { return }

        // !(x OR y) == !x AND !y
        XCTAssertEqual(
            result,
            AndNode(NotNode(ContainsNode("x")), NotNode(ContainsNode("y"))))
    }

    func testNormalized_Not_2LevelsDeep() {
        let negated = NotNode(
            AndNode(
                OrNode(ContainsNode("a"), ContainsNode("b")),
                AndNode(ContainsNode("c"), ContainsNode("d"))))

        guard let result = XCTAssertNoThrows(try normalForm(negated)) else { return }

        // !((a OR b) AND (c AND d)) == (!a AND !b) OR (!c OR !d)
        XCTAssertEqual(
            result,
            OrNode(
                AndNode(NotNode(ContainsNode("a")), NotNode(ContainsNode("b"))),
                OrNode(NotNode(ContainsNode("c")), NotNode(ContainsNode("d")))))
    }

    // MARK: - Phrases

    /// Collects the candidate phrases of `evaluable` via a `ContainmentEvaluator`
    /// with default settings.
    func phrases(_ evaluable: ContainmentEvaluator.Evaluable) -> [String] {
        let evaluator = ContainmentEvaluator(evaluable: evaluable)
        return evaluator.phrases()
    }

    func testPhrases_Anything() {
        XCTAssertEqual(phrases(AnythingNode()), [])
    }

    func testPhrases_Contains() {
        XCTAssertEqual(phrases(ContainsNode("foo")), ["foo"])
        XCTAssertEqual(phrases(ContainsNode("bar")), ["bar"])
    }

    func testPhrases_Not() {
        XCTAssertEqual(phrases(NotNode(ContainsNode("foo"))), [])
        XCTAssertEqual(phrases(NotNode(ContainsNode("bar"))), [])
    }

    func testPhrases_And() {
        let foo = ContainsNode("foo")
        let bar = ContainsNode("bar")

        XCTAssertEqual(phrases(AndNode(foo, bar)), ["foo", "bar"])
        XCTAssertEqual(phrases(AndNode(NotNode(foo), bar)), ["bar"])
        XCTAssertEqual(phrases(AndNode(foo, NotNode(bar))), ["foo"])
    }

    func testPhrases_Or() {
        let foo = ContainsNode("foo")
        let bar = ContainsNode("bar")

        XCTAssertEqual(phrases(OrNode(foo, bar)), ["foo", "bar"])
        XCTAssertEqual(phrases(OrNode(NotNode(foo), bar)), ["bar"])
        XCTAssertEqual(phrases(OrNode(foo, NotNode(bar))), ["foo"])
    }

}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// Copyright © 2018 Christian Tietze. All rights reserved. Distributed under the MIT License.
2+
3+
import XCTest
4+
@testable import SearchExpressionParser
5+
6+
class PhraseCollectionConvertibleTests: XCTestCase {

    /// Checks the `phrases` value of each node type directly, without normalization.
    func testPhrases() {
        let foo = ContainsNode("foo")
        let bar = ContainsNode("bar")

        XCTAssertEqual(AnythingNode().phrases, [])
        XCTAssertEqual(foo.phrases, ["foo"])
        XCTAssertEqual(NotNode(foo).phrases, [])
        XCTAssertEqual(AndNode(foo, bar).phrases, ["foo", "bar"])
        XCTAssertEqual(OrNode(foo, bar).phrases, ["foo", "bar"])
    }
}

0 commit comments

Comments
 (0)