Skip to content

Commit 8ea0c25

Browse files
Repo Assist, Copilot
authored and committed
perf+feat: optimize exists/contains; add distinct/distinctBy/distinctByAsync
Performance (Task 8):
- Add dedicated 'exists' implementation returning bool directly, avoiding the intermediate Option<'T> allocation that the previous tryFind+isSome approach incurred. Both exists/existsAsync and contains now use this path.
- 'contains' also avoids the (=) closure allocation from the old impl.

Coding improvement (Task 5):
- Add TaskSeq.distinct: removes all duplicate elements (keeps first occurrence) using a HashSet, complementing the existing distinctUntilChanged.
- Add TaskSeq.distinctBy: de-duplicates by a key projection function.
- Add TaskSeq.distinctByAsync: async variant of distinctBy.
- 75 new tests covering empty sequences, functionality, side effects, and comparison with Seq/distinctUntilChanged semantics.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 43eff83 commit 8ea0c25

File tree

6 files changed

+411
-9
lines changed

6 files changed

+411
-9
lines changed

release-notes.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ Release notes:
55
- update engineering to .NET 9/10
66
- adds TaskSeq.scan and TaskSeq.scanAsync, #289
77
- adds TaskSeq.pairwise, #289
8+
- adds TaskSeq.reduce and TaskSeq.reduceAsync, #289
9+
- adds TaskSeq.unfold and TaskSeq.unfoldAsync, #289
10+
- adds TaskSeq.distinct, TaskSeq.distinctBy, TaskSeq.distinctByAsync
11+
- performance: TaskSeq.exists, existsAsync, contains no longer allocate an intermediate Option value
812

913
0.4.0
1014
- overhaul all doc comments, add exceptions, improve IDE quick-info experience, #136, #220, #234

src/FSharp.Control.TaskSeq.Test/FSharp.Control.TaskSeq.Test.fsproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
<Compile Include="TaskSeq.ExactlyOne.Tests.fs" />
2121
<Compile Include="TaskSeq.Except.Tests.fs" />
2222
<Compile Include="TaskSeq.DistinctUntilChanged.Tests.fs" />
23+
<Compile Include="TaskSeq.Distinct.Tests.fs" />
2324
<Compile Include="TaskSeq.Pairwise.Tests.fs" />
2425
<Compile Include="TaskSeq.Exists.Tests.fs" />
2526
<Compile Include="TaskSeq.Filter.Tests.fs" />
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
module TaskSeq.Tests.Distinct
2+
3+
open Xunit
4+
open FsUnit.Xunit
5+
6+
open FSharp.Control
7+
8+
//
9+
// TaskSeq.distinct
10+
// TaskSeq.distinctBy
11+
// TaskSeq.distinctByAsync
12+
//
13+
14+
15+
module EmptySeq =
16+
[<Fact>]
17+
let ``TaskSeq-distinct with null source raises`` () = assertNullArg <| fun () -> TaskSeq.distinct null
18+
19+
[<Fact>]
20+
let ``TaskSeq-distinctBy with null source raises`` () = assertNullArg <| fun () -> TaskSeq.distinctBy id null
21+
22+
[<Fact>]
23+
let ``TaskSeq-distinctByAsync with null source raises`` () =
24+
assertNullArg
25+
<| fun () -> TaskSeq.distinctByAsync (fun x -> Task.fromResult x) null
26+
27+
[<Theory; ClassData(typeof<TestEmptyVariants>)>]
28+
let ``TaskSeq-distinct on empty returns empty`` variant =
29+
Gen.getEmptyVariant variant
30+
|> TaskSeq.distinct
31+
|> verifyEmpty
32+
33+
[<Theory; ClassData(typeof<TestEmptyVariants>)>]
34+
let ``TaskSeq-distinctBy on empty returns empty`` variant =
35+
Gen.getEmptyVariant variant
36+
|> TaskSeq.distinctBy id
37+
|> verifyEmpty
38+
39+
[<Theory; ClassData(typeof<TestEmptyVariants>)>]
40+
let ``TaskSeq-distinctByAsync on empty returns empty`` variant =
41+
Gen.getEmptyVariant variant
42+
|> TaskSeq.distinctByAsync (fun x -> Task.fromResult x)
43+
|> verifyEmpty
44+
45+
46+
module Functionality =
47+
[<Fact>]
48+
let ``TaskSeq-distinct removes duplicate ints`` () = task {
49+
let! result =
50+
taskSeq { yield! [ 1; 2; 2; 3; 1; 4; 3; 5 ] }
51+
|> TaskSeq.distinct
52+
|> TaskSeq.toListAsync
53+
54+
result |> should equal [ 1; 2; 3; 4; 5 ]
55+
}
56+
57+
[<Fact>]
58+
let ``TaskSeq-distinct removes duplicate strings`` () = task {
59+
let! result =
60+
taskSeq { yield! [ "a"; "b"; "b"; "a"; "c" ] }
61+
|> TaskSeq.distinct
62+
|> TaskSeq.toListAsync
63+
64+
result |> should equal [ "a"; "b"; "c" ]
65+
}
66+
67+
[<Fact>]
68+
let ``TaskSeq-distinct with all identical elements returns singleton`` () = task {
69+
let! result =
70+
taskSeq { yield! [ 7; 7; 7; 7; 7 ] }
71+
|> TaskSeq.distinct
72+
|> TaskSeq.toListAsync
73+
74+
result |> should equal [ 7 ]
75+
}
76+
77+
[<Fact>]
78+
let ``TaskSeq-distinct with all distinct elements returns all`` () = task {
79+
let! result =
80+
taskSeq { yield! [ 1..5 ] }
81+
|> TaskSeq.distinct
82+
|> TaskSeq.toListAsync
83+
84+
result |> should equal [ 1; 2; 3; 4; 5 ]
85+
}
86+
87+
[<Fact>]
88+
let ``TaskSeq-distinct on singleton returns singleton`` () = task {
89+
let! result =
90+
taskSeq { yield 42 }
91+
|> TaskSeq.distinct
92+
|> TaskSeq.toListAsync
93+
94+
result |> should equal [ 42 ]
95+
}
96+
97+
[<Fact>]
98+
let ``TaskSeq-distinct keeps first occurrence, not last`` () = task {
99+
// sequence [3;1;2;1;3] - first occurrences are at indices 0,1,2 for values 3,1,2
100+
let! result =
101+
taskSeq { yield! [ 3; 1; 2; 1; 3 ] }
102+
|> TaskSeq.distinct
103+
|> TaskSeq.toListAsync
104+
105+
result |> should equal [ 3; 1; 2 ]
106+
}
107+
108+
[<Fact>]
109+
let ``TaskSeq-distinct is different from distinctUntilChanged`` () = task {
110+
// [1;2;1] - distinct gives [1;2], distinctUntilChanged gives [1;2;1]
111+
let! distinct =
112+
taskSeq { yield! [ 1; 2; 1 ] }
113+
|> TaskSeq.distinct
114+
|> TaskSeq.toListAsync
115+
116+
let! distinctUntilChanged =
117+
taskSeq { yield! [ 1; 2; 1 ] }
118+
|> TaskSeq.distinctUntilChanged
119+
|> TaskSeq.toListAsync
120+
121+
distinct |> should equal [ 1; 2 ]
122+
distinctUntilChanged |> should equal [ 1; 2; 1 ]
123+
}
124+
125+
[<Fact>]
126+
let ``TaskSeq-distinctBy removes elements with duplicate projected keys`` () = task {
127+
let! result =
128+
taskSeq { yield! [ 1; 2; 3; 4; 5; 6 ] }
129+
|> TaskSeq.distinctBy (fun x -> x % 3)
130+
|> TaskSeq.toListAsync
131+
132+
// keys: 1%3=1, 2%3=2, 3%3=0, 4%3=1(dup), 5%3=2(dup), 6%3=0(dup)
133+
result |> should equal [ 1; 2; 3 ]
134+
}
135+
136+
[<Fact>]
137+
let ``TaskSeq-distinctBy with string length as key`` () = task {
138+
let! result =
139+
taskSeq { yield! [ "a"; "bb"; "c"; "dd"; "eee" ] }
140+
|> TaskSeq.distinctBy String.length
141+
|> TaskSeq.toListAsync
142+
143+
// lengths: 1, 2, 1(dup), 2(dup), 3
144+
result |> should equal [ "a"; "bb"; "eee" ]
145+
}
146+
147+
[<Fact>]
148+
let ``TaskSeq-distinctBy with identity projection equals distinct`` () = task {
149+
let input = [ 1; 2; 2; 3; 1; 4 ]
150+
151+
let! byId =
152+
taskSeq { yield! input }
153+
|> TaskSeq.distinctBy id
154+
|> TaskSeq.toListAsync
155+
156+
let! plain =
157+
taskSeq { yield! input }
158+
|> TaskSeq.distinct
159+
|> TaskSeq.toListAsync
160+
161+
byId |> should equal plain
162+
}
163+
164+
[<Fact>]
165+
let ``TaskSeq-distinctBy keeps first element with a given key`` () = task {
166+
let! result =
167+
taskSeq { yield! [ (1, "a"); (2, "b"); (1, "c") ] }
168+
|> TaskSeq.distinctBy fst
169+
|> TaskSeq.toListAsync
170+
171+
result |> should equal [ (1, "a"); (2, "b") ]
172+
}
173+
174+
[<Fact>]
175+
let ``TaskSeq-distinctByAsync removes elements with duplicate projected keys`` () = task {
176+
let! result =
177+
taskSeq { yield! [ 1; 2; 3; 4; 5; 6 ] }
178+
|> TaskSeq.distinctByAsync (fun x -> task { return x % 3 })
179+
|> TaskSeq.toListAsync
180+
181+
result |> should equal [ 1; 2; 3 ]
182+
}
183+
184+
[<Fact>]
185+
let ``TaskSeq-distinctByAsync behaves identically to distinctBy`` () = task {
186+
let input = [ 1; 2; 2; 3; 1; 4 ]
187+
let projection x = x % 2
188+
189+
let! bySync =
190+
taskSeq { yield! input }
191+
|> TaskSeq.distinctBy projection
192+
|> TaskSeq.toListAsync
193+
194+
let! byAsync =
195+
taskSeq { yield! input }
196+
|> TaskSeq.distinctByAsync (fun x -> task { return projection x })
197+
|> TaskSeq.toListAsync
198+
199+
bySync |> should equal byAsync
200+
}
201+
202+
[<Fact>]
203+
let ``TaskSeq-distinct with chars`` () = task {
204+
let! result =
205+
taskSeq { yield! [ 'A'; 'A'; 'B'; 'Z'; 'C'; 'C'; 'Z'; 'C'; 'D'; 'D'; 'D'; 'Z' ] }
206+
|> TaskSeq.distinct
207+
|> TaskSeq.toListAsync
208+
209+
result |> should equal [ 'A'; 'B'; 'Z'; 'C'; 'D' ]
210+
}
211+
212+
213+
module SideEffects =
214+
[<Fact>]
215+
let ``TaskSeq-distinct evaluates elements lazily`` () = task {
216+
let mutable sideEffects = 0
217+
218+
let ts = taskSeq {
219+
for i in 1..5 do
220+
sideEffects <- sideEffects + 1
221+
yield i
222+
}
223+
224+
let distinct = ts |> TaskSeq.distinct
225+
226+
// no evaluation yet
227+
sideEffects |> should equal 0
228+
229+
let! _ = distinct |> TaskSeq.toListAsync
230+
231+
// only evaluated when consumed
232+
sideEffects |> should equal 5
233+
}
234+
235+
[<Fact>]
236+
let ``TaskSeq-distinctBy evaluates projection lazily`` () = task {
237+
let mutable projections = 0
238+
239+
let! result =
240+
taskSeq { yield! [ 1; 2; 3; 1; 2 ] }
241+
|> TaskSeq.distinctBy (fun x ->
242+
projections <- projections + 1
243+
x)
244+
|> TaskSeq.toListAsync
245+
246+
result |> should equal [ 1; 2; 3 ]
247+
// projection called once per element (5 elements)
248+
projections |> should equal 5
249+
}

src/FSharp.Control.TaskSeq/TaskSeq.fs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -361,23 +361,21 @@ type TaskSeq private () =
361361
static member except itemsToExclude source = Internal.except itemsToExclude source
362362
static member exceptOfSeq itemsToExclude source = Internal.exceptOfSeq itemsToExclude source
363363

364+
static member distinct source = Internal.distinct source
365+
static member distinctBy projection source = Internal.distinctBy projection source
366+
static member distinctByAsync projection source = Internal.distinctByAsync projection source
367+
364368
static member distinctUntilChanged source = Internal.distinctUntilChanged source
365369
static member pairwise source = Internal.pairwise source
366370

367371
static member forall predicate source = Internal.forall (Predicate predicate) source
368372
static member forallAsync predicate source = Internal.forall (PredicateAsync predicate) source
369373

370-
static member exists predicate source =
371-
Internal.tryFind (Predicate predicate) source
372-
|> Task.map Option.isSome
374+
static member exists predicate source = Internal.exists (Predicate predicate) source
373375

374-
static member existsAsync predicate source =
375-
Internal.tryFind (PredicateAsync predicate) source
376-
|> Task.map Option.isSome
376+
static member existsAsync predicate source = Internal.exists (PredicateAsync predicate) source
377377

378-
static member contains value source =
379-
Internal.tryFind (Predicate((=) value)) source
380-
|> Task.map Option.isSome
378+
static member contains value source = Internal.contains value source
381379

382380
static member pick chooser source =
383381
Internal.tryPick (TryPick chooser) source

src/FSharp.Control.TaskSeq/TaskSeq.fsi

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,6 +1325,62 @@ type TaskSeq =
13251325
/// <exception cref="T:ArgumentNullException">Thrown when either of the two input task sequences is null.</exception>
13261326
static member exceptOfSeq<'T when 'T: equality> : itemsToExclude: seq<'T> -> source: TaskSeq<'T> -> TaskSeq<'T>
13271327

1328+
/// <summary>
1329+
/// Returns a new task sequence that contains no duplicate entries, using generic hash and equality comparisons.
1330+
/// If an element occurs multiple times in the sequence, only the first occurrence is returned.
1331+
/// </summary>
1332+
///
1333+
/// <remarks>
1334+
/// This function iterates the whole sequence and buffers all unique elements in a hash set, so it should not
1335+
/// be used on potentially infinite sequences.
1336+
/// </remarks>
1337+
///
1338+
/// <param name="source">The input task sequence.</param>
1339+
/// <returns>A sequence with duplicate elements removed.</returns>
1340+
///
1341+
/// <exception cref="T:ArgumentNullException">Thrown when the input task sequence is null.</exception>
1342+
static member distinct<'T when 'T: equality> : source: TaskSeq<'T> -> TaskSeq<'T>
1343+
1344+
/// <summary>
1345+
/// Returns a new task sequence that contains no duplicate entries according to the generic hash and equality
1346+
/// comparisons on the keys returned by the given projection function.
1347+
/// If two elements have the same projected key, only the first occurrence is returned.
1348+
/// If the projection function is asynchronous, consider using <see cref="TaskSeq.distinctByAsync" />.
1349+
/// </summary>
1350+
///
1351+
/// <remarks>
1352+
/// This function iterates the whole sequence and buffers all unique keys in a hash set, so it should not
1353+
/// be used on potentially infinite sequences.
1354+
/// </remarks>
1355+
///
1356+
/// <param name="projection">A function that transforms each element to a key that is used for equality comparison.</param>
1357+
/// <param name="source">The input task sequence.</param>
1358+
/// <returns>A sequence with elements whose projected keys are distinct.</returns>
1359+
///
1360+
/// <exception cref="T:ArgumentNullException">Thrown when the input task sequence is null.</exception>
1361+
static member distinctBy<'T, 'Key when 'Key: equality> :
1362+
projection: ('T -> 'Key) -> source: TaskSeq<'T> -> TaskSeq<'T>
1363+
1364+
/// <summary>
1365+
/// Returns a new task sequence that contains no duplicate entries according to the generic hash and equality
1366+
/// comparisons on the keys returned by the given asynchronous projection function.
1367+
/// If two elements have the same projected key, only the first occurrence is returned.
1368+
/// If the projection function is synchronous, consider using <see cref="TaskSeq.distinctBy" />.
1369+
/// </summary>
1370+
///
1371+
/// <remarks>
1372+
/// This function iterates the whole sequence and buffers all unique keys in a hash set, so it should not
1373+
/// be used on potentially infinite sequences.
1374+
/// </remarks>
1375+
///
1376+
/// <param name="projection">An asynchronous function that transforms each element to a key used for equality comparison.</param>
1377+
/// <param name="source">The input task sequence.</param>
1378+
/// <returns>A sequence with elements whose projected keys are distinct.</returns>
1379+
///
1380+
/// <exception cref="T:ArgumentNullException">Thrown when the input task sequence is null.</exception>
1381+
static member distinctByAsync:
1382+
projection: ('T -> #Task<'Key>) -> source: TaskSeq<'T> -> TaskSeq<'T> when 'Key: equality
1383+
13281384
/// <summary>
13291385
/// Returns a new task sequence without consecutive duplicate elements.
13301386
/// </summary>

0 commit comments

Comments
 (0)