Skip to content

Commit a84d407

Browse files
author
ladeak
committed
Add Bayesian probability comparison for perf results
Introduce Bayesian probability estimation using Student's t-distribution to compare session latencies. Add supporting statistical distribution classes and integrate results into DiffPrinter output. Extend unit tests to cover Bayesian comparison scenarios. Refactor variance calculation and update test dependencies to latest versions.
1 parent f9cf573 commit a84d407

8 files changed

Lines changed: 347 additions & 16 deletions

File tree

src/CHttp/Performance/Statitics/DiffPrinter.cs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public void Compare(PerformanceMeasurementResults session0, PerformanceMeasureme
3737

3838
int lineLength = _console.WindowWidth;
3939
var scaleNormalize = (double)lineLength / (stats0.Durations.Length + stats0.Durations.Length);
40-
string separator = new string('-', lineLength);
40+
string separator = new('-', lineLength);
4141

4242
// Histogram
4343
if (stats0.Durations.Length >= 100 && stats1.Durations.Length >= 100)
@@ -50,6 +50,9 @@ public void Compare(PerformanceMeasurementResults session0, PerformanceMeasureme
5050
PrintStatusCodes(stats0.StatusCodes, DiffStatusCodes(stats0, stats1));
5151
_console.WriteLine(separator);
5252

53+
if (PrintBayeisanComparison(session0, session1))
54+
_console.WriteLine(separator);
55+
5356
if (PrintWarnings(session0, session1))
5457
_console.WriteLine(separator);
5558
}
@@ -106,9 +109,7 @@ private static int[] DiffStatusCodes(Stats stats0, Stats stats1)
106109
{
107110
int[] diffStatusCodes = new int[stats0.StatusCodes.Length];
108111
for (int i = 0; i < diffStatusCodes.Length; i++)
109-
{
110112
diffStatusCodes[i] = stats1.StatusCodes[i] - stats0.StatusCodes[i];
111-
}
112113
return diffStatusCodes;
113114
}
114115

@@ -172,4 +173,13 @@ private static long GetCountForBucket(Vector<double> bucketLimit, ref Span<long>
172173
currentCounter += input.Length;
173174
return currentCounter;
174175
}
176+
177+
private bool PrintBayeisanComparison(PerformanceMeasurementResults session0, PerformanceMeasurementResults session1)
178+
{
179+
double? percent = StatisticsCalculator.CalculateBayesianProbability(session0, session1) * 100;
180+
if (percent.HasValue)
181+
_console.WriteLine($"With {percent:00.0}% probability, the base session's true mean latency is lower than compared session's.");
182+
return true;
183+
}
184+
175185
}

src/CHttp/Performance/Statitics/StatisticsCalculator.cs

Lines changed: 172 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using System.Diagnostics.Metrics;
1+
using System.Diagnostics;
2+
using System.Diagnostics.Metrics;
23
using System.Numerics;
34
using CHttp.Data;
45
using CHttp.Performance.Data;
@@ -45,7 +46,7 @@ public static Stats GetStats(PerformanceMeasurementResults session)
4546
Array.Sort(durations);
4647

4748
var mean = totalTicks / (double)summaries.Count;
48-
double stdDev = Math.Sqrt(CalcSquaredStdDev(durations, mean));
49+
double stdDev = Math.Sqrt(CalculateVariance(durations, mean));
4950
double error = stdDev / Math.Sqrt(summaries.Count);
5051
double requestSec = (double)summaries.Count * TimeSpan.TicksPerSecond / (latestEnd - earliestStart);
5152
double throughput = session.TotalBytesRead / (mean / TimeSpan.TicksPerSecond);
@@ -72,7 +73,7 @@ public static Stats GetStats(PerformanceMeasurementResults session)
7273
return stats;
7374
}
7475

75-
private static double CalcSquaredStdDev(long[] durations, double mean)
76+
private static double CalculateVariance(long[] durations, double mean)
7677
{
7778
var avg = new Vector<double>(mean);
7879
var input = durations.AsSpan();
@@ -86,7 +87,7 @@ private static double CalcSquaredStdDev(long[] durations, double mean)
8687
var difference = Vector.Subtract(avg, vInput);
8788
var squared = Vector.Multiply(difference, difference);
8889
sum += Vector.Sum(squared);
89-
input = input.Slice(vSize);
90+
input = input[vSize..];
9091
}
9192
}
9293

@@ -95,7 +96,32 @@ private static double CalcSquaredStdDev(long[] durations, double mean)
9596
{
9697
var difference = mean - input[0];
9798
sum += difference * difference;
98-
input = input.Slice(1);
99+
input = input[1..];
100+
}
101+
102+
return sum / durations.Length;
103+
}
104+
105+
private static double CalculateVariance(double[] durations, double mean)
106+
{
107+
var avg = new Vector<double>(mean);
108+
var input = durations.AsSpan();
109+
double sum = 0;
110+
var vSize = Vector<double>.Count;
111+
while (input.Length >= vSize)
112+
{
113+
var vInput = new Vector<double>(input);
114+
var difference = Vector.Subtract(avg, vInput);
115+
var squared = Vector.Multiply(difference, difference);
116+
sum += Vector.Sum(squared);
117+
input = input[vSize..];
118+
}
119+
120+
// Remaining
121+
for (int i = 0; i < input.Length; i++)
122+
{
123+
var difference = mean - input[i];
124+
sum += difference * difference;
99125
}
100126

101127
return sum / durations.Length;
@@ -148,4 +174,145 @@ public static (double DisplayValue, string Qualifier) Display(double value)
148174
}
149175
return (displayAverage, qualifier);
150176
}
177+
178+
internal static double? CalculateBayesianProbability(
179+
PerformanceMeasurementResults session0,
180+
PerformanceMeasurementResults session1,
181+
Random? random = null)
182+
{
183+
if (session0.Summaries.Count < 100 || session0.Summaries.Count != session1.Summaries.Count)
184+
return null;
185+
186+
// Step1: Calculate Mean and Variance based logarithmic durations
187+
var (s0Mean, s0Variance) = LogMeanVariance(session0.Summaries);
188+
var (s1Mean, s1Variance) = LogMeanVariance(session1.Summaries);
189+
190+
// The above values are not the "true" values, as these are based on the approximation of the samples.
191+
// To calculate the "true" values use a Posterior Distributions (one for each session), in this case
192+
// Student's t distribution ref: https://en.wikipedia.org/wiki/Student%27s_t-distribution
193+
// and sample values of the this distribution. Compare the samples to derive the probability of one
194+
// session being faster to the other session.
195+
196+
// For Student t distribution: number of requests, mean and variance needed.
197+
random = random ?? Random.Shared;
198+
var student0 = new StudentDistribution(session0.Summaries.Count - 1, s0Mean, Math.Sqrt(s0Variance / session0.Summaries.Count), random);
199+
var student1 = new StudentDistribution(session1.Summaries.Count - 1, s1Mean, Math.Sqrt(s1Variance / session1.Summaries.Count), random);
200+
201+
// Simulate and compare
202+
long session0Faster = 0;
203+
double simulationCount = 8192;
204+
for (int i = 0; i < simulationCount; i++)
205+
{
206+
if (student0.Sample() < student1.Sample())
207+
session0Faster++;
208+
}
209+
return session0Faster / simulationCount;
210+
211+
static (double Mean, double Variance) LogMeanVariance(IReadOnlyCollection<Summary> summaries)
212+
{
213+
var durations = new double[summaries.Count];
214+
double totalLogTicks = 0;
215+
216+
int current = 0;
217+
foreach (var item in summaries)
218+
{
219+
var logTicks = Math.Log(item.Duration.Ticks);
220+
durations[current++] = logTicks;
221+
totalLogTicks += logTicks;
222+
}
223+
var mean = totalLogTicks / summaries.Count;
224+
var variance = CalculateVariance(durations, mean);
225+
return (mean, variance);
226+
}
227+
}
151228
}
229+
230+
internal class StudentDistribution
231+
{
232+
private readonly double _mean;
233+
private readonly double _scale;
234+
private readonly int _degreeOfFreedom;
235+
private readonly StandardNormalDistribution _standardNormal;
236+
private readonly ChiSquaredDistribution _chiSquared;
237+
238+
public StudentDistribution(int degreeOfFreedom, double mean, double scale, Random random)
239+
{
240+
_mean = mean;
241+
_scale = scale;
242+
_degreeOfFreedom = degreeOfFreedom;
243+
_standardNormal = new(random);
244+
_chiSquared = new(degreeOfFreedom, _standardNormal, random);
245+
}
246+
247+
public double Sample()
248+
{
249+
// Student's t-distribution with v (degreeOfFreedom) degree of freedom can be defined as the distribution
250+
// T = Z / Sqrt(V/degreeOfFreedom).
251+
// Z is a standard normal with expected value 0 and variance 1
252+
// V is a chi-squared distribution
253+
// degreeOfFreedom is the degree of freedom (sample count - 1)
254+
var z = _standardNormal.Sample();
255+
var v = _chiSquared.Sample();
256+
257+
var tStandard = z / Math.Sqrt(v / _degreeOfFreedom);
258+
259+
// Apply 'location' (mean) and 'scale' (variance).
260+
return _mean + tStandard * _scale;
261+
}
262+
}
263+
264+
internal class StandardNormalDistribution(Random random)
265+
{
266+
private readonly Random _random = random;
267+
public double Sample()
268+
{
269+
// Ref: https://en.wikipedia.org/wiki/Normal_distribution
270+
// The Box–Muller method uses two independent random numbers u1 and u2 distributed uniformly on (0,1).
271+
// Then the two random variables X and Y
272+
double u1 = 1.0 - _random.NextDouble();
273+
double u2 = 1.0 - _random.NextDouble();
274+
var sample = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2);
275+
return sample;
276+
}
277+
}
278+
279+
internal class ChiSquaredDistribution(
280+
int degreeOfFreedom,
281+
StandardNormalDistribution standardNormal,
282+
Random random)
283+
{
284+
private readonly double _degreeOfFreedom = degreeOfFreedom;
285+
private readonly Random _random = random;
286+
public readonly StandardNormalDistribution _standardNormal = standardNormal;
287+
288+
public double Sample()
289+
{
290+
// Ref: https://en.wikipedia.org/wiki/Chi-squared_distribution
291+
// Using the Gamma distribution method. x ~ Gamma(alpha = k/2, omega = 2)
292+
// k in the above is (the degree of freedom, degreeOfFreedom variable).
293+
return SampleGamma(_degreeOfFreedom / 2.0, 2);
294+
}
295+
296+
private double SampleGamma(double alpha, double omega)
297+
{
298+
if (alpha < 1)
299+
throw new InvalidOperationException("Unsupported Student-t distribution calculation");
300+
// Ref: https://en.wikipedia.org/wiki/Gamma_distribution
301+
// Marsaglia & Tsang GS algorithm, because this is faster
302+
// to generating a sum of squared normal samples.
303+
var d = alpha - 1 / 3.0;
304+
var c = 1 / Math.Sqrt(9 * d);
305+
306+
while (true)
307+
{
308+
var z = _standardNormal.Sample();
309+
var v = Math.Pow((1 + c * z), 3);
310+
if (v <= 0)
311+
continue;
312+
var u = _random.NextDouble(); // Uniform distribution;
313+
314+
if (Math.Log(u) < 0.5 * z * z + d - d * v + d * Math.Log(v))
315+
return d * v * omega;
316+
}
317+
}
318+
}

tests/CHttp.Api.Tests/CHttp.Api.Tests.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
</ItemGroup>
2020

2121
<ItemGroup>
22-
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="18.0.1" />
22+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="18.6.0" />
2323
<PackageReference Include="xunit.runner.visualstudio" Version="3.1.5">
2424
<PrivateAssets>all</PrivateAssets>
2525
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>

tests/CHttp.Tests/CHttp.Tests.csproj

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
</PropertyGroup>
1010

1111
<ItemGroup>
12-
<PackageReference Include="Microsoft.AspNetCore.TestHost" Version="10.0.2" />
13-
<PackageReference Include="Microsoft.Extensions.TimeProvider.Testing" Version="10.3.0" />
14-
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="18.0.1" />
12+
<PackageReference Include="Microsoft.AspNetCore.TestHost" Version="11.0.0-preview.4.26230.115" />
13+
<PackageReference Include="Microsoft.Extensions.TimeProvider.Testing" Version="10.6.0" />
14+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="18.6.0" />
1515
<PackageReference Include="NSubstitute" Version="5.3.0" />
1616
<PackageReference Include="xunit.runner.visualstudio" Version="3.1.5">
1717
<PrivateAssets>all</PrivateAssets>

0 commit comments

Comments
 (0)