@@ -15,6 +15,8 @@ defmodule Statistex do
1515 alias Statistex . { Mode , Percentile }
1616 require Integer
1717
18+ import Statistex.Helper , only: [ maybe_sort: 2 ]
19+
1820 defstruct [
1921 :total ,
2022 :average ,
@@ -88,6 +90,7 @@ defmodule Statistex do
8890 @ empty_list_error_message "Passed an empty list ([]) to calculate statistics from, please pass a list containing at least one number."
8991
9092 @ first_quartile 25
93+ @ median_percentile 50
9194 @ third_quartile 75
9295 # https://en.wikipedia.org/wiki/Interquartile_range#Outliers
9396 # https://builtin.com/articles/1-5-iqr-rule
@@ -167,17 +170,21 @@ defmodule Statistex do
167170 end
168171
169172 def statistics ( samples , configuration ) do
170- samples = Enum . sort ( samples )
173+ sorted_samples = Enum . sort ( samples )
171174
172175 # these statistics are required to do the outlier calculations
173176 % { minimum: minimum , maximum: maximum , percentiles: percentiles } =
174- base_statistics ( samples , configuration )
177+ base_statistics ( sorted_samples , configuration )
175178
176179 outlier_bounds =
177- do_outlier_bounds ( samples , percentiles: percentiles , minimum: minimum , maximum: maximum )
180+ do_outlier_bounds ( sorted_samples ,
181+ percentiles: percentiles ,
182+ minimum: minimum ,
183+ maximum: maximum
184+ )
178185
179186 # make sure rest remains sorted so that it can be reused when creating the full statistics
180- { outliers , rest } = do_outliers ( samples , outlier_bounds: outlier_bounds )
187+ { outliers , rest } = do_outliers ( sorted_samples , outlier_bounds: outlier_bounds )
181188
182189 if exclude_outliers? ( configuration ) and Enum . any? ( outliers ) do
183190 # figure out how to avoid double sorting
@@ -188,15 +195,22 @@ defmodule Statistex do
188195
189196 create_full_statistics ( rest , minimum , maximum , percentiles , outliers , outlier_bounds )
190197 else
191- create_full_statistics ( samples , minimum , maximum , percentiles , outliers , outlier_bounds )
198+ create_full_statistics (
199+ sorted_samples ,
200+ minimum ,
201+ maximum ,
202+ percentiles ,
203+ outliers ,
204+ outlier_bounds
205+ )
192206 end
193207 end
194208
# Computes the statistics needed before outliers can be determined:
# minimum, maximum and the percentiles map.
# The diff only renames the parameter from `samples` to `sorted_samples`.
# `hd/1` and `List.last/1` yield minimum and maximum only because the caller
# passes a list that has already been run through `Enum.sort/1`.
195- defp base_statistics ( samples , configuration ) do
196- minimum = hd ( samples )
197- maximum = List . last ( samples )
209+ defp base_statistics ( sorted_samples , configuration ) do
210+ minimum = hd ( sorted_samples )
211+ maximum = List . last ( sorted_samples )
198212
199- percentiles = calculate_percentiles ( samples , configuration )
213+ percentiles = calculate_percentiles ( sorted_samples , configuration )
200214
201215 % { minimum: minimum , maximum: maximum , percentiles: percentiles }
202216 end
@@ -459,23 +473,26 @@ defmodule Statistex do
459473 end
460474 end
461475
# Builds the list of percentile ranks to compute and hands it to
# `Percentile.percentiles`.
# The ranks are the first quartile, the median and the third quartile, followed
# by whatever is configured under `:percentiles`; `Enum.uniq/1` drops duplicates.
# In this diff the `@median_percentile` attribute moves to the top of the module
# and the literals 25 / 75 are replaced by `@first_quartile` / `@third_quartile`.
462- @ median_percentile 50
463- defp calculate_percentiles ( samples , configuration ) do
476+ defp calculate_percentiles ( sorted_samples , configuration ) do
464477 percentiles_configuration = Keyword . get ( configuration , :percentiles , [ ] )
465478
466479 # median_percentile is manually added so that it can be used directly by median
467480 percentiles_configuration =
468- Enum . uniq ( [ 25 , @ median_percentile , 75 | percentiles_configuration ] )
481+ Enum . uniq ( [
482+ @ first_quartile ,
483+ @ median_percentile ,
484+ @ third_quartile | percentiles_configuration
485+ ] )
469486
# NOTE(review): `sorted: true` presumably tells `Percentile.percentiles/3` to
# skip sorting again - confirm in `Statistex.Percentile`.
470- Percentile . percentiles ( samples , percentiles_configuration )
487+ Percentile . percentiles ( sorted_samples , percentiles_configuration , sorted: true )
471488 end
472489
473490 @ doc """
474491 Calculates the value at the `percentile_rank`-th percentile.
475492
476493 Think of this as the
477494 value below which `percentile_rank` percent of the samples lie. For example,
478- if `Statistex.percentile(samples, 99)` == 123.45,
495+ if `Statistex.percentile(samples, 99) == 123.45` ,
479496 99% of samples are less than 123.45.
480497
481498 Passing a number for `percentile_rank` calculates a single percentile.
@@ -517,9 +534,8 @@ defmodule Statistex do
517534 """
# Public entry point for percentile calculation.
# The removed implementation sorted `samples` with `Enum.sort/1` before calling
# `Percentile.percentiles/2`; the added lines delegate both arities straight to
# `Percentile`.
# NOTE(review): sorting is now the responsibility of `Percentile.percentiles` -
# confirm it sorts unsorted input unless told otherwise via `options`.
# NOTE(review): the `@spec` below covers only the 2-arity form; the 3-arity
# delegate has no spec in this view.
518535 @ spec percentiles ( samples , number | [ number ( ) , ... ] ) ::
519536 percentiles ( )
520- def percentiles ( samples , percentiles ) do
521- samples |> Enum . sort ( ) |> Percentile . percentiles ( percentiles )
522- end
537+ defdelegate percentiles ( samples , percentiles , options ) , to: Percentile
538+ defdelegate percentiles ( samples , percentiles ) , to: Percentile
523539
524540 @ doc """
525541 A map showing which sample occurs how often in the samples.
@@ -631,6 +647,9 @@ defmodule Statistex do
631647 iex> Statistex.outlier_bounds([3, 4, 5])
632648 {0.0, 8.0}
633649
650+ iex> Statistex.outlier_bounds([4, 5, 3])
651+ {0.0, 8.0}
652+
634653 iex> Statistex.outlier_bounds([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50])
635654 {22.5, 66.5}
636655
@@ -640,19 +659,21 @@ defmodule Statistex do
# Returns the `{lower, upper}` bounds outside of which a sample counts as an outlier.
# Raises `ArgumentError` for an empty list.
# The removed clause sorted `samples` here; the added clause forwards `samples`
# and `options` unchanged to `do_outlier_bounds/2`.
# NOTE(review): correct results for unsorted input now depend on what
# `do_outlier_bounds/2` calls downstream - confirm; the doctest added in this
# diff for `[4, 5, 3]` expects the same bounds as for `[3, 4, 5]`.
640659 @ spec outlier_bounds ( samples , keyword ) :: { lower :: number , upper :: number }
641660 def outlier_bounds ( samples , options \\ [ ] )
642661 def outlier_bounds ( [ ] , _ ) , do: raise ( ArgumentError , @ empty_list_error_message )
643- def outlier_bounds ( samples , options ) , do: samples |> Enum . sort ( ) |> do_outlier_bounds ( options )
662+ def outlier_bounds ( samples , options ) , do: do_outlier_bounds ( samples , options )
644663
# Computes `{lower, upper}` outlier bounds from the first and third quartile.
# Percentiles are taken from `options[:percentiles]` when supplied; otherwise
# they are computed lazily via `Percentile.percentiles`, which in the added
# version also receives `options`.
# The bounds are q1 and q3 widened by `iqr * @iqr_factor` on either side.
645664 defp do_outlier_bounds ( samples , options ) do
665+ # double-check: do we need both `Keyword.get_lazy/3` lookups here?
646666 percentiles =
647667 Keyword . get_lazy ( options , :percentiles , fn ->
648- Percentile . percentiles ( samples , [ @ first_quartile , @ third_quartile ] )
668+ Percentile . percentiles ( samples , [ @ first_quartile , @ third_quartile ] , options )
649669 end )
650670
651671 q1 = get_percentile ( samples , @ first_quartile , percentiles )
652672 q3 = get_percentile ( samples , @ third_quartile , percentiles )
# interquartile range
653673 iqr = q3 - q1
674+ outlier_tolerance = iqr * @ iqr_factor
654675
655- { q1 - iqr * @ iqr_factor , q3 + iqr * @ iqr_factor }
676+ { q1 - outlier_tolerance , q3 + outlier_tolerance }
656677 end
657678
658679 @ doc """
@@ -671,21 +692,21 @@ defmodule Statistex do
671692 """
# Returns only the outliers among `samples`; the non-outlier remainder is discarded.
# The removed line always sorted with `Enum.sort/1`; the added lines call
# `maybe_sort/2`, imported from `Statistex.Helper`, with the caller's `options`.
# NOTE(review): `maybe_sort/2` presumably skips sorting when `options` mark the
# samples as already sorted - confirm in `Statistex.Helper`.
672693 @ spec outliers ( samples , keyword ) :: samples | [ ]
673694 def outliers ( samples , options \\ [ ] ) do
674- { outliers , _rest } = samples |> Enum . sort ( ) |> do_outliers ( options )
695+ sorted_samples = maybe_sort ( samples , options )
696+
697+ # maybe allow folks to get the same
# NOTE(review): the comment above is cut short - "the same" presumably refers
# to also exposing `_rest`; confirm with the author.
698+ { outliers , _rest } = do_outliers ( sorted_samples , options )
675699
676700 outliers
677701 end
678702
# Splits the sorted samples into `{outliers, rest}`.
# Bounds come from `options[:outlier_bounds]` when supplied and are otherwise
# computed lazily through `do_outlier_bounds/2`.
# A sample is an outlier when it is below the lower bound or above the upper bound.
# Ordering differs between the two versions in this diff:
#   - removed: `rest` and the upper outliers came out of `Enum.reverse/1`, so
#     they were in descending order;
#   - added: `Enum.split_with/2` keeps both result lists in input order.
679- defp do_outliers ( samples , options ) do
703+ defp do_outliers ( sorted_samples , options ) do
680704 { lower_bound , upper_bound } =
681- Keyword . get_lazy ( options , :outlier_bounds , fn -> do_outlier_bounds ( samples , options ) end )
682-
683- { min , rest } = Enum . split_while ( samples , fn sample -> sample < lower_bound end )
684-
685- { max , rest } =
686- rest |> Enum . reverse ( ) |> Enum . split_while ( fn sample -> sample > upper_bound end )
705+ Keyword . get_lazy ( options , :outlier_bounds , fn ->
706+ do_outlier_bounds ( sorted_samples , options )
707+ end )
687708
688- { min ++ max , rest }
709+ Enum . split_with ( sorted_samples , fn sample -> sample < lower_bound || sample > upper_bound end )
689710 end
690711
691712 defp get_percentile ( samples , percentile , percentiles ) do
0 commit comments