Skip to content

Commit b180a29

Browse files
committed
v0.9.0
1 parent 29e9e1b commit b180a29

11 files changed

Lines changed: 961 additions & 213 deletions

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1-
# Upcoming Release
1+
# Upcoming Release
2+
3+
4+
# 0.9.0
25
- Add exact argument for LAD. If exact is true then the linear programming based exact solution is found. Otherwise, a GA based search is performed to yield approximate solutions.
6+
- Remove the dependency on Plots.jl. If Plots.jl is installed and loaded manually, the functionality that uses Plots is automatically loaded by Requires.jl. Affected functions are `dataimage`, `mveltsplot`, and `bchplot`.
7+
38

49
# v0.8.19
510
- Update Satman(2013) algorithm

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LinRegOutliers"
22
uuid = "6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a"
33
authors = ["Mehmet Hakan Satman <mhsatman@gmail.com>", "Shreesh Adiga <16567adigashreesh@gmail.com>", "Guillermo Angeris <angeris@stanford.edu>", "Emre Akadal <emre.akadal@istanbul.edu.tr>"]
4-
version = "0.8.19"
4+
version = "0.9.0"
55

66
[deps]
77
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
@@ -11,7 +11,7 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
1111
GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6"
1212
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
1313
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
14-
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
14+
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
1515
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
1616

1717
[compat]
@@ -21,6 +21,6 @@ DataFrames = "0.22, 1"
2121
Distributions = "0.17, 0.18, 0.19.1, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25"
2222
GLPK = "0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 1"
2323
JuMP = "0.19, 0.20, 0.21, 0.22, 0.23, 1"
24-
Plots = "0.22.2, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 1"
24+
Requires = "1"
2525
StatsModels = "0.4, 0.5, 0.6"
2626
julia = "1.4"

src/LinRegOutliers.jl

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,26 @@
11
module LinRegOutliers
22

3+
using Requires
4+
5+
# After the module is loaded, we check if Plots is installed and loaded.
6+
# If Plots is installed and loaded, we load the corresponding modules.
7+
function __init__()
8+
@require Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin
9+
10+
import .Plots: RGB
11+
12+
include("mveltsplot.jl")
13+
include("dataimage.jl")
14+
include("bchplot.jl")
15+
16+
import .MVELTSPlot: mveltsplot
17+
import .DataImage: dataimage
18+
import .BCHPlot: bchplot
19+
20+
export mveltsplot, dataimage, bchplot, RGB
21+
22+
end
23+
end
324

425
# Basis
526
include("basis.jl")
@@ -91,13 +112,14 @@ import .ASM2000: asm2000
91112
include("mve.jl")
92113
import .MVE: mve, mcd
93114

115+
# Moved into graphics.jl
94116
# MVE - LTS plot for visual detection of regression outliers
95-
include("mveltsplot.jl")
96-
import .MVELTSPlot: mveltsplot
117+
#include("mveltsplot.jl")
118+
#import .MVELTSPlot: mveltsplot
97119

98120
# Billor & Chatterjee & Hadi Algorithm for detecting outliers
99121
include("bch.jl")
100-
import .BCH: bch, bchplot
122+
import .BCH: bch
101123

102124
# Pena & Yohai (1995) algorithm
103125
include("py95.jl")
@@ -138,9 +160,10 @@ include("hadi1994.jl")
138160
import .Hadi94: hadi1994
139161

140162

163+
# Moved into graphics.jl
141164
# Gray-scale images of distance matrices
142-
include("dataimage.jl")
143-
import .DataImage: dataimage
165+
#include("dataimage.jl")
166+
#import .DataImage: dataimage
144167

145168

146169
# Modified and original Satman (2012) algorithms
@@ -218,8 +241,7 @@ export asm2000
218241
export lms
219242
export lts
220243
export mve, mcd
221-
export mveltsplot
222-
export bch, bchplot
244+
export bch
223245
export py95, py95SuspectedObservations
224246
export satman2013
225247
export satman2015, dominates
@@ -228,7 +250,6 @@ export quantileregression
228250
export lta
229251
export hadi1992
230252
export hadi1994
231-
export dataimage
232253
export gwcga, galts, ga, cga, RealChromosome
233254
export detectOutliers
234255
export ransac

src/bch.jl

Lines changed: 1 addition & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module BCH
22

33
export bch
4-
export bchplot
4+
55

66
import ..Basis:
77
RegressionSetting, @extractRegressionSetting, designMatrix, responseVector, applyColumns
@@ -13,8 +13,6 @@ import Distributions: Chisq
1313
import LinearAlgebra: diag
1414
import DataFrames: DataFrame
1515

16-
using Plots
17-
1816

1917

2018
"""
@@ -207,59 +205,5 @@ end
207205

208206

209207

210-
"""
211-
212-
bchplot(setting::RegressionSetting; alpha=0.05, maxiter=1000, epsilon=0.00001)
213-
214-
Perform the Billor & Chatterjee & Hadi (2006) algorithm and generates outlier plot
215-
for the given regression setting.
216-
217-
# Arguments
218-
- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
219-
- `alpha::Float64`: Optional argument of the probability of rejecting the null hypothesis.
220-
- `maxiter::Int`: Maximum number of iterations for calculating iterative weighted least squares estimates.
221-
- `epsilon::Float64`: Accuracy for determining convergency.
222-
223-
224-
# References
225-
Billor, Nedret, Samprit Chatterjee, and Ali S. Hadi. "A re-weighted least squares method
226-
for robust regression estimation." American journal of mathematical and management sciences 26.3-4 (2006): 229-252.
227-
"""
228-
function bchplot(
229-
setting::RegressionSetting;
230-
alpha = 0.05,
231-
maxiter = 1000,
232-
epsilon = 0.00001,
233-
)
234-
X = designMatrix(setting)
235-
y = responseVector(setting)
236-
return bchplot(X, y, alpha = alpha, maxiter = maxiter, epsilon = epsilon)
237-
end
238-
239-
function bchplot(
240-
Xdesign::Array{Float64,2},
241-
y::Array{Float64,1};
242-
alpha = 0.05,
243-
maxiter = 1000,
244-
epsilon = 0.00001,
245-
)
246-
result = bch(Xdesign, y, alpha = alpha, maxiter = maxiter, epsilon = epsilon)
247-
squared_normalized_residuals = result["squared.normalized.residuals"]
248-
squared_normalized_robust_distances = result["squared.normalized.robust.distances"]
249-
n = length(squared_normalized_robust_distances)
250-
scplot = scatter(
251-
squared_normalized_robust_distances,
252-
squared_normalized_residuals,
253-
legend = false,
254-
series_annotations = text.(1:n, :bottom),
255-
tickfont = font(10),
256-
guidefont = font(10),
257-
labelfont = font(10),
258-
)
259-
title!("Billor & Chatterjee & Hadi Plot")
260-
xlabel!("Squared Normalized Robust Distances")
261-
ylabel!("Squared Normalized Residuals")
262-
end
263-
264208

265209
end # end of module BCH

src/bchplot.jl

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
module BCHPlot
2+
3+
4+
export bchplot
5+
6+
7+
import ..BCH: bch
8+
import ..Basis: RegressionSetting
9+
10+
11+
"""
12+
13+
bchplot(setting::RegressionSetting; alpha=0.05, maxiter=1000, epsilon=0.00001)
14+
15+
Perform the Billor & Chatterjee & Hadi (2006) algorithm and generate an outlier plot
16+
for the given regression setting.
17+
18+
# Arguments
19+
- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
20+
- `alpha::Float64`: Optional argument of the probability of rejecting the null hypothesis.
21+
- `maxiter::Int`: Maximum number of iterations for calculating iterative weighted least squares estimates.
22+
- `epsilon::Float64`: Accuracy for determining convergence.
23+
24+
25+
# References
26+
Billor, Nedret, Samprit Chatterjee, and Ali S. Hadi. "A re-weighted least squares method
27+
for robust regression estimation." American journal of mathematical and management sciences 26.3-4 (2006): 229-252.
28+
29+
!!! warning "Dependencies"
30+
This method is enabled when the Plots package is installed and loaded.
31+
32+
"""
33+
function bchplot(
34+
setting::RegressionSetting;
35+
alpha = 0.05,
36+
maxiter = 1000,
37+
epsilon = 0.00001,
38+
)
39+
X = designMatrix(setting)
40+
y = responseVector(setting)
41+
return bchplot(X, y, alpha = alpha, maxiter = maxiter, epsilon = epsilon)
42+
end
43+
44+
function bchplot(
45+
Xdesign::Array{Float64,2},
46+
y::Array{Float64,1};
47+
alpha = 0.05,
48+
maxiter = 1000,
49+
epsilon = 0.00001,
50+
)
51+
result = bch(Xdesign, y, alpha = alpha, maxiter = maxiter, epsilon = epsilon)
52+
squared_normalized_residuals = result["squared.normalized.residuals"]
53+
squared_normalized_robust_distances = result["squared.normalized.robust.distances"]
54+
n = length(squared_normalized_robust_distances)
55+
scplot = scatter(
56+
squared_normalized_robust_distances,
57+
squared_normalized_residuals,
58+
legend = false,
59+
series_annotations = text.(1:n, :bottom),
60+
tickfont = font(10),
61+
guidefont = font(10),
62+
labelfont = font(10),
63+
)
64+
title!("Billor & Chatterjee & Hadi Plot")
65+
xlabel!("Squared Normalized Robust Distances")
66+
ylabel!("Squared Normalized Residuals")
67+
end
68+
69+
end # end of module

src/dataimage.jl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
module DataImage
22

3-
3+
export dataimage
44

55
import ..Diagnostics:
66
mahalanobisSquaredMatrix, euclideanDistances, mahalanobisSquaredBetweenPairs
77

8-
9-
import Plots: RGB
10-
11-
8+
import ..RGB
129

1310
"""
1411
@@ -41,11 +38,15 @@ julia> Plots.plot(di)
4138
# References
4239
Marchette, David J., and Jeffrey L. Solka. "Using data images for outlier detection."
4340
Computational Statistics & Data Analysis 43.4 (2003): 541-552.
41+
42+
!!! warning "Dependencies"
43+
This method is enabled when the Plots package is installed and loaded.
44+
4445
"""
4546
function dataimage(
4647
dataMatrix::Array{Float64,2};
4748
distance = :mahalanobis,
48-
)::Array{RGB{Float64},2}
49+
)::Matrix{RGB{Float64}}
4950
d = nothing
5051
if distance == :mahalanobis
5152
d = mahalanobisSquaredBetweenPairs(dataMatrix)

src/mveltsplot.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import ..MVE: mve
99

1010
import Distributions: Chisq, quantile
1111

12-
using Plots
1312

1413
"""
1514
mveltsplot(setting; alpha = 0.05, showplot = true)
@@ -41,6 +40,10 @@ good leverage points (observations far from the remaining of data in both x and
4140
# References
4241
Van Aelst, Stefan, and Peter Rousseeuw. "Minimum volume ellipsoid." Wiley
4342
Interdisciplinary Reviews: Computational Statistics 1.1 (2009): 71-82.
43+
44+
!!! warning "Dependencies"
45+
This method is enabled when the Plots package is installed and loaded.
46+
4447
"""
4548
function mveltsplot(setting::RegressionSetting; alpha = 0.05, showplot = true)
4649
ltsresult = lts(setting)

0 commit comments

Comments
 (0)