Skip to content

Commit 7be933b

Browse files
committed
implement quantile regression estimator
1 parent 92a670c commit 7be933b

9 files changed

Lines changed: 181 additions & 4 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
# v0.8.16 (Upcoming Release)
1+
# v0.8.17 (Upcoming release)
2+
3+
4+
# v0.8.16
5+
- Quantile Regression implemented
26

37

48
# v0.8.15

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LinRegOutliers"
22
uuid = "6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a"
33
authors = ["Mehmet Hakan Satman <mhsatman@gmail.com>", "Shreesh Adiga <16567adigashreesh@gmail.com>", "Guillermo Angeris <angeris@stanford.edu>", "Emre Akadal <emre.akadal@istanbul.edu.tr>"]
4-
version = "0.8.15"
4+
version = "0.8.16"
55

66
[deps]
77
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ A Julia package for outlier detection in linear regression.
2121
- Satman (2015)
2222
- Setan & Halim & Mohd (2000)
2323
- Least Absolute Deviations (LAD)
24+
- Quantile Regression Parameter Estimation (quantileregression)
2425
- Least Trimmed Absolute Deviations (LTA)
2526
- Hadi (1992)
2627
- Marchette & Solka (2003) Data Images

docs/src/algorithms.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ LinRegOutliers.hadi1994
125125
LinRegOutliers.cm97
126126
```
127127

128+
## Quantile Regression
129+
```@docs
130+
LinRegOutliers.quantileregression
131+
```
128132

129133

130134

src/LinRegOutliers.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ import .Satman2015: satman2015, dominates
103103
include("lad.jl")
104104
import .LAD: lad
105105

106+
# Quantile Regression Estimator
107+
include("quantileregression.jl")
108+
import .QuantileRegression: quantileregression
106109

107110
# Least Trimmed Absolute Deviations estimator
108111
include("lta.jl")
@@ -216,6 +219,7 @@ export py95, py95SuspectedObservations
216219
export satman2013
217220
export satman2015, dominates
218221
export lad
222+
export quantileregression
219223
export lta
220224
export hadi1992
221225
export hadi1994

src/lad.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Perform Least Absolute Deviations regression for a given regression setting.
1818
- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
1919
2020
# Description
21-
The LAD estimator searches for regression parameters estimates that minimizes the sum of absolute residuals.
21+
The LAD estimator searches for regression the parameters estimates that minimize the sum of absolute residuals.
2222
The optimization problem is
2323
2424
Min z = u1(-) + u1(+) + u2(-) + u2(+) + .... + un(-) + un(+)

src/quantileregression.jl

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
module QuantileRegression
2+
3+
export quantileregression
4+
5+
using JuMP
6+
using GLPK
7+
8+
import ..Basis:
9+
RegressionSetting, @extractRegressionSetting, designMatrix, responseVector, applyColumns
10+
11+
"""
12+
13+
quantileregression(setting; tau = 0.5)
14+
15+
Perform Quantile Regression for a given regression setting (multiple linear regression).
16+
17+
# Arguments
18+
- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
19+
- `tau::Float64`: Quantile level. Default is 0.5.
20+
21+
# Description
22+
The Quantile Regression estimator searches for the regression parameter estimates that minimize the
23+
24+
25+
Min z = (1 - tau) (u1(-) + u2(-) + ... + un(-)) + tau (u1(+) + u2(+) + ... + un(+))
26+
Subject to:
27+
y_1 - beta0 - beta1 * x_2 + u1(-) - u1(+) = 0
28+
y_2 - beta0 - beta1 * x_2 + u2(-) - u2(+) = 0
29+
.
30+
.
31+
.
32+
y_n - beta0 - beta1 * x_n + un(-) - un(+) = 0
33+
where
34+
ui(-), ui(+) >= 0
35+
i = 1, 2, ..., n
36+
beta0, beta1 in R
37+
n : Number of observations
38+
model is the y = beta1 + beta2 * x + u
39+
40+
# Output
41+
- `["betas"]`: Estimated regression coefficients
42+
- `["residuals"]`: Regression residuals
43+
- `["model"]`: Linear Programming Model
44+
45+
# Examples
46+
```julia-repl
47+
julia> reg0001 = createRegressionSetting(@formula(calls ~ year), phones);
48+
julia> quantileregression(reg0001)
49+
```
50+
51+
"""
52+
function quantileregression(setting::RegressionSetting; tau::Float64 = 0.5)
53+
X, y = @extractRegressionSetting setting
54+
return quantileregression(X, y, tau = tau)
55+
end
56+
57+
58+
"""
59+
60+
quantileregression(X, y, tau = 0.5)
61+
62+
Estimates parameters of linear regression using Quantile Regression Estimator for a given regression setting.
63+
64+
# Arguments
65+
- `X::Array{Float64, 2}`: Design matrix of the linear model.
66+
- `y::Array{Float64, 1}`: Response vector of the linear model.
67+
- `tau::Float64`: Quantile level. Default is 0.5.
68+
69+
70+
# Examples
71+
```julia-repl
72+
julia> income = [420.157651, 541.411707, 901.157457, 639.080229, 750.875606];
73+
julia> foodexp = [255.839425, 310.958667, 485.680014, 402.997356, 495.560775];
74+
75+
julia> n = length(income)
76+
julia> X = hcat(ones(Float64, n), income)
77+
78+
julia> result = quantileregression(X, foodexp, tau = 0.25)
79+
```
80+
81+
82+
"""
83+
function quantileregression(X::Array{Float64,2}, y::Array{Float64,1}; tau::Float64 = 0.5)
84+
n, p = size(X)
85+
86+
m = JuMP.Model(GLPK.Optimizer)
87+
88+
JuMP.@variable(m, d[1:(2n)])
89+
JuMP.@variable(m, beta[1:p])
90+
91+
JuMP.@objective(
92+
m,
93+
Min,
94+
sum((1 - tau) * d[i] for i = 1:n) + sum(tau * d[i] for i = (n+1):2n)
95+
)
96+
97+
for i = 1:n
98+
c = JuMP.@constraint(m, y[i] - sum(X[i, :] .* beta) + d[i] - d[n+i] == 0)
99+
end
100+
101+
for i = 1:(2n)
102+
JuMP.@constraint(m, d[i] >= 0)
103+
end
104+
105+
JuMP.optimize!(m)
106+
107+
betahats = JuMP.value.(beta)
108+
residuals = y .- X * betahats
109+
110+
result = Dict()
111+
result["betas"] = betahats
112+
result["residuals"] = residuals
113+
result["model"] = m
114+
return result
115+
end
116+
117+
end # end of module QuantileRegression

test/runtests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ using LinearAlgebra
66
using LinRegOutliers
77
import Plots: RGB
88

9-
109
include("testdiagnostics.jl")
1110
include("testbasis.jl")
1211
include("testols.jl")
@@ -20,6 +19,7 @@ include("testmvemcd.jl")
2019
include("testbch.jl")
2120
include("testpy95.jl")
2221
include("testlad.jl")
22+
include("testquantileregression.jl")
2323
include("testga.jl")
2424
include("testccf.jl")
2525
include("testsatman2013.jl")

test/testquantileregression.jl

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
@testset "Quantile Regression" begin
2+
3+
eps = 0.001
4+
5+
@testset "Quantile Regression - q = 0.5" begin
6+
income = [420.157651, 541.411707, 901.157457, 639.080229, 750.875606]
7+
foodexp = [255.839425, 310.958667, 485.680014, 402.997356, 495.560775]
8+
9+
n = length(income)
10+
X = hcat(ones(Float64, n), income)
11+
12+
result = quantileregression(X, foodexp, tau = 0.5)
13+
14+
betas2 = result["betas"]
15+
@test abs(betas2[1] - 55.0716060) < eps
16+
@test abs(betas2[2] - 0.4778393) < eps
17+
end
18+
19+
@testset "Quantile Regression - q = 0.25" begin
20+
income = [420.157651, 541.411707, 901.157457, 639.080229, 750.875606]
21+
foodexp = [255.839425, 310.958667, 485.680014, 402.997356, 495.560775]
22+
23+
n = length(income)
24+
X = hcat(ones(Float64, n), income)
25+
26+
result = quantileregression(X, foodexp, tau = 0.25)
27+
28+
betas2 = result["betas"]
29+
@test abs(betas2[1] - 48.0057823) < eps
30+
@test abs(betas2[2] - 0.4856801 ) < eps
31+
end
32+
33+
@testset "Quantile Regression - q = 0.95" begin
34+
income = [420.157651, 541.411707, 901.157457, 639.080229, 750.875606]
35+
foodexp = [255.839425, 310.958667, 485.680014, 402.997356, 495.560775]
36+
37+
n = length(income)
38+
X = hcat(ones(Float64, n), income)
39+
40+
result = quantileregression(X, foodexp, tau = 0.95)
41+
42+
betas2 = result["betas"]
43+
@test abs(betas2[1] - (-48.7124077)) < eps
44+
@test abs(betas2[2] - 0.7248513) < eps
45+
end
46+
47+
end

0 commit comments

Comments
 (0)