-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathRadon.hs
More file actions
130 lines (119 loc) · 5.78 KB
/
Radon.hs
File metadata and controls
130 lines (119 loc) · 5.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Redundant return" #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE TypeApplications #-}
{- | A [case study](https://docs.pymc.io/en/v3/pymc-examples/examples/case_studies/multilevel_modeling.html)
by Gelman and Hill as a hierarchical linear regression model, modelling the relationship between radon levels
in households in different counties and whether these houses contain basements.
-}
module Radon where
import Control.Algebra (Has)
import Control.Monad (replicateM)
import DataSets (countyIdx, dataFloorValues, logRadon,
n_counties)
import Env (Assign ((:=)), nil, Observable, get,
Observables, (<:>), Env)
import Inference.MH as MH (mhRaw)
import Inference.SIM as SIM (simulate)
import Model (Model, halfCauchy', halfNormal, normal)
import Sampler (Sampler, liftS)
-- | Return all the positions that a value occurs within a list
findIndexes :: Eq a => a -> [a] -> [Int]
findIndexes x = map fst . filter ((x==) . snd) . zip [0..]
-- | Radon model environment
type RadonEnv =
'[ "mu_a" ':= Double -- ^ hyperprior mean for county intercept
, "mu_b" ':= Double -- ^ hyperprior mean for county gradient
, "sigma_a" ':= Double -- ^ hyperprior noise for county intercept
, "sigma_b" ':= Double -- ^ hyperprior noise for county gradient
, "a" ':= Double -- ^ intercept for a county
, "b" ':= Double -- ^ gradient for a county
, "log_radon" ':= Double -- ^ log-radon level for a house
]
-- | Prior distribution over model hyperparameters
radonPrior :: (Observables env '["mu_a", "mu_b", "sigma_a", "sigma_b"] Double)
=> Model env sig m (Double, Double, Double, Double)
radonPrior = do
mu_a <- normal 0 10 #mu_a
sigma_a <- halfNormal 5 #sigma_a
mu_b <- normal 0 10 #mu_b
sigma_b <- halfNormal 5 #sigma_b
return (mu_a, sigma_a, mu_b, sigma_b)
-- | The Radon model
-- | We have predefined parameters: n counties = 85, len(floor_x) = 919, len(county_idx) = 919
radonModel :: (Observables env '["mu_a", "mu_b", "sigma_a", "sigma_b", "a", "b", "log_radon"] Double)
-- | number of counties
=> Int
-- | whether each house has a basement (1) or not (0)
-> [Int]
-- | the county (as an integer) each house belongs to
-> [Int]
-- | radon levels for houses
-> Model env sig m [Double]
radonModel n_counties floor_x county_idx = do
(mu_a, sigma_a, mu_b, sigma_b) <- radonPrior
-- Intercept for each county
a <- replicateM n_counties (normal mu_a sigma_a #a) -- length = 85
-- Gradient for each county
b <- replicateM n_counties (normal mu_b sigma_b #b) -- length = 85
-- Model error
eps <- halfCauchy' 5
let -- Get county intercept for each datapoint
a_county_idx = map (a !!) county_idx
-- Get county gradient for each datapoint
b_county_idx = map (b !!) county_idx
floor_values = map fromIntegral floor_x
-- Get radon estimate for each data point
radon_est = zipWith (+) a_county_idx (zipWith (*) b_county_idx floor_values)
-- Sample radon amount for each data point
radon_like <- mapM (\rad_est -> normal rad_est eps #log_radon) radon_est
return radon_like
mkRecordHLR :: ([Double], [Double], [Double], [Double], [Double], [Double], [Double]) -> Env RadonEnv
mkRecordHLR (mua, mub, siga, sigb, a, b, lograds) =
#mu_a := mua <:> #mu_b := mub <:> #sigma_a := siga <:> #sigma_b := sigb <:> #a := a <:> #b := b <:> #log_radon := lograds <:> nil
-- | Simulate from the Radon model
simRadon :: Sampler ([Double], [Double])
simRadon = do
let -- Specify model environment
env_in :: Env RadonEnv
env_in = mkRecordHLR ([1.45], [-0.68], [0.3], [0.2], [], [], [])
-- Simulate model
(bs, env_out) <- SIM.simulate env_in (radonModel n_counties dataFloorValues countyIdx)
-- Retrieve and return the radon-levels for houses with basements and those without basements
let basementIdxs = findIndexes 0 dataFloorValues
noBasementIdxs = findIndexes 1 dataFloorValues
basementPoints = map (bs !!) basementIdxs
nobasementPoints = map (bs !!) noBasementIdxs
return (basementPoints, nobasementPoints)
-- | Run MH inference and return posterior for hyperparameter means of intercept and gradient
mhRadonpost :: Sampler ([Double], [Double])
mhRadonpost = do
let -- Specify model environment
env_in :: Env RadonEnv
env_in = mkRecordHLR ([], [], [], [], [], [], logRadon)
-- Run MH inference for 2000 iterations
env_outs <- MH.mhRaw 2000 (radonModel n_counties dataFloorValues countyIdx) env_in nil (#mu_a <:> #mu_b <:> #sigma_a <:> #sigma_b <:> nil)
-- Retrieve sampled values for model hyperparameters mu_a and mu_b
let mu_a = concatMap (get #mu_a) env_outs
mu_b = concatMap (get #mu_a) env_outs
return (mu_a, mu_b)
-- | Run MH inference and return predictive posterior for intercepts and gradients
mhRadon :: Sampler ([Double], [Double])
mhRadon = do
-- Specify model environment
let -- Specify model environment
env_in :: Env RadonEnv
env_in = mkRecordHLR ([], [], [], [], [], [], logRadon)
-- Run MH inference for 1500 iterations
env_outs <- MH.mhRaw 1500 (radonModel @RadonEnv n_counties dataFloorValues countyIdx) env_in nil (#mu_a <:> #mu_b <:> #sigma_a <:> #sigma_b <:> nil)
-- Retrieve most recently sampled values for each house's intercept and gradient, a and b
let env_pred = head env_outs
as = get #a env_pred
bs = get #b env_pred
return (as, bs)