Skip to content

Commit 6cf4ea8

Browse files
authored
Merge pull request #131 from Open-Earth-Foundation/waste_eu_DE
pipeline eurostats
2 parents fc1106f + 15ed406 commit 6cf4ea8

21 files changed

Lines changed: 1767 additions & 0 deletions
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
-- Upsert the activity sub-categories present in raw_data.mc_eurostats_staging
-- into modelled.activity_subcategory. Rows are keyed by a deterministic
-- MD5-derived activity_id, so re-running the script updates instead of
-- duplicating.
WITH staged_activities AS (
    -- Distinct activity / methodology combinations found in staging.
    SELECT DISTINCT
        activity_name,
        activity_units,
        methodology_name,
        gpc_reference_number
    FROM raw_data.mc_eurostats_staging
    WHERE activity_name IS NOT NULL
),

keyed_activities AS (
    -- Compute the shared key ingredients once so that activity_id and the
    -- stored columns are always derived from the same values.
    SELECT
        activity_name,
        activity_units,
        -- Kept as the plain (un-hyphenated) MD5 hex text because that exact
        -- text is one of the inputs of the activity_id hash below.
        -- CONCAT_WS ignores NULL arguments.
        MD5(CONCAT_WS('-', methodology_name, gpc_reference_number)) AS gpcmethod_hash,
        jsonb_build_object(
            'methane-commitment-solid-waste-inboundary-oxidation-factor',
            'oxidation-factor-well-managed-landfill'
        ) AS subcategory_type
    FROM staged_activities
)

INSERT INTO modelled.activity_subcategory (
    activity_id,
    activity_name,
    activity_units,
    gpcmethod_id,
    activity_subcategory_type
)
-- DISTINCT collapses staged rows that end up with identical output columns.
SELECT DISTINCT
    -- activity_id = MD5 of name, units, sub-category JSON text and the
    -- methodology hash, joined with '-'.
    MD5(CONCAT_WS(
        '-',
        activity_name,
        activity_units,
        subcategory_type::TEXT,
        gpcmethod_hash
    ))::UUID AS activity_id,
    activity_name,
    activity_units,
    gpcmethod_hash::UUID AS gpcmethod_id,
    subcategory_type AS activity_subcategory_type
FROM keyed_activities
ON CONFLICT (activity_id) DO UPDATE SET
    activity_name = EXCLUDED.activity_name,
    activity_units = EXCLUDED.activity_units,
    gpcmethod_id = EXCLUDED.gpcmethod_id,
    activity_subcategory_type = EXCLUDED.activity_subcategory_type;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
-- Upsert the activity sub-categories present in
-- raw_data.biolog_treatment_eurostats_staging into
-- modelled.activity_subcategory. Rows are keyed by a deterministic
-- MD5-derived activity_id, so re-running the script updates instead of
-- duplicating.
WITH staged_activities AS (
    -- Distinct activity / methodology combinations found in staging.
    SELECT DISTINCT
        activity_name,
        activity_units,
        methodology_name,
        gpc_reference_number
    FROM raw_data.biolog_treatment_eurostats_staging
    WHERE activity_name IS NOT NULL
),

keyed_activities AS (
    -- Compute the shared key ingredients once so that activity_id and the
    -- stored columns are always derived from the same values.
    SELECT
        activity_name,
        activity_units,
        -- Kept as the plain (un-hyphenated) MD5 hex text because that exact
        -- text is one of the inputs of the activity_id hash below.
        -- CONCAT_WS ignores NULL arguments.
        MD5(CONCAT_WS('-', methodology_name, gpc_reference_number)) AS gpcmethod_hash,
        jsonb_build_object(
            'biological-treatment-inboundary-waste-state', 'waste-state-wet-waste',
            'biological-treatment-inboundary-treatment-type', 'treatment-type-composting'
        ) AS subcategory_type
    FROM staged_activities
)

INSERT INTO modelled.activity_subcategory (
    activity_id,
    activity_name,
    activity_units,
    gpcmethod_id,
    activity_subcategory_type
)
-- DISTINCT collapses staged rows that end up with identical output columns.
SELECT DISTINCT
    -- activity_id = MD5 of name, units, sub-category JSON text and the
    -- methodology hash, joined with '-'.
    MD5(CONCAT_WS(
        '-',
        activity_name,
        activity_units,
        subcategory_type::TEXT,
        gpcmethod_hash
    ))::UUID AS activity_id,
    activity_name,
    activity_units,
    gpcmethod_hash::UUID AS gpcmethod_id,
    subcategory_type AS activity_subcategory_type
FROM keyed_activities
ON CONFLICT (activity_id) DO UPDATE SET
    activity_name = EXCLUDED.activity_name,
    activity_units = EXCLUDED.activity_units,
    gpcmethod_id = EXCLUDED.gpcmethod_id,
    activity_subcategory_type = EXCLUDED.activity_subcategory_type;
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
-- Upsert the activity sub-categories present in
-- raw_data.incineration_eurostats_staging into
-- modelled.activity_subcategory. Rows are keyed by a deterministic
-- MD5-derived activity_id, so re-running the script updates instead of
-- duplicating.
WITH staged_activities AS (
    -- Distinct activity / methodology combinations found in staging.
    SELECT DISTINCT
        activity_name,
        activity_units,
        methodology_name,
        gpc_reference_number
    FROM raw_data.incineration_eurostats_staging
    WHERE activity_name IS NOT NULL
),

keyed_activities AS (
    -- Compute the shared key ingredients once so that activity_id and the
    -- stored columns are always derived from the same values.
    SELECT
        activity_name,
        activity_units,
        -- Kept as the plain (un-hyphenated) MD5 hex text because that exact
        -- text is one of the inputs of the activity_id hash below.
        -- CONCAT_WS ignores NULL arguments.
        MD5(CONCAT_WS('-', methodology_name, gpc_reference_number)) AS gpcmethod_hash,
        jsonb_build_object(
            'incineration-waste-inboundary-technology', 'technology-continuous-incineration',
            'incineration-waste-inboundary-boiler-type', 'boiler-type-stoker',
            'incineration-waste-inboundary-waste-composition', 'waste-composition-municipal-solid-waste'
        ) AS subcategory_type
    FROM staged_activities
)

INSERT INTO modelled.activity_subcategory (
    activity_id,
    activity_name,
    activity_units,
    gpcmethod_id,
    activity_subcategory_type
)
-- DISTINCT collapses staged rows that end up with identical output columns.
SELECT DISTINCT
    -- activity_id = MD5 of name, units, sub-category JSON text and the
    -- methodology hash, joined with '-'.
    MD5(CONCAT_WS(
        '-',
        activity_name,
        activity_units,
        subcategory_type::TEXT,
        gpcmethod_hash
    ))::UUID AS activity_id,
    activity_name,
    activity_units,
    gpcmethod_hash::UUID AS gpcmethod_id,
    subcategory_type AS activity_subcategory_type
FROM keyed_activities
ON CONFLICT (activity_id) DO UPDATE SET
    activity_name = EXCLUDED.activity_name,
    activity_units = EXCLUDED.activity_units,
    gpcmethod_id = EXCLUDED.gpcmethod_id,
    activity_subcategory_type = EXCLUDED.activity_subcategory_type;
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from mage_ai.settings.repo import get_repo_path
2+
from mage_ai.io.config import ConfigFileLoader
3+
from mage_ai.io.postgres import Postgres
4+
from pandas import DataFrame
5+
from os import path
6+
7+
if 'data_exporter' not in globals():
8+
from mage_ai.data_preparation.decorators import data_exporter
9+
10+
@data_exporter
def export_data_to_postgres(df: DataFrame, **kwargs) -> None:
    """Write ``df`` to ``raw_data.biolog_treatment_eurostats_staging``.

    Connection settings come from the ``default`` profile of the
    ``io_config.yaml`` file located under ``get_repo_path()``. The export is
    issued with ``if_exists='replace'`` and ``index=False``.

    Args:
        df: DataFrame to export.
        **kwargs: Accepted for the decorator's calling convention; not read
            by this function.
    """
    target_schema, target_table = 'raw_data', 'biolog_treatment_eurostats_staging'

    # Resolve the connection profile before opening the connection.
    settings = ConfigFileLoader(
        path.join(get_repo_path(), 'io_config.yaml'),
        'default',
    )

    with Postgres.with_config(settings) as pg:
        pg.export(
            df,
            target_schema,
            target_table,
            index=False,
            if_exists='replace',
        )
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Remove the three Eurostat staging tables in a single statement.
-- IF EXISTS: a table that is already absent is skipped without error.
-- CASCADE: objects that depend on these tables are dropped as well.
DROP TABLE IF EXISTS
    raw_data.mc_eurostats_staging,
    raw_data.biolog_treatment_eurostats_staging,
    raw_data.incineration_eurostats_staging
CASCADE;
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
-- Upsert emission factors staged in
-- raw_data.biolog_treatment_eurostats_staging into modelled.emissions_factor.
-- All identifiers are deterministic MD5-derived UUIDs, so re-running the
-- script updates existing rows instead of duplicating them.
WITH staged_factors AS (
    -- Distinct staged rows that carry an emission factor value.
    SELECT DISTINCT
        gas_name,
        gpc_reference_number,
        emissionfactor_value,
        activity_units,
        methodology_name,
        activity_name,
        locode AS actor_id
    FROM raw_data.biolog_treatment_eurostats_staging
    WHERE emissionfactor_value IS NOT NULL
),

keyed_factors AS (
    -- Attach publisher, dataset and activity identifiers to every staged row.
    SELECT
        gas_name,
        emissionfactor_value,
        actor_id,
        MD5(CONCAT_WS('-', 'IPCC', 'https://www.ipcc.ch/'))::UUID AS publisher_id,
        MD5(CONCAT_WS(
            '-',
            'IPCC',
            'IPCC Emission Factor Database (EFDB) [2006 IPCC Guidelines]',
            'https://www.ipcc-nggip.iges.or.jp/EFDB/main.php'
        ))::UUID AS dataset_id,
        -- activity_id = MD5 of name, units, sub-category JSON text and the
        -- MD5 of methodology_name + gpc_reference_number, joined with '-'.
        MD5(CONCAT_WS(
            '-',
            activity_name,
            activity_units,
            jsonb_build_object(
                'biological-treatment-inboundary-waste-state', 'waste-state-wet-waste',
                'biological-treatment-inboundary-treatment-type', 'treatment-type-composting'
            )::TEXT,
            MD5(CONCAT_WS('-', methodology_name, gpc_reference_number))
        ))::UUID AS activity_id
    FROM staged_factors
)

INSERT INTO modelled.emissions_factor (
    emissionfactor_id,
    publisher_id,
    dataset_id,
    activity_id,
    gas_name,
    emissionfactor_value,
    unit_denominator,
    active_to,
    active_from,
    actor_id
)
SELECT
    -- NOTE(review): emissionfactor_value is not part of this hash, so two
    -- staged rows that differ only in value would map to the same
    -- emissionfactor_id within one statement - confirm staging cannot
    -- contain such rows.
    MD5(CONCAT_WS(
        '-',
        publisher_id::TEXT,
        dataset_id::TEXT,
        activity_id::TEXT,
        'kg',
        gas_name,
        actor_id,
        2023
    ))::UUID AS emissionfactor_id,
    publisher_id,
    dataset_id,
    activity_id,
    gas_name,
    emissionfactor_value,
    'kg' AS unit_denominator,
    NULL::DATE AS active_to,
    NULL::DATE AS active_from,
    actor_id
FROM keyed_factors
ON CONFLICT (emissionfactor_id) DO UPDATE SET
    publisher_id = EXCLUDED.publisher_id,
    dataset_id = EXCLUDED.dataset_id,
    activity_id = EXCLUDED.activity_id,
    gas_name = EXCLUDED.gas_name,
    emissionfactor_value = EXCLUDED.emissionfactor_value,
    unit_denominator = EXCLUDED.unit_denominator,
    active_to = EXCLUDED.active_to,
    active_from = EXCLUDED.active_from,
    actor_id = EXCLUDED.actor_id;
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
-- Upsert emission factors staged in raw_data.incineration_eurostats_staging
-- into modelled.emissions_factor. All identifiers are deterministic
-- MD5-derived UUIDs, so re-running the script updates existing rows instead
-- of duplicating them.
WITH staged_factors AS (
    -- Distinct staged rows that carry an emission factor value.
    SELECT DISTINCT
        gas_name,
        gpc_reference_number,
        emissionfactor_value,
        activity_units,
        methodology_name,
        activity_name,
        locode AS actor_id
    FROM raw_data.incineration_eurostats_staging
    WHERE emissionfactor_value IS NOT NULL
),

keyed_factors AS (
    -- Attach publisher, dataset and activity identifiers to every staged row.
    SELECT
        gas_name,
        emissionfactor_value,
        actor_id,
        MD5(CONCAT_WS('-', 'IPCC', 'https://www.ipcc.ch/'))::UUID AS publisher_id,
        MD5(CONCAT_WS(
            '-',
            'IPCC',
            'IPCC Emission Factor Database (EFDB) [2006 IPCC Guidelines]',
            'https://www.ipcc-nggip.iges.or.jp/EFDB/main.php'
        ))::UUID AS dataset_id,
        -- activity_id = MD5 of name, units, sub-category JSON text and the
        -- MD5 of methodology_name + gpc_reference_number, joined with '-'.
        MD5(CONCAT_WS(
            '-',
            activity_name,
            activity_units,
            jsonb_build_object(
                'incineration-waste-inboundary-technology', 'technology-continuous-incineration',
                'incineration-waste-inboundary-boiler-type', 'boiler-type-stoker',
                'incineration-waste-inboundary-waste-composition', 'waste-composition-municipal-solid-waste'
            )::TEXT,
            MD5(CONCAT_WS('-', methodology_name, gpc_reference_number))
        ))::UUID AS activity_id
    FROM staged_factors
)

INSERT INTO modelled.emissions_factor (
    emissionfactor_id,
    publisher_id,
    dataset_id,
    activity_id,
    gas_name,
    emissionfactor_value,
    unit_denominator,
    active_to,
    active_from,
    actor_id
)
SELECT
    -- NOTE(review): emissionfactor_value is not part of this hash, so two
    -- staged rows that differ only in value would map to the same
    -- emissionfactor_id within one statement - confirm staging cannot
    -- contain such rows.
    MD5(CONCAT_WS(
        '-',
        publisher_id::TEXT,
        dataset_id::TEXT,
        activity_id::TEXT,
        'kg',
        gas_name,
        actor_id,
        2023
    ))::UUID AS emissionfactor_id,
    publisher_id,
    dataset_id,
    activity_id,
    gas_name,
    emissionfactor_value,
    'kg' AS unit_denominator,
    NULL::DATE AS active_to,
    NULL::DATE AS active_from,
    actor_id
FROM keyed_factors
ON CONFLICT (emissionfactor_id) DO UPDATE SET
    publisher_id = EXCLUDED.publisher_id,
    dataset_id = EXCLUDED.dataset_id,
    activity_id = EXCLUDED.activity_id,
    gas_name = EXCLUDED.gas_name,
    emissionfactor_value = EXCLUDED.emissionfactor_value,
    unit_denominator = EXCLUDED.unit_denominator,
    active_to = EXCLUDED.active_to,
    active_from = EXCLUDED.active_from,
    actor_id = EXCLUDED.actor_id;

0 commit comments

Comments
 (0)