-
Notifications
You must be signed in to change notification settings - Fork 84
Expand file tree
/
Copy pathexample_emr_serverless.py
More file actions
60 lines (54 loc) · 2.18 KB
/
example_emr_serverless.py
File metadata and controls
60 lines (54 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
from datetime import datetime
from airflow import DAG
from airflow.models import Variable
from airflow.providers.amazon.aws.operators.emr import EmrServerlessStartJobOperator
# Deployment-specific settings, looked up from Airflow Variables when this
# module is imported (i.e. every time the DAG file is parsed).
APPLICATION_ID = Variable.get("emr_serverless_application_id")
JOB_ROLE_ARN = Variable.get("emr_serverless_job_role")
S3_LOGS_BUCKET = Variable.get("emr_serverless_log_bucket")

# A single-task DAG with no schedule interval that starts one job run on the
# EMR Serverless application identified by APPLICATION_ID.
with DAG(
    dag_id="example_emr_serverless_job",
    start_date=datetime(2021, 1, 1),
    schedule_interval=None,
    catchup=False,
    tags=["example"],
) as dag:
    # Alternative to the Variables above: template the same two values from
    # the extras of an Airflow connection.
    # APPLICATION_ID = '{{ conn.emr_eks.extra_dejson["virtual_cluster_id"] }}'
    # JOB_ROLE_ARN = '{{ conn.emr_eks.extra_dejson["job_role_arn"] }}'
    # NOTE(review): the connection id (`emr_eks`) and the `virtual_cluster_id`
    # key look carried over from an EMR on EKS example -- substitute the
    # connection id and extra keys that hold your application id and role ARN.

    # [START howto_operator_emr_serverless_job]
    # Spark submit definition; the entry point is the pi.py script under the
    # Spark examples directory, addressed with a local:// URI.
    spark_pi_driver = {
        "sparkSubmit": {
            "entryPoint": "local:///usr/lib/spark/examples/src/main/python/pi.py",
        },
    }

    # Send job monitoring output to the "logs/" prefix of the configured bucket.
    s3_log_overrides = {
        "monitoringConfiguration": {
            "s3MonitoringConfiguration": {
                "logUri": f"s3://{S3_LOGS_BUCKET}/logs/",
            },
        },
    }

    job_starter = EmrServerlessStartJobOperator(
        task_id="start_job",
        application_id=APPLICATION_ID,
        execution_role_arn=JOB_ROLE_ARN,
        job_driver=spark_pi_driver,
        configuration_overrides=s3_log_overrides,
        config={"name": "sample-job"},
    )
    # [END howto_operator_emr_serverless_job]