Skip to content

Commit 94b552f

Browse files
shepazonbrmur
authored and committed
Add initial files for example
1 parent 95d6ac1 commit 94b552f

4 files changed

Lines changed: 322 additions & 0 deletions

File tree

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// swift-tools-version: 5.9
2+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
// SPDX-License-Identifier: Apache-2.0
4+
//
5+
// (swift-tools-version has two lines here because it needs to be the first
6+
// line in the file, but it should also appear in the snippet below)
7+
//
8+
// snippet-start:[swift.glue.scenario.package]
// swift-tools-version: 5.9
//
// The swift-tools-version declares the minimum version of Swift required to
// build this package.

import PackageDescription

/// Package manifest for the AWS Glue basics scenario: a single executable
/// target built from `Sources`, using the AWS SDK for Swift and
/// Swift Argument Parser.
let package = Package(
    name: "glue-scenario",
    // Let Xcode know the minimum Apple platforms supported.
    platforms: [
        .macOS(.v13),
        .iOS(.v15)
    ],
    dependencies: [
        // Dependencies declare other packages that this package depends on.
        .package(
            url: "https://github.com/awslabs/aws-sdk-swift",
            from: "1.0.0"
        ),
        // Use a released version range instead of tracking the `main`
        // branch, so that builds are reproducible and are not broken by
        // unreleased changes upstream.
        .package(
            url: "https://github.com/apple/swift-argument-parser.git",
            from: "1.0.0"
        )
    ],
    targets: [
        // Targets are the basic building blocks of a package, defining a
        // module or a test suite. Targets can depend on other targets in
        // this package and products from dependencies.
        .executableTarget(
            name: "glue-scenario",
            dependencies: [
                .product(name: "AWSGlue", package: "aws-sdk-swift"),
                .product(name: "AWSS3", package: "aws-sdk-swift"),
                .product(name: "ArgumentParser", package: "swift-argument-parser")
            ],
            path: "Sources"
        )
    ]
)
// snippet-end:[swift.glue.scenario.package]
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
// An example that shows how to use the AWS SDK for Swift to demonstrate
5+
// creating and using crawlers and jobs using AWS Glue.
6+
//
7+
// 0. Upload the Python job script to Amazon S3 so it can be used when
8+
// calling `startJobRun()` later.
9+
// 1. Create a crawler, pass it the IAM role and the URL of the public Amazon
10+
// S3 bucket that contains the source data:
11+
// s3://crawler-public-us-east-1/flight/2016/csv.
12+
// 2. Start the crawler. This takes time, so after starting it, use a loop
13+
// that calls `getCrawler()` until the state is "READY".
14+
// 3. Get the database created by the crawler, and the tables in the
15+
// database. Display them to the user.
16+
// 4. Create a job. Pass it the IAM role and the URL to a Python ETL script
17+
// previously uploaded to the user's S3 bucket.
18+
// 5. Start a job run, passing the following custom arguments. These are
19+
// expected by the ETL script, so must exactly match.
20+
// * `--input_database: <name of the database created by the crawler>`
21+
// * `--input_table: <name of the table created by the crawler>`
22+
// * `--output_bucket_url: <URL to the scaffold bucket created for the
23+
// user>`
24+
// 6. Loop and get the job run until it returns one of the following states:
25+
// "SUCCEEDED", "STOPPED", "FAILED", or "TIMEOUT".
26+
// 7. Output data is stored in a group of files in the user's S3 bucket.
27+
// Either direct the user to their location or download a file and display
28+
// the results inline.
29+
// 8. List the jobs for the user's account.
30+
// 9. Get job run details for a job run.
31+
// 10. Delete the demo job.
32+
// 11. Delete the database and tables created by the example.
33+
// 12. Delete the crawler created by the example.
34+
35+
import ArgumentParser
36+
import AWSS3
37+
import Foundation
38+
39+
// snippet-start:[swift.glue.import]
40+
import AWSClientRuntime
41+
import AWSGlue
42+
// snippet-end:[swift.glue.import]
43+
44+
/// Command-line interface and driver for the AWS Glue basics scenario.
///
/// Each `@Option` and `@Flag` property is a command-line argument parsed by
/// ArgumentParser. ``runAsync()`` carries out the example itself.
struct ExampleCommand: ParsableCommand {
    @Option(help: "The AWS IAM role to use for AWS Glue calls.")
    var role: String

    @Option(help: "The Amazon S3 bucket to use for this example.")
    var bucket: String

    @Option(help: "The Amazon S3 URL of the data to crawl.")
    var s3url: String = "s3://crawler-public-us-east-1/flight/2016/csv"

    @Option(help: "The Python script to run as a job with AWS Glue.")
    var script: String = "./flight_etl_job_script.py"

    @Option(help: "The AWS Region to run AWS API calls in.")
    var awsRegion = "us-east-1"

    @Flag(help: "If this flag is set, output files will have the '.json' extension.")
    var rename = false

    @Option(help: "A prefix string to use when naming tables.")
    var tablePrefix = "swift-glue-basics-table"

    @Option(
        help: ArgumentHelp("The level of logging for the Swift SDK to perform."),
        completion: .list([
            "critical",
            "debug",
            "error",
            "info",
            "notice",
            "trace",
            "warning"
        ])
    )
    var logLevel: String = "error"

    // Declared with `let`: the configuration is never mutated, and an
    // immutable static avoids exposing shared mutable global state.
    static let configuration = CommandConfiguration(
        commandName: "glue-scenario",
        abstract: """
        Demonstrates various features of AWS Glue.
        """,
        discussion: """
        An example showing how to use AWS Glue to create, run, and monitor
        crawlers and jobs.
        """
    )

    /*
    /// Prompt for an input string of at least a minimum length.
    ///
    /// - Parameters:
    ///   - prompt: The prompt string to display.
    ///   - minLength: The minimum number of characters to allow in the
    ///     response. Default value is 1.
    ///
    /// - Returns: The entered string.
    func stringRequest(_ prompt: String, minLength: Int = 1) -> String {
        while true {
            print(prompt, terminator: "")
            let str = readLine()

            guard let str else {
                continue
            }
            if str.count >= minLength {
                return str
            } else {
                print("*** Response must be at least \(minLength) character(s) long.")
            }
        }
    }

    /// Ask a yes/no question.
    ///
    /// - Parameter prompt: A prompt string to print.
    ///
    /// - Returns: `true` if the user answered "Y", otherwise `false`.
    func yesNoRequest(_ prompt: String) -> Bool {
        while true {
            let answer = stringRequest(prompt).lowercased()
            if answer == "y" || answer == "n" {
                return answer == "y"
            }
        }
    }
    */

    /// Generate and return a unique file name that begins with the specified
    /// string.
    ///
    /// - Parameters:
    ///   - prefix: Text to use at the beginning of the returned name.
    ///
    /// - Returns: A string containing a unique filename that begins with the
    ///   specified `prefix`.
    ///
    /// The returned name uses a random number between 1 million and 1 billion to
    /// provide reasonable certainty of uniqueness for the purposes of this
    /// example.
    func tempName(prefix: String) -> String {
        return "\(prefix)-\(Int.random(in: 1_000_000..<1_000_000_000))"
    }

    /// Called by ``main()`` to run the bulk of the example.
    ///
    /// - Throws: Any error thrown while creating the AWS Glue client
    ///   configuration.
    ///
    /// At this stage the function only creates the client and prepares the
    /// names and values used by the scenario's steps; the steps themselves
    /// are not implemented here yet.
    func runAsync() async throws {
        let config = try await GlueClient.GlueClientConfiguration(region: awsRegion)
        let glueClient = GlueClient(config: config)

        print("Welcome to the AWS SDK for Swift basic scenario for AWS Glue!")

        // Create random names for things that need them.

        let crawlerName = tempName(prefix: "swift-glue-basics-crawler")
        let databaseName = tempName(prefix: "swift-glue-basics-db")

        // A name for the AWS Glue job.

        let jobName = tempName(prefix: "scenario-job")

        // A name to give the Python script upon upload to the Amazon S3
        // bucket, and the full URL of the script on S3.
        let scriptName = "jobscript.py"
        let scriptURL = "s3://\(bucket)/\(scriptName)"

        // Schedule string in `cron` format, as described here:
        // https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html
        let cron = "cron(15 12 * * ? *)"

        //=====================================================================
        // 0. Upload the Python script to the target bucket so it's available
        //    for use by the AWS Glue service.
        //=====================================================================
    }
}
178+
179+
/// Hosts the program's asynchronous entry point.
@main
struct Main {
    /// Parse the command-line arguments into an `ExampleCommand` and await
    /// its `runAsync()`.
    ///
    /// Any error thrown while parsing or running is passed to
    /// `ExampleCommand.exit(withError:)`.
    static func main() async {
        // The first element of `CommandLine.arguments` is dropped before
        // the remaining arguments are handed to the parser.
        let userArguments = Array(CommandLine.arguments.dropFirst())

        do {
            try await ExampleCommand.parse(userArguments).runAsync()
        } catch {
            ExampleCommand.exit(withError: error)
        }
    }
}
193+
// snippet-end:[swift.glue.scenario]
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""
5+
This script is used by the AWS Glue _getting started with crawlers and jobs_
6+
scenario to perform extract, transform, and load (ETL) operations on sample
7+
flight data. As part of the example, it is uploaded to an Amazon Simple
8+
Storage Service (Amazon S3) bucket so that AWS Glue can access it.
9+
"""
10+
11+
# pylint: disable=undefined-variable
12+
13+
# snippet-start:[glue.swift.basics.job-script]
14+
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job

# These custom arguments must be passed as Arguments to the StartJobRun request.
#     --input_database    The name of a metadata database that is contained in
#                         your AWS Glue Data Catalog and that contains tables
#                         that describe the data to be processed.
#     --input_table       The name of a table in the database that describes the
#                         data to be processed.
#     --output_bucket_url An S3 bucket that receives the transformed output
#                         data.
option_names = ["JOB_NAME", "input_database", "input_table", "output_bucket_url"]
args = getResolvedOptions(sys.argv, option_names)

# Set up the Spark and Glue contexts, then initialize the job with the
# resolved arguments.
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args["JOB_NAME"], args)

# Script generated for node S3 Flight Data.
# Reads the source table named by the job arguments from the Data Catalog.
S3FlightData_node1 = glueContext.create_dynamic_frame.from_catalog(
    database=args["input_database"],
    table_name=args["input_table"],
    transformation_ctx="S3FlightData_node1",
)

# This mapping performs two main functions:
# 1. It simplifies the output by removing most of the fields from the data.
# 2. It renames some fields. For example, `fl_date` is renamed to `flight_date`.
# Each entry is (source name, source type, target name, target type).
flight_mappings = [
    ("year", "long", "year", "long"),
    ("month", "long", "month", "tinyint"),
    ("day_of_month", "long", "day", "tinyint"),
    ("fl_date", "string", "flight_date", "string"),
    ("carrier", "string", "carrier", "string"),
    ("fl_num", "long", "flight_num", "long"),
    ("origin_city_name", "string", "origin_city_name", "string"),
    ("origin_state_abr", "string", "origin_state_abr", "string"),
    ("dest_city_name", "string", "dest_city_name", "string"),
    ("dest_state_abr", "string", "dest_state_abr", "string"),
    ("dep_time", "long", "departure_time", "long"),
    ("wheels_off", "long", "wheels_off", "long"),
    ("wheels_on", "long", "wheels_on", "long"),
    ("arr_time", "long", "arrival_time", "long"),
    ("mon", "string", "mon", "string"),
]
ApplyMapping_node2 = ApplyMapping.apply(
    frame=S3FlightData_node1,
    mappings=flight_mappings,
    transformation_ctx="ApplyMapping_node2",
)

# Script generated for node Revised Flight Data.
# Writes the mapped frame as JSON to the output location given in the job
# arguments, with no partition keys.
output_options = {
    "path": args["output_bucket_url"],
    "partitionKeys": [],
}
RevisedFlightData_node3 = glueContext.write_dynamic_frame.from_options(
    frame=ApplyMapping_node2,
    connection_type="s3",
    format="json",
    connection_options=output_options,
    transformation_ctx="RevisedFlightData_node3",
)

job.commit()
81+
# snippet-end:[glue.swift.basics.job-script]

swift/example_code/glue/scenario/test.sh

Whitespace-only changes.

0 commit comments

Comments
 (0)