|
| 1 | +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | +// |
| 4 | +// An example that uses the AWS SDK for Swift to create and run crawlers |
| 5 | +// and jobs with AWS Glue. |
| 6 | +// |
| 7 | +// 0. Upload the Python job script to Amazon S3 so it can be used when |
| 8 | +// calling `startJobRun()` later. |
| 9 | +// 1. Create a crawler, pass it the IAM role and the URL of the public Amazon |
| 10 | +// S3 bucket that contains the source data: |
| 11 | +// s3://crawler-public-us-east-1/flight/2016/csv. |
| 12 | +// 2. Start the crawler. This takes time, so after starting it, use a loop |
| 13 | +// that calls `getCrawler()` until the state is "READY". |
| 14 | +// 3. Get the database created by the crawler, and the tables in the |
| 15 | +// database. Display them to the user. |
| 16 | +// 4. Create a job. Pass it the IAM role and the URL to a Python ETL script |
| 17 | +// previously uploaded to the user's S3 bucket. |
| 18 | +// 5. Start a job run, passing the following custom arguments. These are |
| 19 | +// expected by the ETL script, so must exactly match. |
| 20 | +// * `--input_database: <name of the database created by the crawler>` |
| 21 | +// * `--input_table: <name of the table created by the crawler>` |
| 22 | +// * `--output_bucket_url: <URL to the scaffold bucket created for the |
| 23 | +// user>` |
| 24 | +// 6. Loop and get the job run until it returns one of the following states: |
| 25 | +// "SUCCEEDED", "STOPPED", "FAILED", or "TIMEOUT". |
| 26 | +// 7. Output data is stored in a group of files in the user's S3 bucket. |
| 27 | +// Either direct the user to their location or download a file and display |
| 28 | +// the results inline. |
| 29 | +// 8. List the jobs for the user's account. |
| 30 | +// 9. Get job run details for a job run. |
| 31 | +// 10. Delete the demo job. |
| 32 | +// 11. Delete the database and tables created by the example. |
| 33 | +// 12. Delete the crawler created by the example. |
| 34 | + |
| 35 | +import ArgumentParser |
| 36 | +import AWSS3 |
| 37 | +import Foundation |
| 38 | + |
| 39 | +// snippet-start:[swift.glue.import] |
| 40 | +import AWSClientRuntime |
| 41 | +import AWSGlue |
| 42 | +// snippet-end:[swift.glue.import] |
| 43 | + |
/// The command-line interface for the AWS Glue basics scenario.
///
/// Declares the options accepted on the command line and implements the
/// scenario itself in ``runAsync()``.
struct ExampleCommand: ParsableCommand {
    @Option(help: "The AWS IAM role to use for AWS Glue calls.")
    var role: String

    @Option(help: "The Amazon S3 bucket to use for this example.")
    var bucket: String

    @Option(help: "The Amazon S3 URL of the data to crawl.")
    var s3url: String = "s3://crawler-public-us-east-1/flight/2016/csv"

    @Option(help: "The Python script to run as a job with AWS Glue.")
    var script: String = "./flight_etl_job_script.py"

    @Option(help: "The AWS Region to run AWS API calls in.")
    var awsRegion: String = "us-east-1"

    @Flag(help: "If this flag is set, output files will have the '.json' extension.")
    var rename: Bool = false

    @Option(help: "A prefix string to use when naming tables.")
    var tablePrefix: String = "swift-glue-basics-table"

    @Option(
        help: ArgumentHelp("The level of logging for the Swift SDK to perform."),
        completion: .list([
            "critical",
            "debug",
            "error",
            "info",
            "notice",
            "trace",
            "warning"
        ])
    )
    var logLevel: String = "error"

    // Declared with `let` rather than `var`: the configuration is never
    // mutated, and an immutable static avoids shared mutable global state
    // (which strict concurrency checking rejects).
    static let configuration = CommandConfiguration(
        commandName: "glue-scenario",
        abstract: """
        Demonstrates various features of AWS Glue.
        """,
        discussion: """
        An example showing how to use AWS Glue to create, run, and monitor
        crawlers and jobs.
        """
    )

/*
    /// Prompt for an input string of at least a minimum length.
    ///
    /// - Parameters:
    ///   - prompt: The prompt string to display.
    ///   - minLength: The minimum number of characters to allow in the
    ///     response. Default value is 1.
    ///
    /// - Returns: The entered string.
    func stringRequest(_ prompt: String, minLength: Int = 1) -> String {
        while true {
            print(prompt, terminator: "")
            let str = readLine()

            guard let str else {
                continue
            }
            if str.count >= minLength {
                return str
            } else {
                print("*** Response must be at least \(minLength) character(s) long.")
            }
        }
    }

    /// Ask a yes/no question.
    ///
    /// - Parameter prompt: A prompt string to print.
    ///
    /// - Returns: `true` if the user answered "Y", otherwise `false`.
    func yesNoRequest(_ prompt: String) -> Bool {
        while true {
            let answer = stringRequest(prompt).lowercased()
            if answer == "y" || answer == "n" {
                return answer == "y"
            }
        }
    }
*/

    /// Generate and return a name that begins with the specified string
    /// and ends with a random numeric suffix.
    ///
    /// - Parameters:
    ///   - prefix: Text to use at the beginning of the returned name.
    ///
    /// - Returns: A string of the form `<prefix>-<number>`.
    ///
    /// The suffix is a random integer of at least 1 million and less than
    /// 1 billion, which provides reasonable (but not guaranteed) uniqueness
    /// for the purposes of this example.
    func tempName(prefix: String) -> String {
        return "\(prefix)-\(Int.random(in: 1000000..<1000000000))"
    }

    /// Called by ``main()`` to run the bulk of the example.
    ///
    /// - Throws: Any error thrown while creating the AWS Glue client
    ///   configuration.
    func runAsync() async throws {
        let config = try await GlueClient.GlueClientConfiguration(region: awsRegion)
        let glueClient = GlueClient(config: config)

        print("Welcome to the AWS SDK for Swift basic scenario for AWS Glue!")

        // NOTE(review): the client and the values defined below are not yet
        // used by any code visible in this function; they are presumably
        // scaffolding for the numbered steps that follow.

        // Create random names for things that need them.

        let crawlerName = tempName(prefix: "swift-glue-basics-crawler")
        let databaseName = tempName(prefix: "swift-glue-basics-db")

        // A name for the AWS Glue job.

        let jobName = tempName(prefix: "scenario-job")

        // A name to give the Python script upon upload to the Amazon S3
        // bucket, and the full URL of the script on S3.
        let scriptName = "jobscript.py"
        let scriptURL = "s3://\(bucket)/\(scriptName)"

        // Schedule string in `cron` format, as described here:
        // https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html
        let cron = "cron(15 12 * * ? *)"

        //=====================================================================
        // 0. Upload the Python script to the target bucket so it's available
        //    for use by the AWS Glue service.
        //=====================================================================
    }
}
| 178 | + |
/// The program's asynchronous entry point.
///
/// Bridges the command-line parser to the asynchronous scenario code in
/// `ExampleCommand`.
@main
struct Main {
    /// Parse the command line into an `ExampleCommand` and run it.
    ///
    /// Any error thrown while parsing or running is handed to
    /// `ExampleCommand.exit(withError:)`.
    static func main() async {
        // The first element of `CommandLine.arguments` is the executable
        // itself, so only the remaining elements are passed to the parser.
        let userArguments = Array(CommandLine.arguments.dropFirst())

        do {
            try await ExampleCommand
                .parse(userArguments)
                .runAsync()
        } catch {
            ExampleCommand.exit(withError: error)
        }
    }
}
| 193 | +// snippet-end:[swift.glue.scenario] |
0 commit comments