-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Expand file tree
/
Copy pathentry.swift
More file actions
850 lines (741 loc) · 32.1 KB
/
Copy pathentry.swift
File metadata and controls
850 lines (741 loc) · 32.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// snippet-start:[swift.glue.scenario]
// An example that shows how to use the AWS SDK for Swift to demonstrate
// creating and using crawlers and jobs using AWS Glue.
//
// 0. Upload the Python job script to Amazon S3 so it can be used when
// calling `startJobRun()` later.
// 1. Create a crawler, pass it the IAM role and the URL of the public Amazon
// S3 bucket that contains the source data:
// s3://crawler-public-us-east-1/flight/2016/csv.
// 2. Start the crawler. This takes time, so after starting it, use a loop
// that calls `getCrawler()` until the state is "READY".
// 3. Get the database created by the crawler, and the tables in the
// database. Display them to the user.
// 4. Create a job. Pass it the IAM role and the URL to a Python ETL script
// previously uploaded to the user's S3 bucket.
// 5. Start a job run, passing the following custom arguments. These are
// expected by the ETL script, so must exactly match.
// * `--input_database: <name of the database created by the crawler>`
// * `--input_table: <name of the table created by the crawler>`
// * `--output_bucket_url: <URL to the scaffold bucket created for the
// user>`
// 6. Loop and get the job run until it returns one of the following states:
// "SUCCEEDED", "STOPPED", "FAILED", or "TIMEOUT".
// 7. Output data is stored in a group of files in the user's S3 bucket.
// Either direct the user to their location or download a file and display
// the results inline.
// 8. List the jobs for the user's account.
// 9. Get job run details for a job run.
// 10. Delete the demo job.
// 11. Delete the database and tables created by the example.
// 12. Delete the crawler created by the example.
import ArgumentParser
import AWSS3
import Foundation
import Smithy
// snippet-start:[swift.glue.import]
import AWSClientRuntime
import AWSGlue
// snippet-end:[swift.glue.import]
struct ExampleCommand: ParsableCommand {
// MARK: - Command-line options

// Required: no default value is supplied, so the argument parser must be
// given `--role` on the command line.
@Option(help: "The AWS IAM role to use for AWS Glue calls.")
var role: String
// Required: the bucket receives the uploaded job script and is also used to
// build the job run's output URL.
@Option(help: "The Amazon S3 bucket to use for this example.")
var bucket: String
// Defaults to the public sample data set named in the file header.
@Option(help: "The Amazon S3 URL of the data to crawl.")
var s3url: String = "s3://crawler-public-us-east-1/flight/2016/csv"
// Local path of the Python ETL script that is uploaded to `bucket`.
@Option(help: "The Python script to run as a job with AWS Glue.")
var script: String = "./flight_etl_job_script.py"
// Region used for both the AWS Glue client and the Amazon S3 client.
@Option(help: "The AWS Region to run AWS API calls in.")
var awsRegion = "us-east-1"
// Passed to the crawler as its table prefix when the crawler is created.
@Option(help: "A prefix string to use when naming tables.")
var tablePrefix = "swift-glue-basics-table"
// NOTE(review): `logLevel` is parsed but is not read by any other code
// visible in this file — confirm whether it should configure SDK logging.
@Option(
help: ArgumentHelp("The level of logging for the Swift SDK to perform."),
completion: .list([
"critical",
"debug",
"error",
"info",
"notice",
"trace",
"warning"
])
)
var logLevel: String = "error"
/// The command's name and help text, as shown by the argument parser.
///
/// Declared with `let` rather than `var`: the value is never reassigned,
/// and an immutable static avoids exposing mutable global state (which
/// strict concurrency checking rejects). A `static let` still satisfies
/// the read-only `configuration` requirement of `ParsableCommand`.
static let configuration = CommandConfiguration(
    commandName: "glue-scenario",
    abstract: """
    Demonstrates various features of AWS Glue.
    """,
    discussion: """
    An example showing how to use AWS Glue to create, run, and monitor
    crawlers and jobs.
    """
)
/// Build a name that is unlikely to collide with existing resources.
///
/// - Parameters:
///   - prefix: Text placed at the start of the generated name.
///
/// - Returns: `prefix`, a hyphen, and a random integer drawn from the
///   half-open range 1,000,000 ..< 1,000,000,000.
///
/// The random suffix is not a guarantee of uniqueness; it is merely
/// sufficient for the purposes of this example.
func tempName(prefix: String) -> String {
    let suffix = Int.random(in: 1000000..<1000000000)
    return "\(prefix)-\(suffix)"
}
/// Upload a file to an Amazon S3 bucket.
///
/// - Parameters:
///   - s3Client: The S3 client to use when uploading the file.
///   - path: The local path of the source file to upload.
///   - toBucket: The name of the S3 bucket into which to upload the file.
///   - key: The key (name) to give the file in the S3 bucket.
///
/// - Returns: `true` if the file is uploaded successfully, otherwise `false`.
///   A failure to read the local file and a failure of the upload itself
///   are both reported as `false` after printing an error message.
func uploadFile(s3Client: S3Client, path: String, toBucket: String, key: String) async -> Bool {
    do {
        // The whole file is read into memory before being handed to S3.
        let fileData: Data = try Data(contentsOf: URL(fileURLWithPath: path))
        let dataStream = ByteStream.data(fileData)

        _ = try await s3Client.putObject(
            input: PutObjectInput(
                body: dataStream,
                bucket: toBucket,
                key: key
            )
        )
    } catch {
        // Report the bucket that was actually targeted (`toBucket`), not
        // the command-line `bucket` property, which may differ.
        print("*** An unexpected error occurred uploading the script to the Amazon S3 bucket \"\(toBucket)\".")
        return false
    }

    return true
}
// snippet-start:[swift.glue.CreateCrawler]
/// Create a new AWS Glue crawler.
///
/// - Parameters:
///   - glueClient: An AWS Glue client to use for the crawler.
///   - crawlerName: A name for the new crawler.
///   - iamRole: The name of an Amazon IAM role for the crawler to use.
///   - s3Path: The path of an Amazon S3 folder to use as a target location.
///   - cronSchedule: A `cron` schedule indicating when to run the crawler.
///   - databaseName: The name of an AWS Glue database to operate on.
///
/// - Returns: `true` if the crawler is created successfully, otherwise `false`.
///
/// The table prefix given to the crawler is taken from the command's
/// `tablePrefix` option rather than from a parameter.
func createCrawler(glueClient: GlueClient, crawlerName: String, iamRole: String,
                   s3Path: String, cronSchedule: String, databaseName: String) async -> Bool {
    // Crawl the location the caller asked for. (Previously the `s3Path`
    // parameter was ignored in favor of the `s3url` property.)
    let s3Target = GlueClientTypes.S3Target(path: s3Path)
    let targetList = GlueClientTypes.CrawlerTargets(s3Targets: [s3Target])

    do {
        _ = try await glueClient.createCrawler(
            input: CreateCrawlerInput(
                databaseName: databaseName,
                description: "Created by the AWS SDK for Swift Scenario Example for AWS Glue.",
                name: crawlerName,
                role: iamRole,
                schedule: cronSchedule,
                tablePrefix: tablePrefix,
                targets: targetList
            )
        )
    } catch _ as AlreadyExistsException {
        print("*** A crawler named \"\(crawlerName)\" already exists.")
        return false
    } catch _ as OperationTimeoutException {
        print("*** The attempt to create the AWS Glue crawler timed out.")
        return false
    } catch {
        print("*** An unexpected error occurred creating the AWS Glue crawler: \(error.localizedDescription)")
        return false
    }

    return true
}
// snippet-end:[swift.glue.CreateCrawler]
// snippet-start:[swift.glue.DeleteCrawler]
/// Delete an AWS Glue crawler.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - name: The name of the crawler to delete.
///
/// - Returns: `true` if successful, otherwise `false`. On failure, an
///   error message is printed so that a crawler left behind in the account
///   does not go unnoticed.
func deleteCrawler(glueClient: GlueClient, name: String) async -> Bool {
    do {
        _ = try await glueClient.deleteCrawler(
            input: DeleteCrawlerInput(name: name)
        )
    } catch {
        // Callers frequently discard the result, so report the failure here.
        print("*** Unable to delete the crawler \"\(name)\": \(error.localizedDescription)")
        return false
    }
    return true
}
// snippet-end:[swift.glue.DeleteCrawler]
// snippet-start:[swift.glue.StartCrawler]
/// Ask AWS Glue to begin a run of the named crawler.
///
/// - Parameters:
///   - glueClient: The AWS Glue client used to issue the request.
///   - name: The name of the crawler to start.
///
/// - Returns: `true` when the start request succeeds; `false` (after
///   printing an error message) when the request throws.
func startCrawler(glueClient: GlueClient, name: String) async -> Bool {
    let request = StartCrawlerInput(name: name)

    do {
        _ = try await glueClient.startCrawler(input: request)
        return true
    } catch {
        print("*** An unexpected error occurred starting the crawler.")
        return false
    }
}
// snippet-end:[swift.glue.StartCrawler]
// snippet-start:[swift.glue.GetCrawler]
/// Look up the current state of an AWS Glue crawler.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - name: The name of the crawler to query.
///
/// - Returns: The crawler's `GlueClientTypes.CrawlerState`. The value
///   `.stopping` is also returned when the request throws, when the
///   response contains no crawler, or when the crawler has no state.
func getCrawlerState(glueClient: GlueClient, name: String) async -> GlueClientTypes.CrawlerState {
    let request = GetCrawlerInput(name: name)

    // Any error from the service is folded into the `.stopping` fallback.
    guard let response = try? await glueClient.getCrawler(input: request) else {
        return .stopping
    }

    // A missing crawler or a missing state is likewise reported as
    // `.stopping`. That may not suit every application, but it is
    // sufficient for this example.
    return response.crawler?.state ?? .stopping
}
// snippet-end:[swift.glue.GetCrawler]
// snippet-start:[swift.glue.getCrawlerState]
/// Wait until the specified crawler is ready to run.
///
/// The crawler is polled every four seconds. Polling continues while the
/// crawler reports any state other than `.ready`, including `.stopping`,
/// which AWS Glue reports while a crawler run is winding down before it
/// returns to `.ready`.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - name: The name of the crawler to wait for.
///
/// - Returns: `true` once the crawler is ready. `false` if the crawler's
///   state can't be retrieved (the request fails, or the response has no
///   crawler or no state), or if the enclosing task is cancelled while
///   waiting.
func waitUntilCrawlerReady(glueClient: GlueClient, name: String) async -> Bool {
    // Delay between polls, in nanoseconds (4 seconds).
    let pollInterval: UInt64 = 4_000_000_000

    while true {
        // Query the service directly so that a genuine `.stopping` state
        // can be told apart from a failed request.
        let reportedState: GlueClientTypes.CrawlerState?
        do {
            let output = try await glueClient.getCrawler(
                input: GetCrawlerInput(name: name)
            )
            reportedState = output.crawler?.state
        } catch {
            print("*** Unable to get the state of the crawler \"\(name)\": \(error.localizedDescription)")
            return false
        }

        guard let currentState = reportedState else {
            // No crawler or no state in the response; it will never be ready.
            return false
        }

        if currentState == .ready {
            return true
        }

        // Suspend instead of blocking the thread; `Task.sleep` throws only
        // when the task has been cancelled, in which case stop waiting.
        do {
            try await Task.sleep(nanoseconds: pollInterval)
        } catch {
            return false
        }
    }
}
// snippet-end:[swift.glue.getCrawlerState]
// snippet-start:[swift.glue.CreateJob]
/// Create a new AWS Glue job.
///
/// The job is created as a `glueetl` job using Python 3 and AWS Glue
/// version 3.0, with ten `G.1X` workers.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - jobName: The name to give the new job.
///   - role: The IAM role for the job to use when accessing AWS services.
///   - scriptLocation: The AWS S3 URI of the script to be run by the job.
///
/// - Returns: `true` if the job is created successfully, otherwise `false`.
///   On failure, an error message describing the problem is printed.
func createJob(glueClient: GlueClient, name jobName: String, role: String,
               scriptLocation: String) async -> Bool {
    let command = GlueClientTypes.JobCommand(
        name: "glueetl",
        pythonVersion: "3",
        scriptLocation: scriptLocation
    )

    do {
        _ = try await glueClient.createJob(
            input: CreateJobInput(
                command: command,
                description: "Created by the AWS SDK for Swift Glue basic scenario example.",
                glueVersion: "3.0",
                name: jobName,
                numberOfWorkers: 10,
                role: role,
                workerType: .g1x
            )
        )
    } catch {
        // Surface the reason; otherwise the scenario stops with no
        // explanation when job creation fails.
        print("*** An unexpected error occurred creating the AWS Glue job: \(error.localizedDescription)")
        return false
    }
    return true
}
// snippet-end:[swift.glue.CreateJob]
// snippet-start:[swift.glue.ListJobs]
/// Collect the names of the AWS Glue jobs in the user's account.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - maxJobs: The value passed as `maxResults` on each request
///     (default: 100). Requests are repeated for as long as the service
///     returns a continuation token, so the returned array can contain more
///     than `maxJobs` names.
///
/// - Returns: An array of job names. If a request throws, or a response
///   contains no job names, the names gathered so far are returned.
func listJobs(glueClient: GlueClient, maxJobs: Int = 100) async -> [String] {
    var collectedNames: [String] = []
    var pageToken: String? = nil

    while true {
        do {
            let request = ListJobsInput(maxResults: maxJobs, nextToken: pageToken)
            let page = try await glueClient.listJobs(input: request)

            guard let pageNames = page.jobNames else {
                return collectedNames
            }

            collectedNames.append(contentsOf: pageNames)
            pageToken = page.nextToken
        } catch {
            return collectedNames
        }

        // No continuation token means the last page has been read.
        if pageToken == nil {
            return collectedNames
        }
    }
}
// snippet-end:[swift.glue.ListJobs]
// snippet-start:[swift.glue.DeleteJob]
/// Remove an AWS Glue job.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - jobName: The name of the job to remove.
///
/// - Returns: `true` when the delete request completes without throwing,
///   otherwise `false`.
func deleteJob(glueClient: GlueClient, name jobName: String) async -> Bool {
    let request = DeleteJobInput(jobName: jobName)

    do {
        _ = try await glueClient.deleteJob(input: request)
        return true
    } catch {
        return false
    }
}
// snippet-end:[swift.glue.DeleteJob]
// snippet-start:[swift.glue.CreateDatabase]
/// Create a database in AWS Glue.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - databaseName: The name for the new database.
///   - location: The URL of the source data, stored as the database's
///     location URI.
///
/// - Returns: `true` when the create request completes without throwing,
///   otherwise `false`.
func createDatabase(glueClient: GlueClient, name databaseName: String, location: String) async -> Bool {
    let request = CreateDatabaseInput(
        databaseInput: GlueClientTypes.DatabaseInput(
            description: "Created by the AWS SDK for Swift Glue basic scenario example.",
            locationUri: location,
            name: databaseName
        )
    )

    do {
        _ = try await glueClient.createDatabase(input: request)
        return true
    } catch {
        return false
    }
}
// snippet-end:[swift.glue.CreateDatabase]
// snippet-start:[swift.glue.GetDatabase]
/// Fetch the description of a named AWS Glue database.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - name: The name of the database to look up.
///
/// - Returns: The `GlueClientTypes.Database` contained in the response, or
///   `nil` if the request throws or the response holds no database.
func getDatabase(glueClient: GlueClient, name: String) async -> GlueClientTypes.Database? {
    let request = GetDatabaseInput(name: name)

    // A thrown error and an empty response both collapse to `nil`.
    let response = try? await glueClient.getDatabase(input: request)
    return response?.database
}
// snippet-end:[swift.glue.GetDatabase]
// snippet-start:[swift.glue.GetTables]
/// Gather every table found in the named database.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - databaseName: The name of the database whose tables are wanted.
///
/// - Returns: An array of `GlueClientTypes.Table` values, one per table.
///   Requests are repeated while the service returns a continuation token.
///   If a request throws, or a response has no table list, the tables
///   gathered so far are returned; an empty array therefore means either
///   that the database has no tables or that the first request failed.
func getTablesInDatabase(glueClient: GlueClient, databaseName: String) async -> [GlueClientTypes.Table] {
    var collectedTables: [GlueClientTypes.Table] = []
    var pageToken: String? = nil

    while true {
        do {
            let request = GetTablesInput(databaseName: databaseName, nextToken: pageToken)
            let page = try await glueClient.getTables(input: request)

            guard let pageTables = page.tableList else {
                return collectedTables
            }

            collectedTables.append(contentsOf: pageTables)
            pageToken = page.nextToken
        } catch {
            return collectedTables
        }

        // No continuation token means the last page has been read.
        if pageToken == nil {
            return collectedTables
        }
    }
}
// snippet-end:[swift.glue.GetTables]
// snippet-start:[swift.glue.BatchDeleteTable]
// snippet-start:[swift.glue.DeleteDatabase]
/// Delete the specified database.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - databaseName: The name of the database to delete.
///   - deleteTables: A Bool indicating whether or not to delete the
///     tables in the database before attempting to delete the database.
///
/// - Returns: `true` if the database (and, when requested, its tables) are
///   deleted; `false` if any of the delete requests fails. Deleting the
///   database is attempted even if deleting its tables fails.
func deleteDatabase(glueClient: GlueClient, name databaseName: String,
                    withTables deleteTables: Bool = false) async -> Bool {
    var succeeded = true

    if deleteTables {
        // Get a list of the names of all of the tables in the database,
        // skipping any table that has no name.
        let tableList = await self.getTablesInDatabase(glueClient: glueClient, databaseName: databaseName)
        let tableNames = tableList.compactMap(\.name)

        // Delete the tables. There is nothing to request when the
        // database has no named tables.
        if !tableNames.isEmpty {
            do {
                _ = try await glueClient.batchDeleteTable(
                    input: BatchDeleteTableInput(
                        databaseName: databaseName,
                        tablesToDelete: tableNames
                    )
                )
            } catch {
                print("*** Unable to delete the tables.")
                succeeded = false
            }
        }
        // Fall through: the database itself still has to be deleted.
        // (Previously the function returned here, leaving the database
        // behind whenever `deleteTables` was `true`.)
    }

    // Delete the database itself.
    do {
        _ = try await glueClient.deleteDatabase(
            input: DeleteDatabaseInput(name: databaseName)
        )
    } catch {
        print("*** Unable to delete the database.")
        succeeded = false
    }

    return succeeded
}
// snippet-end:[swift.glue.DeleteDatabase]
// snippet-end:[swift.glue.BatchDeleteTable]
// snippet-start:[swift.glue.StartJobRun]
/// Start an AWS Glue job run.
///
/// The database name, table name, and output URL are passed to the job as
/// the `--input_database`, `--input_table`, and `--output_bucket_url`
/// arguments. The run is requested with ten `G.1X` workers.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - jobName: The name of the job to run.
///   - databaseName: The name of the AWS Glue database to run the job against.
///   - tableName: The name of the table in the database to run the job against.
///   - outputURL: The AWS S3 URI of the bucket location into which to
///     write the resulting output.
///
/// - Returns: The ID of the newly started job run, or `nil` if the request
///   fails or the response contains no run ID.
func startJobRun(glueClient: GlueClient, name jobName: String, databaseName: String,
                 tableName: String, outputURL: String) async -> String? {
    do {
        let output = try await glueClient.startJobRun(
            input: StartJobRunInput(
                arguments: [
                    "--input_database": databaseName,
                    "--input_table": tableName,
                    "--output_bucket_url": outputURL
                ],
                jobName: jobName,
                numberOfWorkers: 10,
                workerType: .g1x
            )
        )

        // `jobRunId` is already optional, so it can be returned as-is.
        return output.jobRunId
    } catch {
        print("*** An unexpected error occurred starting the job run: \(error.localizedDescription)")
        return nil
    }
}
// snippet-end:[swift.glue.StartJobRun]
// snippet-start:[swift.glue.GetJobRun]
/// Fetch the details of one run of an AWS Glue job.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - jobName: The name of the job that the run belongs to.
///   - id: The run ID identifying the job run.
///
/// - Returns: The `GlueClientTypes.JobRun` contained in the response, or
///   `nil` if the request throws or the response holds no job run.
func getJobRun(glueClient: GlueClient, name jobName: String, id: String) async -> GlueClientTypes.JobRun? {
    let request = GetJobRunInput(jobName: jobName, runId: id)

    // A thrown error and an empty response both collapse to `nil`.
    let response = try? await glueClient.getJobRun(input: request)
    return response?.jobRun
}
// snippet-end:[swift.glue.GetJobRun]
/// Called by ``main()`` to run the bulk of the example.
///
/// Uploads the job script to Amazon S3, creates a database and a crawler,
/// runs the crawler, creates and runs a job against the table the crawler
/// produced, reports on the job run, and finally deletes the job, the
/// database, and the crawler. When a step fails, the resources created by
/// earlier steps are deleted before returning.
///
/// - Throws: Any error thrown while creating the AWS Glue or Amazon S3
///   client configuration.
func runAsync() async throws {
    // A name to give the Python script upon upload to the Amazon S3
    // bucket.
    let scriptName = "jobscript.py"

    // Schedule string in `cron` format, as described here:
    // https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html
    let cron = "cron(15 12 * * ? *)"

    let glueConfig = try await GlueClient.GlueClientConfiguration(region: awsRegion)
    let glueClient = GlueClient(config: glueConfig)

    let s3Config = try await S3Client.S3ClientConfiguration(region: awsRegion)
    let s3Client = S3Client(config: s3Config)

    // Create random names for things that need them.
    let crawlerName = tempName(prefix: "swift-glue-basics-crawler")
    let databaseName = tempName(prefix: "swift-glue-basics-db")

    // Create a name for the AWS Glue job.
    let jobName = tempName(prefix: "scenario-job")

    // The URL of the Python script on S3.
    let scriptURL = "s3://\(bucket)/\(scriptName)"

    print("Welcome to the AWS SDK for Swift basic scenario for AWS Glue!")

    //=====================================================================
    // 0. Upload the Python script to the target bucket so it's available
    //    for use by the Amazon Glue service.
    //=====================================================================

    print("Uploading the Python script: \(script) as key \(scriptName)")
    print("Destination bucket: \(bucket)")
    guard await uploadFile(s3Client: s3Client, path: script, toBucket: bucket, key: scriptName) else {
        return
    }

    //=====================================================================
    // 1. Create the database and crawler using the randomized names
    //    generated previously.
    //=====================================================================

    print("Creating database \"\(databaseName)\"...")
    guard await createDatabase(glueClient: glueClient, name: databaseName, location: s3url) else {
        print("*** Unable to create the database.")
        return
    }

    print("Creating crawler \"\(crawlerName)\"...")
    guard await createCrawler(glueClient: glueClient, crawlerName: crawlerName,
                              iamRole: role, s3Path: s3url, cronSchedule: cron,
                              databaseName: databaseName) else {
        // The database exists by now, so don't leave it behind.
        await cleanUp(glueClient: glueClient, databaseName: databaseName)
        return
    }

    //=====================================================================
    // 2. Start the crawler, then wait for it to be ready.
    //=====================================================================

    print("Starting the crawler and waiting until it's ready...")
    guard await startCrawler(glueClient: glueClient, name: crawlerName) else {
        await cleanUp(glueClient: glueClient, databaseName: databaseName, crawlerName: crawlerName)
        return
    }

    // NOTE(review): execution deliberately continues after a `false`
    // result, exactly as before. The wait helper reports `false` whenever
    // the crawler state it sees is `.stopping`, so a `false` here does not
    // by itself show that the crawl produced nothing; the table count
    // check in step 3 decides whether the scenario can proceed.
    if !(await waitUntilCrawlerReady(glueClient: glueClient, name: crawlerName)) {
        _ = await deleteCrawler(glueClient: glueClient, name: crawlerName)
    }

    //=====================================================================
    // 3. Get the database and table created by the crawler.
    //=====================================================================

    print("Getting the crawler's database...")
    guard await getDatabase(glueClient: glueClient, name: databaseName) != nil else {
        print("*** Unable to get the database.")
        await cleanUp(glueClient: glueClient, databaseName: databaseName, crawlerName: crawlerName)
        return
    }

    let tableList = await getTablesInDatabase(glueClient: glueClient, databaseName: databaseName)

    print("Found \(tableList.count) table(s):")
    for table in tableList {
        print(" \(table.name ?? "<unnamed>")")
    }

    guard tableList.count == 1 else {
        print("*** Incorrect number of tables found. There should only be one.")
        await cleanUp(glueClient: glueClient, databaseName: databaseName, crawlerName: crawlerName)
        return
    }

    guard let tableName = tableList[0].name else {
        print("*** Table is unnamed.")
        await cleanUp(glueClient: glueClient, databaseName: databaseName, crawlerName: crawlerName)
        return
    }

    //=====================================================================
    // 4. Create a job.
    //=====================================================================

    print("Creating a job...")
    guard await createJob(glueClient: glueClient, name: jobName, role: role,
                          scriptLocation: scriptURL) else {
        await cleanUp(glueClient: glueClient, databaseName: databaseName, crawlerName: crawlerName)
        return
    }

    //=====================================================================
    // 5. Start a job run.
    //=====================================================================

    print("Starting the job...")

    // Construct the Amazon S3 URL for the job run's output. This is in
    // the bucket specified on the command line, with a folder name that's
    // unique for this job run.
    let timeStamp = Date().timeIntervalSince1970
    let jobPath = "\(jobName)-\(Int(timeStamp))"
    let outputURL = "s3://\(bucket)/\(jobPath)"

    // Start the job run.
    guard let jobRunID = await startJobRun(glueClient: glueClient, name: jobName,
                                           databaseName: databaseName,
                                           tableName: tableName,
                                           outputURL: outputURL) else {
        print("*** Job run ID is invalid.")
        await cleanUp(glueClient: glueClient, jobName: jobName,
                      databaseName: databaseName, crawlerName: crawlerName)
        return
    }

    //=====================================================================
    // 6. Wait for the job run to indicate that the run is complete.
    //=====================================================================

    print("Waiting for job run to end...")
    var jobRunFinished = false

    repeat {
        guard let currentRun = await getJobRun(glueClient: glueClient, name: jobName, id: jobRunID) else {
            print("*** Unable to get the job run.")
            await cleanUp(glueClient: glueClient, jobName: jobName,
                          databaseName: databaseName, crawlerName: crawlerName)
            return
        }

        //=================================================================
        // 7. Output where to find the data if the job run was successful.
        //    If the job run failed for any reason, output an appropriate
        //    error message.
        //=================================================================

        // A run that reports no state is treated as failed.
        switch currentRun.jobRunState ?? .failed {
        case .succeeded:
            print("Job run succeeded. JSON files are in the Amazon S3 path:")
            print(" \(outputURL)")
            jobRunFinished = true
        case .stopped:
            print("*** Warning: Job run was stopped.")
            jobRunFinished = true
        case .error:
            print("*** Error: Job run ended in an error. \(currentRun.errorMessage ?? "")")
            jobRunFinished = true
        case .failed:
            print("*** Error: Job run failed. \(currentRun.errorMessage ?? "")")
            jobRunFinished = true
        case .timeout:
            print("*** Warning: Job run timed out.")
            jobRunFinished = true
        default:
            // Still in progress: pause for a quarter of a second without
            // blocking the thread. `Task.sleep` throws only if the task
            // is cancelled, in which case clean up and stop.
            do {
                try await Task.sleep(nanoseconds: 250_000_000)
            } catch {
                await cleanUp(glueClient: glueClient, jobName: jobName,
                              databaseName: databaseName, crawlerName: crawlerName)
                return
            }
        }
    } while !jobRunFinished

    //=====================================================================
    // 8. List the jobs for the user's account.
    //=====================================================================

    print("\nThe account has the following jobs:")
    let jobs = await listJobs(glueClient: glueClient)

    if jobs.isEmpty {
        print(" <no jobs found>")
    } else {
        for job in jobs {
            print(" \(job)")
        }
    }

    //=====================================================================
    // 9. Get the job run details for a job run.
    //=====================================================================

    print("Information about the job run:")
    guard let finishedRun = await getJobRun(glueClient: glueClient, name: jobName, id: jobRunID) else {
        print("*** Unable to retrieve the job run.")
        await cleanUp(glueClient: glueClient, jobName: jobName,
                      databaseName: databaseName, crawlerName: crawlerName)
        return
    }

    // Missing timestamps fall back to the start of the Unix epoch.
    let startDate = finishedRun.startedOn ?? Date(timeIntervalSince1970: 0)
    let endDate = finishedRun.completedOn ?? Date(timeIntervalSince1970: 0)
    let dateFormatter: DateFormatter = DateFormatter()

    dateFormatter.dateStyle = .long
    dateFormatter.timeStyle = .long

    print(" Started at: \(dateFormatter.string(from: startDate))")
    print(" Completed at: \(dateFormatter.string(from: endDate))")

    //=====================================================================
    // 10. Delete the job.
    //=====================================================================

    print("\nDeleting the job...")
    _ = await deleteJob(glueClient: glueClient, name: jobName)

    //=====================================================================
    // 11. Delete the database and tables created by this example.
    //=====================================================================

    print("Deleting the database...")
    _ = await deleteDatabase(glueClient: glueClient, name: databaseName, withTables: true)

    //=====================================================================
    // 12. Delete the crawler.
    //=====================================================================

    print("Deleting the crawler...")
    _ = await deleteCrawler(glueClient: glueClient, name: crawlerName)
}

/// Delete the resources created so far, for use when the scenario has to
/// stop early.
///
/// Deletion is attempted in the order job, database (with its tables),
/// crawler. The result of each deletion is ignored.
///
/// - Parameters:
///   - glueClient: The AWS Glue client to use.
///   - jobName: The job to delete, or `nil` if no job has been created.
///   - databaseName: The database to delete.
///   - crawlerName: The crawler to delete, or `nil` if no crawler has been
///     created.
private func cleanUp(glueClient: GlueClient, jobName: String? = nil,
                     databaseName: String, crawlerName: String? = nil) async {
    if let jobName {
        _ = await deleteJob(glueClient: glueClient, name: jobName)
    }
    _ = await deleteDatabase(glueClient: glueClient, name: databaseName, withTables: true)
    if let crawlerName {
        _ = await deleteCrawler(glueClient: glueClient, name: crawlerName)
    }
}
}
/// The asynchronous entry point of the program.
///
/// Parses the command-line arguments (excluding the executable name) into
/// an `ExampleCommand`, then runs it. Any error from parsing or running is
/// handed to `ExampleCommand.exit(withError:)`.
@main
struct Main {
    static func main() async {
        do {
            // Skip element 0, which is the path of the executable itself.
            let arguments = Array(CommandLine.arguments.dropFirst())
            let command = try ExampleCommand.parse(arguments)
            try await command.runAsync()
        } catch {
            ExampleCommand.exit(withError: error)
        }
    }
}
// snippet-end:[swift.glue.scenario]