Skip to content

Commit 92073df

Browse files
VirtueMe and claude authored
feat(iam): add Athena service integration permissions (#769)
Auto-generates the IAM execution role permissions documented at https://docs.aws.amazon.com/step-functions/latest/dg/connect-athena.html for state machines that invoke Athena. Without this, Athena queries fail at runtime because the state machine's role lacks the necessary catalog and storage grants.

- New iamStrategies/athena.js handles the five integration ARNs: startQueryExecution, startQueryExecution.sync, stopQueryExecution, getQueryExecution, getQueryResults
- For startQueryExecution(.sync), grants the full action set plus S3, Glue, and Lake Formation permissions per the AWS template — Athena uses the caller identity for catalog and data access
- Resource scoping: workgroup/<name> when WorkGroup is a static string, workgroup/* when it is a runtime path or absent
- Standalone get/stop actions scope to workgroup/* (the resource is identified at runtime by QueryExecutionId)
- getQueryResults additionally grants s3:GetObject for the result location
- Athena .sync uses polling, not EventBridge — no events:Put* permission (unlike sagemaker .sync)
- Integration fixture exercises three machines (sync, request-response with static WorkGroup, request-response with runtime WorkGroup) and asserts the generated CF role matches AWS's documented template

Closes #450

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 41d1cb5 commit 92073df

5 files changed

Lines changed: 468 additions & 0 deletions

File tree

fixtures/athena/serverless.yml

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
service: integration-athena
2+
3+
provider: ${file(../base.yml):provider}
4+
plugins: ${file(../base.yml):plugins}
5+
package: ${file(../base.yml):package}
6+
custom: ${file(../base.yml):custom}
7+
8+
stepFunctions:
9+
stateMachines:
10+
athenaMachine:
11+
name: integration-athena-${opt:stage, 'test'}
12+
definition:
13+
StartAt: StartQuery
14+
States:
15+
StartQuery:
16+
Type: Task
17+
Resource: arn:aws:states:::athena:startQueryExecution.sync
18+
Parameters:
19+
QueryString: SELECT 1
20+
WorkGroup: primary
21+
ResultConfiguration:
22+
OutputLocation: s3://example-results/
23+
Next: GetQueryExecution
24+
GetQueryExecution:
25+
Type: Task
26+
Resource: arn:aws:states:::athena:getQueryExecution
27+
Parameters:
28+
QueryExecutionId.$: $.QueryExecution.QueryExecutionId
29+
Next: GetQueryResults
30+
GetQueryResults:
31+
Type: Task
32+
Resource: arn:aws:states:::athena:getQueryResults
33+
Parameters:
34+
QueryExecutionId.$: $.QueryExecution.QueryExecutionId
35+
Next: StopQuery
36+
StopQuery:
37+
Type: Task
38+
Resource: arn:aws:states:::athena:stopQueryExecution
39+
Parameters:
40+
QueryExecutionId.$: $.QueryExecution.QueryExecutionId
41+
End: true
42+
athenaAsyncMachine:
43+
name: integration-athena-async-${opt:stage, 'test'}
44+
definition:
45+
StartAt: StartQueryAsync
46+
States:
47+
StartQueryAsync:
48+
Type: Task
49+
Resource: arn:aws:states:::athena:startQueryExecution
50+
Parameters:
51+
QueryString: SELECT 1
52+
WorkGroup: primary
53+
ResultConfiguration:
54+
OutputLocation: s3://example-results/
55+
End: true
56+
athenaRuntimeWorkgroupMachine:
57+
name: integration-athena-runtime-wg-${opt:stage, 'test'}
58+
definition:
59+
StartAt: StartQueryRuntimeWg
60+
States:
61+
StartQueryRuntimeWg:
62+
Type: Task
63+
Resource: arn:aws:states:::athena:startQueryExecution
64+
Parameters:
65+
QueryString: SELECT 1
66+
WorkGroup.$: $.workgroup
67+
ResultConfiguration:
68+
OutputLocation: s3://example-results/
69+
End: true

fixtures/athena/verify.test.js

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
'use strict';
2+
3+
const fs = require('node:fs');
4+
const path = require('node:path');
5+
const expect = require('chai').expect;
6+
7+
const templatePath = path.join(__dirname, '.serverless', 'cloudformation-template-update-stack.json');
8+
9+
/**
 * Gathers every IAM action named in the statements of the role's first
 * inline policy into one flat list. A statement's `Action` may be a single
 * string or an array of strings; both shapes are accepted.
 *
 * @param {object} role - CloudFormation `AWS::IAM::Role` resource.
 * @returns {string[]} All actions, in statement order.
 */
const collectActions = (role) => {
  const statements = role.Properties.Policies[0].PolicyDocument.Statement;
  return statements.reduce((collected, statement) => collected.concat(statement.Action), []);
};
11+
12+
/**
 * Returns the first statement of the role's first inline policy whose
 * `Action` (string or array) contains `actionName` exactly.
 *
 * @param {object} role - CloudFormation `AWS::IAM::Role` resource.
 * @param {string} actionName - IAM action to look for (case-sensitive match).
 * @returns {object|undefined} The matching statement, or undefined if none.
 */
const findStatementByAction = (role, actionName) => {
  const statements = role.Properties.Policies[0].PolicyDocument.Statement;
  const grantsAction = (statement) => {
    const actions = [].concat(statement.Action);
    return actions.includes(actionName);
  };
  return statements.find(grantsAction);
};
14+
15+
/**
 * Extracts the `Fn::Sub` template strings from a statement's resources.
 * Resources that are plain strings (or otherwise carry no `Fn::Sub`) are
 * skipped. `Fn::Sub` may be either a bare string or a `[template, vars]`
 * pair; in the latter case only the template is returned.
 *
 * @param {object} statement - IAM policy statement.
 * @returns {Array} Template strings, in resource order.
 */
const arnSubStrings = (statement) => {
  const templates = [];
  for (const resource of [].concat(statement.Resource)) {
    const sub = resource && resource['Fn::Sub'];
    if (sub) {
      templates.push(Array.isArray(sub) ? sub[0] : sub);
    }
  }
  return templates;
};
22+
23+
/**
 * Picks the first `Fn::Sub` template in the statement's resources that
 * refers to an Athena workgroup (contains `:workgroup/`).
 *
 * @param {object} statement - IAM policy statement.
 * @returns {string|undefined} The workgroup ARN template, or undefined.
 */
const workGroupArn = (statement) => {
  const isWorkGroupTemplate = (template) => template.includes(':workgroup/');
  return arnSubStrings(statement).find(isWorkGroupTemplate);
};
24+
25+
// Verifies the IAM roles generated for the three Athena fixture state
// machines (sync, request-response with a static WorkGroup, request-response
// with a runtime WorkGroup) by inspecting the packaged CloudFormation template.
describe('athena fixture — generated IAM role', () => {
  let syncRole;
  let asyncRole;
  let runtimeWgRole;

  // True when the role's startQueryExecution statement targets a workgroup ARN
  // ending in `suffix`. Returns false instead of throwing when the statement or
  // the workgroup ARN is absent, so a badly generated role surfaces as a clear
  // "role should exist" failure rather than a TypeError in the before hook.
  const startQueryTargetsWorkGroup = (role, suffix) => {
    const stmt = findStatementByAction(role, 'athena:startQueryExecution');
    const arn = stmt ? workGroupArn(stmt) : undefined;
    return typeof arn === 'string' && arn.endsWith(suffix);
  };

  // IAM matches action names case-insensitively, so "must not be granted"
  // checks compare lower-cased names; otherwise a grant spelled with different
  // casing (e.g. athena:GetQueryResults) would slip past the assertion.
  const lowerCased = (actions) => actions.map((action) => String(action).toLowerCase());

  before(() => {
    const template = JSON.parse(fs.readFileSync(templatePath, 'utf8'));

    // Only roles with inline policies can be inspected by the helpers; roles
    // without them (e.g. managed-policy-only roles) are ignored.
    const roles = Object.values(template.Resources).filter((r) => r.Type === 'AWS::IAM::Role'
      && r.Properties
      && Array.isArray(r.Properties.Policies)
      && r.Properties.Policies.length > 0);

    // The sync machine grants the full Run-a-Job action set including ListQueryExecutions.
    syncRole = roles.find((r) => collectActions(r).includes('athena:ListQueryExecutions'));
    const asyncRoles = roles.filter((r) => {
      const actions = collectActions(r);
      return actions.includes('athena:startQueryExecution')
        && !actions.includes('athena:ListQueryExecutions');
    });
    asyncRole = asyncRoles.find((r) => startQueryTargetsWorkGroup(r, ':workgroup/primary'));
    runtimeWgRole = asyncRoles.find((r) => startQueryTargetsWorkGroup(r, ':workgroup/*'));
  });

  it('creates an IAM role for each athena state machine', () => {
    expect(syncRole, 'sync machine role should exist').to.not.equal(undefined);
    expect(asyncRole, 'async-only static-WorkGroup role should exist').to.not.equal(undefined);
    expect(runtimeWgRole, 'runtime-WorkGroup role should exist').to.not.equal(undefined);
  });

  describe('sync machine (startQueryExecution.sync)', () => {
    it('grants the full Run-a-Job athena action set (matches AWS template)', () => {
      const stmt = findStatementByAction(syncRole, 'athena:startQueryExecution');
      const actions = [].concat(stmt.Action);
      for (const action of [
        'athena:startQueryExecution',
        'athena:stopQueryExecution',
        'athena:getQueryExecution',
        'athena:getDataCatalog',
        'athena:GetWorkGroup',
        'athena:BatchGetQueryExecution',
        'athena:GetQueryResults',
        'athena:ListQueryExecutions',
      ]) {
        expect(actions).to.include(action);
      }
    });

    it('scopes the athena resource to workgroup/<name> + datacatalog/* when WorkGroup is static', () => {
      const stmt = findStatementByAction(syncRole, 'athena:startQueryExecution');
      const arns = arnSubStrings(stmt);
      expect(arns.some((a) => /:workgroup\/primary$/.test(a))).to.equal(true);
      expect(arns.some((a) => /:datacatalog\/\*$/.test(a))).to.equal(true);
    });

    it('does not grant events:* (Athena .sync uses polling, not the EventBridge rule)', () => {
      const actions = collectActions(syncRole);
      expect(actions).to.not.include('events:PutTargets');
      expect(actions).to.not.include('events:PutRule');
      expect(actions).to.not.include('events:DescribeRule');
    });

    it('grants the documented S3 action set on arn:aws:s3:::*', () => {
      const stmt = findStatementByAction(syncRole, 's3:GetBucketLocation');
      const actions = [].concat(stmt.Action);
      for (const action of [
        's3:GetBucketLocation', 's3:GetObject', 's3:ListBucket', 's3:PutObject',
      ]) {
        expect(actions).to.include(action);
      }
      expect([].concat(stmt.Resource)).to.include('arn:aws:s3:::*');
    });

    it('grants the documented Glue action set on catalog/database/table/userDefinedFunction', () => {
      const stmt = findStatementByAction(syncRole, 'glue:GetDatabase');
      const arns = arnSubStrings(stmt);
      expect(arns.some((a) => /:catalog$/.test(a))).to.equal(true);
      expect(arns.some((a) => /:database\/\*$/.test(a))).to.equal(true);
      expect(arns.some((a) => /:table\/\*$/.test(a))).to.equal(true);
      expect(arns.some((a) => /:userDefinedFunction\/\*$/.test(a))).to.equal(true);
    });

    it('grants lakeformation:GetDataAccess on *', () => {
      const stmt = findStatementByAction(syncRole, 'lakeformation:GetDataAccess');
      expect(stmt).to.not.equal(undefined);
      expect([].concat(stmt.Resource)).to.include('*');
    });
  });

  describe('async-only machine with static WorkGroup (startQueryExecution request-response)', () => {
    it('grants only startQueryExecution and getDataCatalog (no polling/stop)', () => {
      const stmt = findStatementByAction(asyncRole, 'athena:startQueryExecution');
      const actions = [].concat(stmt.Action);
      expect(actions).to.include('athena:startQueryExecution');
      expect(actions).to.include('athena:getDataCatalog');

      // Case-insensitive on purpose: see lowerCased above.
      const normalized = lowerCased(actions);
      expect(normalized).to.not.include('athena:stopqueryexecution');
      expect(normalized).to.not.include('athena:getqueryresults');
      expect(normalized).to.not.include('athena:listqueryexecutions');
    });

    it('still grants S3 + Glue + LakeFormation (Athena uses caller identity for data access)', () => {
      const actions = collectActions(asyncRole);
      expect(actions).to.include('s3:GetObject');
      expect(actions).to.include('glue:GetTable');
      expect(actions).to.include('lakeformation:GetDataAccess');
    });

    it('scopes the athena resource to workgroup/primary', () => {
      const stmt = findStatementByAction(asyncRole, 'athena:startQueryExecution');
      expect(workGroupArn(stmt)).to.match(/:workgroup\/primary$/);
    });
  });

  describe('runtime-WorkGroup machine (WorkGroup.$ path)', () => {
    it('falls back to workgroup/* when the WorkGroup is a runtime JSON path', () => {
      const stmt = findStatementByAction(runtimeWgRole, 'athena:startQueryExecution');
      expect(workGroupArn(stmt)).to.match(/:workgroup\/\*$/);
    });
  });
});
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
'use strict';
2+
3+
// IAM permissions follow AWS's documented templates for Step Functions ↔ Athena
4+
// integration. Athena is unusual: it uses the *caller's* IAM identity for Glue
5+
// catalog and S3 data access, so the state machine's execution role needs Glue,
6+
// S3, and Lake Formation grants in addition to the athena: actions.
7+
// Reference: https://docs.aws.amazon.com/step-functions/latest/dg/connect-athena.html
8+
9+
// S3 access granted alongside startQueryExecution: Athena reads source data
// and writes query results with the caller's identity.
const S3_PERMISSIONS = {
  action: [
    's3:GetBucketLocation',
    's3:GetObject',
    's3:ListBucket',
    's3:ListBucketMultipartUploads',
    's3:ListMultipartUploadParts',
    's3:AbortMultipartUpload',
    's3:CreateBucket',
    's3:PutObject',
  ].join(','),
  resource: 'arn:aws:s3:::*',
};

// Glue Data Catalog access granted alongside startQueryExecution, scoped to
// the catalog, databases, tables and user-defined functions of the deploying
// account and region.
const GLUE_PERMISSIONS = {
  action: [
    'CreateDatabase',
    'GetDatabase',
    'GetDatabases',
    'UpdateDatabase',
    'DeleteDatabase',
    'CreateTable',
    'UpdateTable',
    'GetTable',
    'GetTables',
    'DeleteTable',
    'BatchDeleteTable',
    'BatchCreatePartition',
    'CreatePartition',
    'UpdatePartition',
    'GetPartition',
    'GetPartitions',
    'BatchGetPartition',
    'DeletePartition',
    'BatchDeletePartition',
  ].map((operation) => `glue:${operation}`).join(','),
  resource: ['catalog', 'database/*', 'table/*', 'userDefinedFunction/*'].map((suffix) => ({
    // The ${AWS::...} placeholders are resolved by CloudFormation, not by JS.
    'Fn::Sub': ['arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:'.concat(suffix), {}],
  })),
};

// Lake Formation data-access grant; it is not resource-scoped.
const LAKE_FORMATION_PERMISSION = {
  action: 'lakeformation:GetDataAccess',
  resource: '*',
};
28+
29+
/**
 * Builds the `Fn::Sub` expression for an Athena workgroup ARN in the
 * deploying partition, region and account.
 *
 * @param {string|null|undefined} workGroupName - Workgroup to scope to; any
 *   falsy value (null, undefined, empty string) yields the `workgroup/*`
 *   wildcard.
 * @returns {{'Fn::Sub': Array}} CloudFormation `Fn::Sub` object.
 */
function workGroupArn(workGroupName) {
  // The ${AWS::...} placeholders are resolved by CloudFormation, not by JS.
  const arnPrefix = 'arn:${AWS::Partition}:athena:${AWS::Region}:${AWS::AccountId}:workgroup/';
  const suffix = workGroupName ? workGroupName : '*';
  return { 'Fn::Sub': [`${arnPrefix}${suffix}`, {}] };
}
38+
39+
// `Fn::Sub` expression matching every Athena data catalog in the deploying
// partition, region and account. The ${AWS::...} placeholders are resolved by
// CloudFormation, not by JS.
const dataCatalogArn = {
  'Fn::Sub': ['arn:${AWS::Partition}:athena:${AWS::Region}:${AWS::AccountId}:datacatalog/*', {}],
};
45+
46+
/**
 * Returns the Athena workgroup name when the task state names it as a
 * literal string, or null when it cannot be known at deploy time.
 *
 * The workgroup is looked up under `Parameters` (JSONPath states) and, when
 * that is absent, under `Arguments` (JSONata states). A runtime JSONPath
 * reference lives under the `WorkGroup.$` key and is therefore never seen
 * here. A JSONata expression is a string wrapped in `{% ... %}`; it is
 * treated as a runtime value so it is never interpolated into an ARN.
 *
 * @param {object|null|undefined} state - Step Functions task state definition.
 * @returns {string|null} The literal workgroup name, or null if the workgroup
 *   is missing, empty, not a string, or a JSONata expression.
 */
function staticWorkGroup(state) {
  const taskInput = state && (state.Parameters || state.Arguments);
  const wg = taskInput && taskInput.WorkGroup;
  if (typeof wg !== 'string' || wg.length === 0) {
    return null;
  }

  // JSONata expressions are only evaluated at execution time.
  const trimmed = wg.trim();
  if (trimmed.startsWith('{%') && trimmed.endsWith('%}')) {
    return null;
  }

  return wg;
}
50+
51+
/**
 * Builds the permission entries for an `athena:startQueryExecution` task.
 *
 * @param {object} [options]
 * @param {boolean} [options.sync=false] - True for the `.sync` (Run a Job)
 *   integration, which needs the wider action set used to monitor and stop
 *   the query; false for request-response.
 * @param {object} [options.state] - The task state; a literal WorkGroup in it
 *   narrows the Athena resource to that workgroup, otherwise `workgroup/*`.
 * @returns {Array<{action: string, resource: *}>} The Athena entry followed
 *   by the shared S3, Glue and Lake Formation entries.
 */
function getStartQueryPermissions({ sync = false, state } = {}) {
  const requestResponseActions = 'athena:startQueryExecution,athena:getDataCatalog';
  const runAJobActions = 'athena:startQueryExecution,athena:stopQueryExecution,athena:getQueryExecution,athena:getDataCatalog,athena:GetWorkGroup,athena:BatchGetQueryExecution,athena:GetQueryResults,athena:ListQueryExecutions';

  const athenaPermission = {
    action: sync ? runAJobActions : requestResponseActions,
    resource: [workGroupArn(staticWorkGroup(state)), dataCatalogArn],
  };

  return [athenaPermission, S3_PERMISSIONS, GLUE_PERMISSIONS, LAKE_FORMATION_PERMISSION];
}
66+
67+
/**
 * Permission entries for a standalone `athena:stopQueryExecution` task.
 * The resource is `workgroup/*` (workGroupArn is called with null).
 *
 * @returns {Array<{action: string, resource: object}>} Single-entry list.
 */
function getStopQueryPermissions() {
  const permission = {
    action: 'athena:stopQueryExecution',
    resource: workGroupArn(null),
  };
  return [permission];
}
70+
71+
/**
 * Permission entries for a standalone `athena:getQueryExecution` task.
 * The resource is `workgroup/*` (workGroupArn is called with null).
 *
 * @returns {Array<{action: string, resource: object}>} Single-entry list.
 */
function getGetQueryExecutionPermissions() {
  const permission = {
    action: 'athena:getQueryExecution',
    resource: workGroupArn(null),
  };
  return [permission];
}
74+
75+
/**
 * Permission entries for a standalone `athena:getQueryResults` task: the
 * Athena action on `workgroup/*`, plus `s3:GetObject` on all S3 resources.
 *
 * @returns {Array<{action: string, resource: *}>} Athena entry, then S3 entry.
 */
function getGetQueryResultsPermissions() {
  const athenaPermission = {
    action: 'athena:getQueryResults',
    resource: workGroupArn(null),
  };
  const resultObjectPermission = {
    action: 's3:GetObject',
    resource: 'arn:aws:s3:::*',
  };
  return [athenaPermission, resultObjectPermission];
}
81+
82+
module.exports = {
83+
getStartQueryPermissions,
84+
getStopQueryPermissions,
85+
getGetQueryExecutionPermissions,
86+
getGetQueryResultsPermissions,
87+
};

0 commit comments

Comments
 (0)