Skip to content

Commit ae12460

Browse files
authored
Fix infinite requeue for Role/Policy with null description (aws-controllers-k8s#98)
Fixes aws-controllers-k8s/community#1919 Fixes aws-controllers-k8s/community#1772 Fixes aws-controllers-k8s/community#1610 Fixes aws-controllers-k8s/community#1490 Fixes aws-controllers-k8s/community#2000 Fixes aws-controllers-k8s/community#1939 Fixes aws-controllers-k8s/community#1932 Currently, the controller requeues infinitely if a Role or Policy resource is created with a null (empty string) description. This issue stems from the late initialization process used to populate fields defaulted by the AWS API. For `CreateRole` and `CreatePolicy` APIs, AWS accept a Description field but fails to return it. This disrupts the controller's runtime logic causing unnecessary infinite requeues and preventing resources from being marked Synced While a fix could target code-gen and runtime, this is a unique behavior specific to IAM APIs. As a temporary solution, we disable late initialization for the Description field and avoid unpopulating it after a Create call This commit fixes the infinite requeue bug for Roles and Policies with null descriptions and adds e2e tests to prevent regressions By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent 15a6365 commit ae12460

12 files changed

+238
-34
lines changed
+4-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
ack_generate_info:
2-
build_date: "2024-03-06T21:39:35Z"
3-
build_hash: a5ba3c851434263128a1464a2c41e528779eeefa
2+
build_date: "2024-03-12T15:06:30Z"
3+
build_hash: d86061f5f579fe5d3a07528917d95e34e79c4dc0
44
go_version: go1.22.0
5-
version: v0.32.1
5+
version: v0.32.1-1-gd86061f-dirty
66
api_directory_checksum: 761a2c708651b0273bf39d98dddaf029de23d337
77
api_version: v1alpha1
88
aws_sdk_go_version: v1.49.0
99
generator_config_info:
10-
file_checksum: ee030a9c18c7fa34d8f7f4777997df355c029971
10+
file_checksum: 8f85dee132779d172c0fd19537c3bbd709e079d6
1111
original_file_name: generator.yaml
1212
last_modification:
1313
reason: API generation

apis/v1alpha1/generator.yaml

+16-2
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,13 @@ resources:
148148
- InvalidInput
149149
- MalformedPolicyDocument
150150
fields:
151+
# Left for historical purposes. It looks like late_initialize is was
152+
# causing the controller to infinitely requeue (every 5 seconds) when the
153+
# description was set to nil. Not it looks like this is not needed
154+
# anymore.
155+
# Note(a-hilaly): Very likely the API behavior has changed and the
156+
# late_initialize is no longer needed.
157+
#
151158
Description:
152159
# You might be wondering why description is late-initialized, since
153160
# there isn't a default server-side value for description.
@@ -165,7 +172,10 @@ resources:
165172
# we set the late initialize property of the Description field here to
166173
# override the Spec.Description to the original value we set in the
167174
# CreatePolicy *input* shape.
168-
late_initialize: {}
175+
#late_initialize: {}
176+
set:
177+
- ignore: true
178+
method: Create
169179
Path:
170180
late_initialize: {}
171181
Tags:
@@ -211,9 +221,13 @@ resources:
211221
set:
212222
# The input and output shapes are different...
213223
- from: PermissionsBoundary.PermissionsBoundaryArn
224+
# Left for historical purposes.
214225
Description:
226+
set:
227+
- ignore: true
228+
method: Create
215229
# See above in Policy resource about why this is here.
216-
late_initialize: {}
230+
# late_initialize: {}
217231
Path:
218232
late_initialize: {}
219233
# In order to support attaching zero or more policies to a role, we use

generator.yaml

+16-2
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,13 @@ resources:
148148
- InvalidInput
149149
- MalformedPolicyDocument
150150
fields:
151+
# Left for historical purposes. It looks like late_initialize is was
152+
# causing the controller to infinitely requeue (every 5 seconds) when the
153+
# description was set to nil. Not it looks like this is not needed
154+
# anymore.
155+
# Note(a-hilaly): Very likely the API behavior has changed and the
156+
# late_initialize is no longer needed.
157+
#
151158
Description:
152159
# You might be wondering why description is late-initialized, since
153160
# there isn't a default server-side value for description.
@@ -165,7 +172,10 @@ resources:
165172
# we set the late initialize property of the Description field here to
166173
# override the Spec.Description to the original value we set in the
167174
# CreatePolicy *input* shape.
168-
late_initialize: {}
175+
#late_initialize: {}
176+
set:
177+
- ignore: true
178+
method: Create
169179
Path:
170180
late_initialize: {}
171181
Tags:
@@ -211,9 +221,13 @@ resources:
211221
set:
212222
# The input and output shapes are different...
213223
- from: PermissionsBoundary.PermissionsBoundaryArn
224+
# Left for historical purposes.
214225
Description:
226+
set:
227+
- ignore: true
228+
method: Create
215229
# See above in Policy resource about why this is here.
216-
late_initialize: {}
230+
# late_initialize: {}
217231
Path:
218232
late_initialize: {}
219233
# In order to support attaching zero or more policies to a role, we use

pkg/resource/policy/manager.go

+1-7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/resource/policy/sdk.go

-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/resource/role/manager.go

+1-7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/resource/role/sdk.go

-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
apiVersion: iam.services.k8s.aws/v1alpha1
2+
kind: Policy
3+
metadata:
4+
name: $POLICY_NAME
5+
spec:
6+
name: $POLICY_NAME
7+
policyDocument: '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Action":"s3:ListAllMyBuckets","Resource":"arn:aws:s3:::*"},{"Effect":"Allow","Action":["s3:List*"],"Resource":["*"]}]}'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
apiVersion: iam.services.k8s.aws/v1alpha1
2+
kind: Role
3+
metadata:
4+
name: $ROLE_NAME
5+
spec:
6+
name: $ROLE_NAME
7+
assumeRolePolicyDocument: >
8+
{
9+
"Version":"2012-10-17",
10+
"Statement": [{
11+
"Effect":"Allow",
12+
"Principal": {
13+
"Service": [
14+
"ec2.amazonaws.com"
15+
]
16+
},
17+
"Action": ["sts:AssumeRole"]
18+
}]
19+
}

test/e2e/tests/test_descriptions.py

+155
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You may
4+
# not use this file except in compliance with the License. A copy of the
5+
# License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is distributed
10+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11+
# express or implied. See the License for the specific language governing
12+
# permissions and limitations under the License.
13+
14+
"""Integration tests for resource description fields
15+
The reason we are dedicating a test file for this is because the description/lateinitialize
16+
bug is the most common bug we have seen in the ACK project. This test file is dedicated
17+
to testing the description field for policy and role resources.
18+
19+
See bugs:
20+
-
21+
"""
22+
23+
import json
24+
import time
25+
26+
import pytest
27+
28+
from acktest.k8s import condition
29+
from acktest.k8s import resource as k8s
30+
from acktest.resources import random_suffix_name
31+
from e2e import service_marker, CRD_GROUP, CRD_VERSION, load_resource
32+
from e2e.common.types import POLICY_RESOURCE_PLURAL
33+
from e2e.replacement_values import REPLACEMENT_VALUES
34+
from e2e import policy
35+
from e2e import role
36+
37+
DELETE_WAIT_SECONDS = 10
38+
CREATE_WAIT_SECONDS = 10
39+
MODIFY_WAIT_SECONDS = 10
40+
41+
42+
@pytest.fixture(scope="module")
43+
def policy_with_no_description():
44+
policy_name = random_suffix_name("my-simple-policy", 24)
45+
46+
replacements = REPLACEMENT_VALUES.copy()
47+
replacements['POLICY_NAME'] = policy_name
48+
49+
resource_data = load_resource(
50+
"policy_no_description",
51+
additional_replacements=replacements,
52+
)
53+
54+
ref = k8s.CustomResourceReference(
55+
CRD_GROUP, CRD_VERSION, POLICY_RESOURCE_PLURAL,
56+
policy_name, namespace="default",
57+
)
58+
k8s.create_custom_resource(ref, resource_data)
59+
cr = k8s.wait_resource_consumed_by_controller(ref)
60+
61+
cr = k8s.get_resource(ref)
62+
assert cr is not None
63+
assert 'status' in cr
64+
assert 'ackResourceMetadata' in cr['status']
65+
assert 'arn' in cr['status']['ackResourceMetadata']
66+
policy_arn = cr['status']['ackResourceMetadata']['arn']
67+
68+
policy.wait_until_exists(policy_arn)
69+
70+
assert cr is not None
71+
assert k8s.get_resource_exists(ref)
72+
73+
yield (ref, cr)
74+
75+
_, deleted = k8s.delete_custom_resource(
76+
ref,
77+
period_length=DELETE_WAIT_SECONDS,
78+
)
79+
assert deleted
80+
81+
policy.wait_until_deleted(policy_arn)
82+
83+
@pytest.fixture(scope="module")
84+
def role_with_no_description():
85+
role_name = random_suffix_name("my-simple-role", 24)
86+
87+
replacements = REPLACEMENT_VALUES.copy()
88+
replacements['ROLE_NAME'] = role_name
89+
90+
resource_data = load_resource(
91+
"role_no_description",
92+
additional_replacements=replacements,
93+
)
94+
95+
ref = k8s.CustomResourceReference(
96+
CRD_GROUP, CRD_VERSION, "roles",
97+
role_name, namespace="default",
98+
)
99+
k8s.create_custom_resource(ref, resource_data)
100+
cr = k8s.wait_resource_consumed_by_controller(ref)
101+
102+
role.wait_until_exists(role_name)
103+
104+
assert cr is not None
105+
assert k8s.get_resource_exists(ref)
106+
107+
yield (ref, cr)
108+
109+
_, deleted = k8s.delete_custom_resource(
110+
ref,
111+
period_length=DELETE_WAIT_SECONDS,
112+
)
113+
assert deleted
114+
115+
role.wait_until_deleted(role_name)
116+
117+
@service_marker
118+
@pytest.mark.canary
119+
class TestRole:
120+
def test_role_empty_description(self, role_with_no_description):
121+
ref, res = role_with_no_description
122+
role_name = ref.name
123+
124+
time.sleep(CREATE_WAIT_SECONDS)
125+
condition.assert_synced(ref)
126+
condition.assert_type_status(
127+
ref,
128+
cond_type_match=condition.CONDITION_TYPE_LATE_INITIALIZED,
129+
cond_status_match=True,
130+
)
131+
132+
updates = {
133+
"spec": {
134+
"description": "non empty description",
135+
},
136+
}
137+
k8s.patch_custom_resource(ref, updates)
138+
time.sleep(MODIFY_WAIT_SECONDS)
139+
condition.assert_synced(ref)
140+
141+
latest = role.get(role_name)
142+
assert latest is not None
143+
assert latest["Description"] == "non empty description"
144+
145+
def test_policy_empty_description(self, policy_with_no_description):
146+
ref, res = policy_with_no_description
147+
148+
time.sleep(CREATE_WAIT_SECONDS)
149+
condition.assert_synced(ref)
150+
condition.assert_type_status(
151+
ref,
152+
cond_type_match=condition.CONDITION_TYPE_LATE_INITIALIZED,
153+
cond_status_match=True,
154+
)
155+

test/e2e/tests/test_policy.py

+4
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ def test_crud(self, simple_policy):
109109
k8s.patch_custom_resource(ref, updates)
110110
time.sleep(MODIFY_WAIT_AFTER_SECONDS)
111111

112+
condition.assert_synced(ref)
113+
112114
latest_tags = policy.get_tags(policy_arn)
113115
after_update_expected_tags = [
114116
{
@@ -129,6 +131,8 @@ def test_crud(self, simple_policy):
129131
k8s.patch_custom_resource(ref, updates)
130132
time.sleep(MODIFY_WAIT_AFTER_SECONDS)
131133

134+
condition.assert_synced(ref)
135+
132136
latest_tags = policy.get_tags(policy_arn)
133137
after_update_expected_tags = [
134138
{

0 commit comments

Comments
 (0)