Skip to content

Commit 861f7ed

Browse files
authored
Properly handle CARM errors and requeues (#140)
Before this patch, we had introduced new mechanisms to handle Role ARN retrievals from the CARM cache, which improved ACK runtime scaling capabilities and addressed possible race condition scenarios. However, in the process we missed two things: - Setting an `ACK.ResourceSynced` condition stating that the resource isn't synced yet. - Returning the **correct** runtime error that will cause the reconciler to requeue every 15 seconds. Both problems stemmed from the fact that we're not "yet" in the reconcile function (`rm.Sync`) that is wrapped by a proper error handler (one that triggers requeues and resets/sets resource conditions). In this special case, we need to manually inject the condition and return a controller-runtime error that will trigger a requeue after 15 seconds. While this is a "fair" fix, we're planning on refactoring a lot of the runtime logic to make it easier to read and maintain and, more importantly, to expose reusable components that will help avoid falling into such traps. Signed-off-by: Amine Hilaly <[email protected]> By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent 24070d9 commit 861f7ed

File tree

3 files changed

+22
-11
lines changed

3 files changed

+22
-11
lines changed

pkg/condition/condition.go

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ var (
3333
NotSyncedMessage = "Resource not synced"
3434
SyncedMessage = "Resource synced successfully"
3535
FailedReferenceResolutionMessage = "Reference resolution failed"
36+
UnavailableIAMRoleMessage = "IAM Role is not available"
3637
)
3738

3839
// Synced returns the Condition in the resource's Conditions collection that is

pkg/runtime/adoption_reconciler.go

+1
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ func (r *adoptionReconciler) reconcile(ctx context.Context, req ctrlrt.Request)
124124
// is not available.
125125
roleARN, err = r.getRoleARN(acctID)
126126
if err != nil {
127+
ackrtlog.InfoAdoptedResource(r.log, res, fmt.Sprintf("Unable to start adoption reconcilliation %s: %v", acctID, err))
127128
// r.getRoleARN errors are not terminal, we should requeue.
128129
return requeue.NeededAfter(err, roleARNNotAvailableRequeueDelay)
129130
}

pkg/runtime/reconciler.go

+20-11
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,18 @@ func (r *resourceReconciler) Reconcile(ctx context.Context, req ctrlrt.Request)
158158
return ctrlrt.Result{}, err
159159
}
160160

161+
rlog := ackrtlog.NewResourceLogger(
162+
r.log, desired,
163+
// All the fields for a resource that do not change during reconciliation
164+
// can be initialized during resourceLogger creation
165+
"kind", r.rd.GroupVersionKind().Kind,
166+
"namespace", req.Namespace,
167+
"name", req.Name,
168+
)
169+
// We're storing a logger pointer in the context, so that any changes to the logger
170+
// will be reflected in the context.
171+
ctx = context.WithValue(ctx, ackrtlog.ContextKey, rlog)
172+
161173
// If a user has specified a namespace that is annotated with the
162174
// owner account ID, we need an appropriate role ARN to assume
163175
// in order to perform the reconciliation. The role ARNs are typically
@@ -174,12 +186,16 @@ func (r *resourceReconciler) Reconcile(ctx context.Context, req ctrlrt.Request)
174186
// is not available.
175187
roleARN, err = r.getRoleARN(acctID)
176188
if err != nil {
177-
// r.getRoleARN errors are not terminal, we should requeue.
178-
return ctrlrt.Result{}, requeue.NeededAfter(err, roleARNNotAvailableRequeueDelay)
189+
// TODO(a-hilaly): Refactor all the reconcile function to make it
190+
// easier to understand and maintain.
191+
reason := err.Error()
192+
latest := desired.DeepCopy()
193+
// set ResourceSynced condition to false with proper error message
194+
condition.SetSynced(latest, corev1.ConditionFalse, &condition.UnavailableIAMRoleMessage, &reason)
195+
return r.HandleReconcileError(ctx, desired, latest, requeue.NeededAfter(err, roleARNNotAvailableRequeueDelay))
179196
}
180197
}
181198
region := r.getRegion(desired)
182-
183199
endpointURL := r.getEndpointURL(desired)
184200
gvk := r.rd.GroupVersionKind()
185201
// New session will only pivot to the roleARN if it is not empty.
@@ -188,18 +204,11 @@ func (r *resourceReconciler) Reconcile(ctx context.Context, req ctrlrt.Request)
188204
return ctrlrt.Result{}, err
189205
}
190206

191-
rlog := ackrtlog.NewResourceLogger(
192-
r.log, desired,
207+
rlog.WithValues(
193208
"account", acctID,
194209
"role", roleARN,
195210
"region", region,
196-
// All the fields for a resource that do not change during reconciliation
197-
// can be initialized during resourceLogger creation
198-
"kind", r.rd.GroupVersionKind().Kind,
199-
"namespace", req.Namespace,
200-
"name", req.Name,
201211
)
202-
ctx = context.WithValue(ctx, ackrtlog.ContextKey, rlog)
203212

204213
rm, err := r.rmf.ManagerFor(
205214
r.cfg, r.log, r.metrics, r, sess, acctID, region,

0 commit comments

Comments
 (0)