Skip to content

Commit d126e71

Browse files
committed
Add prometheus metrics
1 parent 879e715 commit d126e71

30 files changed

+1112
-227
lines changed

controllers/elbv2/targetgroupbinding_controller.go

+36-11
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@ package controllers
1919
import (
2020
"context"
2121
"fmt"
22+
"time"
23+
2224
discv1 "k8s.io/api/discovery/v1"
2325
"sigs.k8s.io/controller-runtime/pkg/handler"
2426
"sigs.k8s.io/controller-runtime/pkg/reconcile"
25-
"time"
2627

2728
"github.com/aws/aws-sdk-go-v2/aws"
2829
"github.com/pkg/errors"
@@ -31,16 +32,19 @@ import (
3132
"k8s.io/client-go/util/workqueue"
3233
"sigs.k8s.io/aws-load-balancer-controller/controllers/elbv2/eventhandlers"
3334
"sigs.k8s.io/aws-load-balancer-controller/pkg/config"
35+
errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error"
3436
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
3537
"sigs.k8s.io/aws-load-balancer-controller/pkg/runtime"
3638
"sigs.k8s.io/aws-load-balancer-controller/pkg/targetgroupbinding"
3739
"sigs.k8s.io/controller-runtime/pkg/controller"
3840

3941
"github.com/go-logr/logr"
42+
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc"
4043
ctrl "sigs.k8s.io/controller-runtime"
4144
"sigs.k8s.io/controller-runtime/pkg/client"
4245

4346
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
47+
metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util"
4448
)
4549

4650
const (
@@ -51,7 +55,7 @@ const (
5155
// NewTargetGroupBindingReconciler constructs a new targetGroupBindingReconciler.
5256
func NewTargetGroupBindingReconciler(k8sClient client.Client, eventRecorder record.EventRecorder, finalizerManager k8s.FinalizerManager,
5357
tgbResourceManager targetgroupbinding.ResourceManager, config config.ControllerConfig, deferredTargetGroupBindingReconciler DeferredTargetGroupBindingReconciler,
54-
logger logr.Logger) *targetGroupBindingReconciler {
58+
logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *targetGroupBindingReconciler {
5559

5660
return &targetGroupBindingReconciler{
5761
k8sClient: k8sClient,
@@ -60,6 +64,8 @@ func NewTargetGroupBindingReconciler(k8sClient client.Client, eventRecorder reco
6064
tgbResourceManager: tgbResourceManager,
6165
deferredTargetGroupBindingReconciler: deferredTargetGroupBindingReconciler,
6266
logger: logger,
67+
metricsCollector: metricsCollector,
68+
reconcileCounters: reconcileCounters,
6369

6470
maxConcurrentReconciles: config.TargetGroupBindingMaxConcurrentReconciles,
6571
maxExponentialBackoffDelay: config.TargetGroupBindingMaxExponentialBackoffDelay,
@@ -75,6 +81,8 @@ type targetGroupBindingReconciler struct {
7581
tgbResourceManager targetgroupbinding.ResourceManager
7682
deferredTargetGroupBindingReconciler DeferredTargetGroupBindingReconciler
7783
logger logr.Logger
84+
metricsCollector lbcmetrics.MetricCollector
85+
reconcileCounters *metricsutil.ReconcileCounters
7886

7987
maxConcurrentReconciles int
8088
maxExponentialBackoffDelay time.Duration
@@ -93,13 +101,19 @@ type targetGroupBindingReconciler struct {
93101
// +kubebuilder:rbac:groups="discovery.k8s.io",resources=endpointslices,verbs=get;list;watch
94102

95103
// Reconcile is the entry point for a single TargetGroupBinding reconcile request.
// It records the request with reconcileCounters.IncrementTGB, logs the request
// name at verbosity 1, runs r.reconcile, and hands the resulting error to
// runtime.HandleReconcileError, whose ctrl.Result and error are returned as-is.
func (r *targetGroupBindingReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) {
	r.reconcileCounters.IncrementTGB(req.NamespacedName)
	r.logger.V(1).Info("Reconcile request", "name", req.Name)

	reconcileErr := r.reconcile(ctx, req)
	return runtime.HandleReconcileError(reconcileErr, r.logger)
}
99108

100109
func (r *targetGroupBindingReconciler) reconcile(ctx context.Context, req reconcile.Request) error {
101110
tgb := &elbv2api.TargetGroupBinding{}
102-
if err := r.k8sClient.Get(ctx, req.NamespacedName, tgb); err != nil {
111+
var err error
112+
fetchTargetGroupBindingFn := func() {
113+
err = r.k8sClient.Get(ctx, req.NamespacedName, tgb)
114+
}
115+
r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "fetch_targetGroupBinding", fetchTargetGroupBindingFn)
116+
if err != nil {
103117
return client.IgnoreNotFound(err)
104118
}
105119

@@ -110,25 +124,36 @@ func (r *targetGroupBindingReconciler) reconcile(ctx context.Context, req reconc
110124
}
111125

112126
func (r *targetGroupBindingReconciler) reconcileTargetGroupBinding(ctx context.Context, tgb *elbv2api.TargetGroupBinding) error {
113-
if err := r.finalizerManager.AddFinalizers(ctx, tgb, targetGroupBindingFinalizer); err != nil {
127+
var err error
128+
finalizerFn := func() {
129+
err = r.finalizerManager.AddFinalizers(ctx, tgb, targetGroupBindingFinalizer)
130+
}
131+
r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "add_finalizers", finalizerFn)
132+
if err != nil {
114133
r.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err))
115-
return err
134+
return errmetrics.NewErrorWithMetrics(controllerName, "add_finalizers_error", err, r.metricsCollector)
116135
}
117136

118-
deferred, err := r.tgbResourceManager.Reconcile(ctx, tgb)
119-
137+
var deferred bool
138+
tgbResourceFn := func() {
139+
deferred, err = r.tgbResourceManager.Reconcile(ctx, tgb)
140+
}
141+
r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "reconcile_targetgroupbinding", tgbResourceFn)
120142
if err != nil {
121-
return err
143+
return errmetrics.NewErrorWithMetrics(controllerName, "reconcile_targetgroupbinding_error", err, r.metricsCollector)
122144
}
123145

124146
if deferred {
125147
r.deferredTargetGroupBindingReconciler.Enqueue(tgb)
126148
return nil
127149
}
128150

129-
if err := r.updateTargetGroupBindingStatus(ctx, tgb); err != nil {
130-
r.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err))
131-
return err
151+
updateTargetGroupBindingStatusFn := func() {
152+
err = r.updateTargetGroupBindingStatus(ctx, tgb)
153+
}
154+
r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "update_status", updateTargetGroupBindingStatusFn)
155+
if err != nil {
156+
return errmetrics.NewErrorWithMetrics(controllerName, "update_status_error", err, r.metricsCollector)
132157
}
133158

134159
r.eventRecorder.Event(tgb, corev1.EventTypeNormal, k8s.TargetGroupBindingEventReasonSuccessfullyReconciled, "Successfully reconciled")

controllers/ingress/group_controller.go

+66-20
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package ingress
33
import (
44
"context"
55
"fmt"
6+
67
"sigs.k8s.io/controller-runtime/pkg/reconcile"
78

89
"github.com/go-logr/logr"
@@ -21,8 +22,11 @@ import (
2122
"sigs.k8s.io/aws-load-balancer-controller/pkg/deploy"
2223
elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2"
2324
"sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking"
25+
errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error"
2426
"sigs.k8s.io/aws-load-balancer-controller/pkg/ingress"
2527
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
28+
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc"
29+
metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util"
2630
"sigs.k8s.io/aws-load-balancer-controller/pkg/model/core"
2731
elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2"
2832
networkingpkg "sigs.k8s.io/aws-load-balancer-controller/pkg/networking"
@@ -48,7 +52,7 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
4852
finalizerManager k8s.FinalizerManager, networkingSGManager networkingpkg.SecurityGroupManager,
4953
networkingSGReconciler networkingpkg.SecurityGroupReconciler, subnetsResolver networkingpkg.SubnetsResolver,
5054
elbv2TaggingManager elbv2deploy.TaggingManager, controllerConfig config.ControllerConfig, backendSGProvider networkingpkg.BackendSGProvider,
51-
sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger) *groupReconciler {
55+
sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *groupReconciler {
5256

5357
annotationParser := annotations.NewSuffixAnnotationParser(annotations.AnnotationPrefixIngress)
5458
authConfigBuilder := ingress.NewDefaultAuthConfigBuilder(annotationParser)
@@ -61,10 +65,10 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
6165
authConfigBuilder, enhancedBackendBuilder, trackingProvider, elbv2TaggingManager, controllerConfig.FeatureGates,
6266
cloud.VpcID(), controllerConfig.ClusterName, controllerConfig.DefaultTags, controllerConfig.ExternalManagedTags,
6367
controllerConfig.DefaultSSLPolicy, controllerConfig.DefaultTargetType, controllerConfig.DefaultLoadBalancerScheme, backendSGProvider, sgResolver,
64-
controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger)
68+
controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger, metricsCollector)
6569
stackMarshaller := deploy.NewDefaultStackMarshaller()
6670
stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingSGManager, networkingSGReconciler, elbv2TaggingManager,
67-
controllerConfig, ingressTagPrefix, logger)
71+
controllerConfig, ingressTagPrefix, logger, metricsCollector, controllerName)
6872
classLoader := ingress.NewDefaultClassLoader(k8sClient, true)
6973
classAnnotationMatcher := ingress.NewDefaultClassAnnotationMatcher(controllerConfig.IngressConfig.IngressClass)
7074
manageIngressesWithoutIngressClass := controllerConfig.IngressConfig.IngressClass == ""
@@ -83,6 +87,9 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
8387
groupLoader: groupLoader,
8488
groupFinalizerManager: groupFinalizerManager,
8589
logger: logger,
90+
metricsCollector: metricsCollector,
91+
controllerName: controllerName,
92+
reconcileCounters: reconcileCounters,
8693

8794
maxConcurrentReconciles: controllerConfig.IngressConfig.MaxConcurrentReconciles,
8895
}
@@ -102,6 +109,9 @@ type groupReconciler struct {
102109
groupLoader ingress.GroupLoader
103110
groupFinalizerManager ingress.FinalizerManager
104111
logger logr.Logger
112+
metricsCollector lbcmetrics.MetricCollector
113+
controllerName string
114+
reconcileCounters *metricsutil.ReconcileCounters
105115

106116
maxConcurrentReconciles int
107117
}
@@ -116,40 +126,64 @@ type groupReconciler struct {
116126
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
117127

118128
// Reconcile is the entry point for a single ingress group reconcile request.
// It records the request with reconcileCounters.IncrementIngress, runs
// r.reconcile, and hands the resulting error to runtime.HandleReconcileError,
// whose ctrl.Result and error are returned as-is.
func (r *groupReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) {
	r.reconcileCounters.IncrementIngress(req.NamespacedName)

	reconcileErr := r.reconcile(ctx, req)
	return runtime.HandleReconcileError(reconcileErr, r.logger)
}
121132

122133
func (r *groupReconciler) reconcile(ctx context.Context, req reconcile.Request) error {
123134
ingGroupID := ingress.DecodeGroupIDFromReconcileRequest(req)
124-
ingGroup, err := r.groupLoader.Load(ctx, ingGroupID)
135+
var err error
136+
var ingGroup ingress.Group
137+
loadIngressFn := func() {
138+
ingGroup, err = r.groupLoader.Load(ctx, ingGroupID)
139+
}
140+
r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "fetch_ingress", loadIngressFn)
125141
if err != nil {
126-
return err
142+
return errmetrics.NewErrorWithMetrics(controllerName, "fetch_ingress_error", err, r.metricsCollector)
127143
}
128144

129-
if err := r.groupFinalizerManager.AddGroupFinalizer(ctx, ingGroupID, ingGroup.Members); err != nil {
145+
addFinalizerFn := func() {
146+
err = r.groupFinalizerManager.AddGroupFinalizer(ctx, ingGroupID, ingGroup.Members)
147+
}
148+
r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "add_group_finalizer", addFinalizerFn)
149+
if err != nil {
130150
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err))
131-
return err
151+
return errmetrics.NewErrorWithMetrics(controllerName, "add_group_finalizer_error", err, r.metricsCollector)
132152
}
153+
133154
_, lb, err := r.buildAndDeployModel(ctx, ingGroup)
134155
if err != nil {
135156
return err
136157
}
137158

138159
if len(ingGroup.Members) > 0 && lb != nil {
139-
lbDNS, err := lb.DNSName().Resolve(ctx)
140-
if err != nil {
141-
return err
160+
var statusErr error
161+
dnsResolveAndUpdateStatus := func() {
162+
var lbDNS string
163+
lbDNS, statusErr = lb.DNSName().Resolve(ctx)
164+
if statusErr != nil {
165+
return
166+
}
167+
statusErr = r.updateIngressGroupStatus(ctx, ingGroup, lbDNS)
168+
if statusErr != nil {
169+
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedUpdateStatus,
170+
fmt.Sprintf("Failed update status due to %v", statusErr))
171+
}
142172
}
143-
if err := r.updateIngressGroupStatus(ctx, ingGroup, lbDNS); err != nil {
144-
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err))
145-
return err
173+
r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "dns_resolve_and_update_status", dnsResolveAndUpdateStatus)
174+
if statusErr != nil {
175+
return errmetrics.NewErrorWithMetrics(controllerName, "dns_resolve_and_update_status_error", statusErr, r.metricsCollector)
146176
}
147177
}
148178

149179
if len(ingGroup.InactiveMembers) > 0 {
150-
if err := r.groupFinalizerManager.RemoveGroupFinalizer(ctx, ingGroupID, ingGroup.InactiveMembers); err != nil {
180+
removeGroupFinalizerFn := func() {
181+
err = r.groupFinalizerManager.RemoveGroupFinalizer(ctx, ingGroupID, ingGroup.InactiveMembers)
182+
}
183+
r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "remove_group_finalizer", removeGroupFinalizerFn)
184+
if err != nil {
151185
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedRemoveFinalizer, fmt.Sprintf("Failed remove finalizer due to %v", err))
152-
return err
186+
return errmetrics.NewErrorWithMetrics(controllerName, "remove_group_finalizer_error", err, r.metricsCollector)
153187
}
154188
}
155189

@@ -158,10 +192,18 @@ func (r *groupReconciler) reconcile(ctx context.Context, req reconcile.Request)
158192
}
159193

160194
func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingress.Group) (core.Stack, *elbv2model.LoadBalancer, error) {
161-
stack, lb, secrets, backendSGRequired, err := r.modelBuilder.Build(ctx, ingGroup)
195+
var stack core.Stack
196+
var lb *elbv2model.LoadBalancer
197+
var secrets []types.NamespacedName
198+
var backendSGRequired bool
199+
var err error
200+
buildModelFn := func() {
201+
stack, lb, secrets, backendSGRequired, err = r.modelBuilder.Build(ctx, ingGroup, r.metricsCollector)
202+
}
203+
r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "build_model", buildModelFn)
162204
if err != nil {
163205
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedBuildModel, fmt.Sprintf("Failed build model due to %v", err))
164-
return nil, nil, err
206+
return nil, nil, errmetrics.NewErrorWithMetrics(controllerName, "build_model_error", err, r.metricsCollector)
165207
}
166208
stackJSON, err := r.stackMarshaller.Marshal(stack)
167209
if err != nil {
@@ -170,13 +212,17 @@ func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingr
170212
}
171213
r.logger.Info("successfully built model", "model", stackJSON)
172214

173-
if err := r.stackDeployer.Deploy(ctx, stack); err != nil {
215+
deployModelFn := func() {
216+
err = r.stackDeployer.Deploy(ctx, stack, r.metricsCollector, "ingress")
217+
}
218+
r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "deploy_model", deployModelFn)
219+
if err != nil {
174220
var requeueNeededAfter *runtime.RequeueNeededAfter
175221
if errors.As(err, &requeueNeededAfter) {
176222
return nil, nil, err
177223
}
178224
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedDeployModel, fmt.Sprintf("Failed deploy model due to %v", err))
179-
return nil, nil, err
225+
return nil, nil, errmetrics.NewErrorWithMetrics(controllerName, "deploy_model_error", err, r.metricsCollector)
180226
}
181227
r.logger.Info("successfully deployed model", "ingressGroup", ingGroup.ID)
182228
r.secretsManager.MonitorSecrets(ingGroup.ID.String(), secrets)
@@ -186,7 +232,7 @@ func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingr
186232
inactiveResources = append(inactiveResources, k8s.ToSliceOfNamespacedNames(ingGroup.Members)...)
187233
}
188234
if err := r.backendSGProvider.Release(ctx, networkingpkg.ResourceTypeIngress, inactiveResources); err != nil {
189-
return nil, nil, err
235+
return nil, nil, errmetrics.NewErrorWithMetrics(controllerName, "release_auto_generated_backend_sg_error", err, r.metricsCollector)
190236
}
191237
return stack, lb, nil
192238
}

0 commit comments

Comments
 (0)