diff --git a/controllers/elbv2/targetgroupbinding_controller.go b/controllers/elbv2/targetgroupbinding_controller.go index 5533133d1..d2ed00064 100644 --- a/controllers/elbv2/targetgroupbinding_controller.go +++ b/controllers/elbv2/targetgroupbinding_controller.go @@ -19,10 +19,11 @@ package controllers import ( "context" "fmt" + "time" + discv1 "k8s.io/api/discovery/v1" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/reconcile" - "time" "github.com/aws/aws-sdk-go-v2/aws" "github.com/pkg/errors" @@ -31,16 +32,19 @@ import ( "k8s.io/client-go/util/workqueue" "sigs.k8s.io/aws-load-balancer-controller/controllers/elbv2/eventhandlers" "sigs.k8s.io/aws-load-balancer-controller/pkg/config" + errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error" "sigs.k8s.io/aws-load-balancer-controller/pkg/k8s" "sigs.k8s.io/aws-load-balancer-controller/pkg/runtime" "sigs.k8s.io/aws-load-balancer-controller/pkg/targetgroupbinding" "sigs.k8s.io/controller-runtime/pkg/controller" "github.com/go-logr/logr" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" + metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util" ) const ( @@ -51,7 +55,7 @@ const ( // NewTargetGroupBindingReconciler constructs new targetGroupBindingReconciler func NewTargetGroupBindingReconciler(k8sClient client.Client, eventRecorder record.EventRecorder, finalizerManager k8s.FinalizerManager, tgbResourceManager targetgroupbinding.ResourceManager, config config.ControllerConfig, deferredTargetGroupBindingReconciler DeferredTargetGroupBindingReconciler, - logger logr.Logger) *targetGroupBindingReconciler { + logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *targetGroupBindingReconciler { return &targetGroupBindingReconciler{ k8sClient: k8sClient, @@ -60,6 +64,8 @@ func NewTargetGroupBindingReconciler(k8sClient client.Client, eventRecorder reco tgbResourceManager: tgbResourceManager, deferredTargetGroupBindingReconciler: deferredTargetGroupBindingReconciler, logger: logger, + metricsCollector: metricsCollector, + reconcileCounters: reconcileCounters, maxConcurrentReconciles: config.TargetGroupBindingMaxConcurrentReconciles, maxExponentialBackoffDelay: config.TargetGroupBindingMaxExponentialBackoffDelay, @@ -75,6 +81,8 @@ type targetGroupBindingReconciler struct { tgbResourceManager targetgroupbinding.ResourceManager deferredTargetGroupBindingReconciler DeferredTargetGroupBindingReconciler logger logr.Logger + metricsCollector lbcmetrics.MetricCollector + reconcileCounters *metricsutil.ReconcileCounters maxConcurrentReconciles int maxExponentialBackoffDelay time.Duration @@ -93,13 +101,19 @@ type targetGroupBindingReconciler struct { // +kubebuilder:rbac:groups="discovery.k8s.io",resources=endpointslices,verbs=get;list;watch func (r *targetGroupBindingReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) { + r.reconcileCounters.IncrementTGB(req.NamespacedName) r.logger.V(1).Info("Reconcile request", "name", req.Name) return runtime.HandleReconcileError(r.reconcile(ctx, req), r.logger) } func (r *targetGroupBindingReconciler) reconcile(ctx context.Context, req reconcile.Request) error { tgb := &elbv2api.TargetGroupBinding{} - if err := r.k8sClient.Get(ctx, req.NamespacedName, tgb); err != nil { + var err error + fetchTargetGroupBindingFn := func() { + err = r.k8sClient.Get(ctx, req.NamespacedName, tgb) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "fetch_targetGroupBinding", fetchTargetGroupBindingFn) + if err != nil { return client.IgnoreNotFound(err) } @@ -110,15 +124,23 @@ func (r *targetGroupBindingReconciler) reconcile(ctx context.Context, req reconc } func (r *targetGroupBindingReconciler) reconcileTargetGroupBinding(ctx context.Context, tgb *elbv2api.TargetGroupBinding) error { - if err := r.finalizerManager.AddFinalizers(ctx, tgb, targetGroupBindingFinalizer); err != nil { + var err error + finalizerFn := func() { + err = r.finalizerManager.AddFinalizers(ctx, tgb, targetGroupBindingFinalizer) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "add_finalizers", finalizerFn) + if err != nil { r.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err)) - return err + return errmetrics.NewErrorWithMetrics(controllerName, "add_finalizers_error", err, r.metricsCollector) } - deferred, err := r.tgbResourceManager.Reconcile(ctx, tgb) - + var deferred bool + tgbResourceFn := func() { + deferred, err = r.tgbResourceManager.Reconcile(ctx, tgb) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "reconcile_targetgroupbinding", tgbResourceFn) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "reconcile_targetgroupbinding_error", err, r.metricsCollector) } if deferred { @@ -126,9 +148,12 @@ func (r *targetGroupBindingReconciler) reconcileTargetGroupBinding(ctx context.C return nil } - if err := r.updateTargetGroupBindingStatus(ctx, tgb); err != nil { - r.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err)) - return err + updateTargetGroupBindingStatusFn := func() { + err = r.updateTargetGroupBindingStatus(ctx, tgb) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "update_status", updateTargetGroupBindingStatusFn) + if err != nil { + return errmetrics.NewErrorWithMetrics(controllerName, "update_status_error", err, r.metricsCollector) } r.eventRecorder.Event(tgb, corev1.EventTypeNormal, k8s.TargetGroupBindingEventReasonSuccessfullyReconciled, "Successfully reconciled") diff --git a/controllers/ingress/group_controller.go b/controllers/ingress/group_controller.go index 932541a8a..13cd9234d 100644 --- a/controllers/ingress/group_controller.go +++ b/controllers/ingress/group_controller.go @@ -3,6 +3,7 @@ package ingress import ( "context" "fmt" + "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/go-logr/logr" @@ -21,8 +22,11 @@ import ( "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy" elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2" "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking" + errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error" "sigs.k8s.io/aws-load-balancer-controller/pkg/ingress" "sigs.k8s.io/aws-load-balancer-controller/pkg/k8s" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" + metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util" "sigs.k8s.io/aws-load-balancer-controller/pkg/model/core" elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2" networkingpkg "sigs.k8s.io/aws-load-balancer-controller/pkg/networking" @@ -48,7 +52,7 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco finalizerManager k8s.FinalizerManager, networkingSGManager networkingpkg.SecurityGroupManager, networkingSGReconciler networkingpkg.SecurityGroupReconciler, subnetsResolver networkingpkg.SubnetsResolver, elbv2TaggingManager elbv2deploy.TaggingManager, controllerConfig config.ControllerConfig, backendSGProvider networkingpkg.BackendSGProvider, - sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger) *groupReconciler { + sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *groupReconciler { annotationParser := annotations.NewSuffixAnnotationParser(annotations.AnnotationPrefixIngress) authConfigBuilder := ingress.NewDefaultAuthConfigBuilder(annotationParser) @@ -61,10 +65,10 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco authConfigBuilder, enhancedBackendBuilder, trackingProvider, elbv2TaggingManager, controllerConfig.FeatureGates, cloud.VpcID(), controllerConfig.ClusterName, controllerConfig.DefaultTags, controllerConfig.ExternalManagedTags, controllerConfig.DefaultSSLPolicy, controllerConfig.DefaultTargetType, controllerConfig.DefaultLoadBalancerScheme, backendSGProvider, sgResolver, - controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger) + controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger, metricsCollector) stackMarshaller := deploy.NewDefaultStackMarshaller() stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingSGManager, networkingSGReconciler, elbv2TaggingManager, - controllerConfig, ingressTagPrefix, logger) + controllerConfig, ingressTagPrefix, logger, metricsCollector, controllerName) classLoader := ingress.NewDefaultClassLoader(k8sClient, true) classAnnotationMatcher := ingress.NewDefaultClassAnnotationMatcher(controllerConfig.IngressConfig.IngressClass) manageIngressesWithoutIngressClass := controllerConfig.IngressConfig.IngressClass == "" @@ -83,6 +87,9 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco groupLoader: groupLoader, groupFinalizerManager: groupFinalizerManager, logger: logger, + metricsCollector: metricsCollector, + controllerName: controllerName, + reconcileCounters: reconcileCounters, maxConcurrentReconciles: controllerConfig.IngressConfig.MaxConcurrentReconciles, } @@ -102,6 +109,9 @@ type groupReconciler struct { groupLoader ingress.GroupLoader groupFinalizerManager ingress.FinalizerManager logger logr.Logger + metricsCollector lbcmetrics.MetricCollector + controllerName string + reconcileCounters *metricsutil.ReconcileCounters maxConcurrentReconciles int } @@ -116,40 +126,64 @@ type groupReconciler struct { // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch func (r *groupReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) { + r.reconcileCounters.IncrementIngress(req.NamespacedName) return runtime.HandleReconcileError(r.reconcile(ctx, req), r.logger) } func (r *groupReconciler) reconcile(ctx context.Context, req reconcile.Request) error { ingGroupID := ingress.DecodeGroupIDFromReconcileRequest(req) - ingGroup, err := r.groupLoader.Load(ctx, ingGroupID) + var err error + var ingGroup ingress.Group + loadIngressFn := func() { + ingGroup, err = r.groupLoader.Load(ctx, ingGroupID) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "fetch_ingress", loadIngressFn) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "fetch_ingress_error", err, r.metricsCollector) } - if err := r.groupFinalizerManager.AddGroupFinalizer(ctx, ingGroupID, ingGroup.Members); err != nil { + addFinalizerFn := func() { + err = r.groupFinalizerManager.AddGroupFinalizer(ctx, ingGroupID, ingGroup.Members) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "add_group_finalizer", addFinalizerFn) + if err != nil { r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err)) - return err + return errmetrics.NewErrorWithMetrics(controllerName, "add_group_finalizer_error", err, r.metricsCollector) } + _, lb, err := r.buildAndDeployModel(ctx, ingGroup) if err != nil { return err } if len(ingGroup.Members) > 0 && lb != nil { - lbDNS, err := lb.DNSName().Resolve(ctx) - if err != nil { - return err + var statusErr error + dnsResolveAndUpdateStatus := func() { + var lbDNS string + lbDNS, statusErr = lb.DNSName().Resolve(ctx) + if statusErr != nil { + return + } + statusErr = r.updateIngressGroupStatus(ctx, ingGroup, lbDNS) + if statusErr != nil { + r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedUpdateStatus, + fmt.Sprintf("Failed update status due to %v", statusErr)) + } } - if err := r.updateIngressGroupStatus(ctx, ingGroup, lbDNS); err != nil { - r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err)) - return err + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "dns_resolve_and_update_status", dnsResolveAndUpdateStatus) + if statusErr != nil { + return errmetrics.NewErrorWithMetrics(controllerName, "dns_resolve_and_update_status_error", statusErr, r.metricsCollector) } } if len(ingGroup.InactiveMembers) > 0 { - if err := r.groupFinalizerManager.RemoveGroupFinalizer(ctx, ingGroupID, ingGroup.InactiveMembers); err != nil { + removeGroupFinalizerFn := func() { + err = r.groupFinalizerManager.RemoveGroupFinalizer(ctx, ingGroupID, ingGroup.InactiveMembers) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "remove_group_finalizer", removeGroupFinalizerFn) + if err != nil { r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedRemoveFinalizer, fmt.Sprintf("Failed remove finalizer due to %v", err)) - return err + return errmetrics.NewErrorWithMetrics(controllerName, "remove_group_finalizer_error", err, r.metricsCollector) } } @@ -158,10 +192,18 @@ func (r *groupReconciler) reconcile(ctx context.Context, req reconcile.Request) } func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingress.Group) (core.Stack, *elbv2model.LoadBalancer, error) { - stack, lb, secrets, backendSGRequired, err := r.modelBuilder.Build(ctx, ingGroup) + var stack core.Stack + var lb *elbv2model.LoadBalancer + var secrets []types.NamespacedName + var backendSGRequired bool + var err error + buildModelFn := func() { + stack, lb, secrets, backendSGRequired, err = r.modelBuilder.Build(ctx, ingGroup, r.metricsCollector) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "build_model", buildModelFn) if err != nil { r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedBuildModel, fmt.Sprintf("Failed build model due to %v", err)) - return nil, nil, err + return nil, nil, errmetrics.NewErrorWithMetrics(controllerName, "build_model_error", err, r.metricsCollector) } stackJSON, err := r.stackMarshaller.Marshal(stack) if err != nil { @@ -170,13 +212,17 @@ func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingr } r.logger.Info("successfully built model", "model", stackJSON) - if err := r.stackDeployer.Deploy(ctx, stack); err != nil { + deployModelFn := func() { + err = r.stackDeployer.Deploy(ctx, stack, r.metricsCollector, "ingress") + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "deploy_model", deployModelFn) + if err != nil { var requeueNeededAfter *runtime.RequeueNeededAfter if errors.As(err, &requeueNeededAfter) { return nil, nil, err } r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedDeployModel, fmt.Sprintf("Failed deploy model due to %v", err)) - return nil, nil, err + return nil, nil, errmetrics.NewErrorWithMetrics(controllerName, "deploy_model_error", err, r.metricsCollector) } r.logger.Info("successfully deployed model", "ingressGroup", ingGroup.ID) r.secretsManager.MonitorSecrets(ingGroup.ID.String(), secrets) @@ -186,7 +232,7 @@ func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingr inactiveResources = append(inactiveResources, k8s.ToSliceOfNamespacedNames(ingGroup.Members)...) } if err := r.backendSGProvider.Release(ctx, networkingpkg.ResourceTypeIngress, inactiveResources); err != nil { - return nil, nil, err + return nil, nil, errmetrics.NewErrorWithMetrics(controllerName, "release_auto_generated_backend_sg_error", err, r.metricsCollector) } return stack, lb, nil } diff --git a/controllers/service/service_controller.go b/controllers/service/service_controller.go index dc80bad94..dcdd5f860 100644 --- a/controllers/service/service_controller.go +++ b/controllers/service/service_controller.go @@ -3,6 +3,7 @@ package service import ( "context" "fmt" + "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/go-logr/logr" @@ -17,7 +18,10 @@ import ( "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy" elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2" "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking" + errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error" "sigs.k8s.io/aws-load-balancer-controller/pkg/k8s" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" + metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util" "sigs.k8s.io/aws-load-balancer-controller/pkg/model/core" elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2" "sigs.k8s.io/aws-load-balancer-controller/pkg/networking" @@ -39,7 +43,7 @@ func NewServiceReconciler(cloud services.Cloud, k8sClient client.Client, eventRe finalizerManager k8s.FinalizerManager, networkingSGManager networking.SecurityGroupManager, networkingSGReconciler networking.SecurityGroupReconciler, subnetsResolver networking.SubnetsResolver, vpcInfoProvider networking.VPCInfoProvider, elbv2TaggingManager elbv2deploy.TaggingManager, controllerConfig config.ControllerConfig, - backendSGProvider networking.BackendSGProvider, sgResolver networking.SecurityGroupResolver, logger logr.Logger) *serviceReconciler { + backendSGProvider networking.BackendSGProvider, sgResolver networking.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *serviceReconciler { annotationParser := annotations.NewSuffixAnnotationParser(serviceAnnotationPrefix) trackingProvider := tracking.NewDefaultProvider(serviceTagPrefix, controllerConfig.ClusterName) @@ -47,9 +51,9 @@ func NewServiceReconciler(cloud services.Cloud, k8sClient client.Client, eventRe modelBuilder := service.NewDefaultModelBuilder(annotationParser, subnetsResolver, vpcInfoProvider, cloud.VpcID(), trackingProvider, elbv2TaggingManager, cloud.EC2(), controllerConfig.FeatureGates, controllerConfig.ClusterName, controllerConfig.DefaultTags, controllerConfig.ExternalManagedTags, controllerConfig.DefaultSSLPolicy, controllerConfig.DefaultTargetType, controllerConfig.DefaultLoadBalancerScheme, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), serviceUtils, - backendSGProvider, sgResolver, controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, logger) + backendSGProvider, sgResolver, controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, logger, metricsCollector) stackMarshaller := deploy.NewDefaultStackMarshaller() - stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingSGManager, networkingSGReconciler, elbv2TaggingManager, controllerConfig, serviceTagPrefix, logger) + stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingSGManager, networkingSGReconciler, elbv2TaggingManager, controllerConfig, serviceTagPrefix, logger, metricsCollector, controllerName) return &serviceReconciler{ k8sClient: k8sClient, eventRecorder: eventRecorder, @@ -65,6 +69,8 @@ func NewServiceReconciler(cloud services.Cloud, k8sClient client.Client, eventRe logger: logger, maxConcurrentReconciles: controllerConfig.ServiceMaxConcurrentReconciles, + metricsCollector: metricsCollector, + reconcileCounters: reconcileCounters, } } @@ -77,10 +83,12 @@ type serviceReconciler struct { serviceUtils service.ServiceUtils backendSGProvider networking.BackendSGProvider - modelBuilder service.ModelBuilder - stackMarshaller deploy.StackMarshaller - stackDeployer deploy.StackDeployer - logger logr.Logger + modelBuilder service.ModelBuilder + stackMarshaller deploy.StackMarshaller + stackDeployer deploy.StackDeployer + logger logr.Logger + metricsCollector lbcmetrics.MetricCollector + reconcileCounters *metricsutil.ReconcileCounters maxConcurrentReconciles int } @@ -90,26 +98,46 @@ type serviceReconciler struct { // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch func (r *serviceReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) { + r.reconcileCounters.IncrementService(req.NamespacedName) return runtime.HandleReconcileError(r.reconcile(ctx, req), r.logger) } func (r *serviceReconciler) reconcile(ctx context.Context, req reconcile.Request) error { svc := &corev1.Service{} - if err := r.k8sClient.Get(ctx, req.NamespacedName, svc); err != nil { + var err error + fetchServiceFn := func() { + err = r.k8sClient.Get(ctx, req.NamespacedName, svc) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "fetch_service", fetchServiceFn) + if err != nil { return client.IgnoreNotFound(err) } - stack, lb, backendSGRequired, err := r.buildModel(ctx, svc) + + var stack core.Stack + var lb *elbv2model.LoadBalancer + var backendSGRequired bool + buildModelFn := func() { + stack, lb, backendSGRequired, err = r.buildModel(ctx, svc) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "build_model", buildModelFn) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "build_model_error", err, r.metricsCollector) } + if lb == nil { - return r.cleanupLoadBalancerResources(ctx, svc, stack) + cleanupLoadBalancerFn := func() { + err = r.cleanupLoadBalancerResources(ctx, svc, stack) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "cleanup_load_balancer", cleanupLoadBalancerFn) + if err != nil { + return errmetrics.NewErrorWithMetrics(controllerName, "cleanup_load_balancer_error", err, r.metricsCollector) + } } return r.reconcileLoadBalancerResources(ctx, svc, stack, lb, backendSGRequired) } func (r *serviceReconciler) buildModel(ctx context.Context, svc *corev1.Service) (core.Stack, *elbv2model.LoadBalancer, bool, error) { - stack, lb, backendSGRequired, err := r.modelBuilder.Build(ctx, svc) + stack, lb, backendSGRequired, err := r.modelBuilder.Build(ctx, svc, r.metricsCollector) if err != nil { r.eventRecorder.Event(svc, corev1.EventTypeWarning, k8s.ServiceEventReasonFailedBuildModel, fmt.Sprintf("Failed build model due to %v", err)) return nil, nil, false, err @@ -124,7 +152,7 @@ func (r *serviceReconciler) buildModel(ctx context.Context, svc *corev1.Service) } func (r *serviceReconciler) deployModel(ctx context.Context, svc *corev1.Service, stack core.Stack) error { - if err := r.stackDeployer.Deploy(ctx, stack); err != nil { + if err := r.stackDeployer.Deploy(ctx, stack, r.metricsCollector, "service"); err != nil { var requeueNeededAfter *runtime.RequeueNeededAfter if errors.As(err, &requeueNeededAfter) { return err @@ -139,28 +167,47 @@ func (r *serviceReconciler) deployModel(ctx context.Context, svc *corev1.Service func (r *serviceReconciler) reconcileLoadBalancerResources(ctx context.Context, svc *corev1.Service, stack core.Stack, lb *elbv2model.LoadBalancer, backendSGRequired bool) error { - if err := r.finalizerManager.AddFinalizers(ctx, svc, serviceFinalizer); err != nil { + + var err error + addFinalizersFn := func() { + err = r.finalizerManager.AddFinalizers(ctx, svc, serviceFinalizer) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "add_finalizers", addFinalizersFn) + if err != nil { r.eventRecorder.Event(svc, corev1.EventTypeWarning, k8s.ServiceEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err)) - return err + return errmetrics.NewErrorWithMetrics(controllerName, "add_finalizers_error", err, r.metricsCollector) + } + + deployModelFn := func() { + err = r.deployModel(ctx, svc, stack) } - err := r.deployModel(ctx, svc, stack) + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "deploy_model", deployModelFn) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "deploy_model_error", err, r.metricsCollector) + } + + var lbDNS string + dnsResolveFn := func() { + lbDNS, err = lb.DNSName().Resolve(ctx) } - lbDNS, err := lb.DNSName().Resolve(ctx) + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "DNS_resolve", dnsResolveFn) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "dns_resolve_error", err, r.metricsCollector) } if !backendSGRequired { if err := r.backendSGProvider.Release(ctx, networking.ResourceTypeService, []types.NamespacedName{k8s.NamespacedName(svc)}); err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "release_auto_generated_backend_sg_error", err, r.metricsCollector) } } - if err = r.updateServiceStatus(ctx, lbDNS, svc); err != nil { + updateStatusFn := func() { + err = r.updateServiceStatus(ctx, lbDNS, svc) + } + r.metricsCollector.ObserveControllerReconcileLatency(controllerName, "update_status", updateStatusFn) + if err != nil { r.eventRecorder.Event(svc, corev1.EventTypeWarning, k8s.ServiceEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err)) - return err + return errmetrics.NewErrorWithMetrics(controllerName, "update_status_error", err, r.metricsCollector) } r.eventRecorder.Event(svc, corev1.EventTypeNormal, k8s.ServiceEventReasonSuccessfullyReconciled, "Successfully reconciled") return nil diff --git a/docs/guide/metrics/prometheus/index.md b/docs/guide/metrics/prometheus/index.md new file mode 100644 index 000000000..0a3e25542 --- /dev/null +++ b/docs/guide/metrics/prometheus/index.md @@ -0,0 +1,81 @@ +# Monitoring Controller Metrics with Prometheus +This document describes how to set up Prometheus for monitoring your AWS Load Balancer Controller, what are available metrics and how to access and query them for insights. + +## Setting Up Prometheus +### Set up Prometheus with kube-prometheus-stack +To monitor the controller, Prometheus needs to be deployed and configured to scrape metrics from the controller’s HTTP endpoint, This can be done by manually deploy [Promethues Operator](https://github.com/prometheus-operator/prometheus-operator) and the controller expose a metric service and define ServiceMonitor to allow Prometheus to scrape its metric. The easiest way to do this is to install the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) Helm chart by following the readme file, which provide an automated solution that performs all the set up for you. + +### Verify Metric Collection +Based on your metric service config, running the similar command to access it locally + +```bash +kubectl port-forward -n kube-system svc/aws-load-balancer-controller-metrics 8080:8080 +``` + +Navigate to [http://localhost:8080/metrics](http://localhost:8080/metrics), you should be able to see all metric in one place. + +## Metrics Available +### Controller-Runtime Build in Metric +This project use controller-runtime to implement controllers and admission webhooks, which automatically exposes key metrics for controllers and webhooks following kubernetes instrumentation guidelines. They are available via HTTP endpoint in prometheus metric format + +Following metrics are instrumented by default: + +* Latency of processing admission requests. +* Current number of admission requests being served +* Histogram of the latency of processing admission requests +* Total number of admission requests by HTTP status code +* Total number of reconciliations per controller +* Total number of reconciliation errors per controller +* Total number of terminal reconciliation errors per controller +* Total number of reconciliation panics per controller +* Length of time per reconciliation per controller +* Maximum number of concurrent reconciles per controller +* Number of currently used workers per controller + +### Custom Metrics from AWS Load Balancer Controller +In addition to the default metrics provided by controller-runtime, this project emits several custom metrics to provide fine-grained view of its behavior and performances. + +Following metrics are added: + +| Name | Type | Description | +|------|------|-------------| +| aws_api_calls_total | Counter | Total number of SDK API calls from the customer's code to AWS services | +| aws_api_call_duration_seconds | Histogram | Perceived latency from when your code makes an SDK call, includes retries | +| aws_api_call_call_retries | Counter | Number of times the SDK retried requests to AWS services for SDK API calls | +| aws_api_requests_total | Counter | Total number of HTTP requests that the SDK made | +| aws_request_duration_seconds | Histogram | Latency of an individual HTTP request to the service endpoint | +| api_call_permission_errors_total | Counter | Number of failed AWS API calls due to auth or authorization failures | +| api_call_service_limit_exceeded_errors_total | Counter | Number of failed AWS API calls due to exceeding service limit | +| api_call_throttled_errors_total | Counter| Number of failed AWS API calls due to throttling error | +| api_call_validation_errors_total | Counter | Number of failed AWS API calls due to validation error | +| awslbc_readiness_gate_ready_seconds | Histogram | Time to flip a readiness gate to true | +| awslbc_reconcile_stage_duration | Histogram | Latency of different reconcile stages | +| awslbc_reconcile_errors_total | Counter | Number of controller errors by error type | +| awslbc_webhook_validation_failures_total | Counter | Number of validation errors by webhook type | +| awslbc_webhook_mutation_failures_total | Counter | Number of mutation errors by webhook type | +| awslbc_top_talkers | Gauge | Number of reconciliations by resource | + + +## Accessing and Querying the Metrics in Prometheus UI +To explore and query the collected metrics, access the Prometheus web UI. Running the following command to access it locally + +```bash +kubectl port-forward -n prometheus svc/prometheus-operated 9090:9090 +``` + +Navigate to [http://localhost:9090](http://localhost:9090) and check the Status - Target Health page. Ensure that your controller’s endpoint is listed and marked as UP. + +Once inside the Prometheus UI, you can use PromQL queries. Here are some examples: + +* Get the total reconcile count : `sum(awslbc_controller_reconcile_errors_total)` +* Get the average reconcile duration for stage : `avg(awslbc_controller_reconcile_stage_duration_sum{controller="service", reconcile_stage="DNS_resolve"})` +* Get the cached object: `sum(awslbc_cache_object_total)` + + + +## Visualizing Metrics +If you want to further visualize there metrics, one of option is to use Grafana, For set up you can + +* Use the Grafana instance included in the kube-promethues-stack. +* Deploy Grafana separately and connect it to Prometheus as a data source. +* Import or create custom dashboards to display relevant metrics. \ No newline at end of file diff --git a/main.go b/main.go index c4641fb10..b6da39951 100644 --- a/main.go +++ b/main.go @@ -17,9 +17,10 @@ limitations under the License. package main import ( - "k8s.io/client-go/util/workqueue" "os" + "k8s.io/client-go/util/workqueue" + elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2" "github.com/go-logr/logr" @@ -41,6 +42,7 @@ import ( "sigs.k8s.io/aws-load-balancer-controller/pkg/k8s" awsmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/aws" lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" + metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util" "sigs.k8s.io/aws-load-balancer-controller/pkg/networking" "sigs.k8s.io/aws-load-balancer-controller/pkg/runtime" "sigs.k8s.io/aws-load-balancer-controller/pkg/targetgroupbinding" @@ -84,7 +86,6 @@ func main() { klog.SetLoggerWithOptions(appLogger, klog.ContextualLogger(true)) var awsMetricsCollector *awsmetrics.Collector - lbcMetricsCollector := lbcmetrics.NewCollector(metrics.Registry) if metrics.Registry != nil { awsMetricsCollector = awsmetrics.NewCollector(metrics.Registry) @@ -107,6 +108,9 @@ func main() { os.Exit(1) } + reconcileCounters := metricsutil.NewReconcileCounters() + lbcMetricsCollector := lbcmetrics.NewCollector(metrics.Registry, mgr, reconcileCounters, ctrl.Log.WithName("controller_metrics")) + clientSet, err := kubernetes.NewForConfig(mgr.GetConfig()) if err != nil { setupLog.Error(err, "unable to obtain clientSet") @@ -131,10 +135,10 @@ func main() { elbv2TaggingManager := elbv2deploy.NewDefaultTaggingManager(cloud.ELBV2(), cloud.VpcID(), controllerCFG.FeatureGates, cloud.RGT(), ctrl.Log) ingGroupReconciler := ingress.NewGroupReconciler(cloud, mgr.GetClient(), mgr.GetEventRecorderFor("ingress"), finalizerManager, sgManager, sgReconciler, subnetResolver, elbv2TaggingManager, - controllerCFG, backendSGProvider, sgResolver, ctrl.Log.WithName("controllers").WithName("ingress")) + controllerCFG, backendSGProvider, sgResolver, ctrl.Log.WithName("controllers").WithName("ingress"), lbcMetricsCollector, reconcileCounters) svcReconciler := service.NewServiceReconciler(cloud, mgr.GetClient(), mgr.GetEventRecorderFor("service"), finalizerManager, sgManager, sgReconciler, subnetResolver, vpcInfoProvider, elbv2TaggingManager, - controllerCFG, backendSGProvider, sgResolver, ctrl.Log.WithName("controllers").WithName("service")) + controllerCFG, backendSGProvider, sgResolver, ctrl.Log.WithName("controllers").WithName("service"), lbcMetricsCollector, reconcileCounters) delayingQueue := workqueue.NewDelayingQueueWithConfig(workqueue.DelayingQueueConfig{ Name: "delayed-target-group-binding", @@ -142,7 +146,7 @@ func main() { deferredTGBQueue := elbv2controller.NewDeferredTargetGroupBindingReconciler(delayingQueue, controllerCFG.RuntimeConfig.SyncPeriod, mgr.GetClient(), ctrl.Log.WithName("deferredTGBQueue")) tgbReconciler := elbv2controller.NewTargetGroupBindingReconciler(mgr.GetClient(), mgr.GetEventRecorderFor("targetGroupBinding"), - finalizerManager, tgbResManager, controllerCFG, deferredTGBQueue, ctrl.Log.WithName("controllers").WithName("targetGroupBinding")) + finalizerManager, tgbResManager, controllerCFG, deferredTGBQueue, ctrl.Log.WithName("controllers").WithName("targetGroupBinding"), lbcMetricsCollector, reconcileCounters) ctx := ctrl.SetupSignalHandler() if err = ingGroupReconciler.SetupWithManager(ctx, mgr, clientSet); err != nil { @@ -181,12 +185,12 @@ func main() { podReadinessGateInjector := inject.NewPodReadinessGate(controllerCFG.PodWebhookConfig, mgr.GetClient(), ctrl.Log.WithName("pod-readiness-gate-injector")) - corewebhook.NewPodMutator(podReadinessGateInjector).SetupWithManager(mgr) - corewebhook.NewServiceMutator(controllerCFG.ServiceConfig.LoadBalancerClass, ctrl.Log).SetupWithManager(mgr) - elbv2webhook.NewIngressClassParamsValidator().SetupWithManager(mgr) - elbv2webhook.NewTargetGroupBindingMutator(cloud.ELBV2(), ctrl.Log).SetupWithManager(mgr) - elbv2webhook.NewTargetGroupBindingValidator(mgr.GetClient(), cloud.ELBV2(), cloud.VpcID(), ctrl.Log).SetupWithManager(mgr) - networkingwebhook.NewIngressValidator(mgr.GetClient(), controllerCFG.IngressConfig, ctrl.Log).SetupWithManager(mgr) + corewebhook.NewPodMutator(podReadinessGateInjector, lbcMetricsCollector).SetupWithManager(mgr) + corewebhook.NewServiceMutator(controllerCFG.ServiceConfig.LoadBalancerClass, ctrl.Log, lbcMetricsCollector).SetupWithManager(mgr) + elbv2webhook.NewIngressClassParamsValidator(lbcMetricsCollector).SetupWithManager(mgr) + elbv2webhook.NewTargetGroupBindingMutator(cloud.ELBV2(), ctrl.Log, lbcMetricsCollector).SetupWithManager(mgr) + elbv2webhook.NewTargetGroupBindingValidator(mgr.GetClient(), cloud.ELBV2(), cloud.VpcID(), ctrl.Log, lbcMetricsCollector).SetupWithManager(mgr) + networkingwebhook.NewIngressValidator(mgr.GetClient(), controllerCFG.IngressConfig, ctrl.Log, lbcMetricsCollector).SetupWithManager(mgr) //+kubebuilder:scaffold:builder go func() { @@ -206,6 +210,17 @@ func main() { setupLog.Error(err, "problem wait for podInfo repo sync") os.Exit(1) } + + go func() { + setupLog.Info("starting collect cache size") + lbcMetricsCollector.StartCollectCacheSize(ctx) + }() + + go func() { + setupLog.Info("starting collect top talkers") + lbcMetricsCollector.StartCollectTopTalkers(ctx) + }() + if err := mgr.Start(ctx); err != nil { setupLog.Error(err, "problem running manager") os.Exit(1) diff --git a/mkdocs.yml b/mkdocs.yml index 1a34639f3..5a23ffd12 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -37,6 +37,8 @@ nav: - Frontend Security Groups: guide/use_cases/frontend_sg/index.md - Blue/Green: guide/use_cases/blue_green/index.md - MultiCluster Target Groups: guide/use_cases/multi_cluster/index.md + - Metrics: + - Prometheus: guide/metrics/prometheus/index.md - Examples: - EchoServer: examples/echo_server.md - gRPCServer: examples/grpc_server.md diff --git a/pkg/deploy/stack_deployer.go b/pkg/deploy/stack_deployer.go index b7787200d..7ca4aa3af 100644 --- a/pkg/deploy/stack_deployer.go +++ b/pkg/deploy/stack_deployer.go @@ -2,6 +2,7 @@ package deploy import ( "context" + "fmt" "github.com/go-logr/logr" "sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services" @@ -12,6 +13,8 @@ import ( "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking" "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/wafregional" "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/wafv2" + errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/aws-load-balancer-controller/pkg/model/core" "sigs.k8s.io/aws-load-balancer-controller/pkg/networking" "sigs.k8s.io/controller-runtime/pkg/client" @@ -20,14 +23,14 @@ import ( // StackDeployer will deploy a resource stack into AWS and K8S. type StackDeployer interface { // Deploy a resource stack. - Deploy(ctx context.Context, stack core.Stack) error + Deploy(ctx context.Context, stack core.Stack, metricsCollector lbcmetrics.MetricCollector, controllerName string) error } // NewDefaultStackDeployer constructs new defaultStackDeployer. func NewDefaultStackDeployer(cloud services.Cloud, k8sClient client.Client, networkingSGManager networking.SecurityGroupManager, networkingSGReconciler networking.SecurityGroupReconciler, elbv2TaggingManager elbv2.TaggingManager, - config config.ControllerConfig, tagPrefix string, logger logr.Logger) *defaultStackDeployer { + config config.ControllerConfig, tagPrefix string, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, controllerName string) *defaultStackDeployer { trackingProvider := tracking.NewDefaultProvider(tagPrefix, config.ClusterName) ec2TaggingManager := ec2.NewDefaultTaggingManager(cloud.EC2(), networkingSGManager, cloud.VpcID(), logger) @@ -52,6 +55,8 @@ func NewDefaultStackDeployer(cloud services.Cloud, k8sClient client.Client, featureGates: config.FeatureGates, vpcID: cloud.VpcID(), logger: logger, + metricsCollector: metricsCollector, + controllerName: controllerName, } } @@ -77,6 +82,8 @@ type defaultStackDeployer struct { shieldProtectionManager shield.ProtectionManager featureGates config.FeatureGates vpcID string + metricsCollector lbcmetrics.MetricCollector + controllerName string logger logr.Logger } @@ -87,7 +94,7 @@ type ResourceSynthesizer interface { } // Deploy a resource stack. -func (d *defaultStackDeployer) Deploy(ctx context.Context, stack core.Stack) error { +func (d *defaultStackDeployer) Deploy(ctx context.Context, stack core.Stack, metricsCollector lbcmetrics.MetricCollector, controllerName string) error { synthesizers := []ResourceSynthesizer{ ec2.NewSecurityGroupSynthesizer(d.cloud.EC2(), d.trackingProvider, d.ec2TaggingManager, d.ec2SGManager, d.vpcID, d.logger, stack), elbv2.NewTargetGroupSynthesizer(d.cloud.ELBV2(), d.trackingProvider, d.elbv2TaggingManager, d.elbv2TGManager, d.logger, d.featureGates, stack), @@ -113,8 +120,15 @@ func (d *defaultStackDeployer) Deploy(ctx context.Context, stack core.Stack) err } for _, synthesizer := range synthesizers { - if err := synthesizer.Synthesize(ctx); err != nil { - return err + var err error + // Get synthesizer type name for better context + synthesizerType := fmt.Sprintf("%T", synthesizer) + synthesizeFn := func() { + err = synthesizer.Synthesize(ctx) + } + d.metricsCollector.ObserveControllerReconcileLatency(controllerName, synthesizerType, synthesizeFn) + if err != nil { + return errmetrics.NewErrorWithMetrics(controllerName, synthesizerType, err, d.metricsCollector) } } for i := len(synthesizers) - 1; i >= 0; i-- { diff --git a/pkg/error/error_with_metrics.go b/pkg/error/error_with_metrics.go new file mode 100644 index 000000000..ffdfc8a4f --- /dev/null +++ b/pkg/error/error_with_metrics.go @@ -0,0 +1,25 @@ +package errmetrics + +import ( + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" +) + +type ErrorWithMetrics struct { + ResourceType string + ErrorCategory string + Err error +} + +func NewErrorWithMetrics(resourceType string, errorCategory string, err error, metricCollector lbcmetrics.MetricCollector) *ErrorWithMetrics { + reconcileErr := &ErrorWithMetrics{ + ResourceType: resourceType, + ErrorCategory: errorCategory, + Err: err, + } + metricCollector.ObserveControllerReconcileError(resourceType, errorCategory) + return reconcileErr +} + +func (e *ErrorWithMetrics) Error() string { + return e.Err.Error() +} diff --git a/pkg/ingress/model_builder.go b/pkg/ingress/model_builder.go index 1f8aacb12..9f0246d03 100644 --- a/pkg/ingress/model_builder.go +++ b/pkg/ingress/model_builder.go @@ -2,10 +2,11 @@ package ingress import ( "context" - elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types" "reflect" "strconv" + elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types" + awssdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/go-logr/logr" "github.com/pkg/errors" @@ -19,7 +20,9 @@ import ( "sigs.k8s.io/aws-load-balancer-controller/pkg/config" elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2" "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking" + errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error" "sigs.k8s.io/aws-load-balancer-controller/pkg/k8s" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/aws-load-balancer-controller/pkg/model/core" elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2" networkingpkg "sigs.k8s.io/aws-load-balancer-controller/pkg/networking" @@ -28,12 +31,13 @@ import ( const ( lbAttrsDeletionProtectionEnabled = "deletion_protection.enabled" + controllerName = "ingress" ) // ModelBuilder is responsible for build mode stack for a IngressGroup. type ModelBuilder interface { // build mode stack for a IngressGroup. - Build(ctx context.Context, ingGroup Group) (core.Stack, *elbv2model.LoadBalancer, []types.NamespacedName, bool, error) + Build(ctx context.Context, ingGroup Group, metricsCollector lbcmetrics.MetricCollector) (core.Stack, *elbv2model.LoadBalancer, []types.NamespacedName, bool, error) } // NewDefaultModelBuilder constructs new defaultModelBuilder. @@ -44,7 +48,7 @@ func NewDefaultModelBuilder(k8sClient client.Client, eventRecorder record.EventR trackingProvider tracking.Provider, elbv2TaggingManager elbv2deploy.TaggingManager, featureGates config.FeatureGates, vpcID string, clusterName string, defaultTags map[string]string, externalManagedTags []string, defaultSSLPolicy string, defaultTargetType string, defaultLoadBalancerScheme string, backendSGProvider networkingpkg.BackendSGProvider, sgResolver networkingpkg.SecurityGroupResolver, - enableBackendSG bool, disableRestrictedSGRules bool, allowedCAARNs []string, enableIPTargetType bool, logger logr.Logger) *defaultModelBuilder { + enableBackendSG bool, disableRestrictedSGRules bool, allowedCAARNs []string, enableIPTargetType bool, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector) *defaultModelBuilder { certDiscovery := NewACMCertDiscovery(acmClient, allowedCAARNs, logger) ruleOptimizer := NewDefaultRuleOptimizer(logger) return &defaultModelBuilder{ @@ -74,6 +78,7 @@ func NewDefaultModelBuilder(k8sClient client.Client, eventRecorder record.EventR disableRestrictedSGRules: disableRestrictedSGRules, enableIPTargetType: enableIPTargetType, logger: logger, + metricsCollector: metricsCollector, } } @@ -109,11 +114,12 @@ type defaultModelBuilder struct { disableRestrictedSGRules bool enableIPTargetType bool - logger logr.Logger + logger logr.Logger + metricsCollector lbcmetrics.MetricCollector } // build mode stack for a IngressGroup. -func (b *defaultModelBuilder) Build(ctx context.Context, ingGroup Group) (core.Stack, *elbv2model.LoadBalancer, []types.NamespacedName, bool, error) { +func (b *defaultModelBuilder) Build(ctx context.Context, ingGroup Group, metricsCollector lbcmetrics.MetricCollector) (core.Stack, *elbv2model.LoadBalancer, []types.NamespacedName, bool, error) { stack := core.NewDefaultStack(core.StackID(ingGroup.ID)) task := &defaultModelBuildTask{ k8sClient: b.k8sClient, @@ -137,6 +143,7 @@ func (b *defaultModelBuilder) Build(ctx context.Context, ingGroup Group) (core.S enableBackendSG: b.enableBackendSG, disableRestrictedSGRules: b.disableRestrictedSGRules, enableIPTargetType: b.enableIPTargetType, + metricsCollector: b.metricsCollector, ingGroup: ingGroup, stack: stack, @@ -219,6 +226,8 @@ type defaultModelBuildTask struct { tgByResID map[string]*elbv2model.TargetGroup backendServices map[types.NamespacedName]*corev1.Service secretKeys []types.NamespacedName + + metricsCollector lbcmetrics.MetricCollector } func (t *defaultModelBuildTask) run(ctx context.Context) error { @@ -265,29 +274,26 @@ func (t *defaultModelBuildTask) run(ctx context.Context) error { lb, err := t.buildLoadBalancer(ctx, listenPortConfigByPort) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "build_load_balancer_error", err, t.metricsCollector) } t.sslRedirectConfig, err = t.buildSSLRedirectConfig(ctx, listenPortConfigByPort) if err != nil { - return err - } - if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "build_ssl_redirct_config_error", err, t.metricsCollector) } for port, cfg := range listenPortConfigByPort { ingList := ingListByPort[port] ls, err := t.buildListener(ctx, lb.LoadBalancerARN(), port, cfg, ingList) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "build_listener_error", err, t.metricsCollector) } if err := t.buildListenerRules(ctx, ls.ListenerARN(), port, cfg.protocol, ingList); err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "build_listener_rule_error", err, t.metricsCollector) } } if err := t.buildLoadBalancerAddOns(ctx, lb.LoadBalancerARN()); err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "build_load_balancer_addons", err, t.metricsCollector) } return nil } diff --git a/pkg/ingress/model_builder_test.go b/pkg/ingress/model_builder_test.go index 7fd522b76..51f570657 100644 --- a/pkg/ingress/model_builder_test.go +++ b/pkg/ingress/model_builder_test.go @@ -3,11 +3,12 @@ package ingress import ( "context" "encoding/json" - ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" - elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types" "testing" "time" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types" + awssdk "github.com/aws/aws-sdk-go-v2/aws" jsonpatch "github.com/evanphx/json-patch" "github.com/go-logr/logr" @@ -29,6 +30,7 @@ import ( "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy" "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2" "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2" networkingpkg "sigs.k8s.io/aws-load-balancer-controller/pkg/networking" testclient "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -614,6 +616,7 @@ func Test_defaultModelBuilder_Build(t *testing.T) { fields fields wantStackPatch string wantErr string + wantMetric bool }{ { name: "Ingress - vanilla internal", @@ -2328,7 +2331,8 @@ func Test_defaultModelBuilder_Build(t *testing.T) { }, }, }, - wantErr: "deletion_protection is enabled, cannot delete the ingress: hello-ingress", + wantErr: "deletion_protection is enabled, cannot delete the ingress: hello-ingress", + wantMetric: false, }, { name: "Ingress - with SG annotation", @@ -2534,7 +2538,8 @@ func Test_defaultModelBuilder_Build(t *testing.T) { }, }, }, - wantErr: "backendSG feature is required to manage worker node SG rules when frontendSG manually specified", + wantErr: "backendSG feature is required to manage worker node SG rules when frontendSG manually specified", + wantMetric: true, }, { name: "Ingress with IPv6 service", @@ -2784,7 +2789,8 @@ func Test_defaultModelBuilder_Build(t *testing.T) { }, }, }, - wantErr: "ingress: ns-1/ing-1: unsupported IPv6 configuration, lb not dual-stack", + wantErr: "ingress: ns-1/ing-1: unsupported IPv6 configuration, lb not dual-stack", + wantMetric: true, }, { name: "target type IP with enableIPTargetType set to false", @@ -2837,7 +2843,8 @@ func Test_defaultModelBuilder_Build(t *testing.T) { }, }, }, - wantErr: "ingress: ns-1/ing-1: unsupported targetType: ip when EnableIPTargetType is false", + wantErr: "ingress: ns-1/ing-1: unsupported targetType: ip when EnableIPTargetType is false", + wantMetric: true, }, { name: "target type IP with named target port", @@ -3939,6 +3946,7 @@ func Test_defaultModelBuilder_Build(t *testing.T) { defaultSSLPolicy: "ELBSecurityPolicy-2016-08", defaultTargetType: elbv2model.TargetType(defaultTargetType), defaultLoadBalancerScheme: elbv2model.LoadBalancerScheme(defaultLoadBalancerScheme), + metricsCollector: lbcmetrics.NewMockCollector(), } if tt.enableIPTargetType == nil { @@ -3947,7 +3955,7 @@ func Test_defaultModelBuilder_Build(t *testing.T) { b.enableIPTargetType = *tt.enableIPTargetType } - gotStack, _, _, _, err := b.Build(context.Background(), tt.args.ingGroup) + gotStack, _, _, _, err := b.Build(context.Background(), tt.args.ingGroup, b.metricsCollector) if tt.wantErr != "" { assert.EqualError(t, err, tt.wantErr) } else { @@ -3993,6 +4001,9 @@ func Test_defaultModelBuilder_Build(t *testing.T) { t.Log(string(patch)) } } + mockCollector := b.metricsCollector.(*lbcmetrics.MockCollector) + assert.Equal(t, tt.wantMetric, len(mockCollector.Invocations[lbcmetrics.MetricControllerReconcileErrors]) == 1) + }) } } diff --git a/pkg/metrics/aws/collector.go b/pkg/metrics/aws/collector.go index f6fe66e98..5649edc31 100644 --- a/pkg/metrics/aws/collector.go +++ b/pkg/metrics/aws/collector.go @@ -2,6 +2,10 @@ package aws import ( "context" + "strconv" + "strings" + "time" + awsmiddleware "github.com/aws/aws-sdk-go-v2/aws/middleware" "github.com/aws/aws-sdk-go-v2/aws/retry" "github.com/aws/smithy-go" @@ -9,8 +13,6 @@ import ( smithyhttp "github.com/aws/smithy-go/transport/http" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "strconv" - "time" ) const ( @@ -65,12 +67,24 @@ func WithSDKCallMetricCollector(c *Collector) func(stack *smithymiddleware.Stack errorCode := errorCodeForRequest(err) retryCount := getRetryMetricsForRequest(metadata) duration := time.Since(start) - c.instruments.apiCallsTotal.With(map[string]string{ + labels := map[string]string{ labelService: service, labelOperation: operation, labelStatusCode: statusCode, labelErrorCode: errorCode, - }).Inc() + } + c.instruments.apiCallsTotal.With(labels).Inc() + + if statusCode == "401" || statusCode == "403" || errorCode == "AccessDeniedException" || errorCode == "AuthFailure" { + c.instruments.apiCallPermissionErrorsTotal.With(labels).Inc() + } else if strings.Contains(errorCode, "LimitExceeded") { + c.instruments.apiCallLimitExceededErrorsTotal.With(labels).Inc() + } else if isThrottleError(errorCode) { + c.instruments.apiCallThrottledErrorsTotal.With(labels).Inc() + } else if errorCode == "ValidationError" { + c.instruments.apiCallValidationErrorsTotal.With(labels).Inc() + } + c.instruments.apiCallDurationSeconds.With(map[string]string{ labelService: service, labelOperation: operation, @@ -158,3 +172,8 @@ func operationForRequest(ctx context.Context) string { } return "?" } + +func isThrottleError(errorCode string) bool { + _, exists := retry.DefaultThrottleErrorCodes[errorCode] + return exists +} diff --git a/pkg/metrics/aws/instruments.go b/pkg/metrics/aws/instruments.go index 150bf1101..4391335e2 100644 --- a/pkg/metrics/aws/instruments.go +++ b/pkg/metrics/aws/instruments.go @@ -13,6 +13,11 @@ const ( metricAPIRequestsTotal = "api_requests_total" metricAPIRequestDurationSeconds = "api_request_duration_seconds" + + metricAPIPermissionErrorsTotal = "api_call_permission_errors_total" + metricAPIServiceLimitExceededErrorsTotal = "api_call_service_limit_exceeded_errors_total" + metricAPIThrottledErrorsTotal = "api_call_throttled_errors_total" + metricAPIValidationErrorsTotal = "api_call_validation_errors_total" ) const ( @@ -28,6 +33,11 @@ type instruments struct { apiCallRetries *prometheus.HistogramVec apiRequestsTotal *prometheus.CounterVec apiRequestDurationSecond *prometheus.HistogramVec + + apiCallPermissionErrorsTotal *prometheus.CounterVec + apiCallLimitExceededErrorsTotal *prometheus.CounterVec + apiCallThrottledErrorsTotal *prometheus.CounterVec + apiCallValidationErrorsTotal *prometheus.CounterVec } // newInstruments allocates and register new metrics to registerer @@ -60,12 +70,41 @@ func newInstruments(registerer prometheus.Registerer) *instruments { Help: "Latency of an individual HTTP request to the service endpoint", }, []string{labelService, labelOperation}) - registerer.MustRegister(apiCallsTotal, apiCallDurationSeconds, apiCallRetries, apiRequestsTotal, apiRequestDurationSecond) + apiCallPermissionErrorsTotal := prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: metricSubSystem, + Name: metricAPIPermissionErrorsTotal, + Help: "Number of failed AWS API calls due to auth or authrorization failures", + }, []string{labelService, labelOperation, labelStatusCode, labelErrorCode}) + + apiCallLimitExceededErrorsTotal := prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: metricSubSystem, + Name: metricAPIServiceLimitExceededErrorsTotal, + Help: "Number of failed AWS API calls due to exceeding servce limit", + }, []string{labelService, labelOperation, labelStatusCode, labelErrorCode}) + + apiCallThrottledErrorsTotal := prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: metricSubSystem, + Name: metricAPIThrottledErrorsTotal, + Help: "Number of failed AWS API calls due to throtting error", + }, []string{labelService, labelOperation, labelStatusCode, labelErrorCode}) + + apiCallValidationErrorsTotal := prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: metricSubSystem, + Name: metricAPIValidationErrorsTotal, + Help: "Number of failed AWS API calls due to validation error", + }, []string{labelService, labelOperation, labelStatusCode, labelErrorCode}) + + registerer.MustRegister(apiCallsTotal, apiCallDurationSeconds, apiCallRetries, apiRequestsTotal, apiRequestDurationSecond, apiCallPermissionErrorsTotal, apiCallLimitExceededErrorsTotal, apiCallThrottledErrorsTotal, apiCallValidationErrorsTotal) + return &instruments{ - apiCallsTotal: apiCallsTotal, - apiCallDurationSeconds: apiCallDurationSeconds, - apiCallRetries: apiCallRetries, - apiRequestsTotal: apiRequestsTotal, - apiRequestDurationSecond: apiRequestDurationSecond, + apiCallsTotal: apiCallsTotal, + apiCallDurationSeconds: apiCallDurationSeconds, + apiCallRetries: apiCallRetries, + apiRequestsTotal: apiRequestsTotal, + apiRequestDurationSecond: apiRequestDurationSecond, + apiCallPermissionErrorsTotal: apiCallPermissionErrorsTotal, + apiCallLimitExceededErrorsTotal: apiCallLimitExceededErrorsTotal, + apiCallThrottledErrorsTotal: apiCallThrottledErrorsTotal, + apiCallValidationErrorsTotal: apiCallValidationErrorsTotal, } } diff --git a/pkg/metrics/lbc/collector.go b/pkg/metrics/lbc/collector.go index 34da12848..8f92a5093 100644 --- a/pkg/metrics/lbc/collector.go +++ b/pkg/metrics/lbc/collector.go @@ -1,18 +1,38 @@ package lbc import ( - "github.com/prometheus/client_golang/prometheus" + "context" + "fmt" "time" + + "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" + corev1 "k8s.io/api/core/v1" + networking "k8s.io/api/networking/v1" + "k8s.io/apimachinery/pkg/api/meta" + elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" + metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" ) type MetricCollector interface { // ObservePodReadinessGateReady this metric is useful to determine how fast pods are becoming ready in the load balancer. // Due to some architectural constraints, we can only emit this metric for pods that are using readiness gates. ObservePodReadinessGateReady(namespace string, tgbName string, duration time.Duration) + ObserveControllerReconcileError(controller string, errorType string) + ObserveControllerReconcileLatency(controller string, stage string, fn func()) + ObserveWebhookValidationError(webhookName string, errorType string) + ObserveWebhookMutationError(webhookName string, errorType string) + StartCollectTopTalkers(ctx context.Context) + StartCollectCacheSize(ctx context.Context) } type collector struct { instruments *instruments + mgr ctrl.Manager + counter *metricsutil.ReconcileCounters + logger logr.Logger } type noOpCollector struct{} @@ -20,7 +40,28 @@ type noOpCollector struct{} func (n *noOpCollector) ObservePodReadinessGateReady(_ string, _ string, _ time.Duration) { } -func NewCollector(registerer prometheus.Registerer) MetricCollector { +func (n *noOpCollector) ObserveControllerReconcileError(_ string, _ string) { +} + +func (n *noOpCollector) ObserveWebhookValidationError(_ string, _ string) { +} + +func (n *noOpCollector) ObserveWebhookMutationError(_ string, _ string) { +} + +func (n *noOpCollector) ObserveControllerCacheSize(_ string, _ int) { +} + +func (n *noOpCollector) ObserveControllerReconcileLatency(_ string, _ string, fn func()) { +} + +func (n *noOpCollector) StartCollectTopTalkers(_ context.Context) { +} + +func (n *noOpCollector) StartCollectCacheSize(_ context.Context) { +} + +func NewCollector(registerer prometheus.Registerer, mgr ctrl.Manager, reconcileCounters *metricsutil.ReconcileCounters, logger logr.Logger) MetricCollector { if registerer == nil { return &noOpCollector{} } @@ -28,6 +69,9 @@ func NewCollector(registerer prometheus.Registerer) MetricCollector { instruments := newInstruments(registerer) return &collector{ instruments: instruments, + mgr: mgr, + counter: reconcileCounters, + logger: logger, } } @@ -37,3 +81,110 @@ func (c *collector) ObservePodReadinessGateReady(namespace string, tgbName strin labelName: tgbName, }).Observe(duration.Seconds()) } + +func (c *collector) ObserveControllerReconcileError(controller string, errorCategory string) { + c.instruments.controllerReconcileErrors.With(prometheus.Labels{ + labelController: controller, + labelErrorCategory: errorCategory, + }).Inc() +} + +func (c *collector) ObserveControllerReconcileLatency(controller string, stage string, fn func()) { + start := time.Now() + defer func() { + c.instruments.controllerReconcileLatency.With(prometheus.Labels{ + labelController: controller, + labelReconcileStage: stage, + }).Observe(time.Since(start).Seconds()) + }() + fn() +} + +func (c *collector) ObserveWebhookValidationError(webhookName string, errorCategory string) { + c.instruments.webhookValidationFailure.With(prometheus.Labels{ + labelWebhookName: webhookName, + labelErrorCategory: errorCategory, + }).Inc() +} + +func (c *collector) ObserveWebhookMutationError(webhookName string, errorCategory string) { + c.instruments.webhookMutationFailure.With(prometheus.Labels{ + labelWebhookName: webhookName, + labelErrorCategory: errorCategory, + }).Inc() +} + +func (c *collector) ObserveControllerCacheSize(resource string, count int) { + c.instruments.controllerCacheObjectCount.With(prometheus.Labels{ + LabelResource: resource, + }).Set(float64(count)) +} + +func (c *collector) ObserveControllerTopThreeTalkers(controller, namespace string, name string, count int) { + c.instruments.controllerReconcileTopTalkers.With(prometheus.Labels{ + labelController: controller, + labelNamespace: namespace, + labelName: name, + }).Set(float64(count)) +} + +func (c *collector) StartCollectCacheSize(ctx context.Context) { + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + if err := c.CollectCacheSize(ctx); err != nil { + c.logger.Error(err, "failed to collect cache size") + } + } + } +} + +func (c *collector) CollectCacheSize(ctx context.Context) error { + cache := c.mgr.GetCache() + collectableResources := map[string]client.ObjectList{ + "service": &corev1.ServiceList{}, + "ingress": &networking.IngressList{}, + "targetgroupbinding": &elbv2api.TargetGroupBindingList{}, + } + for resourceType, resourceList := range collectableResources { + if err := cache.List(ctx, resourceList); err != nil { + return fmt.Errorf("failed to list %s: %w", resourceType, err) + } + items, err := meta.ExtractList(resourceList) + if err != nil { + return fmt.Errorf("failed to extract items from %s list: %w", resourceType, err) + } + + c.ObserveControllerCacheSize(resourceType, len(items)) + } + return nil +} + +func (c *collector) StartCollectTopTalkers(ctx context.Context) { + ticker := time.NewTicker(3 * time.Minute) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + c.CollectTopTalker(ctx) + } + } +} + +func (c *collector) CollectTopTalker(ctx context.Context) { + topThreeReconciles := c.counter.GetTopReconciles(3) + for resourceType, items := range topThreeReconciles { + for _, item := range items { + c.ObserveControllerTopThreeTalkers(resourceType, item.Resource.Namespace, item.Resource.Name, item.Count) + } + } + c.counter.ResetCounter() +} diff --git a/pkg/metrics/lbc/instruments.go b/pkg/metrics/lbc/instruments.go index 8fafd2f98..c087a7552 100644 --- a/pkg/metrics/lbc/instruments.go +++ b/pkg/metrics/lbc/instruments.go @@ -12,15 +12,38 @@ const ( const ( // MetricPodReadinessGateReady tracks the time to flip a readiness gate to true MetricPodReadinessGateReady = "readiness_gate_ready_seconds" + // MetricControllerReconcileErrors tracks the total number of controller errors by error type. + MetricControllerReconcileErrors = "controller_reconcile_errors_total" + // MetricControllerReconcileStageDuration tracks latencies of different reconcile stages. + MetricControllerReconcileStageDuration = "controller_reconcile_stage_duration" + // MetricWebhookValidationFailure tracks the total number of validation errors by error type. + MetricWebhookValidationFailure = "webhook_validation_failure_total" + // MetricWebhookMutationFailure tracks the total number of mutation errors by error type. + MetricWebhookMutationFailure = "webhook_mutation_failure_total" + // MetricControllerCacheObjectCount tracks the total number of object in the controller runtime cache. + MetricControllerCacheObjectCount = "controller_cache_object_total" + // MetricTopTalker tracks what resources are causing the most reconciles. + MetricControllerTopTalkers = "controller_top_talkers" ) const ( - labelNamespace = "namespace" - labelName = "name" + labelNamespace = "namespace" + labelName = "name" + labelController = "controller" + labelErrorCategory = "error_category" + labelReconcileStage = "reconcile_stage" + labelWebhookName = "webhook_name" + LabelResource = "resource" ) type instruments struct { - podReadinessFlipSeconds *prometheus.HistogramVec + podReadinessFlipSeconds *prometheus.HistogramVec + controllerReconcileErrors *prometheus.CounterVec + controllerReconcileLatency *prometheus.HistogramVec + webhookValidationFailure *prometheus.CounterVec + webhookMutationFailure *prometheus.CounterVec + controllerCacheObjectCount *prometheus.GaugeVec + controllerReconcileTopTalkers *prometheus.GaugeVec } // newInstruments allocates and register new metrics to registerer @@ -32,8 +55,51 @@ func newInstruments(registerer prometheus.Registerer) *instruments { Buckets: []float64{10, 30, 60, 120, 180, 240, 300, 360, 420, 480, 540, 600}, }, []string{labelNamespace, labelName}) - registerer.MustRegister(podReadinessFlipSeconds) + controllerReconcileErrors := prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: metricSubsystem, + Name: MetricControllerReconcileErrors, + Help: "Counts the number of reconcile error, categorized by error type.", + }, []string{labelController, labelErrorCategory}) + + controllerReconcileStageDuration := prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Subsystem: metricSubsystem, + Name: MetricControllerReconcileStageDuration, + Help: "latencies of different reconcile stages.", + Buckets: prometheus.DefBuckets, + }, []string{labelController, labelReconcileStage}) + + webhookValidationFailure := prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: metricSubsystem, + Name: MetricWebhookValidationFailure, + Help: "Counts the number of webhook validation failure, categorized by error type.", + }, []string{labelWebhookName, labelErrorCategory}) + + webhookMutationFailure := prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: metricSubsystem, + Name: MetricWebhookMutationFailure, + Help: "Counts the number of webhook mutation failure, categorized by error type.", + }, []string{labelWebhookName, labelErrorCategory}) + + controllerCacheObjectCount := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: metricSubsystem, + Name: MetricControllerCacheObjectCount, + Help: "Counts the number of objects in the controller cache.", + }, []string{LabelResource}) + + controllerReconcileTopTalkers := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: metricSubsystem, + Name: MetricControllerTopTalkers, + Help: "Counts the number of reconciliations triggered per resource", + }, []string{labelController, labelNamespace, labelName}) + + registerer.MustRegister(podReadinessFlipSeconds, controllerReconcileErrors, controllerReconcileStageDuration, webhookValidationFailure, webhookMutationFailure, controllerCacheObjectCount, controllerReconcileTopTalkers) return &instruments{ - podReadinessFlipSeconds: podReadinessFlipSeconds, + podReadinessFlipSeconds: podReadinessFlipSeconds, + controllerReconcileErrors: controllerReconcileErrors, + controllerReconcileLatency: controllerReconcileStageDuration, + webhookValidationFailure: webhookValidationFailure, + webhookMutationFailure: webhookMutationFailure, + controllerCacheObjectCount: controllerCacheObjectCount, + controllerReconcileTopTalkers: controllerReconcileTopTalkers, } } diff --git a/pkg/metrics/lbc/mockcollector.go b/pkg/metrics/lbc/mockcollector.go index 9c8fb6a43..32c5fd99a 100644 --- a/pkg/metrics/lbc/mockcollector.go +++ b/pkg/metrics/lbc/mockcollector.go @@ -1,11 +1,23 @@ package lbc import ( + "context" + "fmt" "time" + + corev1 "k8s.io/api/core/v1" + networking "k8s.io/api/networking/v1" + "k8s.io/apimachinery/pkg/api/meta" + elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" + metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" ) type MockCollector struct { Invocations map[string][]interface{} + mgr ctrl.Manager + counter *metricsutil.ReconcileCounters } type MockHistogramMetric struct { @@ -14,10 +26,62 @@ type MockHistogramMetric struct { duration time.Duration } +type MockCounterMetric struct { + labelController string + labelErrorCategory string + labelNamespace string + labelName string + resource string + webhookName string + errorType string +} + func (m *MockCollector) ObservePodReadinessGateReady(namespace string, tgbName string, d time.Duration) { m.recordHistogram(MetricPodReadinessGateReady, namespace, tgbName, d) } +func (m *MockCollector) ObserveControllerReconcileError(controller string, errorCategory string) { + m.Invocations[MetricControllerReconcileErrors] = append(m.Invocations[MetricControllerReconcileErrors], MockCounterMetric{ + labelController: controller, + labelErrorCategory: errorCategory, + }) +} + +func (m *MockCollector) ObserveControllerReconcileLatency(controller string, stage string, fn func()) { + m.Invocations[MetricControllerReconcileStageDuration] = append(m.Invocations[MetricControllerReconcileStageDuration], MockCounterMetric{ + labelController: controller, + labelErrorCategory: stage, + }) +} + +func (m *MockCollector) ObserveWebhookValidationError(webhookName string, errorCategory string) { + m.Invocations[MetricWebhookValidationFailure] = append(m.Invocations[MetricWebhookValidationFailure], MockCounterMetric{ + webhookName: webhookName, + labelErrorCategory: errorCategory, + }) +} + +func (m *MockCollector) ObserveWebhookMutationError(webhookName string, errorCategory string) { + m.Invocations[MetricWebhookMutationFailure] = append(m.Invocations[MetricWebhookMutationFailure], MockCounterMetric{ + webhookName: webhookName, + labelErrorCategory: errorCategory, + }) +} + +func (m *MockCollector) ObserveControllerCacheSize(resource string, count int) { + m.Invocations[MetricControllerCacheObjectCount] = append(m.Invocations[MetricControllerCacheObjectCount], MockCounterMetric{ + resource: resource, + }) +} + +func (m *MockCollector) ObserveControllerTopTalkers(controller, namespace string, name string) { + m.Invocations[MetricControllerTopTalkers] = append(m.Invocations[MetricControllerTopTalkers], MockCounterMetric{ + labelController: controller, + labelNamespace: namespace, + labelName: name, + }) +} + func (m *MockCollector) recordHistogram(metricName string, namespace string, name string, d time.Duration) { m.Invocations[metricName] = append(m.Invocations[MetricPodReadinessGateReady], MockHistogramMetric{ namespace: namespace, @@ -26,10 +90,75 @@ func (m *MockCollector) recordHistogram(metricName string, namespace string, nam }) } -func NewMockCollector() MetricCollector { +func (m *MockCollector) CollectTopTalker(ctx context.Context) { + topThreeReconciles := m.counter.GetTopReconciles(3) + for resourceType, items := range topThreeReconciles { + for _, item := range items { + m.ObserveControllerTopTalkers(resourceType, item.Resource.Namespace, item.Resource.Name) + } + } +} + +func (m *MockCollector) StartCollectTopTalkers(ctx context.Context) { + ticker := time.NewTicker(30 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + m.CollectTopTalker(ctx) + } + } + +} + +func (m *MockCollector) StartCollectCacheSize(ctx context.Context) { + ticker := time.NewTicker(30 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + m.CollectCacheSize(ctx) + } + } + +} + +func (c *MockCollector) CollectCacheSize(ctx context.Context) error { + cache := c.mgr.GetCache() + collectableResources := map[string]client.ObjectList{ + "service": &corev1.ServiceList{}, + "ingress": &networking.IngressList{}, + "targetgroupbinding": &elbv2api.TargetGroupBindingList{}, + } + for resourceType, resourceList := range collectableResources { + if err := cache.List(ctx, resourceList); err != nil { + return fmt.Errorf("failed to list %s: %w", resourceType, err) + } + items, err := meta.ExtractList(resourceList) + if err != nil { + return fmt.Errorf("failed to extract items from %s list: %w", resourceType, err) + } + + c.ObserveControllerCacheSize(resourceType, len(items)) + } + return nil +} + +func NewMockCollector() MetricCollector { mockInvocations := make(map[string][]interface{}) mockInvocations[MetricPodReadinessGateReady] = make([]interface{}, 0) + mockInvocations[MetricControllerReconcileErrors] = make([]interface{}, 0) + mockInvocations[MetricControllerReconcileStageDuration] = make([]interface{}, 0) + mockInvocations[MetricWebhookValidationFailure] = make([]interface{}, 0) + mockInvocations[MetricWebhookMutationFailure] = make([]interface{}, 0) + mockInvocations[MetricControllerCacheObjectCount] = make([]interface{}, 0) + mockInvocations[MetricControllerTopTalkers] = make([]interface{}, 0) return &MockCollector{ Invocations: mockInvocations, diff --git a/pkg/metrics/util/reconcile_counter.go b/pkg/metrics/util/reconcile_counter.go new file mode 100644 index 000000000..8d29dd323 --- /dev/null +++ b/pkg/metrics/util/reconcile_counter.go @@ -0,0 +1,81 @@ +package util + +import ( + "sort" + "sync" + + "k8s.io/apimachinery/pkg/types" +) + +type ReconcileCounters struct { + serviceReconciles map[types.NamespacedName]int + ingressReconciles map[types.NamespacedName]int + tgbReconciles map[types.NamespacedName]int + mutex sync.Mutex +} + +type ResourceReconcileCount struct { + Resource types.NamespacedName + Count int +} + +func NewReconcileCounters() *ReconcileCounters { + return &ReconcileCounters{ + serviceReconciles: make(map[types.NamespacedName]int), + ingressReconciles: make(map[types.NamespacedName]int), + tgbReconciles: make(map[types.NamespacedName]int), + mutex: sync.Mutex{}, + } +} + +func (c *ReconcileCounters) IncrementService(namespaceName types.NamespacedName) { + c.mutex.Lock() + defer c.mutex.Unlock() + c.serviceReconciles[namespaceName]++ +} + +func (c *ReconcileCounters) IncrementIngress(namespaceName types.NamespacedName) { + c.mutex.Lock() + defer c.mutex.Unlock() + c.ingressReconciles[namespaceName]++ +} + +func (c *ReconcileCounters) IncrementTGB(namespaceName types.NamespacedName) { + c.mutex.Lock() + defer c.mutex.Unlock() + c.tgbReconciles[namespaceName]++ +} + +func (c *ReconcileCounters) GetTopReconciles(n int) map[string][]ResourceReconcileCount { + c.mutex.Lock() + defer c.mutex.Unlock() + topReconciles := make(map[string][]ResourceReconcileCount) + getTopN := func(m map[types.NamespacedName]int) []ResourceReconcileCount { + reconciles := make([]ResourceReconcileCount, 0, len(m)) + for k, v := range m { + reconciles = append(reconciles, ResourceReconcileCount{Resource: k, Count: v}) + } + + sort.Slice(reconciles, func(i, j int) bool { + return reconciles[i].Count > reconciles[j].Count + }) + if len(reconciles) > n { + reconciles = reconciles[:n] + } + return reconciles + } + + topReconciles["service"] = getTopN(c.serviceReconciles) + topReconciles["ingress"] = getTopN(c.ingressReconciles) + topReconciles["targetgroupbinding"] = getTopN(c.tgbReconciles) + + return topReconciles +} + +func (c *ReconcileCounters) ResetCounter() { + c.mutex.Lock() + defer c.mutex.Unlock() + c.serviceReconciles = make(map[types.NamespacedName]int) + c.ingressReconciles = make(map[types.NamespacedName]int) + c.tgbReconciles = make(map[types.NamespacedName]int) +} diff --git a/pkg/service/model_builder.go b/pkg/service/model_builder.go index 76d6300f7..028a723ed 100644 --- a/pkg/service/model_builder.go +++ b/pkg/service/model_builder.go @@ -2,10 +2,11 @@ package service import ( "context" - ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" "strconv" "sync" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/go-logr/logr" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -15,7 +16,9 @@ import ( "sigs.k8s.io/aws-load-balancer-controller/pkg/config" elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2" "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking" + errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error" "sigs.k8s.io/aws-load-balancer-controller/pkg/k8s" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/aws-load-balancer-controller/pkg/model/core" elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2" "sigs.k8s.io/aws-load-balancer-controller/pkg/networking" @@ -27,12 +30,13 @@ const ( LoadBalancerTargetTypeIP = "ip" LoadBalancerTargetTypeInstance = "instance" lbAttrsDeletionProtection = "deletion_protection.enabled" + controllerName = "service" ) // ModelBuilder builds the model stack for the service resource. type ModelBuilder interface { // Build model stack for service - Build(ctx context.Context, service *corev1.Service) (core.Stack, *elbv2model.LoadBalancer, bool, error) + Build(ctx context.Context, service *corev1.Service, metricsCollector lbcmetrics.MetricCollector) (core.Stack, *elbv2model.LoadBalancer, bool, error) } // NewDefaultModelBuilder construct a new defaultModelBuilder @@ -41,7 +45,7 @@ func NewDefaultModelBuilder(annotationParser annotations.Parser, subnetsResolver elbv2TaggingManager elbv2deploy.TaggingManager, ec2Client services.EC2, featureGates config.FeatureGates, clusterName string, defaultTags map[string]string, externalManagedTags []string, defaultSSLPolicy string, defaultTargetType string, defaultLoadBalancerScheme string, enableIPTargetType bool, serviceUtils ServiceUtils, backendSGProvider networking.BackendSGProvider, sgResolver networking.SecurityGroupResolver, enableBackendSG bool, - disableRestrictedSGRules bool, logger logr.Logger) *defaultModelBuilder { + disableRestrictedSGRules bool, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector) *defaultModelBuilder { return &defaultModelBuilder{ annotationParser: annotationParser, subnetsResolver: subnetsResolver, @@ -64,6 +68,7 @@ func NewDefaultModelBuilder(annotationParser annotations.Parser, subnetsResolver enableBackendSG: enableBackendSG, disableRestrictedSGRules: disableRestrictedSGRules, logger: logger, + metricsCollector: metricsCollector, } } @@ -92,9 +97,10 @@ type defaultModelBuilder struct { defaultLoadBalancerScheme elbv2model.LoadBalancerScheme enableIPTargetType bool logger logr.Logger + metricsCollector lbcmetrics.MetricCollector } -func (b *defaultModelBuilder) Build(ctx context.Context, service *corev1.Service) (core.Stack, *elbv2model.LoadBalancer, bool, error) { +func (b *defaultModelBuilder) Build(ctx context.Context, service *corev1.Service, metricsCollector lbcmetrics.MetricCollector) (core.Stack, *elbv2model.LoadBalancer, bool, error) { stack := core.NewDefaultStack(core.StackID(k8s.NamespacedName(service))) task := &defaultModelBuildTask{ clusterName: b.clusterName, @@ -113,6 +119,7 @@ func (b *defaultModelBuilder) Build(ctx context.Context, service *corev1.Service enableBackendSG: b.enableBackendSG, disableRestrictedSGRules: b.disableRestrictedSGRules, logger: b.logger, + metricsCollector: b.metricsCollector, service: service, stack: stack, @@ -170,6 +177,7 @@ type defaultModelBuildTask struct { enableIPTargetType bool ec2Client services.EC2 logger logr.Logger + metricsCollector lbcmetrics.MetricCollector service *corev1.Service @@ -239,19 +247,19 @@ func (t *defaultModelBuildTask) run(ctx context.Context) error { func (t *defaultModelBuildTask) buildModel(ctx context.Context) error { scheme, err := t.buildLoadBalancerScheme(ctx) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "build_load_balancer_scheme_error", err, t.metricsCollector) } t.ec2Subnets, err = t.buildLoadBalancerSubnets(ctx, scheme) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "build_load_balancer_subnets_error", err, t.metricsCollector) } err = t.buildLoadBalancer(ctx, scheme) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "build_load_balancer_error", err, t.metricsCollector) } err = t.buildListeners(ctx, scheme) if err != nil { - return err + return errmetrics.NewErrorWithMetrics(controllerName, "build_listeners_error", err, t.metricsCollector) } return nil } diff --git a/pkg/service/model_builder_test.go b/pkg/service/model_builder_test.go index 9246b6a65..0e865bd61 100644 --- a/pkg/service/model_builder_test.go +++ b/pkg/service/model_builder_test.go @@ -22,6 +22,7 @@ import ( "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy" "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2" "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2" "sigs.k8s.io/aws-load-balancer-controller/pkg/networking" "sigs.k8s.io/controller-runtime/pkg/log" @@ -123,6 +124,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { wantValue string wantNumResources int featureGates map[config.Feature]bool + wantMetric bool }{ { testName: "Simple service", @@ -155,6 +157,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { featureGates: map[config.Feature]bool{ config.NLBSecurityGroup: false, }, + wantMetric: false, wantValue: ` { "id":"default/nlb-ip-svc-tls", @@ -303,6 +306,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { resolveViaDiscoveryCalls: []resolveViaDiscoveryCall{resolveViaDiscoveryCallForOneSubnet}, listLoadBalancerCalls: []listLoadBalancerCall{listLoadBalancerCallForEmptyLB}, wantError: false, + wantMetric: false, featureGates: map[config.Feature]bool{ config.NLBSecurityGroup: false, }, @@ -1434,7 +1438,8 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { }, }, }, - wantError: false, + wantError: false, + wantMetric: false, wantValue: ` { "id":"app/traffic-local", @@ -2250,6 +2255,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { resolveViaDiscoveryCalls: []resolveViaDiscoveryCall{resolveViaDiscoveryCallForOneSubnet}, listLoadBalancerCalls: []listLoadBalancerCall{listLoadBalancerCallForEmptyLB}, wantError: true, + wantMetric: true, }, { testName: "list load balancers error", @@ -2278,7 +2284,8 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { err: errors.New("error listing load balancer"), }, }, - wantError: true, + wantError: true, + wantMetric: true, }, { testName: "resolve VPC CIDRs error", @@ -2314,7 +2321,8 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { featureGates: map[config.Feature]bool{ config.NLBSecurityGroup: false, }, - wantError: true, + wantError: true, + wantMetric: true, }, { testName: "deletion protection enabled error", @@ -2347,6 +2355,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { }, enableBackendSG: true, wantError: true, + wantMetric: false, }, { testName: "ipv6 service without dualstask", @@ -2376,6 +2385,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { resolveViaDiscoveryCalls: []resolveViaDiscoveryCall{resolveViaDiscoveryCallForOneSubnet}, listLoadBalancerCalls: []listLoadBalancerCall{listLoadBalancerCallForEmptyLB}, wantError: true, + wantMetric: true, }, { testName: "ipv6 for NLB", @@ -6032,7 +6042,8 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { featureGates: map[config.Feature]bool{ config.NLBSecurityGroup: false, }, - wantError: true, + wantError: true, + wantMetric: true, }, { testName: "With security groups annotation", @@ -6365,7 +6376,8 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { want: []string{"sg-id1", "sg-id2"}, }, }, - wantError: true, + wantError: true, + wantMetric: true, }, { testName: "Manage backend rules with manual security groups, resolve SG error", @@ -6401,7 +6413,8 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { err: errors.New("unable to resolve security group"), }, }, - wantError: true, + wantError: true, + wantMetric: true, }, { testName: "ipv6 source ranges, but lb not dual stack", @@ -6433,6 +6446,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { resolveViaDiscoveryCalls: []resolveViaDiscoveryCall{resolveViaDiscoveryCallForOneSubnet}, listLoadBalancerCalls: []listLoadBalancerCall{listLoadBalancerCallForEmptyLB}, wantError: true, + wantMetric: true, }, { testName: "Simple service with default load balancer scheme internet-facing", @@ -6461,6 +6475,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { resolveViaDiscoveryCalls: []resolveViaDiscoveryCall{resolveViaDiscoveryCallForOneSubnet}, listLoadBalancerCalls: []listLoadBalancerCall{listLoadBalancerCallForEmptyLB}, wantError: false, + wantMetric: false, wantNumResources: 4, featureGates: map[config.Feature]bool{ config.NLBSecurityGroup: false, @@ -6645,11 +6660,12 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { } else { enableIPTargetType = *tt.enableIPTargetType } + mockMetricsCollector := lbcmetrics.NewMockCollector() builder := NewDefaultModelBuilder(annotationParser, subnetsResolver, vpcInfoProvider, "vpc-xxx", trackingProvider, elbv2TaggingManager, ec2Client, featureGates, "my-cluster", nil, nil, "ELBSecurityPolicy-2016-08", defaultTargetType, defaultLoadBalancerScheme, enableIPTargetType, serviceUtils, - backendSGProvider, sgResolver, tt.enableBackendSG, tt.disableRestrictedSGRules, logr.New(&log.NullLogSink{})) + backendSGProvider, sgResolver, tt.enableBackendSG, tt.disableRestrictedSGRules, logr.New(&log.NullLogSink{}), mockMetricsCollector) ctx := context.Background() - stack, _, _, err := builder.Build(ctx, tt.svc) + stack, _, _, err := builder.Build(ctx, tt.svc, mockMetricsCollector) if tt.wantError { assert.Error(t, err) } else { @@ -6662,6 +6678,10 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) { stack.TopologicalTraversal(visitor) assert.Equal(t, tt.wantNumResources, len(visitor.resources)) } + + mockCollector := builder.metricsCollector.(*lbcmetrics.MockCollector) + assert.Equal(t, tt.wantMetric, len(mockCollector.Invocations[lbcmetrics.MetricControllerReconcileErrors]) == 1) + }) } } diff --git a/pkg/targetgroupbinding/resource_manager.go b/pkg/targetgroupbinding/resource_manager.go index 9e89ae69c..730d290a1 100644 --- a/pkg/targetgroupbinding/resource_manager.go +++ b/pkg/targetgroupbinding/resource_manager.go @@ -3,28 +3,28 @@ package targetgroupbinding import ( "context" "fmt" - "k8s.io/apimachinery/pkg/util/cache" "net/netip" - lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sync" "time" - elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types" - "github.com/aws/smithy-go" - - "k8s.io/client-go/tools/record" + "k8s.io/apimachinery/pkg/util/cache" awssdk "github.com/aws/aws-sdk-go-v2/aws" + elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types" + "github.com/aws/smithy-go" "github.com/go-logr/logr" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/tools/record" elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" "sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services" "sigs.k8s.io/aws-load-balancer-controller/pkg/backend" + errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error" "sigs.k8s.io/aws-load-balancer-controller/pkg/k8s" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/aws-load-balancer-controller/pkg/networking" "sigs.k8s.io/aws-load-balancer-controller/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -34,6 +34,9 @@ const ( defaultRequeueDuration = 15 * time.Second invalidVPCTTL = 60 * time.Minute ) +const ( + controllerName = "targetGroupBinding" +) // ResourceManager manages the TargetGroupBinding resource. type ResourceManager interface { @@ -165,13 +168,13 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, m.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonBackendNotFound, err.Error()) return "", "", false, m.Cleanup(ctx, tgb) } - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "resolve_pod_endpoints_error", err, m.metricsCollector) } newCheckPoint, err := calculateTGBReconcileCheckpoint(endpoints, tgb) if err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "calculate_tgb_reconcile_checkpoint_error", err, m.metricsCollector) } oldCheckPoint := GetTGBReconcileCheckpoint(tgb) @@ -183,7 +186,7 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, targets, err := m.targetsManager.ListTargets(ctx, tgb) if err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "list_targets_error", err, m.metricsCollector) } notDrainingTargets, _ := partitionTargetsByDrainingStatus(targets) @@ -216,7 +219,7 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, err = m.updateTGBCheckPoint(ctx, tgb, "", oldCheckPoint) if err != nil { tgbScopedLogger.Error(err, "Unable to update checkpoint before mutating change") - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "update_tgb_checkpoint_error", err, m.metricsCollector) } } @@ -224,7 +227,7 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, if len(unmatchedTargets) > 0 { updateTrackedTargets, err = m.deregisterTargets(ctx, tgb, unmatchedTargets) if err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "deregister_targets_error", err, m.metricsCollector) } } @@ -242,21 +245,21 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, updateTrackedTargets = false if err := m.multiClusterManager.UpdateTrackedIPTargets(ctx, true, endpoints, tgb); err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "update_tracked_ip_targets_error", err, m.metricsCollector) } if err := m.registerPodEndpoints(ctx, tgb, unmatchedEndpoints); err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "register_pod_endpoint_error", err, m.metricsCollector) } } if err := m.multiClusterManager.UpdateTrackedIPTargets(ctx, updateTrackedTargets, endpoints, tgb); err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "update_tracked_ip_targets_error", err, m.metricsCollector) } anyPodNeedFurtherProbe, err := m.updateTargetHealthPodCondition(ctx, targetHealthCondType, matchedEndpointAndTargets, unmatchedEndpoints, tgb) if err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "update_target_health_pod_condition_error", err, m.metricsCollector) } if anyPodNeedFurtherProbe { @@ -283,7 +286,7 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con svcKey := buildServiceReferenceKey(tgb, tgb.Spec.ServiceRef) nodeSelector, err := backend.GetTrafficProxyNodeSelector(tgb) if err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "get_traffic_proxy_node_selector_error", err, m.metricsCollector) } resolveOpts := []backend.EndpointResolveOption{backend.WithNodeSelector(nodeSelector)} @@ -293,13 +296,13 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con m.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonBackendNotFound, err.Error()) return "", "", false, m.Cleanup(ctx, tgb) } - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "resolve_nodeport_endpoints_error", err, m.metricsCollector) } newCheckPoint, err := calculateTGBReconcileCheckpoint(endpoints, tgb) if err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "calculate_tgb_reconcile_checkpoint_error", err, m.metricsCollector) } oldCheckPoint := GetTGBReconcileCheckpoint(tgb) @@ -311,7 +314,7 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con targets, err := m.targetsManager.ListTargets(ctx, tgb) if err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "list_targets_error", err, m.metricsCollector) } notDrainingTargets, _ := partitionTargetsByDrainingStatus(targets) @@ -320,7 +323,7 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con if err := m.networkingManager.ReconcileForNodePortEndpoints(ctx, tgb, endpoints); err != nil { tgbScopedLogger.Error(err, "Requesting network requeue due to error from ReconcileForNodePortEndpoints") - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "reconcile_nodeport_endpoints_error", err, m.metricsCollector) } if len(unmatchedEndpoints) > 0 || len(unmatchedTargets) > 0 { @@ -328,7 +331,7 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con err = m.updateTGBCheckPoint(ctx, tgb, "", oldCheckPoint) if err != nil { tgbScopedLogger.Error(err, "Unable to update checkpoint before mutating change") - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "update_tgb_checkpoint_error", err, m.metricsCollector) } } @@ -337,23 +340,23 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con if len(unmatchedTargets) > 0 { updateTrackedTargets, err = m.deregisterTargets(ctx, tgb, unmatchedTargets) if err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "deregister_targets_error", err, m.metricsCollector) } } if len(unmatchedEndpoints) > 0 { updateTrackedTargets = false if err := m.multiClusterManager.UpdateTrackedInstanceTargets(ctx, true, endpoints, tgb); err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "update_tracked_instance_targets_error", err, m.metricsCollector) } if err := m.registerNodePortEndpoints(ctx, tgb, unmatchedEndpoints); err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "update_node_port_endpoints_error", err, m.metricsCollector) } } if err := m.multiClusterManager.UpdateTrackedInstanceTargets(ctx, updateTrackedTargets, endpoints, tgb); err != nil { - return "", "", false, err + return "", "", false, errmetrics.NewErrorWithMetrics(controllerName, "update_tracked_instance_targets_error", err, m.metricsCollector) } tgbScopedLogger.Info("Successful reconcile", "checkpoint", newCheckPoint) diff --git a/webhooks/core/pod_mutator.go b/webhooks/core/pod_mutator.go index 1d8974953..c8133eae3 100644 --- a/webhooks/core/pod_mutator.go +++ b/webhooks/core/pod_mutator.go @@ -6,6 +6,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" inject "sigs.k8s.io/aws-load-balancer-controller/pkg/inject" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/aws-load-balancer-controller/pkg/webhook" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" @@ -16,9 +17,10 @@ const ( ) // NewPodMutator returns a mutator for Pod. -func NewPodMutator(podReadinessGateInjector *inject.PodReadinessGate) *podMutator { +func NewPodMutator(podReadinessGateInjector *inject.PodReadinessGate, metricsCollector lbcmetrics.MetricCollector) *podMutator { return &podMutator{ podReadinessGateInjector: podReadinessGateInjector, + metricsCollector: metricsCollector, } } @@ -26,6 +28,7 @@ var _ webhook.Mutator = &podMutator{} type podMutator struct { podReadinessGateInjector *inject.PodReadinessGate + metricsCollector lbcmetrics.MetricCollector } func (m *podMutator) Prototype(_ admission.Request) (runtime.Object, error) { @@ -35,6 +38,7 @@ func (m *podMutator) Prototype(_ admission.Request) (runtime.Object, error) { func (m *podMutator) MutateCreate(ctx context.Context, obj runtime.Object) (runtime.Object, error) { pod := obj.(*corev1.Pod) if err := m.podReadinessGateInjector.Mutate(ctx, pod); err != nil { + m.metricsCollector.ObserveWebhookMutationError(apiPathMutatePod, "podReadinessGateInjector") return pod, err } return pod, nil diff --git a/webhooks/core/service_mutator.go b/webhooks/core/service_mutator.go index 4ccf9b1c1..65a2bf743 100644 --- a/webhooks/core/service_mutator.go +++ b/webhooks/core/service_mutator.go @@ -6,6 +6,7 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/aws-load-balancer-controller/pkg/webhook" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" @@ -16,10 +17,11 @@ const ( ) // NewServiceMutator returns a mutator for Service. -func NewServiceMutator(lbClass string, logger logr.Logger) *serviceMutator { +func NewServiceMutator(lbClass string, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector) *serviceMutator { return &serviceMutator{ logger: logger, loadBalancerClass: lbClass, + metricsCollector: metricsCollector, } } @@ -28,6 +30,7 @@ var _ webhook.Mutator = &serviceMutator{} type serviceMutator struct { logger logr.Logger loadBalancerClass string + metricsCollector lbcmetrics.MetricCollector } func (m *serviceMutator) Prototype(_ admission.Request) (runtime.Object, error) { diff --git a/webhooks/elbv2/ingressclassparams_validator.go b/webhooks/elbv2/ingressclassparams_validator.go index 89a86bdd6..10acde4dd 100644 --- a/webhooks/elbv2/ingressclassparams_validator.go +++ b/webhooks/elbv2/ingressclassparams_validator.go @@ -9,6 +9,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/validation/field" elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/aws-load-balancer-controller/pkg/webhook" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" @@ -17,13 +18,16 @@ import ( const apiPathValidateELBv2IngressClassParams = "/validate-elbv2-k8s-aws-v1beta1-ingressclassparams" // NewIngressClassParamsValidator returns a validator for the IngressClassParams CRD. -func NewIngressClassParamsValidator() *ingressClassParamsValidator { - return &ingressClassParamsValidator{} +func NewIngressClassParamsValidator(metricsCollector lbcmetrics.MetricCollector) *ingressClassParamsValidator { + return &ingressClassParamsValidator{ + metricsCollector: metricsCollector, + } } var _ webhook.Validator = &ingressClassParamsValidator{} type ingressClassParamsValidator struct { + metricsCollector lbcmetrics.MetricCollector } func (v *ingressClassParamsValidator) Prototype(_ admission.Request) (runtime.Object, error) { @@ -33,18 +37,28 @@ func (v *ingressClassParamsValidator) Prototype(_ admission.Request) (runtime.Ob func (v *ingressClassParamsValidator) ValidateCreate(ctx context.Context, obj runtime.Object) error { icp := obj.(*elbv2api.IngressClassParams) allErrs := field.ErrorList{} - allErrs = append(allErrs, v.checkInboundCIDRs(icp)...) - allErrs = append(allErrs, v.checkSubnetSelectors(icp)...) - + if errs := v.checkInboundCIDRs(icp); len(errs) > 0 { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2IngressClassParams, "checkInboundCIDRs") + allErrs = append(allErrs, errs...) + } + if errs := v.checkSubnetSelectors(icp); len(errs) > 0 { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2IngressClassParams, "checkSubnetSelectors") + allErrs = append(allErrs, errs...) + } return allErrs.ToAggregate() } func (v *ingressClassParamsValidator) ValidateUpdate(ctx context.Context, obj runtime.Object, oldObj runtime.Object) error { icp := obj.(*elbv2api.IngressClassParams) allErrs := field.ErrorList{} - allErrs = append(allErrs, v.checkInboundCIDRs(icp)...) - allErrs = append(allErrs, v.checkSubnetSelectors(icp)...) - + if errs := v.checkInboundCIDRs(icp); len(errs) > 0 { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2IngressClassParams, "checkInboundCIDRs") + allErrs = append(allErrs, errs...) + } + if errs := v.checkSubnetSelectors(icp); len(errs) > 0 { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2IngressClassParams, "checkSubnetSelectors") + allErrs = append(allErrs, errs...) + } return allErrs.ToAggregate() } diff --git a/webhooks/elbv2/ingressclassparams_validator_test.go b/webhooks/elbv2/ingressclassparams_validator_test.go index 6c312cfef..11eeabf3b 100644 --- a/webhooks/elbv2/ingressclassparams_validator_test.go +++ b/webhooks/elbv2/ingressclassparams_validator_test.go @@ -6,13 +6,15 @@ import ( "github.com/stretchr/testify/assert" elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" ) func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { tests := []struct { - name string - obj *elbv2api.IngressClassParams - wantErr string + name string + obj *elbv2api.IngressClassParams + wantErr string + wantMetric bool }{ { name: "empty", @@ -38,7 +40,8 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { }, }, }, - wantErr: "spec.inboundCIDRs[0]: Invalid value: \"192.168.0.1\": Could not be parsed as a CIDR (did you mean \"192.168.0.1/32\")", + wantErr: "spec.inboundCIDRs[0]: Invalid value: \"192.168.0.1\": Could not be parsed as a CIDR (did you mean \"192.168.0.1/32\")", + wantMetric: true, }, { name: "inboundCIDRs IPv6 no length", @@ -49,7 +52,8 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { }, }, }, - wantErr: "spec.inboundCIDRs[0]: Invalid value: \"2001:DB8::\": Could not be parsed as a CIDR (did you mean \"2001:DB8::/64\")", + wantErr: "spec.inboundCIDRs[0]: Invalid value: \"2001:DB8::\": Could not be parsed as a CIDR (did you mean \"2001:DB8::/64\")", + wantMetric: true, }, { name: "inboundCIDRs bits outside prefix", @@ -60,7 +64,8 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { }, }, }, - wantErr: "spec.inboundCIDRs[0]: Invalid value: \"10.128.0.0/8\": Network contains bits outside prefix (did you mean \"10.0.0.0/8\")", + wantErr: "spec.inboundCIDRs[0]: Invalid value: \"10.128.0.0/8\": Network contains bits outside prefix (did you mean \"10.0.0.0/8\")", + wantMetric: true, }, { name: "inboundCIDRs empty string", @@ -71,7 +76,8 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { }, }, }, - wantErr: "spec.inboundCIDRs[0]: Invalid value: \"\": Could not be parsed as a CIDR", + wantErr: "spec.inboundCIDRs[0]: Invalid value: \"\": Could not be parsed as a CIDR", + wantMetric: true, }, { name: "inboundCIDRs domain", @@ -82,7 +88,8 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { }, }, }, - wantErr: "spec.inboundCIDRs[0]: Invalid value: \"invalid.example.com\": Could not be parsed as a CIDR", + wantErr: "spec.inboundCIDRs[0]: Invalid value: \"invalid.example.com\": Could not be parsed as a CIDR", + wantMetric: true, }, { name: "subnet is valid ID list", @@ -113,7 +120,8 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { Subnets: &elbv2api.SubnetSelector{}, }, }, - wantErr: "spec.subnets: Required value: must have either `ids` or `tags`", + wantErr: "spec.subnets: Required value: must have either `ids` or `tags`", + wantMetric: true, }, { name: "subnet selector with both id and tag", @@ -127,7 +135,8 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { }, }, }, - wantErr: "spec.subnets.tags: Forbidden: may not have both `ids` and `tags` set", + wantErr: "spec.subnets.tags: Forbidden: may not have both `ids` and `tags` set", + wantMetric: true, }, { name: "subnet duplicate id", @@ -138,7 +147,8 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { }, }, }, - wantErr: "spec.subnets.ids[2]: Duplicate value: \"subnet-1\"", + wantErr: "spec.subnets.ids[2]: Duplicate value: \"subnet-1\"", + wantMetric: true, }, { name: "subnet duplicate tag value", @@ -152,7 +162,8 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { }, }, }, - wantErr: "spec.subnets.tags[Other][2]: Duplicate value: \"other1\"", + wantErr: "spec.subnets.tags[Other][2]: Duplicate value: \"other1\"", + wantMetric: true, }, { name: "subnet empty tags map", @@ -163,12 +174,15 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { }, }, }, - wantErr: "spec.subnets.tags: Required value: must have at least one tag key", + wantErr: "spec.subnets.tags: Required value: must have at least one tag key", + wantMetric: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - v := &ingressClassParamsValidator{} + + mockMetricsCollector := lbcmetrics.NewMockCollector() + v := &ingressClassParamsValidator{metricsCollector: mockMetricsCollector} t.Run("create", func(t *testing.T) { err := v.ValidateCreate(context.Background(), tt.obj) if tt.wantErr != "" { @@ -185,6 +199,9 @@ func Test_ingressClassParamsValidator_ValidateCreate(t *testing.T) { assert.NoError(t, err) } }) + + mockCollector := v.metricsCollector.(*lbcmetrics.MockCollector) + assert.Equal(t, tt.wantMetric, len(mockCollector.Invocations[lbcmetrics.MetricWebhookValidationFailure]) == 2) }) } } diff --git a/webhooks/elbv2/targetgroupbinding_mutator.go b/webhooks/elbv2/targetgroupbinding_mutator.go index aed2f6dd6..9b38fdbde 100644 --- a/webhooks/elbv2/targetgroupbinding_mutator.go +++ b/webhooks/elbv2/targetgroupbinding_mutator.go @@ -11,6 +11,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" "sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/aws-load-balancer-controller/pkg/webhook" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" @@ -19,18 +20,20 @@ import ( const apiPathMutateELBv2TargetGroupBinding = "/mutate-elbv2-k8s-aws-v1beta1-targetgroupbinding" // NewTargetGroupBindingMutator returns a mutator for TargetGroupBinding CRD. -func NewTargetGroupBindingMutator(elbv2Client services.ELBV2, logger logr.Logger) *targetGroupBindingMutator { +func NewTargetGroupBindingMutator(elbv2Client services.ELBV2, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector) *targetGroupBindingMutator { return &targetGroupBindingMutator{ - elbv2Client: elbv2Client, - logger: logger, + elbv2Client: elbv2Client, + logger: logger, + metricsCollector: metricsCollector, } } var _ webhook.Mutator = &targetGroupBindingMutator{} type targetGroupBindingMutator struct { - elbv2Client services.ELBV2 - logger logr.Logger + elbv2Client services.ELBV2 + logger logr.Logger + metricsCollector lbcmetrics.MetricCollector } func (m *targetGroupBindingMutator) Prototype(_ admission.Request) (runtime.Object, error) { @@ -40,18 +43,23 @@ func (m *targetGroupBindingMutator) Prototype(_ admission.Request) (runtime.Obje func (m *targetGroupBindingMutator) MutateCreate(ctx context.Context, obj runtime.Object) (runtime.Object, error) { tgb := obj.(*elbv2api.TargetGroupBinding) if tgb.Spec.TargetGroupARN == "" && tgb.Spec.TargetGroupName == "" { + m.metricsCollector.ObserveWebhookMutationError(apiPathMutateELBv2TargetGroupBinding, "checkTargetGroupArnOrName") return nil, errors.Errorf("must provide either TargetGroupARN or TargetGroupName") } if err := m.getArnFromNameIfNeeded(ctx, tgb); err != nil { + m.metricsCollector.ObserveWebhookMutationError(apiPathMutateELBv2TargetGroupBinding, "getArnFromNameIfNeeded") return nil, err } if err := m.defaultingTargetType(ctx, tgb); err != nil { + m.metricsCollector.ObserveWebhookMutationError(apiPathMutateELBv2TargetGroupBinding, "defaultingTargetType") return nil, err } if err := m.defaultingIPAddressType(ctx, tgb); err != nil { + m.metricsCollector.ObserveWebhookMutationError(apiPathMutateELBv2TargetGroupBinding, "defaultingIPAddressType") return nil, err } if err := m.defaultingVpcID(ctx, tgb); err != nil { + m.metricsCollector.ObserveWebhookMutationError(apiPathMutateELBv2TargetGroupBinding, "defaultingVpcID") return nil, err } return tgb, nil diff --git a/webhooks/elbv2/targetgroupbinding_mutator_test.go b/webhooks/elbv2/targetgroupbinding_mutator_test.go index 0348d21de..8b06b3d57 100644 --- a/webhooks/elbv2/targetgroupbinding_mutator_test.go +++ b/webhooks/elbv2/targetgroupbinding_mutator_test.go @@ -15,6 +15,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" "sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -48,11 +49,12 @@ func Test_targetGroupBindingMutator_MutateCreate(t *testing.T) { obj *elbv2api.TargetGroupBinding } tests := []struct { - name string - fields fields - args args - want *elbv2api.TargetGroupBinding - wantErr error + name string + fields fields + args args + want *elbv2api.TargetGroupBinding + wantErr error + wantMetric bool }{ { name: "targetGroupBinding with TargetType and ipAddressType and vpcID already set", @@ -169,7 +171,8 @@ func Test_targetGroupBindingMutator_MutateCreate(t *testing.T) { }, }, }, - wantErr: errors.New("unsupported TargetType: lambda"), + wantErr: errors.New("unsupported TargetType: lambda"), + wantMetric: true, }, { name: "targetGroupBinding with IPAddressType already set to ipv6", @@ -250,7 +253,8 @@ func Test_targetGroupBindingMutator_MutateCreate(t *testing.T) { }, }, }, - wantErr: errors.New("unable to get target group VpcID: vpcid not found"), + wantErr: errors.New("unable to get target group VpcID: vpcid not found"), + wantMetric: true, }, { name: "targetGroupBinding with TargetGroupName instead of TargetGroupARN", @@ -310,10 +314,11 @@ func Test_targetGroupBindingMutator_MutateCreate(t *testing.T) { elbv2Client.EXPECT().DescribeTargetGroupsAsList(gomock.Any(), call.req).Return(call.resp, call.err).AnyTimes() elbv2Client.EXPECT().AssumeRole(ctx, gomock.Any(), gomock.Any()).Return(elbv2Client, nil).AnyTimes() } - + mockMetricsCollector := lbcmetrics.NewMockCollector() m := &targetGroupBindingMutator{ - elbv2Client: elbv2Client, - logger: logr.New(&log.NullLogSink{}), + elbv2Client: elbv2Client, + logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } got, err := m.MutateCreate(context.Background(), tt.args.obj) if tt.wantErr != nil { @@ -322,7 +327,11 @@ func Test_targetGroupBindingMutator_MutateCreate(t *testing.T) { assert.NoError(t, err) assert.Equal(t, tt.want, got) } + + mockCollector := m.metricsCollector.(*lbcmetrics.MockCollector) + assert.Equal(t, tt.wantMetric, len(mockCollector.Invocations[lbcmetrics.MetricWebhookMutationFailure]) == 1) }) + } } @@ -417,10 +426,11 @@ func Test_targetGroupBindingMutator_obtainSDKTargetTypeFromAWS(t *testing.T) { elbv2Client.EXPECT().DescribeTargetGroupsAsList(gomock.Any(), call.req).Return(call.resp, call.err) elbv2Client.EXPECT().AssumeRole(ctx, gomock.Any(), gomock.Any()).Return(elbv2Client, nil).AnyTimes() } - + mockMetricsCollector := lbcmetrics.NewMockCollector() m := &targetGroupBindingMutator{ - elbv2Client: elbv2Client, - logger: logr.New(&log.NullLogSink{}), + elbv2Client: elbv2Client, + logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } got, err := m.obtainSDKTargetTypeFromAWS(context.Background(), makeTargetGroupBinding(tt.args.tgARN)) if tt.wantErr != nil { @@ -548,9 +558,11 @@ func Test_targetGroupBindingMutator_getIPAddressTypeFromAWS(t *testing.T) { elbv2Client.EXPECT().AssumeRole(ctx, gomock.Any(), gomock.Any()).Return(elbv2Client, nil).AnyTimes() } + mockMetricsCollector := lbcmetrics.NewMockCollector() m := &targetGroupBindingMutator{ - elbv2Client: elbv2Client, - logger: logr.New(&log.NullLogSink{}), + elbv2Client: elbv2Client, + logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } got, err := m.getTargetGroupIPAddressTypeFromAWS(context.Background(), makeTargetGroupBinding(tt.args.tgARN)) if tt.wantErr != nil { @@ -633,10 +645,11 @@ func Test_targetGroupBindingMutator_obtainSDKVpcIDFromAWS(t *testing.T) { elbv2Client.EXPECT().DescribeTargetGroupsAsList(gomock.Any(), call.req).Return(call.resp, call.err) elbv2Client.EXPECT().AssumeRole(ctx, gomock.Any(), gomock.Any()).Return(elbv2Client, nil).AnyTimes() } - + mockMetricsCollector := lbcmetrics.NewMockCollector() m := &targetGroupBindingMutator{ - elbv2Client: elbv2Client, - logger: logr.New(&log.NullLogSink{}), + elbv2Client: elbv2Client, + logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } got, err := m.getVpcIDFromAWS(context.Background(), makeTargetGroupBinding(tt.args.tgARN)) if tt.wantErr != nil { diff --git a/webhooks/elbv2/targetgroupbinding_validator.go b/webhooks/elbv2/targetgroupbinding_validator.go index 7b0e6450c..b3c519935 100644 --- a/webhooks/elbv2/targetgroupbinding_validator.go +++ b/webhooks/elbv2/targetgroupbinding_validator.go @@ -17,6 +17,7 @@ import ( elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" "sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services" "sigs.k8s.io/aws-load-balancer-controller/pkg/k8s" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/aws-load-balancer-controller/pkg/webhook" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -32,22 +33,24 @@ const ( var vpcIDPatternRegex = regexp.MustCompile("^(?:vpc-[0-9a-f]{8}|vpc-[0-9a-f]{17}|vpc-[0-9a-f]{32})$") // NewTargetGroupBindingValidator returns a validator for TargetGroupBinding CRD. -func NewTargetGroupBindingValidator(k8sClient client.Client, elbv2Client services.ELBV2, vpcID string, logger logr.Logger) *targetGroupBindingValidator { +func NewTargetGroupBindingValidator(k8sClient client.Client, elbv2Client services.ELBV2, vpcID string, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector) *targetGroupBindingValidator { return &targetGroupBindingValidator{ - k8sClient: k8sClient, - elbv2Client: elbv2Client, - logger: logger, - vpcID: vpcID, + k8sClient: k8sClient, + elbv2Client: elbv2Client, + logger: logger, + vpcID: vpcID, + metricsCollector: metricsCollector, } } var _ webhook.Validator = &targetGroupBindingValidator{} type targetGroupBindingValidator struct { - k8sClient client.Client - elbv2Client services.ELBV2 - logger logr.Logger - vpcID string + k8sClient client.Client + elbv2Client services.ELBV2 + logger logr.Logger + vpcID string + metricsCollector lbcmetrics.MetricCollector } func (v *targetGroupBindingValidator) Prototype(_ admission.Request) (runtime.Object, error) { @@ -57,21 +60,28 @@ func (v *targetGroupBindingValidator) Prototype(_ admission.Request) (runtime.Ob func (v *targetGroupBindingValidator) ValidateCreate(ctx context.Context, obj runtime.Object) error { tgb := obj.(*elbv2api.TargetGroupBinding) if err := v.checkRequiredFields(ctx, tgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkRequiredFields") return err } if err := v.checkNodeSelector(tgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkNodeSelector") return err } if err := v.checkExistingTargetGroups(tgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkExistingTargetGroups") return err } if err := v.checkTargetGroupIPAddressType(ctx, tgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkTargetGroupIPAddressType") return err } if err := v.checkTargetGroupVpcID(ctx, tgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkTargetGroupVpcID") return err + } if err := v.checkAssumeRoleConfig(tgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkAssumeRoleConfig") return err } return nil @@ -81,18 +91,23 @@ func (v *targetGroupBindingValidator) ValidateUpdate(ctx context.Context, obj ru tgb := obj.(*elbv2api.TargetGroupBinding) oldTgb := oldObj.(*elbv2api.TargetGroupBinding) if err := v.checkRequiredFields(ctx, tgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkRequiredFields") return err } if err := v.checkImmutableFields(tgb, oldTgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkImmutableFields") return err } if err := v.checkNodeSelector(tgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkNodeSelector") return err } if err := v.checkAssumeRoleConfig(tgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkAssumeRoleConfig") return err } if err := v.checkExistingTargetGroups(tgb); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateELBv2TargetGroupBinding, "checkExistingTargetGroups") return err } return nil diff --git a/webhooks/elbv2/targetgroupbinding_validator_test.go b/webhooks/elbv2/targetgroupbinding_validator_test.go index 6fd0cc7b3..6f0d236a0 100644 --- a/webhooks/elbv2/targetgroupbinding_validator_test.go +++ b/webhooks/elbv2/targetgroupbinding_validator_test.go @@ -16,6 +16,7 @@ import ( "github.com/go-logr/logr" "github.com/golang/mock/gomock" "sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "github.com/pkg/errors" "github.com/stretchr/testify/assert" @@ -335,10 +336,12 @@ func Test_targetGroupBindingValidator_ValidateCreate(t *testing.T) { elbv2Client.EXPECT().DescribeTargetGroupsAsList(gomock.Any(), call.req).Return(call.resp, call.err) elbv2Client.EXPECT().AssumeRole(ctx, gomock.Any(), gomock.Any()).Return(elbv2Client, nil).AnyTimes() } + mockMetricsCollector := lbcmetrics.NewMockCollector() v := &targetGroupBindingValidator{ - k8sClient: k8sClient, - elbv2Client: elbv2Client, - logger: logr.New(&log.NullLogSink{}), + k8sClient: k8sClient, + elbv2Client: elbv2Client, + logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } err := v.ValidateCreate(context.Background(), tt.args.obj) if tt.wantErr != nil { @@ -361,9 +364,10 @@ func Test_targetGroupBindingValidator_ValidateUpdate(t *testing.T) { oldObj *elbv2api.TargetGroupBinding } tests := []struct { - name string - args args - wantErr error + name string + args args + wantErr error + wantMetric bool }{ { name: "tgb updated removes TargetType", @@ -381,7 +385,8 @@ func Test_targetGroupBindingValidator_ValidateUpdate(t *testing.T) { }, }, }, - wantErr: errors.New("TargetGroupBinding must specify these fields: spec.targetType"), + wantErr: errors.New("TargetGroupBinding must specify these fields: spec.targetType"), + wantMetric: true, }, { name: "tgb updated mutates TargetGroupARN", @@ -399,7 +404,8 @@ func Test_targetGroupBindingValidator_ValidateUpdate(t *testing.T) { }, }, }, - wantErr: errors.New("TargetGroupBinding update may not change these immutable fields: spec.targetGroupARN"), + wantErr: errors.New("TargetGroupBinding update may not change these immutable fields: spec.targetGroupARN"), + wantMetric: true, }, { name: "[err] targetType is ip, nodeSelector is set", @@ -419,7 +425,8 @@ func Test_targetGroupBindingValidator_ValidateUpdate(t *testing.T) { }, }, }, - wantErr: errors.New("TargetGroupBinding cannot set NodeSelector when TargetType is ip"), + wantErr: errors.New("TargetGroupBinding cannot set NodeSelector when TargetType is ip"), + wantMetric: true, }, { name: "ipAddressType modified", @@ -439,7 +446,8 @@ func Test_targetGroupBindingValidator_ValidateUpdate(t *testing.T) { }, }, }, - wantErr: errors.New("TargetGroupBinding update may not change these immutable fields: spec.ipAddressType"), + wantErr: errors.New("TargetGroupBinding update may not change these immutable fields: spec.ipAddressType"), + wantMetric: true, }, { name: "[ok] no update to spec", @@ -457,7 +465,8 @@ func Test_targetGroupBindingValidator_ValidateUpdate(t *testing.T) { }, }, }, - wantErr: nil, + wantErr: nil, + wantMetric: false, }, } for _, tt := range tests { @@ -467,9 +476,11 @@ func Test_targetGroupBindingValidator_ValidateUpdate(t *testing.T) { elbv2api.AddToScheme(k8sSchema) k8sClient := testclient.NewClientBuilder().WithScheme(k8sSchema).Build() + mockMetricsCollector := lbcmetrics.NewMockCollector() v := &targetGroupBindingValidator{ - logger: logr.New(&log.NullLogSink{}), - k8sClient: k8sClient, + logger: logr.New(&log.NullLogSink{}), + k8sClient: k8sClient, + metricsCollector: mockMetricsCollector, } err := v.ValidateUpdate(context.Background(), tt.args.obj, tt.args.oldObj) if tt.wantErr != nil { @@ -477,6 +488,10 @@ func Test_targetGroupBindingValidator_ValidateUpdate(t *testing.T) { } else { assert.NoError(t, err) } + + mockCollector := v.metricsCollector.(*lbcmetrics.MockCollector) + assert.Equal(t, tt.wantMetric, len(mockCollector.Invocations[lbcmetrics.MetricWebhookValidationFailure]) == 1) + }) } } @@ -532,8 +547,10 @@ func Test_targetGroupBindingValidator_checkRequiredFields(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + mockMetricsCollector := lbcmetrics.NewMockCollector() v := &targetGroupBindingValidator{ - logger: logr.New(&log.NullLogSink{}), + logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } err := v.checkRequiredFields(context.Background(), tt.args.tgb) if tt.wantErr != nil { @@ -844,9 +861,11 @@ func Test_targetGroupBindingValidator_checkImmutableFields(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + mockMetricsCollector := lbcmetrics.NewMockCollector() v := &targetGroupBindingValidator{ - logger: logr.New(&log.NullLogSink{}), - vpcID: clusterVpcID, + logger: logr.New(&log.NullLogSink{}), + vpcID: clusterVpcID, + metricsCollector: mockMetricsCollector, } err := v.checkImmutableFields(tt.args.tgb, tt.args.oldTGB) if tt.wantErr != nil { @@ -923,8 +942,10 @@ func Test_targetGroupBindingValidator_checkNodeSelector(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + mockMetricsCollector := lbcmetrics.NewMockCollector() v := &targetGroupBindingValidator{ - logger: logr.New(&log.NullLogSink{}), + logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } err := v.checkNodeSelector(tt.args.tgb) if tt.wantErr != nil { @@ -1251,9 +1272,11 @@ func Test_targetGroupBindingValidator_checkExistingTargetGroups(t *testing.T) { clientgoscheme.AddToScheme(k8sSchema) elbv2api.AddToScheme(k8sSchema) k8sClient := testclient.NewClientBuilder().WithScheme(k8sSchema).Build() + mockMetricsCollector := lbcmetrics.NewMockCollector() v := &targetGroupBindingValidator{ - k8sClient: k8sClient, - logger: logr.New(&log.NullLogSink{}), + k8sClient: k8sClient, + logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } for _, tgb := range tt.env.existingTGBs { assert.NoError(t, k8sClient.Create(context.Background(), tgb.DeepCopy())) @@ -1518,10 +1541,12 @@ func Test_targetGroupBindingValidator_checkTargetGroupVpcID(t *testing.T) { elbv2Client.EXPECT().DescribeTargetGroupsAsList(gomock.Any(), call.req).Return(call.resp, call.err) elbv2Client.EXPECT().AssumeRole(ctx, gomock.Any(), gomock.Any()).Return(elbv2Client, nil).AnyTimes() } + mockMetricsCollector := lbcmetrics.NewMockCollector() v := &targetGroupBindingValidator{ - k8sClient: k8sClient, - elbv2Client: elbv2Client, - logger: logr.New(&log.NullLogSink{}), + k8sClient: k8sClient, + elbv2Client: elbv2Client, + logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } err := v.checkTargetGroupVpcID(context.Background(), tt.args.obj) if tt.wantErr != nil { @@ -1580,8 +1605,10 @@ func TestCheckAssumeRoleConfig(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { + mockMetricsCollector := lbcmetrics.NewMockCollector() v := &targetGroupBindingValidator{ - logger: logr.New(&log.NullLogSink{}), + logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } err := v.checkAssumeRoleConfig(tc.tgb) diff --git a/webhooks/networking/ingress_validator.go b/webhooks/networking/ingress_validator.go index acb71c3fd..52bfeec90 100644 --- a/webhooks/networking/ingress_validator.go +++ b/webhooks/networking/ingress_validator.go @@ -12,6 +12,7 @@ import ( "sigs.k8s.io/aws-load-balancer-controller/pkg/annotations" "sigs.k8s.io/aws-load-balancer-controller/pkg/config" "sigs.k8s.io/aws-load-balancer-controller/pkg/ingress" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" "sigs.k8s.io/aws-load-balancer-controller/pkg/webhook" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -23,7 +24,7 @@ const ( ) // NewIngressValidator returns a validator for Ingress API. -func NewIngressValidator(client client.Client, ingConfig config.IngressConfig, logger logr.Logger) *ingressValidator { +func NewIngressValidator(client client.Client, ingConfig config.IngressConfig, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector) *ingressValidator { return &ingressValidator{ annotationParser: annotations.NewSuffixAnnotationParser(annotations.AnnotationPrefixIngress), classAnnotationMatcher: ingress.NewDefaultClassAnnotationMatcher(ingConfig.IngressClass), @@ -32,6 +33,7 @@ func NewIngressValidator(client client.Client, ingConfig config.IngressConfig, l disableIngressGroupAnnotation: ingConfig.DisableIngressGroupNameAnnotation, manageIngressesWithoutIngressClass: ingConfig.IngressClass == "", logger: logger, + metricsCollector: metricsCollector, } } @@ -47,6 +49,7 @@ type ingressValidator struct { // and "spec.ingressClassName" should be managed or not. manageIngressesWithoutIngressClass bool logger logr.Logger + metricsCollector lbcmetrics.MetricCollector } func (v *ingressValidator) Prototype(req admission.Request) (runtime.Object, error) { @@ -56,18 +59,23 @@ func (v *ingressValidator) Prototype(req admission.Request) (runtime.Object, err func (v *ingressValidator) ValidateCreate(ctx context.Context, obj runtime.Object) error { ing := obj.(*networking.Ingress) if skip, err := v.checkIngressClass(ctx, ing); skip || err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateNetworkingIngress, "checkIngressClass") return err } if err := v.checkIngressClassAnnotationUsage(ing, nil); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateNetworkingIngress, "checkIngressClassAnnotationUsage") return err } if err := v.checkGroupNameAnnotationUsage(ing, nil); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateNetworkingIngress, "checkGroupNameAnnotationUsage") return err } if err := v.checkIngressClassUsage(ctx, ing, nil); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateNetworkingIngress, "checkIngressClassUsage") return err } if err := v.checkIngressAnnotationConditions(ing); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateNetworkingIngress, "checkIngressAnnotationConditions") return err } return nil @@ -77,18 +85,23 @@ func (v *ingressValidator) ValidateUpdate(ctx context.Context, obj runtime.Objec ing := obj.(*networking.Ingress) oldIng := oldObj.(*networking.Ingress) if skip, err := v.checkIngressClass(ctx, ing); skip || err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateNetworkingIngress, "checkIngressClass") return err } if err := v.checkIngressClassAnnotationUsage(ing, oldIng); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateNetworkingIngress, "checkIngressClassAnnotationUsage") return err } if err := v.checkGroupNameAnnotationUsage(ing, oldIng); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateNetworkingIngress, "checkGroupNameAnnotationUsage") return err } if err := v.checkIngressClassUsage(ctx, ing, oldIng); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateNetworkingIngress, "checkIngressClassUsage") return err } if err := v.checkIngressAnnotationConditions(ing); err != nil { + v.metricsCollector.ObserveWebhookValidationError(apiPathValidateNetworkingIngress, "checkIngressAnnotationConditions") return err } return nil diff --git a/webhooks/networking/ingress_validator_test.go b/webhooks/networking/ingress_validator_test.go index a902c9326..4fc5edb8d 100644 --- a/webhooks/networking/ingress_validator_test.go +++ b/webhooks/networking/ingress_validator_test.go @@ -17,6 +17,7 @@ import ( elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" "sigs.k8s.io/aws-load-balancer-controller/pkg/annotations" "sigs.k8s.io/aws-load-balancer-controller/pkg/ingress" + lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" testclient "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -243,11 +244,13 @@ func Test_ingressValidator_checkIngressClass(t *testing.T) { assert.NoError(t, k8sClient.Create(ctx, ingClass.DeepCopy())) } classAnnotationMatcher := ingress.NewDefaultClassAnnotationMatcher(tt.configuredIngressClass) + mockMetricsCollector := lbcmetrics.NewMockCollector() v := &ingressValidator{ classLoader: ingress.NewDefaultClassLoader(k8sClient, false), classAnnotationMatcher: classAnnotationMatcher, manageIngressesWithoutIngressClass: tt.configuredIngressClass == "", logger: logr.New(&log.NullLogSink{}), + metricsCollector: mockMetricsCollector, } skip, err := v.checkIngressClass(ctx, tt.ing) if tt.expectedErr == "" {