@@ -3,8 +3,10 @@ package targetgroupbinding
3
3
import (
4
4
"context"
5
5
"fmt"
6
+ "k8s.io/apimachinery/pkg/util/cache"
6
7
"net/netip"
7
8
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc"
9
+ "sync"
8
10
"time"
9
11
10
12
elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types"
@@ -28,7 +30,10 @@ import (
28
30
"sigs.k8s.io/controller-runtime/pkg/client"
29
31
)
30
32
31
- const defaultRequeueDuration = 15 * time .Second
33
const (
	// defaultRequeueDuration is the value the default resource manager's
	// requeueDuration field is initialised with.
	defaultRequeueDuration = 15 * time.Second

	// invalidVPCTTL is one hour. Presumably the intended lifetime of an
	// entry in the invalid-VPC cache.
	// NOTE(review): the constructor in this file sets invalidVpcCacheTTL from
	// defaultTargetsCacheTTL rather than from this constant; confirm which
	// one is meant to apply.
	invalidVPCTTL = time.Hour
)
32
37
33
38
// ResourceManager manages the TargetGroupBinding resource.
34
39
type ResourceManager interface {
@@ -64,6 +69,9 @@ func NewDefaultResourceManager(k8sClient client.Client, elbv2Client services.ELB
64
69
multiClusterManager : multiClusterManager ,
65
70
metricsCollector : metricsCollector ,
66
71
72
+ invalidVpcCache : cache .NewExpiring (),
73
+ invalidVpcCacheTTL : defaultTargetsCacheTTL ,
74
+
67
75
requeueDuration : defaultRequeueDuration ,
68
76
}
69
77
}
@@ -84,6 +92,10 @@ type defaultResourceManager struct {
84
92
metricsCollector lbcmetrics.MetricCollector
85
93
vpcID string
86
94
95
+ invalidVpcCache * cache.Expiring
96
+ invalidVpcCacheTTL time.Duration
97
+ invalidVpcCacheMutex sync.RWMutex
98
+
87
99
requeueDuration time.Duration
88
100
}
89
101
@@ -550,29 +562,10 @@ func (m *defaultResourceManager) registerPodEndpoints(ctx context.Context, tgb *
550
562
"registering endpoints using the targetGroup's vpcID %s which is different from the cluster's vpcID %s" , tgb .Spec .VpcID , m .vpcID ))
551
563
}
552
564
553
- var overrideAzFn func (addr netip.Addr ) bool
554
- if tgb .Spec .IamRoleArnToAssume != "" {
555
- // If we're interacting with another account, then we should always be setting "all" AZ to allow this
556
- // target to get registered by the ELB API.
557
- overrideAzFn = func (_ netip.Addr ) bool {
558
- return true
559
- }
560
- } else {
561
- vpcInfo , err := m .vpcInfoProvider .FetchVPCInfo (ctx , vpcID )
562
- if err != nil {
563
- return err
564
- }
565
- var vpcRawCIDRs []string
566
- vpcRawCIDRs = append (vpcRawCIDRs , vpcInfo .AssociatedIPv4CIDRs ()... )
567
- vpcRawCIDRs = append (vpcRawCIDRs , vpcInfo .AssociatedIPv6CIDRs ()... )
568
- vpcCIDRs , err := networking .ParseCIDRs (vpcRawCIDRs )
569
- if err != nil {
570
- return err
571
- }
572
- // If the pod ip resides out of all the VPC CIDRs, then the only way to force the ELB API is to use "all" AZ.
573
- overrideAzFn = func (addr netip.Addr ) bool {
574
- return ! networking .IsIPWithinCIDRs (addr , vpcCIDRs )
575
- }
565
+ overrideAzFn , err := m .generateOverrideAzFn (ctx , vpcID , tgb .Spec .IamRoleArnToAssume )
566
+
567
+ if err != nil {
568
+ return err
576
569
}
577
570
578
571
sdkTargets , err := m .prepareRegistrationCall (endpoints , overrideAzFn )
@@ -626,6 +619,66 @@ func (m *defaultResourceManager) updateTGBCheckPoint(ctx context.Context, tgb *e
626
619
return nil
627
620
}
628
621
622
+ func (m * defaultResourceManager ) generateOverrideAzFn (ctx context.Context , vpcID string , assumeRole string ) (func (addr netip.Addr ) bool , error ) {
623
+ // Cross-Account is configured by assuming a role.
624
+ usingCrossAccount := assumeRole != ""
625
+
626
+ // We need to cache the vpc response for the various assume roles.
627
+ // There are two cases to consider when using assuming a role:
628
+ // 1. Using a peered VPC connection to provide connectivity among accounts.
629
+ // 2. Using RAM shared subnet(s) to provide connectivity among accounts.
630
+ // We need to handle the case where the user is potentially using the same VPC in the peered context
631
+ // as well as the RAM shared context.
632
+ // Using peered VPC connection, we will always need to override the AZ.
633
+ // Using a RAM shared subnet / VPC means that we follow the standard logic of checking the pod ip against the VPC CIDRs.
634
+
635
+ invalidVPCCacheKey := fmt .Sprintf ("%s-%s" , assumeRole , vpcID )
636
+
637
+ if usingCrossAccount {
638
+ // Prevent spamming EC2 with requests.
639
+ // We can use the cached result for this VPC ID given for the current assume role ARN
640
+ m .invalidVpcCacheMutex .RLock ()
641
+ _ , invalidVPC := m .invalidVpcCache .Get (invalidVPCCacheKey )
642
+ m .invalidVpcCacheMutex .RUnlock ()
643
+
644
+ // In this case, we already received that this VPC was invalid, we can shortcut the EC2 call and just override the AZ.
645
+ if invalidVPC {
646
+ return func (addr netip.Addr ) bool {
647
+ return true
648
+ }, nil
649
+ }
650
+ }
651
+
652
+ vpcInfo , err := m .vpcInfoProvider .FetchVPCInfo (ctx , vpcID )
653
+ if err != nil {
654
+ // A VPC Not Found Error along with cross-account usage means that the VPC either, is not shared with the assume
655
+ // role account OR this falls into case (1) from above where the VPC is just peered but not shared with RAM.
656
+ // As we can't differentiate if RAM sharing wasn't set up correctly OR the VPC is set up via peering, we will
657
+ // just default to assume that the VPC is peered but not shared.
658
+ if isVPCNotFoundError (err ) && usingCrossAccount {
659
+ m .invalidVpcCacheMutex .Lock ()
660
+ m .invalidVpcCache .Set (invalidVPCCacheKey , true , m .invalidVpcCacheTTL )
661
+ m .invalidVpcCacheMutex .Unlock ()
662
+ return func (addr netip.Addr ) bool {
663
+ return true
664
+ }, nil
665
+ }
666
+ return nil , err
667
+ }
668
+ var vpcRawCIDRs []string
669
+ vpcRawCIDRs = append (vpcRawCIDRs , vpcInfo .AssociatedIPv4CIDRs ()... )
670
+ vpcRawCIDRs = append (vpcRawCIDRs , vpcInfo .AssociatedIPv6CIDRs ()... )
671
+ vpcCIDRs , err := networking .ParseCIDRs (vpcRawCIDRs )
672
+ if err != nil {
673
+ return nil , err
674
+ }
675
+ // By getting here, we have a valid VPC for whatever credential was used. We return "true" in the function below
676
+ // when the pod ip falls outside the VPCs configured CIDRs, other we return "false" to ensure that the "all" is NOT injected.
677
+ return func (addr netip.Addr ) bool {
678
+ return ! networking .IsIPWithinCIDRs (addr , vpcCIDRs )
679
+ }, nil
680
+ }
681
+
629
682
type podEndpointAndTargetPair struct {
630
683
endpoint backend.PodEndpoint
631
684
target TargetInfo
@@ -747,3 +800,12 @@ func isELBV2TargetGroupARNInvalidError(err error) bool {
747
800
}
748
801
return false
749
802
}
803
+
804
+ func isVPCNotFoundError (err error ) bool {
805
+ var apiErr smithy.APIError
806
+ if errors .As (err , & apiErr ) {
807
+ code := apiErr .ErrorCode ()
808
+ return code == "InvalidVpcID.NotFound"
809
+ }
810
+ return false
811
+ }
0 commit comments