Skip to content

Commit eb19f68

Browse files
authored
Support multiple replicas of ASO pod (#4466)
- Use the Deployment strategy Recreate, which ensures that all webhook pods serving CRD conversion requests during an upgrade are running the latest version of ASO.
- Take a lease during CRD management, to ensure that ASO doesn't fight with itself during CRD installation.
- Enable using the lease in multitenant (webhooks) mode as well.
1 parent 0a6f488 commit eb19f68

8 files changed

+250
-74
lines changed

v2/charts/azure-service-operator/templates/apps_v1_deployment_azureserviceoperator-controller-manager.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ spec:
1717
selector:
1818
matchLabels:
1919
control-plane: controller-manager
20+
strategy:
21+
type: Recreate
2022
template:
2123
metadata:
2224
annotations:
@@ -55,9 +57,7 @@ spec:
5557
- --profiling-metrics={{ .Values.metrics.profiling }}
5658
{{- end }}
5759
- --health-addr=:8081
58-
{{- if or (eq .Values.multitenant.enable false) (eq .Values.azureOperatorMode "watchers") }}
5960
- --enable-leader-election
60-
{{- end }}
6161
- --v=2
6262
{{- if and (eq .Values.installCRDs true) (or (eq .Values.multitenant.enable false) (eq .Values.azureOperatorMode "webhooks")) }}
6363
- --crd-pattern={{ .Values.crdPattern }}

v2/charts/azure-service-operator/templates/rbac.authorization.k8s.io_v1_clusterrole_azureserviceoperator-crd-manager-role.yaml

-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
{{- if or (eq .Values.multitenant.enable false) (eq .Values.azureOperatorMode "webhooks") }}
21
{{- if and (eq .Values.installCRDs true) (or (eq .Values.multitenant.enable false) (eq .Values.azureOperatorMode "webhooks")) }}
32
apiVersion: rbac.authorization.k8s.io/v1
43
kind: ClusterRole
@@ -29,4 +28,3 @@ rules:
2928
verbs:
3029
- create
3130
{{- end }}
32-
{{- end }}

v2/charts/azure-service-operator/templates/rbac.authorization.k8s.io_v1_clusterrolebinding_azureserviceoperator-crd-manager-rolebinding.yaml

-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
{{- if or (eq .Values.multitenant.enable false) (eq .Values.azureOperatorMode "webhooks") }}
21
{{- if and (eq .Values.installCRDs true) (or (eq .Values.multitenant.enable false) (eq .Values.azureOperatorMode "webhooks")) }}
32
apiVersion: rbac.authorization.k8s.io/v1
43
kind: ClusterRoleBinding
@@ -13,4 +12,3 @@ subjects:
1312
name: {{ include "azure-service-operator.serviceAccountName" . }}
1413
namespace: {{ .Release.Namespace }}
1514
{{- end }}
16-
{{- end }}

v2/cmd/controller/app/setup.go

+35-30
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ import (
4949
"github.com/Azure/azure-service-operator/v2/internal/util/interval"
5050
"github.com/Azure/azure-service-operator/v2/internal/util/kubeclient"
5151
"github.com/Azure/azure-service-operator/v2/internal/util/lockedrand"
52+
"github.com/Azure/azure-service-operator/v2/internal/util/to"
5253
common "github.com/Azure/azure-service-operator/v2/pkg/common/config"
5354
"github.com/Azure/azure-service-operator/v2/pkg/genruntime"
5455
"github.com/Azure/azure-service-operator/v2/pkg/genruntime/conditions"
@@ -92,11 +93,17 @@ func SetupControllerManager(ctx context.Context, setupLog logr.Logger, flgs *Fla
9293
}
9394

9495
k8sConfig := ctrl.GetConfigOrDie()
95-
mgr, err := ctrl.NewManager(k8sConfig, ctrl.Options{
96+
ctrlOptions := ctrl.Options{
9697
Scheme: scheme,
9798
NewCache: cacheFunc,
9899
LeaderElection: flgs.EnableLeaderElection,
99100
LeaderElectionID: "controllers-leader-election-azinfra-generated",
101+
// Manually set lease duration (to default) so that we can use it for our leader elector too.
102+
// See https://github.com/kubernetes-sigs/controller-runtime/blob/main/pkg/manager/internal.go#L52
103+
LeaseDuration: to.Ptr(15 * time.Second),
104+
RenewDeadline: to.Ptr(10 * time.Second),
105+
RetryPeriod: to.Ptr(2 * time.Second),
106+
GracefulShutdownTimeout: to.Ptr(30 * time.Second),
100107
// It's only safe to set LeaderElectionReleaseOnCancel to true if the manager binary ends
101108
// when the manager exits. This is the case with us today, so we set this to true whenever
102109
// flgs.EnableLeaderElection is true.
@@ -107,7 +114,8 @@ func SetupControllerManager(ctx context.Context, setupLog logr.Logger, flgs *Fla
107114
Port: flgs.WebhookPort,
108115
CertDir: flgs.WebhookCertDir,
109116
}),
110-
})
117+
}
118+
mgr, err := ctrl.NewManager(k8sConfig, ctrlOptions)
111119
if err != nil {
112120
setupLog.Error(err, "unable to create manager")
113121
os.Exit(1)
@@ -119,45 +127,38 @@ func SetupControllerManager(ctx context.Context, setupLog logr.Logger, flgs *Fla
119127
os.Exit(1)
120128
}
121129

122-
// TODO: Put all of the CRD stuff into a method?
123-
crdManager, err := newCRDManager(clients.log, mgr.GetConfig())
130+
var leaderElector *crdmanagement.LeaderElector
131+
if flgs.EnableLeaderElection {
132+
// nolint: contextcheck // false positive?
133+
leaderElector, err = crdmanagement.NewLeaderElector(k8sConfig, setupLog, ctrlOptions, mgr)
134+
if err != nil {
135+
setupLog.Error(err, "failed to initialize leader elector")
136+
os.Exit(1)
137+
}
138+
}
139+
140+
crdManager, err := newCRDManager(clients.log, mgr.GetConfig(), leaderElector)
124141
if err != nil {
125142
setupLog.Error(err, "failed to initialize CRD client")
126143
os.Exit(1)
127144
}
128-
existingCRDs, err := crdManager.ListOperatorCRDs(ctx)
145+
existingCRDs, err := crdManager.ListCRDs(ctx)
129146
if err != nil {
130147
setupLog.Error(err, "failed to list current CRDs")
131148
os.Exit(1)
132149
}
133150

134151
switch flgs.CRDManagementMode {
135152
case "auto":
136-
var goalCRDs []apiextensions.CustomResourceDefinition
137-
goalCRDs, err = crdManager.LoadOperatorCRDs(crdmanagement.CRDLocation, cfg.PodNamespace)
138-
if err != nil {
139-
setupLog.Error(err, "failed to load CRDs from disk")
140-
os.Exit(1)
141-
}
142-
143153
// We only apply CRDs if we're in webhooks mode. No other mode will have CRD CRUD permissions
144154
if cfg.OperatorMode.IncludesWebhooks() {
145-
var installationInstructions []*crdmanagement.CRDInstallationInstruction
146-
installationInstructions, err = crdManager.DetermineCRDsToInstallOrUpgrade(goalCRDs, existingCRDs, flgs.CRDPatterns)
147-
if err != nil {
148-
setupLog.Error(err, "failed to determine CRDs to apply")
149-
os.Exit(1)
150-
}
151-
152-
included := crdmanagement.IncludedCRDs(installationInstructions)
153-
if len(included) == 0 {
154-
err = eris.New("No existing CRDs in cluster and no --crd-pattern specified")
155-
setupLog.Error(err, "failed to apply CRDs")
156-
os.Exit(1)
157-
}
158-
159155
// Note that this step will restart the pod when it succeeds
160-
err = crdManager.ApplyCRDs(ctx, installationInstructions)
156+
err = crdManager.Install(ctx, crdmanagement.Options{
157+
CRDPatterns: flgs.CRDPatterns,
158+
ExistingCRDs: existingCRDs,
159+
Path: crdmanagement.CRDLocation,
160+
Namespace: cfg.PodNamespace,
161+
})
161162
if err != nil {
162163
setupLog.Error(err, "failed to apply CRDs")
163164
os.Exit(1)
@@ -172,7 +173,7 @@ func SetupControllerManager(ctx context.Context, setupLog logr.Logger, flgs *Fla
172173

173174
// There are 3 possibilities once we reach here:
174175
// 1. Webhooks mode + crd-management-mode=auto: existingCRDs will be up to date (upgraded, crd-pattern applied, etc)
175-
// by the time we get here as the pod will keep exiting until it is so (see crdManager.ApplyCRDs above).
176+
// by the time we get here as the pod will keep exiting until it is so (see crdManager.applyCRDs above).
176177
// 2. Non-webhooks mode + auto: As outlined in https://azure.github.io/azure-service-operator/guide/authentication/multitenant-deployment/#upgrading
177178
// the webhooks mode pod must be upgraded first, so there's not really much practical difference between this and
178179
// crd-management-mode=none (see below).
@@ -458,14 +459,18 @@ func makeControllerOptions(log logr.Logger, cfg config.Values) generic.Options {
458459
}
459460
}
460461

461-
func newCRDManager(logger logr.Logger, k8sConfig *rest.Config) (*crdmanagement.Manager, error) {
462+
func newCRDManager(
463+
logger logr.Logger,
464+
k8sConfig *rest.Config,
465+
leaderElection *crdmanagement.LeaderElector,
466+
) (*crdmanagement.Manager, error) {
462467
crdScheme := runtime.NewScheme()
463468
_ = apiextensions.AddToScheme(crdScheme)
464469
crdClient, err := client.New(k8sConfig, client.Options{Scheme: crdScheme})
465470
if err != nil {
466471
return nil, eris.Wrap(err, "unable to create CRD client")
467472
}
468473

469-
crdManager := crdmanagement.NewManager(logger, kubeclient.NewClient(crdClient))
474+
crdManager := crdmanagement.NewManager(logger, kubeclient.NewClient(crdClient), leaderElection)
470475
return crdManager, nil
471476
}

v2/config/manager/manager.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ spec:
2525
matchLabels:
2626
control-plane: controller-manager
2727
replicas: 1
28+
strategy:
29+
type: Recreate
2830
revisionHistoryLimit: 10
2931
template:
3032
metadata:

v2/internal/crdmanagement/helpers_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ func testSetup(t *testing.T) *testData {
114114
logger := testcommon.NewTestLogger(t)
115115
cfg := config.Values{}
116116

117-
crdManager := crdmanagement.NewManager(logger, kubeClient)
117+
crdManager := crdmanagement.NewManager(logger, kubeClient, nil)
118118

119119
return &testData{
120120
cfg: cfg,

0 commit comments

Comments
 (0)