@@ -13,6 +13,7 @@
 
 from .helpers import (
     APP_NAME,
+    KAFKA_CONTAINER,
    ZK_NAME,
    balancer_exporter_is_up,
    balancer_is_ready,
@@ -38,27 +39,23 @@ async def balancer_app(ops_test: OpsTest, request):
 class TestBalancer:
     @pytest.mark.abort_on_fail
     async def test_build_and_deploy(self, ops_test: OpsTest, kafka_charm, balancer_app):
-        await ops_test.model.add_machine(series="jammy")
-        machine_ids = await ops_test.model.get_machines()
 
         await asyncio.gather(
             ops_test.model.deploy(
                 kafka_charm,
                 application_name=APP_NAME,
                 num_units=1,
-                series="jammy",
-                to=machine_ids[0],
                 config={"roles": "broker,balancer" if balancer_app == APP_NAME else "broker"},
+                resources={"kafka-image": KAFKA_CONTAINER},
             ),
             ops_test.model.deploy(
-                ZK_NAME, channel="edge", application_name=ZK_NAME, num_units=1, series="jammy"
+                ZK_NAME, channel="3/edge", application_name=ZK_NAME, num_units=3, series="jammy"
             ),
             ops_test.model.deploy(
                 "kafka-test-app",
                 application_name=PRODUCER_APP,
                 channel="edge",
                 num_units=1,
-                series="jammy",
                 config={
                     "topic_name": "HOT-TOPIC",
                     "num_messages": 100000,
@@ -74,12 +71,15 @@ async def test_build_and_deploy(self, ops_test: OpsTest, kafka_charm, balancer_a
                 kafka_charm,
                 application_name=balancer_app,
                 num_units=1,
-                series="jammy",
                 config={"roles": balancer_app},
+                resources={"kafka-image": KAFKA_CONTAINER},
             )
 
         await ops_test.model.wait_for_idle(
-            apps=list({APP_NAME, ZK_NAME, balancer_app}), idle_period=30, timeout=3600
+            apps=list({APP_NAME, ZK_NAME, balancer_app}),
+            idle_period=30,
+            timeout=3600,
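+            # don't fail fast if a unit transiently enters error state while settling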
+            raise_on_error=False,
         )
         assert ops_test.model.applications[APP_NAME].status == "blocked"
         assert ops_test.model.applications[ZK_NAME].status == "active"
@@ -237,6 +237,132 @@ async def test_remove_unit_full_rebalance(self, ops_test: OpsTest, balancer_app)
             # verify that post-rebalance, surviving units increased replica counts
             assert int(value) < int(post_rebalance_replica_counts.get(key, 0))
 
+    @pytest.mark.abort_on_fail
+    async def test_add_unit_targeted_rebalance(self, ops_test: OpsTest, balancer_app):
+        await ops_test.model.applications[APP_NAME].add_units(
+            count=1  # up to 4, new unit won't have any partitions
+        )
+        await ops_test.model.block_until(
+            lambda: len(ops_test.model.applications[APP_NAME].units) == 4
+        )
+        await ops_test.model.wait_for_idle(
+            apps=list({APP_NAME, ZK_NAME, PRODUCER_APP, balancer_app}),
+            status="active",
+            timeout=1800,
+            idle_period=30,
+        )
+        async with ops_test.fast_forward(fast_interval="20s"):
+            await asyncio.sleep(120)  # ensure update-status adds broker-capacities if missed
+
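+        # sanity-check that the balancer reports ready before querying its API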
+        assert balancer_is_ready(ops_test=ops_test, app_name=balancer_app)
+
+        await asyncio.sleep(30)  # let the API breathe after so many requests
+
+        # verify CC can find the new broker_id 3, with no replica partitions allocated
+        broker_replica_count = get_replica_count_by_broker_id(ops_test, balancer_app)
+        new_broker_id = max(map(int, broker_replica_count.keys()))
+        pre_rebalance_replica_counts = {
+            key: value for key, value in broker_replica_count.items() if key != str(new_broker_id)
+        }
+        new_broker_replica_count = int(broker_replica_count.get(str(new_broker_id), 0))
+
+        assert not new_broker_replica_count
+
+        for unit in ops_test.model.applications[balancer_app].units:
+            if await unit.is_leader_from_status():
+                leader_unit = unit
+
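+        # dry-run first: request a rebalance proposal from the balancer without moving data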
+        rebalance_action_dry_run = await leader_unit.run_action(
+            "rebalance", mode="add", brokerid=new_broker_id, dryrun=True, timeout=600, block=True
+        )
+        response = await rebalance_action_dry_run.wait()
+        assert response.results
+
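+        # now execute the rebalance for real, moving replicas onto the new broker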
+        rebalance_action = await leader_unit.run_action(
+            "rebalance",
+            mode="add",
+            brokerid=new_broker_id,
+            dryrun=False,
+            timeout=600,
+            block=True,
+        )
+        response = await rebalance_action.wait()
+        assert response.results
+
+        post_rebalance_replica_counts = get_replica_count_by_broker_id(ops_test, balancer_app)
+
+        # Partitions were only moved from existing brokers to the new one
+        for existing_broker, previous_replica_count in pre_rebalance_replica_counts.items():
+            assert previous_replica_count >= post_rebalance_replica_counts.get(
+                str(existing_broker)
+            )
+
+        # New broker has partition(s)
+        assert int(
+            get_replica_count_by_broker_id(ops_test, balancer_app).get(str(new_broker_id), 0)
+        )  # replicas were successfully moved
+
+        # Total sum of partitions is conserved
+        assert sum(pre_rebalance_replica_counts.values()) == sum(
+            post_rebalance_replica_counts.values()
+        )
+
+    @pytest.mark.abort_on_fail
+    async def test_balancer_prepare_unit_removal(self, ops_test: OpsTest, balancer_app):
+        broker_replica_count = get_replica_count_by_broker_id(ops_test, balancer_app)
+        new_broker_id = max(map(int, broker_replica_count.keys()))
+
+        # storing the current replica counts of brokers 0, 1, 2 - they will persist
+        pre_rebalance_replica_counts = {
+            key: value
+            for key, value in get_replica_count_by_broker_id(ops_test, balancer_app).items()
+            if key != str(new_broker_id)
+        }
+
+        for unit in ops_test.model.applications[balancer_app].units:
+            if await unit.is_leader_from_status():
+                leader_unit = unit
+
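+        # dry-run the removal first to validate the proposal before draining the broker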
+        rebalance_action_dry_run = await leader_unit.run_action(
+            "rebalance",
+            mode="remove",
+            brokerid=new_broker_id,
+            dryrun=True,
+            timeout=600,
+            block=True,
+        )
+        response = await rebalance_action_dry_run.wait()
+        assert response.results
+
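+        # execute the rebalance, draining all replicas off the broker slated for removal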
+        rebalance_action = await leader_unit.run_action(
+            "rebalance",
+            mode="remove",
+            brokerid=[new_broker_id],
+            dryrun=False,
+            timeout=600,
+            block=True,
+        )
+        response = await rebalance_action.wait()
+        assert response.results
+
+        post_rebalance_replica_counts = get_replica_count_by_broker_id(ops_test, balancer_app)
+
+        # Partitions were only moved from the removed broker to the remaining ones
+        for existing_broker, previous_replica_count in pre_rebalance_replica_counts.items():
+            assert previous_replica_count <= post_rebalance_replica_counts.get(
+                str(existing_broker)
+            )
+
+        # Replicas were successfully moved
+        assert not int(
+            get_replica_count_by_broker_id(ops_test, balancer_app).get(str(new_broker_id), 0)
+        )
+
+        # Total sum of partitions is conserved
+        assert sum(pre_rebalance_replica_counts.values()) == sum(
+            post_rebalance_replica_counts.values()
+        )
+
     @pytest.mark.abort_on_fail
     async def test_tls(self, ops_test: OpsTest, balancer_app):
         # deploy and integrate tls