Skip to content

Commit 6022b22

Browse files
authored
[DPE-2642] Pod rescheduling test (#70)
1 parent a7ded90 commit 6022b22

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

tests/integration/ha/ha_helpers.py

+9
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,15 @@ def remove_k8s_hosts(ops_test: OpsTest):
188188
logger.info(f"Removed {unit_name} from /etc/hosts")
189189

190190

191+
def delete_pod(ops_test: OpsTest, unit_name: str) -> None:
    """Delete the Kubernetes pod that backs a unit, using ``kubectl``.

    Args:
        ops_test: test harness; the name of its current model is passed to
            ``kubectl`` as the namespace (``-n``).
        unit_name: unit identifier such as ``"app/0"``; the ``/`` is replaced
            by ``-`` to form the pod name.

    Raises:
        subprocess.CalledProcessError: if ``kubectl`` exits with a non-zero status.
    """
    pod_name = unit_name.replace("/", "-")
    namespace = ops_test.model.info.name

    # Pass an argument list instead of a shell string: no shell is spawned, so
    # the pod and namespace names can never be re-interpreted by the shell.
    check_output(
        ["kubectl", "delete", "pod", pod_name, "-n", namespace],
        stderr=PIPE,
        universal_newlines=True,
    )
198+
199+
191200
def assert_continuous_writes_consistency(result: ContinuousWritesResult):
192201
"""Check results of a stopped ContinuousWrites call against expected results."""
193202
assert (

tests/integration/ha/test_ha.py

+39
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from integration.ha.ha_helpers import (
1212
add_k8s_hosts,
1313
assert_continuous_writes_consistency,
14+
delete_pod,
1415
get_topic_description,
1516
get_topic_offsets,
1617
modify_pebble_restart_delay,
@@ -335,3 +336,41 @@ async def test_full_cluster_restart(
335336

336337
result = c_writes.stop()
337338
assert_continuous_writes_consistency(result=result)
339+
340+
341+
async def test_pod_reschedule(
    ops_test: OpsTest,
    c_writes: ContinuousWrites,
    c_writes_runner: ContinuousWrites,
):
    """Delete the topic leader's pod and check that writes and replication carry on.

    After the pod is deleted the test waits for the application to settle,
    refreshes the local hosts entries, and then asserts that the last topic
    offset keeps increasing, that the topic leader has changed, and that the
    in-sync replica set is ``{0, 1, 2}``.
    """
    topic = ContinuousWrites.TOPIC_NAME

    # Give the continuous writer a few seconds to produce some messages.
    await asyncio.sleep(5)
    description_before = get_topic_description(ops_test=ops_test, topic=topic)
    old_leader = description_before.leader

    logger.info(f"Killing pod of leader for topic '{topic}': {old_leader}")
    delete_pod(ops_test, unit_name=f"{APP_NAME}/{old_leader}")

    # Let juju notice the pod being rescheduled and wait until the app settles.
    await ops_test.model.wait_for_idle(
        apps=[APP_NAME],
        idle_period=30,
        status="active",
        timeout=1000,
    )

    # Refresh the hosts entries so they point at the new IP.
    remove_k8s_hosts(ops_test=ops_test)
    add_k8s_hosts(ops_test=ops_test)

    # Sample the offsets twice, some time apart, after the leader was killed.
    offsets_before = get_topic_offsets(ops_test=ops_test, topic=topic)
    await asyncio.sleep(CLIENT_TIMEOUT * 2)
    offsets_after = get_topic_offsets(ops_test=ops_test, topic=topic)
    assert int(offsets_after[-1]) > int(offsets_before[-1])

    # Leadership must have moved, and every replica must be in sync again.
    description_after = get_topic_description(ops_test=ops_test, topic=topic)
    assert old_leader != description_after.leader
    assert description_after.in_sync_replicas == {0, 1, 2}

    writes_result = c_writes.stop()
    assert_continuous_writes_consistency(result=writes_result)

0 commit comments

Comments
 (0)