akvorado/outlet/kafka/scaler_test.go
Vincent Bernat 1322d42549 outlet/kafka: fix scaler hysteresis
Previously, the scaler handled scaling up and scaling down
independently. When scaling up or down, Kafka rebalances the topic and
we temporarily get scale-down requests; the rate limiter would not stop
them because it was independent from the scale-up rate limiter.
Instead, the rate limit for increases now acts as a grace period during
which everything is ignored; between that and the rate limit for
decreases, we only consider increasing the number of workers; past
that, we scale down as long as we have a majority of scale-down
requests (compared to steady ones).

Fix #2080 (hopefully)
2025-11-11 21:26:05 +01:00
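
To make the intended behaviour concrete, here is a minimal, standalone Go sketch of the decision rule described above. It is only an illustration of the commit message, not the actual implementation: the helper decide and its arguments are invented names, and the real scaler in the outlet/kafka package is built around a channel and the scalerState type exercised by the tests below.

package main

import (
	"fmt"
	"time"
)

// ScaleRequest mirrors the three request kinds used by the scaler.
type ScaleRequest int

const (
	ScaleSteady ScaleRequest = iota
	ScaleIncrease
	ScaleDecrease
)

// decide is a hypothetical helper showing the hysteresis: given the time
// elapsed since the last resize and the number of requests of each kind seen
// since then, it returns the action to take.
func decide(elapsed, increaseRateLimit, decreaseRateLimit time.Duration, increases, decreases, steady int) ScaleRequest {
	switch {
	case elapsed < increaseRateLimit:
		// Grace period: Kafka may still be rebalancing, ignore everything.
		return ScaleSteady
	case elapsed < decreaseRateLimit:
		// Between the two rate limits, only consider scaling up.
		if increases > 0 {
			return ScaleIncrease
		}
		return ScaleSteady
	default:
		// Past the decrease rate limit, scale up if asked; otherwise scale
		// down only when decrease requests outnumber steady ones.
		if increases > 0 {
			return ScaleIncrease
		}
		if decreases > steady {
			return ScaleDecrease
		}
		return ScaleSteady
	}
}

func main() {
	// 30 seconds after a resize with a 1-minute increase limit: ignored.
	fmt.Println(decide(30*time.Second, time.Minute, 5*time.Minute, 0, 10, 0)) // 0 (steady)
	// 6 minutes after a resize, decreases outnumber steady requests: scale down.
	fmt.Println(decide(6*time.Minute, time.Minute, 5*time.Minute, 0, 10, 3)) // 2 (decrease)
}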

// SPDX-FileCopyrightText: 2025 Free Mobile
// SPDX-License-Identifier: AGPL-3.0-only

package kafka

import (
	"context"
	"sync"
	"testing"
	"testing/synctest"
	"time"

	"akvorado/common/helpers"
)
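
// TestScalerWithoutRateLimiter feeds runScaler a series of scale requests,
// spaced five seconds apart with one-second rate limits, and checks the
// resulting sequence of worker counts.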
func TestScalerWithoutRateLimiter(t *testing.T) {
	for _, tc := range []struct {
		name       string
		minWorkers int
		maxWorkers int
		requests   []ScaleRequest
		expected   []int
	}{
		{
			name:       "scale up",
			minWorkers: 1,
			maxWorkers: 16,
			requests:   []ScaleRequest{ScaleIncrease},
			expected:   []int{9},
		}, {
			name:       "scale up twice",
			minWorkers: 1,
			maxWorkers: 16,
			requests:   []ScaleRequest{ScaleIncrease, ScaleIncrease},
			expected:   []int{9, 13},
		}, {
			name:       "scale up many times",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease, ScaleIncrease, ScaleIncrease,
				ScaleIncrease, ScaleIncrease,
			},
			expected: []int{9, 13, 15, 16},
		}, {
			name:       "scale up twice, then down a lot",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease,
				// We need 10 decrease requests to decrease
				ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease,
				ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease,
			},
			expected: []int{9, 13, 12},
		}, {
			name:       "scale up twice, then down, steady, and repeat",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
			},
			expected: []int{9, 13},
		}, {
			name:       "scale up twice, then down, steady, down, steady, down, down, repeat",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleDecrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleDecrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleDecrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleDecrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleDecrease,
			},
			expected: []int{9, 13, 12},
		},
		// No more tests, the state logic is tested in TestScalerState
	} {
		t.Run(tc.name, func(t *testing.T) {
			synctest.Test(t, func(t *testing.T) {
				ctx, cancel := context.WithCancel(t.Context())
				defer cancel()
				var mu sync.Mutex
				currentWorkers := tc.minWorkers
				got := []int{}
				config := scalerConfiguration{
					minWorkers:        tc.minWorkers,
					maxWorkers:        tc.maxWorkers,
					increaseRateLimit: time.Second,
					decreaseRateLimit: time.Second,
					getWorkerCount: func() int {
						mu.Lock()
						defer mu.Unlock()
						return currentWorkers
					},
					increaseWorkers: func(from, to int) {
						t.Logf("increaseWorkers(from: %d, to: %d)", from, to)
						mu.Lock()
						defer mu.Unlock()
						got = append(got, to)
						currentWorkers = to
					},
					decreaseWorkers: func(from, to int) {
						t.Logf("decreaseWorkers(from: %d, to: %d)", from, to)
						mu.Lock()
						defer mu.Unlock()
						got = append(got, to)
						currentWorkers = to
					},
				}
				ch := runScaler(ctx, config)
				for _, req := range tc.requests {
					ch <- req
					time.Sleep(5 * time.Second)
				}
				mu.Lock()
				defer mu.Unlock()
				if diff := helpers.Diff(got, tc.expected); diff != "" {
					t.Fatalf("runScaler() (-got, +want):\n%s", diff)
				}
			})
		})
	}
}
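
// TestScalerRateLimiter checks the interaction with the increase and decrease
// rate limits: bursts of requests are collapsed, scale-down requests are
// ignored until the decrease rate limit has elapsed, and a decrease only
// happens when decrease requests outnumber steady ones.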
func TestScalerRateLimiter(t *testing.T) {
	synctest.Test(t, func(t *testing.T) {
		ctx, cancel := context.WithCancel(t.Context())
		defer cancel()
		var mu sync.Mutex
		currentWorkers := 1
		got := []int{}
		config := scalerConfiguration{
			minWorkers:        1,
			maxWorkers:        15,
			increaseRateLimit: time.Minute,
			decreaseRateLimit: 5 * time.Minute,
			getWorkerCount: func() int {
				mu.Lock()
				defer mu.Unlock()
				return currentWorkers
			},
			increaseWorkers: func(from, to int) {
				t.Logf("increaseWorkers(from: %d, to: %d)", from, to)
				mu.Lock()
				defer mu.Unlock()
				got = append(got, to)
				currentWorkers = to
			},
			decreaseWorkers: func(from, to int) {
				t.Logf("decreaseWorkers(from: %d, to: %d)", from, to)
				mu.Lock()
				defer mu.Unlock()
				got = append(got, to)
				currentWorkers = to
			},
		}
		ch := runScaler(ctx, config)
		check := func(expected []int) {
			t.Helper()
			time.Sleep(time.Millisecond)
			mu.Lock()
			defer mu.Unlock()
			if diff := helpers.Diff(got, expected); diff != "" {
				t.Fatalf("runScaler() (-got, +want):\n%s", diff)
			}
		}
		// Increase on first scale request
		ch <- ScaleIncrease
		check([]int{8})
		// Collapsing further increases
		for range 10 {
			time.Sleep(5 * time.Second)
			ch <- ScaleIncrease
		}
		// time == 50 seconds
		check([]int{8})
		// Then increase again
		time.Sleep(10 * time.Second)
		ch <- ScaleIncrease
		// time = 1 minute
		check([]int{8, 12})
		// Do not decrease (too soon)
		for range 10 {
			time.Sleep(6 * time.Second)
			ch <- ScaleDecrease
		}
		// time = 1 minute
		check([]int{8, 12})
		// Do not decrease even after 4 minutes
		for range 40 {
			time.Sleep(6 * time.Second)
			ch <- ScaleDecrease
		}
		// time = 5 minutes
		check([]int{8, 12})
		// Decrease (5-minute decrease rate limit done)
		for range 10 {
			time.Sleep(6 * time.Second)
			ch <- ScaleDecrease
		}
		// time = 6 minutes
		check([]int{8, 12, 11})
		// Do not increase
		for range 10 {
			time.Sleep(5 * time.Second)
			ch <- ScaleIncrease
		}
		// time = 50 seconds
		check([]int{8, 12, 11})
		// Increase after 10 more seconds
		time.Sleep(10 * time.Second)
		ch <- ScaleIncrease
		// time = 1 minute
		check([]int{8, 12, 11, 12})
		// When mixing increase and decrease, increase
		for range 60 {
			time.Sleep(time.Second)
			ch <- ScaleIncrease
			ch <- ScaleDecrease
		}
		// time = 1 minute
		check([]int{8, 12, 11, 12, 13})
		// When we only have a few increases at the beginning, but mostly decreases after that, decrease
		time.Sleep(55 * time.Second)
		ch <- ScaleIncrease
		ch <- ScaleIncrease
		ch <- ScaleIncrease
		ch <- ScaleIncrease
		for range 295 {
			time.Sleep(time.Second)
			ch <- ScaleDecrease
		}
		check([]int{8, 12, 11, 12, 13, 12})
		// If we have many decrease requests at once, we decrease
		time.Sleep(300 * time.Second)
		for range 10 {
			ch <- ScaleDecrease
		}
		check([]int{8, 12, 11, 12, 13, 12, 11})
		// But if they are mixed with steady requests, we shouldn't decrease
		time.Sleep(300 * time.Second)
		for range 10 {
			ch <- ScaleDecrease
			ch <- ScaleSteady
		}
		check([]int{8, 12, 11, 12, 13, 12, 11})
		// But if we have fewer steady requests than decreases, we should scale down
		for range 10 {
			ch <- ScaleDecrease
		}
		check([]int{8, 12, 11, 12, 13, 12, 11, 10})
	})
}
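
// TestScalerState drives scalerState.nextWorkerCount directly, without the
// rate-limiting layer, and checks the successive worker counts returned for
// various sequences of scale requests.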
func TestScalerState(t *testing.T) {
	tests := []struct {
		name       string
		minWorkers int
		maxWorkers int
		requests   []ScaleRequest
		expected   []int
	}{
		{
			name:       "simple up",
			minWorkers: 1,
			maxWorkers: 16,
			requests:   []ScaleRequest{ScaleIncrease},
			expected:   []int{9},
		},
		{
			name:       "up, up, up, down, down, up",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease, ScaleIncrease,
				ScaleDecrease, ScaleDecrease,
				ScaleIncrease},
			expected: []int{9, 13, 15, 14, 13, 14},
		},
		{
			name:       "up, up, down, down, down, down, down, down",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease,
				ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease,
			},
			expected: []int{9, 13, 12, 11, 10, 9, 8, 7},
		},
		{
			name:       "down, up, up, down, down, down, down, down, down",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleDecrease,
				ScaleIncrease, ScaleIncrease,
				ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease,
			},
			expected: []int{1, 2, 3, 2, 1, 1, 1, 1, 1},
		},
		{
			name:       "simple down from min",
			minWorkers: 1,
			maxWorkers: 16,
			requests:   []ScaleRequest{ScaleDecrease},
			expected:   []int{1},
		},
		{
			name:       "reach max",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease, ScaleIncrease, ScaleIncrease, ScaleIncrease, ScaleIncrease,
			},
			expected: []int{9, 13, 15, 16, 16, 16},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			state := new(scalerState)
			current := tt.minWorkers
			results := []int{}
			for _, req := range tt.requests {
				current = state.nextWorkerCount(req, current, tt.minWorkers, tt.maxWorkers)
				results = append(results, current)
			}
			if diff := helpers.Diff(results, tt.expected); diff != "" {
				t.Fatalf("nextWorkerCount() (-got, +want):\n%s", diff)
			}
		})
	}
}