Previously, the scaler rate-limited scale-up and scale-down decisions independently. When scaling up or down, Kafka temporarily rebalances the topic, which produces spurious scale-down requests, and the scale-down rate limiter did not stop them because it was independent from the scale-up one. Instead, the increase rate limit now acts as a grace period during which every request is ignored; between that and the decrease rate limit, only increasing the number of workers is considered; past that, we scale down as long as scale-down requests outnumber steady ones. Fix #2080 (hopefully)
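To make the intended behaviour easier to picture, here is a minimal, self-contained sketch of the decision flow described above. Everything in it is an assumption for illustration: the function `decide`, its parameters (`sinceLastChange`, `increaseRateLimit`, `decreaseRateLimit`, the request counters), and the return convention are invented and do not come from the actual scaler implementation.

```go
// Hypothetical sketch only: names, signature, and return convention are
// invented for illustration and are not part of the real scaler code.
package main

import (
	"fmt"
	"time"
)

// decide returns +1 (scale up), -1 (scale down) or 0 (do nothing), given how
// long ago the worker count last changed and the requests seen since then.
func decide(sinceLastChange, increaseRateLimit, decreaseRateLimit time.Duration,
	increases, decreases, steadies int) int {
	switch {
	case sinceLastChange < increaseRateLimit:
		// Grace period: the rebalance triggered by the previous change may
		// still be in flight, so every request is ignored.
		return 0
	case sinceLastChange < decreaseRateLimit:
		// Between the two limits, only scale-up requests are acted upon.
		if increases > 0 {
			return +1
		}
		return 0
	default:
		// Past the decrease rate limit, still prefer scaling up; otherwise
		// scale down only when decrease requests outnumber steady ones.
		if increases > 0 {
			return +1
		}
		if decreases > steadies {
			return -1
		}
		return 0
	}
}

func main() {
	// 30s after the last change with a 1-minute increase limit: ignored.
	fmt.Println(decide(30*time.Second, time.Minute, 5*time.Minute, 0, 10, 0)) // 0
	// 6 minutes after the last change, decreases outnumber steadies: scale down.
	fmt.Println(decide(6*time.Minute, time.Minute, 5*time.Minute, 0, 7, 3)) // -1
}
```

The tests below exercise this behaviour through the real `runScaler`, `scalerConfiguration`, and `scalerState` identifiers rather than through this sketch.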
// SPDX-FileCopyrightText: 2025 Free Mobile
// SPDX-License-Identifier: AGPL-3.0-only

package kafka

import (
	"context"
	"sync"
	"testing"
	"testing/synctest"
	"time"

	"akvorado/common/helpers"
)

func TestScalerWithoutRateLimiter(t *testing.T) {
	for _, tc := range []struct {
		name       string
		minWorkers int
		maxWorkers int
		requests   []ScaleRequest
		expected   []int
	}{
		{
			name:       "scale up",
			minWorkers: 1,
			maxWorkers: 16,
			requests:   []ScaleRequest{ScaleIncrease},
			expected:   []int{9},
		}, {
			name:       "scale up twice",
			minWorkers: 1,
			maxWorkers: 16,
			requests:   []ScaleRequest{ScaleIncrease, ScaleIncrease},
			expected:   []int{9, 13},
		}, {
			name:       "scale up many times",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease, ScaleIncrease, ScaleIncrease,
				ScaleIncrease, ScaleIncrease,
			},
			expected: []int{9, 13, 15, 16},
		}, {
			name:       "scale up twice, then down a lot",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease,
				// We need 10 decrease requests before we actually decrease
				ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease,
				ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease,
			},
			expected: []int{9, 13, 12},
		}, {
			name:       "scale up twice, then down, steady, and repeat",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady,
			},
			expected: []int{9, 13},
		}, {
			name:       "scale up twice, then down, steady, down, steady, down, down, repeat",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleDecrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleDecrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleDecrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleDecrease,
				ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleSteady, ScaleDecrease, ScaleDecrease,
			},
			expected: []int{9, 13, 12},
		},
		// No more tests, the state logic is tested in TestScalerState
	} {
		t.Run(tc.name, func(t *testing.T) {
			synctest.Test(t, func(t *testing.T) {
				ctx, cancel := context.WithCancel(t.Context())
				defer cancel()

				var mu sync.Mutex
				currentWorkers := tc.minWorkers
				got := []int{}
				config := scalerConfiguration{
					minWorkers:        tc.minWorkers,
					maxWorkers:        tc.maxWorkers,
					increaseRateLimit: time.Second,
					decreaseRateLimit: time.Second,
					getWorkerCount: func() int {
						mu.Lock()
						defer mu.Unlock()
						return currentWorkers
					},
					increaseWorkers: func(from, to int) {
						t.Logf("increaseWorkers(from: %d, to: %d)", from, to)
						mu.Lock()
						defer mu.Unlock()
						got = append(got, to)
						currentWorkers = to
					},
					decreaseWorkers: func(from, to int) {
						t.Logf("decreaseWorkers(from: %d, to: %d)", from, to)
						mu.Lock()
						defer mu.Unlock()
						got = append(got, to)
						currentWorkers = to
					},
				}
				ch := runScaler(ctx, config)
				for _, req := range tc.requests {
					ch <- req
					time.Sleep(5 * time.Second)
				}
				mu.Lock()
				defer mu.Unlock()
				if diff := helpers.Diff(got, tc.expected); diff != "" {
					t.Fatalf("runScaler() (-got, +want):\n%s", diff)
				}
			})
		})
	}
}

func TestScalerRateLimiter(t *testing.T) {
	synctest.Test(t, func(t *testing.T) {
		ctx, cancel := context.WithCancel(t.Context())
		defer cancel()

		var mu sync.Mutex
		currentWorkers := 1
		got := []int{}
		config := scalerConfiguration{
			minWorkers:        1,
			maxWorkers:        15,
			increaseRateLimit: time.Minute,
			decreaseRateLimit: 5 * time.Minute,
			getWorkerCount: func() int {
				mu.Lock()
				defer mu.Unlock()
				return currentWorkers
			},
			increaseWorkers: func(from, to int) {
				t.Logf("increaseWorkers(from: %d, to: %d)", from, to)
				mu.Lock()
				defer mu.Unlock()
				got = append(got, to)
				currentWorkers = to
			},
			decreaseWorkers: func(from, to int) {
				t.Logf("decreaseWorkers(from: %d, to: %d)", from, to)
				mu.Lock()
				defer mu.Unlock()
				got = append(got, to)
				currentWorkers = to
			},
		}
		ch := runScaler(ctx, config)
		check := func(expected []int) {
			t.Helper()
			time.Sleep(time.Millisecond)
			mu.Lock()
			defer mu.Unlock()
			if diff := helpers.Diff(got, expected); diff != "" {
				t.Fatalf("runScaler() (-got, +want):\n%s", diff)
			}
		}
		// Increase on first scale request
		ch <- ScaleIncrease
		check([]int{8})

		// Collapsing further increases
		for range 10 {
			time.Sleep(5 * time.Second)
			ch <- ScaleIncrease
		}
		// time == 50 seconds
		check([]int{8})

		// Then increase again
		time.Sleep(10 * time.Second)
		ch <- ScaleIncrease
		// time = 1 minute
		check([]int{8, 12})

		// Do not decrease (too soon)
		for range 10 {
			time.Sleep(6 * time.Second)
			ch <- ScaleDecrease
		}
		// time = 1 minute
		check([]int{8, 12})

		// Do not decrease even after 4 minutes
		for range 40 {
			time.Sleep(6 * time.Second)
			ch <- ScaleDecrease
		}
		// time = 5 minutes
		check([]int{8, 12})

		// Decrease (5-second timeout done)
		for range 10 {
			time.Sleep(6 * time.Second)
			ch <- ScaleDecrease
		}
		// time = 6 minutes
		check([]int{8, 12, 11})

		// Do not increase
		for range 10 {
			time.Sleep(5 * time.Second)
			ch <- ScaleIncrease
		}
		// time = 50 seconds
		check([]int{8, 12, 11})

		// Increase after 10 more seconds
		time.Sleep(10 * time.Second)
		ch <- ScaleIncrease
		// time = 1 minute
		check([]int{8, 12, 11, 12})

		// When mixing increase and decrease, increase
		for range 60 {
			time.Sleep(time.Second)
			ch <- ScaleIncrease
			ch <- ScaleDecrease
		}
		// time = 1 minute
		check([]int{8, 12, 11, 12, 13})

		// When we only have a few increases at the beginning, but mostly decreases after that, decrease
		time.Sleep(55 * time.Second)
		ch <- ScaleIncrease
		ch <- ScaleIncrease
		ch <- ScaleIncrease
		ch <- ScaleIncrease
		for range 295 {
			time.Sleep(time.Second)
			ch <- ScaleDecrease
		}
		check([]int{8, 12, 11, 12, 13, 12})

		// If we have many decrease requests at once, we decrease
		time.Sleep(300 * time.Second)
		for range 10 {
			ch <- ScaleDecrease
		}
		check([]int{8, 12, 11, 12, 13, 12, 11})

		// But if they are mixed with steady requests, we shouldn't decrease
		time.Sleep(300 * time.Second)
		for range 10 {
			ch <- ScaleDecrease
			ch <- ScaleSteady
		}
		check([]int{8, 12, 11, 12, 13, 12, 11})

		// But if we have fewer steady requests than decrease requests, we should scale down
		for range 10 {
			ch <- ScaleDecrease
		}
		check([]int{8, 12, 11, 12, 13, 12, 11, 10})
	})
}

func TestScalerState(t *testing.T) {
	tests := []struct {
		name       string
		minWorkers int
		maxWorkers int
		requests   []ScaleRequest
		expected   []int
	}{
		{
			name:       "simple up",
			minWorkers: 1,
			maxWorkers: 16,
			requests:   []ScaleRequest{ScaleIncrease},
			expected:   []int{9},
		},
		{
			name:       "up, up, up, down, down, up",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease, ScaleIncrease,
				ScaleDecrease, ScaleDecrease,
				ScaleIncrease},
			expected: []int{9, 13, 15, 14, 13, 14},
		},
		{
			name:       "up, up, down, down, down, down, down, down",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease,
				ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease,
			},
			expected: []int{9, 13, 12, 11, 10, 9, 8, 7},
		},
		{
			name:       "down, up, up, down, down, down, down, down, down",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleDecrease,
				ScaleIncrease, ScaleIncrease,
				ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease, ScaleDecrease,
			},
			expected: []int{1, 2, 3, 2, 1, 1, 1, 1, 1},
		},
		{
			name:       "simple down from min",
			minWorkers: 1,
			maxWorkers: 16,
			requests:   []ScaleRequest{ScaleDecrease},
			expected:   []int{1},
		},
		{
			name:       "reach max",
			minWorkers: 1,
			maxWorkers: 16,
			requests: []ScaleRequest{
				ScaleIncrease, ScaleIncrease, ScaleIncrease, ScaleIncrease, ScaleIncrease, ScaleIncrease,
			},
			expected: []int{9, 13, 15, 16, 16, 16},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			state := new(scalerState)
			current := tt.minWorkers
			results := []int{}

			for _, req := range tt.requests {
				current = state.nextWorkerCount(req, current, tt.minWorkers, tt.maxWorkers)
				results = append(results, current)
			}

			if diff := helpers.Diff(results, tt.expected); diff != "" {
				t.Fatalf("nextWorkerCount() (-got, +want):\n%s", diff)
			}
		})
	}
}