Mirror of https://github.com/akvorado/akvorado.git, synced 2025-12-11 22:14:02 +01:00
Previously, the scaler rate-limited scale-up and scale-down decisions independently. When scaling up or down, Kafka rebalances the topic, which temporarily generates scale-down requests, and the scale-down rate limiter would not stop them since it is independent from the scale-up one. Instead, the rate limit for increase now acts as a grace time during which all requests are ignored; between that and the rate limit for decrease, we only consider increasing the number of workers; past that, we scale down as long as we have a majority of scale-down requests (compared to steady ones). Fix #2080 (hopefully)
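As a rough sketch of that decision logic (not the actual implementation in outlet/kafka; the function and parameter names below are made up for illustration), assuming the scaler tracks the time of the last resize, counts the increase/decrease/steady requests received since then, and knows both rate-limit durations:

// A minimal sketch of the grace-time logic described above. The names
// (decision, increaseEvery, decreaseEvery) are illustrative only.
package scaler

import "time"

// ScaleRequest mirrors the three kinds of hints workers can send.
type ScaleRequest int

const (
	ScaleSteady ScaleRequest = iota
	ScaleIncrease
	ScaleDecrease
)

// decision turns the requests accumulated since the last resize into a
// single action.
func decision(lastResize time.Time, increaseEvery, decreaseEvery time.Duration,
	increase, decrease, steady int) ScaleRequest {
	elapsed := time.Since(lastResize)
	switch {
	case elapsed < increaseEvery:
		// Grace time: Kafka is likely still rebalancing, ignore everything.
		return ScaleSteady
	case elapsed < decreaseEvery:
		// Only consider adding workers.
		if increase > 0 {
			return ScaleIncrease
		}
		return ScaleSteady
	default:
		// Increases still win; otherwise decrease only when scale-down
		// requests outnumber steady ones.
		if increase > 0 {
			return ScaleIncrease
		}
		if decrease > steady {
			return ScaleDecrease
		}
		return ScaleSteady
	}
}

With a single timeline instead of two independent limiters, the transient scale-down requests triggered by a rebalance fall into the grace time and never race the scale-up decision.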
112 lines
2.9 KiB
Go
// SPDX-FileCopyrightText: 2025 Free Mobile
// SPDX-License-Identifier: AGPL-3.0-only

package core

import (
	"context"
	"encoding/json"
	"fmt"
	"sync/atomic"
	"time"

	"akvorado/common/pb"
	"akvorado/common/reporter"
	"akvorado/common/schema"
	"akvorado/outlet/clickhouse"
	"akvorado/outlet/kafka"
)

// worker represents a worker processing incoming flows.
type worker struct {
	c       *Component
	l       reporter.Logger
	cw      clickhouse.Worker
	bf      *schema.FlowMessage
	rawFlow pb.RawFlow

	scaleRequestChan chan<- kafka.ScaleRequest
}

// newWorker instantiates a new worker and returns a callback function to
// process an incoming flow and a function to call on shutdown.
func (c *Component) newWorker(i int, scaleRequestChan chan<- kafka.ScaleRequest) (kafka.ReceiveFunc, kafka.ShutdownFunc) {
	bf := c.d.Schema.NewFlowMessage()
	w := worker{
		c:                c,
		l:                c.r.With().Int("worker", i).Logger(),
		bf:               bf,
		cw:               c.d.ClickHouse.NewWorker(i, bf),
		scaleRequestChan: scaleRequestChan,
	}
	return w.processIncomingFlow, w.shutdown
}

// shutdown shuts down the worker, flushing any remaining data.
func (w *worker) shutdown() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	w.cw.Flush(ctx)
}

// processIncomingFlow processes one incoming flow from Kafka.
func (w *worker) processIncomingFlow(ctx context.Context, data []byte) error {
	// Do nothing if we are shutting down
	if !w.c.t.Alive() {
		return kafka.ErrStopProcessing
	}

	// Raw flow decoding: fatal
	w.c.metrics.rawFlowsReceived.Inc()
	w.rawFlow.ResetVT()
	if err := w.rawFlow.UnmarshalVT(data); err != nil {
		w.c.metrics.rawFlowsErrors.WithLabelValues("cannot decode protobuf").Inc()
		return fmt.Errorf("cannot decode raw flow: %w", err)
	}

	// Process each decoded flow
	finalize := func() {
		// Accounting
		exporter := w.bf.ExporterAddress.Unmap().String()
		w.c.metrics.flowsReceived.WithLabelValues(exporter).Inc()

		// Enrichment: not fatal
		ip := w.bf.ExporterAddress
		if skip := w.enrichFlow(ip, exporter); skip {
			w.bf.Undo()
			return
		}

		// If we have HTTP clients, send to them too
		if atomic.LoadUint32(&w.c.httpFlowClients) > 0 {
			if jsonBytes, err := json.Marshal(w.bf); err == nil {
				select {
				case w.c.httpFlowChannel <- jsonBytes: // OK
				default: // Overflow, best effort and ignore
				}
			}
		}

		// Finalize and forward to ClickHouse
		w.c.metrics.flowsForwarded.WithLabelValues(exporter).Inc()
		status := w.cw.FinalizeAndSend(ctx)
		switch status {
		case clickhouse.WorkerStatusOverloaded:
			w.scaleRequestChan <- kafka.ScaleIncrease
		case clickhouse.WorkerStatusUnderloaded:
			w.scaleRequestChan <- kafka.ScaleDecrease
		default:
			w.scaleRequestChan <- kafka.ScaleSteady
		}
	}

	// Flow decoding: not fatal
	err := w.c.d.Flow.Decode(&w.rawFlow, w.bf, finalize)
	if err != nil {
		w.c.metrics.rawFlowsErrors.WithLabelValues("cannot decode payload").Inc()
		return nil
	}

	return nil
}