inlet: split inlet into new inlet and outlet

This change splits the inlet component into a simpler inlet and a new
outlet component. The new inlet component receives flows and puts them in
Kafka, unparsed. The outlet component takes them from Kafka, resumes
the processing from there (flow parsing, enrichment), and puts them in
ClickHouse.

The main goal is to ensure the inlet does minimal work so as not to be late
when processing packets (and restarts faster). It also brings some
simplification as the number of knobs to tune everything is reduced: for
inlet, we only need to tune the queue size for UDP, the number of
workers and a few Kafka parameters; for outlet, we need to tune a few
Kafka parameters, the number of workers and a few ClickHouse parameters.

The outlet component features a simple Kafka input component. The core
component becomes just a callback function. There is also a new
ClickHouse component to push data to ClickHouse using the low-level
ch-go library with batch inserts.

This processing has an impact on the internal representation of a
FlowMessage. Previously, it was tailored to dynamically build the
protobuf message to be put in Kafka. Now, it builds the batch request to
be sent to ClickHouse. This makes the FlowMessage structure hide the
content of the next batch request and, therefore, it should be reused.
This also changes the way we decode flows as they don't output
FlowMessage anymore, they reuse one that is provided to each worker.

The ClickHouse tables are slightly updated. Instead of the Kafka
engine, the Null engine is now used.

Fix #1122
This commit is contained in:
Vincent Bernat
2024-12-17 06:31:10 +01:00
parent ad59598831
commit ac68c5970e
231 changed files with 6488 additions and 3891 deletions

View File

@@ -0,0 +1,193 @@
// SPDX-FileCopyrightText: 2025 Free Mobile
// SPDX-License-Identifier: AGPL-3.0-only
package schema_test
import (
"context"
"encoding/json"
"fmt"
"io"
"net/netip"
"testing"
"time"
"github.com/ClickHouse/ch-go"
"github.com/ClickHouse/clickhouse-go/v2"
"akvorado/common/helpers"
"akvorado/common/schema"
)
// TestInsertMemory builds two flows in a FlowMessage, inserts them into a
// ClickHouse table using the Memory engine through ch-go, then reads the rows
// back with clickhouse-go and compares them with the expected values. The
// address of the ClickHouse server is obtained from
// helpers.CheckExternalService.
func TestInsertMemory(t *testing.T) {
	c := schema.NewMock(t)
	bf := c.NewFlowMessage()
	exporterAddress := netip.MustParseAddr("::ffff:203.0.113.14")

	// First flow
	bf.TimeReceived = 1000
	bf.SamplingRate = 20000
	bf.ExporterAddress = exporterAddress
	bf.AppendString(schema.ColumnExporterName, "router1.example.net")
	bf.AppendUint(schema.ColumnSrcAS, 65000)
	bf.AppendUint(schema.ColumnDstAS, 12322)
	bf.AppendUint(schema.ColumnBytes, 20)
	bf.AppendUint(schema.ColumnPackets, 3)
	bf.AppendUint(schema.ColumnInIfBoundary, uint64(schema.InterfaceBoundaryInternal))
	bf.AppendUint(schema.ColumnOutIfBoundary, uint64(schema.InterfaceBoundaryExternal))
	bf.AppendUint(schema.ColumnInIfSpeed, 10000)
	bf.AppendUint(schema.ColumnEType, helpers.ETypeIPv4)
	bf.Finalize()

	// Second flow. OutIfBoundary is deliberately not set: the expected row
	// below has "undefined" for it.
	bf.TimeReceived = 1001
	bf.SamplingRate = 20000
	bf.ExporterAddress = exporterAddress
	bf.AppendString(schema.ColumnExporterName, "router1.example.net")
	bf.AppendUint(schema.ColumnSrcAS, 12322)
	bf.AppendUint(schema.ColumnDstAS, 65000)
	bf.AppendUint(schema.ColumnBytes, 200)
	bf.AppendUint(schema.ColumnPackets, 3)
	bf.AppendUint(schema.ColumnInIfBoundary, uint64(schema.InterfaceBoundaryExternal))
	bf.AppendUint(schema.ColumnOutIfSpeed, 10000)
	bf.AppendUint(schema.ColumnEType, helpers.ETypeIPv4)
	bf.AppendArrayUInt32(schema.ColumnDstASPath, []uint32{65400, 65500, 65001})
	bf.AppendArrayUInt128(schema.ColumnDstLargeCommunities, []schema.UInt128{
		{
			High: 65401,
			Low:  (100 << 32) + 200,
		},
		{
			High: 65401,
			Low:  (100 << 32) + 201,
		},
	})
	bf.Finalize()

	server := helpers.CheckExternalService(t, "ClickHouse", []string{"clickhouse:9000", "127.0.0.1:9000"})
	ctx := t.Context()

	conn, err := ch.Dial(ctx, ch.Options{
		Address:     server,
		DialTimeout: 100 * time.Millisecond,
		Settings: []ch.Setting{
			{Key: "allow_suspicious_low_cardinality_types", Value: "1"},
		},
	})
	if err != nil {
		t.Fatalf("Dial() error:\n%+v", err)
	}
	// Best-effort cleanup: a close error is not relevant to the test result.
	// Deferred functions also run when t.Fatalf() aborts the test.
	defer conn.Close()

	// Create the table
	q := fmt.Sprintf(
		`CREATE OR REPLACE TABLE test_table_insert (%s) ENGINE = Memory`,
		c.ClickHouseCreateTable(schema.ClickHouseSkipAliasedColumns, schema.ClickHouseSkipGeneratedColumns),
	)
	t.Logf("Query: %s", q)
	if err := conn.Do(ctx, ch.Query{
		Body: q,
	}); err != nil {
		t.Fatalf("Do() error:\n%+v", err)
	}

	// Insert
	input := bf.ClickHouseProtoInput()
	if err := conn.Do(ctx, ch.Query{
		Body:  input.Into("test_table_insert"),
		Input: input,
		OnInput: func(ctx context.Context) error {
			bf.Clear()
			// No more data to send!
			return io.EOF
		},
	}); err != nil {
		t.Fatalf("Do() error:\n%+v", err)
	}

	// Check the result (with the full-featured client)
	{
		client, err := clickhouse.Open(&clickhouse.Options{
			Addr:        []string{server},
			DialTimeout: 100 * time.Millisecond,
		})
		if err != nil {
			t.Fatalf("clickhouse.Open() error:\n%+v", err)
		}
		defer client.Close()

		// Use formatRow to get JSON representation
		rows, err := client.Query(ctx, "SELECT formatRow('JSONEachRow', *) FROM test_table_insert ORDER BY TimeReceived")
		if err != nil {
			t.Fatalf("clickhouse.Query() error:\n%+v", err)
		}
		// Deferred so the rows are released even when a t.Fatalf() below
		// aborts the test in the middle of the iteration.
		defer rows.Close()

		var got []map[string]any
		for rows.Next() {
			var jsonRow string
			if err := rows.Scan(&jsonRow); err != nil {
				t.Fatalf("rows.Scan() error:\n%+v", err)
			}
			var row map[string]any
			if err := json.Unmarshal([]byte(jsonRow), &row); err != nil {
				t.Fatalf("json.Unmarshal() error:\n%+v", err)
			}
			// Remove fields with default values
			for k, v := range row {
				switch val := v.(type) {
				case string:
					if val == "" || val == "::" {
						delete(row, k)
					}
				case float64:
					if val == 0 {
						delete(row, k)
					}
				case []any:
					if len(val) == 0 {
						delete(row, k)
					}
				}
			}
			got = append(got, row)
		}
		// rows.Next() also returns false on error: report it instead of
		// comparing a truncated result set.
		if err := rows.Err(); err != nil {
			t.Fatalf("rows.Err() error:\n%+v", err)
		}

		expected := []map[string]any{
			{
				"TimeReceived":    "1970-01-01 00:16:40",
				"SamplingRate":    "20000",
				"ExporterAddress": "::ffff:203.0.113.14",
				"ExporterName":    "router1.example.net",
				"SrcAS":           65000,
				"DstAS":           12322,
				"Bytes":           "20",
				"Packets":         "3",
				"InIfBoundary":    "internal",
				"OutIfBoundary":   "external",
				"InIfSpeed":       10000,
				"EType":           helpers.ETypeIPv4,
			}, {
				"TimeReceived":    "1970-01-01 00:16:41",
				"SamplingRate":    "20000",
				"ExporterAddress": "::ffff:203.0.113.14",
				"ExporterName":    "router1.example.net",
				"SrcAS":           12322,
				"DstAS":           65000,
				"Bytes":           "200",
				"Packets":         "3",
				"InIfBoundary":    "external",
				"OutIfBoundary":   "undefined",
				"OutIfSpeed":      10000,
				"EType":           helpers.ETypeIPv4,
				"DstASPath":       []uint32{65400, 65500, 65001},
				"DstLargeCommunities": []string{
					"1206435509165107881967816", // 65401:100:200
					"1206435509165107881967817", // 65401:100:201
				},
			},
		}
		if diff := helpers.Diff(got, expected); diff != "" {
			t.Errorf("Insert (-got, +want):\n%s", diff)
		}
	}
}