orchestrator/clickhouse: remove init.sh endpoint

Instead, just map configuration files inside the container. As we don't
have to push the schema anymore, pushing some arbitrary configuration
does not seem to be our job.
Vincent Bernat
2024-06-18 18:50:34 +02:00
parent 8ae23f9ae3
commit c70f3b74bf
16 changed files with 52 additions and 155 deletions
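In practice, the change replaces a generated init script with a static configuration file bind-mounted into ClickHouse's `config.d` directory. A minimal sketch of the idea outside of Compose, with the image tag and local path being assumptions rather than part of this commit:

```sh
# Bind-mount the static configuration instead of generating it at startup.
docker run --rm \
  -v "$PWD/clickhouse/akvorado.xml:/etc/clickhouse-server/config.d/akvorado.xml:ro" \
  clickhouse/clickhouse-server:latest
```

The Compose files touched below do the same thing with a `volumes:` entry.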

View File

@@ -820,11 +820,6 @@ provided inside `clickhouse`:
- `resolutions` defines the various resolutions to keep data
- `max-partitions` defines the number of partitions to use when
  creating consolidated tables
- `system-log-ttl` defines the TTL for system log tables. Set to 0 to disable.
As these tables are partitioned by month, it's useless to use a too low value.
The default value is 30 days. This requires a restart of ClickHouse.
- `prometheus-endpoint` defines the endpoint to configure to expose ClickHouse
metrics to Prometheus. When not defined, this is left unconfigured.
- `networks` maps subnets to attributes. Attributes are `name`, `role`, `site`,
  `region`, and `tenant`. They are exposed as `SrcNetName`, `DstNetName`,
  `SrcNetRole`, `DstNetRole`, etc. It is also possible to override GeoIP

View File

@@ -67,9 +67,6 @@ services:
The following endpoints are exposed for use by ClickHouse:
- `/api/v0/orchestrator/clickhouse/init.sh` contains the schemas in the form of a
script to execute during initialization to get them installed at the
proper location
- `/api/v0/orchestrator/clickhouse/protocols.csv` contains a CSV with the mapping
  between protocol numbers and names
- `/api/v0/orchestrator/clickhouse/asns.csv` contains a CSV with the mapping
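The remaining endpoints can be checked directly with `curl`. A quick sanity check, assuming the orchestrator listens on `akvorado-orchestrator:8080` (adjust host and port to your deployment):

```sh
# Fetch the protocol mapping served to ClickHouse and show the first lines.
curl -s http://akvorado-orchestrator:8080/api/v0/orchestrator/clickhouse/protocols.csv | head -n 3
```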

View File

@@ -372,14 +372,3 @@ FROM system.errors
ORDER BY last_error_time LIMIT 10
FORMAT Vertical
```
Notably, it may complain about a missing schema for a received
message. In this case, you need to ensure the schemas used by
*Akvorado* are available. When using `docker compose`, you can restart
the orchestrator and ClickHouse to ensure it downloads the latest
schemas. Otherwise, you can manually execute the script installing the
schemas on your ClickHouse server and restart:
```sh
curl http://akvorado/api/v0/orchestrator/clickhouse/init.sh | sh
```

View File

@@ -0,0 +1,28 @@
<clickhouse>
  <!-- Don't keep logs forever -->
  <asynchronous_metric_log>
    <ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
  </asynchronous_metric_log>
  <metric_log>
    <ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
  </metric_log>
  <part_log>
    <ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
  </part_log>
  <query_log>
    <ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
  </query_log>
  <query_thread_log>
    <ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
  </query_thread_log>
  <trace_log>
    <ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
  </trace_log>
  <!-- Enable Prometheus endpoint -->
  <prometheus>
    <endpoint>/metrics</endpoint>
    <metrics>true</metrics>
    <events>true</events>
    <asynchronous_metrics>true</asynchronous_metrics>
  </prometheus>
</clickhouse>
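After restarting ClickHouse with this file mounted, the effect can be checked from the container. This is only a sketch: the service name `clickhouse` and the HTTP port 8123 for the Prometheus endpoint are assumptions, and older ClickHouse releases may expose metrics differently:

```sh
# The TTL should appear in the definition of the system log tables.
docker compose exec clickhouse clickhouse-client \
  --query "SHOW CREATE TABLE system.query_log" | grep -i ttl

# The Prometheus endpoint should answer on the HTTP interface.
curl -s http://127.0.0.1:8123/metrics | head -n 5
```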

View File

@@ -12,8 +12,8 @@ services:
    depends_on:
      - zookeeper
    volumes:
-      - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
-      - ./clickhouse-cluster-1.xml:/etc/clickhouse-server/config.d/cluster-1.xml
+      - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
+      - ./clickhouse/cluster-1.xml:/etc/clickhouse-server/config.d/cluster-1.xml
  clickhouse-2:
    extends:
      file: docker-compose.yml
@@ -21,8 +21,8 @@ services:
    depends_on:
      - zookeeper
    volumes:
-      - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
-      - ./clickhouse-cluster-2.xml:/etc/clickhouse-server/config.d/cluster-2.xml
+      - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
+      - ./clickhouse/cluster-2.xml:/etc/clickhouse-server/config.d/cluster-2.xml
      - akvorado-clickhouse-2:/var/lib/clickhouse
  clickhouse-3:
    extends:
@@ -31,8 +31,8 @@ services:
    depends_on:
      - zookeeper
    volumes:
-      - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
-      - ./clickhouse-cluster-3.xml:/etc/clickhouse-server/config.d/cluster-3.xml
+      - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
+      - ./clickhouse/cluster-3.xml:/etc/clickhouse-server/config.d/cluster-3.xml
      - akvorado-clickhouse-3:/var/lib/clickhouse
  clickhouse-4:
    extends:
@@ -41,6 +41,6 @@ services:
    depends_on:
      - zookeeper
    volumes:
-      - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
-      - ./clickhouse-cluster-4.xml:/etc/clickhouse-server/config.d/cluster-4.xml
+      - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
+      - ./clickhouse/cluster-4.xml:/etc/clickhouse-server/config.d/cluster-4.xml
      - akvorado-clickhouse-4:/var/lib/clickhouse

View File

@@ -121,30 +121,30 @@ services:
    cap_add:
      - SYS_NICE
    volumes:
-      - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
-      - ./clickhouse-cluster-1.xml:/etc/clickhouse-server/config.d/cluster-1.xml
+      - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
+      - ./clickhouse/cluster-1.xml:/etc/clickhouse-server/config.d/cluster-1.xml
    ports:
      - 127.0.0.1:8123:8123/tcp
      - 127.0.0.1:9000:9000/tcp
  clickhouse-2:
    <<: *clickhouse
    volumes:
-      - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
-      - ./clickhouse-cluster-2.xml:/etc/clickhouse-server/config.d/cluster-2.xml
+      - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
+      - ./clickhouse/cluster-2.xml:/etc/clickhouse-server/config.d/cluster-2.xml
    ports:
      - 127.0.0.1:9001:9000/tcp
  clickhouse-3:
    <<: *clickhouse
    volumes:
-      - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
-      - ./clickhouse-cluster-3.xml:/etc/clickhouse-server/config.d/cluster-3.xml
+      - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
+      - ./clickhouse/cluster-3.xml:/etc/clickhouse-server/config.d/cluster-3.xml
    ports:
      - 127.0.0.1:9002:9000/tcp
  clickhouse-4:
    <<: *clickhouse
    volumes:
-      - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
-      - ./clickhouse-cluster-4.xml:/etc/clickhouse-server/config.d/cluster-4.xml
+      - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
+      - ./clickhouse/cluster-4.xml:/etc/clickhouse-server/config.d/cluster-4.xml
    ports:
      - 127.0.0.1:9003:9000/tcp

View File

@@ -167,9 +167,8 @@ services:
      file: versions.yml
      service: clickhouse
    volumes:
-      - ../orchestrator/clickhouse/data/docker-entrypoint.sh:/entrypoint-override.sh:ro
      - akvorado-clickhouse:/var/lib/clickhouse
-    entrypoint: /entrypoint-override.sh
+      - ./clickhouse/akvorado.xml:/etc/clickhouse-server/config.d/akvorado.xml
    environment:
      - CLICKHOUSE_INIT_TIMEOUT=60
      - CLICKHOUSE_SKIP_USER_SETUP=1
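On an existing deployment, picking up this change presumably amounts to pulling the updated Compose file along with the mounted configuration, then recreating the container; a sketch using the `clickhouse` service name from the snippet above:

```sh
# Recreate the container so the bind-mounted config.d file is read at startup.
docker compose up -d --force-recreate clickhouse
```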

View File

@@ -27,11 +27,6 @@ type Configuration struct {
// MaxPartitions define the number of partitions to have for a
// consolidated flow tables when full.
MaxPartitions int `validate:"isdefault|min=1"`
// SystemLogTTL is the TTL to set for system log tables.
SystemLogTTL time.Duration `validate:"isdefault|min=1m"`
// PrometheusEndpoint defines the endpoint ClickHouse can use to expose
// metrics to Prometheus. If not defined, this is not configured.
PrometheusEndpoint string
// ASNs is a mapping from AS numbers to names. It replaces or
// extends the builtin list of AS numbers.
ASNs map[uint32]string
@@ -52,6 +47,11 @@ type Configuration struct {
// OrchestratorBasicAuth holds optional basic auth credentials to reach
// orchestrator from ClickHouse
OrchestratorBasicAuth *ConfigurationBasicAuth
// SystemLogTTL is unused now.
SystemLogTTL time.Duration `yaml:"-"`
// PrometheusEndpoint is unused now.
PrometheusEndpoint string `yaml:"-"`
}
// ConfigurationBasicAuth holds Username and Password subfields
@@ -100,7 +100,6 @@ func DefaultConfiguration() Configuration {
},
MaxPartitions: 50,
NetworkSourcesTimeout: 10 * time.Second,
SystemLogTTL: 30 * 24 * time.Hour, // 30 days
}
}
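Since `system-log-ttl` and `prometheus-endpoint` no longer have any effect, it may be worth locating leftovers in an existing configuration; a small sketch, with the configuration path being an assumption:

```sh
# List now-ignored keys still present in the Akvorado configuration.
grep -nE 'system-log-ttl|prometheus-endpoint' /etc/akvorado/akvorado.yaml
```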

View File

@@ -1,19 +0,0 @@
#!/bin/bash
# SPDX-FileCopyrightText: 2024 Free Mobile
# SPDX-License-Identifier: AGPL-3.0-only
set -e
if [[ $# -lt 1 ]] || [[ "$1" = "--"* ]]; then
    rm -f /tmp/init.sh
    while [[ ! -s /tmp/init.sh ]]; do
        sleep 1
        echo "Downloading ClickHouse init script..."
        wget --no-proxy -qO /tmp/init.sh \
            http://akvorado-orchestrator:8080/api/v0/orchestrator/clickhouse/init.sh || continue
    done
    sh /tmp/init.sh
fi
# Use official entrypoint
exec /entrypoint.sh "$@"

View File

@@ -4,7 +4,6 @@
package clickhouse
import (
"bytes"
"compress/gzip" "compress/gzip"
"embed" "embed"
"encoding/csv" "encoding/csv"
@@ -13,7 +12,6 @@ import (
"net/http" "net/http"
"os" "os"
"strconv" "strconv"
"text/template"
"time" "time"
) )
@@ -23,49 +21,9 @@ var (
//go:embed data/asns.csv
//go:embed data/tcp.csv
//go:embed data/udp.csv
data embed.FS
initShTemplate = template.Must(template.New("initsh").Parse(`#!/bin/sh
# Install Protobuf schema
mkdir -p /var/lib/clickhouse/format_schemas
echo "Install flow schema flow-{{ .FlowSchemaHash }}.proto"
cat > /var/lib/clickhouse/format_schemas/flow-{{ .FlowSchemaHash }}.proto <<'EOPROTO'
{{ .FlowSchema }}
EOPROTO
# Alter ClickHouse configuration
mkdir -p /etc/clickhouse-server/config.d
echo "Add Akvorado-specific configuration to ClickHouse"
cat > /etc/clickhouse-server/config.d/akvorado.xml <<'EOCONFIG'
<clickhouse>
{{- if gt .SystemLogTTL 0 }}
{{- range $table := .SystemLogTables }}
<{{ $table }}>
<ttl>event_date + INTERVAL {{ $.SystemLogTTL }} SECOND DELETE</ttl>
</{{ $table }}>
{{- end }}
{{- end }}
{{- if ne .PrometheusEndpoint "" }}
<prometheus>
<endpoint>{{ .PrometheusEndpoint }}</endpoint>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
</prometheus>
{{- end }}
</clickhouse>
EOCONFIG
`))
)
type initShVariables struct {
FlowSchemaHash string
FlowSchema string
SystemLogTTL int
SystemLogTables []string
PrometheusEndpoint string
}
func (c *Component) addHandlerEmbedded(url string, path string) {
c.d.HTTP.AddHandler(url,
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -83,37 +41,6 @@ func (c *Component) addHandlerEmbedded(url string, path string) {
// registerHTTPHandler register some handlers that will be useful for
// ClickHouse
func (c *Component) registerHTTPHandlers() error {
// init.sh
c.d.HTTP.AddHandler("/api/v0/orchestrator/clickhouse/init.sh",
http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
var result bytes.Buffer
if err := initShTemplate.Execute(&result, initShVariables{
FlowSchemaHash: c.d.Schema.ProtobufMessageHash(),
FlowSchema: c.d.Schema.ProtobufDefinition(),
SystemLogTTL: int(c.config.SystemLogTTL.Seconds()),
SystemLogTables: []string{
"asynchronous_metric_log",
"error_log",
"metric_log",
"part_log",
"processors_profile_log",
"query_log",
"query_thread_log",
"query_views_log",
"session_log",
"text_log",
"trace_log",
},
PrometheusEndpoint: c.config.PrometheusEndpoint,
}); err != nil {
c.r.Err(err).Msg("unable to serialize init.sh")
http.Error(w, fmt.Sprintf("Unable to serialize init.sh"), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/x-shellscript")
w.Write(result.Bytes())
}))
// Add handler for custom dicts
for name, dict := range c.d.Schema.GetCustomDictConfig() {
c.d.HTTP.AddHandler(fmt.Sprintf("/api/v0/orchestrator/clickhouse/custom_dict_%s.csv", name), http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {

View File

@@ -4,7 +4,6 @@
package clickhouse
import (
"fmt"
"testing" "testing"
"akvorado/common/clickhousedb" "akvorado/common/clickhousedb"
@@ -75,30 +74,13 @@ func TestHTTPEndpoints(t *testing.T) {
`192.0.2.0/24,infra,,,,,,,,`,
},
}, {
URL: "/api/v0/orchestrator/clickhouse/init.sh",
ContentType: "text/x-shellscript",
FirstLines: []string{
`#!/bin/sh`,
``,
`# Install Protobuf schema`,
`mkdir -p /var/lib/clickhouse/format_schemas`,
fmt.Sprintf(`echo "Install flow schema flow-%s.proto"`,
c.d.Schema.ProtobufMessageHash()),
fmt.Sprintf(`cat > /var/lib/clickhouse/format_schemas/flow-%s.proto <<'EOPROTO'`,
c.d.Schema.ProtobufMessageHash()),
"",
`syntax = "proto3";`,
},
},
{
URL: "/api/v0/orchestrator/clickhouse/custom_dict_none.csv", URL: "/api/v0/orchestrator/clickhouse/custom_dict_none.csv",
ContentType: "text/plain; charset=utf-8", ContentType: "text/plain; charset=utf-8",
StatusCode: 404, StatusCode: 404,
FirstLines: []string{ FirstLines: []string{
"unable to deliver custom dict csv file none.csv", "unable to deliver custom dict csv file none.csv",
}, },
}, }, {
{
URL: "/api/v0/orchestrator/clickhouse/custom_dict_test.csv", URL: "/api/v0/orchestrator/clickhouse/custom_dict_test.csv",
ContentType: "text/csv; charset=utf-8", ContentType: "text/csv; charset=utf-8",
FirstLines: []string{ FirstLines: []string{