cmd: automatic restart of orchestrator on configuration change

This commit is contained in:
Vincent Bernat
2025-09-19 16:21:20 +02:00
parent 57185f67dd
commit c782312c2d
12 changed files with 244 additions and 65 deletions

View File

@@ -33,15 +33,17 @@ type ConfigRelatedOptions struct {
BeforeDump func(mapstructure.Metadata)
}
// Parse parses the configuration file (if present) and the
// environment variables into the provided configuration.
func (c ConfigRelatedOptions) Parse(out io.Writer, component string, config any) error {
// Parse parses the configuration file (if present) and the environment
// variables into the provided configuration. It returns the paths to watch if
// we want to detect configuration changes.
func (c ConfigRelatedOptions) Parse(out io.Writer, component string, config any) ([]string, error) {
var rawConfig gin.H
var paths []string
if cfgFile := c.Path; cfgFile != "" {
if strings.HasPrefix(cfgFile, "http://") || strings.HasPrefix(cfgFile, "https://") {
u, err := url.Parse(cfgFile)
if err != nil {
return fmt.Errorf("cannot parse configuration URL: %w", err)
return nil, fmt.Errorf("cannot parse configuration URL: %w", err)
}
if u.Path == "" {
u.Path = fmt.Sprintf("/api/v0/orchestrator/configuration/%s", component)
@@ -51,32 +53,36 @@ func (c ConfigRelatedOptions) Parse(out io.Writer, component string, config any)
}
resp, err := http.Get(u.String())
if err != nil {
return fmt.Errorf("unable to fetch configuration file: %w", err)
return nil, fmt.Errorf("unable to fetch configuration file: %w", err)
}
defer resp.Body.Close()
contentType := resp.Header.Get("Content-Type")
mediaType, _, err := mime.ParseMediaType(contentType)
if (mediaType != "application/x-yaml" && mediaType != "application/yaml") || err != nil {
return fmt.Errorf("received configuration file is not YAML (%s)", contentType)
return nil, fmt.Errorf("received configuration file is not YAML (%s)", contentType)
}
input, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("unable to read configuration file: %w", err)
return nil, fmt.Errorf("unable to read configuration file: %w", err)
}
if err := yaml.Unmarshal(input, &rawConfig); err != nil {
return fmt.Errorf("unable to parse YAML configuration file: %w", err)
return nil, fmt.Errorf("unable to parse YAML configuration file: %w", err)
}
} else {
cfgFile, err := filepath.EvalSymlinks(cfgFile)
if err != nil {
return fmt.Errorf("cannot follow symlink: %w", err)
return nil, fmt.Errorf("cannot follow symlink: %w", err)
}
dirname, filename := filepath.Split(cfgFile)
if dirname == "" {
dirname = "."
}
if _, err := yaml.UnmarshalWithInclude(os.DirFS(dirname), filename, &rawConfig); err != nil {
return fmt.Errorf("unable to parse YAML configuration file: %w", err)
paths, err = yaml.UnmarshalWithInclude(os.DirFS(dirname), filename, &rawConfig)
for i := range paths {
paths[i] = filepath.Clean(filepath.Join(dirname, paths[i]))
}
if err != nil {
return nil, fmt.Errorf("unable to parse YAML configuration file: %w", err)
}
}
}
@@ -90,10 +96,10 @@ func (c ConfigRelatedOptions) Parse(out io.Writer, component string, config any)
decoderConfig.Metadata = &metadata
decoder, err := mapstructure.NewDecoder(decoderConfig)
if err != nil {
return fmt.Errorf("unable to create configuration decoder: %w", err)
return nil, fmt.Errorf("unable to create configuration decoder: %w", err)
}
if err := decoder.Decode(rawConfig); err != nil {
return fmt.Errorf("unable to parse configuration: %w", err)
return nil, fmt.Errorf("unable to parse configuration: %w", err)
}
disableDefaultHook()
disableZeroSliceHook()
@@ -126,7 +132,7 @@ func (c ConfigRelatedOptions) Parse(out io.Writer, component string, config any)
}
}
if err := decoder.Decode(rawConfig); err != nil {
return fmt.Errorf("unable to parse override %q: %w", kv[0], err)
return nil, fmt.Errorf("unable to parse override %q: %w", kv[0], err)
}
}
@@ -139,7 +145,7 @@ func (c ConfigRelatedOptions) Parse(out io.Writer, component string, config any)
}
sort.Strings(invalidKeys)
if len(invalidKeys) > 0 {
return fmt.Errorf("invalid configuration:\n%s", strings.Join(invalidKeys, "\n"))
return nil, fmt.Errorf("invalid configuration:\n%s", strings.Join(invalidKeys, "\n"))
}
// Validate and dump configuration if requested
@@ -149,22 +155,22 @@ func (c ConfigRelatedOptions) Parse(out io.Writer, component string, config any)
if err := helpers.Validate.Struct(config); err != nil {
switch verr := err.(type) {
case validator.ValidationErrors:
return fmt.Errorf("invalid configuration:\n%w", verr)
return nil, fmt.Errorf("invalid configuration:\n%w", verr)
default:
return fmt.Errorf("unexpected internal error: %w", verr)
return nil, fmt.Errorf("unexpected internal error: %w", verr)
}
}
if c.Dump {
output, err := yaml.Marshal(config)
if err != nil {
return fmt.Errorf("unable to dump configuration: %w", err)
return nil, fmt.Errorf("unable to dump configuration: %w", err)
}
out.Write([]byte("---\n"))
out.Write(output)
out.Write([]byte("\n"))
}
return nil
return paths, nil
}
// DefaultHook will reset the destination value to its default using

View File

@@ -92,7 +92,7 @@ module1:
parsed := dummyConfiguration{}
out := bytes.NewBuffer([]byte{})
if err := c.Parse(out, "dummy", &parsed); err == nil {
if _, err := c.Parse(out, "dummy", &parsed); err == nil {
t.Fatal("Parse() didn't error")
} else if diff := helpers.Diff(err.Error(), `invalid configuration:
Key: 'dummyConfiguration.Module1.Topic' Error:Field validation for 'Topic' failed on the 'gte' tag
@@ -126,7 +126,7 @@ module2:
parsed := dummyConfiguration{}
out := bytes.NewBuffer([]byte{})
if err := c.Parse(out, "dummy", &parsed); err != nil {
if _, err := c.Parse(out, "dummy", &parsed); err != nil {
t.Fatalf("Parse() error:\n%+v", err)
}
// Expected configuration
@@ -226,7 +226,7 @@ module2:
parsed := dummyConfiguration{}
out := bytes.NewBuffer([]byte{})
if err := c.Parse(out, "dummy", &parsed); err != nil {
if _, err := c.Parse(out, "dummy", &parsed); err != nil {
t.Fatalf("Parse() error:\n%+v", err)
}
// Expected configuration
@@ -280,7 +280,7 @@ module2:
parsed := dummyConfiguration{}
out := bytes.NewBuffer([]byte{})
if err := c.Parse(out, "dummy", &parsed); err != nil {
if _, err := c.Parse(out, "dummy", &parsed); err != nil {
t.Fatalf("Parse() error:\n%+v", err)
}
// Expected configuration
@@ -325,7 +325,7 @@ module1:
parsed := dummyConfiguration{}
out := bytes.NewBuffer([]byte{})
if err := c.Parse(out, "dummy", &parsed); err != nil {
if _, err := c.Parse(out, "dummy", &parsed); err != nil {
t.Fatalf("Parse() error:\n%+v", err)
}
})
@@ -345,7 +345,7 @@ module1:
parsed := dummyConfiguration{}
out := bytes.NewBuffer([]byte{})
if err := c.Parse(out, "dummy", &parsed); err == nil {
if _, err := c.Parse(out, "dummy", &parsed); err == nil {
t.Fatal("Parse() didn't error")
} else if diff := helpers.Diff(err.Error(), `invalid configuration:
invalid key "Module1.extra"
@@ -384,7 +384,7 @@ modules:
parsed := struct {
Modules []dummyConfiguration
}{}
if err := c.Parse(out, "dummy", &parsed); err != nil {
if _, err := c.Parse(out, "dummy", &parsed); err != nil {
t.Fatalf("Parse() error:\n%+v", err)
}
return parsed
@@ -452,7 +452,7 @@ modules:
parsed := struct {
Modules []*dummyConfiguration
}{}
if err := c.Parse(out, "dummy", &parsed); err != nil {
if _, err := c.Parse(out, "dummy", &parsed); err != nil {
t.Fatalf("Parse() error:\n%+v", err)
}
return parsed
@@ -524,7 +524,7 @@ func TestDevNullDefault(t *testing.T) {
var parsed dummyConfiguration
out := bytes.NewBuffer([]byte{})
if err := c.Parse(out, "dummy", &parsed); err != nil {
if _, err := c.Parse(out, "dummy", &parsed); err != nil {
t.Fatalf("Parse() error:\n%+v", err)
}
// Expected configuration
@@ -603,7 +603,7 @@ module2:
},
}
out := bytes.NewBuffer([]byte{})
if err := c.Parse(out, "dummy", &parsed); err != nil {
if _, err := c.Parse(out, "dummy", &parsed); err != nil {
t.Fatalf("Parse() error:\n%+v", err)
}
if diff := helpers.Diff(parsed, expected); diff != "" {

View File

@@ -60,7 +60,7 @@ manage collected flows.`,
RunE: func(cmd *cobra.Command, args []string) error {
config := ConsoleConfiguration{}
ConsoleOptions.Path = args[0]
if err := ConsoleOptions.Parse(cmd.OutOrStdout(), "console", &config); err != nil {
if _, err := ConsoleOptions.Parse(cmd.OutOrStdout(), "console", &config); err != nil {
return err
}

View File

@@ -57,7 +57,7 @@ and answers SNMP requests.`,
RunE: func(cmd *cobra.Command, args []string) error {
config := DemoExporterConfiguration{}
DemoExporterOptions.Path = args[0]
if err := DemoExporterOptions.Parse(cmd.OutOrStdout(), "demo-exporter", &config); err != nil {
if _, err := DemoExporterOptions.Parse(cmd.OutOrStdout(), "demo-exporter", &config); err != nil {
return err
}

View File

@@ -51,7 +51,7 @@ and export to Kafka.`,
RunE: func(cmd *cobra.Command, args []string) error {
config := InletConfiguration{}
InletOptions.Path = args[0]
if err := InletOptions.Parse(cmd.OutOrStdout(), "inlet", &config); err != nil {
if _, err := InletOptions.Parse(cmd.OutOrStdout(), "inlet", &config); err != nil {
return err
}

View File

@@ -6,9 +6,13 @@ package cmd
import (
"errors"
"fmt"
"io"
"path/filepath"
"reflect"
"slices"
"sync/atomic"
"github.com/fsnotify/fsnotify"
"github.com/gin-gonic/gin"
"github.com/go-viper/mapstructure/v2"
"github.com/spf13/cobra"
@@ -27,14 +31,15 @@ import (
// OrchestratorConfiguration represents the configuration file for the orchestrator command.
type OrchestratorConfiguration struct {
Reporting reporter.Configuration
HTTP httpserver.Configuration
ClickHouse clickhouse.Configuration
ClickHouseDB clickhousedb.Configuration
Kafka kafka.Configuration
GeoIP geoip.Configuration
Orchestrator orchestrator.Configuration `mapstructure:",squash" yaml:",inline"`
Schema schema.Configuration
AutomaticRestart bool
Reporting reporter.Configuration
HTTP httpserver.Configuration
ClickHouse clickhouse.Configuration
ClickHouseDB clickhousedb.Configuration
Kafka kafka.Configuration
GeoIP geoip.Configuration
Orchestrator orchestrator.Configuration `mapstructure:",squash" yaml:",inline"`
Schema schema.Configuration
// Other service configurations
Inlet []InletConfiguration `validate:"dive"`
Outlet []OutletConfiguration `validate:"dive"`
@@ -51,14 +56,15 @@ func (c *OrchestratorConfiguration) Reset() {
consoleConfiguration := ConsoleConfiguration{}
consoleConfiguration.Reset()
*c = OrchestratorConfiguration{
Reporting: reporter.DefaultConfiguration(),
HTTP: httpserver.DefaultConfiguration(),
ClickHouse: clickhouse.DefaultConfiguration(),
ClickHouseDB: clickhousedb.DefaultConfiguration(),
Kafka: kafka.DefaultConfiguration(),
GeoIP: geoip.DefaultConfiguration(),
Orchestrator: orchestrator.DefaultConfiguration(),
Schema: schema.DefaultConfiguration(),
AutomaticRestart: true,
Reporting: reporter.DefaultConfiguration(),
HTTP: httpserver.DefaultConfiguration(),
ClickHouse: clickhouse.DefaultConfiguration(),
ClickHouseDB: clickhousedb.DefaultConfiguration(),
Kafka: kafka.DefaultConfiguration(),
GeoIP: geoip.DefaultConfiguration(),
Orchestrator: orchestrator.DefaultConfiguration(),
Schema: schema.DefaultConfiguration(),
// Other service configurations
Inlet: []InletConfiguration{inletConfiguration},
Outlet: []OutletConfiguration{outletConfiguration},
@@ -83,6 +89,7 @@ var orchestratorCmd = &cobra.Command{
components and centralizes configuration of the various other components.`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
restart:
config := OrchestratorConfiguration{}
OrchestratorOptions.Path = args[0]
OrchestratorOptions.BeforeDump = func(metadata mapstructure.Metadata) {
@@ -108,15 +115,42 @@ components and centralizes configuration of the various other components.`,
config.Console[idx].Schema = config.Schema
}
}
if err := OrchestratorOptions.Parse(cmd.OutOrStdout(), "orchestrator", &config); err != nil {
// Parse and check the configuration a first time to start monitoring
// file changes if automatic restart is enabled.
paths, err := OrchestratorOptions.Parse(cmd.OutOrStdout(), "orchestrator", &config)
if err != nil {
return err
}
OrchestratorOptions.Dump = false
// Start a few of the components we need for automatic restart
r, err := reporter.New(config.Reporting)
if err != nil {
return fmt.Errorf("unable to initialize reporter: %w", err)
}
return orchestratorStart(r, config, OrchestratorOptions.CheckMode)
daemonComponent, err := daemon.New(r)
if err != nil {
return fmt.Errorf("unable to initialize daemon component: %w", err)
}
configurationModified := atomic.Bool{}
if config.AutomaticRestart && !OrchestratorOptions.CheckMode {
orchestratorWatch(r, daemonComponent, paths, &configurationModified)
}
// Then, a second time for the remaining of the configuration.
config = OrchestratorConfiguration{}
if _, err := OrchestratorOptions.Parse(cmd.OutOrStdout(), "orchestrator", &config); err != nil {
return err
}
if err := orchestratorStart(r, config, daemonComponent, OrchestratorOptions.CheckMode); err != nil {
return err
}
if configurationModified.Load() {
goto restart
}
return nil
},
}
@@ -128,11 +162,7 @@ func init() {
"Check configuration, but does not start")
}
func orchestratorStart(r *reporter.Reporter, config OrchestratorConfiguration, checkOnly bool) error {
daemonComponent, err := daemon.New(r)
if err != nil {
return fmt.Errorf("unable to initialize daemon component: %w", err)
}
func orchestratorStart(r *reporter.Reporter, config OrchestratorConfiguration, daemonComponent daemon.Component, checkOnly bool) error {
httpComponent, err := httpserver.New(r, config.HTTP, httpserver.Dependencies{
Daemon: daemonComponent,
})
@@ -208,6 +238,73 @@ func orchestratorStart(r *reporter.Reporter, config OrchestratorConfiguration, c
return StartStopComponents(r, daemonComponent, components)
}
// orchestratorWatch will listen to changes to the given path and trigger a
// restart of the orchestrator if any. When a modification is detected, the
// modified chan is closed. The internal goroutine is also stopped if there the
// modified chan is closed.
func orchestratorWatch(r *reporter.Reporter, daemonComponent daemon.Component, paths []string, modified *atomic.Bool) {
r.Info().Strs("paths", paths).Msg("watching loaded configuration files for changes")
watcher, err := fsnotify.NewWatcher()
if err != nil {
r.Err(err).Msg("cannot setup watcher for configuration changes")
return
}
for _, path := range paths {
if err := watcher.Add(filepath.Dir(path)); err != nil {
r.Err(err).Msg("cannot watch configuration file")
watcher.Close()
return
}
}
go func() {
defer watcher.Close()
for {
select {
case err, ok := <-watcher.Errors:
if !ok {
r.Error().Msg("configuration file watcher died")
}
r.Err(err).Msg("error from configuration file watcher")
return
case event, ok := <-watcher.Events:
if !ok {
r.Error().Msg("configuration file watcher died")
return
}
if event.Has(fsnotify.Create) || event.Has(fsnotify.Write) {
// Check if we have one of the monitored path matching
r.Debug().Str("name", event.Name).Msg("detected potential configuration change")
found := false
for _, path := range paths {
if filepath.Clean(event.Name) == path {
found = true
break
}
}
if !found {
continue
}
// Check if the configuration is correct
_, err = OrchestratorOptions.Parse(io.Discard, "orchestrator", &OrchestratorConfiguration{})
if err != nil {
r.Err(err).Msg("cannot validate new configuration, not reloading")
continue
}
// Request termination to reexec
r.Debug().Msg("request a restart on configuration change")
modified.Store(true)
daemonComponent.Terminate()
return
}
case <-daemonComponent.Terminated():
return
}
}
}()
}
// orchestratorGeoIPMigrationHook migrates GeoIP configuration from inlet
// component to clickhouse component
func orchestratorGeoIPMigrationHook() mapstructure.DecodeHookFunc {

View File

@@ -5,13 +5,17 @@ package cmd
import (
"bytes"
"io"
"os"
"path/filepath"
"reflect"
"strconv"
"strings"
"sync/atomic"
"testing"
"time"
"akvorado/common/daemon"
"akvorado/common/helpers"
"akvorado/common/helpers/yaml"
"akvorado/common/reporter"
@@ -21,7 +25,7 @@ func TestOrchestratorStart(t *testing.T) {
r := reporter.NewMock(t)
config := OrchestratorConfiguration{}
config.Reset()
if err := orchestratorStart(r, config, true); err != nil {
if err := orchestratorStart(r, config, daemon.NewMock(t), true); err != nil {
t.Fatalf("orchestratorStart() error:\n%+v", err)
}
}
@@ -108,3 +112,70 @@ func TestOrchestrator(t *testing.T) {
t.Errorf("`orchestrator` error:\n%+v", err)
}
}
func TestOrchestratorWatch(t *testing.T) {
tmp := t.TempDir()
if err := os.CopyFS(tmp, os.DirFS("../config")); err != nil {
t.Fatalf("CopyFS() error:\n%+v", err)
}
OrchestratorOptions.Path = filepath.Join(tmp, "akvorado.yaml")
config := OrchestratorConfiguration{}
paths, err := OrchestratorOptions.Parse(io.Discard, "orchestrator", &config)
if err != nil {
t.Fatalf("Parse() error:\n%+v", err)
}
modified := atomic.Bool{}
r := reporter.NewMock(t)
daemonComponent, err := daemon.New(r)
if err != nil {
t.Fatalf("daemon.New() error:\n%+v", err)
}
orchestratorWatch(r, daemonComponent, paths, &modified)
daemonComponent.Start()
// Add a file: no change
if err := os.WriteFile(filepath.Join(tmp, "titi.yaml"), []byte("---\n"), 0o666); err != nil {
t.Fatalf("WriteFile() error:\n%+v", err)
}
time.Sleep(20 * time.Millisecond)
if modified.Load() {
t.Fatal("orchestratorWatch() detected a change that should not be")
}
// Make a configuration error: no change
if err := os.Rename(filepath.Join(tmp, "inlet.yaml"), filepath.Join(tmp, "inlet-old.yaml")); err != nil {
t.Fatalf("Rename() error:\n%+v", err)
}
if err := os.WriteFile(filepath.Join(tmp, "inlet-new.yaml"), []byte("---\nflows: 767643\n"), 0o666); err != nil {
t.Fatalf("WriteFile() error:\n%+v", err)
}
if err := os.Rename(filepath.Join(tmp, "inlet-new.yaml"), filepath.Join(tmp, "inlet.yaml")); err != nil {
t.Fatalf("Rename() error:\n%+v", err)
}
time.Sleep(20 * time.Millisecond)
if modified.Load() {
t.Fatal("orchestratorWatch() detected a change that should be rejected")
}
// Modify a file: change
f, err := os.OpenFile(filepath.Join(tmp, "outlet.yaml"), os.O_APPEND|os.O_WRONLY, 0o644)
if err != nil {
t.Fatalf("OpenFile() error:\n%+v", err)
}
f.WriteString("\n")
f.Close()
if err := os.Rename(filepath.Join(tmp, "inlet-old.yaml"), filepath.Join(tmp, "inlet.yaml")); err != nil {
t.Fatalf("Rename() error:\n%+v", err)
}
// Check there is a restart attempted
select {
case <-daemonComponent.Terminated():
case <-time.After(time.Second):
t.Fatalf("orchestratorWatch() did not restart the service")
}
daemonComponent.Stop()
if !modified.Load() {
t.Fatal("orchestratorWatch() did not register a change")
}
}

View File

@@ -75,7 +75,7 @@ enrichment and export to Kafka.`,
RunE: func(cmd *cobra.Command, args []string) error {
config := OutletConfiguration{}
OutletOptions.Path = args[0]
if err := OutletOptions.Parse(cmd.OutOrStdout(), "outlet", &config); err != nil {
if _, err := OutletOptions.Parse(cmd.OutOrStdout(), "outlet", &config); err != nil {
return err
}

View File

@@ -40,7 +40,7 @@ var RootCmd = &cobra.Command{
log.Logger = zerolog.New(w).With().Timestamp().Logger()
}
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if debug {
if debug || helpers.Testing() {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
},

View File

@@ -4,10 +4,12 @@ The orchestrator service is configured through YAML files (provided in the
`config/` directory) and includes the configuration of the other services.
> [!TIP]
> Other services query the orchestrator through HTTP on startup to get their
> configuration. This means that if you change the configuration for one
> service, you always need to restart the orchestrator first, then the service
> whose configuration has changed.
> configuration. By default, the orchestrator restarts automatically if it
> detects a configuration change, but this may fail if there is a configuration
> error. Look at the logs of the orchestrator service or restart it if you think
> a configuration change is not applied.
You can get the default configuration with `docker compose run --rm --no-deps
akvorado-orchestrator orchestrator --dump --check /dev/null`. Note that
@@ -621,7 +623,9 @@ efficiently.
## Orchestrator service
The three main components of the orchestrator service are `schema`,
`clickhouse`, and `kafka`.
`clickhouse`, and `kafka`. The `automatic-restart` directive tells the
orchestrator to watch for configuration changes and restart if there are any. It
is enable by default.
### Schema

View File

@@ -12,6 +12,7 @@ identified with a specific icon:
## Unreleased
-*orchestrator*: automatic restart of the orchestrator service on configuration change
- 🌱 *console*: submit form on Ctrl-Enter or Cmd-Enter while selecting dimensions
- 🌱 *cmd*: make `akvorado version` shorter (use `-d` for full output)
- 🌱 *build*: switch from NPM to PNPM for JavaScript build

View File

@@ -123,12 +123,12 @@ func (c *Component) Start() error {
c.r.Err(err).Msg("cannot setup watcher for GeoIP databases")
return fmt.Errorf("cannot setup watcher: %w", err)
}
dirs := map[string]struct{}{}
dirs := map[string]bool{}
for _, path := range c.config.GeoDatabase {
dirs[filepath.Dir(path)] = struct{}{}
dirs[filepath.Dir(path)] = true
}
for _, path := range c.config.ASNDatabase {
dirs[filepath.Dir(path)] = struct{}{}
dirs[filepath.Dir(path)] = true
}
for k := range dirs {
if err := watcher.Add(k); err != nil {