common/schema: make alias/generate configurable

This commit is contained in:
Marvin Gaube
2023-03-23 15:25:43 +01:00
parent 59c0e25e84
commit 00026beabd
4 changed files with 35 additions and 1 deletions

View File

@@ -15,6 +15,8 @@ type Configuration struct {
MainTableOnly []ColumnKey
// NotMainTableOnly lists columns to be moved out of the main table only
NotMainTableOnly []ColumnKey `validate:"ninterfield=MainTableOnly"`
// Generate lists columns that shall be generated at ingest time instead of being computed at query time
Generate []ColumnKey
}
// DefaultConfiguration returns the default configuration for the schema component.

View File

@@ -22,6 +22,16 @@ type Component struct {
// New creates a new schema component.
func New(config Configuration) (*Component, error) {
schema := flows()
for _, k := range config.Generate {
if column, ok := schema.LookupColumnByKey(k); ok {
if column.ClickHouseAlias != "" {
column.ClickHouseGenerateFrom = column.ClickHouseAlias
column.ClickHouseAlias = ""
} else {
return nil, fmt.Errorf("no alias configured for %s that can be converted to generate", k)
}
}
}
for _, k := range config.Enabled {
if column, ok := schema.LookupColumnByKey(k); ok {
column.Disabled = false

View File

@@ -400,6 +400,9 @@ removing the `SrcCountry` and `DstCountry`, one can use:
```yaml
schema:
generate:
- SrcNetPrefix
- DstNetPrefix
disabled:
- SrcCountry
- DstCountry
@@ -408,6 +411,9 @@ schema:
- DstVlan
```
With `generate`, you can control whether a dimension derived from other dimensions (e.g. the prefixes) is computed at query time (the default) or generated at ingest time.
Generating a dimension at ingest time reduces query time, but increases storage needs.
You can get the list of columns you can enable or disable with `akvorado
version`. Disabling a column won't delete existing data.

View File

@@ -396,9 +396,10 @@ TTL TimeReceived + toIntervalSecond({{ .TTL }})
CompressionCodec string `ch:"compression_codec"`
IsSortingKey uint8 `ch:"is_in_sorting_key"`
IsPrimaryKey uint8 `ch:"is_in_primary_key"`
DefaultKind string `ch:"default_kind"`
}
if err := c.d.ClickHouse.Select(ctx, &existingColumns, `
SELECT name, type, compression_codec, is_in_sorting_key, is_in_primary_key
SELECT name, type, compression_codec, is_in_sorting_key, is_in_primary_key, default_kind
FROM system.columns
WHERE database = $1
AND table = $2
@@ -433,6 +434,21 @@ outer:
modifyTypeOrCodec = true
}
}
// Check whether the existence of an alias has changed. ALIAS expression changes are not yet checked here.
if (wantedColumn.ClickHouseAlias != "") != (existingColumn.DefaultKind == "ALIAS") {
// either the column was an alias and should be none, or the other way around. Either way, we need to recreate.
c.r.Logger.Debug().Msg(fmt.Sprintf("column %s alias content has changed, recreating. New ALIAS: %s", existingColumn.Name, wantedColumn.ClickHouseAlias))
err := c.d.ClickHouse.Exec(ctx,
fmt.Sprintf("ALTER TABLE %s DROP COLUMN %s", tableName, existingColumn.Name))
if err != nil {
return fmt.Errorf("cannot drop %s from %s to cleanup aliasing: %w",
existingColumn.Name, tableName, err)
}
// Schedule adding it back
modifications = append(modifications,
fmt.Sprintf("ADD COLUMN %s AFTER %s", wantedColumn.ClickHouseDefinition(), previousColumn))
}
if resolution.Interval > 0 && slices.Contains(c.d.Schema.ClickHousePrimaryKeys(), wantedColumn.Name) && existingColumn.IsPrimaryKey == 0 {
return fmt.Errorf("table %s, column %s should be a primary key, cannot change that",
tableName, wantedColumn.Name)