outlet/metadata: synchronous fetching of metadata
As the outlet is not under tight time constraints, we can simplify the fetching of metadata by doing it synchronously. We keep the breaker design to avoid continuously polling a source that is not responsive, so we can still lose some data if we are unable to poll metadata. We also keep the background cache refresh, and we introduce a grace time of 1 minute to avoid losing data during startup. For the static provider, we wait for the remote data sources to be ready. For the gNMI provider, there are target windows of availability during which the cached data can be polled. The SNMP provider loses its ability to coalesce requests.
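To make the new contract concrete, here is a minimal caller sketch, assuming the provider.Query, provider.Answer and provider.ErrSkipProvider names from the diff below; the lookupOnce helper and its timeout plumbing are illustrative, not code from this commit.

package main

import (
    "context"
    "errors"
    "time"

    "akvorado/outlet/metadata/provider"
)

// lookupOnce drives a single provider synchronously, bounded by the
// configured query timeout. It is a hypothetical helper, not part of
// this commit.
func lookupOnce(p provider.Provider, q provider.Query, timeout time.Duration) (provider.Answer, error) {
    ctx, cancel := context.WithTimeout(context.Background(), timeout)
    defer cancel()
    answer, err := p.Query(ctx, q)
    if errors.Is(err, provider.ErrSkipProvider) {
        // This provider does not handle this exporter: try the next one.
        return provider.Answer{}, err
    }
    if err != nil {
        // Transient error: the breaker logic can back off from this exporter.
        return provider.Answer{}, err
    }
    // answer.Found is false on a definitive miss; the flow is then dropped.
    return answer, nil
}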
@@ -226,7 +226,9 @@ func OutletConfigurationUnmarshallerHook() mapstructure.DecodeHookFunc {
             continue
         }
         if helpers.MapStructureMatchName(k.String(), "PollerCoalesce") {
-            metadataValue["MaxBatchRequests"] = snmpMap.MapIndex(snmpKeys[i]).Interface()
+            continue
+        }
+        if helpers.MapStructureMatchName(k.String(), "Workers") {
             continue
         }
     metadataConfig := reflect.TypeOf(metadata.Configuration{})
@@ -4,7 +4,7 @@ paths:
   - kafka:9092
 inlet.0.flow.inputs.0.decoder: netflow
 inlet.0.flow.inputs.1.decoder: sflow
-outlet.0.metadata.workers: 10
+outlet.0.metadata.providers.0.type: snmp
 inlet.0.kafka.brokers:
   - kafka:9092
 outlet.0.kafka.brokers:
@@ -1,12 +1,12 @@
 ---
 paths:
   outlet.0.metadata:
-    workers: 10
-    maxbatchrequests: 20
     cacheduration: 30m0s
     cacherefresh: 30m0s
     cachecheckinterval: 2m0s
     cachepersistfile: ""
+    initialdelay: 1m0s
+    querytimeout: 5s
     providers:
       - type: snmp
         pollerretries: 3
@@ -161,10 +161,7 @@ func (c *Component[T]) Start() error {
         customBackoff := backoff.NewExponentialBackOff()
         customBackoff.MaxElapsedTime = 0
         customBackoff.MaxInterval = source.Interval
-        customBackoff.InitialInterval = source.Interval / 10
-        if customBackoff.InitialInterval > time.Second {
-            customBackoff.InitialInterval = time.Second
-        }
+        customBackoff.InitialInterval = min(time.Second, source.Interval/10)
         return backoff.NewTicker(customBackoff)
     }
     newRegularTicker := func() *time.Ticker {
@@ -1,6 +1,5 @@
 ---
 metadata:
-  workers: 10
   provider:
     type: snmp
     credentials:
@@ -253,8 +253,9 @@ following keys are accepted:
   about to expire or need an update
 - `cache-persist-file` tells where to store cached data on shutdown and
   read them back on startup
-- `workers` tell how many workers to spawn to fetch metadata.
-- `max-batch-requests` define how many requests can be batched together
+- `query-timeout` tells how long to wait for a provider to answer a query
+- `initial-delay` tells how long to wait after starting before applying the
+  standard query timeout
 - `providers` defines the provider configurations

 As flows missing any interface information are discarded, persisting the cache
@@ -136,12 +136,10 @@ the name `clickhouse` or on `localhost`.
 ## SNMP

 SNMP polling is accomplished with [GoSNMP](https://github.com/gosnmp/gosnmp).
-The cache layer is tailored specifically for our needs. Cached information
-can expire if not accessed or refreshed periodically.
-Some coalescing of the requests is done when they are queued.
-This adds some code complexity, maybe it was not worth it.
-If an exporter fails to answer too frequently, a backoff will be triggered
-for a minute to ensure it does not eat up all the workers' resources.
+The cache layer is tailored specifically for our needs. Cached information can
+expire if not accessed or refreshed periodically. If an exporter fails to answer
+too frequently, a backoff will be triggered for a minute to ensure it does not
+eat up all the workers' resources.

 Testing is done by another implementation of an [SNMP
 agent](https://github.com/slayercat/GoSNMPServer).
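The "backoff for a minute" behavior described in the documentation above is, in essence, a small circuit breaker per exporter. The following sketch is only a distillation of that idea under assumed names (breaker, allow, fail); it is not the actual implementation in the repository.

package main

import (
    "sync"
    "time"
)

// breaker rejects queries for a minute once an exporter has failed
// too many times, so a dead exporter cannot eat up all resources.
type breaker struct {
    mu       sync.Mutex
    failures int
    until    time.Time
}

// allow reports whether a query may be attempted right now.
func (b *breaker) allow() bool {
    b.mu.Lock()
    defer b.mu.Unlock()
    return time.Now().After(b.until)
}

// fail records a failure; past the threshold, trip for one minute.
func (b *breaker) fail(threshold int) {
    b.mu.Lock()
    defer b.mu.Unlock()
    b.failures++
    if b.failures >= threshold {
        b.failures = 0
        b.until = time.Now().Add(time.Minute)
    }
}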
@@ -164,6 +164,9 @@ services:
     - ./clickhouse/keeper.xml:/etc/clickhouse-keeper/keeper_config.d/keeper.xml

   srlinux:
+    # The SR Linux container does not like to be restarted. If you get:
+    # Error: Peer netns reference is invalid.
+    # Be sure to put it down before starting it.
     extends:
       file: versions.yml
       service: srlinux
@@ -34,8 +34,8 @@ func (w *worker) enrichFlow(exporterIP netip.Addr, exporterStr string) (skip boo
     c := w.c

     if flow.InIf != 0 {
-        answer, ok := c.d.Metadata.Lookup(t, exporterIP, uint(flow.InIf))
-        if !ok {
+        answer := c.d.Metadata.Lookup(t, exporterIP, uint(flow.InIf))
+        if !answer.Found {
             c.metrics.flowsErrors.WithLabelValues(exporterStr, "SNMP cache miss").Inc()
             skip = true
         } else {
@@ -57,8 +57,8 @@ func (w *worker) enrichFlow(exporterIP netip.Addr, exporterStr string) (skip boo
     }

     if flow.OutIf != 0 {
-        answer, ok := c.d.Metadata.Lookup(t, exporterIP, uint(flow.OutIf))
-        if !ok {
+        answer := c.d.Metadata.Lookup(t, exporterIP, uint(flow.OutIf))
+        if !answer.Found {
             // Only register a cache miss if we don't have one.
             // TODO: maybe we could do one SNMP query for both interfaces.
             if !skip {
@@ -645,9 +645,6 @@ ClassifyProviderRegex(Interface.Description, "^Transit: ([^ ]+)", "$1")`,
                 t.Fatalf("proto.Marshal() error: %v", err)
             }

-            // Test twice to check cache behavior
-            incoming <- data
-            time.Sleep(100 * time.Millisecond)
             incoming <- data
             time.Sleep(100 * time.Millisecond)

@@ -666,10 +663,9 @@ ClassifyProviderRegex(Interface.Description, "^Transit: ([^ ]+)", "$1")`,
             }
             gotMetrics := r.GetMetrics("akvorado_outlet_core_", "-processing_", "flows_", "received_", "forwarded_")
             expectedMetrics := map[string]string{
-                `flows_errors_total{error="SNMP cache miss",exporter="192.0.2.142"}`: "1",
                 `flows_http_clients`: "0",
-                `received_flows_total{exporter="192.0.2.142"}`: "2",
-                `received_raw_flows_total`: "2",
+                `received_flows_total{exporter="192.0.2.142"}`: "1",
+                `received_raw_flows_total`: "1",
             }
             if tc.OutputFlow != nil {
                 expectedMetrics[`forwarded_flows_total{exporter="192.0.2.142"}`] = "1"
@@ -140,33 +140,26 @@ func TestCore(t *testing.T) {
     clickhouseMessages = clickhouseMessages[:0]
     clickhouseMessagesMutex.Unlock()

-    // Inject several messages with a cache miss from the SNMP component.
+    // Inject several messages
     injectFlow(flowMessage("192.0.2.142", 434, 677))
-    injectFlow(flowMessage("192.0.2.143", 434, 677))
-    injectFlow(flowMessage("192.0.2.143", 437, 677))
-    injectFlow(flowMessage("192.0.2.143", 434, 679))
+    injectFlow(flowMessage("192.0.2.143", 437, 679))
     time.Sleep(20 * time.Millisecond)

     gotMetrics := r.GetMetrics("akvorado_outlet_core_", "-flows_processing_")
     expectedMetrics := map[string]string{
         `classifier_exporter_cache_items_total`: "0",
         `classifier_interface_cache_items_total`: "0",
-        `flows_errors_total{error="SNMP cache miss",exporter="192.0.2.142"}`: "1",
-        `flows_errors_total{error="SNMP cache miss",exporter="192.0.2.143"}`: "3",
-        `received_flows_total{exporter="192.0.2.142"}`: "1",
-        `received_flows_total{exporter="192.0.2.143"}`: "3",
-        `received_raw_flows_total`: "4",
+        `received_flows_total{exporter="192.0.2.142"}`: "1",
+        `received_flows_total{exporter="192.0.2.143"}`: "1",
+        `forwarded_flows_total{exporter="192.0.2.142"}`: "1",
+        `forwarded_flows_total{exporter="192.0.2.143"}`: "1",
+        `received_raw_flows_total`: "2",
         `flows_http_clients`: "0",
     }
     if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" {
         t.Fatalf("Metrics (-got, +want):\n%s", diff)
     }

-    // Inject again the messages, this time, we will get a cache hit!
-    injectFlow(flowMessage("192.0.2.142", 434, 677))
-    injectFlow(flowMessage("192.0.2.143", 437, 679))
-    time.Sleep(20 * time.Millisecond)
-
     // Should have 2 more flows in clickhouseMessages now
     clickhouseMessagesMutex.Lock()
     clickhouseMessagesLen := len(clickhouseMessages)
@@ -175,24 +168,6 @@ func TestCore(t *testing.T) {
         t.Fatalf("Expected at least 2 flows in clickhouseMessages, got %d", clickhouseMessagesLen)
     }

-    time.Sleep(20 * time.Millisecond)
-    gotMetrics = r.GetMetrics("akvorado_outlet_core_", "classifier_", "-flows_processing_", "flows_", "received_", "forwarded_")
-    expectedMetrics = map[string]string{
-        `classifier_exporter_cache_items_total`: "0",
-        `classifier_interface_cache_items_total`: "0",
-        `flows_errors_total{error="SNMP cache miss",exporter="192.0.2.142"}`: "1",
-        `flows_errors_total{error="SNMP cache miss",exporter="192.0.2.143"}`: "3",
-        `received_flows_total{exporter="192.0.2.142"}`: "2",
-        `received_flows_total{exporter="192.0.2.143"}`: "4",
-        `received_raw_flows_total`: "6",
-        `forwarded_flows_total{exporter="192.0.2.142"}`: "1",
-        `forwarded_flows_total{exporter="192.0.2.143"}`: "1",
-        `flows_http_clients`: "0",
-    }
-    if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" {
-        t.Fatalf("Metrics (-got, +want):\n%s", diff)
-    }
-
     // Now, check we get the message we expect
     clickhouseMessagesMutex.Lock()
     clickhouseMessages = clickhouseMessages[:0]
@@ -221,15 +196,13 @@ func TestCore(t *testing.T) {
     expectedMetrics = map[string]string{
         `classifier_exporter_cache_items_total`: "0",
         `classifier_interface_cache_items_total`: "0",
-        `flows_errors_total{error="SNMP cache miss",exporter="192.0.2.142"}`: "1",
-        `flows_errors_total{error="SNMP cache miss",exporter="192.0.2.143"}`: "3",
         `flows_errors_total{error="sampling rate missing",exporter="192.0.2.142"}`: "1",
-        `received_flows_total{exporter="192.0.2.142"}`: "4",
-        `received_flows_total{exporter="192.0.2.143"}`: "4",
+        `received_flows_total{exporter="192.0.2.142"}`: "3",
+        `received_flows_total{exporter="192.0.2.143"}`: "1",
         `forwarded_flows_total{exporter="192.0.2.142"}`: "2",
         `forwarded_flows_total{exporter="192.0.2.143"}`: "1",
         `flows_http_clients`: "0",
-        `received_raw_flows_total`: "8",
+        `received_raw_flows_total`: "4",
     }
     if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" {
         t.Fatalf("Metrics (-got, +want):\n%s", diff)
@@ -27,10 +27,11 @@ type Configuration struct {
     // Provider defines the configuration of the providers to use
     Providers []ProviderConfiguration

-    // Workers define the number of workers used to poll metadata
-    Workers int `validate:"min=1"`
-    // MaxBatchRequests define how many requests to pass to a worker at once if possible
-    MaxBatchRequests int `validate:"min=0"`
+    // QueryTimeout defines how long to wait for a provider to answer.
+    QueryTimeout time.Duration `validate:"min=100ms,max=1m"`
+    // InitialDelay defines how long to wait at start (when receiving the first
+    // packets) before applying the query timeout
+    InitialDelay time.Duration `validate:"min=1s,max=1h"`
 }

 // DefaultConfiguration represents the default configuration for the metadata provider.
@@ -39,9 +40,8 @@ func DefaultConfiguration() Configuration {
         CacheDuration:      30 * time.Minute,
         CacheRefresh:       time.Hour,
         CacheCheckInterval: 2 * time.Minute,
-        CachePersistFile:   "",
-        Workers:            1,
-        MaxBatchRequests:   10,
+        QueryTimeout:       5 * time.Second,
+        InitialDelay:       time.Minute,
     }
 }

@@ -52,7 +52,7 @@ type ProviderConfiguration struct {
 }

 // MarshalYAML undoes ConfigurationUnmarshallerHook().
-func (pc ProviderConfiguration) MarshalYAML() (interface{}, error) {
+func (pc ProviderConfiguration) MarshalYAML() (any, error) {
     return helpers.ParametrizedConfigurationMarshalYAML(pc, providers)
 }

@@ -67,4 +67,5 @@ func init() {
         helpers.RenameKeyUnmarshallerHook(Configuration{}, "Provider", "Providers"))
     helpers.RegisterMapstructureUnmarshallerHook(
         helpers.ParametrizedConfigurationUnmarshallerHook(ProviderConfiguration{}, providers))
+    helpers.RegisterMapstructureDeprecatedFields[Configuration]("Workers", "MaxBatchRequests")
 }
@@ -22,7 +22,8 @@ import (
 // exporterState is the state of an exporter.
 type exporterState struct {
     Name       string
-    Ready      bool
+    ready      bool      // ready for the first time
+    Ready      chan bool // not polling, data ready
     Interfaces map[uint]provider.Interface
 }

@@ -243,49 +244,70 @@ retryDetect:
         l.Debug().Msg("polling")
         start := time.Now()
         subscribeResp, err := tg.SubscribeOnce(ctx, subscribeReq)
-        p.metrics.times.WithLabelValues(exporterStr).Observe(time.Now().Sub(start).Seconds())
+        p.metrics.times.WithLabelValues(exporterStr).Observe(time.Since(start).Seconds())
         if err == nil {
             events := subscribeResponsesToEvents(subscribeResp)
             p.metrics.paths.WithLabelValues(exporterStr).Set(float64(len(events)))
             p.stateLock.Lock()
             state.update(events, model)
-            state.Ready = true
+            state.ready = true
             p.stateLock.Unlock()
             l.Debug().Msg("state updated")
             p.metrics.ready.WithLabelValues(exporterStr).Set(1)
             p.metrics.updates.WithLabelValues(exporterStr).Inc()

-            // On success, wait a bit before next refresh interval
+            // In the following window, we consider ourselves ready and unlock
+            // waiting clients to check for data.
+
+            // On success, wait a bit before next refresh interval and ignore
+            // any refresh requests.
             next := time.NewTimer(p.config.MinimalRefreshInterval)
-            select {
-            case <-ctx.Done():
-                next.Stop()
-                return
-            case <-next.C:
-            }
-            // Drain any message in refresh queue (we ignore them)
-            select {
-            case <-p.refresh:
-            default:
+        outerWaitRefreshTimer:
+            for {
+                select {
+                case state.Ready <- true:
+                case <-ctx.Done():
+                    next.Stop()
+                    return
+                case <-p.refresh:
+                case <-next.C:
+                    break outerWaitRefreshTimer
+                }
             }
             // Wait for a new message in refresh queue
-            select {
-            case <-ctx.Done():
-                return
-            case <-p.refresh:
+            l.Debug().Msg("wait for refresh request")
+        outerWaitRefresh:
+            for {
+                select {
+                case state.Ready <- true:
+                case <-ctx.Done():
+                    return
+                case <-p.refresh:
+                    break outerWaitRefresh
+                }
             }
             // Reset retry timer and do the next fresh
             retryFetchBackoff.Reset()
         } else {
-            // On error, retry a bit later
+            // On error, retry a bit later. While retrying, if we have an
+            // initial state, consider ourselves ready.
             l.Err(err).Msg("cannot poll")
             p.metrics.errors.WithLabelValues(exporterStr, "cannot poll").Inc()
             next := time.NewTimer(retryFetchBackoff.NextBackOff())
-            select {
-            case <-ctx.Done():
-                next.Stop()
-                return
-            case <-next.C:
+            var readyChan chan bool
+            if state.ready {
+                readyChan = state.Ready
+            }
+        outerWaitRetryTimer:
+            for {
+                select {
+                case readyChan <- true:
+                case <-ctx.Done():
+                    next.Stop()
+                    return
+                case <-next.C:
+                    break outerWaitRetryTimer
+                }
             }
         }
     }
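A note on the loop above: readiness is advertised by repeatedly offering a send on state.Ready while simultaneously waiting on a timer or a refresh request, so any number of blocked Query callers can be unblocked during the availability window. A stripped-down sketch of this pattern, with hypothetical names rather than code from the repository:

package main

import "time"

// advertiseUntil unblocks receivers on ready until the timer fires or
// done is closed. A successful receive means "the collector has data".
func advertiseUntil(ready chan bool, window time.Duration, done <-chan struct{}) {
    next := time.NewTimer(window)
    defer next.Stop()
    for {
        select {
        case ready <- true: // unblock one waiting client
        case <-done:
            return
        case <-next.C:
            return
        }
    }
}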
@@ -19,19 +19,16 @@ type Provider struct {
     config  *Configuration
     metrics metrics

-    put     func(provider.Update)
-    refresh chan bool
-
     state     map[netip.Addr]*exporterState
     stateLock sync.Mutex
+    refresh   chan bool
 }

 // New creates a new gNMI provider from configuration
-func (configuration Configuration) New(r *reporter.Reporter, put func(provider.Update)) (provider.Provider, error) {
+func (configuration Configuration) New(r *reporter.Reporter) (provider.Provider, error) {
     p := Provider{
         r:       r,
         config:  &configuration,
-        put:     put,
         state:   map[netip.Addr]*exporterState{},
         refresh: make(chan bool),
     }
@@ -40,42 +37,51 @@ func (configuration Configuration) New(r *reporter.Reporter, put func(provider.U
 }

 // Query queries exporter to get information through gNMI.
-func (p *Provider) Query(ctx context.Context, q *provider.BatchQuery) error {
+func (p *Provider) Query(ctx context.Context, q provider.Query) (provider.Answer, error) {
     p.stateLock.Lock()
-    defer p.stateLock.Unlock()
     state, ok := p.state[q.ExporterIP]
-    // If we don't have a collector for the provided IP, starts one. We should
-    // be sure we don't have several collectors for the same exporter, hence the
-    // write lock for everything.
     if !ok {
-        state := exporterState{}
-        p.state[q.ExporterIP] = &state
-        go p.startCollector(ctx, q.ExporterIP, &state)
+        state = &exporterState{
+            Ready: make(chan bool),
+        }
+        p.state[q.ExporterIP] = state
         p.metrics.collectorCount.Inc()
-        return nil
+        go p.startCollector(ctx, q.ExporterIP, state)
     }
-    // If the collector exists and already provided some data, populate the
-    // cache.
-    if state.Ready {
-        for _, ifindex := range q.IfIndexes {
-            p.put(provider.Update{
-                Query: provider.Query{
-                    ExporterIP: q.ExporterIP,
-                    IfIndex:    ifindex,
-                },
-                Answer: provider.Answer{
-                    Exporter: provider.Exporter{
-                        Name: state.Name,
-                    },
-                    Interface: state.Interfaces[ifindex],
-                },
-            })
-        }
-        // Also trigger a refresh
-        select {
-        case p.refresh <- true:
-        default:
-        }
-    }
-    return nil
+    // Trigger a refresh
+    select {
+    case p.refresh <- true:
+    default:
+    }
+
+    // Wait for the collector to be ready.
+    select {
+    case <-state.Ready:
+        // Most common case, keep the lock
+    default:
+        // Not ready, release the lock until ready
+        p.stateLock.Unlock()
+        select {
+        case <-state.Ready:
+            p.stateLock.Lock()
+        case <-ctx.Done():
+            p.metrics.errors.WithLabelValues(q.ExporterIP.Unmap().String(), "not ready").Inc()
+            return provider.Answer{}, ctx.Err()
+        }
+    }
+    defer p.stateLock.Unlock()
+
+    // Return the result from the state
+    iface, ok := state.Interfaces[q.IfIndex]
+    if !ok {
+        return provider.Answer{}, nil
+    }
+    return provider.Answer{
+        Found: true,
+        Exporter: provider.Exporter{
+            Name: state.Name,
+        },
+        Interface: iface,
+    }, nil
 }
@@ -722,18 +722,16 @@ commit now
     configP.Ports = helpers.MustNewSubnetMap(map[string]uint16{
         "::/0": netip.MustParseAddrPort(srLinuxGNMI).Port(),
     })
-    put := func(update provider.Update) {
-        got = append(got, fmt.Sprintf("%s %s %d %s %s %d",
-            update.ExporterIP.Unmap().String(), update.Exporter.Name,
-            update.IfIndex, update.Interface.Name, update.Interface.Description, update.Interface.Speed))
+    formatUpdate := func(exporter netip.Addr, iface string, answer provider.Answer) string {
+        return fmt.Sprintf("%s %v %s %s %s %s %d",
+            exporter.Unmap().String(), answer.Found, answer.Exporter.Name,
+            iface, answer.Interface.Name, answer.Interface.Description, answer.Interface.Speed)
     }
     r := reporter.NewMock(t)
-    p, err := configP.New(r, put)
+    p, err := configP.New(r)
     if err != nil {
         t.Fatalf("New() error:\n%+v", err)
     }
-    // Let's trigger a request now
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: lo, IfIndexes: []uint{641}})

     // We need the indexes
     subscribeReq, err := api.NewSubscribeRequest(
@@ -760,23 +758,27 @@ commit now
     t.Logf("indexes: %v", indexes)

     // Wait a bit
-    time.Sleep(500 * time.Millisecond)
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: lo, IfIndexes: []uint{indexes["name=ethernet-1/1"]}})
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: lo, IfIndexes: []uint{indexes["name=ethernet-1/2"]}})
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: lo,
-        IfIndexes: []uint{indexes["name=lag1"], indexes["name=ethernet-1/3"]}})
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: lo, IfIndexes: []uint{5}})
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: lo,
-        IfIndexes: []uint{indexes["name=ethernet-1/4,index=1"]}})
+    answer, _ := p.Query(context.Background(), provider.Query{ExporterIP: lo, IfIndex: indexes["name=ethernet-1/1"]})
+    got = append(got, formatUpdate(lo, "ethernet-1/1", answer))
+    answer, _ = p.Query(context.Background(), provider.Query{ExporterIP: lo, IfIndex: indexes["name=ethernet-1/2"]})
+    got = append(got, formatUpdate(lo, "ethernet-1/2", answer))
+    answer, _ = p.Query(context.Background(), provider.Query{ExporterIP: lo, IfIndex: indexes["name=lag1"]})
+    got = append(got, formatUpdate(lo, "lag1", answer))
+    answer, _ = p.Query(context.Background(), provider.Query{ExporterIP: lo, IfIndex: indexes["name=ethernet-1/3"]})
+    got = append(got, formatUpdate(lo, "ethernet-1/3", answer))
+    answer, _ = p.Query(context.Background(), provider.Query{ExporterIP: lo, IfIndex: 5})
+    got = append(got, formatUpdate(lo, "idx5", answer))
+    answer, _ = p.Query(context.Background(), provider.Query{ExporterIP: lo,
+        IfIndex: indexes["name=ethernet-1/4,index=1"]})
+    got = append(got, formatUpdate(lo, "ethernet-1/4,index=1", answer))

-    time.Sleep(50 * time.Millisecond)
     if diff := helpers.Diff(got, []string{
-        fmt.Sprintf("127.0.0.1 srlinux %d ethernet-1/1 1st interface 100000", indexes["name=ethernet-1/1"]),
-        fmt.Sprintf("127.0.0.1 srlinux %d ethernet-1/2 2nd interface 100000", indexes["name=ethernet-1/2"]),
-        fmt.Sprintf("127.0.0.1 srlinux %d lag1 lag interface 0", indexes["name=lag1"]),
-        fmt.Sprintf("127.0.0.1 srlinux %d ethernet-1/3 3rd interface 100000", indexes["name=ethernet-1/3"]),
-        "127.0.0.1 srlinux 5 0",
-        fmt.Sprintf("127.0.0.1 srlinux %d ethernet-1/4.1 4th interface 100000", indexes["name=ethernet-1/4,index=1"]),
+        "127.0.0.1 true srlinux ethernet-1/1 ethernet-1/1 1st interface 100000",
+        "127.0.0.1 true srlinux ethernet-1/2 ethernet-1/2 2nd interface 100000",
+        "127.0.0.1 true srlinux lag1 lag1 lag interface 0",
+        "127.0.0.1 true srlinux ethernet-1/3 ethernet-1/3 3rd interface 100000",
+        "127.0.0.1 false idx5 0",
+        "127.0.0.1 true srlinux ethernet-1/4,index=1 ethernet-1/4.1 4th interface 100000",
     }); diff != "" {
         t.Fatalf("Query() (-got, +want):\n%s", diff)
     }
@@ -812,20 +814,19 @@ commit now
     if resp.Failed != nil {
         t.Fatalf("SendConfig() error:\n%+v", resp.Failed)
    }
-    time.Sleep(500 * time.Millisecond) // We should exceed the second now and next request will trigger a refresh
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: lo, IfIndexes: []uint{indexes["name=ethernet-1/1"]}})
-    time.Sleep(300 * time.Millisecond) // Do it again to get the fresh value
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: lo, IfIndexes: []uint{indexes["name=ethernet-1/1"]}})
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: lo,
-        IfIndexes: []uint{indexes["name=ethernet-1/4,index=1"]}})
-    time.Sleep(50 * time.Millisecond)
+    time.Sleep(time.Second) // We should exceed the second now and next request will trigger a refresh
+    t.Log("start queries")
+    answer, _ = p.Query(context.Background(),
+        provider.Query{ExporterIP: lo, IfIndex: indexes["name=ethernet-1/1"]})
+    got = append(got, formatUpdate(lo, "ethernet-1/1", answer))
+    answer, _ = p.Query(context.Background(),
+        provider.Query{ExporterIP: lo, IfIndex: indexes["name=ethernet-1/4,index=1"]})
+    got = append(got, formatUpdate(lo, "ethernet-1/4,index=1", answer))
     if diff := helpers.Diff(got, []string{
-        // Previous value
-        fmt.Sprintf("127.0.0.1 srlinux %d ethernet-1/1 1st interface 100000", indexes["name=ethernet-1/1"]),
         // Fresh value
-        fmt.Sprintf("127.0.0.1 srlinux %d ethernet-1/1 1st interface new 100000", indexes["name=ethernet-1/1"]),
+        "127.0.0.1 true srlinux ethernet-1/1 ethernet-1/1 1st interface new 100000",
         // Removed value
-        fmt.Sprintf("127.0.0.1 srlinux %d 0", indexes["name=ethernet-1/4,index=1"]),
+        "127.0.0.1 false ethernet-1/4,index=1 0",
     }); diff != "" {
         t.Fatalf("Query() (-got, +want):\n%s", diff)
     }
@@ -13,9 +13,11 @@ import (
     "akvorado/common/schema"
 )

-// ErrSkipProvider is the error returned on lookup for providers unwilling to
-// handle a request.
-var ErrSkipProvider = errors.New("provider skips query")
+var (
+    // ErrSkipProvider is the error returned on lookup for providers unwilling to
+    // handle a request.
+    ErrSkipProvider = errors.New("provider skips query")
+)

 // Interface contains the information about an interface.
 type Interface struct {
@@ -49,32 +51,24 @@ type Query struct {
     IfIndex    uint
 }

-// BatchQuery is a batched query.
-type BatchQuery struct {
-    ExporterIP netip.Addr
-    IfIndexes  []uint
-}
-
 // Answer is the answer received from a provider.
 type Answer struct {
+    Found     bool
     Exporter  Exporter
     Interface Interface
 }

-// Update is an update received from a provider.
-type Update struct {
-    Query
-    Answer
-}
-
 // Provider is the interface a provider should implement.
 type Provider interface {
-    // Query asks the provider to query metadata for several requests.
-    Query(ctx context.Context, query *BatchQuery) error
+    // Query asks the provider to query metadata and return the result. The
+    // error can be ErrSkipProvider to skip to the next provider or another
+    // transient error. If the result is not found, Answer.Found is set to
+    // False.
+    Query(ctx context.Context, query Query) (Answer, error)
 }

 // Configuration defines an interface to configure a provider.
 type Configuration interface {
     // New instantiates a new provider from its configuration.
-    New(r *reporter.Reporter, put func(Update)) (Provider, error)
+    New(r *reporter.Reporter) (Provider, error)
 }
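With BatchQuery and Update gone, a provider only has to implement the single synchronous method defined above. As a reference point, a toy in-memory provider might look like the sketch below; mapProvider is hypothetical, while Query, Answer and ErrSkipProvider are the types from this diff.

package main

import (
    "context"
    "net/netip"

    "akvorado/outlet/metadata/provider"
)

// mapProvider is a hypothetical provider backed by a static map.
type mapProvider struct {
    interfaces map[netip.Addr]map[uint]provider.Interface
}

func (p mapProvider) Query(_ context.Context, q provider.Query) (provider.Answer, error) {
    ifaces, ok := p.interfaces[q.ExporterIP]
    if !ok {
        // Unknown exporter: let the next configured provider try.
        return provider.Answer{}, provider.ErrSkipProvider
    }
    iface, ok := ifaces[q.IfIndex]
    if !ok {
        // Known exporter, unknown interface: a definitive miss.
        return provider.Answer{}, nil
    }
    return provider.Answer{Found: true, Interface: iface}, nil
}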
@@ -20,7 +20,7 @@ import (
 type Configuration struct {
     // PollerRetries tell how many time a poller should retry before giving up
     PollerRetries int `validate:"min=0"`
-    // PollerTimeout tell how much time a poller should wait for an answer
+    // PollerTimeout tell how much time a poller should wait for an answer before retrying
     PollerTimeout time.Duration `validate:"min=100ms"`

     // Credentials is a mapping from exporter IPs to credentials
@@ -17,34 +17,9 @@ import (
     "akvorado/outlet/metadata/provider"
 )

-// Poll polls the SNMP provider for the requested interface indexes.
-func (p *Provider) Poll(ctx context.Context, exporter, agent netip.Addr, port uint16, ifIndexes []uint, put func(provider.Update)) error {
-    // Check if already have a request running
+// Poll polls the SNMP provider for the requested interface index.
+func (p *Provider) Poll(ctx context.Context, exporter, agent netip.Addr, port uint16, ifIndex uint) (provider.Answer, error) {
     exporterStr := exporter.Unmap().String()
-    filteredIfIndexes := make([]uint, 0, len(ifIndexes))
-    keys := make([]string, 0, len(ifIndexes))
-    p.pendingRequestsLock.Lock()
-    for _, ifIndex := range ifIndexes {
-        key := fmt.Sprintf("%s@%d", exporterStr, ifIndex)
-        _, ok := p.pendingRequests[key]
-        if !ok {
-            p.pendingRequests[key] = struct{}{}
-            filteredIfIndexes = append(filteredIfIndexes, ifIndex)
-            keys = append(keys, key)
-        }
-    }
-    p.pendingRequestsLock.Unlock()
-    if len(filteredIfIndexes) == 0 {
-        return nil
-    }
-    ifIndexes = filteredIfIndexes
-    defer func() {
-        p.pendingRequestsLock.Lock()
-        for _, key := range keys {
-            delete(p.pendingRequests, key)
-        }
-        p.pendingRequestsLock.Unlock()
-    }()

     // Instantiate an SNMP state
     g := &gosnmp.GoSNMP{
@@ -99,15 +74,12 @@ func (p *Provider) Poll(ctx context.Context, exporter, agent netip.Addr, port ui
         p.metrics.errors.WithLabelValues(exporterStr, "connect").Inc()
         p.errLogger.Err(err).Str("exporter", exporterStr).Msg("unable to connect")
     }
-    requests := []string{"1.3.6.1.2.1.1.5.0"}
-    for _, ifIndex := range ifIndexes {
-        moreRequests := []string{
-            fmt.Sprintf("1.3.6.1.2.1.2.2.1.2.%d", ifIndex),     // ifDescr
-            fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.1.%d", ifIndex),  // ifName
-            fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.18.%d", ifIndex), // ifAlias
-            fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.15.%d", ifIndex), // ifSpeed
-        }
-        requests = append(requests, moreRequests...)
+    requests := []string{
+        "1.3.6.1.2.1.1.5.0",
+        fmt.Sprintf("1.3.6.1.2.1.2.2.1.2.%d", ifIndex),     // ifDescr
+        fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.1.%d", ifIndex),  // ifName
+        fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.18.%d", ifIndex), // ifAlias
+        fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.15.%d", ifIndex), // ifSpeed
     }
     var results []gosnmp.SnmpPDU
     success := false
@@ -128,17 +100,17 @@ func (p *Provider) Poll(ctx context.Context, exporter, agent netip.Addr, port ui
         g.Community = community
         currentResult, err := g.Get(requests)
         if errors.Is(err, context.Canceled) {
-            return nil
+            return provider.Answer{}, err
         }
         if err != nil && canError {
-            return logError(err)
+            return provider.Answer{}, logError(err)
         }
         if err != nil {
             continue
         }
         if currentResult.Error != gosnmp.NoError && currentResult.ErrorIndex == 0 && canError {
             // There is some error affecting the whole request
-            return logError(fmt.Errorf("SNMP error %s(%d)", currentResult.Error, currentResult.Error))
+            return provider.Answer{}, logError(fmt.Errorf("SNMP error %s(%d)", currentResult.Error, currentResult.Error))
         }
         success = true
         if results == nil {
@@ -158,7 +130,7 @@ func (p *Provider) Poll(ctx context.Context, exporter, agent netip.Addr, port ui
     if len(results) != len(requests) {
         logError(fmt.Errorf("SNMP mismatch on variable lengths"))
     }
-    p.metrics.times.WithLabelValues(exporterStr).Observe(time.Now().Sub(start).Seconds())
+    p.metrics.times.WithLabelValues(exporterStr).Observe(time.Since(start).Seconds())

     processStr := func(idx int, what string) (string, bool) {
         switch results[idx].Type {
@@ -186,73 +158,57 @@ func (p *Provider) Poll(ctx context.Context, exporter, agent netip.Addr, port ui
     }
     sysNameVal, ok := processStr(0, "sysname")
     if !ok {
-        return errors.New("unable to get sysName")
+        return provider.Answer{}, errors.New("unable to get sysName")
     }
-    for idx := 1; idx < len(requests)-3; idx += 4 {
-        var (
-            name, description string
-            speed             uint
-        )
-        ifIndex := ifIndexes[(idx-1)/4]
-        ok := true
-        // We do not process results when index is 0 (this can happen for local
-        // traffic, we only care for exporter name).
-        if ifIndex > 0 {
-            ifDescrVal, okDescr := processStr(idx, "ifdescr")
-            ifNameVal, okName := processStr(idx+1, "ifname")
-            ifAliasVal, okAlias := processStr(idx+2, "ifalias")
-            ifSpeedVal, okSpeed := processUint(idx+3, "ifspeed")
-
-            // Many equipments are using ifDescr for the interface name and
-            // ifAlias for the description, which is counter-intuitive. We want
-            // both the name and the description.
-
-            if okName {
-                // If we have ifName, use ifDescr if it is different and ifAlias
-                // is not. Otherwise, keep description empty.
-                name = ifNameVal
-                if okAlias && ifAliasVal != ifNameVal {
-                    description = ifAliasVal
-                } else if okDescr && ifDescrVal != ifNameVal {
-                    description = ifDescrVal
-                }
-            } else {
-                // Don't handle the other case yet. It would be unexpected to
-                // have ifAlias and not ifName. And if we have only ifDescr, we
-                // can't really know what this is.
-                ok = false
-            }
-
-            // Speed is mandatory
-            ok = ok && okSpeed
-            speed = ifSpeedVal
-        }
-        if ok {
-            p.metrics.successes.WithLabelValues(exporterStr).Inc()
-        } else {
-            name = ""
-            description = ""
-            speed = 0
-        }
-        put(provider.Update{
-            Query: provider.Query{
-                ExporterIP: exporter,
-                IfIndex:    ifIndex,
-            },
-            Answer: provider.Answer{
-                Exporter: provider.Exporter{
-                    Name: sysNameVal,
-                },
-                Interface: provider.Interface{
-                    Name:        name,
-                    Description: description,
-                    Speed:       speed,
-                },
-            },
-        })
-    }

-    return nil
+    var (
+        name, description string
+        speed             uint
+    )
+    ok = true
+    ifDescrVal, okDescr := processStr(1, "ifdescr")
+    ifNameVal, okName := processStr(2, "ifname")
+    ifAliasVal, okAlias := processStr(3, "ifalias")
+    ifSpeedVal, okSpeed := processUint(4, "ifspeed")
+
+    // Many equipments are using ifDescr for the interface name and
+    // ifAlias for the description, which is counter-intuitive. We want
+    // both the name and the description.
+
+    if okName {
+        // If we have ifName, use ifDescr if it is different and ifAlias
+        // is not. Otherwise, keep description empty.
+        name = ifNameVal
+        if okAlias && ifAliasVal != ifNameVal {
+            description = ifAliasVal
+        } else if okDescr && ifDescrVal != ifNameVal {
+            description = ifDescrVal
+        }
+    } else {
+        // Don't handle the other case yet. It would be unexpected to
+        // have ifAlias and not ifName. And if we have only ifDescr, we
+        // can't really know what this is.
+        ok = false
+    }
+
+    // Speed is mandatory
+    ok = ok && okSpeed
+    speed = ifSpeedVal
+    if ok {
+        p.metrics.successes.WithLabelValues(exporterStr).Inc()
+        return provider.Answer{
+            Found: true,
+            Exporter: provider.Exporter{
+                Name: sysNameVal,
+            },
+            Interface: provider.Interface{
+                Name:        name,
+                Description: description,
+                Speed:       speed,
+            },
+        }, nil
+    }
+    return provider.Answer{}, nil
 }

 type goSNMPLogger struct {
@@ -264,41 +264,52 @@ func TestPoller(t *testing.T) {
     config.Ports = helpers.MustNewSubnetMap(map[string]uint16{
         "::/0": uint16(port),
     })
-    put := func(update provider.Update) {
-        got = append(got, fmt.Sprintf("%s %s %d %s %s %d",
-            update.ExporterIP.Unmap().String(), update.Exporter.Name,
-            update.IfIndex, update.Interface.Name, update.Interface.Description, update.Interface.Speed))
-    }
-    p, err := config.New(r, put)
+    p, err := config.New(r)
     if err != nil {
         t.Fatalf("New() error:\n%+v", err)
     }

-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: tc.ExporterIP, IfIndexes: []uint{641}})
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: tc.ExporterIP, IfIndexes: []uint{642}})
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: tc.ExporterIP, IfIndexes: []uint{643, 644, 645}})
-    p.Query(context.Background(), &provider.BatchQuery{ExporterIP: tc.ExporterIP, IfIndexes: []uint{0}})
+    // Collect results from all queries
+    answer, _ := p.Query(context.Background(), provider.Query{ExporterIP: tc.ExporterIP, IfIndex: 641})
+    got = append(got, fmt.Sprintf("%v %s %s %d %s %s %d",
+        answer.Found, tc.ExporterIP.Unmap().String(), answer.Exporter.Name,
+        641, answer.Interface.Name, answer.Interface.Description, answer.Interface.Speed))
+    answer, _ = p.Query(context.Background(), provider.Query{ExporterIP: tc.ExporterIP, IfIndex: 642})
+    got = append(got, fmt.Sprintf("%v %s %s %d %s %s %d",
+        answer.Found, tc.ExporterIP.Unmap().String(), answer.Exporter.Name,
+        642, answer.Interface.Name, answer.Interface.Description, answer.Interface.Speed))
+    answer, _ = p.Query(context.Background(), provider.Query{ExporterIP: tc.ExporterIP, IfIndex: 643})
+    got = append(got, fmt.Sprintf("%v %s %s %d %s %s %d",
+        answer.Found, tc.ExporterIP.Unmap().String(), answer.Exporter.Name,
+        643, answer.Interface.Name, answer.Interface.Description, answer.Interface.Speed))
+    answer, _ = p.Query(context.Background(), provider.Query{ExporterIP: tc.ExporterIP, IfIndex: 644})
+    got = append(got, fmt.Sprintf("%v %s %s %d %s %s %d",
+        answer.Found, tc.ExporterIP.Unmap().String(), answer.Exporter.Name,
+        644, answer.Interface.Name, answer.Interface.Description, answer.Interface.Speed))
+    answer, _ = p.Query(context.Background(), provider.Query{ExporterIP: tc.ExporterIP, IfIndex: 645})
+    got = append(got, fmt.Sprintf("%v %s %s %d %s %s %d",
+        answer.Found, tc.ExporterIP.Unmap().String(), answer.Exporter.Name,
+        645, answer.Interface.Name, answer.Interface.Description, answer.Interface.Speed))

     exporterStr := tc.ExporterIP.Unmap().String()
     time.Sleep(50 * time.Millisecond)
     if diff := helpers.Diff(got, []string{
-        fmt.Sprintf(`%s exporter62 641 Gi0/0/0/0 Transit 10000`, exporterStr),
-        fmt.Sprintf(`%s exporter62 642 Gi0/0/0/1 Peering 20000`, exporterStr),
-        fmt.Sprintf(`%s exporter62 643 Gi0/0/0/2 10000`, exporterStr), // no ifAlias
-        fmt.Sprintf(`%s exporter62 644 0`, exporterStr), // negative cache
-        fmt.Sprintf(`%s exporter62 645 Gi0/0/0/5 Correct description 1000`, exporterStr), // negative cache
-        fmt.Sprintf(`%s exporter62 0 0`, exporterStr),
+        fmt.Sprintf(`true %s exporter62 641 Gi0/0/0/0 Transit 10000`, exporterStr),
+        fmt.Sprintf(`true %s exporter62 642 Gi0/0/0/1 Peering 20000`, exporterStr),
+        fmt.Sprintf(`true %s exporter62 643 Gi0/0/0/2 10000`, exporterStr), // no ifAlias
+        fmt.Sprintf(`false %s 644 0`, exporterStr),
+        fmt.Sprintf(`true %s exporter62 645 Gi0/0/0/5 Correct description 1000`, exporterStr),
     }); diff != "" {
         t.Fatalf("Poll() (-got, +want):\n%s", diff)
     }

-    gotMetrics := r.GetMetrics("akvorado_outlet_metadata_provider_snmp_poller_", "error_", "pending_", "success_")
+    gotMetrics := r.GetMetrics("akvorado_outlet_metadata_provider_snmp_poller_", "error_", "success_")
     expectedMetrics := map[string]string{
         fmt.Sprintf(`error_requests_total{error="ifalias missing",exporter="%s"}`, exporterStr): "2", // 643+644
         fmt.Sprintf(`error_requests_total{error="ifdescr missing",exporter="%s"}`, exporterStr): "1", // 644
         fmt.Sprintf(`error_requests_total{error="ifname missing",exporter="%s"}`, exporterStr): "1", // 644
         fmt.Sprintf(`error_requests_total{error="ifspeed missing",exporter="%s"}`, exporterStr): "1", // 644
-        `pending_requests`: "0",
-        fmt.Sprintf(`success_requests_total{exporter="%s"}`, exporterStr): "5", // 641+642+643+645+0
+        fmt.Sprintf(`success_requests_total{exporter="%s"}`, exporterStr): "4", // 641+642+643+645
     }
     if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" {
         t.Fatalf("Metrics (-got, +want):\n%s", diff)
@@ -8,7 +8,6 @@ package snmp
 import (
     "context"
     "net/netip"
-    "sync"
     "time"

     "akvorado/common/reporter"
@@ -17,26 +16,20 @@ import (

 // Provider represents the SNMP provider.
 type Provider struct {
     r      *reporter.Reporter
     config *Configuration
-    pendingRequests     map[string]struct{}
-    pendingRequestsLock sync.Mutex
-    errLogger           reporter.Logger
+    errLogger reporter.Logger

-    put func(provider.Update)
-
     metrics struct {
-        pendingRequests reporter.GaugeFunc
         successes *reporter.CounterVec
         errors    *reporter.CounterVec
         retries   *reporter.CounterVec
         times     *reporter.SummaryVec
     }
 }

 // New creates a new SNMP provider from configuration
-func (configuration Configuration) New(r *reporter.Reporter, put func(provider.Update)) (provider.Provider, error) {
+func (configuration Configuration) New(r *reporter.Reporter) (provider.Provider, error) {
     for exporterIP, agentIP := range configuration.Agents {
         if exporterIP.Is4() || agentIP.Is4() {
             delete(configuration.Agents, exporterIP)
@@ -47,24 +40,11 @@ func (configuration Configuration) New(r *reporter.Reporter, put func(provider.U
     }

     p := Provider{
         r:      r,
         config: &configuration,
-        pendingRequests: make(map[string]struct{}),
-        errLogger:       r.Sample(reporter.BurstSampler(10*time.Second, 3)),
-
-        put: put,
+        errLogger: r.Sample(reporter.BurstSampler(10*time.Second, 3)),
     }

-    p.metrics.pendingRequests = r.GaugeFunc(
-        reporter.GaugeOpts{
-            Name: "poller_pending_requests",
-            Help: "Number of pending requests in pollers.",
-        }, func() float64 {
-            p.pendingRequestsLock.Lock()
-            defer p.pendingRequestsLock.Unlock()
-            return float64(len(p.pendingRequests))
-        })
     p.metrics.successes = r.CounterVec(
         reporter.CounterOpts{
             Name: "poller_success_requests_total",
@@ -91,12 +71,11 @@ func (configuration Configuration) New(r *reporter.Reporter, put func(provider.U
 }

 // Query queries exporter to get information through SNMP.
-func (p *Provider) Query(ctx context.Context, query *provider.BatchQuery) error {
-    // Avoid querying too much exporters with errors
+func (p *Provider) Query(ctx context.Context, query provider.Query) (provider.Answer, error) {
     agentIP, ok := p.config.Agents[query.ExporterIP]
     if !ok {
         agentIP = query.ExporterIP
     }
     agentPort := p.config.Ports.LookupOrDefault(query.ExporterIP, 161)
-    return p.Poll(ctx, query.ExporterIP, agentIP, agentPort, query.IfIndexes, p.put)
+    return p.Poll(ctx, query.ExporterIP, agentIP, agentPort, query.IfIndex)
 }
@@ -6,6 +6,8 @@
 package static

 import (
+    "time"
+
     "akvorado/common/helpers"
     "akvorado/common/remotedatasourcefetcher"
     "akvorado/common/reporter"
@@ -19,65 +21,74 @@ import (

 // Provider represents the static provider.
 type Provider struct {
     r *reporter.Reporter

     exporterSourcesFetcher *remotedatasourcefetcher.Component[exporterInfo]
     exportersMap           map[string][]exporterInfo
     exporters              atomic.Pointer[helpers.SubnetMap[ExporterConfiguration]]
     exportersLock          sync.Mutex
-    put                    func(provider.Update)
+    errLogger              reporter.Logger
+
+    metrics struct {
+        notReady reporter.Counter
+    }
 }

 // New creates a new static provider from configuration
-func (configuration Configuration) New(r *reporter.Reporter, put func(provider.Update)) (provider.Provider, error) {
+func (configuration Configuration) New(r *reporter.Reporter) (provider.Provider, error) {
     p := &Provider{
         r:            r,
         exportersMap: map[string][]exporterInfo{},
-        put:          put,
+        errLogger:    r.Sample(reporter.BurstSampler(time.Minute, 3)),
     }
     p.exporters.Store(configuration.Exporters)
     p.initStaticExporters()

     var err error
-    p.exporterSourcesFetcher, err = remotedatasourcefetcher.New[exporterInfo](r, p.UpdateRemoteDataSource, "metadata", configuration.ExporterSources)
+    p.exporterSourcesFetcher, err = remotedatasourcefetcher.New[exporterInfo](r,
+        p.UpdateRemoteDataSource, "metadata", configuration.ExporterSources)
     if err != nil {
         return nil, fmt.Errorf("unable to initialize remote data source fetcher component: %w", err)
     }
     if err := p.exporterSourcesFetcher.Start(); err != nil {
         return nil, fmt.Errorf("unable to start network sources fetcher component: %w", err)
     }

+    p.metrics.notReady = r.Counter(
+        reporter.CounterOpts{
+            Name: "not_ready_total",
+            Help: "Number of queries failing because the remote data sources are not ready",
+        })
+
     return p, nil
 }

 // Query queries static configuration.
-func (p *Provider) Query(_ context.Context, query *provider.BatchQuery) error {
+func (p *Provider) Query(ctx context.Context, query provider.Query) (provider.Answer, error) {
+    // We wait for all data sources to be ready
+    select {
+    case <-ctx.Done():
+        p.metrics.notReady.Inc()
+        p.errLogger.Warn().Msg("remote datasources are not ready")
+        return provider.Answer{}, ctx.Err()
+    case <-p.exporterSourcesFetcher.DataSourcesReady:
+    }
     exporter, ok := p.exporters.Load().Lookup(query.ExporterIP)
     if !ok {
-        return provider.ErrSkipProvider
+        return provider.Answer{}, provider.ErrSkipProvider
     }
-    var skippedIfIndexes uint
-    for _, ifIndex := range query.IfIndexes {
-        iface, ok := exporter.IfIndexes[ifIndex]
-        if !ok {
-            if exporter.SkipMissingInterfaces {
-                query.IfIndexes[skippedIfIndexes] = ifIndex
-                skippedIfIndexes++
-                continue
-            }
-            iface = exporter.Default
-        }
+    iface, ok := exporter.IfIndexes[query.IfIndex]
+    if !ok {
+        if exporter.SkipMissingInterfaces {
+            return provider.Answer{}, provider.ErrSkipProvider
+        }
+        iface = exporter.Default
     }
-    p.put(provider.Update{
-        Query: provider.Query{
|
|
||||||
ExporterIP: query.ExporterIP,
|
|
||||||
IfIndex: ifIndex,
|
|
||||||
},
|
|
||||||
Answer: provider.Answer{
|
|
||||||
Exporter: exporter.Exporter,
|
|
||||||
Interface: iface,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
if skippedIfIndexes > 0 {
|
return provider.Answer{
|
||||||
query.IfIndexes = query.IfIndexes[:skippedIfIndexes]
|
Found: true,
|
||||||
return provider.ErrSkipProvider
|
Exporter: exporter.Exporter,
|
||||||
}
|
Interface: iface,
|
||||||
return nil
|
}, nil
|
||||||
}
|
}
|
||||||
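
The select at the top of the new Query is a readiness gate: the static provider blocks until the remote data sources have been fetched at least once, or until the caller's context expires (the component's one-minute initial grace period makes the latter unlikely during startup). The pattern in isolation, assuming a channel that is closed once the sources are ready — the actual channel is exporterSourcesFetcher.DataSourcesReady, whose element type is not shown in the diff:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// waitReady blocks until ready is closed or the context is done.
func waitReady(ctx context.Context, ready <-chan struct{}) error {
	select {
	case <-ctx.Done():
		return ctx.Err() // deadline or cancellation: report not ready
	case <-ready:
		return nil // data sources fetched at least once; safe to answer
	}
}

func main() {
	ready := make(chan struct{})
	go func() {
		time.Sleep(20 * time.Millisecond) // simulated initial fetch
		close(ready)
	}()
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	err := waitReady(ctx, ready)
	fmt.Println(err == nil, errors.Is(err, context.DeadlineExceeded))
}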

@@ -95,173 +95,157 @@ func TestStaticProvider(t *testing.T) {
 		}),
 	}
 
-	var got []provider.Update
+	var got []provider.Answer
 	r := reporter.NewMock(t)
-	p, _ := config.New(r, func(update provider.Update) {
-		got = append(got, update)
-	})
+	p, _ := config.New(r)
 
-	p.Query(context.Background(), &provider.BatchQuery{
-		ExporterIP: netip.MustParseAddr("2001:db8:1::10"),
-		IfIndexes:  []uint{9, 10, 11},
-	})
-	p.Query(context.Background(), &provider.BatchQuery{
-		ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
-		IfIndexes:  []uint{9, 10, 11},
-	})
-	p.Query(context.Background(), &provider.BatchQuery{
-		ExporterIP: netip.MustParseAddr("2001:db8:3::10"),
-		IfIndexes:  []uint{10},
-	})
-	query := provider.BatchQuery{
-		ExporterIP: netip.MustParseAddr("2001:db8:4::10"),
-		IfIndexes:  []uint{9, 10, 11},
-	}
-	err := p.Query(context.Background(), &query)
+	answer, _ := p.Query(context.Background(), provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:1::10"),
+		IfIndex:    9,
+	})
+	got = append(got, answer)
+	answer, _ = p.Query(context.Background(), provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:1::10"),
+		IfIndex:    10,
+	})
+	got = append(got, answer)
+	answer, _ = p.Query(context.Background(), provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:1::10"),
+		IfIndex:    11,
+	})
+	got = append(got, answer)
+	answer, _ = p.Query(context.Background(), provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
+		IfIndex:    9,
+	})
+	got = append(got, answer)
+	answer, _ = p.Query(context.Background(), provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
+		IfIndex:    10,
+	})
+	got = append(got, answer)
+	answer, _ = p.Query(context.Background(), provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
+		IfIndex:    11,
+	})
+	got = append(got, answer)
+
+	answer, _ = p.Query(context.Background(), provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:3::10"),
+		IfIndex:    10,
+	})
+	got = append(got, answer)
+
+	var err error
+	answer, _ = p.Query(context.Background(), provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:4::10"),
+		IfIndex:    10,
+	})
+	got = append(got, answer)
+	answer, err = p.Query(context.Background(), provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:4::10"),
+		IfIndex:    11,
+	})
+	got = append(got, answer)
 
-	expected := []provider.Update{
-		{
-			Query: provider.Query{
-				ExporterIP: netip.MustParseAddr("2001:db8:1::10"),
-				IfIndex:    9,
-			},
-			Answer: provider.Answer{
-				Exporter: provider.Exporter{
-					Name: "nodefault",
-				},
-			},
-		},
-		{
-			Query: provider.Query{
-				ExporterIP: netip.MustParseAddr("2001:db8:1::10"),
-				IfIndex:    10,
-			},
-			Answer: provider.Answer{
-				Exporter: provider.Exporter{
-					Name: "nodefault",
-				},
-				Interface: provider.Interface{
-					Name:        "Gi10",
-					Description: "10th interface",
-					Speed:       1000,
-				},
-			},
-		},
-		{
-			Query: provider.Query{
-				ExporterIP: netip.MustParseAddr("2001:db8:1::10"),
-				IfIndex:    11,
-			},
-			Answer: provider.Answer{
-				Exporter: provider.Exporter{
-					Name: "nodefault",
-				},
-				Interface: provider.Interface{
-					Name:        "Gi11",
-					Description: "11th interface",
-					Speed:       1000,
-				},
-			},
-		},
-		{
-			Query: provider.Query{
-				ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
-				IfIndex:    9,
-			},
-			Answer: provider.Answer{
-				Exporter: provider.Exporter{
-					Name: "default",
-				},
-				Interface: provider.Interface{
-					Name:        "Default0",
-					Description: "Default interface",
-					Speed:       1000,
-				},
-			},
-		},
-		{
-			Query: provider.Query{
-				ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
-				IfIndex:    10,
-			},
-			Answer: provider.Answer{
-				Exporter: provider.Exporter{
-					Name: "default",
-				},
-				Interface: provider.Interface{
-					Name:        "Gi10",
-					Description: "10th interface",
-					Speed:       1000,
-				},
-			},
-		},
-		{
-			Query: provider.Query{
-				ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
-				IfIndex:    11,
-			},
-			Answer: provider.Answer{
-				Exporter: provider.Exporter{
-					Name: "default",
-				},
-				Interface: provider.Interface{
-					Name:        "Default0",
-					Description: "Default interface",
-					Speed:       1000,
-				},
-			},
-		},
-		{
-			Query: provider.Query{
-				ExporterIP: netip.MustParseAddr("2001:db8:3::10"),
-				IfIndex:    10,
-			},
-			Answer: provider.Answer{
-				Exporter: provider.Exporter{
-					Name:   "default with metadata",
-					Region: "eu",
-					Role:   "peering",
-					Tenant: "mine",
-					Site:   "par",
-					Group:  "blue",
-				},
-				Interface: provider.Interface{
-					Name:         "Gi10",
-					Description:  "10th interface",
-					Speed:        1000,
-					Provider:     "transit101",
-					Connectivity: "transit",
-					Boundary:     schema.InterfaceBoundaryExternal,
-				},
-			},
-		},
-		{
-			Query: provider.Query{
-				ExporterIP: netip.MustParseAddr("2001:db8:4::10"),
-				IfIndex:    10,
-			},
-			Answer: provider.Answer{
-				Exporter: provider.Exporter{
-					Name: "nodefault skip",
-				},
-				Interface: provider.Interface{
-					Name:         "Gi10",
-					Description:  "10th interface",
-					Speed:        1000,
-					Provider:     "transit101",
-					Connectivity: "transit",
-					Boundary:     schema.InterfaceBoundaryExternal,
-				},
-			},
-		},
-	}
+	expected := []provider.Answer{
+		{
+			Found: true,
+			Exporter: provider.Exporter{
+				Name: "nodefault",
+			},
+		},
+		{
+			Found: true,
+			Exporter: provider.Exporter{
+				Name: "nodefault",
+			},
+			Interface: provider.Interface{
+				Name:        "Gi10",
+				Description: "10th interface",
+				Speed:       1000,
+			},
+		},
+		{
+			Found: true,
+			Exporter: provider.Exporter{
+				Name: "nodefault",
+			},
+			Interface: provider.Interface{
+				Name:        "Gi11",
+				Description: "11th interface",
+				Speed:       1000,
+			},
+		},
+		{
+			Found: true,
+			Exporter: provider.Exporter{
+				Name: "default",
+			},
+			Interface: provider.Interface{
+				Name:        "Default0",
+				Description: "Default interface",
+				Speed:       1000,
+			},
+		}, {
+			Found: true,
+			Exporter: provider.Exporter{
+				Name: "default",
+			},
+			Interface: provider.Interface{
+				Name:        "Gi10",
+				Description: "10th interface",
+				Speed:       1000,
+			},
+		}, {
+			Found: true,
+			Exporter: provider.Exporter{
+				Name: "default",
+			},
+			Interface: provider.Interface{
+				Name:        "Default0",
+				Description: "Default interface",
+				Speed:       1000,
+			},
+		}, {
+			Found: true,
+			Exporter: provider.Exporter{
+				Name:   "default with metadata",
+				Region: "eu",
+				Role:   "peering",
+				Tenant: "mine",
+				Site:   "par",
+				Group:  "blue",
+			},
+			Interface: provider.Interface{
+				Name:         "Gi10",
+				Description:  "10th interface",
+				Speed:        1000,
+				Provider:     "transit101",
+				Connectivity: "transit",
+				Boundary:     schema.InterfaceBoundaryExternal,
+			},
+		}, {
+			Found: true,
+			Exporter: provider.Exporter{
+				Name: "nodefault skip",
+			},
+			Interface: provider.Interface{
+				Name:         "Gi10",
+				Description:  "10th interface",
+				Speed:        1000,
+				Provider:     "transit101",
+				Connectivity: "transit",
+				Boundary:     schema.InterfaceBoundaryExternal,
+			},
+		},
+		{}, // Skip
+	}
 
 	if diff := helpers.Diff(got, expected); diff != "" {
 		t.Fatalf("static provider (-got, +want):\n%s", diff)
 	}
-	if diff := helpers.Diff(query.IfIndexes, []uint{9, 11}); diff != "" {
-		t.Fatalf("static provider (-got, +want):\n%s", diff)
-	}
 	if diff := helpers.Diff(err, provider.ErrSkipProvider); diff != "" {
 		t.Fatalf("static provider (-got, +want):\n%s", diff)
 	}

@@ -5,6 +5,7 @@ package static
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"net"
 	"net/http"
@@ -38,7 +39,6 @@ func TestInitStaticExporters(t *testing.T) {
 	p := &Provider{
 		r:            r,
 		exportersMap: map[string][]exporterInfo{},
-		put:          func(_ provider.Update) {},
 	}
 	p.exporters.Store(conf.Exporters)
 
@@ -70,6 +70,7 @@ func TestInitStaticExporters(t *testing.T) {
 }
 
 func TestRemoteExporterSources(t *testing.T) {
+
 	// Mux to answer requests
 	ready := make(chan bool)
 	mux := http.NewServeMux()
@@ -186,42 +187,24 @@ func TestRemoteExporterSources(t *testing.T) {
 			},
 		},
 	}
-	var got []provider.Update
-	var expected []provider.Update
-	p, _ := config.New(r, func(update provider.Update) {
-		got = append(got, update)
-	})
+	p, _ := config.New(r)
 
-	// Query when json is not ready yet, only static configured data available
-	p.Query(context.Background(), &provider.BatchQuery{
-		ExporterIP: netip.MustParseAddr("2001:db8:1::10"),
-		IfIndexes:  []uint{9},
-	})
-	// Unknown Exporter at this moment
-	p.Query(context.Background(), &provider.BatchQuery{
-		ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
-		IfIndexes:  []uint{1},
-	})
-
-	expected = append(expected, provider.Update{
-		Query: provider.Query{
-			ExporterIP: netip.MustParseAddr("2001:db8:1::10"),
-			IfIndex:    9,
-		},
-		Answer: provider.Answer{
-			Exporter: provider.Exporter{
-				Name: "nodefault",
-			},
-		},
-	})
-
-	if diff := helpers.Diff(got, expected); diff != "" {
-		t.Fatalf("static provider - before remote source load (-got, +want):\n%s", diff)
-	}
+	// Query when json is not ready yet, we should get a timeout
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
+	defer cancel()
+	answer1, err := p.Query(ctx, provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:1::10"),
+		IfIndex:    9,
+	})
+	if err == nil {
+		t.Fatalf("Query() should have been in error:\n%+v", answer1)
+	}
+	if !errors.Is(err, context.DeadlineExceeded) {
+		t.Fatalf("Query() error:\n%+v", err)
	}
 
 	close(ready)
-	time.Sleep(50 * time.Millisecond)
+	time.Sleep(100 * time.Millisecond)
 
 	gotMetrics := r.GetMetrics("akvorado_common_remotedatasourcefetcher_data_")
 	expectedMetrics := map[string]string{
@@ -232,29 +215,24 @@ func TestRemoteExporterSources(t *testing.T) {
 	}
 
 	// We now should be able to resolve our new exporter from remote source
-	p.Query(context.Background(), &provider.BatchQuery{
-		ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
-		IfIndexes:  []uint{1},
-	})
-
-	expected = append(expected, provider.Update{
-		Query: provider.Query{
-			ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
-			IfIndex:    1,
-		},
-		Answer: provider.Answer{
-			Exporter: provider.Exporter{
-				Name: "exporter1",
-			},
-			Interface: provider.Interface{
-				Name:        "iface1",
-				Description: "foo:desc1",
-				Speed:       1000,
-			},
-		},
-	})
+	got, _ := p.Query(context.Background(), provider.Query{
+		ExporterIP: netip.MustParseAddr("2001:db8:2::10"),
+		IfIndex:    1,
+	})
+
+	expected := provider.Answer{
+		Found: true,
+		Exporter: provider.Exporter{
+			Name: "exporter1",
+		},
+		Interface: provider.Interface{
+			Name:        "iface1",
+			Description: "foo:desc1",
+			Speed:       1000,
+		},
+	}
 
 	if diff := helpers.Diff(got, expected); diff != "" {
-		t.Fatalf("static provider - after remote source load(-got, +want):\n%s", diff)
+		t.Fatalf("static provider (-got, +want):\n%s", diff)
 	}
 }
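
The rewritten test asserts the not-ready case through the context error chain: a Query bounded by a 10 ms context must fail, and the failure must satisfy errors.Is(err, context.DeadlineExceeded). A self-contained sketch of that assertion style; the queryFn indirection is hypothetical, the real test calls p.Query directly:

package static

import (
	"context"
	"errors"
	"testing"
	"time"
)

func TestQueryTimesOutWhenNotReady(t *testing.T) {
	queryFn := func(ctx context.Context) error {
		<-ctx.Done() // stand-in for a provider blocked on its readiness gate
		return ctx.Err()
	}
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
	defer cancel()
	err := queryFn(ctx)
	if err == nil {
		t.Fatal("query should have been in error")
	}
	if !errors.Is(err, context.DeadlineExceeded) {
		t.Fatalf("unexpected error: %+v", err)
	}
}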

@@ -7,15 +7,16 @@
 package metadata
 
 import (
+	"context"
 	"errors"
 	"fmt"
 	"net/netip"
-	"strconv"
 	"sync"
 	"time"
 
 	"github.com/benbjohnson/clock"
 	"github.com/eapache/go-resiliency/breaker"
+	"golang.org/x/sync/singleflight"
 	"gopkg.in/tomb.v2"
 
 	"akvorado/common/daemon"
@@ -31,22 +32,20 @@ type Component struct {
 	config Configuration
 
 	sc *metadataCache
+	sf singleflight.Group
 
-	healthyWorkers         chan reporter.ChannelHealthcheckFunc
-	providerChannel        chan provider.BatchQuery
-	dispatcherChannel      chan provider.Query
-	dispatcherBChannel     chan (<-chan bool) // block channel for testing
 	providerBreakersLock   sync.Mutex
 	providerBreakerLoggers map[netip.Addr]reporter.Logger
 	providerBreakers       map[netip.Addr]*breaker.Breaker
 	providers              []provider.Provider
+	initialDeadline        time.Time
 
 	metrics struct {
 		cacheRefreshRuns         reporter.Counter
 		cacheRefresh             reporter.Counter
-		providerBusyCount        *reporter.CounterVec
 		providerBreakerOpenCount *reporter.CounterVec
-		providerBatchedCount     reporter.Counter
+		providerRequests         reporter.Counter
+		providerErrors           reporter.Counter
 	}
 }
@@ -56,6 +55,11 @@ type Dependencies struct {
 	Clock clock.Clock
 }
 
+var (
+	// ErrQueryTimeout is the error returned when a query timeout.
+	ErrQueryTimeout = errors.New("provider query timeout")
+)
+
 // New creates a new metadata component.
 func New(r *reporter.Reporter, configuration Configuration, dependencies Dependencies) (*Component, error) {
 	if configuration.CacheRefresh > 0 && configuration.CacheRefresh < configuration.CacheDuration {
@@ -75,9 +79,6 @@ func New(r *reporter.Reporter, configuration Configuration, dependencies Depende
 		config: configuration,
 		sc:     sc,
 
-		providerChannel:        make(chan provider.BatchQuery),
-		dispatcherChannel:      make(chan provider.Query, 100*configuration.Workers),
-		dispatcherBChannel:     make(chan (<-chan bool)),
 		providerBreakers:       make(map[netip.Addr]*breaker.Breaker),
 		providerBreakerLoggers: make(map[netip.Addr]reporter.Logger),
 		providers:              make([]provider.Provider, 0, 1),
@@ -86,9 +87,7 @@ func New(r *reporter.Reporter, configuration Configuration, dependencies Depende
 
 	// Initialize providers
 	for _, p := range c.config.Providers {
-		selectedProvider, err := p.Config.New(r, func(update provider.Update) {
-			c.sc.Put(c.d.Clock.Now(), update.Query, update.Answer)
-		})
+		selectedProvider, err := p.Config.New(r)
 		if err != nil {
 			return nil, err
 		}
@@ -102,33 +101,32 @@ func New(r *reporter.Reporter, configuration Configuration, dependencies Depende
 		})
 	c.metrics.cacheRefresh = r.Counter(
 		reporter.CounterOpts{
-			Name: "cache_refreshs_total",
+			Name: "cache_refreshes_total",
 			Help: "Number of entries refreshed in cache.",
 		})
-	c.metrics.providerBusyCount = r.CounterVec(
-		reporter.CounterOpts{
-			Name: "provider_dropped_requests_total",
-			Help: "Providers where too busy and dropped requests.",
-		},
-		[]string{"exporter"})
 	c.metrics.providerBreakerOpenCount = r.CounterVec(
 		reporter.CounterOpts{
 			Name: "provider_breaker_opens_total",
 			Help: "Provider breaker was opened due to too many errors.",
 		},
 		[]string{"exporter"})
-	c.metrics.providerBatchedCount = r.Counter(
+	c.metrics.providerRequests = r.Counter(
 		reporter.CounterOpts{
-			Name: "provider_batched_requests_total",
-			Help: "Several requests were batched into one.",
-		},
-	)
+			Name: "provider_requests_total",
+			Help: "Number of provider requests.",
+		})
+	c.metrics.providerErrors = r.Counter(
+		reporter.CounterOpts{
+			Name: "provider_errors_total",
+			Help: "Number of provider errors.",
+		})
 	return &c, nil
 }
 
 // Start starts the metadata component.
 func (c *Component) Start() error {
 	c.r.Info().Msg("starting metadata component")
+	c.initialDeadline = time.Now().Add(c.config.InitialDelay)
 
 	// Load cache
 	if c.config.CachePersistFile != "" {
@@ -160,61 +158,12 @@ func (c *Component) Start() error {
 		}
 	})
 
-	// Goroutine to fetch incoming requests and dispatch them to workers
-	healthyDispatcher := make(chan reporter.ChannelHealthcheckFunc)
-	c.r.RegisterHealthcheck("metadata/dispatcher", reporter.ChannelHealthcheck(c.t.Context(nil), healthyDispatcher))
-	c.t.Go(func() error {
-		dying := c.t.Dying()
-		for {
-			select {
-			case <-dying:
-				c.r.Debug().Msg("stopping metadata dispatcher")
-				return nil
-			case cb, ok := <-healthyDispatcher:
-				if ok {
-					cb(reporter.HealthcheckOK, "ok")
-				}
-			case ch := <-c.dispatcherBChannel:
-				// This is to test batching
-				<-ch
-			case request := <-c.dispatcherChannel:
-				c.dispatchIncomingRequest(request)
-			}
-		}
-	})
-
-	// Goroutines to poll exporters
-	c.healthyWorkers = make(chan reporter.ChannelHealthcheckFunc)
-	c.r.RegisterHealthcheck("metadata/worker", reporter.ChannelHealthcheck(c.t.Context(nil), c.healthyWorkers))
-	for i := range c.config.Workers {
-		workerIDStr := strconv.Itoa(i)
-		c.t.Go(func() error {
-			c.r.Debug().Str("worker", workerIDStr).Msg("starting metadata provider")
-			dying := c.t.Dying()
-			for {
-				select {
-				case <-dying:
-					c.r.Debug().Str("worker", workerIDStr).Msg("stopping metadata provider")
-					return nil
-				case cb, ok := <-c.healthyWorkers:
-					if ok {
-						cb(reporter.HealthcheckOK, fmt.Sprintf("worker %s ok", workerIDStr))
-					}
-				case request := <-c.providerChannel:
-					c.providerIncomingRequest(request)
-				}
-			}
-		})
-	}
 	return nil
 }
 
 // Stop stops the metadata component
 func (c *Component) Stop() error {
 	defer func() {
-		close(c.dispatcherChannel)
-		close(c.providerChannel)
-		close(c.healthyWorkers)
 		if c.config.CachePersistFile != "" {
 			if err := c.sc.Save(c.config.CachePersistFile); err != nil {
 				c.r.Err(err).Msg("cannot save cache")
@@ -227,102 +176,102 @@ func (c *Component) Stop() error {
 	return c.t.Wait()
 }
 
-// Lookup for interface information for the provided exporter and ifIndex.
-// If the information is not in the cache, it will be polled, but
-// won't be returned immediately.
-func (c *Component) Lookup(t time.Time, exporterIP netip.Addr, ifIndex uint) (provider.Answer, bool) {
+// Lookup for interface information for the provided exporter and ifIndex. If
+// the information is not in the cache, it will be polled from the provider. The
+// returned result has a field Found to tell if the lookup is successful or not.
+func (c *Component) Lookup(t time.Time, exporterIP netip.Addr, ifIndex uint) provider.Answer {
 	query := provider.Query{ExporterIP: exporterIP, IfIndex: ifIndex}
-	answer, ok := c.sc.Lookup(t, query)
-	if !ok {
-		select {
-		case c.dispatcherChannel <- query:
-		default:
-			c.metrics.providerBusyCount.WithLabelValues(exporterIP.Unmap().String()).Inc()
-		}
-	}
-	return answer, ok
+	// Check cache first
+	if answer, ok := c.sc.Lookup(t, query); ok {
+		return answer
+	}
+
+	// Use singleflight to prevent duplicate queries
+	key := fmt.Sprintf("%s-%d", exporterIP, ifIndex)
+	result, err, _ := c.sf.Do(key, func() (any, error) {
+		return c.queryProviders(query)
+	})
+
+	if err != nil {
+		return provider.Answer{}
+	}
+
+	return result.(provider.Answer)
 }
 
-// dispatchIncomingRequest dispatches an incoming request to workers. It may
-// handle more than the provided request if it can.
-func (c *Component) dispatchIncomingRequest(request provider.Query) {
-	requestsMap := map[netip.Addr][]uint{
-		request.ExporterIP: {request.IfIndex},
-	}
-	dying := c.t.Dying()
-	for c.config.MaxBatchRequests > 0 {
-		select {
-		case request := <-c.dispatcherChannel:
-			indexes, ok := requestsMap[request.ExporterIP]
-			if !ok {
-				indexes = []uint{request.IfIndex}
-			} else {
-				indexes = append(indexes, request.IfIndex)
-			}
-			requestsMap[request.ExporterIP] = indexes
-			// We don't want to exceed the configured limit but also there is no
-			// point of batching requests of too many exporters.
-			if len(indexes) < c.config.MaxBatchRequests && len(requestsMap) < 4 {
-				continue
-			}
-		case <-dying:
-			return
-		default:
-			// No more requests in queue
-		}
-		break
-	}
-	for exporterIP, ifIndexes := range requestsMap {
-		if len(ifIndexes) > 1 {
-			c.metrics.providerBatchedCount.Add(float64(len(ifIndexes)))
-		}
-		select {
-		case <-dying:
-			return
-		case c.providerChannel <- provider.BatchQuery{ExporterIP: exporterIP, IfIndexes: ifIndexes}:
-		}
-	}
-}
-
-// providerIncomingRequest handles an incoming request to the provider. It
-// uses a breaker to avoid pushing working on non-responsive exporters.
-func (c *Component) providerIncomingRequest(request provider.BatchQuery) {
-	// Avoid querying too much exporters with errors
+// queryProviders queries all providers. It returns the answer for the specific
+// query and cache it.
+func (c *Component) queryProviders(query provider.Query) (provider.Answer, error) {
+	c.metrics.providerRequests.Inc()
+
+	// Check if provider breaker is open
 	c.providerBreakersLock.Lock()
-	providerBreaker, ok := c.providerBreakers[request.ExporterIP]
+	providerBreaker, ok := c.providerBreakers[query.ExporterIP]
 	if !ok {
 		providerBreaker = breaker.New(20, 1, time.Minute)
-		c.providerBreakers[request.ExporterIP] = providerBreaker
+		c.providerBreakers[query.ExporterIP] = providerBreaker
 	}
 	c.providerBreakersLock.Unlock()
 
-	if err := providerBreaker.Run(func() error {
-		ctx := c.t.Context(nil)
+	var result provider.Answer
+	err := providerBreaker.Run(func() error {
+		deadline := time.Now().Add(c.config.QueryTimeout)
+		if deadline.Before(c.initialDeadline) {
+			deadline = c.initialDeadline
+		}
+		ctx, cancel := context.WithDeadlineCause(
+			c.t.Context(nil),
+			deadline,
+			ErrQueryTimeout)
+		defer cancel()
+
+		now := c.d.Clock.Now()
 		for _, p := range c.providers {
-			// Query providers in the order they are defined and stop on the
-			// first provider accepting to handle the query.
-			if err := p.Query(ctx, &request); err != nil && err != provider.ErrSkipProvider {
-				return err
-			} else if err == provider.ErrSkipProvider {
+			answer, err := p.Query(ctx, query)
+			if err == provider.ErrSkipProvider {
+				// Next provider
 				continue
 			}
+			if err != nil {
+				return err
+			}
+			c.sc.Put(now, query, answer)
+			result = answer
 			return nil
 		}
 		return nil
-	}); err == breaker.ErrBreakerOpen {
-		c.metrics.providerBreakerOpenCount.WithLabelValues(request.ExporterIP.Unmap().String()).Inc()
-		c.providerBreakersLock.Lock()
-		l, ok := c.providerBreakerLoggers[request.ExporterIP]
-		if !ok {
-			l = c.r.Sample(reporter.BurstSampler(time.Minute, 1)).
-				With().
-				Str("exporter", request.ExporterIP.Unmap().String()).
-				Logger()
-			c.providerBreakerLoggers[request.ExporterIP] = l
+	})
+	if err != nil {
+		c.metrics.providerErrors.Inc()
+		if err == breaker.ErrBreakerOpen {
+			c.metrics.providerBreakerOpenCount.WithLabelValues(query.ExporterIP.Unmap().String()).Inc()
+			c.providerBreakersLock.Lock()
+			l, ok := c.providerBreakerLoggers[query.ExporterIP]
+			if !ok {
+				l = c.r.Sample(reporter.BurstSampler(time.Minute, 1)).
+					With().
+					Str("exporter", query.ExporterIP.Unmap().String()).
+					Logger()
+				c.providerBreakerLoggers[query.ExporterIP] = l
+			}
+			l.Warn().Msg("provider breaker open")
+			c.providerBreakersLock.Unlock()
 		}
-		l.Warn().Msg("provider breaker open")
-		c.providerBreakersLock.Unlock()
+		return provider.Answer{}, err
 	}
+
+	return result, nil
+}
+
+// refreshCacheEntry refreshes a single cache entry.
+func (c *Component) refreshCacheEntry(exporterIP netip.Addr, ifIndex uint) {
+	query := provider.Query{
+		ExporterIP: exporterIP,
+		IfIndex:    ifIndex,
+	}
+	c.queryProviders(query)
 }
 
 // expireCache handles cache expiration and refresh.
@@ -335,15 +284,8 @@ func (c *Component) expireCache() {
 	toRefresh := c.sc.NeedUpdates(c.d.Clock.Now().Add(-c.config.CacheRefresh))
 	for exporter, ifaces := range toRefresh {
 		for _, ifIndex := range ifaces {
-			select {
-			case c.dispatcherChannel <- provider.Query{
-				ExporterIP: exporter,
-				IfIndex:    ifIndex,
-			}:
-				count++
-			default:
-				c.metrics.providerBusyCount.WithLabelValues(exporter.Unmap().String()).Inc()
-			}
+			go c.refreshCacheEntry(exporter, ifIndex)
+			count++
 		}
 	}
 	c.r.Debug().Int("count", count).Msg("refreshed metadata cache")
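
Two mechanisms in the rewritten component are worth isolating. First, request deduplication: concurrent Lookup calls for the same exporter and ifIndex collapse into a single provider query through golang.org/x/sync/singleflight, which replaces the old dispatcher/worker machinery. A runnable sketch (the key format mirrors the one used in Lookup; the sleep simulates a provider poll):

package main

import (
	"fmt"
	"sync"
	"time"

	"golang.org/x/sync/singleflight"
)

func main() {
	var sf singleflight.Group
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// All three goroutines share one execution of the function.
			v, _, shared := sf.Do("2001:db8::1-9", func() (any, error) {
				time.Sleep(10 * time.Millisecond) // simulated provider poll
				return "Gi0/0/9", nil
			})
			fmt.Println(v, shared)
		}()
	}
	wg.Wait()
}

Second, the startup grace period: each query normally gets QueryTimeout, but until InitialDelay has elapsed after Start the deadline is clamped up to the component's initial deadline, so early queries may wait for slow-starting sources instead of losing data. context.WithDeadlineCause (Go 1.21+) then makes the timeout surface as ErrQueryTimeout via context.Cause while ctx.Err() remains context.DeadlineExceeded. The clamping logic in isolation, as a sketch:

package metadata

import "time"

// effectiveDeadline computes the per-query deadline used in queryProviders:
// the later of now+queryTimeout and the fixed initial deadline set in Start.
func effectiveDeadline(now, initialDeadline time.Time, queryTimeout time.Duration) time.Time {
	deadline := now.Add(queryTimeout)
	if deadline.Before(initialDeadline) {
		deadline = initialDeadline
	}
	return deadline
}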

@@ -16,6 +16,7 @@ import (
 	"akvorado/common/daemon"
 	"akvorado/common/helpers"
 	"akvorado/common/reporter"
+	"akvorado/common/schema"
 	"akvorado/outlet/metadata/provider"
 	"akvorado/outlet/metadata/provider/static"
 )
@@ -23,7 +24,7 @@ import (
 func expectMockLookup(t *testing.T, c *Component, exporter string, ifIndex uint, expected provider.Answer) {
 	t.Helper()
 	ip := netip.AddrFrom16(netip.MustParseAddr(exporter).As16())
-	got, _ := c.Lookup(time.Now(), ip, ifIndex)
+	got := c.Lookup(time.Now(), ip, ifIndex)
 	if diff := helpers.Diff(got, expected); diff != "" {
 		t.Fatalf("Lookup() (-got, +want):\n%s", diff)
 	}
@@ -32,24 +33,53 @@ func expectMockLookup(t *testing.T, c *Component, exporter string, ifIndex uint,
 func TestLookup(t *testing.T) {
 	r := reporter.NewMock(t)
 	c := NewMock(t, r, DefaultConfiguration(), Dependencies{Daemon: daemon.NewMock(t)})
-	expectMockLookup(t, c, "127.0.0.1", 765, provider.Answer{})
-	expectMockLookup(t, c, "127.0.0.1", 999, provider.Answer{})
-	time.Sleep(30 * time.Millisecond)
 	expectMockLookup(t, c, "127.0.0.1", 765, provider.Answer{
+		Found: true,
 		Exporter: provider.Exporter{
 			Name: "127_0_0_1",
 		},
 		Interface: provider.Interface{Name: "Gi0/0/765",
 			Description: "Interface 765",
 			Speed:       1000,
 		},
 	})
-	expectMockLookup(t, c, "127.0.0.1", 999, provider.Answer{
+	expectMockLookup(t, c, "127.0.0.1", 1010, provider.Answer{
+		Found: true,
 		Exporter: provider.Exporter{
 			Name: "127_0_0_1",
+			Group:  "metadata group",
+			Region: "metadata region",
+			Role:   "metadata role",
+			Site:   "metadata site",
+			Tenant: "metadata tenant",
+		},
+		Interface: provider.Interface{Name: "Gi0/0/1010",
+			Description: "Interface 1010",
+			Speed:       1000,
 		},
 	})
+	expectMockLookup(t, c, "127.0.0.1", 2010, provider.Answer{
+		Found: true,
+		Exporter: provider.Exporter{
+			Name:   "127_0_0_1",
+			Group:  "metadata group",
+			Region: "metadata region",
+			Role:   "metadata role",
+			Site:   "metadata site",
+			Tenant: "metadata tenant",
+		},
+		Interface: provider.Interface{Name: "Gi0/0/2010",
+			Description:  "Interface 2010",
+			Speed:        1000,
+			Boundary:     schema.InterfaceBoundaryExternal,
+			Connectivity: "metadata connectivity",
+			Provider:     "metadata provider",
+		},
+	})
+	// With a simple lookup, this is not possible to distinguish between a
+	// transient error or a fatal error. Only the caching subsystem knows.
+	expectMockLookup(t, c, "127.0.0.1", 999, provider.Answer{})
+	expectMockLookup(t, c, "127.0.0.1", 998, provider.Answer{})
 }
 
 func TestComponentSaveLoad(t *testing.T) {
@@ -59,10 +89,8 @@ func TestComponentSaveLoad(t *testing.T) {
 	t.Run("save", func(t *testing.T) {
 		r := reporter.NewMock(t)
 		c := NewMock(t, r, configuration, Dependencies{Daemon: daemon.NewMock(t)})
 
-		expectMockLookup(t, c, "127.0.0.1", 765, provider.Answer{})
-		time.Sleep(30 * time.Millisecond)
 		expectMockLookup(t, c, "127.0.0.1", 765, provider.Answer{
+			Found: true,
 			Exporter: provider.Exporter{
 				Name: "127_0_0_1",
 			},
@@ -76,8 +104,11 @@ func TestComponentSaveLoad(t *testing.T) {
 
 	t.Run("load", func(t *testing.T) {
 		r := reporter.NewMock(t)
+		// Switch to the empty provider to check if we get answers from the cache.
+		configuration.Providers = []ProviderConfiguration{{Config: emptyProviderConfiguration{}}}
 		c := NewMock(t, r, configuration, Dependencies{Daemon: daemon.NewMock(t)})
 		expectMockLookup(t, c, "127.0.0.1", 765, provider.Answer{
+			Found: true,
 			Exporter: provider.Exporter{
 				Name: "127_0_0_1",
 			},
@@ -96,14 +127,11 @@ func TestAutoRefresh(t *testing.T) {
 	mockClock := clock.NewMock()
 	c := NewMock(t, r, configuration, Dependencies{Daemon: daemon.NewMock(t), Clock: mockClock})
 
-	// Fetch a value
-	expectMockLookup(t, c, "127.0.0.1", 765, provider.Answer{})
-	time.Sleep(30 * time.Millisecond)
 	expectMockLookup(t, c, "127.0.0.1", 765, provider.Answer{
+		Found: true,
 		Exporter: provider.Exporter{
 			Name: "127_0_0_1",
 		},
 		Interface: provider.Interface{
 			Name:        "Gi0/0/765",
 			Description: "Interface 765",
@@ -119,10 +147,9 @@ func TestAutoRefresh(t *testing.T) {
 
 	// Go forward, we expect the entry to have been refreshed and be still present
 	mockClock.Add(11 * time.Minute)
-	time.Sleep(30 * time.Millisecond)
 	mockClock.Add(2 * time.Minute)
-	time.Sleep(30 * time.Millisecond)
 	expectMockLookup(t, c, "127.0.0.1", 765, provider.Answer{
+		Found: true,
 		Exporter: provider.Exporter{
 			Name: "127_0_0_1",
 		},
@@ -137,13 +164,13 @@ func TestAutoRefresh(t *testing.T) {
 	for _, runs := range []string{"29", "30", "31"} { // 63/2
 		expectedMetrics := map[string]string{
 			`expired_entries_total`: "0",
-			`hits_total`:            "4",
-			`misses_total`:          "1",
+			`misses_total`:          "1", // First lookup misses
+			`hits_total`:            "3", // Subsequent ones hits
 			`size_entries`:          "1",
 			`refresh_runs_total`:    runs,
-			`refreshs_total`:        "1",
+			`refreshes_total`:       "1", // One refresh (after 1 hour)
 		}
-		if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" && runs == "31" {
+		if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" && runs == "19" {
 			t.Fatalf("Metrics (-got, +want):\n%s", diff)
 		} else if diff == "" {
 			break
@@ -184,22 +211,21 @@ func TestConfigCheck(t *testing.T) {
 	})
 }
 
-func TestStartStopWithMultipleWorkers(t *testing.T) {
+func TestStartStopSimple(t *testing.T) {
 	r := reporter.NewMock(t)
 	configuration := DefaultConfiguration()
-	configuration.Workers = 5
 	NewMock(t, r, configuration, Dependencies{Daemon: daemon.NewMock(t)})
 }
 
 type errorProvider struct{}
 
-func (ep errorProvider) Query(_ context.Context, _ *provider.BatchQuery) error {
-	return errors.New("noooo")
+func (ep errorProvider) Query(_ context.Context, _ provider.Query) (provider.Answer, error) {
+	return provider.Answer{}, errors.New("noooo")
 }
 
 type errorProviderConfiguration struct{}
 
-func (epc errorProviderConfiguration) New(_ *reporter.Reporter, _ func(provider.Update)) (provider.Provider, error) {
+func (epc errorProviderConfiguration) New(_ *reporter.Reporter) (provider.Provider, error) {
 	return errorProvider{}, nil
 }
 
@@ -216,7 +242,6 @@ func TestProviderBreaker(t *testing.T) {
 		t.Run(tc.Name, func(t *testing.T) {
 			r := reporter.NewMock(t)
 			configuration := DefaultConfiguration()
-			configuration.MaxBatchRequests = 0
 			configuration.Providers = []ProviderConfiguration{{Config: tc.ProviderConfiguration}}
 			c := NewMock(t, r, configuration, Dependencies{Daemon: daemon.NewMock(t)})
 			c.metrics.providerBreakerOpenCount.WithLabelValues("127.0.0.1").Add(0)
@@ -240,72 +265,6 @@ func TestProviderBreaker(t *testing.T) {
 	}
 }
 
-type batchProvider struct {
-	config *batchProviderConfiguration
-}
-
-func (bp *batchProvider) Query(_ context.Context, query *provider.BatchQuery) error {
-	bp.config.received = append(bp.config.received, *query)
-	return nil
-}
-
-type batchProviderConfiguration struct {
-	received []provider.BatchQuery
-}
-
-func (bpc *batchProviderConfiguration) New(_ *reporter.Reporter, _ func(provider.Update)) (provider.Provider, error) {
-	return &batchProvider{config: bpc}, nil
-}
-
-func TestBatching(t *testing.T) {
-	bcp := batchProviderConfiguration{
-		received: []provider.BatchQuery{},
-	}
-	r := reporter.NewMock(t)
-	t.Run("run", func(t *testing.T) {
-		configuration := DefaultConfiguration()
-		configuration.Providers = []ProviderConfiguration{{Config: &bcp}}
-		c := NewMock(t, r, configuration, Dependencies{Daemon: daemon.NewMock(t)})
-
-		// Block dispatcher
-		blocker := make(chan bool)
-		c.dispatcherBChannel <- blocker
-
-		defer func() {
-			// Unblock
-			time.Sleep(20 * time.Millisecond)
-			close(blocker)
-			time.Sleep(20 * time.Millisecond)
-		}()
-
-		// Queue requests
-		c.Lookup(c.d.Clock.Now(), netip.MustParseAddr("::ffff:127.0.0.1"), 766)
-		c.Lookup(c.d.Clock.Now(), netip.MustParseAddr("::ffff:127.0.0.1"), 767)
-		c.Lookup(c.d.Clock.Now(), netip.MustParseAddr("::ffff:127.0.0.1"), 768)
-		c.Lookup(c.d.Clock.Now(), netip.MustParseAddr("::ffff:127.0.0.1"), 769)
-	})
-
-	t.Run("check", func(t *testing.T) {
-		gotMetrics := r.GetMetrics("akvorado_outlet_metadata_provider_", "batched_requests_total")
-		expectedMetrics := map[string]string{
-			`batched_requests_total`: "4",
-		}
-		if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" {
-			t.Errorf("Metrics (-got, +want):\n%s", diff)
-		}
-
-		expectedAccepted := []provider.BatchQuery{
-			{
-				ExporterIP: netip.MustParseAddr("::ffff:127.0.0.1"),
-				IfIndexes:  []uint{766, 767, 768, 769},
-			},
-		}
-		if diff := helpers.Diff(bcp.received, expectedAccepted); diff != "" {
-			t.Errorf("Accepted requests (-got, +want):\n%s", diff)
-		}
-	})
-}
-
 func TestMultipleProviders(t *testing.T) {
 	r := reporter.NewMock(t)
 	staticConfiguration1 := static.Configuration{
@@ -359,11 +318,12 @@ func TestMultipleProviders(t *testing.T) {
 	c.Lookup(time.Now(), netip.MustParseAddr("2001:db8:1::1"), 10)
 	c.Lookup(time.Now(), netip.MustParseAddr("2001:db8:2::2"), 12)
 	time.Sleep(30 * time.Millisecond)
-	got1, _ := c.Lookup(time.Now(), netip.MustParseAddr("2001:db8:1::1"), 10)
-	got2, _ := c.Lookup(time.Now(), netip.MustParseAddr("2001:db8:2::2"), 12)
+	got1 := c.Lookup(time.Now(), netip.MustParseAddr("2001:db8:1::1"), 10)
+	got2 := c.Lookup(time.Now(), netip.MustParseAddr("2001:db8:2::2"), 12)
 	got := []provider.Answer{got1, got2}
 	expected := []provider.Answer{
 		{
+			Found: true,
 			Exporter: provider.Exporter{
 				Name: "static1",
 			},
@@ -373,6 +333,7 @@ func TestMultipleProviders(t *testing.T) {
 				Speed: 1000,
 			},
 		}, {
+			Found: true,
 			Exporter: provider.Exporter{
 				Name: "static2",
 			},
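
TestProviderBreaker above exercises the per-exporter circuit breaker kept from the previous design. It comes from github.com/eapache/go-resiliency: breaker.New(20, 1, time.Minute) opens after 20 consecutive errors, and while open, Run returns breaker.ErrBreakerOpen immediately without invoking its callback, which is how the component avoids hammering unresponsive exporters. A runnable sketch with the same parameters:

package main

import (
	"errors"
	"fmt"
	"time"
)

import "github.com/eapache/go-resiliency/breaker"

func main() {
	// Same parameters as the component: open after 20 consecutive errors,
	// close after 1 success, retry a half-open probe after one minute.
	b := breaker.New(20, 1, time.Minute)
	for i := 0; i < 25; i++ {
		err := b.Run(func() error {
			return errors.New("exporter unreachable") // simulated failure
		})
		if err == breaker.ErrBreakerOpen {
			fmt.Println("breaker open: query dropped without polling")
		}
	}
}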

@@ -7,6 +7,7 @@ package metadata
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"strings"
 	"testing"
@@ -18,54 +19,62 @@ import (
 )
 
 // mockProvider represents a mock provider.
-type mockProvider struct {
-	put func(provider.Update)
-}
+type mockProvider struct{}
 
 // Query query the mock provider for a value.
-func (mp mockProvider) Query(_ context.Context, query *provider.BatchQuery) error {
-	for _, ifIndex := range query.IfIndexes {
-		answer := provider.Answer{
-			Exporter: provider.Exporter{
-				Name: strings.ReplaceAll(query.ExporterIP.Unmap().String(), ".", "_"),
-			},
-		}
-		if ifIndex != 999 {
-			answer.Interface.Name = fmt.Sprintf("Gi0/0/%d", ifIndex)
-			answer.Interface.Description = fmt.Sprintf("Interface %d", ifIndex)
-			answer.Interface.Speed = 1000
-		}
-		// in iface with metadata (overriden by out iface)
-		if ifIndex == 1010 {
-			answer.Exporter.Group = "metadata group"
-			answer.Exporter.Region = "metadata region"
-			answer.Exporter.Role = "metadata role"
-			answer.Exporter.Site = "metadata site"
-			answer.Exporter.Tenant = "metadata tenant"
-		}
-
-		// out iface with metadata
-		if ifIndex == 2010 {
-			answer.Interface.Boundary = schema.InterfaceBoundaryExternal
-			answer.Interface.Connectivity = "metadata connectivity"
-			answer.Interface.Provider = "metadata provider"
-			answer.Exporter.Group = "metadata group"
-			answer.Exporter.Region = "metadata region"
-			answer.Exporter.Role = "metadata role"
-			answer.Exporter.Site = "metadata site"
-			answer.Exporter.Tenant = "metadata tenant"
-		}
-		mp.put(provider.Update{Query: provider.Query{ExporterIP: query.ExporterIP, IfIndex: ifIndex}, Answer: answer})
-	}
-	return nil
+// - ifIndex = 999 → not found
+// - ifIndex = 998 → transient error
+// - ifIndex = 1010 → with metadata for exporter
+// - ifIndex = 2010 → with metadata for exporter and interface
+func (mp mockProvider) Query(_ context.Context, query provider.Query) (provider.Answer, error) {
+	ifIndex := query.IfIndex
+	if ifIndex == 999 {
+		return provider.Answer{}, nil
+	}
+	if ifIndex == 998 {
+		return provider.Answer{}, errors.New("noooo")
+	}
+
+	answer := provider.Answer{
+		Exporter: provider.Exporter{
+			Name: strings.ReplaceAll(query.ExporterIP.Unmap().String(), ".", "_"),
+		},
+	}
+	answer.Interface.Name = fmt.Sprintf("Gi0/0/%d", ifIndex)
+	answer.Interface.Description = fmt.Sprintf("Interface %d", ifIndex)
+	answer.Interface.Speed = 1000
+
+	// in iface with metadata (overriden by out iface)
+	if ifIndex == 1010 {
+		answer.Exporter.Group = "metadata group"
+		answer.Exporter.Region = "metadata region"
+		answer.Exporter.Role = "metadata role"
+		answer.Exporter.Site = "metadata site"
+		answer.Exporter.Tenant = "metadata tenant"
+	}
+
+	// out iface with metadata
+	if ifIndex == 2010 {
+		answer.Interface.Boundary = schema.InterfaceBoundaryExternal
+		answer.Interface.Connectivity = "metadata connectivity"
+		answer.Interface.Provider = "metadata provider"
+		answer.Exporter.Group = "metadata group"
+		answer.Exporter.Region = "metadata region"
+		answer.Exporter.Role = "metadata role"
+		answer.Exporter.Site = "metadata site"
+		answer.Exporter.Tenant = "metadata tenant"
+	}
+
+	answer.Found = true
+	return answer, nil
 }
 
 // mockProviderConfiguration is the configuration for the mock provider.
 type mockProviderConfiguration struct{}
 
 // New returns a new mock provider.
-func (mpc mockProviderConfiguration) New(_ *reporter.Reporter, put func(provider.Update)) (provider.Provider, error) {
-	return mockProvider{put: put}, nil
+func (mpc mockProviderConfiguration) New(_ *reporter.Reporter) (provider.Provider, error) {
+	return mockProvider{}, nil
 }
 
 // NewMock creates a new metadata component building synthetic values. It is already started.
@@ -81,3 +90,19 @@ func NewMock(t *testing.T, reporter *reporter.Reporter, configuration Configurat
 	helpers.StartStop(t, c)
 	return c
 }
+
+// emptyProvider represents an empty mock provider.
+type emptyProvider struct{}
+
+// Query returns always a not found status for the empty mock provider
+func (mp emptyProvider) Query(_ context.Context, _ provider.Query) (provider.Answer, error) {
+	return provider.Answer{}, nil
+}
+
+// emptyProviderConfiguration is the configuration for the empty provider.
+type emptyProviderConfiguration struct{}
+
+// New returns a new empty provider.
+func (mpc emptyProviderConfiguration) New(_ *reporter.Reporter) (provider.Provider, error) {
+	return emptyProvider{}, nil
+}
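
With the new mocks in place, callers see only the simplified Lookup, which returns a bare provider.Answer: a zero Found flag covers both "interface unknown" and "provider failed transiently", as the comment in TestLookup notes. A usage sketch, where c, exporterIP, and ifIndex stand for the caller's values:

answer := c.Lookup(time.Now(), exporterIP, ifIndex)
if !answer.Found {
	// Unknown interface or transient provider failure; only the caching
	// subsystem can tell the difference, so treat the flow as unenriched.
	return
}
name := answer.Interface.Name // e.g. "Gi0/0/765" with the mock provider
_ = name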