mirror of
https://github.com/akvorado/akvorado.git
synced 2025-12-12 06:24:10 +01:00
snmp: make poller handle coaelescing
This commit is contained in:
6
helpers/norace.go
Normal file
6
helpers/norace.go
Normal file
@@ -0,0 +1,6 @@
|
||||
//go:build !race
|
||||
|
||||
package helpers
|
||||
|
||||
// RaceEnabled reports if the race detector is enabled.
|
||||
const RaceEnabled = false
|
||||
6
helpers/race.go
Normal file
6
helpers/race.go
Normal file
@@ -0,0 +1,6 @@
|
||||
//go:build race
|
||||
|
||||
package helpers
|
||||
|
||||
// RaceEnabled reports if the race detector is enabled.
|
||||
const RaceEnabled = true
|
||||
@@ -15,7 +15,7 @@ import (
|
||||
)
|
||||
|
||||
type poller interface {
|
||||
Poll(ctx context.Context, samplerIP string, port uint16, community string, ifIndex uint)
|
||||
Poll(ctx context.Context, samplerIP string, port uint16, community string, ifIndexes []uint)
|
||||
}
|
||||
|
||||
// realPoller will poll samplers using real SNMP requests.
|
||||
@@ -86,22 +86,30 @@ func newPoller(r *reporter.Reporter, config pollerConfig, clock clock.Clock, put
|
||||
return p
|
||||
}
|
||||
|
||||
func (p *realPoller) Poll(ctx context.Context, sampler string, port uint16, community string, ifIndex uint) {
|
||||
func (p *realPoller) Poll(ctx context.Context, sampler string, port uint16, community string, ifIndexes []uint) {
|
||||
// Check if already have a request running
|
||||
key := fmt.Sprintf("%s@%d", sampler, ifIndex)
|
||||
filteredIfIndexes := make([]uint, 0, len(ifIndexes))
|
||||
keys := make([]string, 0, len(ifIndexes))
|
||||
p.pendingRequestsLock.Lock()
|
||||
_, ok := p.pendingRequests[key]
|
||||
if !ok {
|
||||
p.pendingRequests[key] = true
|
||||
for _, ifIndex := range ifIndexes {
|
||||
key := fmt.Sprintf("%s@%d", sampler, ifIndex)
|
||||
_, ok := p.pendingRequests[key]
|
||||
if !ok {
|
||||
p.pendingRequests[key] = true
|
||||
filteredIfIndexes = append(filteredIfIndexes, ifIndex)
|
||||
keys = append(keys, key)
|
||||
}
|
||||
}
|
||||
p.pendingRequestsLock.Unlock()
|
||||
if ok {
|
||||
// Request already in progress, skip it
|
||||
if len(filteredIfIndexes) == 0 {
|
||||
return
|
||||
}
|
||||
ifIndexes = filteredIfIndexes
|
||||
defer func() {
|
||||
p.pendingRequestsLock.Lock()
|
||||
delete(p.pendingRequests, key)
|
||||
for _, key := range keys {
|
||||
delete(p.pendingRequests, key)
|
||||
}
|
||||
p.pendingRequestsLock.Unlock()
|
||||
}()
|
||||
|
||||
@@ -127,63 +135,88 @@ func (p *realPoller) Poll(ctx context.Context, sampler string, port uint16, comm
|
||||
}
|
||||
}
|
||||
start := p.clock.Now()
|
||||
sysName := "1.3.6.1.2.1.1.5.0"
|
||||
ifDescr := fmt.Sprintf("1.3.6.1.2.1.2.2.1.2.%d", ifIndex)
|
||||
ifAlias := fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.18.%d", ifIndex)
|
||||
ifSpeed := fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.15.%d", ifIndex)
|
||||
result, err := g.Get([]string{sysName, ifDescr, ifAlias, ifSpeed})
|
||||
requests := []string{"1.3.6.1.2.1.1.5.0"}
|
||||
for _, ifIndex := range ifIndexes {
|
||||
moreRequests := []string{
|
||||
fmt.Sprintf("1.3.6.1.2.1.2.2.1.2.%d", ifIndex), // ifDescr
|
||||
fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.18.%d", ifIndex), // ifAlias
|
||||
fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.15.%d", ifIndex), // ifSpeed
|
||||
}
|
||||
requests = append(requests, moreRequests...)
|
||||
}
|
||||
result, err := g.Get(requests)
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
p.metrics.failures.WithLabelValues(sampler, "get").Inc()
|
||||
if p.errLimiter.Allow() {
|
||||
p.r.Err(err).Str("sampler", sampler).Msg("unable to get")
|
||||
p.r.Err(err).Str("sampler", sampler).Msg("unable to GET")
|
||||
}
|
||||
return
|
||||
}
|
||||
if result.Error != gosnmp.NoError && result.ErrorIndex == 0 {
|
||||
// There is some error affecting the whole request
|
||||
p.metrics.failures.WithLabelValues(sampler, "get").Inc()
|
||||
if p.errLimiter.Allow() {
|
||||
p.r.Error().Str("sampler", sampler).Stringer("code", result.Error).Msg("unable to GET")
|
||||
}
|
||||
}
|
||||
|
||||
ok = true
|
||||
processStr := func(idx int, what string, target *string) {
|
||||
processStr := func(idx int, what string, target *string) bool {
|
||||
switch result.Variables[idx].Type {
|
||||
case gosnmp.OctetString:
|
||||
*target = string(result.Variables[idx].Value.([]byte))
|
||||
return true
|
||||
case gosnmp.NoSuchInstance, gosnmp.NoSuchObject:
|
||||
p.metrics.failures.WithLabelValues(sampler, fmt.Sprintf("%s_missing", what)).Inc()
|
||||
ok = false
|
||||
return false
|
||||
default:
|
||||
p.metrics.failures.WithLabelValues(sampler, fmt.Sprintf("%s_unknown_type", what)).Inc()
|
||||
ok = false
|
||||
return false
|
||||
}
|
||||
}
|
||||
processUint := func(idx int, what string, target *uint) {
|
||||
processUint := func(idx int, what string, target *uint) bool {
|
||||
switch result.Variables[idx].Type {
|
||||
case gosnmp.Gauge32:
|
||||
*target = result.Variables[idx].Value.(uint)
|
||||
return true
|
||||
case gosnmp.NoSuchInstance, gosnmp.NoSuchObject:
|
||||
p.metrics.failures.WithLabelValues(sampler, fmt.Sprintf("%s_missing", what)).Inc()
|
||||
ok = false
|
||||
return false
|
||||
default:
|
||||
p.metrics.failures.WithLabelValues(sampler, fmt.Sprintf("%s_unknown_type", what)).Inc()
|
||||
ok = false
|
||||
return false
|
||||
}
|
||||
}
|
||||
var sysNameVal, ifDescrVal, ifAliasVal string
|
||||
var ifSpeedVal uint
|
||||
processStr(0, "sysname", &sysNameVal)
|
||||
processStr(1, "ifdescr", &ifDescrVal)
|
||||
processStr(2, "ifalias", &ifAliasVal)
|
||||
processUint(3, "ifspeed", &ifSpeedVal)
|
||||
if !ok {
|
||||
if !processStr(0, "sysname", &sysNameVal) {
|
||||
return
|
||||
}
|
||||
p.put(sampler, sysNameVal, ifIndex, Interface{
|
||||
Name: ifDescrVal,
|
||||
Description: ifAliasVal,
|
||||
Speed: ifSpeedVal,
|
||||
})
|
||||
for idx := 1; idx < len(requests)-2; idx += 3 {
|
||||
ok := true
|
||||
if !processStr(idx, "ifdescr", &ifDescrVal) {
|
||||
ok = false
|
||||
}
|
||||
if !processStr(idx+1, "ifalias", &ifAliasVal) {
|
||||
ok = false
|
||||
}
|
||||
if !processUint(idx+2, "ifspeed", &ifSpeedVal) {
|
||||
ok = false
|
||||
}
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
ifIndex := ifIndexes[(idx-1)/3]
|
||||
p.put(sampler, sysNameVal, ifIndex, Interface{
|
||||
Name: ifDescrVal,
|
||||
Description: ifAliasVal,
|
||||
Speed: ifSpeedVal,
|
||||
})
|
||||
p.metrics.successes.WithLabelValues(sampler).Inc()
|
||||
}
|
||||
|
||||
p.metrics.successes.WithLabelValues(sampler).Inc()
|
||||
p.metrics.times.WithLabelValues(sampler).Observe(p.clock.Now().Sub(start).Seconds())
|
||||
}
|
||||
|
||||
|
||||
@@ -112,10 +112,10 @@ func TestPoller(t *testing.T) {
|
||||
go server.ServeForever()
|
||||
defer server.Shutdown()
|
||||
|
||||
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", 641)
|
||||
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", 642)
|
||||
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", 643)
|
||||
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", 644)
|
||||
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", []uint{641})
|
||||
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", []uint{642})
|
||||
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", []uint{643})
|
||||
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", []uint{644})
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
if diff := helpers.Diff(got, []string{
|
||||
`127.0.0.1 sampler62 641 Gi0/0/0/0 Transit 10000`,
|
||||
@@ -124,18 +124,13 @@ func TestPoller(t *testing.T) {
|
||||
t.Fatalf("Poll() (-got, +want):\n%s", diff)
|
||||
}
|
||||
|
||||
gotMetrics := r.GetMetrics("akvorado_snmp_poller_")
|
||||
gotMetrics := r.GetMetrics("akvorado_snmp_poller_", "failure_", "pending_", "success_")
|
||||
expectedMetrics := map[string]string{
|
||||
`failure_requests{error="ifalias_missing",sampler="127.0.0.1"}`: "2",
|
||||
`failure_requests{error="ifspeed_missing",sampler="127.0.0.1"}`: "1",
|
||||
`failure_requests{error="ifdescr_missing",sampler="127.0.0.1"}`: "1",
|
||||
`pending_requests`: "0",
|
||||
`seconds_count{sampler="127.0.0.1"}`: "2",
|
||||
`seconds_sum{sampler="127.0.0.1"}`: "0",
|
||||
`seconds{sampler="127.0.0.1",quantile="0.5"}`: "0",
|
||||
`seconds{sampler="127.0.0.1",quantile="0.9"}`: "0",
|
||||
`seconds{sampler="127.0.0.1",quantile="0.99"}`: "0",
|
||||
`success_requests{sampler="127.0.0.1"}`: "2",
|
||||
`failure_requests{error="ifalias_missing",sampler="127.0.0.1"}`: "2", // 643+644
|
||||
`failure_requests{error="ifdescr_missing",sampler="127.0.0.1"}`: "1", // 644
|
||||
`failure_requests{error="ifspeed_missing",sampler="127.0.0.1"}`: "1", // 644
|
||||
`pending_requests`: "0",
|
||||
`success_requests{sampler="127.0.0.1"}`: "2", // 641+642
|
||||
}
|
||||
if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" {
|
||||
t.Fatalf("Metrics (-got, +want):\n%s", diff)
|
||||
|
||||
24
snmp/root.go
24
snmp/root.go
@@ -143,7 +143,7 @@ func (c *Component) Start() error {
|
||||
select {
|
||||
case c.dispatcherChannel <- lookupRequest{
|
||||
SamplerIP: sampler,
|
||||
IfIndex: []uint{ifIndex},
|
||||
IfIndexes: []uint{ifIndex},
|
||||
}:
|
||||
count++
|
||||
default:
|
||||
@@ -204,7 +204,7 @@ func (c *Component) Start() error {
|
||||
c.t.Context(nil),
|
||||
request.SamplerIP, 161,
|
||||
community,
|
||||
request.IfIndex[0])
|
||||
request.IfIndexes)
|
||||
idleTime := float64(startBusy.Sub(startIdle).Nanoseconds()) / 1000 / 1000 / 1000
|
||||
busyTime := float64(time.Since(startBusy).Nanoseconds()) / 1000 / 1000 / 1000
|
||||
c.metrics.pollerLoopTime.WithLabelValues(workerIDStr, "idle").Observe(idleTime)
|
||||
@@ -237,7 +237,7 @@ func (c *Component) Stop() error {
|
||||
// lookupRequest is used internally to queue a polling request.
|
||||
type lookupRequest struct {
|
||||
SamplerIP string
|
||||
IfIndex []uint
|
||||
IfIndexes []uint
|
||||
}
|
||||
|
||||
// Lookup for interface information for the provided sampler and ifIndex.
|
||||
@@ -248,7 +248,7 @@ func (c *Component) Lookup(samplerIP string, ifIndex uint) (string, Interface, e
|
||||
if errors.Is(err, ErrCacheMiss) {
|
||||
req := lookupRequest{
|
||||
SamplerIP: samplerIP,
|
||||
IfIndex: []uint{ifIndex},
|
||||
IfIndexes: []uint{ifIndex},
|
||||
}
|
||||
select {
|
||||
case c.dispatcherChannel <- req:
|
||||
@@ -263,16 +263,16 @@ func (c *Component) Lookup(samplerIP string, ifIndex uint) (string, Interface, e
|
||||
// provided request if it can.
|
||||
func (c *Component) dispatchIncomingRequest(request lookupRequest) {
|
||||
requestsMap := map[string][]uint{
|
||||
request.SamplerIP: request.IfIndex,
|
||||
request.SamplerIP: request.IfIndexes,
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case request := <-c.dispatcherChannel:
|
||||
indexes, ok := requestsMap[request.SamplerIP]
|
||||
if !ok {
|
||||
indexes = request.IfIndex
|
||||
indexes = request.IfIndexes
|
||||
} else {
|
||||
indexes = append(indexes, request.IfIndex...)
|
||||
indexes = append(indexes, request.IfIndexes...)
|
||||
}
|
||||
requestsMap[request.SamplerIP] = indexes
|
||||
// We don't want to exceed the configured
|
||||
@@ -292,12 +292,10 @@ func (c *Component) dispatchIncomingRequest(request lookupRequest) {
|
||||
if len(ifIndexes) > 1 {
|
||||
c.metrics.pollerCoalescedCount.Add(float64(len(ifIndexes)))
|
||||
}
|
||||
for _, ifIndex := range ifIndexes {
|
||||
select {
|
||||
case <-c.t.Dying():
|
||||
return
|
||||
case c.pollerChannel <- lookupRequest{samplerIP, []uint{ifIndex}}:
|
||||
}
|
||||
select {
|
||||
case <-c.t.Dying():
|
||||
return
|
||||
case c.pollerChannel <- lookupRequest{samplerIP, ifIndexes}:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -193,23 +193,18 @@ type forceCoaelescePoller struct {
|
||||
accepted []lookupRequest
|
||||
}
|
||||
|
||||
func (fcp *forceCoaelescePoller) Poll(ctx context.Context, samplerIP string, _ uint16, _ string, ifIndex uint) {
|
||||
func (fcp *forceCoaelescePoller) Poll(ctx context.Context, samplerIP string, _ uint16, _ string, ifIndexes []uint) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-fcp.accept:
|
||||
fcp.accepted = append(fcp.accepted, lookupRequest{samplerIP, []uint{ifIndex}})
|
||||
fcp.accepted = append(fcp.accepted, lookupRequest{samplerIP, ifIndexes})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCoaelescing(t *testing.T) {
|
||||
r := reporter.NewMock(t)
|
||||
c := NewMock(t, r, DefaultConfiguration, Dependencies{Daemon: daemon.NewMock(t)})
|
||||
defer func() {
|
||||
if err := c.Stop(); err != nil {
|
||||
t.Fatalf("Stop() error:\n%+v", err)
|
||||
}
|
||||
}()
|
||||
fcp := &forceCoaelescePoller{
|
||||
accept: make(chan bool),
|
||||
accepted: []lookupRequest{},
|
||||
@@ -217,22 +212,41 @@ func TestCoaelescing(t *testing.T) {
|
||||
c.poller = fcp
|
||||
|
||||
expectSNMPLookup(t, c, "127.0.0.1", 765, answer{Err: ErrCacheMiss})
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
// dispatcher is now blocked, queue requests
|
||||
expectSNMPLookup(t, c, "127.0.0.1", 766, answer{Err: ErrCacheMiss})
|
||||
expectSNMPLookup(t, c, "127.0.0.1", 767, answer{Err: ErrCacheMiss})
|
||||
expectSNMPLookup(t, c, "127.0.0.1", 768, answer{Err: ErrCacheMiss})
|
||||
expectSNMPLookup(t, c, "127.0.0.1", 769, answer{Err: ErrCacheMiss})
|
||||
time.Sleep(50 * time.Millisecond) // ensure everything is queued
|
||||
fcp.accept <- true
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
// The race detector may require read from the channel before
|
||||
// additional write. So, we can't run the remaining code under
|
||||
// the race detector.
|
||||
if helpers.RaceEnabled {
|
||||
return
|
||||
}
|
||||
|
||||
gotMetrics := r.GetMetrics("akvorado_snmp_poller_", "coalesced_count")
|
||||
expectedMetrics := map[string]string{
|
||||
`coalesced_count`: "4",
|
||||
}
|
||||
if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" {
|
||||
t.Fatalf("Metrics (-got, +want):\n%s", diff)
|
||||
t.Errorf("Metrics (-got, +want):\n%s", diff)
|
||||
}
|
||||
|
||||
// TODO: check we have accepted the 4 requests at the same time
|
||||
fcp.accept <- true
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
if err := c.Stop(); err != nil {
|
||||
t.Fatalf("Stop() error:\n%+v", err)
|
||||
}
|
||||
|
||||
expectedAccepted := []lookupRequest{
|
||||
{"127.0.0.1", []uint{765}},
|
||||
{"127.0.0.1", []uint{766, 767, 768, 769}},
|
||||
}
|
||||
if diff := helpers.Diff(fcp.accepted, expectedAccepted); diff != "" {
|
||||
t.Errorf("Accepted requests (-got, +want):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,13 +26,15 @@ func newMockPoller(community string, put func(string, string, uint, Interface))
|
||||
}
|
||||
|
||||
// Poll just builds synthetic data.
|
||||
func (p *mockPoller) Poll(ctx context.Context, samplerIP string, port uint16, community string, ifIndex uint) {
|
||||
if community == p.community {
|
||||
p.put(samplerIP, strings.ReplaceAll(samplerIP, ".", "_"), ifIndex, Interface{
|
||||
Name: fmt.Sprintf("Gi0/0/%d", ifIndex),
|
||||
Description: fmt.Sprintf("Interface %d", ifIndex),
|
||||
Speed: 1000,
|
||||
})
|
||||
func (p *mockPoller) Poll(ctx context.Context, samplerIP string, port uint16, community string, ifIndexes []uint) {
|
||||
for _, ifIndex := range ifIndexes {
|
||||
if community == p.community {
|
||||
p.put(samplerIP, strings.ReplaceAll(samplerIP, ".", "_"), ifIndex, Interface{
|
||||
Name: fmt.Sprintf("Gi0/0/%d", ifIndex),
|
||||
Description: fmt.Sprintf("Interface %d", ifIndex),
|
||||
Speed: 1000,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user