snmp: make poller handle coaelescing

This commit is contained in:
Vincent Bernat
2022-03-23 14:43:40 +01:00
parent 32b56a2123
commit c4b54b3049
7 changed files with 133 additions and 79 deletions

6
helpers/norace.go Normal file
View File

@@ -0,0 +1,6 @@
//go:build !race
package helpers
// RaceEnabled reports if the race detector is enabled.
const RaceEnabled = false

6
helpers/race.go Normal file
View File

@@ -0,0 +1,6 @@
//go:build race
package helpers
// RaceEnabled reports if the race detector is enabled.
const RaceEnabled = true

View File

@@ -15,7 +15,7 @@ import (
)
type poller interface {
Poll(ctx context.Context, samplerIP string, port uint16, community string, ifIndex uint)
Poll(ctx context.Context, samplerIP string, port uint16, community string, ifIndexes []uint)
}
// realPoller will poll samplers using real SNMP requests.
@@ -86,22 +86,30 @@ func newPoller(r *reporter.Reporter, config pollerConfig, clock clock.Clock, put
return p
}
func (p *realPoller) Poll(ctx context.Context, sampler string, port uint16, community string, ifIndex uint) {
func (p *realPoller) Poll(ctx context.Context, sampler string, port uint16, community string, ifIndexes []uint) {
// Check if already have a request running
key := fmt.Sprintf("%s@%d", sampler, ifIndex)
filteredIfIndexes := make([]uint, 0, len(ifIndexes))
keys := make([]string, 0, len(ifIndexes))
p.pendingRequestsLock.Lock()
_, ok := p.pendingRequests[key]
if !ok {
p.pendingRequests[key] = true
for _, ifIndex := range ifIndexes {
key := fmt.Sprintf("%s@%d", sampler, ifIndex)
_, ok := p.pendingRequests[key]
if !ok {
p.pendingRequests[key] = true
filteredIfIndexes = append(filteredIfIndexes, ifIndex)
keys = append(keys, key)
}
}
p.pendingRequestsLock.Unlock()
if ok {
// Request already in progress, skip it
if len(filteredIfIndexes) == 0 {
return
}
ifIndexes = filteredIfIndexes
defer func() {
p.pendingRequestsLock.Lock()
delete(p.pendingRequests, key)
for _, key := range keys {
delete(p.pendingRequests, key)
}
p.pendingRequestsLock.Unlock()
}()
@@ -127,63 +135,88 @@ func (p *realPoller) Poll(ctx context.Context, sampler string, port uint16, comm
}
}
start := p.clock.Now()
sysName := "1.3.6.1.2.1.1.5.0"
ifDescr := fmt.Sprintf("1.3.6.1.2.1.2.2.1.2.%d", ifIndex)
ifAlias := fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.18.%d", ifIndex)
ifSpeed := fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.15.%d", ifIndex)
result, err := g.Get([]string{sysName, ifDescr, ifAlias, ifSpeed})
requests := []string{"1.3.6.1.2.1.1.5.0"}
for _, ifIndex := range ifIndexes {
moreRequests := []string{
fmt.Sprintf("1.3.6.1.2.1.2.2.1.2.%d", ifIndex), // ifDescr
fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.18.%d", ifIndex), // ifAlias
fmt.Sprintf("1.3.6.1.2.1.31.1.1.1.15.%d", ifIndex), // ifSpeed
}
requests = append(requests, moreRequests...)
}
result, err := g.Get(requests)
if errors.Is(err, context.Canceled) {
return
}
if err != nil {
p.metrics.failures.WithLabelValues(sampler, "get").Inc()
if p.errLimiter.Allow() {
p.r.Err(err).Str("sampler", sampler).Msg("unable to get")
p.r.Err(err).Str("sampler", sampler).Msg("unable to GET")
}
return
}
if result.Error != gosnmp.NoError && result.ErrorIndex == 0 {
// There is some error affecting the whole request
p.metrics.failures.WithLabelValues(sampler, "get").Inc()
if p.errLimiter.Allow() {
p.r.Error().Str("sampler", sampler).Stringer("code", result.Error).Msg("unable to GET")
}
}
ok = true
processStr := func(idx int, what string, target *string) {
processStr := func(idx int, what string, target *string) bool {
switch result.Variables[idx].Type {
case gosnmp.OctetString:
*target = string(result.Variables[idx].Value.([]byte))
return true
case gosnmp.NoSuchInstance, gosnmp.NoSuchObject:
p.metrics.failures.WithLabelValues(sampler, fmt.Sprintf("%s_missing", what)).Inc()
ok = false
return false
default:
p.metrics.failures.WithLabelValues(sampler, fmt.Sprintf("%s_unknown_type", what)).Inc()
ok = false
return false
}
}
processUint := func(idx int, what string, target *uint) {
processUint := func(idx int, what string, target *uint) bool {
switch result.Variables[idx].Type {
case gosnmp.Gauge32:
*target = result.Variables[idx].Value.(uint)
return true
case gosnmp.NoSuchInstance, gosnmp.NoSuchObject:
p.metrics.failures.WithLabelValues(sampler, fmt.Sprintf("%s_missing", what)).Inc()
ok = false
return false
default:
p.metrics.failures.WithLabelValues(sampler, fmt.Sprintf("%s_unknown_type", what)).Inc()
ok = false
return false
}
}
var sysNameVal, ifDescrVal, ifAliasVal string
var ifSpeedVal uint
processStr(0, "sysname", &sysNameVal)
processStr(1, "ifdescr", &ifDescrVal)
processStr(2, "ifalias", &ifAliasVal)
processUint(3, "ifspeed", &ifSpeedVal)
if !ok {
if !processStr(0, "sysname", &sysNameVal) {
return
}
p.put(sampler, sysNameVal, ifIndex, Interface{
Name: ifDescrVal,
Description: ifAliasVal,
Speed: ifSpeedVal,
})
for idx := 1; idx < len(requests)-2; idx += 3 {
ok := true
if !processStr(idx, "ifdescr", &ifDescrVal) {
ok = false
}
if !processStr(idx+1, "ifalias", &ifAliasVal) {
ok = false
}
if !processUint(idx+2, "ifspeed", &ifSpeedVal) {
ok = false
}
if !ok {
continue
}
ifIndex := ifIndexes[(idx-1)/3]
p.put(sampler, sysNameVal, ifIndex, Interface{
Name: ifDescrVal,
Description: ifAliasVal,
Speed: ifSpeedVal,
})
p.metrics.successes.WithLabelValues(sampler).Inc()
}
p.metrics.successes.WithLabelValues(sampler).Inc()
p.metrics.times.WithLabelValues(sampler).Observe(p.clock.Now().Sub(start).Seconds())
}

View File

@@ -112,10 +112,10 @@ func TestPoller(t *testing.T) {
go server.ServeForever()
defer server.Shutdown()
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", 641)
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", 642)
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", 643)
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", 644)
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", []uint{641})
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", []uint{642})
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", []uint{643})
p.Poll(context.Background(), "127.0.0.1", uint16(port), "public", []uint{644})
time.Sleep(50 * time.Millisecond)
if diff := helpers.Diff(got, []string{
`127.0.0.1 sampler62 641 Gi0/0/0/0 Transit 10000`,
@@ -124,18 +124,13 @@ func TestPoller(t *testing.T) {
t.Fatalf("Poll() (-got, +want):\n%s", diff)
}
gotMetrics := r.GetMetrics("akvorado_snmp_poller_")
gotMetrics := r.GetMetrics("akvorado_snmp_poller_", "failure_", "pending_", "success_")
expectedMetrics := map[string]string{
`failure_requests{error="ifalias_missing",sampler="127.0.0.1"}`: "2",
`failure_requests{error="ifspeed_missing",sampler="127.0.0.1"}`: "1",
`failure_requests{error="ifdescr_missing",sampler="127.0.0.1"}`: "1",
`pending_requests`: "0",
`seconds_count{sampler="127.0.0.1"}`: "2",
`seconds_sum{sampler="127.0.0.1"}`: "0",
`seconds{sampler="127.0.0.1",quantile="0.5"}`: "0",
`seconds{sampler="127.0.0.1",quantile="0.9"}`: "0",
`seconds{sampler="127.0.0.1",quantile="0.99"}`: "0",
`success_requests{sampler="127.0.0.1"}`: "2",
`failure_requests{error="ifalias_missing",sampler="127.0.0.1"}`: "2", // 643+644
`failure_requests{error="ifdescr_missing",sampler="127.0.0.1"}`: "1", // 644
`failure_requests{error="ifspeed_missing",sampler="127.0.0.1"}`: "1", // 644
`pending_requests`: "0",
`success_requests{sampler="127.0.0.1"}`: "2", // 641+642
}
if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" {
t.Fatalf("Metrics (-got, +want):\n%s", diff)

View File

@@ -143,7 +143,7 @@ func (c *Component) Start() error {
select {
case c.dispatcherChannel <- lookupRequest{
SamplerIP: sampler,
IfIndex: []uint{ifIndex},
IfIndexes: []uint{ifIndex},
}:
count++
default:
@@ -204,7 +204,7 @@ func (c *Component) Start() error {
c.t.Context(nil),
request.SamplerIP, 161,
community,
request.IfIndex[0])
request.IfIndexes)
idleTime := float64(startBusy.Sub(startIdle).Nanoseconds()) / 1000 / 1000 / 1000
busyTime := float64(time.Since(startBusy).Nanoseconds()) / 1000 / 1000 / 1000
c.metrics.pollerLoopTime.WithLabelValues(workerIDStr, "idle").Observe(idleTime)
@@ -237,7 +237,7 @@ func (c *Component) Stop() error {
// lookupRequest is used internally to queue a polling request.
type lookupRequest struct {
SamplerIP string
IfIndex []uint
IfIndexes []uint
}
// Lookup for interface information for the provided sampler and ifIndex.
@@ -248,7 +248,7 @@ func (c *Component) Lookup(samplerIP string, ifIndex uint) (string, Interface, e
if errors.Is(err, ErrCacheMiss) {
req := lookupRequest{
SamplerIP: samplerIP,
IfIndex: []uint{ifIndex},
IfIndexes: []uint{ifIndex},
}
select {
case c.dispatcherChannel <- req:
@@ -263,16 +263,16 @@ func (c *Component) Lookup(samplerIP string, ifIndex uint) (string, Interface, e
// provided request if it can.
func (c *Component) dispatchIncomingRequest(request lookupRequest) {
requestsMap := map[string][]uint{
request.SamplerIP: request.IfIndex,
request.SamplerIP: request.IfIndexes,
}
for {
select {
case request := <-c.dispatcherChannel:
indexes, ok := requestsMap[request.SamplerIP]
if !ok {
indexes = request.IfIndex
indexes = request.IfIndexes
} else {
indexes = append(indexes, request.IfIndex...)
indexes = append(indexes, request.IfIndexes...)
}
requestsMap[request.SamplerIP] = indexes
// We don't want to exceed the configured
@@ -292,12 +292,10 @@ func (c *Component) dispatchIncomingRequest(request lookupRequest) {
if len(ifIndexes) > 1 {
c.metrics.pollerCoalescedCount.Add(float64(len(ifIndexes)))
}
for _, ifIndex := range ifIndexes {
select {
case <-c.t.Dying():
return
case c.pollerChannel <- lookupRequest{samplerIP, []uint{ifIndex}}:
}
select {
case <-c.t.Dying():
return
case c.pollerChannel <- lookupRequest{samplerIP, ifIndexes}:
}
}
}

View File

@@ -193,23 +193,18 @@ type forceCoaelescePoller struct {
accepted []lookupRequest
}
func (fcp *forceCoaelescePoller) Poll(ctx context.Context, samplerIP string, _ uint16, _ string, ifIndex uint) {
func (fcp *forceCoaelescePoller) Poll(ctx context.Context, samplerIP string, _ uint16, _ string, ifIndexes []uint) {
select {
case <-ctx.Done():
return
case <-fcp.accept:
fcp.accepted = append(fcp.accepted, lookupRequest{samplerIP, []uint{ifIndex}})
fcp.accepted = append(fcp.accepted, lookupRequest{samplerIP, ifIndexes})
}
}
func TestCoaelescing(t *testing.T) {
r := reporter.NewMock(t)
c := NewMock(t, r, DefaultConfiguration, Dependencies{Daemon: daemon.NewMock(t)})
defer func() {
if err := c.Stop(); err != nil {
t.Fatalf("Stop() error:\n%+v", err)
}
}()
fcp := &forceCoaelescePoller{
accept: make(chan bool),
accepted: []lookupRequest{},
@@ -217,22 +212,41 @@ func TestCoaelescing(t *testing.T) {
c.poller = fcp
expectSNMPLookup(t, c, "127.0.0.1", 765, answer{Err: ErrCacheMiss})
time.Sleep(10 * time.Millisecond)
time.Sleep(50 * time.Millisecond)
// dispatcher is now blocked, queue requests
expectSNMPLookup(t, c, "127.0.0.1", 766, answer{Err: ErrCacheMiss})
expectSNMPLookup(t, c, "127.0.0.1", 767, answer{Err: ErrCacheMiss})
expectSNMPLookup(t, c, "127.0.0.1", 768, answer{Err: ErrCacheMiss})
expectSNMPLookup(t, c, "127.0.0.1", 769, answer{Err: ErrCacheMiss})
time.Sleep(50 * time.Millisecond) // ensure everything is queued
fcp.accept <- true
time.Sleep(10 * time.Millisecond)
// The race detector may require read from the channel before
// additional write. So, we can't run the remaining code under
// the race detector.
if helpers.RaceEnabled {
return
}
gotMetrics := r.GetMetrics("akvorado_snmp_poller_", "coalesced_count")
expectedMetrics := map[string]string{
`coalesced_count`: "4",
}
if diff := helpers.Diff(gotMetrics, expectedMetrics); diff != "" {
t.Fatalf("Metrics (-got, +want):\n%s", diff)
t.Errorf("Metrics (-got, +want):\n%s", diff)
}
// TODO: check we have accepted the 4 requests at the same time
fcp.accept <- true
time.Sleep(20 * time.Millisecond)
if err := c.Stop(); err != nil {
t.Fatalf("Stop() error:\n%+v", err)
}
expectedAccepted := []lookupRequest{
{"127.0.0.1", []uint{765}},
{"127.0.0.1", []uint{766, 767, 768, 769}},
}
if diff := helpers.Diff(fcp.accepted, expectedAccepted); diff != "" {
t.Errorf("Accepted requests (-got, +want):\n%s", diff)
}
}

View File

@@ -26,13 +26,15 @@ func newMockPoller(community string, put func(string, string, uint, Interface))
}
// Poll just builds synthetic data.
func (p *mockPoller) Poll(ctx context.Context, samplerIP string, port uint16, community string, ifIndex uint) {
if community == p.community {
p.put(samplerIP, strings.ReplaceAll(samplerIP, ".", "_"), ifIndex, Interface{
Name: fmt.Sprintf("Gi0/0/%d", ifIndex),
Description: fmt.Sprintf("Interface %d", ifIndex),
Speed: 1000,
})
func (p *mockPoller) Poll(ctx context.Context, samplerIP string, port uint16, community string, ifIndexes []uint) {
for _, ifIndex := range ifIndexes {
if community == p.community {
p.put(samplerIP, strings.ReplaceAll(samplerIP, ".", "_"), ifIndex, Interface{
Name: fmt.Sprintf("Gi0/0/%d", ifIndex),
Description: fmt.Sprintf("Interface %d", ifIndex),
Speed: 1000,
})
}
}
}