mirror_ubuntu-kernels/tools/testing/selftests/drivers/net/stats.py
Joe Damato a61a459f58 testing: net-drv: use stats64 for testing
Testing a network device that has transferred a large number of
bytes/packets may fail because the 32-bit counters in the 'stats'
structure overflow. Comparing against 'stats64' instead fixes this problem.

I tripped on this while iterating on a qstats patch for mlx5. See below
for confirmation without my added code that this is a bug.

Before this patch (with added debugging output):

$ NETIF=eth0 tools/testing/selftests/drivers/net/stats.py
KTAP version 1
1..4
ok 1 stats.check_pause
ok 2 stats.check_fec
rstat: 481708634 qstat: 666201639514 key: tx-bytes
not ok 3 stats.pkt_byte_sum
ok 4 stats.qstat_by_ifindex

Note the huge delta above ^^^ in the rtnl vs qstats.

After this patch:

$ NETIF=eth0 tools/testing/selftests/drivers/net/stats.py
KTAP version 1
1..4
ok 1 stats.check_pause
ok 2 stats.check_fec
ok 3 stats.pkt_byte_sum
ok 4 stats.qstat_by_ifindex

It looks like rtnl_fill_stats in net/core/rtnetlink.c will attempt to
copy the 64bit stats into a 32bit structure which is probably why this
behavior is occurring.

To show this is happening, you can get the underlying stats that the
stats.py test uses like this:

$ ./cli.py --spec ../../../Documentation/netlink/specs/rt_link.yaml \
           --do getlink --json '{"ifi-index": 7}'

And examine the output (heavily snipped to show relevant fields):

 'stats': {
           'multicast': 3739197,
           'rx-bytes': 1201525399,
           'rx-packets': 56807158,
           'tx-bytes': 492404458,
           'tx-packets': 1200285371,

 'stats64': {
             'multicast': 3739197,
             'rx-bytes': 35561263767,
             'rx-packets': 56807158,
             'tx-bytes': 666212335338,
             'tx-packets': 1200285371,

The stats.py test prior to this patch was using the 'stats' structure
above, which matches the failure output on my system.

Comparing side by side, rx-bytes and tx-bytes, and getting ethtool -S
output:

rx-bytes stats:    1201525399
rx-bytes stats64: 35561263767
rx-bytes ethtool: 36203402638

tx-bytes stats:      492404458
tx-bytes stats64: 666212335338
tx-bytes ethtool: 666215360113

Note that the above was taken from a system with an mlx5 NIC, which only
exposes ndo_get_stats64.

Based on the ethtool output and qstat output, it appears that stats.py
should be updated to use the 'stats64' structure for accurate
comparisons when packet/byte counters get very large.

To confirm that this was not related to the qstats code I was iterating
on, I booted a kernel without my driver changes and re-ran the test
which shows the qstats are skipped (as they don't exist for mlx5):

NETIF=eth0 tools/testing/selftests/drivers/net/stats.py
KTAP version 1
1..4
ok 1 stats.check_pause
ok 2 stats.check_fec
ok 3 stats.pkt_byte_sum # SKIP qstats not supported by the device
ok 4 stats.qstat_by_ifindex # SKIP No ifindex supports qstats

But fetching the stats using the CLI:

$ ./cli.py --spec ../../../Documentation/netlink/specs/rt_link.yaml \
           --do getlink --json '{"ifi-index": 7}'

Shows the same issue (heavily snipped for relevant fields only):

 'stats': {
           'multicast': 105489,
           'rx-bytes': 530879526,
           'rx-packets': 751415,
           'tx-bytes': 2510191396,
           'tx-packets': 27700323,
 'stats64': {
             'multicast': 105489,
             'rx-bytes': 530879526,
             'rx-packets': 751415,
             'tx-bytes': 15395093284,
             'tx-packets': 27700323,

Comparing side by side with ethtool -S on the unmodified mlx5 driver:

tx-bytes stats:    2510191396
tx-bytes stats64: 15395093284
tx-bytes ethtool: 17718435810

Fixes: f0e6c86e4b ("testing: net-drv: add a driver test for stats reporting")
Signed-off-by: Joe Damato <jdamato@fastly.com>
Link: https://lore.kernel.org/r/20240520235850.190041-1-jdamato@fastly.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2024-05-23 10:18:29 +02:00

145 lines
4.5 KiB
Python
Executable File

#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
import errno

from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
# Netlink family handles, created once at import time and shared by the
# test cases in this file.
# ethnl is used for the pause_get / fec_get requests.
ethnl = EthtoolFamily()
# netfam is used for the qstats_get requests.
netfam = NetdevFamily()
# rtnl is used for the getlink requests.
rtnl = RtnlFamily()
def check_pause(cfg) -> None:
    """Check that the device reports pause statistics.

    Issues a plain pause_get request first. If it fails with EOPNOTSUPP
    the case is an expected failure. Otherwise the request is repeated
    with the 'stats' flag set and the 'stats' attribute of the reply is
    checked with ksft_true().

    Args:
        cfg: test environment; only cfg.ifindex is read here.

    Raises:
        KsftXfailEx: the plain pause_get request failed with EOPNOTSUPP.
        NlError: any other netlink error is re-raised unchanged.
    """
    try:
        ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
    except NlError as e:
        # Compare against the errno module rather than a literal 95: the
        # numeric value of EOPNOTSUPP differs between architectures.
        if e.error == errno.EOPNOTSUPP:
            raise KsftXfailEx("pause not supported by the device")
        raise

    data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
                                       "flags": {'stats'}}})
    ksft_true(data['stats'], "driver does not report stats")
def check_fec(cfg) -> None:
    """Check that the device reports FEC statistics.

    Issues a plain fec_get request first. If it fails with EOPNOTSUPP the
    case is an expected failure. Otherwise the request is repeated with
    the 'stats' flag set and the 'stats' attribute of the reply is checked
    with ksft_true().

    Args:
        cfg: test environment; only cfg.ifindex is read here.

    Raises:
        KsftXfailEx: the plain fec_get request failed with EOPNOTSUPP.
        NlError: any other netlink error is re-raised unchanged.
    """
    try:
        ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
    except NlError as e:
        # Compare against the errno module rather than a literal 95: the
        # numeric value of EOPNOTSUPP differs between architectures.
        if e.error == errno.EOPNOTSUPP:
            raise KsftXfailEx("FEC not supported by the device")
        raise

    data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
                                     "flags": {'stats'}}})
    ksft_true(data['stats'], "driver does not report stats")
def pkt_byte_sum(cfg) -> None:
    """Cross-check rtnetlink 'stats64' counters against netdev qstats.

    The qstats entry for cfg.ifindex must contain the four basic packet and
    byte counters. The two sources are then sampled alternately ten times;
    whichever was fetched later must never be lower than the one fetched
    before it.

    Args:
        cfg: test environment; only cfg.ifindex is read here.

    Raises:
        KsftSkipEx: the qstats dump has no entry for cfg.ifindex.
        Exception: a later sample compared lower than an earlier one.
    """
    basic_keys = ('tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes')

    def get_qstat(test):
        """Return the qstats dump entry matching test.ifindex, or None."""
        dump = netfam.qstats_get({}, dump=True)
        return next((entry for entry in (dump or ())
                     if entry["ifindex"] == test.ifindex), None)

    def stat_cmp(rstat, qstat):
        """Return the first non-zero rstat - qstat difference, else 0."""
        for name in basic_keys:
            delta = rstat[name] - qstat[name]
            if delta != 0:
                return delta
        return 0

    qstat = get_qstat(cfg)
    if qstat is None:
        raise KsftSkipEx("qstats not supported by the device")

    for name in basic_keys:
        ksft_in(name, qstat, "Drivers should always report basic keys")

    # The interface may be passing traffic while we look, so exact equality
    # cannot be required. Alternate between the two sources instead and
    # require that the more recently fetched one is never behind.
    for _round in range(10):
        link_stats = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
        if stat_cmp(link_stats, qstat) < 0:
            raise Exception("RTNL stats are lower, fetched later")

        qstat = get_qstat(cfg)
        if stat_cmp(link_stats, qstat) > 0:
            raise Exception("Qstats are lower, fetched later")
def qstat_by_ifindex(cfg) -> None:
    """Check qstats dumps filtered by ifindex, including invalid indexes.

    For every ifindex present in a full qstats dump, a filtered dump is
    taken between two full dumps; each counter must be monotonically
    non-decreasing across the three samples. Filtered dumps for ifindex 0,
    ifindex 1 and the lowest unused ifindex (starting from 2) must each
    fail with a specific errno and point at the '.ifindex' attribute.

    Args:
        cfg: test environment; not read by this case.

    Raises:
        KsftSkipEx: the full qstats dump contained no entries.
    """
    # Construct a map ifindex -> [dump, by-index, dump]
    ifindexes = {}
    stats = netfam.qstats_get({}, dump=True)
    for entry in stats:
        ifindexes[entry['ifindex']] = [entry, None, None]

    for ifindex in ifindexes:
        entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
        ksft_eq(len(entry), 1)
        ifindexes[entry[0]['ifindex']][1] = entry[0]

    stats = netfam.qstats_get({}, dump=True)
    for entry in stats:
        ifindexes[entry['ifindex']][2] = entry

    if not ifindexes:
        raise KsftSkipEx("No ifindex supports qstats")

    # Now make sure the stats match/make sense
    for triple in ifindexes.values():
        all_keys = triple[0].keys() | triple[1].keys() | triple[2].keys()

        for key in all_keys:
            ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key)
            ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key)

    # Test invalid dumps. nl_msg.error holds the negated errno; use the
    # errno module instead of literals since values such as EOPNOTSUPP
    # differ between architectures.
    # 0 is invalid
    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": 0}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.ERANGE)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')

    # loopback has no stats
    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": 1}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')

    # Try to get stats for lowest unused ifindex but not 0
    devs = rtnl.getlink({}, dump=True)
    all_ifindexes = {dev["ifi-index"] for dev in devs}
    lowest = 2
    while lowest in all_ifindexes:
        lowest += 1

    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": lowest}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
def main() -> None:
    """Run the four test cases with a NetDrvEnv config, then call ksft_exit()."""
    cases = [check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex]
    with NetDrvEnv(__file__) as cfg:
        # Every case receives the environment object as its only argument.
        ksft_run(cases, args=(cfg, ))
    ksft_exit()


if __name__ == "__main__":
    main()