mirror of https://git.proxmox.com/git/mirror_ubuntu-kernels.git
When reading memory in order, HW prefetching optimizations will interfere with measuring how caches and memory are being accessed. This adds noise into the results.

Change the fill_buf reading loop so that it does not use an obvious in-order access pattern, by generating the index with a multiply by a prime and a modulo. Using a prime multiplier with modulo ensures the entire buffer is eventually read. 23 is small enough that the reads are spread out, but wrapping does not occur very frequently (wrapping too often can trigger L2 hits more frequently, which adds noise to the test because getting the data from the LLC is then not required).

It was discovered that not all primes work equally well and some can cause wildly unstable results (e.g., in an earlier version of this patch the reads were done in reversed order and 59 was used as the prime, resulting in unacceptably high and unstable results in the MBA and MBM tests on some architectures).

Link: https://lore.kernel.org/linux-kselftest/TYAPR01MB6330025B5E6537F94DA49ACB8B499@TYAPR01MB6330.jpnprd01.prod.outlook.com/
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
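As a quick illustration of why the prime stride eventually covers the whole buffer, here is a minimal standalone sketch. It is not part of fill_buf.c: the DEMO_STEP and DEMO_SLOTS names, the 1 MiB slot count and the coverage check are assumptions made only for this example. It advances an index the same way fill_one_span_read() below does, with the modulo open-coded as repeated subtraction, and verifies that every half-cacheline slot is visited once per pass, which holds whenever the slot count is not a multiple of the stride.

/*
 * Standalone, user-space demo (hypothetical): check that a prime stride of
 * 23 visits every slot exactly once before the pattern repeats.
 */
#include <stdio.h>
#include <stdlib.h>

#define DEMO_STEP       23                      /* same stride as FILL_IDX_MULT */
#define DEMO_SLOTS      (1024 * 1024 / 32)      /* a 1 MiB buffer in half-cachelines */

int main(void)
{
        unsigned char *seen = calloc(DEMO_SLOTS, 1);
        unsigned int i, idx = 0, covered = 0;

        if (!seen)
                return 1;

        for (i = 0; i < DEMO_SLOTS; i++) {
                covered += !seen[idx];
                seen[idx] = 1;

                /* open-coded idx = (idx + DEMO_STEP) % DEMO_SLOTS */
                idx += DEMO_STEP;
                while (idx >= DEMO_SLOTS)
                        idx -= DEMO_SLOTS;
        }

        printf("visited %u of %u slots\n", covered, DEMO_SLOTS);
        free(seen);
        return covered == DEMO_SLOTS ? 0 : 1;
}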
170 lines
3.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
/*
 * fill_buf benchmark
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
#include <string.h>

#include "resctrl.h"

#define CL_SIZE         (64)
#define PAGE_SIZE       (4 * 1024)
#define MB              (1024 * 1024)

static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
        asm volatile("sfence\n\t"
                     : : : "memory");
#endif
}

static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
        asm volatile("clflush (%0)\n\t"
                     : : "r"(p) : "memory");
#endif
}

static void mem_flush(unsigned char *buf, size_t buf_size)
{
        unsigned char *cp = buf;
        size_t i = 0;

        buf_size = buf_size / CL_SIZE; /* mem size in cache lines */

        for (i = 0; i < buf_size; i++)
                cl_flush(&cp[i * CL_SIZE]);

        sb();
}

/*
 * Buffer index step advance to work around HW prefetching interfering with
 * the measurements.
 *
 * Must be a prime to step through all indexes of the buffer.
 *
 * Some primes work better than others on some architectures (from the
 * MBA/MBM result stability point of view).
 */
#define FILL_IDX_MULT   23

static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
        unsigned int size = buf_size / (CL_SIZE / 2);
        unsigned int i, idx = 0;
        unsigned char sum = 0;

        /*
         * Read the buffer in an order that is unexpected by HW prefetching
         * optimizations to prevent them from interfering with the caching
         * pattern.
         *
         * The read order is (in terms of halves of cachelines):
         *      i * FILL_IDX_MULT % size
         * The formula is open-coded below to avoid modulo inside the loop
         * as it improves MBA/MBM result stability on some architectures.
         */
        for (i = 0; i < size; i++) {
                sum += buf[idx * (CL_SIZE / 2)];

                idx += FILL_IDX_MULT;
                while (idx >= size)
                        idx -= size;
        }

        return sum;
}

static void fill_one_span_write(unsigned char *buf, size_t buf_size)
{
        unsigned char *end_ptr = buf + buf_size;
        unsigned char *p;

        p = buf;
        while (p < end_ptr) {
                *p = '1';
                p += (CL_SIZE / 2);
        }
}

static void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
        int ret = 0;

        while (1) {
                ret = fill_one_span_read(buf, buf_size);
                if (once)
                        break;
        }

        /* Consume read result so that reading memory is not optimized out. */
        *value_sink = ret;
}

static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
{
        while (1) {
                fill_one_span_write(buf, buf_size);
                if (once)
                        break;
        }
}

static unsigned char *alloc_buffer(size_t buf_size, int memflush)
{
        void *buf = NULL;
        uint64_t *p64;
        size_t s64;
        int ret;

        /* posix_memalign() returns an error number on failure, not -1 */
        ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
        if (ret)
                return NULL;

        /* Initialize the buffer */
        p64 = buf;
        s64 = buf_size / sizeof(uint64_t);

        while (s64 > 0) {
                *p64 = (uint64_t)rand();
                p64 += (CL_SIZE / sizeof(uint64_t));
                s64 -= (CL_SIZE / sizeof(uint64_t));
        }

        /* Flush the memory before use to avoid the "cache hot pages" effect */
        if (memflush)
                mem_flush(buf, buf_size);

        return buf;
}

int run_fill_buf(size_t buf_size, int memflush, int op, bool once)
{
        unsigned char *buf;

        buf = alloc_buffer(buf_size, memflush);
        if (!buf)
                return -1;

        if (op == 0)
                fill_cache_read(buf, buf_size, once);
        else
                fill_cache_write(buf, buf_size, once);
        free(buf);

        return 0;
}
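For context, here is a minimal sketch of how a caller might drive this benchmark. It assumes the code is built as part of the resctrl selftest, which declares run_fill_buf() in resctrl.h and provides the value_sink that fill_cache_read() writes to; the demo_fill_buf_once() wrapper and its parameter choices are illustrative, not the selftest's actual harness.

/*
 * Illustrative caller sketch (not part of fill_buf.c): one read pass with a
 * preceding cache flush, then one write pass without it.
 */
static int demo_fill_buf_once(size_t span)
{
        int ret;

        /* op == 0 selects the read loop; memflush == 1 flushes the buffer first */
        ret = run_fill_buf(span, 1, 0, true);
        if (ret)
                return ret;

        /* any non-zero op selects the write loop; skip the flush this time */
        return run_fill_buf(span, 0, 1, true);
}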