* Add some memory tracking, getting some information on memory hogs.

This commit is contained in:
Matthias Klose 2024-08-27 11:56:40 +02:00
parent 2b026ab24c
commit d6a67df217
5 changed files with 416 additions and 7 deletions

1
debian/changelog vendored
View File

@ -1,6 +1,7 @@
llvm-toolchain-19 (1:19.1.0~++rc3-1~exp2) UNRELEASED; urgency=medium
* Add a .gitignore file with all toplevel upstream files and directories.
* Add some memory tracking, getting some information on memory hogs.
-- Matthias Klose <doko@ubuntu.com> Tue, 27 Aug 2024 11:47:25 +0200

2
debian/control vendored
View File

@ -18,7 +18,7 @@ Build-Depends: debhelper (>= 10.0), cmake, ninja-build,
zlib1g-dev, libzstd-dev,
g++-multilib [amd64 i386 kfreebsd-amd64 mips64 mips64el mipsel powerpc ppc64 s390 s390x sparc sparc64 x32],
libjs-mathjax, python3-myst-parser | python3-recommonmark, python3-pexpect,
doxygen,
doxygen, python3:any, python3-psutil, python3-matplotlib,
ocaml-base [amd64 arm64 armhf ppc64el riscv64 s390x] | ocaml-nox [amd64 arm64 armhf ppc64el riscv64 s390x],
ocaml-findlib [amd64 arm64 armhf ppc64el riscv64 s390x],
libctypes-ocaml-dev [amd64 arm64 armhf ppc64el riscv64 s390x],

2
debian/control.in vendored
View File

@ -18,7 +18,7 @@ Build-Depends: debhelper (>= 10.0), cmake, ninja-build,
zlib1g-dev, libzstd-dev,
g++-multilib [amd64 i386 kfreebsd-amd64 mips64 mips64el mipsel powerpc ppc64 s390 s390x sparc sparc64 x32],
libjs-mathjax, python3-myst-parser | python3-recommonmark, python3-pexpect,
doxygen,
doxygen,@USAGE_BUILD_DEP@
ocaml-base [amd64 arm64 armhf ppc64el riscv64 s390x] | ocaml-nox [amd64 arm64 armhf ppc64el riscv64 s390x],
ocaml-findlib [amd64 arm64 armhf ppc64el riscv64 s390x],
libctypes-ocaml-dev [amd64 arm64 armhf ppc64el riscv64 s390x],

24
debian/rules vendored
View File

@ -71,7 +71,7 @@ packages := $(shell dh_listpackages)
# flang has some memory hogs, of up to 6.2gb per process. Limit the parallel jobs
# based on the available memory
ifneq (,$(filter flang-$(LLVM_VERSION), $(packages)))
MEM_PER_CPU=4500
MEM_PER_CPU=6000
else
MEM_PER_CPU=2100
endif
@ -79,6 +79,12 @@ NJOBS := $(shell mt=`awk '/^(MemAvail|SwapFree)/ { mt += $$2 } END {print mt}' /
awk -vn=$(NCPUS) -vmt=$$mt -vm=$(MEM_PER_CPU) \
'END { mt/=1024; n2 = int(mt/m); print n==1 ? 1 : n2<n+1 ? n2 : n+1}' < /dev/null)
TIME_COMMAND = /bin/time -v
TIME_COMMAND = $(CURDIR)/debian/usage-wrapper.py -j $(NJOBS) --base-memory -m 4.0 -o usage-$(notdir $@)-$(DEB_HOST_ARCH).svg
ifneq (,$(findstring usage-wrapper, $(TIME_COMMAND)))
USAGE_BUILD_DEP = $(EMPTY) python3:any, python3-psutil, python3-matplotlib,
endif
DH_VERSION := $(shell dpkg -s debhelper | grep '^Version' | awk '{print $$2}')
include /usr/share/dpkg/architecture.mk
@ -646,6 +652,7 @@ stamps/preconfigure:
f2=$$(echo $$f | sed 's/\.in$$//;s/X\.Y/$(LLVM_VERSION)/'); \
echo "$$f => $$f2"; \
sed -e 's|@DEB_HOST_MULTIARCH@|$(DEB_HOST_MULTIARCH)|g' \
-e "s|@USAGE_BUILD_DEP@|$(USAGE_BUILD_DEP)|g" \
-e "s|@BRANCH_NAME@|$(BRANCH_NAME)|g" \
-e "s|@OCAML_STDLIB_DIR@|$(OCAML_STDLIB_DIR)|g" \
-e "s|@LLVM_EPOCH@|$(LLVM_EPOCH)|g" \
@ -873,7 +880,7 @@ stamps/debian-full-build:
echo "Using cmake: $(CMAKE_BIN)"
# linker hack so stage2 can link against stage1 libs at runtime
LD_LIBRARY_PATH=$(STAGE_1_LIB_DIR):$$LD_LIBRARY_PATH \
VERBOSE=1 $(PRE_PROCESS) $(CMAKE_BIN) --build $(TARGET_BUILD) -j $(NJOBS) --target stage2 || cat build-llvm/tools/clang/stage2-bins/CMakeFiles/CMakeOutput.log
VERBOSE=1 $(TIME_COMMAND) $(PRE_PROCESS) $(CMAKE_BIN) --build $(TARGET_BUILD) -j $(NJOBS) --target stage2 || cat build-llvm/tools/clang/stage2-bins/CMakeFiles/CMakeOutput.log
# Check the stage 2 build worked
if ! readelf --string-dump .comment $(TARGET_BUILD_STAGE2)/bin/clang 2>&1|grep -q "clang version"; then \
@ -927,7 +934,7 @@ stamps/debian-libclc-build:
-DLLVM_CMAKE_DIR=$(STAGE_2_BIN_DIR)/../ \
$(LIBCLC_LLVM_SPIRV) \
-DLIBCLC_TARGETS_TO_BUILD="$(LIBCLC_TARGETS_TO_BUILD)"; \
LD_LIBRARY_PATH=$$LD_LIBRARY_PATH:$(STAGE_2_LIB_DIR) ninja -j $(NJOBS) $(VERBOSE)
LD_LIBRARY_PATH=$$LD_LIBRARY_PATH:$(STAGE_2_LIB_DIR) $(TIME_COMMAND) ninja -j $(NJOBS) $(VERBOSE)
ifndef LLVM_SPIRV
echo "libclc built without SPIRV (.spv) outputs because llvm-spirv wasn't found"
endif
@ -967,7 +974,7 @@ build-wasm/compiler-rt-%:
-DCOMPILER_RT_USE_BUILTINS_LIBRARY=ON \
-DCOMPILER_RT_DEFAULT_TARGET_TRIPLE=$(cpu)-unknown-unknown \
-DCOMPILER_RT_OS_DIR=wasi
ninja -C "$@" -j $(NJOBS) $(VERBOSE)
$(TIME_COMMAND) ninja -C "$@" -j $(NJOBS) $(VERBOSE)
ifeq ($(LIBCXX_WASM_ENABLE), no)
build-wasm/libcxx-%-wasi: build-wasm/compiler-rt-%
@ -1050,7 +1057,7 @@ build-wasm/libcxx-%-wasi: build-wasm/compiler-rt-%
-DLIBCXXABI_HAS_EXTERNAL_THREAD_API:BOOL=OFF \
-DLIBCXXABI_BUILD_EXTERNAL_THREAD_LIBRARY:BOOL=OFF \
-DLIBCXXABI_USE_LLVM_UNWINDER:BOOL=OFF
ninja -C "$@" -j $(NJOBS) $(VERBOSE)
$(TIME_COMMAND) ninja -C "$@" -j $(NJOBS) $(VERBOSE)
endif
# Build compiler-rt for wasm32 and wasm64. Build libcxx only for wasm32, as
@ -1452,6 +1459,12 @@ endif
# So, we remove this directory from the package
rm -fr $(CURDIR)/debian/libclang-rt-$(LLVM_VERSION)-dev/usr/lib/llvm-$(LLVM_VERSION)/lib/clang/$(LLVM_VERSION)/lib/wasi/
for svg in usage-*.svg; do \
[ -f $$svg ] || continue; \
xz -9v $$svg; \
cp -p $$svg.xz debian/llvm-$(LLVM_VERSION)/usr/share/doc/llvm-$(LLVM_VERSION)/.; \
done
stamps/repack_a_llvm_ir:
ifeq (${LTO_ENABLE},yes)
# with LTO, .a contains llvm ir instead of native code. So, recompile them
@ -1603,5 +1616,6 @@ override_dh_auto_clean:
: # remove extra stamps
rm -f override_dh_auto_install
rm -rf stamps
rm -f usage-*.svg*
.PHONY: override_dh_strip preconfigure

394
debian/usage-wrapper.py vendored Executable file
View File

@ -0,0 +1,394 @@
#!/usr/bin/env python3
import argparse
import math
import os
import subprocess
import sys
import threading
import time
try:
import psutil
except ImportError:
print(f'{sys.argv[0]}: the psutil module is required.', file=sys.stderr)
sys.exit(1)
try:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
except ImportError:
plt = None
def to_gigabyte(value):
    """Convert a byte count into gigabytes (1 GB = 1024**3 bytes)."""
    bytes_per_gigabyte = 1024 * 1024 * 1024
    return value / bytes_per_gigabyte
# Default sampling period in seconds; -f/--frequency overrides it.
INTERVAL = 0.33
# Line width used for the plotted curves and their legend entries.
LW = 0.5

# Running aggregates over all samples, updated by record().
global_n = 0
global_cpu_data_sum = 0
global_memory_data_sum = 0
global_cpu_data_max = 0
# The minima start at the installed totals so the first sample lowers them.
global_memory_data_min = to_gigabyte(psutil.virtual_memory().total)
global_memory_data_max = 0
global_swap_data_min = to_gigabyte(psutil.swap_memory().total)
global_swap_data_max = 0
# Disk figures for the file system holding the current directory, taken once
# at start-up; get_footnote2() compares them with the usage at the end.
global_disk_data_total = to_gigabyte(psutil.disk_usage('.').total)
global_disk_data_start = to_gigabyte(psutil.disk_usage('.').used)

# Per-sample series; only filled when a plot is requested.
global_timestamps = []
global_cpu_data = []
global_memory_data = []
global_process_usage = []
# Command line -> (peak memory in GB, timestamp) for processes at or above
# the -m threshold.
global_process_hogs = {}
# Process category -> index of its stack in the stacked plots.
process_name_map = {}

lock = threading.Lock()
# Set to True by the main thread to stop the record() loop.
done = False
start_ts = time.monotonic()
cpu_count = psutil.cpu_count()

# Categories with a fixed colour; they also make up the legend.  The keys
# must match the labels returned by get_process_name(): the streaming WPA
# processes are reported as 'WPA-stream-out', so that is the key used here
# (a 'WPA-stream' key would never match and the colour would stay unused).
special_processes = {'ld': 'gold',
                     'WPA': 'deepskyblue',
                     'WPA-stream-out': 'lightblue',
                     'ltrans': 'forestgreen',
                     'as': 'coral',
                     'GCC': 'gray',
                     'clang': 'darkgray',
                     'rust': 'brown',
                     'go': 'hotpink',
                     'dwz': 'limegreen',
                     'rpm/dpkg': 'plum'}

# Reserve the first stack indices for the fixed categories, in table order.
for i, k in enumerate(special_processes.keys()):
    process_name_map[k] = i
# Command-line interface.  The command to run is given either as the trailing
# arguments or as one shell string via -c/--command, never both.
descr = 'Run command and measure memory and CPU utilization'
parser = argparse.ArgumentParser(description=descr)
parser.add_argument('command', metavar='command',
                    help='Command', nargs=argparse.REMAINDER)
parser.add_argument('-c', '--command', dest='command1',
                    help='command as a single argument')
parser.add_argument('-v', '--verbose', action='store_true', help='Verbose')
parser.add_argument('--summary-only', dest='summary_only',
                    action='store_true',
                    help='No plot, just a summary at the end')
parser.add_argument('--base-memory', action='store_true',
                    help='Adjust memory to include the system load')
parser.add_argument('-s', '--separate-ltrans', action='store_true',
                    help='Separate LTRANS processes in graph')
parser.add_argument('-o', '--output', default='usage.svg',
                    help='Path to output image (default: usage.svg)')
parser.add_argument('-r', '--ranges',
                    help='Plot only the selected time ranges '
                    '(e.g. 20-30, 0-1000)')
parser.add_argument('-t', '--title', help='Graph title')
parser.add_argument('-m', '--memory-hog-threshold', type=float,
                    help='Report about processes that consume the amount of '
                    'memory (in GB)')
parser.add_argument('-f', '--frequency', type=float,
                    default=INTERVAL,
                    help='Frequency of measuring (in seconds)')
parser.add_argument('-j', '--jobs', type=int,
                    default=cpu_count, dest='used_cpus',
                    help='Scale up CPU data to used CPUs '
                    'instead of available CPUs')
args = parser.parse_args()

if args.command1 and args.command:
    print(f'{sys.argv[0]}: either use -c "<shell command>", '
          'or append the command', file=sys.stderr)
    sys.exit(1)

# Without this check an empty argument list reaches subprocess.run() and
# fails there, after the sampler thread has already been started.
if not args.command1 and not args.command:
    print(f'{sys.argv[0]}: no command given', file=sys.stderr)
    sys.exit(1)

if not args.summary_only and plt is None:
    print(f'{sys.argv[0]}: use --summary-only, '
          'or install the matplotlib module', file=sys.stderr)
    sys.exit(1)

# used_cpus is a divisor (cpu_scale below, per-process CPU share in record()).
if args.used_cpus is not None and args.used_cpus < 1:
    print(f'{sys.argv[0]}: -j/--jobs must be a positive integer',
          file=sys.stderr)
    sys.exit(1)

# The frequency is the blocking interval handed to psutil.cpu_percent() on
# every sample, so it has to be a positive number of seconds.
if not args.frequency > 0:
    print(f'{sys.argv[0]}: -f/--frequency must be greater than zero',
          file=sys.stderr)
    sys.exit(1)

# Factor that rescales system-wide CPU percentages from all available CPUs to
# the number of CPUs the build is allowed to use.
cpu_scale = cpu_count / args.used_cpus
def get_process_name(proc):
    """Map a process to the category it is accounted under, or None.

    proc must provide name(), cmdline() and pid.  Exact executable names are
    looked up first, then name prefixes; a process that matches neither but
    has '-fltrans' among its arguments is an LTRANS job, reported per PID
    when --separate-ltrans was given.
    """
    name = proc.name()
    cmdline = proc.cmdline()

    by_exact_name = {
        'ld': 'ld',
        'ld.gold': 'ld',
        'lto1-wpa': 'WPA',
        'lto1-wpa-stream': 'WPA-stream-out',
        'as': 'as',
        'dwz': 'dwz',
        'go': 'go',
        'rpmbuild': 'rpm/dpkg',
    }
    for compiler in ('cc1', 'cc1plus', 'cc1objc', 'f951', 'd21', 'go1',
                     'gnat1'):
        by_exact_name[compiler] = 'GCC'
    if name in by_exact_name:
        return by_exact_name[name]

    by_prefix = (('clang', 'clang'), ('rust', 'rust'), ('dpkg', 'rpm/dpkg'))
    for prefix, category in by_prefix:
        if name.startswith(prefix):
            return category

    if '-fltrans' not in cmdline:
        return None
    if args.separate_ltrans:
        return 'ltrans-%d' % proc.pid
    return 'ltrans'
def record_process_memory_hog(proc, memory, timestamp):
    """Remember proc as a memory hog if it reaches the -m threshold.

    memory is the resident size in GB and timestamp the sample time in
    seconds.  Hogs are keyed by their full command line; for each command
    line only the sample with the highest memory figure is kept.
    """
    threshold = args.memory_hog_threshold
    if not threshold:
        return
    if not memory >= threshold:
        return

    cmd = ' '.join(proc.cmdline())
    previous = global_process_hogs.get(cmd)
    if previous is None or memory > previous[0]:
        global_process_hogs[cmd] = (memory, timestamp)
def record():
    """Sampling loop; runs in the background thread until ``done`` is set.

    Each iteration blocks for args.frequency seconds inside
    psutil.cpu_percent(), then updates the global aggregates and, unless
    --summary-only was given, appends the sample to the plotted series.  It
    then walks all descendants of this process and sums memory and CPU usage
    per process category.
    """
    global global_n, global_cpu_data_sum, global_cpu_data_max
    global global_memory_data_sum, global_memory_data_min
    global global_memory_data_max
    global global_swap_data_min, global_swap_data_max

    # pid -> Process object kept from the first time the pid was seen;
    # presumably so that cpu_percent() is always called on the same object.
    tracked = {}
    while not done:
        timestamp = time.monotonic() - start_ts
        used_cpu = psutil.cpu_percent(interval=args.frequency) * cpu_scale
        used_memory = to_gigabyte(psutil.virtual_memory().used)
        used_swap = to_gigabyte(psutil.swap_memory().used)

        if not args.summary_only:
            global_timestamps.append(timestamp)
            global_memory_data.append(used_memory)
            global_cpu_data.append(used_cpu)

        global_n += 1
        global_cpu_data_sum += used_cpu
        global_memory_data_sum += used_memory
        global_cpu_data_max = max(global_cpu_data_max, used_cpu)
        global_memory_data_min = min(global_memory_data_min, used_memory)
        global_memory_data_max = max(global_memory_data_max, used_memory)
        global_swap_data_min = min(global_swap_data_min, used_swap)
        global_swap_data_max = max(global_swap_data_max, used_swap)

        sample = {}
        alive = set()
        for child in psutil.Process().children(recursive=True):
            try:
                rss_gb = to_gigabyte(child.memory_info().rss)
                record_process_memory_hog(child, rss_gb, timestamp)
                label = get_process_name(child)
                if not label:
                    continue

                alive.add(child.pid)
                handle = tracked.setdefault(child.pid, child)
                cpu_share = handle.cpu_percent() / args.used_cpus

                # Categories outside the predefined table get the next free
                # stack index the first time they show up.
                process_name_map.setdefault(label, len(process_name_map))

                bucket = sample.setdefault(label, {'memory': 0, 'cpu': 0})
                bucket['cpu'] += cpu_share
                # FIXME: ignore WPA streaming memory - COW makes it bogus
                if label != 'WPA-stream-out':
                    bucket['memory'] += rss_gb
            except Exception:
                # the process can be gone
                continue

        # Forget processes that were not seen in this sample.
        for stale in [pid for pid in tracked if pid not in alive]:
            del tracked[stale]

        if args.verbose:
            print(sample, flush=True)
        if not args.summary_only:
            global_process_usage.append(sample)
def stack_values(process_usage, key):
    """Turn per-sample usage dicts into one series per process category.

    Returns a list with one list per entry of process_name_map, placed at the
    index stored in that map.  Every series has one value per sample: the
    sample's value for key ('memory' or 'cpu'), or 0 when the category does
    not occur in that sample.
    """
    series = [[] for _ in process_name_map]
    for sample in process_usage:
        for category, index in process_name_map.items():
            value = sample[category][key] if category in sample else 0
            series[index].append(value)
    return series
def get_footnote():
    """Return the first summary line: host, CPU and memory figures."""
    short_host = os.uname().nodename.split('.')[0]
    average_cpu = global_cpu_data_sum / global_n
    installed_memory = to_gigabyte(psutil.virtual_memory().total)
    fields = (
        f'host: {short_host}',
        f'CPUs: {args.used_cpus}/{cpu_count}',
        f'CPU avg: {average_cpu:.0f}%',
        f'CPU max: {global_cpu_data_max:.0f}%',
        f'base memory: {global_memory_data_min:.1f} GB',
        f'peak memory: {global_memory_data_max:.1f} GB',
        f'total memory: {installed_memory:.1f} GB',
    )
    return '; '.join(fields)
def get_footnote2():
    """Return the second summary line: swap and disk figures.

    Disk usage is read for the file system holding the current directory and
    compared with the value captured when the script started.
    """
    swap_total = to_gigabyte(psutil.swap_memory().total)
    used_now = to_gigabyte(psutil.disk_usage('.').used)
    grown_by = used_now - global_disk_data_start
    fields = (
        f'swap peak/total: {global_swap_data_max:.1f}/{swap_total:.1f} GB',
        f'disk start/end/total: {global_disk_data_start:.1f}/{used_now:.1f}/'
        f'{global_disk_data_total:.1f} GB',
        f'disk delta: {grown_by:.1f} GB',
    )
    return '; '.join(fields)
def generate_graph(time_range):
    """Plot CPU and memory usage for one time window and save it as SVG.

    time_range is None to plot every recorded sample (written to
    args.output), or a [start, end] pair in seconds; in that case only the
    samples inside the window are plotted and '-<start>-<end>' is inserted
    into the output file name.  Nothing is written when the window contains
    no samples.
    """
    timestamps = []
    cpu_data = []
    memory_data = []
    process_usage = []

    # filter data by timestamp
    for i, ts in enumerate(global_timestamps):
        if not time_range or time_range[0] <= ts <= time_range[1]:
            timestamps.append(ts)
            cpu_data.append(global_cpu_data[i])
            memory_data.append(global_memory_data[i])
            process_usage.append(global_process_usage[i])

    if not timestamps:
        if args.verbose:
            print('No data for range: %s' % str(time_range))
        return

    peak_memory = max(memory_data)

    fig, (cpu_subplot, mem_subplot) = plt.subplots(2, sharex=True)
    title = args.title if args.title else ''
    if time_range:
        title += ' (%d-%d s)' % (time_range[0], time_range[1])
    fig.suptitle(title, fontsize=17)
    fig.set_figheight(5)
    fig.set_figwidth(10)

    # scale cpu axis
    local_peak_cpu = max(cpu_data)
    cpu_ylimit = (local_peak_cpu // 10) * 11 + 5
    if cpu_ylimit > 200:
        cpu_ylimit = 200
    cpu_subplot.set_title('CPU usage')
    cpu_subplot.set_ylabel('%')
    cpu_subplot.plot(timestamps, cpu_data, c='blue', lw=LW, label='total')
    cpu_subplot.set_ylim([0, cpu_ylimit])
    cpu_subplot.axhline(color='r', alpha=0.5, y=100.0 / args.used_cpus, lw=LW,
                        linestyle='dotted', label='single core')
    cpu_subplot.set_xlim(left=time_range[0] if time_range else 0)
    cpu_subplot.grid(True)

    mem_subplot.plot(timestamps, memory_data, c='blue', lw=LW, label='total')
    mem_subplot.set_title('Memory usage')
    mem_subplot.set_ylabel('GB')
    mem_subplot.set_xlabel('time')

    # scale it to a reasonable limit: the next power of two above the peak,
    # or three quarters of it when that is still enough
    limit = 1
    while peak_memory > limit:
        limit *= 2
    if limit > 2 and limit * 0.75 >= peak_memory:
        limit = int(limit * 0.75)
    mem_subplot.set_ylim([0, 1.1 * limit])
    mem_subplot.set_yticks(range(0, limit + 1, math.ceil(limit / 8)))
    mem_subplot.grid(True)

    # plt.get_cmap() replaces plt.cm.get_cmap(), which newer Matplotlib
    # releases no longer provide.  The palette is repeated so that every
    # stack index has a colour, then the fixed categories are overridden.
    colors = list(plt.get_cmap('tab20c').colors) * 100
    for name, color in special_processes.items():
        if name in process_name_map:
            colors[process_name_map[name]] = color

    mem_stacks = stack_values(process_usage, 'memory')
    cpu_stacks = stack_values(process_usage, 'cpu')
    if mem_stacks:
        mem_subplot.stackplot(timestamps, mem_stacks,
                              colors=colors)
        cpu_subplot.stackplot(timestamps, cpu_stacks,
                              colors=colors)

    # generate custom legend
    legend_colors = special_processes.values()
    custom_lines = [Line2D([0], [0], color=x, lw=5) for x in legend_colors]
    custom_lines.insert(0, Line2D([0], [0], color='b', lw=LW))
    custom_lines.insert(0, Line2D([0], [0], color='r', alpha=0.5,
                                  linestyle='dotted', lw=LW))
    names = ['single core', 'total'] + list(special_processes.keys())
    fig.legend(custom_lines, names, loc='right', prop={'size': 6})

    filename = args.output
    if time_range:
        tr = '-%d-%d' % (time_range[0], time_range[1])
        filename = os.path.splitext(args.output)[0] + tr + '.svg'

    plt.subplots_adjust(bottom=0.15)
    plt.figtext(0.1, 0.04, get_footnote(), fontsize='small')
    plt.figtext(0.1, 0.01, get_footnote2(), fontsize='small')
    plt.savefig(filename)
    if args.verbose:
        print('Saving plot to %s' % filename)
    # One figure is created per call; release it so that plotting several
    # ranges does not keep every figure alive.
    plt.close(fig)
def summary():
    """Print both summary lines and, if any were recorded, the memory hogs.

    Hogs are listed by descending peak memory, each with the time of the
    sample at which that peak was observed and its command line.
    """
    for make_line in (get_footnote, get_footnote2):
        print(f'SUMMARY: {make_line()}')

    if not global_process_hogs:
        return

    print(f'PROCESS MEMORY HOGS (>={args.memory_hog_threshold:.1f} GB):')
    ranked = sorted(global_process_hogs.items(),
                    key=lambda hog: hog[1][0], reverse=True)
    for cmdline, (memory, ts) in ranked:
        print(f' {memory:.1f} GB: {ts:.1f} s: {cmdline}')
# Parse -r/--ranges before the sampler thread is started.  The thread is not
# a daemon and only stops once ``done`` is set, so an error raised here after
# thread.start() would leave the process hanging instead of exiting.
ranges = []
if args.ranges:
    for r in args.ranges.split(','):
        parts = r.split('-')
        try:
            if len(parts) != 2:
                raise ValueError(r)
            ranges.append([int(x) for x in parts])
        except ValueError:
            print(f'{sys.argv[0]}: invalid time range {r.strip()!r}, '
                  'expected START-END', file=sys.stderr)
            sys.exit(1)

if args.verbose:
    print('Ranges are %s' % str(ranges))
    print('Running command', flush=True)

thread = threading.Thread(target=record, args=())
thread.start()

cp = None
# Exit status used when the run is interrupted before the command completes.
rv = 2
try:
    if args.command1:
        cp = subprocess.run(args.command1, shell=True)
    else:
        cp = subprocess.run(args.command)
except KeyboardInterrupt:
    pass
finally:
    # Always stop the sampler, even if the command could not be launched.
    done = True
    thread.join()

summary()

if global_memory_data:
    min_memory = min(global_memory_data)
    # Without --base-memory the lowest sample is taken as the system's own
    # load and subtracted, so the plot starts at zero.
    if not args.base_memory:
        global_memory_data = [x - min_memory for x in global_memory_data]

if plt:
    generate_graph(None)
    for r in ranges:
        generate_graph(r)

# Propagate the wrapped command's exit status.
if cp is not None:
    rv = cp.returncode
sys.exit(rv)