qemu/scripts/ci/gitlab-failure-analysis
Alex Bennée 5a449678d6 scripts/ci: add gitlab-failure-analysis script
This is a script designed to collect data from multiple pipelines and
analyse the failure modes they have. By default it will probe the last
3 failed jobs on the staging branch. However, this can all be
controlled by the CLI:

  ./scripts/ci/gitlab-failure-analysis --count 2 --branch=testing/next --id 39915562 --status=
  running pipeline 2028486060, total jobs 125, skipped 5, failed 0,  39742 tests, 0 failed tests
  success pipeline 2015018135, total jobs 125, skipped 5, failed 0,  49219 tests, 0 failed tests

You can also skip dumping the failed-job details and just dump the tests:

  ./scripts/ci/gitlab-failure-analysis --branch= --id 39915562 --status= --skip-jobs --pipeline 1946202491 1919542960
  failed pipeline 1946202491, total jobs 127, skipped 5, failed 26,  38742 tests, 278 skipped tests, 2 failed tests
    Failed test qemu.qemu:qtest+qtest-s390x / qtest-s390x/boot-serial-test, check-system-opensuse, 1 /s390x/boot-serial/s390-ccw-virtio - FATAL-ERROR: Failed to find expected string. Please check '/tmp/qtest-boot-serial-sW77EA3'
    Failed test qemu.qemu:qtest+qtest-aarch64 / qtest-aarch64/arm-cpu-features, check-system-opensuse, 1 /aarch64/arm/query-cpu-model-expansion - ERROR:../tests/qtest/arm-cpu-features.c:459:test_query_cpu_model_expansion: assertion failed (_error == "The CPU type 'host' requires KVM"): ("The CPU type 'host' requires hardware accelerator" == "The CPU type 'host' requires KVM")
  failed pipeline 1919542960, total jobs 127, skipped 5, failed 2,  48753 tests, 441 skipped tests, 1 failed tests
    Failed test qemu.qemu:unit / test-aio, msys2-64bit, 12 /aio/timer/schedule - ERROR:../tests/unit/test-aio.c:413:test_timer_schedule: assertion failed: (aio_poll(ctx, true))

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-ID: <20250922093711.2768983-3-alex.bennee@linaro.org>
2025-09-26 09:55:19 +01:00

118 lines
4.2 KiB
Python
Executable File

#!/usr/bin/env python3
#
# A script to analyse failures in the gitlab pipelines. It requires an
# API key from gitlab with the following permissions:
# - api
# - read_repository
# - read_user
#
import argparse
import itertools
import os

import gitlab
#
# Arguments
#
class NoneForEmptyStringAction(argparse.Action):
    """Store None when the option value is the empty string.

    This lets users clear a default filter from the command line,
    e.g. ``--branch=`` or ``--status=`` to mean "no filter".
    """

    def __call__(self, parser, namespace, value, option_string=None):
        # An empty string means "unset"; anything else is stored verbatim.
        setattr(namespace, self.dest, None if value == '' else value)
#
# Command-line definition. Note --branch and --status accept an empty
# value (via NoneForEmptyStringAction) to disable the filter entirely.
#
parser = argparse.ArgumentParser(description="Analyse failed GitLab CI runs.")
parser.add_argument("--gitlab", default="https://gitlab.com",
                    help="GitLab instance URL (default: https://gitlab.com).")
parser.add_argument("--id", type=int, default=11167699,
                    help="GitLab project id (default: 11167699 for qemu-project/qemu)")
parser.add_argument("--token", default=os.getenv("GITLAB_TOKEN"),
                    help="Your personal access token with 'api' scope.")
parser.add_argument("--branch", type=str, default="staging",
                    action=NoneForEmptyStringAction,
                    help="The name of the branch (default: 'staging')")
parser.add_argument("--status", type=str, default="failed",
                    action=NoneForEmptyStringAction,
                    help="Filter by branch status (default: 'failed')")
parser.add_argument("--count", type=int, default=3,
                    help="The number of failed runs to fetch.")
parser.add_argument("--skip-jobs", action='store_true', default=False,
                    help="Skip dumping the job info")
parser.add_argument("--pipeline", type=int, nargs="+", default=None,
                    help="Explicit pipeline ID(s) to fetch.")
if __name__ == "__main__":
    args = parser.parse_args()

    gl = gitlab.Gitlab(url=args.gitlab, private_token=args.token)
    project = gl.projects.get(args.id)

    pipelines_to_process = []

    # Use explicit pipeline IDs if provided, otherwise fetch a list
    if args.pipeline:
        args.count = len(args.pipeline)
        for p_id in args.pipeline:
            pipelines_to_process.append(project.pipelines.get(p_id))
    else:
        # Use an iterator so we only fetch as many pipelines as we need.
        pipe_iter = project.pipelines.list(iterator=True,
                                           status=args.status,
                                           ref=args.branch)
        # islice stops cleanly if fewer than args.count pipelines match;
        # a bare next() here would raise an unhandled StopIteration.
        pipelines_to_process = list(itertools.islice(pipe_iter, args.count))

    # Check each pipeline
    for p in pipelines_to_process:
        jobs = p.jobs.list(get_all=True)
        failed_jobs = [j for j in jobs if j.status == "failed"]
        skipped_jobs = [j for j in jobs if j.status == "skipped"]

        # Per-pipeline aggregate test counts.
        trs = p.test_report_summary.get()
        total = trs.total["count"]
        skipped = trs.total["skipped"]
        failed = trs.total["failed"]

        # One f-string per line: previously a stray trailing comma split
        # this into two print() arguments, inserting an extra separator
        # space (visible as a double space in the summary output).
        print(f"{p.status} pipeline {p.id}, total jobs {len(jobs)}, "
              f"skipped {len(skipped_jobs)}, "
              f"failed {len(failed_jobs)}, "
              f"{total} tests, "
              f"{skipped} skipped tests, "
              f"{failed} failed tests")

        if not args.skip_jobs:
            for j in failed_jobs:
                print(f" Failed job {j.id}, {j.name}, {j.web_url}")

        # It seems we can only extract failing tests from the full
        # test report, maybe there is some way to filter it.
        if failed > 0:
            ftr = p.test_report.get()
            failed_suites = [s for s in ftr.test_suites
                             if s["failed_count"] > 0]
            for fs in failed_suites:
                name = fs["name"]
                tests = fs["test_cases"]
                failed_tests = [t for t in tests if t["status"] == 'failed']
                for t in failed_tests:
                    # Single-quoted keys keep this parseable before
                    # Python 3.12 (nested same-quote f-strings are new
                    # in 3.12).
                    print(f" Failed test {t['classname']}, {name}, "
                          f"{t['name']}")