diff --git a/add-tool-cpuload.patch b/add-tool-cpuload.patch new file mode 100644 index 0000000000000000000000000000000000000000..57679112667f7d679437878bb42ed0d6b9a3fa4a --- /dev/null +++ b/add-tool-cpuload.patch @@ -0,0 +1,250 @@ +From 4d206acc6e85317cf29fc883beb7c852780fe072 Mon Sep 17 00:00:00 2001 +From: Liu Chao +Date: Mon, 6 Dec 2021 02:36:52 +0000 +Subject: [PATCH] add tool: cpuload + +cpuload calculates the cpu usage, showing which tasks run out of cpu resource. + +It display top N tasks when the cpu usage exceeds more than P% and calculates +every T ms. + +This works by tracing the sched switch events using tracepoints. + +Since this uses BPF, only the root user can use this tool. + +optional arguments: + -h, --help show this help message and exit + -t TIME, --time TIME interval to calculate, default 1000 + -n NUMBER, --number NUMBER + maximum tasks to print, default 3 + -p PERCENT, --percent PERCENT + minimum percent to print, default 30 + +example: +[root@localhost ~]# ./cpuload.py -p 50 -n 2 -t 100 +Tracing task switch. Output when cpu is overload. Ctrl-C to end. +DATE COMM PID CPU TIME(ms) %CPU +2021-01-27 10:40:39 stress-ng-cpu 33179 1 100.529 96.68% +2021-01-27 10:40:39 cpuload.py 395575 1 3.363 03.23% +2021-01-27 10:40:39 stress-ng-cpu 33175 3 107.704 99.73% +2021-01-27 10:40:39 sshd 2259 3 0.226 00.21% +2021-01-27 10:40:39 stress-ng-cpu 33176 0 131.978 99.99% +2021-01-27 10:40:39 kworker/0:0 388650 0 0.017 00.01% +2021-01-27 10:40:39 stress-ng-cpu 33178 2 183.987 99.99% +2021-01-27 10:40:39 kworker/2:0 391880 2 0.011 00.01% +^C + +Signed-off-by: liuchao173@huawei.com +--- + tools/cpuload.py | 199 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 199 insertions(+) + create mode 100755 tools/cpuload.py + +diff --git a/tools/cpuload.py b/tools/cpuload.py +new file mode 100755 +index 0000000..f3378e9 +--- /dev/null ++++ b/tools/cpuload.py +@@ -0,0 +1,199 @@ ++#!/usr/bin/python ++# @lint-avoid-python-3-compatibility-imports ++# ++# cpuload Display top N tasks use more than U percent cpu resource when ++# the cpu doesn't enter idle state for more than T ms. ++# ++# USAGE: cpuload [-h] [-t time] [-n number] [-p percent_limit] [-m max_entry] ++# ++# This uses in-kernel eBPF maps to cache task details (PID and comm) by ++# sched_switch, as well as a running time for calculating cpu usage. ++# ++# Licensed under the Apache License, Version 2.0 (the "License") ++# Copyright (C) 2021 Huawei Technologies Co., Ltd. ++ ++from __future__ import print_function ++from bcc import BPF ++from bcc.utils import printb ++import argparse ++from datetime import datetime ++ ++# arguments ++examples = """examples: ++ ./cpuload # display tasks when cpu overload ++ ./cpuload -t 100 # calculate cpu usage every 100 ms ++ ./cpuload -n 5 # display top 5 tasks details ++ ./cpuload -p 30 # display tasks when cpu usage exceeds 30% ++ ./cpuload -m 10000 # set the maximum number of entry to 10,000 ++""" ++parser = argparse.ArgumentParser( ++ description="display tasks when cpu overload", ++ formatter_class=argparse.RawDescriptionHelpFormatter, ++ epilog=examples) ++parser.add_argument("-t", "--time", default=1000, ++ help="interval for calculating the CPU usage, in milliseconds(0 - 60000), default 1000") ++parser.add_argument("-n", "--number", default=3, ++ help="display top n tasks with high cpu usage, default 3") ++parser.add_argument("-p", "--percent_limit", default=90, ++ help="display when the usage of a cpu exceeds percent_limit(0 - 100), default 90") ++parser.add_argument("-m", "--max_entry", default=10000, ++ help="size of the cyclic buffer for recording the scheduling track(1000 - 1000000), default 10000") ++parser.add_argument("--ebpf", action="store_true", ++ help=argparse.SUPPRESS) ++args = parser.parse_args() ++time_ms = int(args.time) ++time_ns = time_ms * 1000000 ++number = int(args.number) ++percent_limit = int(args.percent_limit) ++max_entry = int(args.max_entry) ++debug = 0 ++ ++if time_ms > 60000 or time_ms < 0: ++ print("time invalid") ++ exit(1) ++ ++if percent_limit > 100 or percent_limit < 0: ++ print("percent_limit invalid") ++ exit(1) ++ ++if max_entry > 1000000 or max_entry < 1000: ++ print("max_entry invalid") ++ exit(1) ++ ++# define BPF program ++bpf_text = """ ++#include ++ ++#define MAX_TIME """ + str(time_ns) + """ ++#define THRESHOLD """ + str(percent_limit) + """ ++#define MAX_ENTRY """ + str(max_entry) + """ ++ ++struct cpu_data_t { ++ u32 index; ++ u32 number; ++ u64 prev_time; ++ u64 busy_time; ++ u64 total_time; ++}; ++ ++struct task_data_t { ++ u32 pid; ++ char comm[TASK_COMM_LEN]; ++ u64 delta; ++}; ++ ++struct data_t { ++ u32 index; ++ u32 number; ++ u64 total_time; ++}; ++ ++BPF_PERCPU_ARRAY(cpu_data, struct cpu_data_t, 1); ++ ++BPF_PERCPU_ARRAY(task_data, struct task_data_t, MAX_ENTRY); ++ ++BPF_PERF_OUTPUT(events); ++TRACEPOINT_PROBE(sched, sched_switch) { ++ u32 index = 0; ++ u64 now = bpf_ktime_get_ns(), delta; ++ struct data_t data = {}; ++ struct cpu_data_t *cpu = cpu_data.lookup(&index); ++ struct task_data_t *task; ++ ++ if (cpu == NULL) ++ return 0; ++ ++ if (cpu->prev_time == 0) { ++ cpu->prev_time = now; ++ return 0; ++ } ++ ++ index = (cpu->index + cpu->number) % MAX_ENTRY; ++ task = task_data.lookup(&index); ++ if (task == NULL) ++ return 0; ++ ++ delta = now - cpu->prev_time; ++ if (args->prev_pid != 0) { ++ cpu->busy_time += delta; ++ task->pid = args->prev_pid; ++ __builtin_memcpy(&task->comm, &args->prev_comm, sizeof(task->comm)); ++ task->delta = now - cpu->prev_time; ++ cpu->number++; ++ } ++ ++ cpu->prev_time = now; ++ cpu->total_time += delta; ++ ++ if (cpu->total_time > MAX_TIME || cpu->number == MAX_ENTRY) { ++ if (cpu->busy_time * 100 > cpu->total_time * THRESHOLD) { ++ data.index = cpu->index; ++ data.number = cpu->number; ++ data.total_time = cpu->total_time; ++ events.perf_submit(args, &data, sizeof(data)); ++ cpu->index = (index + 1) % MAX_ENTRY; ++ } ++ cpu->number = 0; ++ cpu->busy_time = 0; ++ cpu->total_time = 0; ++ cpu->prev_time = now; ++ } ++ ++ return 0; ++} ++""" ++ ++if debug or args.ebpf: ++ print(bpf_text) ++ if args.ebpf: ++ exit() ++ ++# initialize BPF ++b = BPF(text=bpf_text) ++ ++print("Tracing task switch. Output when cpu is overload. Ctrl-C to end.") ++ ++print("%-19s %-14s %-7s %-4s %-8s %-5s" % ++ ("DATE", "COMM", "PID", "CPU", "TIME(ms)", "%CPU")) ++ ++# process event ++def print_event(cpu, data, size): ++ date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') ++ data = b["events"].event(data) ++ dic = {} ++ tasks = b["task_data"] ++ if data.total_time < time_ns: ++ print("max_entry is too small, please set more than %d" % ++ (max_entry * time_ns / data.total_time)) ++ for i in range(data.index, data.number + data.index): ++ task = tasks[i % max_entry][cpu] ++ entry = dic.get(task.pid) ++ if entry is not None: ++ entry.delta += task.delta ++ else: ++ dic[task.pid] = task ++ ++ count = 0 ++ for item in sorted(dic.items(), key=lambda x: x[1].delta, reverse=True): ++ if count >= number: ++ break ++ task = item[1] ++ u = task.delta * 100 / data.total_time ++ print("%s %-14.14s %-7s %-4s %-8.3f %05.2f%%" % ( ++ date, ++ task.comm.decode("utf-8", "replace"), ++ task.pid, ++ cpu, ++ float(task.delta) / 1000000, ++ u)) ++ count += 1 ++ dic.clear() ++ print("---------------------------------------------------------------") ++ ++# loop with callback to print_event ++b["events"].open_perf_buffer(print_event) ++while 1: ++ try: ++ b.perf_buffer_poll() ++ except KeyboardInterrupt: ++ exit() +-- +2.23.0 + diff --git a/bcc.spec b/bcc.spec index e1d53b809f22a9adf58eee4a79c72ded38a16082..2bf6a7ab8124f421825e9ed3897f8aac7072efd0 100644 --- a/bcc.spec +++ b/bcc.spec @@ -1,6 +1,6 @@ Name: bcc Version: 0.15.0 -Release: 3 +Release: 4 Summary: BPF Compiler Collection (BCC) License: ASL 2.0 URL: https://github.com/iovisor/bcc @@ -21,6 +21,7 @@ Requires: %{name}-tools = %{version}-%{release} Requires: libbpf >= 0.0.5-3 Patch: 0001-bugfix-tcp-and-udp-tools-failed.patch +Patch: add-tool-cpuload.patch %description BCC is a toolkit for creating efficient kernel tracing and manipulation @@ -163,6 +164,9 @@ rm -rf %{buildroot}%{_datadir}/%{name}/tools/old/ %changelog +* Mon Dec 6 2021 liuchao - 0.15.0-4 +- add tool: cpuload + * Mon Jun 21 2021 luzhihao - 0.15.0-3 - bugfix: tcp* BPF_SK_LOOKUP undeclared failed