Fetch the repository succeeded.
This action will force synchronization from src-anolis-os/systemd, which will overwrite any changes that you have made since you forked the repository, and can not be recovered!!!
Synchronous operation will process in the background and will refresh the page when finishing processing. Please be patient.
From 1a822dbe19ab6634ffb2c0d3ce92b27b503e1612 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Sekleta=CC=81r?= <msekleta@redhat.com>
Date: Mon, 17 Feb 2020 13:50:31 +0100
Subject: [PATCH] core: add support for setting CPUAffinity= to special "numa"
value
systemd will automatically derive CPU affinity mask from NUMA node
mask.
Fixes #13248
(cherry picked from commit e2b2fb7f566d13a3de61952b5356cd4d2eaee917)
Resolves: #1740657
---
man/systemd.exec.xml | 9 +++---
src/basic/cpu-set-util.c | 43 ++++++++++++++++++++++++--
src/basic/cpu-set-util.h | 1 +
src/core/dbus-execute.c | 30 +++++++++++++++++-
src/core/execute.c | 46 ++++++++++++++++++++++++++--
src/core/execute.h | 3 ++
src/core/load-fragment.c | 14 ++++++++-
src/shared/bus-unit-util.c | 9 ++++++
src/test/test-cpu-set-util.c | 6 ++--
test/TEST-36-NUMAPOLICY/testsuite.sh | 18 +++++++++++
10 files changed, 166 insertions(+), 13 deletions(-)
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index e2a5ede968..696438c4ef 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -706,10 +706,11 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
<term><varname>CPUAffinity=</varname></term>
<listitem><para>Controls the CPU affinity of the executed processes. Takes a list of CPU indices or ranges
- separated by either whitespace or commas. CPU ranges are specified by the lower and upper CPU indices separated
- by a dash. This option may be specified more than once, in which case the specified CPU affinity masks are
- merged. If the empty string is assigned, the mask is reset, all assignments prior to this will have no
- effect. See
+ separated by either whitespace or commas. Alternatively, takes a special "numa" value in which case systemd
+ automatically derives allowed CPU range based on the value of <varname>NUMAMask=</varname> option. CPU ranges
+ are specified by the lower and upper CPU indices separated by a dash. This option may be specified more than
+ once, in which case the specified CPU affinity masks are merged. If the empty string is assigned, the mask
+ is reset, all assignments prior to this will have no effect. See
<citerefentry><refentrytitle>sched_setaffinity</refentrytitle><manvolnum>2</manvolnum></citerefentry> for
details.</para></listitem>
</varlistentry>
diff --git a/src/basic/cpu-set-util.c b/src/basic/cpu-set-util.c
index 51752ad1a6..1922c95864 100644
--- a/src/basic/cpu-set-util.c
+++ b/src/basic/cpu-set-util.c
@@ -12,12 +12,14 @@
#include "cpu-set-util.h"
#include "dirent-util.h"
#include "extract-word.h"
+#include "fileio.h"
#include "fd-util.h"
#include "log.h"
#include "macro.h"
#include "missing.h"
#include "parse-util.h"
#include "stat-util.h"
+#include "stdio-util.h"
#include "string-util.h"
#include "string-table.h"
#include "strv.h"
@@ -179,7 +181,7 @@ int cpu_set_add_all(CPUSet *a, const CPUSet *b) {
return r;
}
- return 0;
+ return 1;
}
int parse_cpu_set_full(
@@ -264,7 +266,7 @@ int parse_cpu_set_extend(
if (!old->set) {
*old = cpuset;
cpuset = (CPUSet) {};
- return 0;
+ return 1;
}
return cpu_set_add_all(old, &cpuset);
@@ -417,6 +419,43 @@ int apply_numa_policy(const NUMAPolicy *policy) {
return 0;
}
+int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *ret) {
+ int r;
+ size_t i;
+ _cleanup_(cpu_set_reset) CPUSet s = {};
+
+ assert(policy);
+ assert(ret);
+
+ for (i = 0; i < policy->nodes.allocated * 8; i++) {
+ _cleanup_free_ char *l = NULL;
+ char p[STRLEN("/sys/devices/system/node/node//cpulist") + DECIMAL_STR_MAX(size_t) + 1];
+ _cleanup_(cpu_set_reset) CPUSet part = {};
+
+ if (!CPU_ISSET_S(i, policy->nodes.allocated, policy->nodes.set))
+ continue;
+
+ xsprintf(p, "/sys/devices/system/node/node%zu/cpulist", i);
+
+ r = read_one_line_file(p, &l);
+ if (r < 0)
+ return r;
+
+ r = parse_cpu_set(l, &part);
+ if (r < 0)
+ return r;
+
+ r = cpu_set_add_all(&s, &part);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = s;
+ s = (CPUSet) {};
+
+ return 0;
+}
+
static const char* const mpol_table[] = {
[MPOL_DEFAULT] = "default",
[MPOL_PREFERRED] = "preferred",
diff --git a/src/basic/cpu-set-util.h b/src/basic/cpu-set-util.h
index 8519a9b6c8..795be807af 100644
--- a/src/basic/cpu-set-util.h
+++ b/src/basic/cpu-set-util.h
@@ -78,6 +78,7 @@ static inline void numa_policy_reset(NUMAPolicy *p) {
}
int apply_numa_policy(const NUMAPolicy *policy);
+int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *ret);
const char* mpol_to_string(int i) _const_;
int mpol_from_string(const char *s) _pure_;
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index d5acca384f..0fe4c14e48 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -58,6 +58,8 @@ static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext,
static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL);
static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI);
static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC);
+static BUS_DEFINE_PROPERTY_GET(property_get_cpu_affinity_from_numa, "b", ExecContext, exec_context_get_cpu_affinity_from_numa);
+
static int property_get_environment_files(
sd_bus *bus,
@@ -215,6 +217,7 @@ static int property_get_cpu_affinity(
sd_bus_error *error) {
ExecContext *c = userdata;
+ _cleanup_(cpu_set_reset) CPUSet s = {};
_cleanup_free_ uint8_t *array = NULL;
size_t allocated;
@@ -222,7 +225,16 @@ static int property_get_cpu_affinity(
assert(reply);
assert(c);
- (void) cpu_set_to_dbus(&c->cpu_set, &array, &allocated);
+ if (c->cpu_affinity_from_numa) {
+ int r;
+
+ r = numa_to_cpu_set(&c->numa_policy, &s);
+ if (r < 0)
+ return r;
+ }
+
+ (void) cpu_set_to_dbus(c->cpu_affinity_from_numa ? &s : &c->cpu_set, &array, &allocated);
+
return sd_bus_message_append_array(reply, 'y', array, allocated);
}
@@ -743,6 +755,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("CPUSchedulingPolicy", "i", property_get_cpu_sched_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("CPUSchedulingPriority", "i", property_get_cpu_sched_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("CPUAffinity", "ay", property_get_cpu_affinity, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CPUAffinityFromNUMA", "b", property_get_cpu_affinity_from_numa, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NUMAPolicy", "i", property_get_numa_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NUMAMask", "ay", property_get_numa_mask, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1639,6 +1652,20 @@ int bus_exec_context_set_transient_property(
return 1;
+ } else if (streq(name, "CPUAffinityFromNUMA")) {
+ int q;
+
+ r = sd_bus_message_read_basic(message, 'b', &q);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->cpu_affinity_from_numa = q;
+ unit_write_settingf(u, flags, name, "%s=%s", "CPUAffinity", "numa");
+ }
+
+ return 1;
+
} else if (streq(name, "NUMAPolicy")) {
int32_t type;
@@ -1653,6 +1680,7 @@ int bus_exec_context_set_transient_property(
c->numa_policy.type = type;
return 1;
+
} else if (streq(name, "IOSchedulingClass")) {
int32_t q;
diff --git a/src/core/execute.c b/src/core/execute.c
index 3c54ac1110..d528d08830 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2750,6 +2750,33 @@ static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p
static char *exec_command_line(char **argv);
+static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
+ _cleanup_(cpu_set_reset) CPUSet s = {};
+ int r;
+
+ assert(c);
+ assert(ret);
+
+ if (!c->numa_policy.nodes.set) {
+ log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
+ return 0;
+ }
+
+ r = numa_to_cpu_set(&c->numa_policy, &s);
+ if (r < 0)
+ return r;
+
+ cpu_set_reset(ret);
+
+ return cpu_set_add_all(ret, &s);
+}
+
+bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
+ assert(c);
+
+ return c->cpu_affinity_from_numa;
+}
+
static int exec_child(
Unit *unit,
const ExecCommand *command,
@@ -3012,11 +3039,26 @@ static int exec_child(
}
}
- if (context->cpu_set.set)
- if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
+ if (context->cpu_affinity_from_numa || context->cpu_set.set) {
+ _cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
+ const CPUSet *cpu_set;
+
+ if (context->cpu_affinity_from_numa) {
+ r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
+ if (r < 0) {
+ *exit_status = EXIT_CPUAFFINITY;
+ return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
+ }
+
+ cpu_set = &converted_cpu_set;
+ } else
+ cpu_set = &context->cpu_set;
+
+ if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
*exit_status = EXIT_CPUAFFINITY;
return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
}
+ }
if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
r = apply_numa_policy(&context->numa_policy);
diff --git a/src/core/execute.h b/src/core/execute.h
index 86c1cee84c..62c6229621 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -152,6 +152,7 @@ struct ExecContext {
CPUSet cpu_set;
NUMAPolicy numa_policy;
+ bool cpu_affinity_from_numa;
ExecInput std_input;
ExecOutput std_output;
@@ -375,6 +376,8 @@ int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value,
void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
void exec_runtime_vacuum(Manager *m);
+bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);
+
const char* exec_output_to_string(ExecOutput i) _const_;
ExecOutput exec_output_from_string(const char *s) _pure_;
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 33fdb82754..740401a582 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -1251,13 +1251,25 @@ int config_parse_exec_cpu_affinity(const char *unit,
void *userdata) {
ExecContext *c = data;
+ int r;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(data);
- return parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue);
+ if (streq(rvalue, "numa")) {
+ c->cpu_affinity_from_numa = true;
+ cpu_set_reset(&c->cpu_set);
+
+ return 0;
+ }
+
+ r = parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue);
+ if (r >= 0)
+ c->cpu_affinity_from_numa = false;
+
+ return r;
}
int config_parse_capability_set(
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 7029aa5615..daa2c2dce5 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -26,6 +26,8 @@
#include "securebits-util.h"
#include "signal-util.h"
#include "socket-protocol-list.h"
+#include "socket-util.h"
+#include "stdio-util.h"
#include "string-util.h"
#include "syslog-util.h"
#include "terminal-util.h"
@@ -997,6 +999,13 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
_cleanup_free_ uint8_t *array = NULL;
size_t allocated;
+ if (eq && streq(eq, "numa")) {
+ r = sd_bus_message_append(m, "(sv)", "CPUAffinityFromNUMA", "b", true);
+ if (r < 0)
+ return bus_log_create_error(r);
+ return r;
+ }
+
r = parse_cpu_set(eq, &cpuset);
if (r < 0)
return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
diff --git a/src/test/test-cpu-set-util.c b/src/test/test-cpu-set-util.c
index 136eaca82d..1b7be5df4e 100644
--- a/src/test/test-cpu-set-util.c
+++ b/src/test/test-cpu-set-util.c
@@ -218,12 +218,12 @@ static void test_parse_cpu_set_extend(void) {
log_info("/* %s */", __func__);
- assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
+ assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
assert_se(CPU_COUNT_S(c.allocated, c.set) == 2);
assert_se(s1 = cpu_set_to_string(&c));
log_info("cpu_set_to_string: %s", s1);
- assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
+ assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
assert_se(CPU_COUNT_S(c.allocated, c.set) == 3);
assert_se(s2 = cpu_set_to_string(&c));
log_info("cpu_set_to_string: %s", s2);
@@ -240,7 +240,7 @@ static void test_cpu_set_to_from_dbus(void) {
log_info("/* %s */", __func__);
- assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
+ assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
assert_se(s = cpu_set_to_string(&c));
log_info("cpu_set_to_string: %s", s);
assert_se(CPU_COUNT_S(c.allocated, c.set) == 104);
diff --git a/test/TEST-36-NUMAPOLICY/testsuite.sh b/test/TEST-36-NUMAPOLICY/testsuite.sh
index bffac4ffe6..7ccaa5b412 100755
--- a/test/TEST-36-NUMAPOLICY/testsuite.sh
+++ b/test/TEST-36-NUMAPOLICY/testsuite.sh
@@ -279,6 +279,18 @@ else
# Maks must be ignored
grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" $straceLog
+ echo "Unit file CPUAffinity=NUMA support"
+ writeTestUnitNUMAPolicy "bind" "0"
+ echo "CPUAffinity=numa" >> $testUnitNUMAConf
+ systemctl daemon-reload
+ systemctl start $testUnit
+ systemctlCheckNUMAProperties $testUnit "bind" "0"
+ pid=$(systemctl show --value -p MainPID $testUnit)
+ cpulist=$(cat /sys/devices/system/node/node0/cpulist)
+ affinity_systemd=$(systemctl show --value -p CPUAffinity $testUnit)
+ [ $cpulist = $affinity_systemd ]
+ pid1StopUnit $testUnit
+
echo "systemd-run NUMAPolicy support"
runUnit='numa-systemd-run-test.service'
@@ -309,6 +321,12 @@ else
systemd-run -p NUMAPolicy=local -p NUMAMask=0 --unit $runUnit sleep 1000
systemctlCheckNUMAProperties $runUnit "local" ""
pid1StopUnit $runUnit
+
+ systemd-run -p NUMAPolicy=local -p NUMAMask=0 -p CPUAffinity=numa --unit $runUnit sleep 1000
+ systemctlCheckNUMAProperties $runUnit "local" ""
+ systemctl cat $runUnit | grep -q 'CPUAffinity=numa'
+ pid1StopUnit $runUnit
+
fi
# Cleanup
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。