1 Star 0 Fork 35

bzg_repo/criu

forked from src-openEuler/criu 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0015-cr-dump-handle-rseq-flags-field-Userspace-may-config.patch 11.21 KB
一键复制 编辑 原始数据 按行查看 历史
river 提交于 2022-04-13 15:05 . criu: backport kinds of features/bugfix
From 50f04f06eb3ecbdd465e417e8c5c8b19d43ec2f4 Mon Sep 17 00:00:00 2001
From: bb-cat <ningyu9@huawei.com>
Date: Wed, 2 Mar 2022 15:09:44 +0800
Subject: [PATCH 15/72] cr-dump: handle rseq flags field Userspace may
configure rseq critical section by def
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
---
criu/cr-dump.c | 86 +++++++++++++++++++++++++++----------------
criu/cr-restore.c | 63 +++++++++++++++++++++++++++++++
criu/include/pstree.h | 1 +
images/rseq.proto | 1 +
4 files changed, 119 insertions(+), 32 deletions(-)
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index a3f8973..79387fb 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1047,13 +1047,13 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item)
return 0;
}
-static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, struct rseq_cs *rseq_cs)
+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseqc,
+ struct rseq_cs *rseq_cs, struct rseq *rseq)
{
int ret;
- uint64_t addr;
/* rseq is not registered */
- if (!rseq->rseq_abi_pointer)
+ if (!rseqc->rseq_abi_pointer)
return 0;
/*
@@ -1068,22 +1068,21 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, str
* then rseq_ip_fixup() -> clear_rseq_cs() and user space memory with struct rseq
* will be cleared. So, let's use ptrace(PTRACE_PEEKDATA).
*/
- ret = ptrace_peek_area(tid, &addr, decode_pointer(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)),
- sizeof(uint64_t));
+ ret = ptrace_peek_area(tid, rseq, decode_pointer(rseqc->rseq_abi_pointer),
+ sizeof(struct rseq));
if (ret) {
- pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs addr\n", tid, (unsigned long)&addr,
- (unsigned long)(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t));
+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq struct\n", tid, (unsigned long)rseq,
+ (unsigned long)(rseqc->rseq_abi_pointer), sizeof(uint64_t));
return -1;
}
- /* (struct rseq)->rseq_cs is NULL */
- if (!addr)
+ if (!rseq->rseq_cs.ptr64)
return 0;
- ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(addr), sizeof(struct rseq_cs));
+ ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(rseq->rseq_cs.ptr64), sizeof(struct rseq_cs));
if (ret) {
pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid,
- (unsigned long)rseq_cs, (unsigned long)addr, sizeof(struct rseq_cs));
+ (unsigned long)rseq_cs, (unsigned long)rseq->rseq_cs.ptr64, sizeof(struct rseq_cs));
return -1;
}
@@ -1092,11 +1091,12 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, str
static int dump_thread_rseq(struct pstree_item *item, int i)
{
- struct __ptrace_rseq_configuration rseq;
+ struct __ptrace_rseq_configuration rseqc;
RseqEntry *rseqe = NULL;
int ret;
CoreEntry *core = item->core[i];
RseqEntry **rseqep = &core->thread_core->rseq_entry;
+ struct rseq rseq;
struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
pid_t tid = item->threads[i].real;
@@ -1111,20 +1111,20 @@ static int dump_thread_rseq(struct pstree_item *item, int i)
if (!kdat.has_ptrace_get_rseq_conf)
return 0;
- ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseq), &rseq);
- if (ret != sizeof(rseq)) {
+ ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseqc), &rseqc);
+ if (ret != sizeof(rseqc)) {
pr_perror("ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) = %d", tid, ret);
return -1;
}
- if (rseq.flags != 0) {
+ if (rseqc.flags != 0) {
pr_err("something wrong with ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) flags = 0x%x\n", tid,
- rseq.flags);
+ rseqc.flags);
return -1;
}
- pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer,
- rseq.signature);
+ pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseqc.rseq_abi_pointer,
+ rseqc.signature);
rseqe = xmalloc(sizeof(*rseqe));
if (!rseqe)
@@ -1132,13 +1132,22 @@ static int dump_thread_rseq(struct pstree_item *item, int i)
rseq_entry__init(rseqe);
- rseqe->rseq_abi_pointer = rseq.rseq_abi_pointer;
- rseqe->rseq_abi_size = rseq.rseq_abi_size;
- rseqe->signature = rseq.signature;
+ rseqe->rseq_abi_pointer = rseqc.rseq_abi_pointer;
+ rseqe->rseq_abi_size = rseqc.rseq_abi_size;
+ rseqe->signature = rseqc.signature;
- if (read_rseq_cs(tid, &rseq, rseq_cs))
+ if (read_rseq_cs(tid, &rseqc, rseq_cs, &rseq))
goto err;
+ rseqe->has_rseq_cs_pointer = true;
+ rseqe->rseq_cs_pointer = rseq.rseq_cs.ptr64;
+ pr_err("cs pointer %lx\n", rseqe->rseq_cs_pointer);
+ /* we won't save rseq_cs to the image (only pointer),
+ * so let's combine flags from both struct rseq and struct rseq_cs
+ * (kernel does the same when interpreting RSEQ_CS_FLAG_*)
+ */
+ rseq_cs->flags |= rseq.flags;
+
/* save rseq entry to the image */
*rseqep = rseqe;
@@ -1188,11 +1197,11 @@ static int fixup_thread_rseq(struct pstree_item *item, int i)
struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
pid_t tid = item->threads[i].real;
- /* (struct rseq)->rseq_cs is NULL */
+ /* equivalent to (struct rseq)->rseq_cs is NULL */
if (!rseq_cs->start_ip)
return 0;
- pr_info("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
+ pr_debug("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags,
rseq_cs->version, (unsigned long)TI_IP(core));
@@ -1204,25 +1213,38 @@ static int fixup_thread_rseq(struct pstree_item *item, int i)
if (task_in_rseq(rseq_cs, TI_IP(core))) {
struct pid *tid = &item->threads[i];
- pr_info("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n",
- tid->real);
-
/*
* We need to fixup task instruction pointer from
* the original one (which lays inside rseq critical section)
- * to rseq abort handler address.
+ * to rseq abort handler address. But we need to look on rseq_cs->flags
+ * (please refer to struct rseq -> flags field description).
+ * Naive idea of flags support may be like... let's change instruction pointer (IP)
+ * to rseq_cs->abort_ip if !(rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL).
+ * But unfortunately, it doesn't work properly, because the kernel does
+ * clean up of rseq_cs field in the struct rseq (modifies userspace memory).
+ * So, we need to preserve original value of (struct rseq)->rseq_cs field in the
+ * image and restore it's value before releasing threads.
*
* It's worth to mention that we need to fixup IP in CoreEntry
* (used when full dump/restore is performed) and also in
* the parasite regs storage (used if --leave-running option is used,
* or if dump error occured and process execution is resumed).
*/
- TI_IP(core) = rseq_cs->abort_ip;
- if (item->pid->real == tid->real) {
- compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
+ if (rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) {
+ pr_err("The %d task is in rseq critical section.!!! IP will be set to rseq abort handler addr\n",
+ tid->real);
} else {
- compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
+ pr_warn("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n",
+ tid->real);
+
+ TI_IP(core) = rseq_cs->abort_ip;
+
+ if (item->pid->real == tid->real) {
+ compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
+ } else {
+ compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
+ }
}
}
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index b2bd044..864140f 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -23,6 +23,7 @@
#include "common/compiler.h"
#include "linux/mount.h"
+#include "linux/rseq.h"
#include "clone-noasan.h"
#include "cr_options.h"
@@ -779,6 +780,7 @@ static int open_cores(int pid, CoreEntry *leader_core)
{
int i, tpid;
CoreEntry **cores = NULL;
+ //RseqEntry *rseqs;
cores = xmalloc(sizeof(*cores) * current->nr_threads);
if (!cores)
@@ -812,6 +814,19 @@ static int open_cores(int pid, CoreEntry *leader_core)
}
}
+
+ pr_err("item %lx\n", (uint64_t)current);
+
+ for (i = 0; i < current->nr_threads; i++) {
+ ThreadCoreEntry *tc = cores[i]->thread_core;
+
+ /* compatibility with older CRIU versions */
+ if (!tc->rseq_entry)
+ continue;
+
+ current->rseqe[i] = *tc->rseq_entry;
+ }
+
return 0;
err:
xfree(cores);
@@ -868,8 +883,15 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
{
unsigned args_len;
struct task_restore_args *ta;
+ RseqEntry *rseqs;
pr_info("Restoring resources\n");
+ rseqs = shmalloc(sizeof(*rseqs) * current->nr_threads);
+ if (!rseqs)
+ return -1;
+
+ current->rseqe = rseqs;
+
rst_mem_switch_to_private();
args_len = round_up(sizeof(*ta) + sizeof(struct thread_restore_args) * current->nr_threads, page_size());
@@ -1966,6 +1988,44 @@ static int attach_to_tasks(bool root_seized)
return 0;
}
+static int restore_rseq_cs(void)
+{
+ struct pstree_item *item;
+
+ for_each_pstree_item(item) {
+ int i;
+
+ if (!task_alive(item))
+ continue;
+
+ if (item->nr_threads == 1) {
+ item->threads[0].real = item->pid->real;
+ } else {
+ if (parse_threads(item->pid->real, &item->threads, &item->nr_threads))
+ return -1;
+ }
+
+ for (i = 0; i < item->nr_threads; i++) {
+ pid_t pid = item->threads[i].real;
+
+ if (!item->rseqe[i].rseq_cs_pointer || !item->rseqe[i].rseq_abi_pointer) {
+ pr_err("item %lx rseqe %lx\n", (uint64_t)item, (uint64_t)item->rseqe);
+ pr_err("nothing to do with cs_pointer\n");
+ continue;
+ }
+
+ pr_err("restoring cs ... %lx \n", item->rseqe[i].rseq_cs_pointer);
+
+ if (ptrace_poke_area(pid, &item->rseqe[i].rseq_cs_pointer, (void *)(item->rseqe[i].rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t))) {
+ pr_err("Can't restore memfd args (pid: %d)\n", pid);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
static int catch_tasks(bool root_seized, enum trace_flags *flag)
{
struct pstree_item *item;
@@ -2400,6 +2460,9 @@ skip_ns_bouncing:
if (restore_freezer_state())
pr_err("Unable to restore freezer state\n");
+ /* just before releasing threads we have to restore rseq_cs */
+ restore_rseq_cs();
+
/* Detaches from processes and they continue run through sigreturn. */
if (finalize_restore_detach())
goto out_kill_network_unlocked;
diff --git a/criu/include/pstree.h b/criu/include/pstree.h
index 458e5f9..97bef11 100644
--- a/criu/include/pstree.h
+++ b/criu/include/pstree.h
@@ -25,6 +25,7 @@ struct pstree_item {
int nr_threads; /* number of threads */
struct pid *threads; /* array of threads */
CoreEntry **core;
+ RseqEntry *rseqe;
TaskKobjIdsEntry *ids;
union {
futex_t task_st;
diff --git a/images/rseq.proto b/images/rseq.proto
index be28004..45cb847 100644
--- a/images/rseq.proto
+++ b/images/rseq.proto
@@ -6,4 +6,5 @@ message rseq_entry {
required uint64 rseq_abi_pointer = 1;
required uint32 rseq_abi_size = 2;
required uint32 signature = 3;
+ optional uint64 rseq_cs_pointer = 4;
}
--
2.34.1
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/baizg1107/criu.git
git@gitee.com:baizg1107/criu.git
baizg1107
criu
criu
master

搜索帮助