1 Star 0 Fork 35

xinminst/criu

forked from src-openEuler/criu 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0006-criu-add-pin-memory-method.patch 7.57 KB
一键复制 编辑 原始数据 按行查看 历史
river 提交于 2021-04-13 14:20 . criu: add backport patch set
From 4c11832330e6c7b924b96c7ea70c14025fe0d970 Mon Sep 17 00:00:00 2001
From: "fu.lin" <fulin10@huawei.com>
Date: Tue, 13 Apr 2021 14:10:23 +0800
Subject: [PATCH 6/6] criu: add pin memory method
We can use the checkpoint/restore in userspace (CRIU) method to dump
and restore tasks when updating the kernel. Currently, criu needs
to dump all memory data of tasks to files. When the memory size is
very large (larger than 1GiB), the time cost of dumping the data
will be very long (more than 1 min).
We can pin the memory data of tasks and collect the corresponding
physical page mapping info in the checkpoint process, and remap the
physical pages to the restored tasks in the restore process.
Signed-off-by: Jingxian He <hejingxian@huawei.com>
---
criu/config.c | 1 +
criu/cr-restore.c | 5 +++
criu/include/cr_options.h | 1 +
criu/include/restorer.h | 24 ++++++++++++
criu/mem.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++-
criu/pie/restorer.c | 21 ++++++++++-
6 files changed, 146 insertions(+), 2 deletions(-)
diff --git a/criu/config.c b/criu/config.c
index 5a53256..61b81fa 100644
--- a/criu/config.c
+++ b/criu/config.c
@@ -542,6 +542,7 @@ int parse_options(int argc, char **argv, bool *usage_error,
{ "pre-dump-mode", required_argument, 0, 1097},
{ "file-validation", required_argument, 0, 1098 },
BOOL_OPT("with-cpu-affinity", &opts.with_cpu_affinity),
+ BOOL_OPT("pin-memory", &opts.pin_memory),
{ },
};
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index da2e53d..ff41976 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -3866,6 +3866,11 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
task_args->clone_restore_fn,
task_args->thread_args);
+ if (opts.pin_memory)
+ task_args->pin_memory = true;
+ else
+ task_args->pin_memory = false;
+
/*
* An indirect call to task_restore, note it never returns
* and restoring core is extremely destructive.
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index fda54a4..a4dc5b8 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -176,6 +176,7 @@ struct cr_options {
int file_validation_method;
/* restore cpu affinity */
int with_cpu_affinity;
+ int pin_memory;
};
extern struct cr_options opts;
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
index bd6ef6a..fc37e6d 100644
--- a/criu/include/restorer.h
+++ b/criu/include/restorer.h
@@ -225,6 +225,7 @@ struct task_restore_args {
int lsm_type;
int child_subreaper;
bool has_clone3_set_tid;
+ bool pin_memory;
} __aligned(64);
/*
@@ -317,4 +318,27 @@ enum {
#define __r_sym(name) restorer_sym ## name
#define restorer_sym(rblob, name) (void*)(rblob + __r_sym(name))
+/* Device node that the pin-memory ioctls below are issued on. */
+#define PIN_MEM_FILE "/dev/pinmem"
+/* ioctl type byte and command numbers used to build the requests below. */
+#define PIN_MEM_MAGIC 0x59
+#define _SET_PIN_MEM_AREA 1
+#define _CLEAR_PIN_MEM_AREA 2
+#define _REMAP_PIN_MEM_AREA 3
+#define _PIN_MEM_IOC_MAX_NR 4
+#define SET_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _SET_PIN_MEM_AREA, struct pin_mem_area_set)
+#define CLEAR_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _CLEAR_PIN_MEM_AREA, int)
+#define REMAP_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _REMAP_PIN_MEM_AREA, int)
+
+/*
+ * Upper bound, in bytes, for a single pin_mem_area entry.
+ * The replacement list is parenthesized so the macro expands as one
+ * value in any expression (e.g. "x / ONCE_PIN_MEM_SIZE_LIMIT").
+ */
+#define ONCE_PIN_MEM_SIZE_LIMIT (32 * 1024 * 1024)
+/* Capacity of pin_mem_area_set.mem_area[]. */
+#define MAX_PIN_MEM_AREA_NUM 16
+
+/*
+ * One virtual address range handed to the pin-memory device.
+ * NOTE(review): the dump side fills consecutive entries so that one
+ * entry's virt_end equals the next entry's virt_start, which suggests
+ * virt_end is exclusive -- confirm against the driver.
+ */
+struct pin_mem_area {
+	unsigned long virt_start;
+	unsigned long virt_end;
+};
+
+/* Argument of SET_PIN_MEM_AREA: a batch of ranges belonging to one task. */
+struct pin_mem_area_set {
+	unsigned int pid;	/* task the ranges belong to */
+	unsigned int area_num;	/* number of filled entries in mem_area[] */
+	struct pin_mem_area mem_area[MAX_PIN_MEM_AREA_NUM];
+};
+
#endif /* __CR_RESTORER_H__ */
diff --git a/criu/mem.c b/criu/mem.c
index 167838b..709de4e 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -438,6 +438,88 @@ again:
return ret;
}
+/*
+ * Tell whether a VMA is a candidate for pinning.
+ *
+ * Returns true only for private anonymous mappings that are none of the
+ * special areas filtered out below; false for everything else.
+ */
+bool should_pin_vmae(VmaEntry *vmae)
+{
+	/*
+	 * Special areas are never pinned:
+	 *  - vDSO: it must always be dumped, because on restore we might
+	 *    need to generate a proxy;
+	 *  - VVAR: it is referenced from the vDSO by IP addressing (at
+	 *    least on x86), so the one provided by the kernel at restore
+	 *    time has to be remapped into place instead;
+	 *  - AIO rings.
+	 */
+	if (vma_entry_is(vmae, VMA_AREA_VDSO) ||
+	    vma_entry_is(vmae, VMA_AREA_VVAR) ||
+	    vma_entry_is(vmae, VMA_AREA_AIORING))
+		return false;
+
+	/* Of what remains, only private anonymous memory qualifies. */
+	if (!vma_entry_is(vmae, VMA_ANON_PRIVATE))
+		return false;
+
+	pr_debug("find private anon vma: %lx-%lx\n", vmae->start, vmae->end);
+	return true;
+}
+
+/*
+ * Pin one batch of the range [start, *pend) through SET_PIN_MEM_AREA.
+ *
+ * The range is cut from the front into chunks of at most
+ * ONCE_PIN_MEM_SIZE_LIMIT bytes; at most MAX_PIN_MEM_AREA_NUM chunks fit
+ * into one request.
+ *
+ * @fd:    open descriptor of PIN_MEM_FILE
+ * @start: first address to pin
+ * @pend:  in: end of the range; out: end of the last chunk placed in this
+ *         request, i.e. where the caller has to continue from
+ * @item:  task whose pid (vpid(item)) is put into the request
+ *
+ * Returns the ioctl() result; a negative value means failure.
+ */
+static int pin_one_pmas(int fd, unsigned long start,
+		unsigned long *pend, struct pstree_item *item)
+{
+	/*
+	 * Zero the whole request: the ioctl is given the full structure, so
+	 * unused mem_area[] slots must not carry indeterminate stack bytes.
+	 */
+	struct pin_mem_area_set pmas = { 0 };
+	struct pin_mem_area *pma;
+	unsigned long end = *pend;
+	unsigned long next = start;
+	unsigned int index = 0;
+	int ret;
+
+	while (start < end && index < MAX_PIN_MEM_AREA_NUM) {
+		/*
+		 * Compare the remaining size with the limit rather than
+		 * computing start + limit first: the sum could wrap around
+		 * for ranges at the very top of the address space.
+		 */
+		if (end - start > ONCE_PIN_MEM_SIZE_LIMIT)
+			next = start + ONCE_PIN_MEM_SIZE_LIMIT;
+		else
+			next = end;
+
+		pma = &pmas.mem_area[index];
+		pma->virt_start = start;
+		pma->virt_end = next;
+		pr_info("start pin %lx-%lx\n", start, next);
+		index++;
+		start = next;
+	}
+
+	/* Report how far this batch got so the caller can resume there. */
+	*pend = next;
+	pmas.area_num = index;
+	pmas.pid = vpid(item);
+	pr_info("begin pin memory for pid:%d\n", pmas.pid);
+	ret = ioctl(fd, SET_PIN_MEM_AREA, &pmas);
+	if (ret < 0)
+		pr_err("pin mem fail, errno: %s\n", strerror(errno));
+	return ret;
+}
+/*
+ * Pin the whole address range of one VMA for the given task.
+ *
+ * Opens PIN_MEM_FILE, then feeds the range [vmae->start, vmae->end) to
+ * pin_one_pmas() batch by batch, resuming each time from the address the
+ * previous batch stopped at.
+ *
+ * Returns -1 if the device cannot be opened, otherwise the result of the
+ * last pin_one_pmas() call (negative on failure), or 0 for an empty range.
+ */
+static int pin_vmae(VmaEntry *vmae, struct pstree_item *item)
+{
+	unsigned long cur, batch_end;
+	int ret = 0;
+	int fd;
+
+	fd = open(PIN_MEM_FILE, O_RDWR);
+	if (fd < 0) {
+		pr_err("open file: %s fail.\n", PIN_MEM_FILE);
+		return -1;
+	}
+
+	for (cur = vmae->start; cur < vmae->end; cur = batch_end) {
+		/* pin_one_pmas() rewrites batch_end to where it actually stopped. */
+		batch_end = vmae->end;
+		ret = pin_one_pmas(fd, cur, &batch_end, item);
+		if (ret < 0)
+			break;
+	}
+
+	close(fd);
+	return ret;
+}
+
static int __parasite_dump_pages_seized(struct pstree_item *item,
struct parasite_dump_pages_args *args,
struct vm_area_list *vma_area_list,
@@ -513,7 +595,16 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
if (possible_pid_reuse == -1)
goto out_xfer;
}
-
+ if (opts.pin_memory) {
+ /* pin memory before dump pages */
+ list_for_each_entry(vma_area, &vma_area_list->h, list) {
+ if (should_pin_vmae(vma_area->e)) {
+ ret = pin_vmae(vma_area->e, item);
+ if (ret)
+ goto out_xfer;
+ }
+ }
+ }
/*
* Step 1 -- generate the pagemap
@@ -524,6 +615,9 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
parent_predump_mode = mdc->parent_ie->pre_dump_mode;
list_for_each_entry(vma_area, &vma_area_list->h, list) {
+ if (opts.pin_memory && should_pin_vmae(vma_area->e))
+ continue;
+
ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl,
&pmc, has_parent, mdc->pre_dump,
parent_predump_mode);
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index c63f96b..f3bd541 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -1414,6 +1414,24 @@ int cleanup_current_inotify_events(struct task_restore_args *task_args)
return 0;
}
+/*
+ * Issue REMAP_PIN_MEM_AREA on PIN_MEM_FILE for the task @pid, so that the
+ * physical pages pinned at checkpoint time are mapped back into it.
+ *
+ * Returns -1 if the device cannot be opened, otherwise the ioctl result
+ * (negative on failure).
+ */
+int remap_vmas(int pid)
+{
+	int fd, ret;
+
+	/*
+	 * The sys_*() helpers used in the restorer are raw syscall wrappers:
+	 * a failure is reported as a negative error code, not necessarily
+	 * -1, so test for any negative value.
+	 */
+	fd = sys_open(PIN_MEM_FILE, O_RDWR, 0);
+	if (fd < 0) {
+		pr_err("open file: %s fail.\n", PIN_MEM_FILE);
+		return -1;
+	}
+
+	ret = sys_ioctl(fd, REMAP_PIN_MEM_AREA, (unsigned long) &pid);
+	if (ret < 0)
+		pr_err("remap pin mem fail for pid: %d\n", pid);
+	sys_close(fd);
+	return ret;
+}
+
+
/*
* The main routine to restore task via sigreturn.
* This one is very special, we never return there
@@ -1585,7 +1603,8 @@ long __export_restore_task(struct task_restore_args *args)
goto core_restore_end;
}
}
-
+ if (args->pin_memory)
+ remap_vmas(my_pid);
/*
* Now read the contents (if any)
*/
--
1.8.3.1
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/xinminst/criu.git
git@gitee.com:xinminst/criu.git
xinminst
criu
criu
master

搜索帮助