diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index f416413a219b..0e200472d82c 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -65,6 +65,9 @@ #ifdef __NR_userfaultfd +#define ALIGN_UP(x, align_to) \ + ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1))) + static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size, hpage_size; #define BOUNCE_RANDOM (1<<0) @@ -93,6 +96,8 @@ static volatile bool test_uffdio_zeropage_eexist = true; static bool test_uffdio_wp = true; /* Whether to test uffd minor faults */ static bool test_uffdio_minor = false; +/* Whether to test uffd move ioctl */ +static bool test_uffdio_move = false; static bool map_shared; static int shm_fd; @@ -113,6 +118,8 @@ struct uffd_stats { unsigned long minor_faults; }; +static void (*uffd_test_page_fault_handler)(struct uffd_msg *msg, struct uffd_stats *args); + /* pthread_mutex_t starts at page offset 0 */ #define area_mutex(___area, ___nr) \ ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) @@ -731,6 +738,30 @@ static int copy_page(int ufd, unsigned long offset) return __copy_page(ufd, offset, false); } +static int move_page(int ufd, unsigned long offset, unsigned long len) +{ + struct uffdio_move uffdio_move; + + if (offset + len > nr_pages * page_size) + err("unexpected offset %lu and length %lu\n", offset, len); + uffdio_move.dst = (unsigned long) area_dst + offset; + uffdio_move.src = (unsigned long) area_src + offset; + uffdio_move.len = len; + uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; + uffdio_move.move = 0; + if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) { + /* real retval in uffdio_move.move */ + if (uffdio_move.move != -EEXIST) + err("UFFDIO_MOVE error: %"PRId64, + (int64_t)uffdio_move.move); + wake_range(ufd, uffdio_move.dst, len); + } else if (uffdio_move.move != len) { + err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); + } else + return 1; + return 0; +} + static int uffd_read_msg(int ufd, struct uffd_msg *msg) { int ret = read(uffd, msg, sizeof(*msg)); @@ -852,7 +883,10 @@ static void *uffd_poll_thread(void *arg) err("unexpected msg event %u\n", msg.event); break; case UFFD_EVENT_PAGEFAULT: - uffd_handle_page_fault(&msg, stats); + if (uffd_test_page_fault_handler) + uffd_test_page_fault_handler(&msg, stats); + else + uffd_handle_page_fault(&msg, stats); break; case UFFD_EVENT_FORK: close(uffd); @@ -1210,6 +1244,198 @@ static int userfaultfd_zeropage_test(void) return 0; } +static void prevent_hugepages() +{ + /* This should be done before source area is populated */ + if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) { + /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ + if (errno != EINVAL) { + err("madvise(MADV_NOHUGEPAGE) failed"); + } + } +} + +static void request_hugepages() +{ + /* This should be done before source area is populated */ + if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) { + if (errno == EINVAL) + err("CONFIG_TRANSPARENT_HUGEPAGE is not set"); + else + err("madvise(MADV_HUGEPAGE) failed"); + } +} + +struct uffd_test_case_ops uffd_move_test_case_ops = { + .post_alloc = prevent_hugepages, +}; + +struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { + .post_alloc = request_hugepages, +}; + +static void +uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_stats *args, + unsigned long len) +{ + unsigned long offset; + + if (msg->event != UFFD_EVENT_PAGEFAULT) + err("unexpected msg event %u", msg->event); + + if (msg->arg.pagefault.flags & + (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) + err("unexpected fault type %llu", msg->arg.pagefault.flags); + + offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset &= ~(len-1); + + if (move_page(uffd, offset, len)) + args->missing_faults++; +} + +static void uffd_move_handle_fault(struct uffd_msg *msg, + struct uffd_stats *args) +{ + uffd_move_handle_fault_common(msg, args, page_size); +} + +static void uffd_move_pmd_handle_fault(struct uffd_msg *msg, + struct uffd_stats *args) +{ + uffd_move_handle_fault_common(msg, args, read_pmd_pagesize()); +} + +static int +uffd_move_test_common(struct uffd_test_case_ops *uffd_test_case_ops, + unsigned long chunk_size, + void (*handle_fault)(struct uffd_msg *msg, struct uffd_stats *args)) +{ + struct uffdio_register uffdio_register; + unsigned long nr; + pthread_t uffd_mon; + char c; + unsigned long long count; + struct uffd_stats args = { 0 }; + char *orig_area_src, *orig_area_dst; + unsigned long step_size, step_count; + unsigned long src_offs = 0; + unsigned long dst_offs = 0; + + uffd_test_ctx_init(UFFD_FEATURE_MOVE, uffd_test_case_ops); + + /* Prevent source pages from being mapped more than once */ + if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK)) + err("madvise(MADV_DONTFORK) failure"); + + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + err("register failure"); + + uffd_test_page_fault_handler = handle_fault; + if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) + err("uffd_poll_thread create"); + + step_size = chunk_size / page_size; + step_count = nr_pages / step_size; + + if (chunk_size > page_size) { + char *aligned_src = ALIGN_UP(area_src, chunk_size); + char *aligned_dst = ALIGN_UP(area_dst, chunk_size); + + if (aligned_src != area_src || aligned_dst != area_dst) { + src_offs = (aligned_src - area_src) / page_size; + dst_offs = (aligned_dst - area_dst) / page_size; + step_count--; + } + orig_area_src = area_src; + orig_area_dst = area_dst; + area_src = aligned_src; + area_dst = aligned_dst; + } + + /* + * Read each of the pages back using the UFFD-registered mapping. We + * expect that the first time we touch a page, it will result in a missing + * fault. uffd_poll_thread will resolve the fault by moving source + * page to destination. + */ + for (nr = 0; nr < step_count * step_size; nr += step_size) { + unsigned long i; + + /* Check area_src content */ + for (i = 0; i < step_size; i++) { + count = *area_count(area_src, nr + i); + if (count != count_verify[src_offs + nr + i]) + err("nr %lu source memory invalid %llu %llu\n", + nr + i, count, count_verify[src_offs + nr + i]); + } + + /* Faulting into area_dst should move the page or the huge page */ + for (i = 0; i < step_size; i++) { + count = *area_count(area_dst, nr + i); + if (count != count_verify[dst_offs + nr + i]) + err("nr %lu memory corruption %llu %llu\n", + nr, count, count_verify[dst_offs + nr + i]); + } + + /* Re-check area_src content which should be empty */ + for (i = 0; i < step_size; i++) { + count = *area_count(area_src, nr + i); + if (count != 0) + err("nr %lu move failed %llu %llu\n", + nr, count, count_verify[src_offs + nr + i]); + } + } + if (step_size > page_size) { + area_src = orig_area_src; + area_dst = orig_area_dst; + } + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + err("pipe write"); + if (pthread_join(uffd_mon, NULL)) + err("join() failed"); + + uffd_test_page_fault_handler = NULL; + uffd_test_ctx_clear(); + + return args.missing_faults != step_count || args.minor_faults != 0; +} + +static int uffd_move_test(void) +{ + printf("move "); + return uffd_move_test_common(&uffd_move_test_case_ops, page_size, + uffd_move_handle_fault); +} + +static int uffd_move_pmd_test(void) +{ + printf("move-pmd "); + return uffd_move_test_common(&uffd_move_test_pmd_case_ops, + read_pmd_pagesize(), + uffd_move_pmd_handle_fault); +} + +static int userfaultfd_move_test(void) +{ + int ret; + + if (!test_uffdio_move) + return 0; + + printf("testing UFFDIO_MOVE: "); + fflush(stdout); + + ret = uffd_move_test() || uffd_move_pmd_test(); + + printf("done.\n"); + return ret; +} + static int userfaultfd_events_test(void) { struct uffdio_register uffdio_register; @@ -1701,7 +1927,8 @@ static int userfaultfd_stress(void) } return userfaultfd_zeropage_test() || userfaultfd_sig_test() - || userfaultfd_events_test() || userfaultfd_minor_test(); + || userfaultfd_events_test() || userfaultfd_minor_test() + || userfaultfd_move_test(); } /* @@ -1733,6 +1960,7 @@ static void set_test_type(const char *type) if (!strcmp(type, "anon")) { test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; + test_uffdio_move = true; } else if (!strcmp(type, "hugetlb")) { test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops;