#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <dirent.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>

#include <linux/mman.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>

#include "linux/magic.h"

#include "vm_util.h"
#include "thp_settings.h"

#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;
static unsigned long page_size;
static int hpage_pmd_nr;
static int anon_order;

#define PID_SMAPS "/proc/self/smaps"
#define TEST_FILE "collapse_test_file"

#define MAX_LINE_LENGTH 500

enum vma_type {
	VMA_ANON,
	VMA_FILE,
	VMA_SHMEM,
};

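/*
 * A mem_ops implementation describes one backing memory type under test
 * (anonymous, file-backed, or shmem): how to set up and tear down the test
 * area, how to fault pages in, and how to verify huge page mappings.
 */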
struct mem_ops {
	void *(*setup_area)(int nr_hpages);
	void (*cleanup_area)(void *p, unsigned long size);
	void (*fault)(void *p, unsigned long start, unsigned long end);
	bool (*check_huge)(void *addr, int nr_hpages);
	const char *name;
};

static struct mem_ops *file_ops;
static struct mem_ops *anon_ops;
static struct mem_ops *shmem_ops;

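/*
 * A collapse_context describes how a collapse is triggered: either by
 * waiting for khugepaged to scan the region, or synchronously via
 * MADV_COLLAPSE. Only the khugepaged path honours the max_ptes_* scan
 * limits.
 */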
struct collapse_context {
	void (*collapse)(const char *msg, char *p, int nr_hpages,
			 struct mem_ops *ops, bool expect);
	bool enforce_pte_scan_limits;
	const char *name;
};

static struct collapse_context *khugepaged_context;
static struct collapse_context *madvise_context;

struct file_info {
	const char *dir;
	char path[PATH_MAX];
	enum vma_type type;
	int fd;
	char dev_queue_read_ahead_path[PATH_MAX];
};

static struct file_info finfo;
static bool skip_settings_restore;
static int exit_status;

static void success(const char *msg)
{
	printf(" \e[32m%s\e[0m\n", msg);
}

static void fail(const char *msg)
{
	printf(" \e[31m%s\e[0m\n", msg);
	exit_status++;
}

static void skip(const char *msg)
{
	printf(" \e[33m%s\e[0m\n", msg);
}

static void restore_settings_atexit(void)
{
	if (skip_settings_restore)
		return;

	printf("Restore THP and khugepaged settings...");
	thp_restore_settings();
	success("OK");

	skip_settings_restore = true;
}

static void restore_settings(int sig)
{
	/* exit() will invoke the restore_settings_atexit handler. */
	exit(sig ? EXIT_FAILURE : exit_status);
}

static void save_settings(void)
{
	printf("Save THP and khugepaged settings...");
	if (file_ops && finfo.type == VMA_FILE)
		thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path);
	thp_save_settings();

	success("OK");

	atexit(restore_settings_atexit);
	signal(SIGTERM, restore_settings);
	signal(SIGINT, restore_settings);
	signal(SIGHUP, restore_settings);
	signal(SIGQUIT, restore_settings);
}

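/*
 * Resolve [dir] into finfo: build the test file path, classify the backing
 * filesystem as tmpfs (VMA_SHMEM) or a regular file (VMA_FILE), and for the
 * latter locate the owning block device's queue/read_ahead_kb sysfs file so
 * read-ahead can be disabled for the tests.
 */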
static void get_finfo(const char *dir)
{
	struct stat path_stat;
	struct statfs fs;
	char buf[1 << 10];
	char path[PATH_MAX];
	char *str, *end;

	finfo.dir = dir;
	stat(finfo.dir, &path_stat);
	if (!S_ISDIR(path_stat.st_mode)) {
		printf("%s: Not a directory (%s)\n", __func__, finfo.dir);
		exit(EXIT_FAILURE);
	}
	if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE,
		     finfo.dir) >= sizeof(finfo.path)) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}
	if (statfs(finfo.dir, &fs)) {
		perror("statfs()");
		exit(EXIT_FAILURE);
	}
	finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE;
	if (finfo.type == VMA_SHMEM)
		return;

	/* Find owning device's queue/read_ahead_kb control */
	if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent",
		     major(path_stat.st_dev), minor(path_stat.st_dev))
	    >= sizeof(path)) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}
	if (read_file(path, buf, sizeof(buf)) < 0) {
		perror("read_file(read_num)");
		exit(EXIT_FAILURE);
	}
	if (strstr(buf, "DEVTYPE=disk")) {
		/* Found it */
		if (snprintf(finfo.dev_queue_read_ahead_path,
			     sizeof(finfo.dev_queue_read_ahead_path),
			     "/sys/dev/block/%d:%d/queue/read_ahead_kb",
			     major(path_stat.st_dev), minor(path_stat.st_dev))
		    >= sizeof(finfo.dev_queue_read_ahead_path)) {
			printf("%s: Pathname is too long\n", __func__);
			exit(EXIT_FAILURE);
		}
		return;
	}
	if (!strstr(buf, "DEVTYPE=partition")) {
		printf("%s: Unknown device type: %s\n", __func__, path);
		exit(EXIT_FAILURE);
	}
	/*
	 * Partition of block device - need to find actual device.
	 * Using naming convention that devnameN is partition of
	 * device devname.
	 */
	str = strstr(buf, "DEVNAME=");
	if (!str) {
		printf("%s: Could not read: %s", __func__, path);
		exit(EXIT_FAILURE);
	}
	str += 8;
	end = str;
	while (*end) {
		if (isdigit(*end)) {
			*end = '\0';
			if (snprintf(finfo.dev_queue_read_ahead_path,
				     sizeof(finfo.dev_queue_read_ahead_path),
				     "/sys/block/%s/queue/read_ahead_kb",
				     str) >= sizeof(finfo.dev_queue_read_ahead_path)) {
				printf("%s: Pathname is too long\n", __func__);
				exit(EXIT_FAILURE);
			}
			return;
		}
		++end;
	}
	printf("%s: Could not read: %s\n", __func__, path);
	exit(EXIT_FAILURE);
}

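/*
 * Scan /proc/self/smaps for the VMA starting at addr and check that its
 * "Swap:" field reports exactly 'size' bytes swapped out.
 */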
static bool check_swap(void *addr, unsigned long size)
{
	bool swap = false;
	int ret;
	FILE *fp;
	char buffer[MAX_LINE_LENGTH];
	char addr_pattern[MAX_LINE_LENGTH];

	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
		       (unsigned long) addr);
	if (ret >= MAX_LINE_LENGTH) {
		printf("%s: Pattern is too long\n", __func__);
		exit(EXIT_FAILURE);
	}

	fp = fopen(PID_SMAPS, "r");
	if (!fp) {
		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
		exit(EXIT_FAILURE);
	}
	if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
		goto err_out;

	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
		       size >> 10);
	if (ret >= MAX_LINE_LENGTH) {
		printf("%s: Pattern is too long\n", __func__);
		exit(EXIT_FAILURE);
	}
	/*
	 * Fetch the Swap: in the same block and check whether it got
	 * the expected number of hugepages next.
	 */
245 */
Zach O'Keefec07c3432022-09-22 15:40:41 -0700246 if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer)))
Kirill A. Shutemove0c13f92020-06-03 16:00:06 -0700247 goto err_out;
248
249 if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
250 goto err_out;
251
252 swap = true;
253err_out:
254 fclose(fp);
255 return swap;
256}
257
Zach O'Keefe9d0d9462022-07-06 16:59:36 -0700258static void *alloc_mapping(int nr)
Kirill A. Shutemove0c13f92020-06-03 16:00:06 -0700259{
260 void *p;
261
Zach O'Keefe9d0d9462022-07-06 16:59:36 -0700262 p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE,
263 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
Kirill A. Shutemove0c13f92020-06-03 16:00:06 -0700264 if (p != BASE_ADDR) {
265 printf("Failed to allocate VMA at %p\n", BASE_ADDR);
266 exit(EXIT_FAILURE);
267 }
268
269 return p;
270}
271
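/*
 * fill_memory()/validate_memory() write and verify a distinct pattern
 * (i + 0xdead0000) in the first word of every page, so per-page corruption
 * after a collapse can be detected.
 */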
static void fill_memory(int *p, unsigned long start, unsigned long end)
{
	int i;

	for (i = start / page_size; i < end / page_size; i++)
		p[i * page_size / sizeof(*p)] = i + 0xdead0000;
}

/*
 * MADV_COLLAPSE is a best-effort request and may fail if an internal
 * resource is temporarily unavailable, in which case it will set errno to
 * EAGAIN. In such a case, immediately reattempt the operation one more
 * time.
 */
static int madvise_collapse_retry(void *p, unsigned long size)
{
	bool retry = true;
	int ret;

retry:
	ret = madvise(p, size, MADV_COLLAPSE);
	if (ret && errno == EAGAIN && retry) {
		retry = false;
		goto retry;
	}
	return ret;
}

/*
 * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with
 * validate_memory()'able contents.
 */
static void *alloc_hpage(struct mem_ops *ops)
{
	void *p = ops->setup_area(1);

	ops->fault(p, 0, hpage_pmd_size);

	/*
	 * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE.
	 * The latter is ineligible for collapse by MADV_COLLAPSE
	 * while the former might cause MADV_COLLAPSE to race with
	 * khugepaged on low-load system (like a test machine), which
	 * would cause MADV_COLLAPSE to fail with EAGAIN.
	 */
	printf("Allocate huge page...");
	if (madvise_collapse_retry(p, hpage_pmd_size)) {
		perror("madvise(MADV_COLLAPSE)");
		exit(EXIT_FAILURE);
	}
	if (!ops->check_huge(p, 1)) {
		perror("madvise(MADV_COLLAPSE)");
		exit(EXIT_FAILURE);
	}
	if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) {
		perror("madvise(MADV_HUGEPAGE)");
		exit(EXIT_FAILURE);
	}
	success("OK");
	return p;
}

static void validate_memory(int *p, unsigned long start, unsigned long end)
{
	int i;

	for (i = start / page_size; i < end / page_size; i++) {
		if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
			printf("Page %d is corrupted: %#x\n",
			       i, p[i * page_size / sizeof(*p)]);
			exit(EXIT_FAILURE);
		}
	}
}

static void *anon_setup_area(int nr_hpages)
{
	return alloc_mapping(nr_hpages);
}

static void anon_cleanup_area(void *p, unsigned long size)
{
	munmap(p, size);
}

static void anon_fault(void *p, unsigned long start, unsigned long end)
{
	fill_memory(p, start, end);
}

static bool anon_check_huge(void *addr, int nr_hpages)
{
	return check_huge_anon(addr, nr_hpages, hpage_pmd_size);
}

static void *file_setup_area(int nr_hpages)
{
	int fd;
	void *p;
	unsigned long size;

	unlink(finfo.path);  /* Cleanup from previous failed tests */
	printf("Creating %s for collapse%s...", finfo.path,
	       finfo.type == VMA_SHMEM ? " (tmpfs)" : "");
	fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL,
		  777);
	if (fd < 0) {
		perror("open()");
		exit(EXIT_FAILURE);
	}

	size = nr_hpages * hpage_pmd_size;
	p = alloc_mapping(nr_hpages);
	fill_memory(p, 0, size);
	write(fd, p, size);
	close(fd);
	munmap(p, size);
	success("OK");

	printf("Opening %s read only for collapse...", finfo.path);
	finfo.fd = open(finfo.path, O_RDONLY, 777);
	if (finfo.fd < 0) {
		perror("open()");
		exit(EXIT_FAILURE);
	}
	p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC,
		 MAP_PRIVATE, finfo.fd, 0);
	if (p == MAP_FAILED || p != BASE_ADDR) {
		perror("mmap()");
		exit(EXIT_FAILURE);
	}

	/* Drop page cache */
	write_file("/proc/sys/vm/drop_caches", "3", 2);
	success("OK");
	return p;
}

static void file_cleanup_area(void *p, unsigned long size)
{
	munmap(p, size);
	close(finfo.fd);
	unlink(finfo.path);
}

static void file_fault(void *p, unsigned long start, unsigned long end)
{
	if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) {
420 perror("madvise(MADV_POPULATE_READ");
		exit(EXIT_FAILURE);
	}
}

static bool file_check_huge(void *addr, int nr_hpages)
{
	switch (finfo.type) {
	case VMA_FILE:
		return check_huge_file(addr, nr_hpages, hpage_pmd_size);
	case VMA_SHMEM:
		return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
	default:
		exit(EXIT_FAILURE);
		return false;
	}
}

static void *shmem_setup_area(int nr_hpages)
{
	void *p;
	unsigned long size = nr_hpages * hpage_pmd_size;

	finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0);
	if (finfo.fd < 0) {
		perror("memfd_create()");
		exit(EXIT_FAILURE);
	}
	if (ftruncate(finfo.fd, size)) {
		perror("ftruncate()");
		exit(EXIT_FAILURE);
	}
	p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd,
		 0);
	if (p != BASE_ADDR) {
		perror("mmap()");
		exit(EXIT_FAILURE);
	}
	return p;
}

static void shmem_cleanup_area(void *p, unsigned long size)
{
	munmap(p, size);
	close(finfo.fd);
}

static bool shmem_check_huge(void *addr, int nr_hpages)
{
	return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
}

static struct mem_ops __anon_ops = {
	.setup_area = &anon_setup_area,
	.cleanup_area = &anon_cleanup_area,
	.fault = &anon_fault,
	.check_huge = &anon_check_huge,
	.name = "anon",
};

static struct mem_ops __file_ops = {
	.setup_area = &file_setup_area,
	.cleanup_area = &file_cleanup_area,
	.fault = &file_fault,
	.check_huge = &file_check_huge,
	.name = "file",
};

static struct mem_ops __shmem_ops = {
	.setup_area = &shmem_setup_area,
	.cleanup_area = &shmem_cleanup_area,
	.fault = &anon_fault,
	.check_huge = &shmem_check_huge,
	.name = "shmem",
};

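/*
 * Trigger a collapse with MADV_COLLAPSE while THP is set to "never" for the
 * duration (so khugepaged cannot interfere), then check both the madvise()
 * return value and the resulting mapping against 'expect'.
 */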
static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
			       struct mem_ops *ops, bool expect)
{
	int ret;
	struct thp_settings settings = *thp_current_settings();

	printf("%s...", msg);

	/*
	 * Prevent khugepaged interference and tests that MADV_COLLAPSE
	 * ignores /sys/kernel/mm/transparent_hugepage/enabled
	 */
	settings.thp_enabled = THP_NEVER;
	settings.shmem_enabled = SHMEM_NEVER;
	thp_push_settings(&settings);

	/* Clear VM_NOHUGEPAGE */
	madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
	ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size);
	if (((bool)ret) == expect)
		fail("Fail: Bad return value");
	else if (!ops->check_huge(p, expect ? nr_hpages : 0))
		fail("Fail: check_huge()");
	else
		success("OK");

	thp_pop_settings();
}

static void madvise_collapse(const char *msg, char *p, int nr_hpages,
			     struct mem_ops *ops, bool expect)
{
	/* Sanity check */
	if (!ops->check_huge(p, 0)) {
		printf("Unexpected huge page\n");
		exit(EXIT_FAILURE);
	}
	__madvise_collapse(msg, p, nr_hpages, ops, expect);
}

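/*
 * Poll for khugepaged to collapse the region: check every TICK (500ms) for
 * up to ~3 seconds, or until khugepaged has completed two more full scans.
 * Returns true on timeout.
 */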
#define TICK 500000
static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
			  struct mem_ops *ops)
{
	int full_scans;
	int timeout = 6; /* 3 seconds */

	/* Sanity check */
	if (!ops->check_huge(p, 0)) {
		printf("Unexpected huge page\n");
		exit(EXIT_FAILURE);
	}

	madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);

	/* Wait until the second full_scan completed */
	full_scans = thp_read_num("khugepaged/full_scans") + 2;

	printf("%s...", msg);
	while (timeout--) {
		if (ops->check_huge(p, nr_hpages))
			break;
		if (thp_read_num("khugepaged/full_scans") >= full_scans)
			break;
		printf(".");
		usleep(TICK);
	}

	madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE);

	return timeout == -1;
}

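/*
 * Collapse via khugepaged: wait for the scan, then (for file/shmem) refault
 * the region so the new hugepage in the page cache gets pmd-mapped before
 * the result is checked.
 */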
static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
				struct mem_ops *ops, bool expect)
{
	if (wait_for_scan(msg, p, nr_hpages, ops)) {
		if (expect)
			fail("Timeout");
		else
			success("OK");
		return;
	}

	/*
	 * For file and shmem memory, khugepaged only retracts pte entries after
	 * putting the new hugepage in the page cache. The hugepage must be
	 * subsequently refaulted to install the pmd mapping for the mm.
	 */
	if (ops != &__anon_ops)
		ops->fault(p, 0, nr_hpages * hpage_pmd_size);

	if (ops->check_huge(p, expect ? nr_hpages : 0))
		success("OK");
	else
		fail("Fail");
}

static struct collapse_context __khugepaged_context = {
	.collapse = &khugepaged_collapse,
	.enforce_pte_scan_limits = true,
	.name = "khugepaged",
};

static struct collapse_context __madvise_context = {
	.collapse = &madvise_collapse,
	.enforce_pte_scan_limits = false,
	.name = "madvise",
};

static bool is_tmpfs(struct mem_ops *ops)
{
	return ops == &__file_ops && finfo.type == VMA_SHMEM;
}

static bool is_anon(struct mem_ops *ops)
{
	return ops == &__anon_ops;
}

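/*
 * Baseline check before the collapse tests: with THP "always", a write fault
 * should allocate a huge page directly, and MADV_DONTNEED on one small page
 * should split the PMD again.
 */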
static void alloc_at_fault(void)
{
	struct thp_settings settings = *thp_current_settings();
	char *p;

	settings.thp_enabled = THP_ALWAYS;
	thp_push_settings(&settings);

	p = alloc_mapping(1);
	*p = 1;
	printf("Allocate huge page on fault...");
	if (check_huge_anon(p, 1, hpage_pmd_size))
		success("OK");
	else
		fail("Fail");

	thp_pop_settings();

	madvise(p, page_size, MADV_DONTNEED);
	printf("Split huge PMD on MADV_DONTNEED...");
	if (check_huge_anon(p, 0, hpage_pmd_size))
		success("OK");
	else
		fail("Fail");
	munmap(p, hpage_pmd_size);
}

static void collapse_full(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;
	int nr_hpages = 4;
	unsigned long size = nr_hpages * hpage_pmd_size;

	p = ops->setup_area(nr_hpages);
	ops->fault(p, 0, size);
	c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages,
		    ops, true);
	validate_memory(p, 0, size);
	ops->cleanup_area(p, size);
}

static void collapse_empty(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	c->collapse("Do not collapse empty PTE table", p, 1, ops, false);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, page_size);
	c->collapse("Collapse PTE table with single PTE entry present", p,
		    1, ops, true);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops)
{
	int max_ptes_none = hpage_pmd_nr / 2;
	struct thp_settings settings = *thp_current_settings();
	void *p;
	int fault_nr_pages = is_anon(ops) ? 1 << anon_order : 1;

	settings.khugepaged.max_ptes_none = max_ptes_none;
	thp_push_settings(&settings);

	p = ops->setup_area(1);

	if (is_tmpfs(ops)) {
		/* shmem pages always in the page cache */
		printf("tmpfs...");
		skip("Skip");
		goto skip;
	}

	ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size);
	c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1,
		    ops, !c->enforce_pte_scan_limits);
	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size);

	if (c->enforce_pte_scan_limits) {
		ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
		c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops,
			    true);
		validate_memory(p, 0,
				(hpage_pmd_nr - max_ptes_none) * page_size);
	}
skip:
	ops->cleanup_area(p, hpage_pmd_size);
	thp_pop_settings();
}

static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, hpage_pmd_size);

	printf("Swapout one page...");
	if (madvise(p, page_size, MADV_PAGEOUT)) {
		perror("madvise(MADV_PAGEOUT)");
		exit(EXIT_FAILURE);
	}
	if (check_swap(p, page_size)) {
		success("OK");
	} else {
		fail("Fail");
		goto out;
	}

	c->collapse("Collapse with swapping in single PTE entry", p, 1, ops,
		    true);
	validate_memory(p, 0, hpage_pmd_size);
out:
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops)
{
	int max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap");
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, hpage_pmd_size);

	printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
	if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
		perror("madvise(MADV_PAGEOUT)");
		exit(EXIT_FAILURE);
	}
	if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
		success("OK");
	} else {
		fail("Fail");
		goto out;
	}

	c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops,
		    !c->enforce_pte_scan_limits);
	validate_memory(p, 0, hpage_pmd_size);

	if (c->enforce_pte_scan_limits) {
		ops->fault(p, 0, hpage_pmd_size);
		printf("Swapout %d of %d pages...", max_ptes_swap,
		       hpage_pmd_nr);
		if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
			perror("madvise(MADV_PAGEOUT)");
			exit(EXIT_FAILURE);
		}
		if (check_swap(p, max_ptes_swap * page_size)) {
			success("OK");
		} else {
			fail("Fail");
			goto out;
		}

		c->collapse("Collapse with max_ptes_swap pages swapped out", p,
			    1, ops, true);
		validate_memory(p, 0, hpage_pmd_size);
	}
out:
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = alloc_hpage(ops);

	if (is_tmpfs(ops)) {
		/* MADV_DONTNEED won't evict tmpfs pages */
		printf("tmpfs...");
		skip("Skip");
		goto skip;
	}

	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
	printf("Split huge page leaving single PTE mapping compound page...");
	madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");

	c->collapse("Collapse PTE table with single PTE mapping compound page",
		    p, 1, ops, true);
	validate_memory(p, 0, page_size);
skip:
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = alloc_hpage(ops);
	printf("Split huge page leaving single PTE page table full of compound pages...");
	madvise(p, page_size, MADV_NOHUGEPAGE);
	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");

	c->collapse("Collapse PTE table full of compound pages", p, 1, ops,
		    true);
	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;
	int i;

	p = ops->setup_area(1);
	for (i = 0; i < hpage_pmd_nr; i++) {
		printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
		       i + 1, hpage_pmd_nr);

		madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
		ops->fault(BASE_ADDR, 0, hpage_pmd_size);
		if (!ops->check_huge(BASE_ADDR, 1)) {
			printf("Failed to allocate huge page\n");
			exit(EXIT_FAILURE);
		}
		madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);

		p = mremap(BASE_ADDR - i * page_size,
			   i * page_size + hpage_pmd_size,
			   (i + 1) * page_size,
			   MREMAP_MAYMOVE | MREMAP_FIXED,
			   BASE_ADDR + 2 * hpage_pmd_size);
		if (p == MAP_FAILED) {
			perror("mremap+unmap");
			exit(EXIT_FAILURE);
		}

		p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
			   (i + 1) * page_size,
			   (i + 1) * page_size + hpage_pmd_size,
			   MREMAP_MAYMOVE | MREMAP_FIXED,
			   BASE_ADDR - (i + 1) * page_size);
		if (p == MAP_FAILED) {
			perror("mremap+alloc");
			exit(EXIT_FAILURE);
		}
	}

	ops->cleanup_area(BASE_ADDR, hpage_pmd_size);
	ops->fault(p, 0, hpage_pmd_size);
	if (!ops->check_huge(p, 1))
		success("OK");
	else
		fail("Fail");

	c->collapse("Collapse PTE table full of different compound pages", p, 1,
		    ops, true);

	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
{
	int wstatus;
	void *p;

	p = ops->setup_area(1);

	printf("Allocate small page...");
	ops->fault(p, 0, page_size);
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");

	printf("Share small page over fork()...");
	if (!fork()) {
		/* Do not touch settings on child exit */
		skip_settings_restore = true;
		exit_status = 0;

		if (ops->check_huge(p, 0))
			success("OK");
		else
			fail("Fail");

		ops->fault(p, page_size, 2 * page_size);
		c->collapse("Collapse PTE table with single page shared with parent process",
			    p, 1, ops, true);

		validate_memory(p, 0, page_size);
		ops->cleanup_area(p, hpage_pmd_size);
		exit(exit_status);
	}

	wait(&wstatus);
	exit_status += WEXITSTATUS(wstatus);

	printf("Check if parent still has small page...");
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");
	validate_memory(p, 0, page_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops)
{
	int wstatus;
	void *p;

	p = alloc_hpage(ops);
	printf("Share huge page over fork()...");
	if (!fork()) {
		/* Do not touch settings on child exit */
		skip_settings_restore = true;
		exit_status = 0;

		if (ops->check_huge(p, 1))
			success("OK");
		else
			fail("Fail");

		printf("Split huge page PMD in child process...");
		madvise(p, page_size, MADV_NOHUGEPAGE);
		madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
		if (ops->check_huge(p, 0))
			success("OK");
		else
			fail("Fail");
		ops->fault(p, 0, page_size);

		thp_write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
		c->collapse("Collapse PTE table full of compound pages in child",
			    p, 1, ops, true);
		thp_write_num("khugepaged/max_ptes_shared",
			      thp_current_settings()->khugepaged.max_ptes_shared);

		validate_memory(p, 0, hpage_pmd_size);
		ops->cleanup_area(p, hpage_pmd_size);
		exit(exit_status);
	}

	wait(&wstatus);
	exit_status += WEXITSTATUS(wstatus);

	printf("Check if parent still has huge page...");
	if (ops->check_huge(p, 1))
		success("OK");
	else
		fail("Fail");
	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops)
{
	int max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared");
	int wstatus;
	void *p;

	p = alloc_hpage(ops);
	printf("Share huge page over fork()...");
	if (!fork()) {
		/* Do not touch settings on child exit */
		skip_settings_restore = true;
		exit_status = 0;

		if (ops->check_huge(p, 1))
			success("OK");
		else
			fail("Fail");

		printf("Trigger CoW on page %d of %d...",
		       hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
		ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
		if (ops->check_huge(p, 0))
			success("OK");
		else
			fail("Fail");

		c->collapse("Maybe collapse with max_ptes_shared exceeded", p,
			    1, ops, !c->enforce_pte_scan_limits);

		if (c->enforce_pte_scan_limits) {
			printf("Trigger CoW on page %d of %d...",
			       hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
			ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) *
				   page_size);
			if (ops->check_huge(p, 0))
				success("OK");
			else
				fail("Fail");

			c->collapse("Collapse with max_ptes_shared PTEs shared",
				    p, 1, ops, true);
		}

		validate_memory(p, 0, hpage_pmd_size);
		ops->cleanup_area(p, hpage_pmd_size);
		exit(exit_status);
	}

	wait(&wstatus);
	exit_status += WEXITSTATUS(wstatus);

	printf("Check if parent still has huge page...");
	if (ops->check_huge(p, 1))
		success("OK");
	else
		fail("Fail");
	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

static void madvise_collapse_existing_thps(struct collapse_context *c,
					   struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, hpage_pmd_size);
	c->collapse("Collapse fully populated PTE table...", p, 1, ops, true);
	validate_memory(p, 0, hpage_pmd_size);

	/* c->collapse() will find a hugepage and complain - call directly. */
	__madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true);
	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}

/*
 * Test race with khugepaged where page tables have been retracted and
 * pmd cleared.
 */
static void madvise_retracted_page_tables(struct collapse_context *c,
					  struct mem_ops *ops)
{
	void *p;
	int nr_hpages = 1;
	unsigned long size = nr_hpages * hpage_pmd_size;

	p = ops->setup_area(nr_hpages);
	ops->fault(p, 0, size);

	/* Let khugepaged collapse and leave pmd cleared */
	if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages,
			  ops)) {
		fail("Timeout");
		return;
	}
	success("OK");
	c->collapse("Install huge PMD from page cache", p, nr_hpages, ops,
		    true);
	validate_memory(p, 0, size);
	ops->cleanup_area(p, size);
}

static void usage(void)
{
	fprintf(stderr, "\nUsage: ./khugepaged [OPTIONS] <test type> [dir]\n\n");
	fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n");
	fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n");
	fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n");
	fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n");
	fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n");
	fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n");
	fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n");
	fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n");
	fprintf(stderr, "\n\tSupported Options:\n");
	fprintf(stderr, "\t\t-h: This help message.\n");
	fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n");
	fprintf(stderr, "\t\t    Defaults to 0. Use this size for anon allocations.\n");
	exit(1);
}

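/*
 * Parse the command line into a <context>:<mem_type> pair plus optional
 * [dir] and -s <order>, e.g. "./khugepaged khugepaged:anon",
 * "./khugepaged -s 2 madvise:shmem" or "./khugepaged all:all /mnt/dir".
 * With no arguments, all contexts are run against anonymous memory.
 */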
static void parse_test_type(int argc, char **argv)
{
	int opt;
	char *buf;
	const char *token;

	while ((opt = getopt(argc, argv, "s:h")) != -1) {
		switch (opt) {
		case 's':
			anon_order = atoi(optarg);
			break;
		case 'h':
		default:
			usage();
		}
	}

	argv += optind;
	argc -= optind;

	if (argc == 0) {
		/* Backwards compatibility */
		khugepaged_context = &__khugepaged_context;
		madvise_context = &__madvise_context;
		anon_ops = &__anon_ops;
		return;
	}

	buf = strdup(argv[0]);
	token = strsep(&buf, ":");

	if (!strcmp(token, "all")) {
		khugepaged_context = &__khugepaged_context;
		madvise_context = &__madvise_context;
	} else if (!strcmp(token, "khugepaged")) {
		khugepaged_context = &__khugepaged_context;
	} else if (!strcmp(token, "madvise")) {
		madvise_context = &__madvise_context;
	} else {
		usage();
	}

	if (!buf)
		usage();

	if (!strcmp(buf, "all")) {
		file_ops = &__file_ops;
		anon_ops = &__anon_ops;
		shmem_ops = &__shmem_ops;
	} else if (!strcmp(buf, "anon")) {
		anon_ops = &__anon_ops;
	} else if (!strcmp(buf, "file")) {
		file_ops = &__file_ops;
	} else if (!strcmp(buf, "shmem")) {
		shmem_ops = &__shmem_ops;
	} else {
		usage();
	}

	if (!file_ops)
		return;

	if (argc != 2)
		usage();

	get_finfo(argv[1]);
}

int main(int argc, char **argv)
{
	int hpage_pmd_order;
	struct thp_settings default_settings = {
		.thp_enabled = THP_MADVISE,
		.thp_defrag = THP_DEFRAG_ALWAYS,
		.shmem_enabled = SHMEM_ADVISE,
		.use_zero_page = 0,
		.khugepaged = {
			.defrag = 1,
			.alloc_sleep_millisecs = 10,
			.scan_sleep_millisecs = 10,
		},
		/*
		 * When testing file-backed memory, the collapse path
		 * looks at how many pages are found in the page cache, not
		 * what pages are mapped. Disable read ahead optimization so
		 * pages don't find their way into the page cache unless
		 * we mem_ops->fault() them in.
		 */
		.read_ahead_kb = 0,
	};

	parse_test_type(argc, argv);

	setbuf(stdout, NULL);

	page_size = getpagesize();
	hpage_pmd_size = read_pmd_pagesize();
	if (!hpage_pmd_size) {
		printf("Reading PMD pagesize failed");
		exit(EXIT_FAILURE);
	}
	hpage_pmd_nr = hpage_pmd_size / page_size;
	hpage_pmd_order = __builtin_ctz(hpage_pmd_nr);

	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
	default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
	default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT;
	default_settings.hugepages[anon_order].enabled = THP_ALWAYS;

	save_settings();
	thp_push_settings(&default_settings);

	alloc_at_fault();

#define TEST(t, c, o) do { \
	if (c && o) { \
		printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \
		t(c, o); \
	} \
	} while (0)

	TEST(collapse_full, khugepaged_context, anon_ops);
	TEST(collapse_full, khugepaged_context, file_ops);
	TEST(collapse_full, khugepaged_context, shmem_ops);
	TEST(collapse_full, madvise_context, anon_ops);
	TEST(collapse_full, madvise_context, file_ops);
	TEST(collapse_full, madvise_context, shmem_ops);

	TEST(collapse_empty, khugepaged_context, anon_ops);
	TEST(collapse_empty, madvise_context, anon_ops);

	TEST(collapse_single_pte_entry, khugepaged_context, anon_ops);
	TEST(collapse_single_pte_entry, khugepaged_context, file_ops);
	TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops);
	TEST(collapse_single_pte_entry, madvise_context, anon_ops);
	TEST(collapse_single_pte_entry, madvise_context, file_ops);
	TEST(collapse_single_pte_entry, madvise_context, shmem_ops);

	TEST(collapse_max_ptes_none, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_none, khugepaged_context, file_ops);
	TEST(collapse_max_ptes_none, madvise_context, anon_ops);
	TEST(collapse_max_ptes_none, madvise_context, file_ops);

	TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops);
	TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops);
	TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops);
	TEST(collapse_single_pte_entry_compound, madvise_context, file_ops);

	TEST(collapse_full_of_compound, khugepaged_context, anon_ops);
	TEST(collapse_full_of_compound, khugepaged_context, file_ops);
	TEST(collapse_full_of_compound, khugepaged_context, shmem_ops);
	TEST(collapse_full_of_compound, madvise_context, anon_ops);
	TEST(collapse_full_of_compound, madvise_context, file_ops);
	TEST(collapse_full_of_compound, madvise_context, shmem_ops);

	TEST(collapse_compound_extreme, khugepaged_context, anon_ops);
	TEST(collapse_compound_extreme, madvise_context, anon_ops);

	TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops);
	TEST(collapse_swapin_single_pte, madvise_context, anon_ops);

	TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_swap, madvise_context, anon_ops);

	TEST(collapse_fork, khugepaged_context, anon_ops);
	TEST(collapse_fork, madvise_context, anon_ops);

	TEST(collapse_fork_compound, khugepaged_context, anon_ops);
	TEST(collapse_fork_compound, madvise_context, anon_ops);

	TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_shared, madvise_context, anon_ops);

	TEST(madvise_collapse_existing_thps, madvise_context, anon_ops);
	TEST(madvise_collapse_existing_thps, madvise_context, file_ops);
	TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops);

	TEST(madvise_retracted_page_tables, madvise_context, file_ops);
	TEST(madvise_retracted_page_tables, madvise_context, shmem_ops);

	restore_settings(0);
}