| # SPDX-License-Identifier: GPL-2.0-only |
| |
| menu "Memory Management options" |
| |
| # |
| # For some reason microblaze and nios2 hard code SWAP=n. Hopefully we can |
| # add proper SWAP support to them, in which case this can be removed. |
| # |
| config ARCH_NO_SWAP |
| bool |
| |
| # No prompt, so not user-selectable. In the visible part of this file it is |
| # only turned on through "select ZPOOL" in ZSWAP. |
| config ZPOOL |
| bool |
| |
| menuconfig SWAP |
| bool "Support for paging of anonymous memory (swap)" |
| depends on MMU && BLOCK && !ARCH_NO_SWAP |
| default y |
| help |
| This option allows you to choose whether you want to have support |
| for so called swap devices or swap files in your kernel that are |
| used to provide more virtual memory than the actual RAM present |
| in your computer. If unsure say Y. |
| |
| config ZSWAP |
| bool "Compressed cache for swap pages" |
| depends on SWAP |
| select CRYPTO |
| select ZPOOL |
| help |
| A lightweight compressed cache for swap pages. It takes |
| pages that are in the process of being swapped out and attempts to |
| compress them into a dynamically allocated RAM-based memory pool. |
| This can result in a significant I/O reduction on swap device and, |
| in the case where decompressing from RAM is faster than swap device |
| reads, can also improve workload performance. |
| |
| config ZSWAP_DEFAULT_ON |
| bool "Enable the compressed cache for swap pages by default" |
| depends on ZSWAP |
| help |
| If selected, the compressed cache for swap pages will be enabled |
| at boot, otherwise it will be disabled. |
| |
| The selection made here can be overridden by using the kernel |
| command line 'zswap.enabled=' option. |
| |
| config ZSWAP_SHRINKER_DEFAULT_ON |
| bool "Shrink the zswap pool on memory pressure" |
| depends on ZSWAP |
| default n |
| help |
| If selected, the zswap shrinker will be enabled, and the pages |
| stored in the zswap pool will become available for reclaim (i.e. |
| written back to the backing swap device) on memory pressure. |
| |
| This means that zswap writeback could happen even if the pool is |
| not yet full, or the cgroup zswap limit has not been reached, |
| reducing the chance that cold pages will reside in the zswap pool |
| and consume memory indefinitely. |
| |
| choice |
| prompt "Default compressor" |
| depends on ZSWAP |
| default ZSWAP_COMPRESSOR_DEFAULT_LZO |
| help |
| Selects the default compression algorithm for the compressed cache |
| for swap pages. |
| |
| For an overview of what kind of performance can be expected from |
| a particular compression algorithm please refer to the benchmarks |
| available at the following LWN page: |
| https://lwn.net/Articles/751795/ |
| |
| If in doubt, select 'LZO'. |
| |
| The selection made here can be overridden by using the kernel |
| command line 'zswap.compressor=' option. |
| |
| config ZSWAP_COMPRESSOR_DEFAULT_DEFLATE |
| bool "Deflate" |
| select CRYPTO_DEFLATE |
| help |
| Use the Deflate algorithm as the default compression algorithm. |
| |
| config ZSWAP_COMPRESSOR_DEFAULT_LZO |
| bool "LZO" |
| select CRYPTO_LZO |
| help |
| Use the LZO algorithm as the default compression algorithm. |
| |
| config ZSWAP_COMPRESSOR_DEFAULT_842 |
| bool "842" |
| select CRYPTO_842 |
| help |
| Use the 842 algorithm as the default compression algorithm. |
| |
| config ZSWAP_COMPRESSOR_DEFAULT_LZ4 |
| bool "LZ4" |
| select CRYPTO_LZ4 |
| help |
| Use the LZ4 algorithm as the default compression algorithm. |
| |
| config ZSWAP_COMPRESSOR_DEFAULT_LZ4HC |
| bool "LZ4HC" |
| select CRYPTO_LZ4HC |
| help |
| Use the LZ4HC algorithm as the default compression algorithm. |
| |
| config ZSWAP_COMPRESSOR_DEFAULT_ZSTD |
| bool "zstd" |
| select CRYPTO_ZSTD |
| help |
| Use the zstd algorithm as the default compression algorithm. |
| endchoice |
| |
| config ZSWAP_COMPRESSOR_DEFAULT |
| string |
| depends on ZSWAP |
| default "deflate" if ZSWAP_COMPRESSOR_DEFAULT_DEFLATE |
| default "lzo" if ZSWAP_COMPRESSOR_DEFAULT_LZO |
| default "842" if ZSWAP_COMPRESSOR_DEFAULT_842 |
| default "lz4" if ZSWAP_COMPRESSOR_DEFAULT_LZ4 |
| default "lz4hc" if ZSWAP_COMPRESSOR_DEFAULT_LZ4HC |
| default "zstd" if ZSWAP_COMPRESSOR_DEFAULT_ZSTD |
| default "" |
| |
| # |
| # Build-time default zpool allocator for zswap. Each entry selects the |
| # allocator it names; ZSWAP_ZPOOL_DEFAULT turns the chosen entry into the |
| # allocator name string. |
| # |
| choice |
| prompt "Default allocator" |
| depends on ZSWAP |
| default ZSWAP_ZPOOL_DEFAULT_ZSMALLOC if MMU |
| default ZSWAP_ZPOOL_DEFAULT_ZBUD |
| help |
| Selects the default allocator for the compressed cache for |
| swap pages. |
| The default is 'zsmalloc' when an MMU is available and 'zbud' |
| otherwise; however, please do read the description of each of |
| the allocators below before making a choice. |
| |
| The selection made here can be overridden by using the kernel |
| command line 'zswap.zpool=' option. |
| |
| config ZSWAP_ZPOOL_DEFAULT_ZBUD |
| bool "zbud" |
| select ZBUD |
| help |
| Use the zbud allocator as the default allocator. |
| |
| config ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED |
| bool "z3fold (DEPRECATED)" |
| select Z3FOLD_DEPRECATED |
| help |
| Use the z3fold allocator as the default allocator. |
| |
| Deprecated and scheduled for removal in a few cycles, |
| see CONFIG_Z3FOLD_DEPRECATED. |
| |
| config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC |
| bool "zsmalloc" |
| select ZSMALLOC |
| help |
| Use the zsmalloc allocator as the default allocator. |
| endchoice |
| |
| config ZSWAP_ZPOOL_DEFAULT |
| string |
| depends on ZSWAP |
| default "zbud" if ZSWAP_ZPOOL_DEFAULT_ZBUD |
| default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED |
| default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC |
| default "" |
| |
| config ZBUD |
| tristate "2:1 compression allocator (zbud)" |
| depends on ZSWAP |
| help |
| A special purpose allocator for storing compressed pages. |
| It is designed to store up to two compressed pages per physical |
| page. While this design limits storage density, it has simple and |
| deterministic reclaim properties that make it preferable to a higher |
| density approach when reclaim will be used. |
| |
| config Z3FOLD_DEPRECATED |
| tristate "3:1 compression allocator (z3fold) (DEPRECATED)" |
| depends on ZSWAP |
| help |
| Deprecated and scheduled for removal in a few cycles. If you have |
| a good reason for using Z3FOLD over ZSMALLOC, please contact |
| linux-mm@kvack.org and the zswap maintainers. |
| |
| A special purpose allocator for storing compressed pages. |
| It is designed to store up to three compressed pages per physical |
| page. It is a ZBUD derivative so the simplicity and determinism are |
| still there. |
| |
| # Promptless mirror of Z3FOLD_DEPRECATED: it takes the same y/m value as the |
| # user-visible option and is off whenever that option is off. |
| # NOTE(review): presumably kept so that existing CONFIG_Z3FOLD users keep |
| # working during the deprecation period - confirm. |
| config Z3FOLD |
| tristate |
| default y if Z3FOLD_DEPRECATED=y |
| default m if Z3FOLD_DEPRECATED=m |
| depends on Z3FOLD_DEPRECATED |
| |
| config ZSMALLOC |
| tristate |
| prompt "N:1 compression allocator (zsmalloc)" if (ZSWAP || ZRAM) |
| depends on MMU |
| help |
| zsmalloc is a slab-based memory allocator designed to store |
| pages of various compression levels efficiently. It achieves |
| the highest storage density with the least amount of fragmentation. |
| |
| config ZSMALLOC_STAT |
| bool "Export zsmalloc statistics" |
| depends on ZSMALLOC |
| select DEBUG_FS |
| help |
| This option enables code in the zsmalloc to collect various |
| statistics about what's happening in zsmalloc and exports that |
| information to userspace via debugfs. |
| If unsure, say N. |
| |
| config ZSMALLOC_CHAIN_SIZE |
| int "Maximum number of physical pages per-zspage" |
| default 8 |
| range 4 16 |
| depends on ZSMALLOC |
| help |
| This option sets the upper limit on the number of physical pages |
| that a zsmalloc page (zspage) can consist of. The optimal zspage |
| chain size is calculated for each size class during the |
| initialization of the pool. |
| |
| Changing this option can alter the characteristics of size classes, |
| such as the number of pages per zspage and the number of objects |
| per zspage. This can also result in different configurations of |
| the pool, as zsmalloc merges size classes with similar |
| characteristics. |
| |
| For more information, see zsmalloc documentation. |
| |
| menu "Slab allocator options" |
| |
| config SLUB |
| def_bool y |
| |
| config SLUB_TINY |
| bool "Configure for minimal memory footprint" |
| depends on EXPERT |
| select SLAB_MERGE_DEFAULT |
| help |
| Configures the slab allocator in a way to achieve minimal memory |
| footprint, sacrificing scalability, debugging and other features. |
| This is intended only for the smallest system that had used the |
| SLOB allocator and is not recommended for systems with more than |
| 16MB RAM. |
| |
| If unsure, say N. |
| |
| config SLAB_MERGE_DEFAULT |
| bool "Allow slab caches to be merged" |
| default y |
| help |
| For reduced kernel memory fragmentation, slab caches can be |
| merged when they share the same size and other characteristics. |
| This carries a risk of kernel heap overflows being able to |
| overwrite objects from merged caches (and more easily control |
| cache layout), which makes such heap attacks easier to exploit |
| by attackers. By keeping caches unmerged, these kinds of exploits |
| can usually only damage objects in the same cache. To disable |
| merging at runtime, "slab_nomerge" can be passed on the kernel |
| command line. |
| |
| config SLAB_FREELIST_RANDOM |
| bool "Randomize slab freelist" |
| depends on !SLUB_TINY |
| help |
| Randomizes the freelist order used on creating new pages. This |
| security feature reduces the predictability of the kernel slab |
| allocator against heap overflows. |
| |
| config SLAB_FREELIST_HARDENED |
| bool "Harden slab freelist metadata" |
| depends on !SLUB_TINY |
| help |
| Many kernel heap attacks try to target slab cache metadata and |
| other infrastructure. This option makes minor performance |
| sacrifices to harden the kernel slab allocator against common |
| freelist exploit methods. |
| |
| config SLAB_BUCKETS |
| bool "Support allocation from separate kmalloc buckets" |
| depends on !SLUB_TINY |
| default SLAB_FREELIST_HARDENED |
| help |
| Kernel heap attacks frequently depend on being able to create |
| specifically-sized allocations with user-controlled contents |
| that will be allocated into the same kmalloc bucket as a |
| target object. To avoid sharing these allocation buckets, |
| provide an explicitly separated set of buckets to be used for |
| user-controlled allocations. This may very slightly increase |
| memory fragmentation, though in practice it's only a handful |
| of extra pages since the bulk of user-controlled allocations |
| are relatively long-lived. |
| |
| If unsure, say Y. |
| |
| config SLUB_STATS |
| default n |
| bool "Enable performance statistics" |
| depends on SYSFS && !SLUB_TINY |
| help |
| The statistics are useful to debug slab allocation behavior in |
| order to find ways to optimize the allocator. This should never be |
| enabled for production use since keeping statistics slows down |
| the allocator by a few percentage points. The slabinfo command |
| supports the determination of the most active slabs to figure |
| out which slabs are relevant to a particular load. |
| Try running: slabinfo -DA |
| |
| # Per cpu partial caches: offered only on SMP builds that do not use |
| # SLUB_TINY, and enabled unless the user turns the option off. |
| config SLUB_CPU_PARTIAL |
| bool "Enable per cpu partial caches" |
| depends on SMP && !SLUB_TINY |
| default y |
| help |
| Per cpu partial caches accelerate objects allocation and freeing |
| that is local to a processor at the price of more indeterminism |
| in the latency of the free. On overflow these caches will be cleared |
| which requires the taking of locks that may cause latency spikes. |
| Typically one would choose no for a realtime system. |
| |
| config RANDOM_KMALLOC_CACHES |
| default n |
| depends on !SLUB_TINY |
| bool "Randomize slab caches for normal kmalloc" |
| help |
| A hardening feature that creates multiple copies of slab caches for |
| normal kmalloc allocation and makes kmalloc randomly pick one based |
| on code address, which makes it more difficult for attackers to spray |
| vulnerable memory objects on the heap for the purpose of exploiting |
| memory vulnerabilities. |
| |
| Currently the number of copies is set to 16, a reasonably large value |
| that effectively diverges the memory objects allocated for different |
| subsystems or modules into different caches, at the expense of a |
| limited degree of memory and CPU overhead that relates to hardware and |
| system workload. |
| |
| endmenu # Slab allocator options |
| |
| config SHUFFLE_PAGE_ALLOCATOR |
| bool "Page allocator randomization" |
| default SLAB_FREELIST_RANDOM && ACPI_NUMA |
| help |
| Randomization of the page allocator improves the average |
| utilization of a direct-mapped memory-side-cache. See section |
| 5.2.27 Heterogeneous Memory Attribute Table (HMAT) in the ACPI |
| 6.2a specification for an example of how a platform advertises |
| the presence of a memory-side-cache. There are also incidental |
| security benefits as it reduces the predictability of page |
| allocations to complement SLAB_FREELIST_RANDOM, but the |
| default granularity of shuffling on the MAX_PAGE_ORDER i.e., 10th |
| order of pages is selected based on cache utilization benefits |
| on x86. |
| |
| While the randomization improves cache utilization it may |
| negatively impact workloads on platforms without a cache. For |
| this reason, by default, the randomization is not enabled even |
| if SHUFFLE_PAGE_ALLOCATOR=y. The randomization may be force enabled |
| with the 'page_alloc.shuffle' kernel command line parameter. |
| |
| Say Y if unsure. |
| |
| config COMPAT_BRK |
| bool "Disable heap randomization" |
| default y |
| help |
| Randomizing heap placement makes heap exploits harder, but it |
| also breaks ancient binaries (including anything libc5 based). |
| This option changes the bootup default to heap randomization |
| disabled, and can be overridden at runtime by setting |
| /proc/sys/kernel/randomize_va_space to 2. |
| |
| On non-ancient distros (post-2000 ones) N is usually a safe choice. |
| |
| config MMAP_ALLOW_UNINITIALIZED |
| bool "Allow mmapped anonymous memory to be uninitialized" |
| depends on EXPERT && !MMU |
| default n |
| help |
| Normally, and according to the Linux spec, anonymous memory obtained |
| from mmap() has its contents cleared before it is passed to |
| userspace. Enabling this config option allows you to request that |
| mmap() skip that if it is given a MAP_UNINITIALIZED flag, thus |
| providing a huge performance boost. If this option is not enabled, |
| then the flag will be ignored. |
| |
| This is taken advantage of by uClibc's malloc(), and also by |
| ELF-FDPIC binfmt's brk and stack allocator. |
| |
| Because of the obvious security issues, this option should only be |
| enabled on embedded devices where you control what is run in |
| userspace. Since that isn't generally a problem on no-MMU systems, |
| it is normally safe to say Y here. |
| |
| See Documentation/admin-guide/mm/nommu-mmap.rst for more information. |
| |
| config SELECT_MEMORY_MODEL |
| def_bool y |
| depends on ARCH_SELECT_MEMORY_MODEL |
| |
| choice |
| prompt "Memory model" |
| depends on SELECT_MEMORY_MODEL |
| default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT |
| default FLATMEM_MANUAL |
| help |
| This option allows you to change some of the ways that |
| Linux manages its memory internally. Most users will |
| only have one option here selected by the architecture |
| configuration. This is normal. |
| |
| config FLATMEM_MANUAL |
| bool "Flat Memory" |
| depends on !ARCH_SPARSEMEM_ENABLE || ARCH_FLATMEM_ENABLE |
| help |
| This option is best suited for non-NUMA systems with |
| flat address space. The FLATMEM is the most efficient |
| system in terms of performance and resource consumption |
| and it is the best option for smaller systems. |
| |
| For systems that have holes in their physical address |
| spaces and for features like NUMA and memory hotplug, |
| choose "Sparse Memory". |
| |
| If unsure, choose this option (Flat Memory) over any other. |
| |
| config SPARSEMEM_MANUAL |
| bool "Sparse Memory" |
| depends on ARCH_SPARSEMEM_ENABLE |
| help |
| This will be the only option for some systems, including |
| memory hot-plug systems. This is normal. |
| |
| This option provides efficient support for systems with |
| holes in their physical address space and allows memory |
| hot-plug and hot-remove. |
| |
| If unsure, choose "Flat Memory" over this option. |
| |
| endchoice |
| |
| # Effective memory-model symbol: set when the user picked "Sparse Memory" |
| # (SPARSEMEM_MANUAL), or when no memory-model choice is offered |
| # (!SELECT_MEMORY_MODEL) and the architecture sets ARCH_SPARSEMEM_ENABLE. |
| config SPARSEMEM |
| def_bool y |
| depends on (!SELECT_MEMORY_MODEL && ARCH_SPARSEMEM_ENABLE) || SPARSEMEM_MANUAL |
| |
| # Effective memory-model symbol: set when SPARSEMEM is off, or when the user |
| # explicitly picked "Flat Memory" (FLATMEM_MANUAL). |
| config FLATMEM |
| def_bool y |
| depends on !SPARSEMEM || FLATMEM_MANUAL |
| |
| # |
| # SPARSEMEM_EXTREME (which is the default) does some bootmem |
| # allocations when sparse_init() is called. If this cannot |
| # be done on your architecture, select this option. However, |
| # statically allocating the mem_section[] array can potentially |
| # consume vast quantities of .bss, so be careful. |
| # |
| # This option will also potentially produce smaller runtime code |
| # with gcc 3.4 and later. |
| # |
| config SPARSEMEM_STATIC |
| bool |
| |
| # |
| # Architecture platforms which require a two level mem_section in SPARSEMEM |
| # must select this option. This is usually for architecture platforms with |
| # an extremely sparse physical address space. |
| # |
| config SPARSEMEM_EXTREME |
| def_bool y |
| depends on SPARSEMEM && !SPARSEMEM_STATIC |
| |
| config SPARSEMEM_VMEMMAP_ENABLE |
| bool |
| |
| config SPARSEMEM_VMEMMAP |
| bool "Sparse Memory virtual memmap" |
| depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE |
| default y |
| help |
| SPARSEMEM_VMEMMAP uses a virtually mapped memmap to optimise |
| pfn_to_page and page_to_pfn operations. This is the most |
| efficient option when sufficient kernel resources are available. |
| # |
| # Select this config option from the architecture Kconfig, if it is preferred |
| # to enable the feature of HugeTLB/dev_dax vmemmap optimization. |
| # |
| config ARCH_WANT_OPTIMIZE_DAX_VMEMMAP |
| bool |
| |
| config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP |
| bool |
| |
| config HAVE_MEMBLOCK_PHYS_MAP |
| bool |
| |
| # Promptless symbol (never shown to the user); declared as depending on MMU. |
| config HAVE_GUP_FAST |
| bool |
| depends on MMU |
| |
| # Don't discard allocated memory used to track "memory" and "reserved" memblocks |
| # after early boot, so it can still be used to test for validity of memory. |
| # Also, memblocks are updated with memory hot(un)plug. |
| config ARCH_KEEP_MEMBLOCK |
| bool |
| |
| # Keep arch NUMA mapping infrastructure post-init. |
| config NUMA_KEEP_MEMINFO |
| bool |
| |
| # Promptless helper symbol. In the visible part of this file it is selected |
| # by MEMORY_HOTPLUG, MEMORY_FAILURE and CMA. |
| config MEMORY_ISOLATION |
| bool |
| |
| # IORESOURCE_SYSTEM_RAM regions in the kernel resource tree that are marked |
| # IORESOURCE_EXCLUSIVE cannot be mapped to user space, for example, via |
| # /dev/mem. |
| config EXCLUSIVE_SYSTEM_RAM |
| def_bool y |
| depends on !DEVMEM || STRICT_DEVMEM |
| |
| # |
| # Only set this on architectures that have completely implemented the memory |
| # hotplug feature. If you are not sure, don't touch it. |
| # |
| config HAVE_BOOTMEM_INFO_NODE |
| def_bool n |
| |
| config ARCH_ENABLE_MEMORY_HOTPLUG |
| bool |
| |
| config ARCH_ENABLE_MEMORY_HOTREMOVE |
| bool |
| |
| # eventually, we can have this option just 'select SPARSEMEM' |
| menuconfig MEMORY_HOTPLUG |
| bool "Memory hotplug" |
| select MEMORY_ISOLATION |
| depends on SPARSEMEM |
| depends on ARCH_ENABLE_MEMORY_HOTPLUG |
| depends on 64BIT |
| select NUMA_KEEP_MEMINFO if NUMA |
| |
| if MEMORY_HOTPLUG |
| |
| config MEMORY_HOTPLUG_DEFAULT_ONLINE |
| bool "Online the newly added memory blocks by default" |
| depends on MEMORY_HOTPLUG |
| help |
| This option sets the default policy setting for memory hotplug |
| onlining policy (/sys/devices/system/memory/auto_online_blocks) which |
| determines what happens to newly added memory regions. Policy setting |
| can always be changed at runtime. |
| See Documentation/admin-guide/mm/memory-hotplug.rst for more information. |
| |
| Say Y here if you want all hot-plugged memory blocks to appear in |
| 'online' state by default. |
| Say N here if you want the default policy to keep all hot-plugged |
| memory blocks in 'offline' state. |
| |
| config MEMORY_HOTREMOVE |
| bool "Allow for memory hot remove" |
| select HAVE_BOOTMEM_INFO_NODE if (X86_64 || PPC64) |
| depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE |
| depends on MIGRATION |
| |
| config MHP_MEMMAP_ON_MEMORY |
| def_bool y |
| depends on MEMORY_HOTPLUG && SPARSEMEM_VMEMMAP |
| depends on ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE |
| |
| endif # MEMORY_HOTPLUG |
| |
| config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE |
| bool |
| |
| # Heavily threaded applications may benefit from splitting the mm-wide |
| # page_table_lock, so that faults on different parts of the user address |
| # space can be handled with less contention: split it at this NR_CPUS. |
| # Default to 4 for wider testing, though 8 might be more appropriate. |
| # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock. |
| # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes. |
| # SPARC32 allocates multiple pte tables within a single page, and therefore |
| # a per-page lock leads to problems when multiple tables need to be locked |
| # at the same time (e.g. copy_page_range()). |
| # DEBUG_SPINLOCK and DEBUG_LOCK_ALLOC spinlock_t also enlarge struct page. |
| # |
| config SPLIT_PTE_PTLOCKS |
| def_bool y |
| depends on MMU |
| depends on SMP |
| depends on NR_CPUS >= 4 |
| depends on !ARM || CPU_CACHE_VIPT |
| depends on !PARISC || PA20 |
| depends on !SPARC32 |
| |
| config ARCH_ENABLE_SPLIT_PMD_PTLOCK |
| bool |
| |
| config SPLIT_PMD_PTLOCKS |
| def_bool y |
| depends on SPLIT_PTE_PTLOCKS && ARCH_ENABLE_SPLIT_PMD_PTLOCK |
| |
| # |
| # support for memory balloon |
| config MEMORY_BALLOON |
| bool |
| |
| # |
| # support for memory balloon compaction |
| config BALLOON_COMPACTION |
| bool "Allow for balloon memory compaction/migration" |
| default y |
| depends on COMPACTION && MEMORY_BALLOON |
| help |
| Memory fragmentation introduced by ballooning might reduce |
| significantly the number of 2MB contiguous memory blocks that can be |
| used within a guest, thus imposing performance penalties associated |
| with the reduced number of transparent huge pages that could be used |
| by the guest workload. Allowing the compaction & migration for memory |
| pages enlisted as being part of memory balloon devices avoids the |
| aforementioned scenario and helps improve memory defragmentation. |
| |
| # |
| # support for memory compaction |
| config COMPACTION |
| bool "Allow for memory compaction" |
| default y |
| select MIGRATION |
| depends on MMU |
| help |
| Compaction is the only memory management component to form |
| high order (larger physically contiguous) memory blocks |
| reliably. The page allocator relies on compaction heavily and |
| the lack of the feature can lead to unexpected OOM killer |
| invocations for high order memory requests. You shouldn't |
| disable this option unless there really is a strong reason for |
| it and then we would be really interested to hear about that at |
| linux-mm@kvack.org. |
| |
| # Promptless integer, only defined when COMPACTION is set: 0 when PREEMPT_RT |
| # is set, 1 otherwise (the first matching default wins). |
| # NOTE(review): presumably the boot-time default for the |
| # compact_unevictable_allowed sysctl - confirm against its user. |
| config COMPACT_UNEVICTABLE_DEFAULT |
| int |
| depends on COMPACTION |
| default 0 if PREEMPT_RT |
| default 1 |
| |
| # |
| # support for free page reporting |
| config PAGE_REPORTING |
| bool "Free page reporting" |
| help |
| Free page reporting allows for the incremental acquisition of |
| free pages from the buddy allocator for the purpose of reporting |
| those pages to another entity, such as a hypervisor, so that the |
| memory can be freed within the host for other uses. |
| |
| # |
| # support for page migration |
| # |
| config MIGRATION |
| bool "Page migration" |
| default y |
| depends on (NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA) && MMU |
| help |
| Allows the migration of the physical location of pages of processes |
| while the virtual addresses are not changed. This is useful in |
| two situations. The first is on NUMA systems to put pages nearer |
| to the processors accessing them. The second is when allocating huge |
| pages as migration can relocate pages to satisfy a huge page |
| allocation instead of reclaiming. |
| |
| # Promptless; automatically enabled when both MIGRATION and ZONE_DEVICE are set. |
| config DEVICE_MIGRATION |
| def_bool MIGRATION && ZONE_DEVICE |
| |
| config ARCH_ENABLE_HUGEPAGE_MIGRATION |
| bool |
| |
| config ARCH_ENABLE_THP_MIGRATION |
| bool |
| |
| config HUGETLB_PAGE_SIZE_VARIABLE |
| def_bool n |
| help |
| Allows the pageblock_order value to be dynamic instead of just standard |
| HUGETLB_PAGE_ORDER when there are multiple HugeTLB page sizes available |
| on a platform. |
| |
| Note that the pageblock_order cannot exceed MAX_PAGE_ORDER and will be |
| clamped down to MAX_PAGE_ORDER. |
| |
| # Promptless; automatically enabled when CMA is set, or when both |
| # MEMORY_ISOLATION and COMPACTION are set. |
| config CONTIG_ALLOC |
| def_bool (MEMORY_ISOLATION && COMPACTION) || CMA |
| |
| config PCP_BATCH_SCALE_MAX |
| int "Maximum scale factor of PCP (Per-CPU pageset) batch allocate/free" |
| default 5 |
| range 0 6 |
| help |
| In page allocator, PCP (Per-CPU pageset) is refilled and drained in |
| batches. The batch number is scaled automatically to improve page |
| allocation/free throughput. But too large scale factor may hurt |
| latency. This option sets the upper limit of scale factor to limit |
| the maximum latency. |
| |
| # Promptless symbol whose default follows 64BIT. |
| config PHYS_ADDR_T_64BIT |
| def_bool 64BIT |
| |
| config BOUNCE |
| bool "Enable bounce buffers" |
| default y |
| depends on BLOCK && MMU && HIGHMEM |
| help |
| Enable bounce buffers for devices that cannot access the full range of |
| memory available to the CPU. Enabled by default when HIGHMEM is |
| selected, but you may say n to override this. |
| |
| config MMU_NOTIFIER |
| bool |
| select INTERVAL_TREE |
| |
| config KSM |
| bool "Enable KSM for page merging" |
| depends on MMU |
| select XXHASH |
| help |
| Enable Kernel Samepage Merging: KSM periodically scans those areas |
| of an application's address space that an app has advised may be |
| mergeable. When it finds pages of identical content, it replaces |
| the many instances by a single page with that content, so |
| saving memory until one or another app needs to modify the content. |
| Recommended for use with KVM, or with other duplicative applications. |
| See Documentation/mm/ksm.rst for more information: KSM is inactive |
| until a program has madvised that an area is MADV_MERGEABLE, and |
| root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set). |
| |
| config DEFAULT_MMAP_MIN_ADDR |
| int "Low address space to protect from user allocation" |
| depends on MMU |
| default 4096 |
| help |
| This is the portion of low virtual memory which should be protected |
| from userspace allocation. Keeping a user from writing to low pages |
| can help reduce the impact of kernel NULL pointer bugs. |
| |
| For most arm64, ppc64 and x86 users with lots of address space |
| a value of 65536 is reasonable and should cause no problems. |
| On arm and other archs it should not be higher than 32768. |
| Programs which use vm86 functionality or have some need to map |
| this low address space will need CAP_SYS_RAWIO or disable this |
| protection by setting the value to 0. |
| |
| This value can be changed after boot using the |
| /proc/sys/vm/mmap_min_addr tunable. |
| |
| config ARCH_SUPPORTS_MEMORY_FAILURE |
| bool |
| |
| # Recovery from hardware memory errors. Requires an MMU and architecture |
| # support (ARCH_SUPPORTS_MEMORY_FAILURE); selects MEMORY_ISOLATION and RAS. |
| config MEMORY_FAILURE |
| bool "Enable recovery from hardware memory errors" |
| depends on MMU && ARCH_SUPPORTS_MEMORY_FAILURE |
| select MEMORY_ISOLATION |
| select RAS |
| help |
| Enables code to recover from some memory failures on systems |
| with MCA recovery. This allows a system to continue running |
| even when some of its memory has uncorrected errors. This requires |
| special hardware support and typically ECC memory. |
| |
| config HWPOISON_INJECT |
| tristate "HWPoison pages injector" |
| depends on MEMORY_FAILURE && DEBUG_KERNEL && PROC_FS |
| select PROC_PAGE_MONITOR |
| |
| config NOMMU_INITIAL_TRIM_EXCESS |
| int "Turn on mmap() excess space trimming before booting" |
| depends on !MMU |
| default 1 |
| help |
| The NOMMU mmap() frequently needs to allocate large contiguous chunks |
| of memory on which to store mappings, but it can only ask the system |
| allocator for chunks in 2^N*PAGE_SIZE amounts - which is frequently |
| more than it requires. To deal with this, mmap() is able to trim off |
| the excess and return it to the allocator. |
| |
| If trimming is enabled, the excess is trimmed off and returned to the |
| system allocator, which can cause extra fragmentation, particularly |
| if there are a lot of transient processes. |
| |
| If trimming is disabled, the excess is kept, but not used, which for |
| long-term mappings means that the space is wasted. |
| |
| Trimming can be dynamically controlled through a sysctl option |
| (/proc/sys/vm/nr_trim_pages) which specifies the minimum number of |
| excess pages there must be before trimming should occur, or zero if |
| no trimming is to occur. |
| |
| This option specifies the initial value of this option. The default |
| of 1 says that all excess pages should be trimmed. |
| |
| See Documentation/admin-guide/mm/nommu-mmap.rst for more information. |
| |
| config ARCH_WANT_GENERAL_HUGETLB |
| bool |
| |
| config ARCH_WANTS_THP_SWAP |
| def_bool n |
| |
| menuconfig TRANSPARENT_HUGEPAGE |
| bool "Transparent Hugepage Support" |
| depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT |
| select COMPACTION |
| select XARRAY_MULTI |
| help |
| Transparent Hugepages allows the kernel to use huge pages and |
| huge tlb transparently to the applications whenever possible. |
| This feature can improve computing performance to certain |
| applications by speeding up page faults during memory |
| allocation, by reducing the number of tlb misses and by speeding |
| up the pagetable walking. |
| |
| If memory constrained on embedded, you may want to say N. |
| |
| if TRANSPARENT_HUGEPAGE |
| |
| choice |
| prompt "Transparent Hugepage Support sysfs defaults" |
| depends on TRANSPARENT_HUGEPAGE |
| default TRANSPARENT_HUGEPAGE_ALWAYS |
| help |
| Selects the sysfs defaults for Transparent Hugepage Support. |
| |
| config TRANSPARENT_HUGEPAGE_ALWAYS |
| bool "always" |
| help |
| Enabling Transparent Hugepage always can increase the |
| memory footprint of applications without a guaranteed |
| benefit but it will work automatically for all applications. |
| |
| config TRANSPARENT_HUGEPAGE_MADVISE |
| bool "madvise" |
| help |
| Enabling Transparent Hugepage madvise will only provide a |
| performance improvement benefit to the applications using |
| madvise(MADV_HUGEPAGE), but it won't risk increasing the |
| memory footprint of applications without a guaranteed |
| benefit. |
| |
| config TRANSPARENT_HUGEPAGE_NEVER |
| bool "never" |
| help |
| Disable Transparent Hugepage by default. It can still be |
| enabled at runtime via sysfs. |
| endchoice |
| |
| config THP_SWAP |
| def_bool y |
| depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP && 64BIT |
| help |
| Swap transparent huge pages in one piece, without splitting. |
| XXX: For now, swap cluster backing transparent huge page |
| will be split after swapout. |
| |
| For selection by architectures with reasonable THP sizes. |
| |
| config READ_ONLY_THP_FOR_FS |
| bool "Read-only THP for filesystems (EXPERIMENTAL)" |
| depends on TRANSPARENT_HUGEPAGE && SHMEM |
| |
| help |
| Allow khugepaged to put read-only file-backed pages in THP. |
| |
| This is marked experimental because it is a new feature. Write |
| support of file THPs will be developed in the next few release |
| cycles. |
| |
| endif # TRANSPARENT_HUGEPAGE |
| |
| # |
| # The architecture supports pgtable leaves that are larger than PAGE_SIZE |
| # |
| config PGTABLE_HAS_HUGE_LEAVES |
| def_bool TRANSPARENT_HUGEPAGE || HUGETLB_PAGE |
| |
| # TODO: Allow to be enabled without THP |
| config ARCH_SUPPORTS_HUGE_PFNMAP |
| def_bool n |
| depends on TRANSPARENT_HUGEPAGE |
| |
| config ARCH_SUPPORTS_PMD_PFNMAP |
| def_bool y |
| depends on ARCH_SUPPORTS_HUGE_PFNMAP && HAVE_ARCH_TRANSPARENT_HUGEPAGE |
| |
| config ARCH_SUPPORTS_PUD_PFNMAP |
| def_bool y |
| depends on ARCH_SUPPORTS_HUGE_PFNMAP && HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD |
| |
| # |
| # Uniprocessor (!SMP) and MMU-less (!MMU) builds use the km based percpu |
| # allocator; this promptless symbol is y on exactly those configurations. |
| # |
| config NEED_PER_CPU_KM |
| def_bool y |
| depends on !SMP || !MMU |
| |
| # The four symbols below are promptless bools with no default: each stays n |
| # unless it is selected from another Kconfig entry (selectors are not visible |
| # in this part of the file). |
| config NEED_PER_CPU_EMBED_FIRST_CHUNK |
| bool |
| |
| config NEED_PER_CPU_PAGE_FIRST_CHUNK |
| bool |
| |
| config USE_PERCPU_NUMA_NODE_ID |
| bool |
| |
| config HAVE_SETUP_PER_CPU_AREA |
| bool |
| |
| # User-visible switch for the Contiguous Memory Allocator. Requires MMU and |
| # force-selects MIGRATION and MEMORY_ISOLATION when enabled. |
| config CMA |
| bool "Contiguous Memory Allocator" |
| depends on MMU |
| select MIGRATION |
| select MEMORY_ISOLATION |
| help |
| This enables the Contiguous Memory Allocator which allows other |
| subsystems to allocate big physically-contiguous blocks of memory. |
| CMA reserves a region of memory and allows only movable pages to |
| be allocated from it. This way, the kernel can use the memory for |
| pagecache and when a subsystem requests for contiguous area, the |
| allocated pages are migrated away to serve the contiguous request. |
| |
| If unsure, say "n". |
| |
| # Optional debugfs view of CMA; only offered when both CMA and DEBUG_FS are on. |
| config CMA_DEBUGFS |
| bool "CMA debugfs interface" |
| depends on CMA && DEBUG_FS |
| help |
| Turns on the DebugFS interface for CMA. |
| |
| # Optional sysfs attributes for CMA; only offered when both CMA and SYSFS are on. |
| config CMA_SYSFS |
| bool "CMA information through sysfs interface" |
| depends on CMA && SYSFS |
| help |
| This option exposes some sysfs attributes to get information |
| from CMA. |
| |
| # Integer tunable: maximum number of CMA areas. The first matching default |
| # wins, so NUMA builds get 20 and everything else gets 8. |
| config CMA_AREAS |
| int "Maximum count of the CMA areas" |
| depends on CMA |
| default 20 if NUMA |
| default 8 |
| help |
| CMA allows the creation of CMA areas for a particular purpose, |
| mainly for use as device private areas. This parameter sets the |
| maximum number of CMA areas in the system. |
| |
| If unsure, leave the default value "8" in UMA and "20" in NUMA. |
| |
| # Soft-dirty PTE tracking. Needs CHECKPOINT_RESTORE, architecture support |
| # (HAVE_ARCH_SOFT_DIRTY) and PROC_FS; enabling it selects PROC_PAGE_MONITOR. |
| config MEM_SOFT_DIRTY |
| bool "Track memory changes" |
| depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS |
| select PROC_PAGE_MONITOR |
| help |
| This option enables memory changes tracking by introducing a |
| soft-dirty bit on pte-s. This bit is set when someone writes |
| into a page just like the regular dirty bit, but unlike the |
| latter it can be cleared by hand. |
| |
| See Documentation/admin-guide/mm/soft-dirty.rst for more details. |
| |
| # Promptless bool with no default: stays n unless selected from another |
| # Kconfig entry (selectors are not visible in this part of the file). |
| config GENERIC_EARLY_IOREMAP |
| bool |
| |
| # Integer tunable (MB), limited to 8..2048 with a default of 100. Offered only |
| # for upward-growing stacks on builds that can run 32-bit user processes. |
| config STACK_MAX_DEFAULT_SIZE_MB |
| int "Default maximum user stack size for 32-bit processes (MB)" |
| depends on STACK_GROWSUP |
| depends on !64BIT || COMPAT |
| range 8 2048 |
| default 100 |
| help |
| This is the maximum stack size in Megabytes in the VM layout of 32-bit |
| user processes when the stack grows upwards (currently only on parisc |
| arch) when the RLIMIT_STACK hard limit is unlimited. |
| |
| A sane initial value is 100 MB. |
| |
| # Parallel, deferred struct page initialisation. All four conditions in the |
| # dependency expression must hold; enabling the option selects PADATA. |
| config DEFERRED_STRUCT_PAGE_INIT |
| bool "Defer initialisation of struct pages to kthreads" |
| depends on SPARSEMEM && !NEED_PER_CPU_KM && 64BIT && !KMSAN |
| select PADATA |
| help |
| Ordinarily all struct pages are initialised during early boot in a |
| single thread. On very large machines this can take a considerable |
| amount of time. If this option is set, large machines will bring up |
| a subset of memmap at boot and then initialise the rest in parallel. |
| This has a potential performance impact on tasks running early in the |
| lifetime of the system until these kthreads finish the |
| initialisation. |
| |
| # Promptless; turned on via select (IDLE_PAGE_TRACKING in this file does so). |
| # On non-64BIT builds it additionally selects PAGE_EXTENSION. |
| config PAGE_IDLE_FLAG |
| bool |
| select PAGE_EXTENSION if !64BIT |
| help |
| This adds PG_idle and PG_young flags to 'struct page'. PTE Accessed |
| bit writers can set the state of the bit in the flags so that PTE |
| Accessed bit readers may avoid disturbance. |
| |
| # User-visible idle page tracking; needs SYSFS and MMU and pulls in |
| # PAGE_IDLE_FLAG. |
| config IDLE_PAGE_TRACKING |
| bool "Enable idle page tracking" |
| depends on SYSFS |
| depends on MMU |
| select PAGE_IDLE_FLAG |
| help |
| This feature allows to estimate the amount of user pages that have |
| not been touched during a given period of time. This information can |
| be useful to tune memory cgroup limits and/or for job placement |
| within a compute cluster. |
| |
| See Documentation/admin-guide/mm/idle_page_tracking.rst for |
| more details. |
| |
| # Architectures which implement cpu_dcache_is_aliasing() to query |
| # whether the data caches are aliased (VIVT or VIPT with dcache |
| # aliasing) need to select this. |
| config ARCH_HAS_CPU_CACHE_ALIASING |
| bool |
| |
| # Promptless bool with no default: stays n unless selected from another |
| # Kconfig entry (selectors are not visible in this part of the file). |
| config ARCH_HAS_CACHE_LINE_SIZE |
| bool |
| |
| # Promptless; per the help text, an architecture selects this once it defines |
| # the "current_stack_pointer" global register alias. |
| config ARCH_HAS_CURRENT_STACK_POINTER |
| bool |
| help |
| In support of HARDENED_USERCOPY performing stack variable lifetime |
| checking, an architecture-agnostic way to find the stack pointer |
| is needed. Once an architecture defines an unsigned long global |
| register alias named "current_stack_pointer", this config can be |
| selected. |
| |
| # Promptless capability flag; ZONE_DEVICE in this file depends on it. |
| config ARCH_HAS_PTE_DEVMAP |
| bool |
| |
| # Promptless flag that makes the ZONE_DMA and ZONE_DMA32 prompts in this file |
| # visible (both use "if ARCH_HAS_ZONE_DMA_SET" on their prompt). |
| config ARCH_HAS_ZONE_DMA_SET |
| bool |
| |
| # The prompt is only shown when ARCH_HAS_ZONE_DMA_SET is set; the default is y |
| # on ARM64 and X86. |
| config ZONE_DMA |
| bool "Support DMA zone" if ARCH_HAS_ZONE_DMA_SET |
| default y if ARM64 || X86 |
| |
| # Same prompt visibility rule as ZONE_DMA; not available on X86_32, and the |
| # default is y on ARM64 only. |
| config ZONE_DMA32 |
| bool "Support DMA32 zone" if ARCH_HAS_ZONE_DMA_SET |
| depends on !X86_32 |
| default y if ARM64 |
| |
| # Device memory hotplug. Needs memory hot-add and hot-remove, a vmemmap-based |
| # sparse memory model and ARCH_HAS_PTE_DEVMAP; selects XARRAY_MULTI. |
| config ZONE_DEVICE |
| bool "Device memory (pmem, HMM, etc...) hotplug support" |
| depends on MEMORY_HOTPLUG && MEMORY_HOTREMOVE |
| depends on SPARSEMEM_VMEMMAP && ARCH_HAS_PTE_DEVMAP |
| select XARRAY_MULTI |
| help |
| Device memory hotplug support allows for establishing pmem, |
| or other device driver discovered memory regions, in the |
| memmap. This allows pfn_to_page() lookups of otherwise |
| "device-physical" addresses which is needed for using a DAX |
| mapping in an O_DIRECT operation, among other things. |
| |
| If FS_DAX is enabled, then say Y. |
| |
| # |
| # Helpers to mirror a range of the CPU page tables of a process into device |
| # page tables. Promptless, so it is only enabled via select, and only usable |
| # on MMU builds. |
| # |
| config HMM_MIRROR |
| bool |
| depends on MMU |
| |
| # Promptless helper; selected by DEVICE_PRIVATE in this file (other selectors, |
| # if any, are not visible here). |
| config GET_FREE_REGION |
| bool |
| |
| # struct page backing for device-only memory; layered on ZONE_DEVICE and |
| # selects GET_FREE_REGION. |
| config DEVICE_PRIVATE |
| bool "Unaddressable device memory (GPU memory, ...)" |
| depends on ZONE_DEVICE |
| select GET_FREE_REGION |
| help |
| Allows creation of struct pages to represent unaddressable device |
| memory; i.e., memory that is only accessible from the device (or |
| group of devices). You likely also want to select HMM_MIRROR. |
| |
| # Promptless helper bools with no default: each stays n unless another |
| # Kconfig entry selects it. |
| config VMAP_PFN |
| bool |
| |
| config ARCH_USES_HIGH_VMA_FLAGS |
| bool |
| |
| config ARCH_HAS_PKEYS |
| bool |
| |
| config ARCH_USES_PG_ARCH_2 |
| bool |
| |
| config ARCH_USES_PG_ARCH_3 |
| bool |
| |
| # On by default; the prompt (and so the ability to turn it off) is only |
| # offered when EXPERT is set. |
| config VM_EVENT_COUNTERS |
| bool "Enable VM event counters for /proc/vmstat" if EXPERT |
| default y |
| help |
| VM event counters are needed for event counts to be shown. |
| This option allows the disabling of the VM event counters |
| on EXPERT systems. /proc/vmstat will only show page counts |
| if VM event counters are disabled. |
| |
| # User-visible, off unless chosen; no dependencies are declared here even |
| # though the help text says the data is exposed through debugfs. |
| config PERCPU_STATS |
| bool "Collect percpu memory statistics" |
| help |
| This feature collects and exposes statistics via debugfs. The |
| information includes global and per chunk statistics, which can |
| be used to help understand percpu memory usage. |
| |
| # Test infrastructure for get_user_pages()/pin_user_pages(); requires DEBUG_FS |
| # because the interface lives under /sys/kernel/debug (see help). |
| config GUP_TEST |
| bool "Enable infrastructure for get_user_pages()-related unit tests" |
| depends on DEBUG_FS |
| help |
| Provides /sys/kernel/debug/gup_test, which in turn provides a way |
| to make ioctl calls that can launch kernel-based unit tests for |
| the get_user_pages*() and pin_user_pages*() family of API calls. |
| |
| These tests include benchmark testing of the _fast variants of |
| get_user_pages*() and pin_user_pages*(), as well as smoke tests of |
| the non-_fast variants. |
| |
| There is also a sub-test that allows running dump_page() on any |
| of up to eight pages (selected by command line args) within the |
| range of user-space addresses. These pages are either pinned via |
| pin_user_pages*(), or pinned via get_user_pages*(), as specified |
| by other command line arguments. |
| |
| See tools/testing/selftests/mm/gup_test.c |
| |
| # Menu hint displayed only while DEBUG_FS is disabled (and GUP_TEST is |
| # therefore off), telling the user why GUP_TEST is not offered. |
| comment "GUP_TEST needs to have DEBUG_FS enabled" |
| depends on !GUP_TEST && !DEBUG_FS |
| |
| # Promptless bool with no default: stays n unless selected from another |
| # Kconfig entry (selectors are not visible in this part of the file). |
| config GUP_GET_PXX_LOW_HIGH |
| bool |
| |
| # Tristate, so the test can be built in or built as a module; requires HAS_DMA. |
| config DMAPOOL_TEST |
| tristate "Enable a module to run time tests on dma_pool" |
| depends on HAS_DMA |
| help |
| Provides a test module that will allocate and free many blocks of |
| various sizes and report how long it takes. This is intended to |
| provide a consistent way to measure how changes to the |
| dma_pool_alloc/free routines affect performance. |
| |
| # The symbols below are promptless bools with no default: each stays n unless |
| # it is selected from another Kconfig entry (selectors are not visible in this |
| # part of the file). |
| config ARCH_HAS_PTE_SPECIAL |
| bool |
| |
| config MAPPING_DIRTY_HELPERS |
| bool |
| |
| config KMAP_LOCAL |
| bool |
| |
| config KMAP_LOCAL_NON_LINEAR_PTE_ARRAY |
| bool |
| |
| # struct io_mapping based helper. Selected by drivers that need them |
| config IO_MAPPING |
| bool |
| |
| # The prompt is only offered when EXPERT is set. No default and no help text |
| # are declared here, so without EXPERT the value can only come from a select |
| # elsewhere. |
| # NOTE(review): a prompted symbol normally carries help text -- consider adding. |
| config MEMFD_CREATE |
| bool "Enable memfd_create() system call" if EXPERT |
| |
| # memfd_secret() support: on by default where ARCH_HAS_SET_DIRECT_MAP is set; |
| # the prompt is only offered when EXPERT is set. |
| config SECRETMEM |
| bool "Enable memfd_secret() system call" if EXPERT |
| depends on ARCH_HAS_SET_DIRECT_MAP |
| default y |
| help |
| Enable the memfd_secret() system call with the ability to create |
| memory areas visible only in the context of the owning process and |
| not mapped to other processes and other kernel page tables. |
| |
| # Naming of anonymous VMAs; requires PROC_FS, ADVISE_SYSCALLS and MMU. |
| config ANON_VMA_NAME |
| bool "Anonymous VMA name support" |
| depends on PROC_FS && ADVISE_SYSCALLS |
| depends on MMU |
| help |
| Allow naming anonymous virtual memory areas. |
| |
| This feature allows assigning names to virtual memory areas. Assigned |
| names can be later retrieved from /proc/pid/maps and /proc/pid/smaps |
| and help identifying individual anonymous memory areas. |
| Assigning a name to anonymous virtual memory area might prevent that |
| area from being merged with adjacent virtual memory areas due to the |
| difference in their name. |
| |
| # Promptless capability flag; PTE_MARKER_UFFD_WP in this file depends on it. |
| config HAVE_ARCH_USERFAULTFD_WP |
| bool |
| help |
| Arch has userfaultfd write protection support |
| |
| # Promptless capability flag with no default: stays n unless selected from |
| # another Kconfig entry. |
| config HAVE_ARCH_USERFAULTFD_MINOR |
| bool |
| help |
| Arch has userfaultfd minor fault support |
| |
| # menuconfig entry: the options inside the "if USERFAULTFD" block that |
| # follows depend on it. Requires MMU. |
| menuconfig USERFAULTFD |
| bool "Enable userfaultfd() system call" |
| depends on MMU |
| help |
| Enable the userfaultfd() system call that allows to intercept and |
| handle page faults in userland. |
| |
| if USERFAULTFD |
| # On by default wherever the architecture advertises userfaultfd write |
| # protection (HAVE_ARCH_USERFAULTFD_WP). |
| config PTE_MARKER_UFFD_WP |
| bool "Userfaultfd write protection support for shmem/hugetlbfs" |
| depends on HAVE_ARCH_USERFAULTFD_WP |
| default y |
| help |
| Allows to create marker PTEs for userfaultfd write protection |
| purposes. It is required to enable userfaultfd write protection on |
| file-backed memory types like shmem and hugetlbfs. |
| endif # USERFAULTFD |
| |
| # multi-gen LRU { |
| # Main switch. Requires MMU plus one of the memory-model conditions in the |
| # second "depends on" line. |
| config LRU_GEN |
| bool "Multi-Gen LRU" |
| depends on MMU |
| # make sure folio->flags has enough spare bits |
| depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP |
| help |
| A high performance LRU implementation to overcommit memory. See |
| Documentation/admin-guide/mm/multigen_lru.rst for details. |
| |
| # Build-time default for whether the multi-gen LRU starts enabled. |
| config LRU_GEN_ENABLED |
| bool "Enable by default" |
| depends on LRU_GEN |
| help |
| This option enables the multi-gen LRU by default. |
| |
| # Debug-only statistics; the help text notes a per-memcg and per-node cost. |
| config LRU_GEN_STATS |
| bool "Full stats for debugging" |
| depends on LRU_GEN |
| help |
| Do not enable this option unless you plan to look at historical stats |
| from evicted generations for debugging purpose. |
| |
| This option has a per-memcg and per-node memory overhead. |
| |
| # Promptless; defaults to y when LRU_GEN is on and ARCH_HAS_HW_PTE_YOUNG is set. |
| config LRU_GEN_WALKS_MMU |
| def_bool y |
| depends on LRU_GEN && ARCH_HAS_HW_PTE_YOUNG |
| # } |
| |
| # Promptless and defaulting to n, so it only becomes y when another entry |
| # selects it. PER_VMA_LOCK in this file depends on it. |
| config ARCH_SUPPORTS_PER_VMA_LOCK |
| def_bool n |
| |
| # Promptless, default-on: active whenever the architecture supports it on an |
| # SMP build with an MMU. |
| config PER_VMA_LOCK |
| bool |
| default y |
| depends on ARCH_SUPPORTS_PER_VMA_LOCK |
| depends on MMU && SMP |
| help |
| Allow per-vma locking during page fault handling. |
| |
| This feature allows locking each virtual memory area separately when |
| handling page faults instead of taking mmap_lock. |
| |
| # Promptless; declared with a dependency on !STACK_GROWSUP. |
| config LOCK_MM_AND_FIND_VMA |
| bool |
| depends on !STACK_GROWSUP |
| |
| # The three symbols below are promptless bools with no default: each stays n |
| # unless it is selected from another Kconfig entry. |
| config IOMMU_MM_DATA |
| bool |
| |
| config EXECMEM |
| bool |
| |
| config NUMA_MEMBLKS |
| bool |
| |
| # Debug aid layered on NUMA_MEMBLKS; activated at boot with "numa=fake=N" |
| # (see help). |
| config NUMA_EMU |
| bool "NUMA emulation" |
| depends on NUMA_MEMBLKS |
| help |
| Enable NUMA emulation. A flat machine will be split |
| into virtual nodes when booted with "numa=fake=N", where N is the |
| number of nodes. This is only useful for debugging. |
| |
| # Promptless capability flag with no default: stays n unless selected from |
| # another Kconfig entry (presumably by architectures -- confirm). |
| config ARCH_HAS_USER_SHADOW_STACK |
| bool |
| help |
| The architecture has hardware support for userspace shadow call |
| stacks (eg, x86 CET, arm64 GCS or RISC-V Zicfiss). |
| |
| source "mm/damon/Kconfig" |
| |
| endmenu |