|  | #!/usr/bin/env python3 | 
|  | # SPDX-License-Identifier: GPL-2.0-only | 
|  | # Copyright (C) 2024 ARM Ltd. | 
|  | # | 
|  | # Utility providing smaps-like output detailing transparent hugepage usage. | 
|  | # For more info, run: | 
|  | # ./thpmaps --help | 
|  | # | 
|  | # Requires numpy: | 
|  | # pip3 install numpy | 
|  |  | 
|  |  | 
|  | import argparse | 
|  | import collections | 
|  | import math | 
|  | import os | 
|  | import re | 
|  | import resource | 
|  | import shutil | 
|  | import sys | 
|  | import textwrap | 
|  | import time | 
|  | import numpy as np | 
|  |  | 
|  |  | 
# Base page geometry of the running system.
PAGE_SIZE = resource.getpagesize()
PAGE_SHIFT = int(math.log2(PAGE_SIZE))

# PMD-sized huge page geometry, as advertised by the kernel through sysfs.
with open('/sys/kernel/mm/transparent_hugepage/hpage_pmd_size') as f:
    PMD_SIZE = int(f.read())
PMD_ORDER = int(math.log2(PMD_SIZE / PAGE_SIZE))
|  |  | 
|  |  | 
def align_forward(v, a):
    # Round v up to the next multiple of a. The bit trick relies on a being a
    # power of 2.
    mask = a - 1
    return (v + mask) & ~mask
|  |  | 
|  |  | 
def align_offset(v, a):
    # Return the offset of v within its a-sized block (a must be a power of 2
    # for the mask to be meaningful).
    mask = a - 1
    return v & mask
|  |  | 
|  |  | 
def kbnr(kb):
    # Convert KB to number of pages: scale to bytes first, then divide by the
    # page size via a shift.
    nbytes = kb << 10
    return nbytes >> PAGE_SHIFT
|  |  | 
|  |  | 
def nrkb(nr):
    # Convert number of pages to KB: scale to bytes first, then divide by
    # 1024 via a shift.
    nbytes = nr << PAGE_SHIFT
    return nbytes >> 10
|  |  | 
|  |  | 
def odkb(order):
    # Convert page order to KB, i.e. the size in KB of a block made of
    # (1 << order) pages.
    nbytes = PAGE_SIZE << order
    return nbytes >> 10
|  |  | 
|  |  | 
def cont_ranges_all(search, index):
    # Find the runs of positions over which *every* array in search increments
    # by exactly 1 from one element to the next, and report those runs in terms
    # of each array in index. All arrays in search and index must be the same
    # size. The result is a list with one (nr_runs, 2) array per index array;
    # each row holds the first and last (inclusive) value of a run.

    # contiguous[i] is True when element i+1 continues the run of element i in
    # all of the searched arrays.
    contiguous = np.diff(search[0]) == 1
    for arr in search[1:]:
        contiguous &= np.diff(arr) == 1

    # Every element starts out emitted twice (as both the start and the end of
    # a run of length 1). An element that continues a run loses its "start"
    # copy and an element that is continued loses its "end" copy, so only run
    # boundaries survive the repeat below.
    repeats = np.full(len(search[0]), 2)
    repeats[1:] -= contiguous
    repeats[:-1] -= contiguous

    return [np.repeat(arr, repeats).reshape(-1, 2) for arr in index]
|  |  | 
|  |  | 
class ArgException(Exception):
    """Raised when a command line argument value is rejected."""
|  |  | 
|  |  | 
class FileIOException(Exception):
    """Raised when a binary file read returns fewer bytes than requested."""
|  |  | 
|  |  | 
class BinArrayFile:
    # Base class used to read /proc/<pid>/pagemap and /proc/kpageflags into a
    # numpy array. Use inherited class in a with clause to ensure file is
    # closed when it goes out of scope.
    def __init__(self, filename, element_size):
        # filename: path of the file to read.
        # element_size: size in bytes of one array element in the file. Only 8
        # is currently supported (see _toarray()).
        self.element_size = element_size
        self.filename = filename
        self.fd = os.open(self.filename, os.O_RDONLY)

    def cleanup(self):
        # Release the underlying file descriptor.
        os.close(self.fd)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    def _readin(self, offset, buffer):
        # Fill buffer completely from byte position offset in the file. A short
        # read is reported as FileIOException rather than returning partially
        # filled data.
        length = os.preadv(self.fd, (buffer,), offset)
        if len(buffer) != length:
            raise FileIOException('error: {} failed to read {} bytes at {:x}'
                                  .format(self.filename, len(buffer), offset))

    def _toarray(self, buf):
        # Reinterpret the raw bytes as 64-bit unsigned elements (no copy).
        assert self.element_size == 8
        return np.frombuffer(buf, dtype=np.uint64)

    def getv(self, vec):
        # Read several element ranges in one go. vec is an (n, 2) array whose
        # rows are [first, last] element indexes (inclusive). Returns a single
        # flat array holding the elements of all ranges, in row order.
        #
        # Scale into byte units on a copy; the caller's array must not be
        # modified behind its back.
        byte_vec = vec * self.element_size
        offsets = byte_vec[:, 0]
        # last - first is one element short of the inclusive range length.
        lengths = (np.diff(byte_vec) + self.element_size).reshape(len(byte_vec))
        buf = bytearray(int(np.sum(lengths)))
        view = memoryview(buf)
        pos = 0
        for offset, length in zip(offsets, lengths):
            offset = int(offset)
            length = int(length)
            # Read straight into the right slice of the result buffer.
            self._readin(offset, view[pos:pos+length])
            pos += length
        return self._toarray(buf)

    def get(self, index, nr=1):
        # Read nr consecutive elements starting at element index and return
        # them as an array.
        offset = index * self.element_size
        length = nr * self.element_size
        buf = bytearray(length)
        self._readin(offset, buf)
        return self._toarray(buf)
|  |  | 
|  |  | 
# Fields of a 64-bit /proc/<pid>/pagemap entry, as used by this tool.
PM_PAGE_PRESENT = 1 << 63       # Bit 63: tested to select present pages.
PM_PFN_MASK = (1 << 55) - 1     # Bits 0-54: masked out as the page frame number.
|  |  | 
class PageMap(BinArrayFile):
    # Read ranges of a given pid's pagemap into a numpy array. Entries are
    # 8 bytes each; pid defaults to the calling process ('self').
    def __init__(self, pid='self'):
        path = '/proc/{}/pagemap'.format(pid)
        super().__init__(path, 8)
|  |  | 
|  |  | 
# Bits of a 64-bit /proc/kpageflags entry that this tool inspects.
KPF_ANON = 1 << 12              # Tested to classify a page as anon vs file.
KPF_COMPOUND_HEAD = 1 << 15     # Tested to find the head page of a THP.
KPF_COMPOUND_TAIL = 1 << 16     # Tested to detect a tail page of a THP.
KPF_THP = 1 << 22               # Tested to select pages that belong to a THP.
|  |  | 
class KPageFlags(BinArrayFile):
    # Read ranges of /proc/kpageflags into a numpy array. Entries are 8 bytes
    # each.
    def __init__(self):
        super().__init__('/proc/kpageflags', 8)
|  |  | 
|  |  | 
# smaps fields collected per VMA when --inc-smaps is given.
vma_all_stats = {
    "Size",
    "Rss",
    "Pss",
    "Pss_Dirty",
    "Shared_Clean",
    "Shared_Dirty",
    "Private_Clean",
    "Private_Dirty",
    "Referenced",
    "Anonymous",
    "KSM",
    "LazyFree",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
    "Shared_Hugetlb",
    "Private_Hugetlb",
    "Swap",
    "SwapPss",
    "Locked",
}
|  |  | 
# Minimal set of smaps fields the THP accounting itself reads; collected when
# --inc-smaps is not given.
vma_min_stats = {
    "Rss",
    "Anonymous",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
}
|  |  | 
# One mapping parsed from an smaps header line, plus the selected statistics
# that follow it (stats maps field name -> {'type': ..., 'value': ...}).
VMA = collections.namedtuple('VMA', [
    'name',
    'start',
    'end',
    'read',
    'write',
    'execute',
    'private',
    'pgoff',
    'major',
    'minor',
    'inode',
    'stats',
])


class VMAList:
    # A container for VMAs, parsed from /proc/<pid>/smaps. Iterate over the
    # instance to receive VMAs.
    def __init__(self, pid='self', stats=()):
        # pid: process whose smaps file is parsed ('self' for the caller).
        # stats: collection of smaps field names to record for each VMA; all
        # other fields are ignored.
        self.vmas = []
        with open(f'/proc/{pid}/smaps', 'r') as file:
            for line in file:
                # Split off at most the 5 fixed header fields so that a mapping
                # name containing whitespace (e.g. a trailing " (deleted)")
                # stays in one piece instead of being dropped.
                elements = line.split(maxsplit=5)
                if not elements:
                    continue
                if '-' in elements[0]:
                    # Header line: "start-end perms pgoff major:minor inode [name]".
                    start, end = map(lambda x: int(x, 16), elements[0].split('-'))
                    major, minor = map(lambda x: int(x, 16), elements[3].split(':'))
                    self.vmas.append(VMA(
                        name=elements[5].strip() if len(elements) > 5 else '',
                        start=start,
                        end=end,
                        read=elements[1][0] == 'r',
                        write=elements[1][1] == 'w',
                        execute=elements[1][2] == 'x',
                        private=elements[1][3] == 'p',
                        pgoff=int(elements[2], 16),
                        major=major,
                        minor=minor,
                        # NOTE(review): parsed as base 16 and printed back with
                        # a hex format by vma_print(), so the digits round-trip
                        # unchanged. Confirm before treating this as a number.
                        inode=int(elements[4], 16),
                        stats={},
                    ))
                else:
                    # Statistic line: "<Name>: <value> ...". Attach it to the
                    # most recently seen VMA if the caller asked for it.
                    param = elements[0][:-1]
                    if param in stats:
                        value = int(elements[1])
                        self.vmas[-1].stats[param] = {'type': None, 'value': value}

    def __iter__(self):
        yield from self.vmas
|  |  | 
|  |  | 
def thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads):
    # Given 4 same-sized arrays representing a range within a page table backed
    # by THPs (vfns: virtual frame numbers, pfns: physical frame numbers, anons:
    # True if page is anonymous, heads: True if page is head of a THP), return a
    # dictionary of statistics describing the mapped THPs.
    #
    # vma supplies the smaps PMD-mapped counters, kpageflags is used to look up
    # the flags of the page that follows each range, ranges is the
    # [index, vfn, pfn] output of cont_ranges_all() and indexes is the array of
    # positions (0..n-1) into the other arrays. pfns is accepted for symmetry
    # but is not read here.
    def new_counters():
        # 'partial' counts pages; 'aligned'/'unaligned' count pages per order.
        return {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        }

    stats = {
        'file': new_counters(),
        'anon': new_counters(),
    }

    for rindex, rpfn in zip(ranges[0], ranges[2]):
        index_next = int(rindex[0])
        index_end = int(rindex[1]) + 1
        pfn_end = int(rpfn[1]) + 1

        # Positions of the THP head pages that fall inside this range.
        folios = indexes[index_next:index_end][heads[index_next:index_end]]

        # Account pages for any partially mapped THP at the front. In that case,
        # the first page of the range is a tail.
        nr = (int(folios[0]) if len(folios) else index_end) - index_next
        stats['anon' if anons[index_next] else 'file']['partial'] += nr

        # Account pages for any partially mapped THP at the back. In that case,
        # the next page after the range is a tail.
        if len(folios):
            flags = int(kpageflags.get(pfn_end)[0])
            if flags & KPF_COMPOUND_TAIL:
                tail_head = int(folios[-1])
                nr = index_end - tail_head
                folios = folios[:-1]
                index_end = tail_head
                # Classify using a page of the partial THP itself (its head).
                # Looking at index_end - 1 here would inspect the page *before*
                # the THP, or wrap to the end of the array when it is 0.
                stats['anon' if anons[tail_head] else 'file']['partial'] += nr

        # Account fully mapped THPs in the middle of the range.
        if len(folios):
            # The size of each THP is the distance to the next head; the last
            # one extends to the (possibly trimmed) end of the range.
            last_nr = index_end - int(folios[-1])
            folio_nrs = np.append(np.diff(folios), np.uint64(last_nr))
            folio_orders = np.log2(folio_nrs).astype(np.uint64)
            for index, order in zip(folios, folio_orders):
                index = int(index)
                order = int(order)
                nr = 1 << order
                vfn = int(vfns[index])
                align = 'aligned' if align_forward(vfn, nr) == vfn else 'unaligned'
                anon = 'anon' if anons[index] else 'file'
                stats[anon][align][order] += nr

    # Account PMD-mapped THPs separately, so filter out of the stats. There is a
    # race between acquiring the smaps stats and reading pagemap, where memory
    # could be deallocated. So clamp to zero in case it would have gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
                      vma.stats['FilePmdMapped']['value']
    stats['anon']['aligned'][PMD_ORDER] = max(0, stats['anon']['aligned'][PMD_ORDER] - kbnr(anon_pmd_mapped))
    stats['file']['aligned'][PMD_ORDER] = max(0, stats['file']['aligned'][PMD_ORDER] - kbnr(file_pmd_mapped))

    rstats = {
        f"anon-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'file', 'value': file_pmd_mapped},
    }

    def flatten_sub(kind, subtype, counters):
        # Emit one entry per order, starting at order 2 (orders 0 and 1 are not
        # reported).
        param = f"{kind}-thp-pte-{subtype}-{{}}kB"
        for od, nr in enumerate(counters[2:], 2):
            rstats[param.format(odkb(od))] = {'type': kind, 'value': nrkb(nr)}

    def flatten_type(kind, counters):
        flatten_sub(kind, 'aligned', counters['aligned'])
        flatten_sub(kind, 'unaligned', counters['unaligned'])
        rstats[f"{kind}-thp-pte-partial"] = {'type': kind, 'value': nrkb(counters['partial'])}

    flatten_type('anon', stats['anon'])
    flatten_type('file', stats['file'])

    return rstats
|  |  | 
|  |  | 
def cont_parse(vma, order, ranges, anons, heads):
    # Given the [index, vfn, pfn] ranges that are contiguous in both virtual
    # and physical space, plus the per-page arrays anons (True if page is
    # anonymous) and heads (True if page is head of a THP), return a dictionary
    # of statistics describing the naturally aligned contiguous blocks of
    # (1 << order) pages. vma supplies the smaps PMD-mapped counters.
    nr_cont = 1 << order
    nr_anon = 0
    nr_file = 0

    for idx_range, vfn_range, pfn_range in zip(*ranges):
        first = int(idx_range[0])
        limit = int(idx_range[1]) + 1
        vfn_start = int(vfn_range[0])
        pfn_start = int(pfn_range[0])

        # A block can only be aligned in both address spaces if the virtual and
        # physical starts share the same offset within a block.
        if align_offset(pfn_start, nr_cont) != align_offset(vfn_start, nr_cont):
            continue

        # Skip forward to the first block-aligned page of the range.
        first += align_forward(vfn_start, nr_cont) - vfn_start

        # Visit every whole block that fits inside the range.
        for block in range(first, limit - nr_cont + 1, nr_cont):
            # A head page anywhere after the first page of the block means the
            # block straddles two THPs, so it does not count.
            if heads[block + 1:block + nr_cont].any():
                continue
            if anons[block]:
                nr_anon += nr_cont
            else:
                nr_file += nr_cont

    # Account blocks that are PMD-mapped separately, so filter out of the stats.
    # There is a race between acquiring the smaps stats and reading pagemap,
    # where memory could be deallocated. So clamp to zero in case it would have
    # gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = (vma.stats['ShmemPmdMapped']['value']
                       + vma.stats['FilePmdMapped']['value'])
    nr_anon = max(0, nr_anon - kbnr(anon_pmd_mapped))
    nr_file = max(0, nr_file - kbnr(file_pmd_mapped))

    size_kb = nrkb(nr_cont)
    return {
        f"anon-cont-pmd-aligned-{size_kb}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-cont-pmd-aligned-{size_kb}kB": {'type': 'file', 'value': file_pmd_mapped},
        f"anon-cont-pte-aligned-{size_kb}kB": {'type': 'anon', 'value': nrkb(nr_anon)},
        f"file-cont-pte-aligned-{size_kb}kB": {'type': 'file', 'value': nrkb(nr_file)},
    }
|  |  | 
|  |  | 
def vma_print(vma, pid):
    # Prints a VMA instance in a format similar to smaps. The main difference is
    # that the pid is included as the first value.
    perms = ''.join([
        'r' if vma.read else '-',
        'w' if vma.write else '-',
        'x' if vma.execute else '-',
        'p' if vma.private else 's',
    ])
    print(f"{pid:010d}: {vma.start:016x}-{vma.end:016x} {perms} "
          f"{vma.pgoff:08x} {vma.major:02x}:{vma.minor:02x} "
          f"{vma.inode:08x} {vma.name}")
|  |  | 
|  |  | 
def stats_print(stats, tot_anon, tot_file, inc_empty):
    # Print a statistics dictionary, one "label: value kB" line per entry.
    # Entries typed 'anon' or 'file' also show their share of tot_anon or
    # tot_file when that total is positive. Zero-valued entries are only
    # printed when inc_empty is set.
    label_field = 32
    totals = {'anon': tot_anon, 'file': tot_file}
    for label, stat in stats.items():
        kind = stat['type']
        value = stat['value']
        if not (value or inc_empty):
            continue
        # Untyped entries have no total, so they never get a percentage.
        total = totals.get(kind, 0)
        percent = f' ({value / total:3.0%})' if total > 0 else ''
        pad = max(0, label_field - len(label) - 1)
        print(f"{label}:{' ' * pad}{value:8} kB{percent}")
|  |  | 
|  |  | 
def vma_parse(vma, pagemap, kpageflags, contorders):
    # Generate thp and cont statistics for a single VMA. Returns a tuple of
    # (statistics dictionary, total anonymous KB, total file-backed KB), the
    # totals being derived from the VMA's smaps Anonymous and Rss values.
    first_vfn = vma.start >> PAGE_SHIFT
    last_vfn = vma.end >> PAGE_SHIFT

    # Keep only the pages of the VMA that are present in memory.
    pmes = pagemap.get(first_vfn, last_vfn - first_vfn)
    present = (pmes & PM_PAGE_PRESENT) != 0
    pfns = (pmes & PM_PFN_MASK)[present]
    vfns = np.arange(first_vfn, last_vfn, dtype=np.uint64)[present]

    # Fetch the page flags in as few reads as possible by grouping physically
    # contiguous frames into ranges.
    pfn_vec = cont_ranges_all([pfns], [pfns])[0]
    flags = kpageflags.getv(pfn_vec)
    is_thp = (flags & KPF_THP) != 0

    # From here on only pages that belong to a THP are of interest.
    anons = ((flags & KPF_ANON) != 0)[is_thp]
    heads = ((flags & KPF_COMPOUND_HEAD) != 0)[is_thp]
    vfns = vfns[is_thp]
    pfns = pfns[is_thp]

    indexes = np.arange(len(vfns), dtype=np.uint64)
    ranges = cont_ranges_all([vfns, pfns], [indexes, vfns, pfns])

    thpstats = thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads)
    contstats = [cont_parse(vma, order, ranges, anons, heads) for order in contorders]

    tot_anon = vma.stats['Anonymous']['value']
    tot_file = vma.stats['Rss']['value'] - tot_anon

    # THP stats first, then the stats of each requested block size in order.
    merged = dict(thpstats)
    for block_stats in contstats:
        merged.update(block_stats)

    return merged, tot_anon, tot_file
|  |  | 
|  |  | 
def do_main(args):
    # Run one scan over the selected processes and print the results, either
    # per VMA or (with --rollup) summed over everything that was scanned.
    pids = set()
    rollup = {}
    rollup_anon = 0
    rollup_file = 0

    # Work out which pids to scan. Only an explicit --pid list is "strict":
    # for cgroup and system-wide scans a process that cannot be read (e.g. it
    # exited in the meantime) is silently skipped.
    if args.cgroup:
        strict = False
        for cgroup, _dirs, _files in os.walk(args.cgroup):
            with open(f'{cgroup}/cgroup.procs') as pidfile:
                for line in pidfile:
                    pids.add(int(line.strip()))
    elif args.pid:
        strict = True
        pids.update(args.pid)
    else:
        strict = False
        for entry in os.listdir('/proc'):
            if entry.isdigit():
                pids.add(int(entry))

    if not args.rollup:
        print("       PID             START              END PROT   OFFSET   DEV    INODE OBJECT")

    wanted_stats = vma_all_stats if args.inc_smaps else vma_min_stats

    for pid in pids:
        try:
            with PageMap(pid) as pagemap, KPageFlags() as kpageflags:
                for vma in VMAList(pid, wanted_stats):
                    # Only accessible mappings with resident pages are parsed.
                    accessible = vma.read or vma.write or vma.execute
                    if accessible and vma.stats['Rss']['value'] > 0:
                        stats, vma_anon, vma_file = vma_parse(vma, pagemap, kpageflags, args.cont)
                    else:
                        stats, vma_anon, vma_file = {}, 0, 0

                    if args.inc_smaps:
                        stats = {**vma.stats, **stats}

                    if not args.rollup:
                        vma_print(vma, pid)
                        stats_print(stats, vma_anon, vma_file, args.inc_empty)
                        continue

                    for key, stat in stats.items():
                        if key in rollup:
                            assert rollup[key]['type'] == stat['type']
                            rollup[key]['value'] += stat['value']
                        else:
                            rollup[key] = stat
                    rollup_anon += vma_anon
                    rollup_file += vma_file
        except (FileNotFoundError, ProcessLookupError, FileIOException):
            if strict:
                raise

    if args.rollup:
        stats_print(rollup, rollup_anon, rollup_file, args.inc_empty)
|  |  | 
|  |  | 
def main():
    # Parse the command line, validate the --cont sizes and run the scan, once
    # or periodically.
    docs_width = min(80, shutil.get_terminal_size().columns - 2)

    def reflow(string):
        # Collapse all whitespace in the text, then treat each literal "\n"
        # marker as a paragraph break and wrap every paragraph to docs_width.
        text = re.sub(r'\s+', ' ', string)
        text = re.sub(r'\s*\\n\s*', '\n', text)
        paras = text.split('\n')
        return '\n'.join(textwrap.fill(p, width=docs_width) for p in paras)

    def formatter(prog):
        return argparse.RawDescriptionHelpFormatter(prog, width=docs_width)

    def size2order(human):
        # Convert a human readable size ("64K", "2M", "65536") into a page
        # order. Raises ArgException if the value is not an integer, is smaller
        # than 2 pages, is not a power-of-2 number of pages or is bigger than
        # a PMD.
        units = {
            "K": 2**10, "M": 2**20, "G": 2**30,
            "k": 2**10, "m": 2**20, "g": 2**30,
        }
        unit = 1
        # Guard against the empty string before peeking at the unit suffix.
        if human and human[-1] in units:
            unit = units[human[-1]]
            human = human[:-1]
        try:
            size = int(human)
        except ValueError:
            raise ArgException('error: --cont value must be integer size with optional KMG unit')
        size *= unit
        # Reject too-small sizes up front; this also keeps zero and negative
        # values away from log2(), which cannot handle them.
        if size < 2 * PAGE_SIZE:
            raise ArgException('error: --cont value must be size of at least 2 pages')
        order = int(math.log2(size / PAGE_SIZE))
        if (1 << order) * PAGE_SIZE != size:
            raise ArgException('error: --cont value must be size of power-of-2 pages')
        if order > PMD_ORDER:
            raise ArgException('error: --cont value must be less than or equal to PMD order')
        return order

    parser = argparse.ArgumentParser(formatter_class=formatter,
        description=reflow("""Prints information about how transparent huge
                    pages are mapped, either system-wide, or for a specified
                    process or cgroup.\\n
                    \\n
                    When run with --pid, the user explicitly specifies the set
                    of pids to scan. e.g. "--pid 10 [--pid 134 ...]". When run
                    with --cgroup, the user passes either a v1 or v2 cgroup and
                    all pids that belong to the cgroup subtree are scanned. When
                    run with neither --pid nor --cgroup, the full set of pids on
                    the system is gathered from /proc and scanned as if the user
                    had provided "--pid 1 --pid 2 ...".\\n
                    \\n
                    A default set of statistics is always generated for THP
                    mappings. However, it is also possible to generate
                    additional statistics for "contiguous block mappings" where
                    the block size is user-defined.\\n
                    \\n
                    Statistics are maintained independently for anonymous and
                    file-backed (pagecache) memory and are shown both in kB and
                    as a percentage of either total anonymous or total
                    file-backed memory as appropriate.\\n
                    \\n
                    THP Statistics\\n
                    --------------\\n
                    \\n
                    Statistics are always generated for fully- and
                    contiguously-mapped THPs whose mapping address is aligned to
                    their size, for each <size> supported by the system.
                    Separate counters describe THPs mapped by PTE vs those
                    mapped by PMD. (Although note a THP can only be mapped by
                    PMD if it is PMD-sized):\\n
                    \\n
                    - anon-thp-pte-aligned-<size>kB\\n
                    - file-thp-pte-aligned-<size>kB\\n
                    - anon-thp-pmd-aligned-<size>kB\\n
                    - file-thp-pmd-aligned-<size>kB\\n
                    \\n
                    Similarly, statistics are always generated for fully- and
                    contiguously-mapped THPs whose mapping address is *not*
                    aligned to their size, for each <size> supported by the
                    system. Due to the unaligned mapping, it is impossible to
                    map by PMD, so there are only PTE counters for this case:\\n
                    \\n
                    - anon-thp-pte-unaligned-<size>kB\\n
                    - file-thp-pte-unaligned-<size>kB\\n
                    \\n
                    Statistics are also always generated for mapped pages that
                    belong to a THP but where the is THP is *not* fully- and
                    contiguously- mapped. These "partial" mappings are all
                    counted in the same counter regardless of the size of the
                    THP that is partially mapped:\\n
                    \\n
                    - anon-thp-pte-partial\\n
                    - file-thp-pte-partial\\n
                    \\n
                    Contiguous Block Statistics\\n
                    ---------------------------\\n
                    \\n
                    An optional, additional set of statistics is generated for
                    every contiguous block size specified with `--cont <size>`.
                    These statistics show how much memory is mapped in
                    contiguous blocks of <size> and also aligned to <size>. A
                    given contiguous block must all belong to the same THP, but
                    there is no requirement for it to be the *whole* THP.
                    Separate counters describe contiguous blocks mapped by PTE
                    vs those mapped by PMD:\\n
                    \\n
                    - anon-cont-pte-aligned-<size>kB\\n
                    - file-cont-pte-aligned-<size>kB\\n
                    - anon-cont-pmd-aligned-<size>kB\\n
                    - file-cont-pmd-aligned-<size>kB\\n
                    \\n
                    As an example, if monitoring 64K contiguous blocks (--cont
                    64K), there are a number of sources that could provide such
                    blocks: a fully- and contiguously-mapped 64K THP that is
                    aligned to a 64K boundary would provide 1 block. A fully-
                    and contiguously-mapped 128K THP that is aligned to at least
                    a 64K boundary would provide 2 blocks. Or a 128K THP that
                    maps its first 100K, but contiguously and starting at a 64K
                    boundary would provide 1 block. A fully- and
                    contiguously-mapped 2M THP would provide 32 blocks. There
                    are many other possible permutations.\\n"""),
        epilog=reflow("""Requires root privilege to access pagemap and
                    kpageflags."""))

    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument('--pid',
        metavar='pid', required=False, type=int, default=[], action='append',
        help="""Process id of the target process. Maybe issued multiple times to
            scan multiple processes. --pid and --cgroup are mutually exclusive.
            If neither are provided, all processes are scanned to provide
            system-wide information.""")

    group.add_argument('--cgroup',
        metavar='path', required=False,
        help="""Path to the target cgroup in sysfs. Iterates over every pid in
            the cgroup and its children. --pid and --cgroup are mutually
            exclusive. If neither are provided, all processes are scanned to
            provide system-wide information.""")

    parser.add_argument('--rollup',
        required=False, default=False, action='store_true',
        help="""Sum the per-vma statistics to provide a summary over the whole
            system, process or cgroup.""")

    parser.add_argument('--cont',
        metavar='size[KMG]', required=False, default=[], action='append',
        help="""Adds stats for memory that is mapped in contiguous blocks of
            <size> and also aligned to <size>. May be issued multiple times to
            track multiple sized blocks. Useful to infer e.g. arm64 contpte and
            hpa mappings. Size must be a power-of-2 number of pages.""")

    parser.add_argument('--inc-smaps',
        required=False, default=False, action='store_true',
        help="""Include all numerical, additive /proc/<pid>/smaps stats in the
            output.""")

    parser.add_argument('--inc-empty',
        required=False, default=False, action='store_true',
        help="""Show all statistics including those whose value is 0.""")

    parser.add_argument('--periodic',
        metavar='sleep_ms', required=False, type=int,
        help="""Run in a loop, polling every sleep_ms milliseconds.""")

    args = parser.parse_args()

    # Replace the human readable --cont sizes with page orders. On a bad value
    # show the usage line and let the caller report the error.
    try:
        args.cont = [size2order(cont) for cont in args.cont]
    except ArgException:
        parser.print_usage()
        raise

    if args.periodic:
        while True:
            do_main(args)
            print()
            time.sleep(args.periodic / 1000)
    else:
        do_main(args)
|  |  | 
|  |  | 
if __name__ == "__main__":
    # Report any failure as a single "<prog>: <message>" line and exit with a
    # non-zero status instead of showing a traceback.
    try:
        main()
    except Exception as e:
        prog = os.path.basename(sys.argv[0])
        print(f'{prog}: {e}')
        # Use sys.exit(); the bare exit() helper is injected by the site module
        # and is not guaranteed to exist in every interpreter configuration.
        sys.exit(1)