Ryan Roberts | 2444172 | 2024-01-16 14:12:35 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # SPDX-License-Identifier: GPL-2.0-only |
| 3 | # Copyright (C) 2024 ARM Ltd. |
| 4 | # |
| 5 | # Utility providing smaps-like output detailing transparent hugepage usage. |
| 6 | # For more info, run: |
| 7 | # ./thpmaps --help |
| 8 | # |
| 9 | # Requires numpy: |
| 10 | # pip3 install numpy |
| 11 | |
| 12 | |
| 13 | import argparse |
| 14 | import collections |
| 15 | import math |
| 16 | import os |
| 17 | import re |
| 18 | import resource |
| 19 | import shutil |
| 20 | import sys |
| 21 | import textwrap |
| 22 | import time |
| 23 | import numpy as np |
| 24 | |
| 25 | |
# Page geometry of the running system. The base page size comes from the
# resource module and the PMD (huge page) size from the kernel's THP sysfs
# interface; the shift and order are derived from those two values.
PAGE_SIZE = resource.getpagesize()
PAGE_SHIFT = int(math.log2(PAGE_SIZE))
with open('/sys/kernel/mm/transparent_hugepage/hpage_pmd_size') as f:
    PMD_SIZE = int(f.read())
PMD_ORDER = int(math.log2(PMD_SIZE / PAGE_SIZE))
| 31 | |
| 32 | |
def align_forward(v, a):
    # Round v up to the next multiple of a. The bit masking only gives a
    # meaningful result when a is a power of 2.
    mask = a - 1
    return (v + mask) & ~mask
| 35 | |
| 36 | |
def align_offset(v, a):
    # Return the low bits of v below alignment a, i.e. the offset of v within
    # its a-sized block when a is a power of 2.
    low_bits = a - 1
    return v & low_bits
| 39 | |
| 40 | |
def kbnr(kb):
    # Convert KB to number of pages: scale to bytes first, then divide by the
    # page size using the page shift.
    nbytes = kb << 10
    return nbytes >> PAGE_SHIFT
| 44 | |
| 45 | |
def nrkb(nr):
    # Convert number of pages to KB: scale to bytes using the page shift, then
    # divide by 1024.
    nbytes = nr << PAGE_SHIFT
    return nbytes >> 10
| 49 | |
| 50 | |
def odkb(order):
    # Convert page order to KB: an order-N block spans PAGE_SIZE * 2**N bytes.
    block_bytes = PAGE_SIZE << order
    return block_bytes >> 10
| 54 | |
| 55 | |
def cont_ranges_all(search, index):
    # Find the runs over which every array in search increments by exactly 1
    # from one element to the next. For each array in index, return an (N, 2)
    # array holding that array's values at the first and last position of
    # each run. All arrays in search and index must be the same size.
    count = len(search[0])

    # steps[i] is True when element i+1 continues the run of element i in
    # every search array.
    steps = np.diff(search[0]) == 1
    for arr in search[1:]:
        steps = steps & (np.diff(arr) == 1)

    # Start with every element emitted twice (as both start and end of its
    # own run), then drop one emission on each side of a continuing step so
    # that only run boundaries survive.
    repeats = np.full(count, 2)
    repeats[1:] -= steps
    repeats[:-1] -= steps

    return [np.repeat(arr, repeats).reshape(-1, 2) for arr in index]
| 68 | |
| 69 | |
class ArgException(Exception):
    """Raised when a command line argument value is rejected."""
| 72 | |
| 73 | |
class FileIOException(Exception):
    """Raised when a read returns fewer bytes than were requested."""
| 76 | |
| 77 | |
class BinArrayFile:
    # Base class used to read /proc/<pid>/pagemap and /proc/kpageflags into a
    # numpy array. Use inherited class in a with clause to ensure file is
    # closed when it goes out of scope.
    def __init__(self, filename, element_size):
        # filename: path of the binary file to read.
        # element_size: size in bytes of one array element in the file.
        self.element_size = element_size
        self.filename = filename
        self.fd = os.open(self.filename, os.O_RDONLY)

    def cleanup(self):
        # Release the underlying file descriptor.
        os.close(self.fd)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    def _readin(self, offset, buffer):
        # Fill buffer from the file starting at byte offset. A short read is
        # treated as an error and raises FileIOException.
        length = os.preadv(self.fd, (buffer,), offset)
        if len(buffer) != length:
            raise FileIOException('error: {} failed to read {} bytes at {:x}'
                            .format(self.filename, len(buffer), offset))

    def _toarray(self, buf):
        # Reinterpret the raw bytes as 64-bit unsigned elements. Only 8 byte
        # elements are supported.
        assert(self.element_size == 8)
        return np.frombuffer(buf, dtype=np.uint64)

    def getv(self, vec):
        # Read several element ranges and return them concatenated in a single
        # array. vec is an (N, 2) array of inclusive [first, last] element
        # indexes.
        #
        # Scale the indexes to byte offsets on a private copy; multiplying vec
        # in place would silently modify the caller's array.
        vec = np.array(vec)
        vec *= self.element_size
        offsets = vec[:, 0]
        # Ranges are inclusive, so each one is (last - first) plus one element.
        lengths = (np.diff(vec) + self.element_size).reshape(len(vec))
        buf = bytearray(int(np.sum(lengths)))
        # Read each range directly into its slice of the output buffer.
        view = memoryview(buf)
        pos = 0
        for offset, length in zip(offsets, lengths):
            offset = int(offset)
            length = int(length)
            self._readin(offset, view[pos:pos+length])
            pos += length
        return self._toarray(buf)

    def get(self, index, nr=1):
        # Read nr consecutive elements starting at element index.
        offset = index * self.element_size
        length = nr * self.element_size
        buf = bytearray(length)
        self._readin(offset, buf)
        return self._toarray(buf)
| 126 | |
| 127 | |
# Masks applied to 64-bit pagemap entries: the top bit is tested to find
# present pages and the low 55 bits are extracted as the page frame number.
PM_PAGE_PRESENT = 2 ** 63
PM_PFN_MASK = 2 ** 55 - 1


class PageMap(BinArrayFile):
    # Reader for ranges of a given pid's pagemap, returned as numpy arrays of
    # 8 byte entries.
    def __init__(self, pid='self'):
        super().__init__(filename=f'/proc/{pid}/pagemap', element_size=8)
| 135 | |
| 136 | |
# Bits tested in the 64-bit per-page flag words read from /proc/kpageflags.
KPF_ANON = 2 ** 12
KPF_COMPOUND_HEAD = 2 ** 15
KPF_COMPOUND_TAIL = 2 ** 16
KPF_THP = 2 ** 22


class KPageFlags(BinArrayFile):
    # Reader for ranges of /proc/kpageflags, returned as numpy arrays of
    # 8 byte entries.
    def __init__(self):
        super().__init__(filename='/proc/kpageflags', element_size=8)
| 146 | |
| 147 | |
# Names of the smaps fields collected when all smaps stats are requested.
vma_all_stats = {
    "Size",
    "Rss",
    "Pss",
    "Pss_Dirty",
    "Shared_Clean",
    "Shared_Dirty",
    "Private_Clean",
    "Private_Dirty",
    "Referenced",
    "Anonymous",
    "KSM",
    "LazyFree",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
    "Shared_Hugetlb",
    "Private_Hugetlb",
    "Swap",
    "SwapPss",
    "Locked",
}

# The minimum set of smaps fields that the THP accounting itself relies on.
vma_min_stats = {
    "Rss",
    "Anonymous",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
}
| 178 | |
# One mapping parsed from an smaps header line. stats is a dict that maps an
# smaps field name to {'type': None, 'value': <int>} for each requested field.
VMA = collections.namedtuple('VMA', [
    'name',
    'start',
    'end',
    'read',
    'write',
    'execute',
    'private',
    'pgoff',
    'major',
    'minor',
    'inode',
    'stats',
])


class VMAList:
    # A container for VMAs, parsed from /proc/<pid>/smaps. Iterate over the
    # instance to receive VMAs.
    def __init__(self, pid='self', stats=()):
        # pid: process whose smaps file is parsed.
        # stats: collection of smaps field names to record for each VMA; only
        # membership tests are performed on it.
        self.vmas = []
        with open(f'/proc/{pid}/smaps', 'r') as file:
            for line in file:
                elements = line.split()
                if not elements:
                    # Tolerate blank lines rather than failing on elements[0].
                    continue
                if '-' in elements[0]:
                    # Header line: "start-end perms pgoff major:minor inode
                    # [name]".
                    start, end = (int(x, 16) for x in elements[0].split('-'))
                    major, minor = (int(x, 16) for x in elements[3].split(':'))
                    perms = elements[1]
                    self.vmas.append(VMA(
                        # The name may itself contain spaces (e.g. a path
                        # followed by " (deleted)"), so take every remaining
                        # token instead of requiring exactly six.
                        name=' '.join(elements[5:]),
                        start=start,
                        end=end,
                        read=perms[0] == 'r',
                        write=perms[1] == 'w',
                        execute=perms[2] == 'x',
                        private=perms[3] == 'p',
                        pgoff=int(elements[2], 16),
                        major=major,
                        minor=minor,
                        # NOTE(review): the kernel prints the inode in decimal.
                        # It is parsed as hex here and vma_print() formats it
                        # as hex again, so the digits are shown unchanged;
                        # change both together or not at all.
                        inode=int(elements[4], 16),
                        stats={},
                    ))
                else:
                    # Field line: "Name:   <value> kB". Strip the trailing
                    # colon and attach requested fields to the latest VMA.
                    param = elements[0][:-1]
                    if param in stats:
                        value = int(elements[1])
                        self.vmas[-1].stats[param] = {'type': None, 'value': value}

    def __iter__(self):
        yield from self.vmas
| 227 | |
| 228 | |
def thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads):
    # Given 4 same-sized arrays representing a range within a page table backed
    # by THPs (vfns: virtual frame numbers, pfns: physical frame numbers, anons:
    # True if page is anonymous, heads: True if page is head of a THP), return a
    # dictionary of statistics describing the mapped THPs.
    #
    # ranges holds (N, 2) arrays of inclusive [first, last] values for each
    # contiguous run; ranges[0] is in terms of indexes into the 4 arrays and
    # ranges[2] in terms of pfns. indexes is the array of positions used to
    # locate the folio heads. kpageflags is used to inspect the page that
    # follows each run, and vma supplies the PMD-mapped smaps stats.
    #
    # Returns a dict mapping a stat label to {'type': 'anon'|'file',
    # 'value': <kB>}.
    stats = {
        'file': {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        },
        'anon': {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        },
    }

    for rindex, rpfn in zip(ranges[0], ranges[2]):
        index_next = int(rindex[0])
        index_end = int(rindex[1]) + 1
        pfn_end = int(rpfn[1]) + 1

        # Positions of the THP head pages that fall inside this run.
        folios = indexes[index_next:index_end][heads[index_next:index_end]]

        # Account pages for any partially mapped THP at the front. In that case,
        # the first page of the range is a tail.
        nr = (int(folios[0]) if len(folios) else index_end) - index_next
        stats['anon' if anons[index_next] else 'file']['partial'] += nr

        # Account pages for any partially mapped THP at the back. In that case,
        # the next page after the range is a tail.
        if len(folios):
            flags = int(kpageflags.get(pfn_end)[0])
            if flags & KPF_COMPOUND_TAIL:
                nr = index_end - int(folios[-1])
                folios = folios[:-1]
                index_end -= nr
                # index_end now sits on the head of the partial THP, so
                # classify using that page. Using index_end - 1 would look at
                # the page before the THP (or wrap to the last array element
                # when the head is at position 0).
                stats['anon' if anons[index_end] else 'file']['partial'] += nr

        # Account fully mapped THPs in the middle of the range.
        if len(folios):
            # The size of each THP is the distance to the next head, or to the
            # end of the (trimmed) run for the last one.
            folio_nrs = np.append(np.diff(folios), np.uint64(index_end - folios[-1]))
            folio_orders = np.log2(folio_nrs).astype(np.uint64)
            for index, order in zip(folios, folio_orders):
                index = int(index)
                order = int(order)
                nr = 1 << order
                vfn = int(vfns[index])
                align = 'aligned' if align_forward(vfn, nr) == vfn else 'unaligned'
                anon = 'anon' if anons[index] else 'file'
                stats[anon][align][order] += nr

    # Account PMD-mapped THPs separately, so filter out of the stats. There is a
    # race between acquiring the smaps stats and reading pagemap, where memory
    # could be deallocated. So clamp to zero in case it would have gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
                      vma.stats['FilePmdMapped']['value']
    stats['anon']['aligned'][PMD_ORDER] = max(0, stats['anon']['aligned'][PMD_ORDER] - kbnr(anon_pmd_mapped))
    stats['file']['aligned'][PMD_ORDER] = max(0, stats['file']['aligned'][PMD_ORDER] - kbnr(file_pmd_mapped))

    rstats = {
        f"anon-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'file', 'value': file_pmd_mapped},
    }

    def flatten_sub(kind, subtype, counts):
        # Emit one entry per order, starting at order 2, converting page
        # counts to kB.
        param = f"{kind}-thp-pte-{subtype}-{{}}kB"
        for od, nr in enumerate(counts[2:], 2):
            rstats[param.format(odkb(od))] = {'type': kind, 'value': nrkb(nr)}

    def flatten_type(kind, counts):
        # Emit the aligned, unaligned and partial entries for one memory type.
        flatten_sub(kind, 'aligned', counts['aligned'])
        flatten_sub(kind, 'unaligned', counts['unaligned'])
        rstats[f"{kind}-thp-pte-partial"] = {'type': kind, 'value': nrkb(counts['partial'])}

    flatten_type('anon', stats['anon'])
    flatten_type('file', stats['file'])

    return rstats
| 310 | |
| 311 | |
def cont_parse(vma, order, ranges, anons, heads):
    # Count the pages that are mapped in naturally aligned contiguous blocks
    # of 2**order pages. ranges holds (N, 2) arrays of inclusive [first, last]
    # values for each contiguous run, expressed as array indexes, virtual
    # frame numbers and physical frame numbers respectively. anons and heads
    # are per-page booleans (page is anonymous / page is head of a THP).
    # Returns a dictionary of statistics describing the contiguous blocks.
    block = 1 << order
    anon_pages = 0
    file_pages = 0

    for rindex, rvfn, rpfn in zip(*ranges):
        first = int(rindex[0])
        limit = int(rindex[1]) + 1
        vfn_start = int(rvfn[0])
        pfn_start = int(rpfn[0])

        # A block can only be aligned both virtually and physically when the
        # run starts at the same offset within a block in both spaces.
        if align_offset(pfn_start, block) != align_offset(vfn_start, block):
            continue

        # Skip forward to the first block-aligned page of the run.
        first += align_forward(vfn_start, block) - vfn_start

        # Visit every whole block that fits in the remainder of the run.
        for base in range(first, limit - block + 1, block):
            # A head page after the first page of the block means the block
            # straddles two folios, so it does not count.
            if heads[base + 1:base + block].any():
                continue
            if anons[base]:
                anon_pages += block
            else:
                file_pages += block

    # Account blocks that are PMD-mapped separately, so filter out of the stats.
    # There is a race between acquiring the smaps stats and reading pagemap,
    # where memory could be deallocated. So clamp to zero in case it would have
    # gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = (vma.stats['ShmemPmdMapped']['value'] +
                       vma.stats['FilePmdMapped']['value'])
    anon_pages = max(0, anon_pages - kbnr(anon_pmd_mapped))
    file_pages = max(0, file_pages - kbnr(file_pmd_mapped))

    block_kb = nrkb(block)
    return {
        f"anon-cont-pmd-aligned-{block_kb}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-cont-pmd-aligned-{block_kb}kB": {'type': 'file', 'value': file_pmd_mapped},
        f"anon-cont-pte-aligned-{block_kb}kB": {'type': 'anon', 'value': nrkb(anon_pages)},
        f"file-cont-pte-aligned-{block_kb}kB": {'type': 'file', 'value': nrkb(file_pages)},
    }
| 361 | |
| 362 | |
def vma_print(vma, pid):
    # Print the header line of a VMA in an smaps-like layout, with the pid
    # prepended as the first column.
    perms = ''.join([
        'r' if vma.read else '-',
        'w' if vma.write else '-',
        'x' if vma.execute else '-',
        'p' if vma.private else 's',
    ])
    line = "{:010d}: {:016x}-{:016x} {}{}{}{} {:08x} {:02x}:{:02x} {:08x} {}".format(
        pid, vma.start, vma.end, *perms,
        vma.pgoff, vma.major, vma.minor, vma.inode, vma.name)
    print(line)
| 373 | |
| 374 | |
def stats_print(stats, tot_anon, tot_file, inc_empty):
    # Print a statistics dictionary, one "label: value kB" line per entry.
    # Zero-valued entries are skipped unless inc_empty is set. Entries typed
    # 'anon' or 'file' also show their share of the matching total when that
    # total is positive.
    label_field = 32
    totals = {'anon': tot_anon, 'file': tot_file}
    for label, stat in stats.items():
        kind = stat['type']
        value = stat['value']
        if not value and not inc_empty:
            continue
        total = totals.get(kind, 0)
        percent = f' ({value / total:3.0%})' if total > 0 else ''
        pad = max(0, label_field - len(label) - 1)
        print(f"{label}:{' ' * pad}{value:8} kB{percent}")
| 390 | |
| 391 | |
def vma_parse(vma, pagemap, kpageflags, contorders):
    # Generate thp and cont statistics for a single VMA. Returns a tuple of
    # (stats dictionary, total anonymous kB, total file-backed kB).
    first_vfn = vma.start >> PAGE_SHIFT
    last_vfn = vma.end >> PAGE_SHIFT

    # Read one pagemap entry per page of the VMA and keep only the pages that
    # are present.
    entries = pagemap.get(first_vfn, last_vfn - first_vfn)
    present = (entries & PM_PAGE_PRESENT) != 0
    pfns = (entries & PM_PFN_MASK)[present]
    vfns = np.arange(first_vfn, last_vfn, dtype=np.uint64)[present]

    # Fetch the flags of the present pages, reading each physically
    # contiguous run in one go.
    pfn_runs = cont_ranges_all([pfns], [pfns])[0]
    flags = kpageflags.getv(pfn_runs)
    is_thp = (flags & KPF_THP) != 0

    # From here on only pages that belong to a THP are of interest.
    anons = ((flags & KPF_ANON) != 0)[is_thp]
    heads = ((flags & KPF_COMPOUND_HEAD) != 0)[is_thp]
    vfns = vfns[is_thp]
    pfns = pfns[is_thp]

    # Runs that are contiguous both virtually and physically.
    indexes = np.arange(len(vfns), dtype=np.uint64)
    ranges = cont_ranges_all([vfns, pfns], [indexes, vfns, pfns])

    merged = dict(thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads))
    for order in contorders:
        merged.update(cont_parse(vma, order, ranges, anons, heads))

    # Whatever part of Rss is not anonymous is treated as file-backed.
    tot_anon = vma.stats['Anonymous']['value']
    tot_file = vma.stats['Rss']['value'] - tot_anon

    return merged, tot_anon, tot_file
| 428 | |
| 429 | |
def do_main(args):
    # Run one scan over the selected processes and print either per-VMA
    # statistics or, with args.rollup, a single summed set of statistics.
    pids = set()
    rollup = {}
    rollup_anon = 0
    rollup_file = 0

    # Errors are only propagated (strict) when the user named the pids
    # explicitly; in the other modes the affected pid is silently skipped.
    if args.cgroup:
        strict = False
        for cgroup, _subdirs, _files in os.walk(args.cgroup):
            with open(f'{cgroup}/cgroup.procs') as pidfile:
                for line in pidfile:
                    pids.add(int(line.strip()))
    elif args.pid:
        strict = True
        pids = pids.union(args.pid)
    else:
        strict = False
        for entry in os.listdir('/proc'):
            if entry.isdigit():
                pids.add(int(entry))

    if not args.rollup:
        print("       PID             START              END PROT   OFFSET   DEV    INODE OBJECT")

    wanted = vma_all_stats if args.inc_smaps else vma_min_stats

    for pid in pids:
        try:
            with PageMap(pid) as pagemap, KPageFlags() as kpageflags:
                for vma in VMAList(pid, wanted):
                    accessible = vma.read or vma.write or vma.execute
                    if accessible and vma.stats['Rss']['value'] > 0:
                        stats, vma_anon, vma_file = vma_parse(vma, pagemap, kpageflags, args.cont)
                    else:
                        stats, vma_anon, vma_file = {}, 0, 0

                    if args.inc_smaps:
                        stats = {**vma.stats, **stats}

                    if not args.rollup:
                        vma_print(vma, pid)
                        stats_print(stats, vma_anon, vma_file, args.inc_empty)
                        continue

                    # Fold this VMA's values into the running totals.
                    for key, stat in stats.items():
                        if key in rollup:
                            assert(rollup[key]['type'] == stat['type'])
                            rollup[key]['value'] += stat['value']
                        else:
                            rollup[key] = stat
                    rollup_anon += vma_anon
                    rollup_file += vma_file
        except (FileNotFoundError, ProcessLookupError, FileIOException):
            if strict:
                raise

    if args.rollup:
        stats_print(rollup, rollup_anon, rollup_file, args.inc_empty)
| 486 | |
| 487 | |
def main():
    # Parse the command line, validate the --cont sizes and run the scan
    # either once or periodically. ArgException is raised (after printing the
    # usage) when a --cont value is rejected.
    docs_width = shutil.get_terminal_size().columns
    docs_width -= 2
    docs_width = min(80, docs_width)

    def reflow(string):
        # Collapse all whitespace, then treat each literal "\n" marker in the
        # text as a paragraph break and wrap every paragraph to docs_width.
        text = re.sub(r'\s+', ' ', string)
        text = re.sub(r'\s*\\n\s*', '\n', text)
        paras = text.split('\n')
        paras = [textwrap.fill(p, width=docs_width) for p in paras]
        return '\n'.join(paras)

    def formatter(prog):
        return argparse.RawDescriptionHelpFormatter(prog, width=docs_width)

    def size2order(human):
        # Convert a size string with optional K/M/G suffix (e.g. "64K") to a
        # page order. Raises ArgException unless the size is a power-of-2
        # number of pages, at least 2 pages and at most the PMD size.
        units = {
            "K": 2**10, "M": 2**20, "G": 2**30,
            "k": 2**10, "m": 2**20, "g": 2**30,
        }
        unit = 1
        # Check for emptiness first so that an empty value reaches the
        # int() error below instead of raising IndexError here.
        if human and human[-1] in units:
            unit = units[human[-1]]
            human = human[:-1]
        try:
            size = int(human)
        except ValueError:
            raise ArgException('error: --cont value must be integer size with optional KMG unit')
        size *= unit
        # Reject anything below 2 pages up front. This is the same condition
        # as order < 1, but also covers zero and negative sizes for which
        # math.log2() would raise a "math domain error".
        if size < 2 * PAGE_SIZE:
            raise ArgException('error: --cont value must be size of at least 2 pages')
        order = int(math.log2(size / PAGE_SIZE))
        if (1 << order) * PAGE_SIZE != size:
            raise ArgException('error: --cont value must be size of power-of-2 pages')
        if order > PMD_ORDER:
            raise ArgException('error: --cont value must be less than or equal to PMD order')
        return order

    parser = argparse.ArgumentParser(formatter_class=formatter,
        description=reflow("""Prints information about how transparent huge
                    pages are mapped, either system-wide, or for a specified
                    process or cgroup.\\n
                    \\n
                    When run with --pid, the user explicitly specifies the set
                    of pids to scan. e.g. "--pid 10 [--pid 134 ...]". When run
                    with --cgroup, the user passes either a v1 or v2 cgroup and
                    all pids that belong to the cgroup subtree are scanned. When
                    run with neither --pid nor --cgroup, the full set of pids on
                    the system is gathered from /proc and scanned as if the user
                    had provided "--pid 1 --pid 2 ...".\\n
                    \\n
                    A default set of statistics is always generated for THP
                    mappings. However, it is also possible to generate
                    additional statistics for "contiguous block mappings" where
                    the block size is user-defined.\\n
                    \\n
                    Statistics are maintained independently for anonymous and
                    file-backed (pagecache) memory and are shown both in kB and
                    as a percentage of either total anonymous or total
                    file-backed memory as appropriate.\\n
                    \\n
                    THP Statistics\\n
                    --------------\\n
                    \\n
                    Statistics are always generated for fully- and
                    contiguously-mapped THPs whose mapping address is aligned to
                    their size, for each <size> supported by the system.
                    Separate counters describe THPs mapped by PTE vs those
                    mapped by PMD. (Although note a THP can only be mapped by
                    PMD if it is PMD-sized):\\n
                    \\n
                    - anon-thp-pte-aligned-<size>kB\\n
                    - file-thp-pte-aligned-<size>kB\\n
                    - anon-thp-pmd-aligned-<size>kB\\n
                    - file-thp-pmd-aligned-<size>kB\\n
                    \\n
                    Similarly, statistics are always generated for fully- and
                    contiguously-mapped THPs whose mapping address is *not*
                    aligned to their size, for each <size> supported by the
                    system. Due to the unaligned mapping, it is impossible to
                    map by PMD, so there are only PTE counters for this case:\\n
                    \\n
                    - anon-thp-pte-unaligned-<size>kB\\n
                    - file-thp-pte-unaligned-<size>kB\\n
                    \\n
                    Statistics are also always generated for mapped pages that
                    belong to a THP but where the is THP is *not* fully- and
                    contiguously- mapped. These "partial" mappings are all
                    counted in the same counter regardless of the size of the
                    THP that is partially mapped:\\n
                    \\n
                    - anon-thp-pte-partial\\n
                    - file-thp-pte-partial\\n
                    \\n
                    Contiguous Block Statistics\\n
                    ---------------------------\\n
                    \\n
                    An optional, additional set of statistics is generated for
                    every contiguous block size specified with `--cont <size>`.
                    These statistics show how much memory is mapped in
                    contiguous blocks of <size> and also aligned to <size>. A
                    given contiguous block must all belong to the same THP, but
                    there is no requirement for it to be the *whole* THP.
                    Separate counters describe contiguous blocks mapped by PTE
                    vs those mapped by PMD:\\n
                    \\n
                    - anon-cont-pte-aligned-<size>kB\\n
                    - file-cont-pte-aligned-<size>kB\\n
                    - anon-cont-pmd-aligned-<size>kB\\n
                    - file-cont-pmd-aligned-<size>kB\\n
                    \\n
                    As an example, if monitoring 64K contiguous blocks (--cont
                    64K), there are a number of sources that could provide such
                    blocks: a fully- and contiguously-mapped 64K THP that is
                    aligned to a 64K boundary would provide 1 block. A fully-
                    and contiguously-mapped 128K THP that is aligned to at least
                    a 64K boundary would provide 2 blocks. Or a 128K THP that
                    maps its first 100K, but contiguously and starting at a 64K
                    boundary would provide 1 block. A fully- and
                    contiguously-mapped 2M THP would provide 32 blocks. There
                    are many other possible permutations.\\n"""),
        epilog=reflow("""Requires root privilege to access pagemap and
                    kpageflags."""))

    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument('--pid',
        metavar='pid', required=False, type=int, default=[], action='append',
        help="""Process id of the target process. Maybe issued multiple times to
            scan multiple processes. --pid and --cgroup are mutually exclusive.
            If neither are provided, all processes are scanned to provide
            system-wide information.""")

    group.add_argument('--cgroup',
        metavar='path', required=False,
        help="""Path to the target cgroup in sysfs. Iterates over every pid in
            the cgroup and its children. --pid and --cgroup are mutually
            exclusive. If neither are provided, all processes are scanned to
            provide system-wide information.""")

    parser.add_argument('--rollup',
        required=False, default=False, action='store_true',
        help="""Sum the per-vma statistics to provide a summary over the whole
            system, process or cgroup.""")

    parser.add_argument('--cont',
        metavar='size[KMG]', required=False, default=[], action='append',
        help="""Adds stats for memory that is mapped in contiguous blocks of
            <size> and also aligned to <size>. May be issued multiple times to
            track multiple sized blocks. Useful to infer e.g. arm64 contpte and
            hpa mappings. Size must be a power-of-2 number of pages.""")

    parser.add_argument('--inc-smaps',
        required=False, default=False, action='store_true',
        help="""Include all numerical, additive /proc/<pid>/smaps stats in the
            output.""")

    parser.add_argument('--inc-empty',
        required=False, default=False, action='store_true',
        help="""Show all statistics including those whose value is 0.""")

    parser.add_argument('--periodic',
        metavar='sleep_ms', required=False, type=int,
        help="""Run in a loop, polling every sleep_ms milliseconds.""")

    args = parser.parse_args()

    # Replace the raw --cont strings with page orders. Show the usage before
    # letting the error propagate to the caller.
    try:
        args.cont = [size2order(cont) for cont in args.cont]
    except ArgException:
        parser.print_usage()
        raise

    if args.periodic:
        while True:
            do_main(args)
            print()
            time.sleep(args.periodic / 1000)
    else:
        do_main(args)
| 667 | |
| 668 | |
if __name__ == "__main__":
    # Report any error as a single "<prog>: <message>" line and exit with a
    # failure status instead of dumping a traceback.
    try:
        main()
    except Exception as e:
        prog = os.path.basename(sys.argv[0])
        print(f'{prog}: {e}')
        # sys.exit() is always available, whereas the bare exit() helper is
        # only injected by the site module.
        sys.exit(1)