| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (c) 2022, Oracle and/or its affiliates. |
| * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved |
| */ |
| #include <linux/iova_bitmap.h> |
| #include <linux/mm.h> |
| #include <linux/slab.h> |
| #include <linux/highmem.h> |
| |
| #define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) |
| |
| /* |
| * struct iova_bitmap_map - A bitmap representing an IOVA range |
| * |
| * Main data structure for tracking mapped user pages of bitmap data. |
| * |
| * For example, for something recording dirty IOVAs, it will be provided a |
| * struct iova_bitmap structure, as a general structure for iterating the |
| * total IOVA range. The struct iova_bitmap_map, though, represents the |
| * subset of said IOVA space that is pinned by its parent structure (struct |
| * iova_bitmap). |
| * |
| * The user does not need to exact location of the bits in the bitmap. |
| * From user perspective the only API available is iova_bitmap_set() which |
| * records the IOVA *range* in the bitmap by setting the corresponding |
| * bits. |
| * |
| * The bitmap is an array of u64 whereas each bit represents an IOVA of |
| * range of (1 << pgshift). Thus formula for the bitmap data to be set is: |
| * |
| * data[(iova / page_size) / 64] & (1ULL << (iova % 64)) |
| */ |
| struct iova_bitmap_map { |
| /* base IOVA representing bit 0 of the first page */ |
| unsigned long iova; |
| |
| /* page size order that each bit granules to */ |
| unsigned long pgshift; |
| |
| /* page offset of the first user page pinned */ |
| unsigned long pgoff; |
| |
| /* number of pages pinned */ |
| unsigned long npages; |
| |
| /* pinned pages representing the bitmap data */ |
| struct page **pages; |
| }; |
| |
| /* |
| * struct iova_bitmap - The IOVA bitmap object |
| * |
| * Main data structure for iterating over the bitmap data. |
| * |
| * Abstracts the pinning work and iterates in IOVA ranges. |
| * It uses a windowing scheme and pins the bitmap in relatively |
| * big ranges e.g. |
| * |
| * The bitmap object uses one base page to store all the pinned pages |
| * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores |
| * 512 struct page pointers which, if the base page size is 4K, it means |
| * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is |
| * also 4K then the range window to iterate is 64G. |
| * |
| * For example iterating on a total IOVA range of 4G..128G, it will walk |
| * through this set of ranges: |
| * |
| * 4G - 68G-1 (64G) |
| * 68G - 128G-1 (64G) |
| * |
| * An example of the APIs on how to use/iterate over the IOVA bitmap: |
| * |
| * bitmap = iova_bitmap_alloc(iova, length, page_size, data); |
| * if (IS_ERR(bitmap)) |
| * return PTR_ERR(bitmap); |
| * |
| * ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn); |
| * |
| * iova_bitmap_free(bitmap); |
| * |
| * Each iteration of the @dirty_reporter_fn is called with a unique @iova |
| * and @length argument, indicating the current range available through the |
| * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty |
| * areas (@iova_length) within that provided range, as following: |
| * |
| * iova_bitmap_set(bitmap, iova, iova_length); |
| * |
| * The internals of the object uses an index @mapped_base_index that indexes |
| * which u64 word of the bitmap is mapped, up to @mapped_total_index. |
| * Those keep being incremented until @mapped_total_index is reached while |
| * mapping up to PAGE_SIZE / sizeof(struct page*) maximum of pages. |
| * |
| * The IOVA bitmap is usually located on what tracks DMA mapped ranges or |
| * some form of IOVA range tracking that co-relates to the user passed |
| * bitmap. |
| */ |
| struct iova_bitmap { |
| /* IOVA range representing the currently mapped bitmap data */ |
| struct iova_bitmap_map mapped; |
| |
| /* userspace address of the bitmap */ |
| u8 __user *bitmap; |
| |
| /* u64 index that @mapped points to */ |
| unsigned long mapped_base_index; |
| |
| /* how many u64 can we walk in total */ |
| unsigned long mapped_total_index; |
| |
| /* base IOVA of the whole bitmap */ |
| unsigned long iova; |
| |
| /* length of the IOVA range for the whole bitmap */ |
| size_t length; |
| |
| /* length of the IOVA range set ahead the pinned pages */ |
| unsigned long set_ahead_length; |
| }; |
| |
| /* |
| * Converts a relative IOVA to a bitmap index. |
| * This function provides the index into the u64 array (bitmap::bitmap) |
| * for a given IOVA offset. |
| * Relative IOVA means relative to the bitmap::mapped base IOVA |
| * (stored in mapped::iova). All computations in this file are done using |
| * relative IOVAs and thus avoid an extra subtraction against mapped::iova. |
| * The user API iova_bitmap_set() always uses a regular absolute IOVAs. |
| */ |
| static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap, |
| unsigned long iova) |
| { |
| unsigned long pgsize = 1 << bitmap->mapped.pgshift; |
| |
| return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize); |
| } |
| |
| /* |
| * Converts a bitmap index to a *relative* IOVA. |
| */ |
| static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap, |
| unsigned long index) |
| { |
| unsigned long pgshift = bitmap->mapped.pgshift; |
| |
| return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift; |
| } |
| |
| /* |
| * Returns the base IOVA of the mapped range. |
| */ |
| static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap) |
| { |
| unsigned long skip = bitmap->mapped_base_index; |
| |
| return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip); |
| } |
| |
| /* |
| * Pins the bitmap user pages for the current range window. |
| * This is internal to IOVA bitmap and called when advancing the |
| * index (@mapped_base_index) or allocating the bitmap. |
| */ |
| static int iova_bitmap_get(struct iova_bitmap *bitmap) |
| { |
| struct iova_bitmap_map *mapped = &bitmap->mapped; |
| unsigned long npages; |
| u8 __user *addr; |
| long ret; |
| |
| /* |
| * @mapped_base_index is the index of the currently mapped u64 words |
| * that we have access. Anything before @mapped_base_index is not |
| * mapped. The range @mapped_base_index .. @mapped_total_index-1 is |
| * mapped but capped at a maximum number of pages. |
| */ |
| npages = DIV_ROUND_UP((bitmap->mapped_total_index - |
| bitmap->mapped_base_index) * |
| sizeof(*bitmap->bitmap), PAGE_SIZE); |
| |
| /* |
| * Bitmap address to be pinned is calculated via pointer arithmetic |
| * with bitmap u64 word index. |
| */ |
| addr = bitmap->bitmap + bitmap->mapped_base_index; |
| |
| /* |
| * We always cap at max number of 'struct page' a base page can fit. |
| * This is, for example, on x86 means 2M of bitmap data max. |
| */ |
| npages = min(npages + !!offset_in_page(addr), |
| PAGE_SIZE / sizeof(struct page *)); |
| |
| ret = pin_user_pages_fast((unsigned long)addr, npages, |
| FOLL_WRITE, mapped->pages); |
| if (ret <= 0) |
| return -EFAULT; |
| |
| mapped->npages = (unsigned long)ret; |
| /* Base IOVA where @pages point to i.e. bit 0 of the first page */ |
| mapped->iova = iova_bitmap_mapped_iova(bitmap); |
| |
| /* |
| * offset of the page where pinned pages bit 0 is located. |
| * This handles the case where the bitmap is not PAGE_SIZE |
| * aligned. |
| */ |
| mapped->pgoff = offset_in_page(addr); |
| return 0; |
| } |
| |
| /* |
| * Unpins the bitmap user pages and clears @npages |
| * (un)pinning is abstracted from API user and it's done when advancing |
| * the index or freeing the bitmap. |
| */ |
| static void iova_bitmap_put(struct iova_bitmap *bitmap) |
| { |
| struct iova_bitmap_map *mapped = &bitmap->mapped; |
| |
| if (mapped->npages) { |
| unpin_user_pages(mapped->pages, mapped->npages); |
| mapped->npages = 0; |
| } |
| } |
| |
| /** |
| * iova_bitmap_alloc() - Allocates an IOVA bitmap object |
| * @iova: Start address of the IOVA range |
| * @length: Length of the IOVA range |
| * @page_size: Page size of the IOVA bitmap. It defines what each bit |
| * granularity represents |
| * @data: Userspace address of the bitmap |
| * |
| * Allocates an IOVA object and initializes all its fields including the |
| * first user pages of @data. |
| * |
| * Return: A pointer to a newly allocated struct iova_bitmap |
| * or ERR_PTR() on error. |
| */ |
| struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, |
| unsigned long page_size, u64 __user *data) |
| { |
| struct iova_bitmap_map *mapped; |
| struct iova_bitmap *bitmap; |
| int rc; |
| |
| bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); |
| if (!bitmap) |
| return ERR_PTR(-ENOMEM); |
| |
| mapped = &bitmap->mapped; |
| mapped->pgshift = __ffs(page_size); |
| bitmap->bitmap = (u8 __user *)data; |
| bitmap->mapped_total_index = |
| iova_bitmap_offset_to_index(bitmap, length - 1) + 1; |
| bitmap->iova = iova; |
| bitmap->length = length; |
| mapped->iova = iova; |
| mapped->pages = (struct page **)__get_free_page(GFP_KERNEL); |
| if (!mapped->pages) { |
| rc = -ENOMEM; |
| goto err; |
| } |
| |
| rc = iova_bitmap_get(bitmap); |
| if (rc) |
| goto err; |
| return bitmap; |
| |
| err: |
| iova_bitmap_free(bitmap); |
| return ERR_PTR(rc); |
| } |
| EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc, IOMMUFD); |
| |
| /** |
| * iova_bitmap_free() - Frees an IOVA bitmap object |
| * @bitmap: IOVA bitmap to free |
| * |
| * It unpins and releases pages array memory and clears any leftover |
| * state. |
| */ |
| void iova_bitmap_free(struct iova_bitmap *bitmap) |
| { |
| struct iova_bitmap_map *mapped = &bitmap->mapped; |
| |
| iova_bitmap_put(bitmap); |
| |
| if (mapped->pages) { |
| free_page((unsigned long)mapped->pages); |
| mapped->pages = NULL; |
| } |
| |
| kfree(bitmap); |
| } |
| EXPORT_SYMBOL_NS_GPL(iova_bitmap_free, IOMMUFD); |
| |
| /* |
| * Returns the remaining bitmap indexes from mapped_total_index to process for |
| * the currently pinned bitmap pages. |
| */ |
| static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) |
| { |
| unsigned long remaining, bytes; |
| |
| bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff; |
| |
| remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; |
| remaining = min_t(unsigned long, remaining, |
| DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap))); |
| |
| return remaining; |
| } |
| |
| /* |
| * Returns the length of the mapped IOVA range. |
| */ |
| static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap) |
| { |
| unsigned long max_iova = bitmap->iova + bitmap->length - 1; |
| unsigned long iova = iova_bitmap_mapped_iova(bitmap); |
| unsigned long remaining; |
| |
| /* |
| * iova_bitmap_mapped_remaining() returns a number of indexes which |
| * when converted to IOVA gives us a max length that the bitmap |
| * pinned data can cover. Afterwards, that is capped to |
| * only cover the IOVA range in @bitmap::iova .. @bitmap::length. |
| */ |
| remaining = iova_bitmap_index_to_offset(bitmap, |
| iova_bitmap_mapped_remaining(bitmap)); |
| |
| if (iova + remaining - 1 > max_iova) |
| remaining -= ((iova + remaining - 1) - max_iova); |
| |
| return remaining; |
| } |
| |
| /* |
| * Returns true if there's not more data to iterate. |
| */ |
| static bool iova_bitmap_done(struct iova_bitmap *bitmap) |
| { |
| return bitmap->mapped_base_index >= bitmap->mapped_total_index; |
| } |
| |
| static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap, |
| size_t set_ahead_length) |
| { |
| int ret = 0; |
| |
| while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) { |
| unsigned long length = iova_bitmap_mapped_length(bitmap); |
| unsigned long iova = iova_bitmap_mapped_iova(bitmap); |
| |
| ret = iova_bitmap_get(bitmap); |
| if (ret) |
| break; |
| |
| length = min(length, set_ahead_length); |
| iova_bitmap_set(bitmap, iova, length); |
| |
| set_ahead_length -= length; |
| bitmap->mapped_base_index += |
| iova_bitmap_offset_to_index(bitmap, length - 1) + 1; |
| iova_bitmap_put(bitmap); |
| } |
| |
| bitmap->set_ahead_length = 0; |
| return ret; |
| } |
| |
| /* |
| * Advances to the next range, releases the current pinned |
| * pages and pins the next set of bitmap pages. |
| * Returns 0 on success or otherwise errno. |
| */ |
| static int iova_bitmap_advance(struct iova_bitmap *bitmap) |
| { |
| unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1; |
| unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1; |
| |
| bitmap->mapped_base_index += count; |
| |
| iova_bitmap_put(bitmap); |
| if (iova_bitmap_done(bitmap)) |
| return 0; |
| |
| /* Iterate, set and skip any bits requested for next iteration */ |
| if (bitmap->set_ahead_length) { |
| int ret; |
| |
| ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length); |
| if (ret) |
| return ret; |
| } |
| |
| /* When advancing the index we pin the next set of bitmap pages */ |
| return iova_bitmap_get(bitmap); |
| } |
| |
| /** |
| * iova_bitmap_for_each() - Iterates over the bitmap |
| * @bitmap: IOVA bitmap to iterate |
| * @opaque: Additional argument to pass to the callback |
| * @fn: Function that gets called for each IOVA range |
| * |
| * Helper function to iterate over bitmap data representing a portion of IOVA |
| * space. It hides the complexity of iterating bitmaps and translating the |
| * mapped bitmap user pages into IOVA ranges to process. |
| * |
| * Return: 0 on success, and an error on failure either upon |
| * iteration or when the callback returns an error. |
| */ |
| int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, |
| iova_bitmap_fn_t fn) |
| { |
| int ret = 0; |
| |
| for (; !iova_bitmap_done(bitmap) && !ret; |
| ret = iova_bitmap_advance(bitmap)) { |
| ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap), |
| iova_bitmap_mapped_length(bitmap), opaque); |
| if (ret) |
| break; |
| } |
| |
| return ret; |
| } |
| EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD); |
| |
| /** |
| * iova_bitmap_set() - Records an IOVA range in bitmap |
| * @bitmap: IOVA bitmap |
| * @iova: IOVA to start |
| * @length: IOVA range length |
| * |
| * Set the bits corresponding to the range [iova .. iova+length-1] in |
| * the user bitmap. |
| * |
| */ |
| void iova_bitmap_set(struct iova_bitmap *bitmap, |
| unsigned long iova, size_t length) |
| { |
| struct iova_bitmap_map *mapped = &bitmap->mapped; |
| unsigned long cur_bit = ((iova - mapped->iova) >> |
| mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; |
| unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> |
| mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; |
| unsigned long last_page_idx = mapped->npages - 1; |
| |
| do { |
| unsigned int page_idx = cur_bit / BITS_PER_PAGE; |
| unsigned int offset = cur_bit % BITS_PER_PAGE; |
| unsigned int nbits = min(BITS_PER_PAGE - offset, |
| last_bit - cur_bit + 1); |
| void *kaddr; |
| |
| if (unlikely(page_idx > last_page_idx)) |
| break; |
| |
| kaddr = kmap_local_page(mapped->pages[page_idx]); |
| bitmap_set(kaddr, offset, nbits); |
| kunmap_local(kaddr); |
| cur_bit += nbits; |
| } while (cur_bit <= last_bit); |
| |
| if (unlikely(cur_bit <= last_bit)) { |
| bitmap->set_ahead_length = |
| ((last_bit - cur_bit + 1) << bitmap->mapped.pgshift); |
| } |
| } |
| EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD); |