// SPDX-License-Identifier: GPL-2.0

#include <linux/objpool.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/atomic.h>
#include <linux/irqflags.h>
#include <linux/cpumask.h>
#include <linux/log2.h>

/*
 * objpool: ring-array based lockless MPMC/FIFO queues
 *
 * Copyright: wuqiang.matt@bytedance.com,mhiramat@kernel.org
 */
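
/*
 * A rough sketch of the per-cpu slot layout, as set up by
 * objpool_init_percpu_slot() below: each possible CPU owns one
 * objpool_slot, allocated as a single block holding the ring of
 * 'capacity' entry pointers immediately followed by the pre-allocated
 * objects:
 *
 *      | entries[0 .. capacity-1] | object 0 | object 1 | ... |
 *
 * head, tail and last are free-running sequence counters; an entry
 * index is obtained as (counter & mask), with mask == capacity - 1.
 */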

/* initialize percpu objpool_slot */
static int
objpool_init_percpu_slot(struct objpool_head *pool,
                         struct objpool_slot *slot,
                         int nodes, void *context,
                         objpool_init_obj_cb objinit)
{
        void *obj = (void *)&slot->entries[pool->capacity];
        int i;

        /* initialize elements of percpu objpool_slot */
        slot->mask = pool->capacity - 1;

        for (i = 0; i < nodes; i++) {
                if (objinit) {
                        int rc = objinit(obj, context);
                        if (rc)
                                return rc;
                }
                slot->entries[slot->tail & slot->mask] = obj;
                obj = obj + pool->obj_size;
                slot->tail++;
                slot->last = slot->tail;
                pool->nr_objs++;
        }

        return 0;
}

/* allocate and initialize percpu slots */
static int
objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs,
                          void *context, objpool_init_obj_cb objinit)
{
        int i, cpu_count = 0;

        for (i = 0; i < pool->nr_cpus; i++) {

                struct objpool_slot *slot;
                int nodes, size, rc;

                /* skip the cpu node which could never be present */
                if (!cpu_possible(i))
                        continue;

                /* compute how many objects are to be allocated for this slot */
                nodes = nr_objs / num_possible_cpus();
                if (cpu_count < (nr_objs % num_possible_cpus()))
                        nodes++;
                cpu_count++;

                size = struct_size(slot, entries, pool->capacity) +
                        pool->obj_size * nodes;

                /*
                 * here we allocate the percpu-slot & objs together in a
                 * single allocation to make it more compact, taking
                 * advantage of warm caches and TLB hits. by default
                 * vmalloc is used to reduce the pressure on the kernel
                 * slab allocator. note that the minimal allocation size
                 * of vmalloc is one page, since vmalloc always aligns
                 * the requested size up to page size
                 */
                if (pool->gfp & GFP_ATOMIC)
                        slot = kmalloc_node(size, pool->gfp, cpu_to_node(i));
                else
                        slot = __vmalloc_node(size, sizeof(void *), pool->gfp,
                                cpu_to_node(i), __builtin_return_address(0));
                if (!slot)
                        return -ENOMEM;
                memset(slot, 0, size);
                pool->cpu_slots[i] = slot;

                /* initialize the objpool_slot of cpu node i */
                rc = objpool_init_percpu_slot(pool, slot, nodes, context, objinit);
                if (rc)
                        return rc;
        }

        return 0;
}

/* cleanup all percpu slots of the object pool */
static void objpool_fini_percpu_slots(struct objpool_head *pool)
{
        int i;

        if (!pool->cpu_slots)
                return;

        for (i = 0; i < pool->nr_cpus; i++)
                kvfree(pool->cpu_slots[i]);
        kfree(pool->cpu_slots);
}

/* initialize object pool and pre-allocate objects */
int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
                 gfp_t gfp, void *context, objpool_init_obj_cb objinit,
                 objpool_fini_cb release)
{
        int rc, capacity, slot_size;

        /* check input parameters */
        if (nr_objs <= 0 || nr_objs > OBJPOOL_NR_OBJECT_MAX ||
            object_size <= 0 || object_size > OBJPOOL_OBJECT_SIZE_MAX)
                return -EINVAL;

        /* align up to unsigned long size */
        object_size = ALIGN(object_size, sizeof(long));

        /* calculate capacity of percpu objpool_slot */
        capacity = roundup_pow_of_two(nr_objs);
        if (!capacity)
                return -EINVAL;

        /* initialize objpool pool */
        memset(pool, 0, sizeof(struct objpool_head));
        pool->nr_cpus = nr_cpu_ids;
        pool->obj_size = object_size;
        pool->capacity = capacity;
        pool->gfp = gfp & ~__GFP_ZERO;
        pool->context = context;
        pool->release = release;
        slot_size = pool->nr_cpus * sizeof(struct objpool_slot);
        pool->cpu_slots = kzalloc(slot_size, pool->gfp);
        if (!pool->cpu_slots)
                return -ENOMEM;

        /* initialize per-cpu slots */
        rc = objpool_init_percpu_slots(pool, nr_objs, context, objinit);
        if (rc)
                objpool_fini_percpu_slots(pool);
        else
                refcount_set(&pool->ref, pool->nr_objs + 1);

        return rc;
}
EXPORT_SYMBOL_GPL(objpool_init);
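
/*
 * Typical initialization, shown here only as an illustrative sketch: the
 * type "my_obj", the callback "my_objinit" and the object count below are
 * hypothetical and not part of this file.
 *
 *      struct my_obj {
 *              int id;
 *      };
 *
 *      static int my_objinit(void *obj, void *context)
 *      {
 *              struct my_obj *o = obj;
 *
 *              o->id = 0;
 *              return 0;
 *      }
 *
 *      static struct objpool_head my_pool;
 *
 *      static int my_pool_setup(void)
 *      {
 *              return objpool_init(&my_pool, 128, sizeof(struct my_obj),
 *                                  GFP_KERNEL, NULL, my_objinit, NULL);
 *      }
 */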

/*
 * add the object to the per-cpu slot; the ring can never overflow
 * since capacity >= nr_objs
 */
static inline int
objpool_try_add_slot(void *obj, struct objpool_head *pool, int cpu)
{
        struct objpool_slot *slot = pool->cpu_slots[cpu];
        uint32_t head, tail;

        /* loading tail and head as a local snapshot, tail first */
        tail = READ_ONCE(slot->tail);

        do {
                head = READ_ONCE(slot->head);
                /* fault caught: something must be wrong */
                WARN_ON_ONCE(tail - head > pool->nr_objs);
        } while (!try_cmpxchg_acquire(&slot->tail, &tail, tail + 1));

        /* now the tail position is reserved for the given obj */
        WRITE_ONCE(slot->entries[tail & slot->mask], obj);
        /* update sequence to make this obj available for pop() */
        smp_store_release(&slot->last, tail + 1);

        return 0;
}

/* reclaim an object to the object pool */
int objpool_push(void *obj, struct objpool_head *pool)
{
        unsigned long flags;
        int rc;

        /* disable local irq to avoid preemption & interruption */
        raw_local_irq_save(flags);
        rc = objpool_try_add_slot(obj, pool, raw_smp_processor_id());
        raw_local_irq_restore(flags);

        return rc;
}
EXPORT_SYMBOL_GPL(objpool_push);

/* try to retrieve object from slot */
static inline void *objpool_try_get_slot(struct objpool_head *pool, int cpu)
{
        struct objpool_slot *slot = pool->cpu_slots[cpu];
        /* load head snapshot, other cpus may change it */
        uint32_t head = smp_load_acquire(&slot->head);

        while (head != READ_ONCE(slot->last)) {
                void *obj;

                /* obj must be retrieved before moving head forward */
                obj = READ_ONCE(slot->entries[head & slot->mask]);

                /* move head forward to mark its consumption */
                if (try_cmpxchg_release(&slot->head, &head, head + 1))
                        return obj;
        }

        return NULL;
}

/* allocate an object from the object pool */
void *objpool_pop(struct objpool_head *pool)
{
        void *obj = NULL;
        unsigned long flags;
        int i, cpu;

        /* disable local irq to avoid preemption & interruption */
        raw_local_irq_save(flags);

        cpu = raw_smp_processor_id();
        for (i = 0; i < num_possible_cpus(); i++) {
                obj = objpool_try_get_slot(pool, cpu);
                if (obj)
                        break;
                cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1);
        }
        raw_local_irq_restore(flags);

        return obj;
}
EXPORT_SYMBOL_GPL(objpool_pop);
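
/*
 * Typical use of the pop/push pair, again as an illustrative sketch;
 * "my_pool" and "struct my_obj" refer to the hypothetical setup sketched
 * after objpool_init() above. objpool_pop() returns NULL when every object
 * is currently in use.
 *
 *      struct my_obj *o = objpool_pop(&my_pool);
 *
 *      if (o) {
 *              ... use the object ...
 *              objpool_push(o, &my_pool);
 *      }
 */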

/* forcibly release the whole objpool */
void objpool_free(struct objpool_head *pool)
{
        if (!pool->cpu_slots)
                return;

        /* release percpu slots */
        objpool_fini_percpu_slots(pool);

        /* call user's cleanup callback if provided */
        if (pool->release)
                pool->release(pool, pool->context);
}
EXPORT_SYMBOL_GPL(objpool_free);

/* drop the allocated object rather than reclaiming it to the objpool */
int objpool_drop(void *obj, struct objpool_head *pool)
{
        if (!obj || !pool)
                return -EINVAL;

        if (refcount_dec_and_test(&pool->ref)) {
                objpool_free(pool);
                return 0;
        }

        return -EAGAIN;
}
EXPORT_SYMBOL_GPL(objpool_drop);
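
/*
 * During shutdown, a holder of a popped object can dispose of it with
 * objpool_drop() instead of pushing it back (illustrative sketch; "o" and
 * "my_pool" follow the hypothetical usage sketched above):
 *
 *      objpool_drop(o, &my_pool);
 *
 * The call returns 0 when it released the last reference (and thus freed
 * the pool via objpool_free()), or -EAGAIN while other references remain.
 */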

/* drop unused objects and deref the objpool for releasing */
void objpool_fini(struct objpool_head *pool)
{
        int count = 1; /* extra ref for objpool itself */

        /* drop all remaining objects from the objpool */
        while (objpool_pop(pool))
                count++;

        if (refcount_sub_and_test(count, &pool->ref))
                objpool_free(pool);
}
EXPORT_SYMBOL_GPL(objpool_fini);
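
/*
 * Typical teardown, as an illustrative sketch; "my_pool" is the hypothetical
 * pool from the sketches above. objpool_fini() drains the objects still
 * sitting in the pool and drops the pool's own reference; objects that are
 * still held elsewhere keep the pool alive until they are dropped with
 * objpool_drop().
 *
 *      objpool_fini(&my_pool);
 */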