// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) "%s() " fmt "\n", __func__
#include <linux/generic-radix-tree.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/vmalloc.h>
#include "rcu_pending.h"
#include "darray.h"
#include "util.h"
#define static_array_for_each(_a, _i) \
for (typeof(&(_a)[0]) _i = _a; \
_i < (_a) + ARRAY_SIZE(_a); \
_i++)
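
/*
 * Sentinel "process" values: RCU_PENDING_KVFREE_FN and RCU_PENDING_CALL_RCU_FN
 * below encode these as fake process callbacks, selecting built-in
 * kvfree()/call_rcu() handling in __process_finished_items():
 */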
enum rcu_pending_special {
RCU_PENDING_KVFREE = 1,
RCU_PENDING_CALL_RCU = 2,
};
#define RCU_PENDING_KVFREE_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE)
#define RCU_PENDING_CALL_RCU_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU)
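
/*
 * Thin wrappers that dispatch to the SRCU variants when an srcu_struct was
 * supplied at init time, and to plain RCU otherwise:
 */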
static inline unsigned long __get_state_synchronize_rcu(struct srcu_struct *ssp)
{
return ssp
? get_state_synchronize_srcu(ssp)
: get_state_synchronize_rcu();
}
static inline unsigned long __start_poll_synchronize_rcu(struct srcu_struct *ssp)
{
return ssp
? start_poll_synchronize_srcu(ssp)
: start_poll_synchronize_rcu();
}
static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, unsigned long cookie)
{
return ssp
? poll_state_synchronize_srcu(ssp, cookie)
: poll_state_synchronize_rcu(cookie);
}
static inline void __rcu_barrier(struct srcu_struct *ssp)
{
	if (ssp)
		srcu_barrier(ssp);
	else
		rcu_barrier();
}
static inline void __call_rcu(struct srcu_struct *ssp, struct rcu_head *rhp,
rcu_callback_t func)
{
if (ssp)
call_srcu(ssp, rhp, func);
else
call_rcu(rhp, func);
}
struct rcu_pending_seq {
/*
* We're using a radix tree like a vector - we're just pushing elements
* onto the end; we're using a radix tree instead of an actual vector to
* avoid reallocation overhead
*/
GENRADIX(struct rcu_head *) objs;
size_t nr;
struct rcu_head **cursor;
unsigned long seq;
};
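
/*
 * Fallback singly linked list of objects all waiting on the same grace period
 * sequence number; used for vmalloc pointers and when radix tree node
 * allocation fails:
 */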
struct rcu_pending_list {
struct rcu_head *head;
struct rcu_head *tail;
unsigned long seq;
};
struct rcu_pending_pcpu {
struct rcu_pending *parent;
spinlock_t lock;
int cpu;
/*
* We can't bound the number of unprocessed gp sequence numbers, and we
* can't efficiently merge radix trees for expired grace periods, so we
* need darray/vector:
*/
DARRAY_PREALLOCATED(struct rcu_pending_seq, 4) objs;
	/* The last entry is for already-expired objects: */
struct rcu_pending_list lists[NUM_ACTIVE_RCU_POLL_OLDSTATE + 1];
struct rcu_head cb;
bool cb_armed;
struct work_struct work;
};
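
/*
 * Does this CPU still have objects waiting on a grace period, either in the
 * radix tree vectors or on the fallback lists? Caller holds @p->lock.
 */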
static bool __rcu_pending_has_pending(struct rcu_pending_pcpu *p)
{
if (p->objs.nr)
return true;
static_array_for_each(p->lists, i)
if (i->head)
return true;
return false;
}
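
/*
 * The !__KERNEL__ branches below exist because the userspace shim's rcu_head
 * uses a structure for its ->next field, so the link pointer lives at
 * next.next:
 */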
static void rcu_pending_list_merge(struct rcu_pending_list *l1,
struct rcu_pending_list *l2)
{
#ifdef __KERNEL__
if (!l1->head)
l1->head = l2->head;
else
l1->tail->next = l2->head;
#else
if (!l1->head)
l1->head = l2->head;
else
l1->tail->next.next = (void *) l2->head;
#endif
l1->tail = l2->tail;
l2->head = l2->tail = NULL;
}
static void rcu_pending_list_add(struct rcu_pending_list *l,
struct rcu_head *n)
{
#ifdef __KERNEL__
if (!l->head)
l->head = n;
else
l->tail->next = n;
l->tail = n;
n->next = NULL;
#else
if (!l->head)
l->head = n;
else
l->tail->next.next = (void *) n;
l->tail = n;
n->next.next = NULL;
#endif
}
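
/*
 * Move objects from any per-seq list whose grace period has completed onto the
 * extra "expired" list at the end of p->lists:
 */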
static void merge_expired_lists(struct rcu_pending_pcpu *p)
{
struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE];
for (struct rcu_pending_list *i = p->lists; i < expired; i++)
if (i->head && __poll_state_synchronize_rcu(p->parent->srcu, i->seq))
rcu_pending_list_merge(expired, i);
}
#ifndef __KERNEL__
static inline void kfree_bulk(size_t nr, void **p)
{
	while (nr--)
		kfree(p[nr]);
}
#define local_irq_save(flags) \
do { \
flags = 0; \
} while (0)
#endif
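
/*
 * Process everything whose grace period has elapsed: bulk-free (or invoke the
 * callback on) the oldest radix tree vector, then walk the expired list.
 * Called with @p->lock held; drops the lock for the actual processing.
 */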
static noinline void __process_finished_items(struct rcu_pending *pending,
struct rcu_pending_pcpu *p,
unsigned long flags)
{
struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE];
struct rcu_pending_seq objs = {};
struct rcu_head *list = NULL;
if (p->objs.nr &&
__poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) {
objs = p->objs.data[0];
darray_remove_item(&p->objs, p->objs.data);
}
merge_expired_lists(p);
list = expired->head;
expired->head = expired->tail = NULL;
spin_unlock_irqrestore(&p->lock, flags);
switch ((ulong) pending->process) {
case RCU_PENDING_KVFREE:
for (size_t i = 0; i < objs.nr; ) {
size_t nr_this_node = min(GENRADIX_NODE_SIZE / sizeof(void *), objs.nr - i);
kfree_bulk(nr_this_node, (void **) genradix_ptr(&objs.objs, i));
i += nr_this_node;
}
genradix_free(&objs.objs);
while (list) {
struct rcu_head *obj = list;
#ifdef __KERNEL__
list = obj->next;
#else
list = (void *) obj->next.next;
#endif
/*
* low bit of pointer indicates whether rcu_head needs
* to be freed - kvfree_rcu_mightsleep()
*/
BUILD_BUG_ON(ARCH_SLAB_MINALIGN == 0);
void *ptr = (void *)(((unsigned long) obj->func) & ~1UL);
bool free_head = ((unsigned long) obj->func) & 1UL;
kvfree(ptr);
if (free_head)
kfree(obj);
}
break;
case RCU_PENDING_CALL_RCU:
for (size_t i = 0; i < objs.nr; i++) {
struct rcu_head *obj = *genradix_ptr(&objs.objs, i);
obj->func(obj);
}
genradix_free(&objs.objs);
while (list) {
struct rcu_head *obj = list;
#ifdef __KERNEL__
list = obj->next;
#else
list = (void *) obj->next.next;
#endif
obj->func(obj);
}
break;
default:
for (size_t i = 0; i < objs.nr; i++)
pending->process(pending, *genradix_ptr(&objs.objs, i));
genradix_free(&objs.objs);
while (list) {
struct rcu_head *obj = list;
#ifdef __KERNEL__
list = obj->next;
#else
list = (void *) obj->next.next;
#endif
pending->process(pending, obj);
}
break;
}
}
static bool process_finished_items(struct rcu_pending *pending,
struct rcu_pending_pcpu *p,
unsigned long flags)
{
	/*
	 * XXX: we should grab the gp seq once and avoid multiple function
	 * calls; this is called from the __rcu_pending_enqueue() fastpath when
	 * may_sleep == true
	 */
if ((p->objs.nr && __poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) ||
(p->lists[0].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[0].seq)) ||
(p->lists[1].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[1].seq)) ||
p->lists[2].head) {
__process_finished_items(pending, p, flags);
return true;
}
return false;
}
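
/* Per-CPU worker: keep processing until no more finished items remain: */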
static void rcu_pending_work(struct work_struct *work)
{
struct rcu_pending_pcpu *p =
container_of(work, struct rcu_pending_pcpu, work);
struct rcu_pending *pending = p->parent;
unsigned long flags;
do {
spin_lock_irqsave(&p->lock, flags);
} while (process_finished_items(pending, p, flags));
spin_unlock_irqrestore(&p->lock, flags);
}
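
/*
 * (S)RCU callback: kick the per-CPU worker to process finished items, then
 * re-arm ourselves if this CPU still has objects pending:
 */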
static void rcu_pending_rcu_cb(struct rcu_head *rcu)
{
struct rcu_pending_pcpu *p = container_of(rcu, struct rcu_pending_pcpu, cb);
schedule_work_on(p->cpu, &p->work);
unsigned long flags;
spin_lock_irqsave(&p->lock, flags);
if (__rcu_pending_has_pending(p)) {
spin_unlock_irqrestore(&p->lock, flags);
__call_rcu(p->parent->srcu, &p->cb, rcu_pending_rcu_cb);
} else {
p->cb_armed = false;
spin_unlock_irqrestore(&p->lock, flags);
}
}
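
/*
 * Find the radix tree vector tracking @seq, or push a new one onto the darray
 * if none exists yet; returns NULL if the GFP_ATOMIC push fails:
 */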
static __always_inline struct rcu_pending_seq *
get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq)
{
darray_for_each_reverse(p->objs, objs)
if (objs->seq == seq)
return objs;
if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC))
return NULL;
return &darray_last(p->objs);
}
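
/*
 * Slow path: add @head to the linked list for @seq, claiming (or merging to
 * free up) a list slot as needed. Returns true if the caller needs to start a
 * grace period for @seq; may drop and retake @p->lock.
 */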
static noinline bool
rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq,
struct rcu_head *head, void *ptr,
unsigned long *flags)
{
if (ptr) {
if (!head) {
			/*
			 * kvfree_rcu_mightsleep(): we weren't passed an
			 * rcu_head, but we need one: use the low bit of the
			 * pointer being freed to flag that the head needs to
			 * be freed as well:
			 */
ptr = (void *)(((unsigned long) ptr)|1UL);
head = kmalloc(sizeof(*head), __GFP_NOWARN);
if (!head) {
spin_unlock_irqrestore(&p->lock, *flags);
head = kmalloc(sizeof(*head), GFP_KERNEL|__GFP_NOFAIL);
/*
* dropped lock, did GFP_KERNEL allocation,
* check for gp expiration
*/
if (unlikely(__poll_state_synchronize_rcu(p->parent->srcu, seq))) {
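					/* clear the low tag bit set above before freeing: */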
kvfree(--ptr);
kfree(head);
spin_lock_irqsave(&p->lock, *flags);
return false;
}
}
}
head->func = ptr;
}
again:
for (struct rcu_pending_list *i = p->lists;
i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
if (i->seq == seq) {
rcu_pending_list_add(i, head);
return false;
}
}
for (struct rcu_pending_list *i = p->lists;
i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
if (!i->head) {
i->seq = seq;
rcu_pending_list_add(i, head);
return true;
}
}
merge_expired_lists(p);
goto again;
}
/*
 * __rcu_pending_enqueue: enqueue a pending RCU item, to be processed (via
 * pending->process) once a grace period elapses.
 *
 * Attempt to enqueue items onto a radix tree; if memory allocation fails, fall
 * back to a linked list.
 *
 * - If @ptr is NULL, we're enqueuing an item for a generic @pending with a
 *   process callback
 *
 * - If @ptr and @head are both not NULL, we're kvfree_rcu()
 *
 * - If @ptr is not NULL and @head is, we're kvfree_rcu_mightsleep()
 *
 * - If @may_sleep is true, will do GFP_KERNEL memory allocations and process
 *   expired items.
 */
static __always_inline void
__rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head,
void *ptr, bool may_sleep)
{
struct rcu_pending_pcpu *p;
struct rcu_pending_seq *objs;
struct genradix_node *new_node = NULL;
unsigned long seq, flags;
bool start_gp = false;
BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN));
local_irq_save(flags);
p = this_cpu_ptr(pending->p);
spin_lock(&p->lock);
seq = __get_state_synchronize_rcu(pending->srcu);
restart:
if (may_sleep &&
unlikely(process_finished_items(pending, p, flags)))
goto check_expired;
/*
* In kvfree_rcu() mode, the radix tree is only for slab pointers so
* that we can do kfree_bulk() - vmalloc pointers always use the linked
* list:
*/
if (ptr && unlikely(is_vmalloc_addr(ptr)))
goto list_add;
objs = get_object_radix(p, seq);
if (unlikely(!objs))
goto list_add;
if (unlikely(!objs->cursor)) {
/*
* New radix tree nodes must be added under @p->lock because the
* tree root is in a darray that can be resized (typically,
* genradix supports concurrent unlocked allocation of new
* nodes) - hence preallocation and the retry loop:
*/
objs->cursor = genradix_ptr_alloc_preallocated_inlined(&objs->objs,
objs->nr, &new_node, GFP_ATOMIC|__GFP_NOWARN);
if (unlikely(!objs->cursor)) {
if (may_sleep) {
spin_unlock_irqrestore(&p->lock, flags);
gfp_t gfp = GFP_KERNEL;
if (!head)
gfp |= __GFP_NOFAIL;
new_node = genradix_alloc_node(gfp);
if (!new_node)
may_sleep = false;
goto check_expired;
}
list_add:
start_gp = rcu_pending_enqueue_list(p, seq, head, ptr, &flags);
goto start_gp;
}
}
*objs->cursor++ = ptr ?: head;
/* zero cursor if we hit the end of a radix tree node: */
if (!(((ulong) objs->cursor) & (GENRADIX_NODE_SIZE - 1)))
objs->cursor = NULL;
start_gp = !objs->nr;
objs->nr++;
start_gp:
if (unlikely(start_gp)) {
		/*
		 * We only have one callback (ideally, we would have one for
		 * every outstanding grace period) - so if our callback is
		 * already in flight, we may still have to start a grace period
		 * (since we used get_state() above, not start_poll())
		 */
if (!p->cb_armed) {
p->cb_armed = true;
__call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb);
} else {
__start_poll_synchronize_rcu(pending->srcu);
}
}
spin_unlock_irqrestore(&p->lock, flags);
free_node:
if (new_node)
genradix_free_node(new_node);
return;
check_expired:
if (unlikely(__poll_state_synchronize_rcu(pending->srcu, seq))) {
switch ((ulong) pending->process) {
case RCU_PENDING_KVFREE:
kvfree(ptr);
break;
case RCU_PENDING_CALL_RCU:
head->func(head);
break;
default:
pending->process(pending, head);
break;
}
goto free_node;
}
local_irq_save(flags);
p = this_cpu_ptr(pending->p);
spin_lock(&p->lock);
goto restart;
}
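
/*
 * rcu_pending_enqueue() - queue @obj for processing once the current RCU (or
 * SRCU) grace period has elapsed:
 */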
void rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *obj)
{
__rcu_pending_enqueue(pending, obj, NULL, true);
}
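
/*
 * Remove and return one pending object from this CPU, without waiting for its
 * grace period to complete; returns NULL if nothing is pending:
 */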
static struct rcu_head *rcu_pending_pcpu_dequeue(struct rcu_pending_pcpu *p)
{
struct rcu_head *ret = NULL;
spin_lock_irq(&p->lock);
darray_for_each(p->objs, objs)
if (objs->nr) {
ret = *genradix_ptr(&objs->objs, --objs->nr);
objs->cursor = NULL;
if (!objs->nr)
genradix_free(&objs->objs);
goto out;
}
static_array_for_each(p->lists, i)
if (i->head) {
ret = i->head;
#ifdef __KERNEL__
i->head = ret->next;
#else
i->head = (void *) ret->next.next;
#endif
if (!i->head)
i->tail = NULL;
goto out;
}
out:
spin_unlock_irq(&p->lock);
return ret;
}
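
/* Dequeue from the current CPU's pending objects only: */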
struct rcu_head *rcu_pending_dequeue(struct rcu_pending *pending)
{
return rcu_pending_pcpu_dequeue(raw_cpu_ptr(pending->p));
}
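
/* Dequeue, trying the current CPU first and then every possible CPU: */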
struct rcu_head *rcu_pending_dequeue_from_all(struct rcu_pending *pending)
{
struct rcu_head *ret = rcu_pending_dequeue(pending);
if (ret)
return ret;
int cpu;
for_each_possible_cpu(cpu) {
ret = rcu_pending_pcpu_dequeue(per_cpu_ptr(pending->p, cpu));
if (ret)
break;
}
return ret;
}
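
/* True if any CPU still has pending objects or an armed RCU callback: */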
static bool rcu_pending_has_pending_or_armed(struct rcu_pending *pending)
{
int cpu;
for_each_possible_cpu(cpu) {
struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
spin_lock_irq(&p->lock);
if (__rcu_pending_has_pending(p) || p->cb_armed) {
spin_unlock_irq(&p->lock);
return true;
}
spin_unlock_irq(&p->lock);
}
return false;
}
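
/*
 * rcu_pending_exit() - tear down a rcu_pending: wait for all outstanding grace
 * periods, flush the per-CPU work items, then free the per-CPU data:
 */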
void rcu_pending_exit(struct rcu_pending *pending)
{
int cpu;
if (!pending->p)
return;
while (rcu_pending_has_pending_or_armed(pending)) {
__rcu_barrier(pending->srcu);
for_each_possible_cpu(cpu) {
struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
flush_work(&p->work);
}
}
for_each_possible_cpu(cpu) {
struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
flush_work(&p->work);
}
for_each_possible_cpu(cpu) {
struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
static_array_for_each(p->lists, i)
WARN_ON(i->head);
WARN_ON(p->objs.nr);
darray_exit(&p->objs);
}
free_percpu(pending->p);
}
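
/*
 * Example usage (an illustrative sketch, not taken from a real caller;
 * my_pending, struct my_obj and my_obj_free() are hypothetical):
 *
 *	struct my_obj {
 *		struct rcu_head	rcu;
 *		...
 *	};
 *
 *	static struct rcu_pending my_pending;
 *
 *	static void my_obj_free(struct rcu_pending *pending, struct rcu_head *rcu)
 *	{
 *		kfree(container_of(rcu, struct my_obj, rcu));
 *	}
 *
 *	// at init:
 *	ret = rcu_pending_init(&my_pending, NULL, my_obj_free);
 *
 *	// to defer freeing an object until a grace period has elapsed:
 *	rcu_pending_enqueue(&my_pending, &obj->rcu);
 *
 *	// at shutdown, once all enqueues have stopped:
 *	rcu_pending_exit(&my_pending);
 */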
/**
 * rcu_pending_init() - initialize a rcu_pending
 *
 * @pending:	Object to init
 * @srcu:	Optional srcu_struct; if NULL, the normal RCU flavor is used
 * @process:	Callback function invoked on objects once their grace period
 *		has elapsed; the special values RCU_PENDING_KVFREE_FN and
 *		RCU_PENDING_CALL_RCU_FN select built-in kvfree()/call_rcu()
 *		handling.
 *
 * Returns: 0 on success, -ENOMEM on allocation failure.
 */
int rcu_pending_init(struct rcu_pending *pending,
struct srcu_struct *srcu,
rcu_pending_process_fn process)
{
pending->p = alloc_percpu(struct rcu_pending_pcpu);
if (!pending->p)
return -ENOMEM;
int cpu;
for_each_possible_cpu(cpu) {
struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
p->parent = pending;
p->cpu = cpu;
spin_lock_init(&p->lock);
darray_init(&p->objs);
INIT_WORK(&p->work, rcu_pending_work);
}
pending->srcu = srcu;
pending->process = process;
return 0;
}