drivers/md/dm-vdo/indexer/radix-sort.c - linux - Git at Google

 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2023 Red Hat
  */

 #include "radix-sort.h"

 #include <linux/limits.h>
 #include <linux/types.h>

 #include "memory-alloc.h"
 #include "string-utils.h"

 /*
  * This implementation allocates one large object to do the sorting, which can be reused as many
  * times as desired. The amount of memory required is logarithmically proportional to the number of
  * keys to be sorted.
  */

 /* Piles smaller than this are handled with a simple insertion sort. */
 #define INSERTION_SORT_THRESHOLD 12

 /* Sort keys are pointers to immutable fixed-length arrays of bytes. */
 typedef const u8 *sort_key_t;

 /*
  * The keys are separated into piles based on the byte in each keys at the current offset, so the
  * number of keys with each byte must be counted.
  */
 struct histogram {
 	/* The number of non-empty bins */
 	u16 used;
 	/* The index (key byte) of the first non-empty bin */
 	u16 first;
 	/* The index (key byte) of the last non-empty bin */
 	u16 last;
 	/* The number of occurrences of each specific byte */
 	u32 size[256];
 };

 /*
  * Sub-tasks are manually managed on a stack, both for performance and to put a logarithmic bound
  * on the stack space needed.
  */
 struct task {
 	/* Pointer to the first key to sort. */
 	sort_key_t *first_key;
 	/* Pointer to the last key to sort. */
 	sort_key_t *last_key;
 	/* The offset into the key at which to continue sorting. */
 	u16 offset;
 	/* The number of bytes remaining in the sort keys. */
 	u16 length;
 };

 struct radix_sorter {
 	unsigned int count;
 	struct histogram bins;
 	sort_key_t *pile[256];
 	struct task *end_of_stack;
 	struct task insertion_list[256];
 	struct task stack[];
 };

 /* Compare a segment of two fixed-length keys starting at an offset. */
 static inline int compare(sort_key_t key1, sort_key_t key2, u16 offset, u16 length)
 {
 	return memcmp(&key1[offset], &key2[offset], length);
 }

 /* Insert the next unsorted key into an array of sorted keys. */
 static inline void insert_key(const struct task task, sort_key_t *next)
 {
 	/* Pull the unsorted key out, freeing up the array slot. */
 	sort_key_t unsorted = *next;

 	/* Compare the key to the preceding sorted entries, shifting down ones that are larger. */
 	while ((--next >= task.first_key) &&
 	       (compare(unsorted, next[0], task.offset, task.length) < 0))
 		next[1] = next[0];

 	/* Insert the key into the last slot that was cleared, sorting it. */
 	next[1] = unsorted;
 }

 /*
  * Sort a range of key segments using an insertion sort. This simple sort is faster than the
  * 256-way radix sort when the number of keys to sort is small.
  */
 static inline void insertion_sort(const struct task task)
 {
 	sort_key_t *next;

 	for (next = task.first_key + 1; next <= task.last_key; next++)
 		insert_key(task, next);
 }

 /* Push a sorting task onto a task stack. */
 static inline void push_task(struct task **stack_pointer, sort_key_t *first_key,
 			     u32 count, u16 offset, u16 length)
 {
 	struct task *task = (*stack_pointer)++;

 	task->first_key = first_key;
 	task->last_key = &first_key[count - 1];
 	task->offset = offset;
 	task->length = length;
 }

 static inline void swap_keys(sort_key_t *a, sort_key_t *b)
 {
 	sort_key_t c = *a;
 	*a = *b;
 	*b = c;
 }

 /*
  * Count the number of times each byte value appears in the arrays of keys to sort at the current
  * offset, keeping track of the number of non-empty bins, and the index of the first and last
  * non-empty bin.
  */
 static inline void measure_bins(const struct task task, struct histogram *bins)
 {
 	sort_key_t *key_ptr;

 	/*
 	 * Subtle invariant: bins->used and bins->size[] are zero because the sorting code clears
 	 * it all out as it goes. Even though this structure is re-used, we don't need to pay to
 	 * zero it before starting a new tally.
 	 */
 	bins->first = U8_MAX;
 	bins->last = 0;

 	for (key_ptr = task.first_key; key_ptr <= task.last_key; key_ptr++) {
 		/* Increment the count for the byte in the key at the current offset. */
 		u8 bin = (*key_ptr)[task.offset];
 		u32 size = ++bins->size[bin];

 		/* Track non-empty bins. */
 		if (size == 1) {
 			bins->used += 1;
 			if (bin < bins->first)
 				bins->first = bin;

 			if (bin > bins->last)
 				bins->last = bin;
 		}
 	}
 }

 /*
  * Convert the bin sizes to pointers to where each pile goes.
  *
  *   pile[0] = first_key + bin->size[0],
  *   pile[1] = pile[0]  + bin->size[1], etc.
  *
  * After the keys are moved to the appropriate pile, we'll need to sort each of the piles by the
  * next radix position. A new task is put on the stack for each pile containing lots of keys, or a
  * new task is put on the list for each pile containing few keys.
  *
  * @stack: pointer the top of the stack
  * @end_of_stack: the end of the stack
  * @list: pointer the head of the list
  * @pile: array for pointers to the end of each pile
  * @bins: the histogram of the sizes of each pile
  * @first_key: the first key of the stack
  * @offset: the next radix position to sort by
  * @length: the number of bytes remaining in the sort keys
  *
  * Return: UDS_SUCCESS or an error code
  */
 static inline int push_bins(struct task **stack, struct task *end_of_stack,
 			    struct task **list, sort_key_t *pile[],
 			    struct histogram *bins, sort_key_t *first_key,
 			    u16 offset, u16 length)
 {
 	sort_key_t *pile_start = first_key;
 	int bin;

 	for (bin = bins->first; ; bin++) {
 		u32 size = bins->size[bin];

 		/* Skip empty piles. */
 		if (size == 0)
 			continue;

 		/* There's no need to sort empty keys. */
 		if (length > 0) {
 			if (size > INSERTION_SORT_THRESHOLD) {
 				if (*stack >= end_of_stack)
 					return UDS_BAD_STATE;

 				push_task(stack, pile_start, size, offset, length);
 			} else if (size > 1) {
 				push_task(list, pile_start, size, offset, length);
 			}
 		}

 		pile_start += size;
 		pile[bin] = pile_start;
 		if (--bins->used == 0)
 			break;
 	}

 	return UDS_SUCCESS;
 }

 int uds_make_radix_sorter(unsigned int count, struct radix_sorter **sorter)
 {
 	int result;
 	unsigned int stack_size = count / INSERTION_SORT_THRESHOLD;
 	struct radix_sorter *radix_sorter;

 	result = vdo_allocate_extended(struct radix_sorter, stack_size, struct task,
 				       __func__, &radix_sorter);
 	if (result != VDO_SUCCESS)
 		return result;

 	radix_sorter->count = count;
 	radix_sorter->end_of_stack = radix_sorter->stack + stack_size;
 	*sorter = radix_sorter;
 	return UDS_SUCCESS;
 }

 void uds_free_radix_sorter(struct radix_sorter *sorter)
 {
 	vdo_free(sorter);
 }

 /*
  * Sort pointers to fixed-length keys (arrays of bytes) using a radix sort. The sort implementation
  * is unstable, so the relative ordering of equal keys is not preserved.
  */
 int uds_radix_sort(struct radix_sorter *sorter, const unsigned char *keys[],
 		   unsigned int count, unsigned short length)
 {
 	struct task start;
 	struct histogram *bins = &sorter->bins;
 	sort_key_t **pile = sorter->pile;
 	struct task *task_stack = sorter->stack;

 	/* All zero-length keys are identical and therefore already sorted. */
 	if ((count == 0) || (length == 0))
 		return UDS_SUCCESS;

 	/* The initial task is to sort the entire length of all the keys. */
 	start = (struct task) {
 		.first_key = keys,
 		.last_key = &keys[count - 1],
 		.offset = 0,
 		.length = length,
 	};

 	if (count <= INSERTION_SORT_THRESHOLD) {
 		insertion_sort(start);
 		return UDS_SUCCESS;
 	}

 	if (count > sorter->count)
 		return UDS_INVALID_ARGUMENT;

 	/*
 	 * Repeatedly consume a sorting task from the stack and process it, pushing new sub-tasks
 	 * onto the stack for each radix-sorted pile. When all tasks and sub-tasks have been
 	 * processed, the stack will be empty and all the keys in the starting task will be fully
 	 * sorted.
 	 */
 	for (*task_stack = start; task_stack >= sorter->stack; task_stack--) {
 		const struct task task = *task_stack;
 		struct task *insertion_task_list;
 		int result;
 		sort_key_t *fence;
 		sort_key_t *end;

 		measure_bins(task, bins);

 		/*
 		 * Now that we know how large each bin is, generate pointers for each of the piles
 		 * and push a new task to sort each pile by the next radix byte.
 		 */
 		insertion_task_list = sorter->insertion_list;
 		result = push_bins(&task_stack, sorter->end_of_stack,
 				   &insertion_task_list, pile, bins, task.first_key,
 				   task.offset + 1, task.length - 1);
 		if (result != UDS_SUCCESS) {
 			memset(bins, 0, sizeof(*bins));
 			return result;
 		}

 		/* Now bins->used is zero again. */

 		/*
 		 * Don't bother processing the last pile: when piles 0..N-1 are all in place, then
 		 * pile N must also be in place.
 		 */
 		end = task.last_key - bins->size[bins->last];
 		bins->size[bins->last] = 0;

 		for (fence = task.first_key; fence <= end; ) {
 			u8 bin;
 			sort_key_t key = *fence;

 			/*
 			 * The radix byte of the key tells us which pile it belongs in. Swap it for
 			 * an unprocessed item just below that pile, and repeat.
 			 */
 			while (--pile[bin = key[task.offset]] > fence)
 				swap_keys(pile[bin], &key);

 			/*
 			 * The pile reached the fence. Put the key at the bottom of that pile,
 			 * completing it, and advance the fence to the next pile.
 			 */
 			*fence = key;
 			fence += bins->size[bin];
 			bins->size[bin] = 0;
 		}

 		/* Now bins->size[] is all zero again. */

 		/*
 		 * When the number of keys in a task gets small enough, it is faster to use an
 		 * insertion sort than to keep subdividing into tiny piles.
 		 */
 		while (--insertion_task_list >= sorter->insertion_list)
 			insertion_sort(*insertion_task_list);
 	}

 	return UDS_SUCCESS;
 }
	// SPDX-License-Identifier: GPL-2.0-only
	/*
	* Copyright 2023 Red Hat
	*/

	#include "radix-sort.h"

	#include <linux/limits.h>
	#include <linux/types.h>

	#include "memory-alloc.h"
	#include "string-utils.h"

	/*
	* This implementation allocates one large object to do the sorting, which can be reused as many
	* times as desired. The amount of memory required is logarithmically proportional to the number of
	* keys to be sorted.
	*/

	/* Piles smaller than this are handled with a simple insertion sort. */
	#define INSERTION_SORT_THRESHOLD 12

	/* Sort keys are pointers to immutable fixed-length arrays of bytes. */
	typedef const u8 *sort_key_t;

	/*
	* The keys are separated into piles based on the byte in each keys at the current offset, so the
	* number of keys with each byte must be counted.
	*/
	struct histogram {
	/* The number of non-empty bins */
	u16 used;
	/* The index (key byte) of the first non-empty bin */
	u16 first;
	/* The index (key byte) of the last non-empty bin */
	u16 last;
	/* The number of occurrences of each specific byte */
	u32 size[256];
	};

	/*
	* Sub-tasks are manually managed on a stack, both for performance and to put a logarithmic bound
	* on the stack space needed.
	*/
	struct task {
	/* Pointer to the first key to sort. */
	sort_key_t *first_key;
	/* Pointer to the last key to sort. */
	sort_key_t *last_key;
	/* The offset into the key at which to continue sorting. */
	u16 offset;
	/* The number of bytes remaining in the sort keys. */
	u16 length;
	};

	struct radix_sorter {
	unsigned int count;
	struct histogram bins;
	sort_key_t *pile[256];
	struct task *end_of_stack;
	struct task insertion_list[256];
	struct task stack[];
	};

	/* Compare a segment of two fixed-length keys starting at an offset. */
	static inline int compare(sort_key_t key1, sort_key_t key2, u16 offset, u16 length)
	{
	return memcmp(&key1[offset], &key2[offset], length);
	}

	/* Insert the next unsorted key into an array of sorted keys. */
	static inline void insert_key(const struct task task, sort_key_t *next)
	{
	/* Pull the unsorted key out, freeing up the array slot. */
	sort_key_t unsorted = *next;

	/* Compare the key to the preceding sorted entries, shifting down ones that are larger. */
	while ((--next >= task.first_key) &&
	(compare(unsorted, next[0], task.offset, task.length) < 0))
	next[1] = next[0];

	/* Insert the key into the last slot that was cleared, sorting it. */
	next[1] = unsorted;
	}

	/*
	* Sort a range of key segments using an insertion sort. This simple sort is faster than the
	* 256-way radix sort when the number of keys to sort is small.
	*/
	static inline void insertion_sort(const struct task task)
	{
	sort_key_t *next;

	for (next = task.first_key + 1; next <= task.last_key; next++)
	insert_key(task, next);
	}

	/* Push a sorting task onto a task stack. */
	static inline void push_task(struct task *stack_pointer, sort_key_t first_key,
	u32 count, u16 offset, u16 length)
	{
	struct task task = (stack_pointer)++;

	task->first_key = first_key;
	task->last_key = &first_key[count - 1];
	task->offset = offset;
	task->length = length;
	}

	static inline void swap_keys(sort_key_t a, sort_key_t b)
	{
	sort_key_t c = *a;
	a = b;
	*b = c;
	}

	/*
	* Count the number of times each byte value appears in the arrays of keys to sort at the current
	* offset, keeping track of the number of non-empty bins, and the index of the first and last
	* non-empty bin.
	*/
	static inline void measure_bins(const struct task task, struct histogram *bins)
	{
	sort_key_t *key_ptr;

	/*
	* Subtle invariant: bins->used and bins->size[] are zero because the sorting code clears
	* it all out as it goes. Even though this structure is re-used, we don't need to pay to
	* zero it before starting a new tally.
	*/
	bins->first = U8_MAX;
	bins->last = 0;

	for (key_ptr = task.first_key; key_ptr <= task.last_key; key_ptr++) {
	/* Increment the count for the byte in the key at the current offset. */
	u8 bin = (*key_ptr)[task.offset];
	u32 size = ++bins->size[bin];

	/* Track non-empty bins. */
	if (size == 1) {
	bins->used += 1;
	if (bin < bins->first)
	bins->first = bin;

	if (bin > bins->last)
	bins->last = bin;
	}
	}
	}

	/*
	* Convert the bin sizes to pointers to where each pile goes.
	*
	* pile[0] = first_key + bin->size[0],
	* pile[1] = pile[0] + bin->size[1], etc.
	*
	* After the keys are moved to the appropriate pile, we'll need to sort each of the piles by the
	* next radix position. A new task is put on the stack for each pile containing lots of keys, or a
	* new task is put on the list for each pile containing few keys.
	*
	* @stack: pointer the top of the stack
	* @end_of_stack: the end of the stack
	* @list: pointer the head of the list
	* @pile: array for pointers to the end of each pile
	* @bins: the histogram of the sizes of each pile
	* @first_key: the first key of the stack
	* @offset: the next radix position to sort by
	* @length: the number of bytes remaining in the sort keys
	*
	* Return: UDS_SUCCESS or an error code
	*/
	static inline int push_bins(struct task *stack, struct task end_of_stack,
	struct task *list, sort_key_t pile[],
	struct histogram bins, sort_key_t first_key,
	u16 offset, u16 length)
	{
	sort_key_t *pile_start = first_key;
	int bin;

	for (bin = bins->first; ; bin++) {
	u32 size = bins->size[bin];

	/* Skip empty piles. */
	if (size == 0)
	continue;

	/* There's no need to sort empty keys. */
	if (length > 0) {
	if (size > INSERTION_SORT_THRESHOLD) {
	if (*stack >= end_of_stack)
	return UDS_BAD_STATE;

	push_task(stack, pile_start, size, offset, length);
	} else if (size > 1) {
	push_task(list, pile_start, size, offset, length);
	}
	}

	pile_start += size;
	pile[bin] = pile_start;
	if (--bins->used == 0)
	break;
	}

	return UDS_SUCCESS;
	}

	int uds_make_radix_sorter(unsigned int count, struct radix_sorter **sorter)
	{
	int result;
	unsigned int stack_size = count / INSERTION_SORT_THRESHOLD;
	struct radix_sorter *radix_sorter;

	result = vdo_allocate_extended(struct radix_sorter, stack_size, struct task,
	__func__, &radix_sorter);
	if (result != VDO_SUCCESS)
	return result;

	radix_sorter->count = count;
	radix_sorter->end_of_stack = radix_sorter->stack + stack_size;
	*sorter = radix_sorter;
	return UDS_SUCCESS;
	}

	void uds_free_radix_sorter(struct radix_sorter *sorter)
	{
	vdo_free(sorter);
	}

	/*
	* Sort pointers to fixed-length keys (arrays of bytes) using a radix sort. The sort implementation
	* is unstable, so the relative ordering of equal keys is not preserved.
	*/
	int uds_radix_sort(struct radix_sorter sorter, const unsigned char keys[],
	unsigned int count, unsigned short length)
	{
	struct task start;
	struct histogram *bins = &sorter->bins;
	sort_key_t **pile = sorter->pile;
	struct task *task_stack = sorter->stack;

	/* All zero-length keys are identical and therefore already sorted. */
	if ((count == 0) \|\| (length == 0))
	return UDS_SUCCESS;

	/* The initial task is to sort the entire length of all the keys. */
	start = (struct task) {
	.first_key = keys,
	.last_key = &keys[count - 1],
	.offset = 0,
	.length = length,
	};

	if (count <= INSERTION_SORT_THRESHOLD) {
	insertion_sort(start);
	return UDS_SUCCESS;
	}

	if (count > sorter->count)
	return UDS_INVALID_ARGUMENT;

	/*
	* Repeatedly consume a sorting task from the stack and process it, pushing new sub-tasks
	* onto the stack for each radix-sorted pile. When all tasks and sub-tasks have been
	* processed, the stack will be empty and all the keys in the starting task will be fully
	* sorted.
	*/
	for (*task_stack = start; task_stack >= sorter->stack; task_stack--) {
	const struct task task = *task_stack;
	struct task *insertion_task_list;
	int result;
	sort_key_t *fence;
	sort_key_t *end;

	measure_bins(task, bins);

	/*
	* Now that we know how large each bin is, generate pointers for each of the piles
	* and push a new task to sort each pile by the next radix byte.
	*/
	insertion_task_list = sorter->insertion_list;
	result = push_bins(&task_stack, sorter->end_of_stack,
	&insertion_task_list, pile, bins, task.first_key,
	task.offset + 1, task.length - 1);
	if (result != UDS_SUCCESS) {
	memset(bins, 0, sizeof(*bins));
	return result;
	}

	/* Now bins->used is zero again. */

	/*
	* Don't bother processing the last pile: when piles 0..N-1 are all in place, then
	* pile N must also be in place.
	*/
	end = task.last_key - bins->size[bins->last];
	bins->size[bins->last] = 0;

	for (fence = task.first_key; fence <= end; ) {
	u8 bin;
	sort_key_t key = *fence;

	/*
	* The radix byte of the key tells us which pile it belongs in. Swap it for
	* an unprocessed item just below that pile, and repeat.
	*/
	while (--pile[bin = key[task.offset]] > fence)
	swap_keys(pile[bin], &key);

	/*
	* The pile reached the fence. Put the key at the bottom of that pile,
	* completing it, and advance the fence to the next pile.
	*/
	*fence = key;
	fence += bins->size[bin];
	bins->size[bin] = 0;
	}

	/* Now bins->size[] is all zero again. */

	/*
	* When the number of keys in a task gets small enough, it is faster to use an
	* insertion sort than to keep subdividing into tiny piles.
	*/
	while (--insertion_task_list >= sorter->insertion_list)
	insertion_sort(*insertion_task_list);
	}

	return UDS_SUCCESS;
	}