/* drivers/block/drbd/drbd_vli.h - linux - Git at Google */

 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
 -*- linux-c -*-
    drbd_receiver.c
    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

  */

 #ifndef _DRBD_VLI_H
 #define _DRBD_VLI_H

 /*
  * At a granularity of 4KiB storage represented per bit,
  * and storage sizes of several TiB,
  * and possibly small-bandwidth replication,
  * the bitmap transfer time can take much too long,
  * if transmitted in plain text.
  *
  * We try to reduce the transferred bitmap information
  * by encoding runlengths of bit polarity.
  *
  * We never actually need to encode a "zero" (runlengths are positive).
  * But then we have to store the value of the first bit.
  * The first bit of information thus shall encode if the first runlength
  * gives the number of set or unset bits.
  *
  * We assume that large areas are either completely set or unset,
  * which gives good compression with any runlength method,
  * even when encoding the runlength as fixed size 32bit/64bit integers.
  *
  * Still, there may be areas where the polarity flips every few bits,
  * and encoding the runlength sequence of those areas with fixed size
  * integers would be much worse than plaintext.
  *
  * We want to encode small runlength values with minimum code length,
  * while still being able to encode a Huge run of all zeros.
  *
  * Thus we need a Variable Length Integer encoding, VLI.
  *
  * For some cases, we produce more code bits than plaintext input.
  * We need to send incompressible chunks as plaintext, skip over them
  * and then see if the next chunk compresses better.
  *
  * We don't care too much about "excellent" compression ratio for large
  * runlengths (all set/all clear): whether we achieve a factor of 100
  * or 1000 is not that much of an issue.
  * We do not want to waste too much on short runlengths in the "noisy"
  * parts of the bitmap, though.
  *
  * There are endless variants of VLI, we experimented with:
  *  * simple byte-based
  *  * various bit based with different code word length.
  *
  * To avoid yet another configuration parameter (choice of bitmap compression
  * algorithm) which was difficult to explain and tune, we just chose the one
  * variant that turned out best in all test cases.
  * Based on real world usage patterns, with device sizes ranging from a few GiB
  * to several TiB, file server/mailserver/webserver/mysql/postgres,
  * mostly idle to really busy, the all time winner (though sometimes only
  * marginally better) is:
  */

 /*
  * encoding is "visualised" as
  * __little endian__ bitstream, least significant bit first (left most)
  *
  * this particular encoding is chosen so that the prefix code
  * starts as unary encoding the level, then modified so that
  * 10 levels can be described in 8bit, with minimal overhead
  * for the smaller levels.
  *
  * The number of data bits follows the fibonacci sequence, with the exception
  * of the last level (+1 data bit, so it makes 64bit total).  The only case
  * where the code is longer than the plain text is a runlength of 1:
  * 1 plain bit => 2 code bits.
 prefix    data bits                                    max val  Nº data bits
 0 x                                                         0x2            1
 10 x                                                        0x4            1
 110 xx                                                      0x8            2
 1110 xxx                                                   0x10            3
 11110 xxx xx                                               0x30            5
 111110 xx xxxxxx                                          0x130            8
 11111100  xxxxxxxx xxxxx                                 0x2130           13
 11111110  xxxxxxxx xxxxxxxx xxxxx                      0x202130           21
 11111101  xxxxxxxx xxxxxxxx xxxxxxxx  xxxxxxxx xx   0x400202130           34
 11111111  xxxxxxxx xxxxxxxx xxxxxxxx  xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56
  * maximum encodable value: 0x100000400202130 == 2**56 + some */

 /* compression "table":
  transmitted   x                                0.29
  as plaintext x                                  ........................
              x                                   ........................
             x                                    ........................
            x    0.59                         0.21........................
           x      ........................................................
          x       .. c ...................................................
         x    0.44.. o ...................................................
        x .......... d ...................................................
       x  .......... e ...................................................
      X.............   ...................................................
     x.............. b ...................................................
 2.0x............... i ...................................................
  #X................ t ...................................................
  #................. s ...........................  plain bits  ..........
 -+-----------------------------------------------------------------------
  1             16              32                              64
 */

 /* LEVEL: (total bits, prefix bits, prefix value),
  * sorted ascending by number of total bits.
  * The rest of the code table is calculated at compiletime from this. */

 /* Code table: one LEVEL(total bits, prefix bits, prefix value) entry per
  * code word length, shortest first.  The number of data bits of a level is
  * total bits minus prefix bits: 1, 1, 2, 3, 5, 8, 13, 21, 34 (fibonacci),
  * and 56 for the last level.
  * LEVEL() has to be defined by the user before VLI_L_1_1() is expanded. */
 #define VLI_L_1_1() do { \
 	LEVEL( 2, 1, 0x00); \
 	LEVEL( 3, 2, 0x01); \
 	LEVEL( 5, 3, 0x03); \
 	LEVEL( 7, 4, 0x07); \
 	LEVEL(10, 5, 0x0f); \
 	LEVEL(14, 6, 0x1f); \
 	LEVEL(21, 8, 0x3f); \
 	LEVEL(29, 8, 0x7f); \
 	LEVEL(42, 8, 0xbf); \
 	LEVEL(64, 8, 0xff); \
 	} while (0)

 /* Decode the VLI code word found in the least significant bits of @in.
  *
  * The code table is walked from the shortest level to the longest; the
  * first level whose prefix matches the low bits of @in is used.  Its data
  * bits, plus the offset accumulated from all shorter levels, are stored
  * in *@out.
  *
  * Returns the number of bits consumed (total length of the code word).
  *
  * BUG() if no level matches, as that would mean a buggy code table. */
 static inline int vli_decode_bits(u64 *out, const u64 in)
 {
 	/* smallest value encoded by the level currently looked at */
 	u64 base = 1;

 #define LEVEL(total, pfx_len, pfx)					\
 	do {								\
 		if ((in & ((1 << pfx_len) - 1)) != pfx) {		\
 			base += 1ULL << (total - pfx_len);		\
 			break;						\
 		}							\
 		*out = ((in & ((~0ULL) >> (64 - total))) >> pfx_len) + base; \
 		return total;						\
 	} while (0)

 	VLI_L_1_1();

 	/* NOT REACHED, if the VLI_L_1_1 code table is defined properly */
 	BUG();
 #undef LEVEL
 }

 /* Compute the VLI code word for @in.
  *
  * If @out is not NULL, the code word (prefix in the low bits, data bits
  * above it) is stored there; with @out == NULL only the length is computed.
  *
  * Returns the number of code bits needed,
  * -EINVAL if @in is zero,
  * -EOVERFLOW if @in is larger than the largest encodable value. */
 static inline int __vli_encode_bits(u64 *out, const u64 in)
 {
 	u64 upper = 0;	/* largest value covered by the levels seen so far */
 	u64 lower = 1;	/* smallest value of the level currently looked at */

 	if (!in)
 		return -EINVAL;

 #define LEVEL(total, pfx_len, pfx) do {				\
 		upper += 1ULL << (total - pfx_len);		\
 		if (in > upper) {				\
 			lower = upper + 1;			\
 			break;					\
 		}						\
 		if (out)					\
 			*out = ((in - lower) << pfx_len) | pfx;	\
 		return total;					\
 	} while (0)

 	VLI_L_1_1();

 	return -EOVERFLOW;
 #undef LEVEL
 }

 #undef VLI_L_1_1

 /* code from here down is independent of the actually used bit code */

 /*
  * Code length is determined by some unique (e.g. unary) prefix.
  * This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
  * not a byte stream.
  */

 /* for the bitstream, we need a cursor:
  * a single bit position, given as byte pointer plus bit offset */
 struct bitstream_cursor {
 	/* the current byte */
 	u8 *b;
 	/* the current bit within *b, normalized: 0..7 */
 	unsigned int bit;
 };

 /* initialize cursor to point to first bit of stream */
 static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s)
 {
 	cur->b = s;
 	cur->bit = 0;
 }

 /* Move the cursor forward by @bits bits, keeping cur->bit within 0..7.
  * Maximum expected input value: 64, but depending on the VLI
  * implementation, it may be more. */
 static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits)
 {
 	/* new position as bit offset from bit 0 of the current byte */
 	const unsigned int offset = cur->bit + bits;

 	cur->b += offset / 8;
 	cur->bit = offset % 8;
 }

 /* the bitstream itself knows its length */
 struct bitstream {
 	/* current position; used by both bitstream_put_bits() and
 	 * bitstream_get_bits() */
 	struct bitstream_cursor cur;
 	/* start of the buffer holding the stream */
 	unsigned char *buf;
 	size_t buf_len;		/* in bytes */

 	/* for input stream:
 	 * number of trailing 0 bits for padding
 	 * total number of valid bits in stream: buf_len * 8 - pad_bits */
 	unsigned int pad_bits;
 };

 /* Set up @bs on top of the @len byte buffer @s, record @pad_bits, and
  * point the cursor at the first bit.  The buffer content is not touched. */
 static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits)
 {
 	bs->pad_bits = pad_bits;
 	bs->buf_len = len;
 	bs->buf = s;
 	bitstream_cursor_reset(&bs->cur, bs->buf);
 }

 /* Zero the whole buffer and move the cursor back to the first bit. */
 static inline void bitstream_rewind(struct bitstream *bs)
 {
 	memset(bs->buf, 0, bs->buf_len);
 	bitstream_cursor_reset(&bs->cur, bs->buf);
 }

 /* Put (at most 64) least significant bits of val into bitstream, and advance cursor.
  * Ignores "pad_bits".
  * The bits are OR-ed into the buffer, so the target area has to be zero
  * beforehand (bitstream_rewind() clears the whole buffer).
  *
  * Returns zero if bits == 0 (nothing to do).
  * Returns number of bits used if successful.
  *
  * If more than 64 bits are requested,
  * leaves bitstream unchanged and returns -EINVAL.
  *
  * If there is not enough room left in bitstream,
  * leaves bitstream unchanged and returns -ENOBUFS.
  */
 static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits)
 {
 	unsigned char *b = bs->cur.b;
 	unsigned int tmp;

 	if (bits == 0)
 		return 0;

 	/* val only has 64 bits; the shifts below are undefined for
 	 * shift counts >= 64.  Same limit as in bitstream_get_bits(). */
 	if (bits > 64)
 		return -EINVAL;

 	/* the last byte we would touch has to be within the buffer */
 	if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len)
 		return -ENOBUFS;

 	/* paranoia: strip off hi bits; they should not be set anyways. */
 	if (bits < 64)
 		val &= ~0ULL >> (64 - bits);

 	/* first (possibly partial) byte: fill it up from the current bit on */
 	*b++ |= (val & 0xff) << bs->cur.bit;

 	/* remaining bytes; tmp is the number of bits of val already stored */
 	for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8)
 		*b++ |= (val >> tmp) & 0xff;

 	bitstream_cursor_advance(&bs->cur, bits);
 	return bits;
 }

 /* Fetch (at most 64) bits from bitstream into *out, and advance cursor.
  *
  * If more than 64 bits, or a negative number of bits, are requested,
  * returns -EINVAL and leaves *out unchanged.
  *
  * If zero bits are requested, sets *out to 0 and returns 0.
  *
  * If there are less than the requested number of valid bits left in the
  * bitstream, still fetches all available bits.
  *
  * Returns number of actually fetched bits.
  */
 static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
 {
 	u64 val;
 	unsigned int n;

 	if (bits < 0 || bits > 64)
 		return -EINVAL;

 	/* Nothing requested.  This has to be handled before the bounds check:
 	 * "bits - 1" is evaluated in unsigned arithmetic there and would wrap,
 	 * turning the request into "everything that is left". */
 	if (bits == 0) {
 		*out = 0;
 		return 0;
 	}

 	/* request reaches beyond the valid bits: clamp to what is left */
 	if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len)
 		bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3)
 			- bs->cur.bit - bs->pad_bits;

 	/* nothing left; also catches a cursor that already points into
 	 * the padding, where the remainder computes as negative */
 	if (bits <= 0) {
 		*out = 0;
 		return 0;
 	}

 	/* get the high bits */
 	val = 0;
 	n = (bs->cur.bit + bits + 7) >> 3;
 	/* n may be at most 9, if cur.bit + bits > 64 */
 	/* which means this copies at most 8 byte */
 	if (n) {
 		memcpy(&val, bs->cur.b+1, n - 1);
 		val = le64_to_cpu(val) << (8 - bs->cur.bit);
 	}

 	/* we still need the low bits */
 	val |= bs->cur.b[0] >> bs->cur.bit;

 	/* and mask out bits we don't want */
 	val &= ~0ULL >> (64 - bits);

 	bitstream_cursor_advance(&bs->cur, bits);
 	*out = val;

 	return bits;
 }

 /* Encode @in as VLI code word and append it to @bs.
  *
  * return values
  *  > 0: number of bits successfully stored in bitstream
  * -ENOBUFS @bs is full
  * -EINVAL input zero (invalid)
  * -EOVERFLOW input too large for this vli code (invalid)
  */
 static inline int vli_encode_bits(struct bitstream *bs, u64 in)
 {
 	u64 codeword;
 	const int len = __vli_encode_bits(&codeword, in);

 	/* not positive: pass the result of the encoder on unchanged */
 	return len > 0 ? bitstream_put_bits(bs, codeword, len) : len;
 }

 #endif
	/* SPDX-License-Identifier: GPL-2.0-only */
	/*
	-- linux-c --
	drbd_receiver.c
	This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

	Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
	Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
	Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

	*/

	#ifndef _DRBD_VLI_H
	#define _DRBD_VLI_H

	/*
	* At a granularity of 4KiB storage represented per bit,
	* and storage sizes of several TiB,
	* and possibly small-bandwidth replication,
	* the bitmap transfer time can take much too long,
	* if transmitted in plain text.
	*
	* We try to reduce the transferred bitmap information
	* by encoding runlengths of bit polarity.
	*
	* We never actually need to encode a "zero" (runlengths are positive).
	* But then we have to store the value of the first bit.
	* The first bit of information thus shall encode if the first runlength
	* gives the number of set or unset bits.
	*
	* We assume that large areas are either completely set or unset,
	* which gives good compression with any runlength method,
	* even when encoding the runlength as fixed size 32bit/64bit integers.
	*
	* Still, there may be areas where the polarity flips every few bits,
	* and encoding the runlength sequence of those areas with fix size
	* integers would be much worse than plaintext.
	*
	* We want to encode small runlength values with minimum code length,
	* while still being able to encode a Huge run of all zeros.
	*
	* Thus we need a Variable Length Integer encoding, VLI.
	*
	* For some cases, we produce more code bits than plaintext input.
	* We need to send incompressible chunks as plaintext, skip over them
	* and then see if the next chunk compresses better.
	*
	* We don't care too much about "excellent" compression ratio for large
	* runlengths (all set/all clear): whether we achieve a factor of 100
	* or 1000 is not that much of an issue.
	* We do not want to waste too much on short runlengths in the "noisy"
	* parts of the bitmap, though.
	*
	* There are endless variants of VLI, we experimented with:
	* * simple byte-based
	* * various bit based with different code word length.
	*
	* To avoid yet an other configuration parameter (choice of bitmap compression
	* algorithm) which was difficult to explain and tune, we just chose the one
	* variant that turned out best in all test cases.
	* Based on real world usage patterns, with device sizes ranging from a few GiB
	* to several TiB, file server/mailserver/webserver/mysql/postgress,
	* mostly idle to really busy, the all time winner (though sometimes only
	* marginally better) is:
	*/

	/*
	* encoding is "visualised" as
	* __little endian__ bitstream, least significant bit first (left most)
	*
	* this particular encoding is chosen so that the prefix code
	* starts as unary encoding the level, then modified so that
	* 10 levels can be described in 8bit, with minimal overhead
	* for the smaller levels.
	*
	* Number of data bits follow fibonacci sequence, with the exception of the
	* last level (+1 data bit, so it makes 64bit total). The only worse code when
	* encoding bit polarity runlength is 1 plain bits => 2 code bits.
	prefix    data bits                                    max val  Nº data bits
	0 x                                                         0x2            1
	10 x                                                        0x4            1
	110 xx                                                      0x8            2
	1110 xxx                                                   0x10            3
	11110 xxx xx                                               0x30            5
	111110 xx xxxxxx                                          0x130            8
	11111100  xxxxxxxx xxxxx                                 0x2130           13
	11111110  xxxxxxxx xxxxxxxx xxxxx                      0x202130           21
	11111101  xxxxxxxx xxxxxxxx xxxxxxxx  xxxxxxxx xx   0x400202130           34
	11111111  xxxxxxxx xxxxxxxx xxxxxxxx  xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56
	* maximum encodable value: 0x100000400202130 == 2**56 + some */

	/* compression "table":
	 transmitted   x                                0.29
	 as plaintext x                                  ........................
	             x                                   ........................
	            x                                    ........................
	           x    0.59                         0.21........................
	          x      ........................................................
	         x       .. c ...................................................
	        x    0.44.. o ...................................................
	       x .......... d ...................................................
	      x  .......... e ...................................................
	     X.............   ...................................................
	    x.............. b ...................................................
	2.0x............... i ...................................................
	 #X................ t ...................................................
	 #................. s ...........................  plain bits  ..........
	-+-----------------------------------------------------------------------
	 1             16              32                              64
	*/

	/* LEVEL: (total bits, prefix bits, prefix value),
	* sorted ascending by number of total bits.
	* The rest of the code table is calculated at compiletime from this. */

	/* Code table: one LEVEL(total bits, prefix bits, prefix value) entry per
	* code word length, shortest first. The number of data bits of a level is
	* total bits minus prefix bits: 1, 1, 2, 3, 5, 8, 13, 21, 34 (fibonacci),
	* and 56 for the last level.
	* LEVEL() has to be defined by the user before VLI_L_1_1() is expanded. */
	#define VLI_L_1_1() do { \
	LEVEL( 2, 1, 0x00); \
	LEVEL( 3, 2, 0x01); \
	LEVEL( 5, 3, 0x03); \
	LEVEL( 7, 4, 0x07); \
	LEVEL(10, 5, 0x0f); \
	LEVEL(14, 6, 0x1f); \
	LEVEL(21, 8, 0x3f); \
	LEVEL(29, 8, 0x7f); \
	LEVEL(42, 8, 0xbf); \
	LEVEL(64, 8, 0xff); \
	} while (0)

	/* Decode the VLI code word found in the least significant bits of @in.
	 *
	 * The code table is walked from the shortest level to the longest; the
	 * first level whose prefix matches the low bits of @in is used.  Its data
	 * bits, plus the offset accumulated from all shorter levels, are stored
	 * in *@out.
	 *
	 * Returns the number of bits consumed (total length of the code word).
	 *
	 * BUG() if no level matches, as that would mean a buggy code table. */
	static inline int vli_decode_bits(u64 *out, const u64 in)
	{
		/* smallest value encoded by the level currently looked at */
		u64 base = 1;

	#define LEVEL(total, pfx_len, pfx)					\
		do {								\
			if ((in & ((1 << pfx_len) - 1)) != pfx) {		\
				base += 1ULL << (total - pfx_len);		\
				break;						\
			}							\
			*out = ((in & ((~0ULL) >> (64 - total))) >> pfx_len) + base; \
			return total;						\
		} while (0)

		VLI_L_1_1();

		/* NOT REACHED, if the VLI_L_1_1 code table is defined properly */
		BUG();
	#undef LEVEL
	}

	/* Compute the VLI code word for @in.
	 *
	 * If @out is not NULL, the code word (prefix in the low bits, data bits
	 * above it) is stored there; with @out == NULL only the length is computed.
	 *
	 * Returns the number of code bits needed,
	 * -EINVAL if @in is zero,
	 * -EOVERFLOW if @in is larger than the largest encodable value. */
	static inline int __vli_encode_bits(u64 *out, const u64 in)
	{
		u64 max = 0;	/* largest value covered by the levels seen so far */
		u64 adj = 1;	/* smallest value of the level currently looked at */

		if (in == 0)
			return -EINVAL;

	#define LEVEL(t,b,v) do {		\
			max += 1ULL << (t - b);	\
			if (in <= max) {	\
				if (out)	\
					*out = ((in - adj) << b) | v;	\
				return t;	\
			}			\
			adj = max + 1;		\
		} while (0)

		VLI_L_1_1();

		return -EOVERFLOW;
	#undef LEVEL
	}

	#undef VLI_L_1_1

	/* code from here down is independent of the actually used bit code */

	/*
	* Code length is determined by some unique (e.g. unary) prefix.
	* This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
	* not a byte stream.
	*/

	/* for the bitstream, we need a cursor:
	 * a single bit position, given as byte pointer plus bit offset */
	struct bitstream_cursor {
		/* the current byte */
		u8 *b;
		/* the current bit within *b, normalized: 0..7 */
		unsigned int bit;
	};

	/* initialize cursor to point to first bit of stream */
	static inline void bitstream_cursor_reset(struct bitstream_cursor cur, void s)
	{
	cur->b = s;
	cur->bit = 0;
	}

	/* Move the cursor forward by @bits bits, keeping cur->bit within 0..7.
	 * Maximum expected input value: 64, but depending on the VLI
	 * implementation, it may be more. */
	static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits)
	{
		/* new position as bit offset from bit 0 of the current byte */
		const unsigned int offset = cur->bit + bits;

		cur->b += offset / 8;
		cur->bit = offset % 8;
	}

	/* the bitstream itself knows its length */
	struct bitstream {
	/* current position; used by both bitstream_put_bits() and
	* bitstream_get_bits() */
	struct bitstream_cursor cur;
	/* start of the buffer holding the stream */
	unsigned char *buf;
	size_t buf_len; /* in bytes */

	/* for input stream:
	* number of trailing 0 bits for padding
	* total number of valid bits in stream: buf_len * 8 - pad_bits */
	unsigned int pad_bits;
	};

	static inline void bitstream_init(struct bitstream bs, void s, size_t len, unsigned int pad_bits)
	{
	bs->buf = s;
	bs->buf_len = len;
	bs->pad_bits = pad_bits;
	bitstream_cursor_reset(&bs->cur, bs->buf);
	}

	/* Zero the whole buffer and move the cursor back to the first bit. */
	static inline void bitstream_rewind(struct bitstream *bs)
	{
		memset(bs->buf, 0, bs->buf_len);
		bitstream_cursor_reset(&bs->cur, bs->buf);
	}

	/* Put (at most 64) least significant bits of val into bitstream, and advance cursor.
	 * Ignores "pad_bits".
	 * The bits are OR-ed into the buffer, so the target area has to be zero
	 * beforehand (bitstream_rewind() clears the whole buffer).
	 *
	 * Returns zero if bits == 0 (nothing to do).
	 * Returns number of bits used if successful.
	 *
	 * If more than 64 bits are requested,
	 * leaves bitstream unchanged and returns -EINVAL.
	 *
	 * If there is not enough room left in bitstream,
	 * leaves bitstream unchanged and returns -ENOBUFS.
	 */
	static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits)
	{
		unsigned char *b = bs->cur.b;
		unsigned int tmp;

		if (bits == 0)
			return 0;

		/* val only has 64 bits; the shifts below are undefined for
		 * shift counts >= 64.  Same limit as in bitstream_get_bits(). */
		if (bits > 64)
			return -EINVAL;

		/* the last byte we would touch has to be within the buffer */
		if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len)
			return -ENOBUFS;

		/* paranoia: strip off hi bits; they should not be set anyways. */
		if (bits < 64)
			val &= ~0ULL >> (64 - bits);

		/* first (possibly partial) byte: fill it up from the current bit on */
		*b++ |= (val & 0xff) << bs->cur.bit;

		/* remaining bytes; tmp is the number of bits of val already stored */
		for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8)
			*b++ |= (val >> tmp) & 0xff;

		bitstream_cursor_advance(&bs->cur, bits);
		return bits;
	}

	/* Fetch (at most 64) bits from bitstream into *out, and advance cursor.
	 *
	 * If more than 64 bits, or a negative number of bits, are requested,
	 * returns -EINVAL and leaves *out unchanged.
	 *
	 * If zero bits are requested, sets *out to 0 and returns 0.
	 *
	 * If there are less than the requested number of valid bits left in the
	 * bitstream, still fetches all available bits.
	 *
	 * Returns number of actually fetched bits.
	 */
	static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
	{
		u64 val;
		unsigned int n;

		if (bits < 0 || bits > 64)
			return -EINVAL;

		/* Nothing requested.  This has to be handled before the bounds check:
		 * "bits - 1" is evaluated in unsigned arithmetic there and would wrap,
		 * turning the request into "everything that is left". */
		if (bits == 0) {
			*out = 0;
			return 0;
		}

		/* request reaches beyond the valid bits: clamp to what is left */
		if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len)
			bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3)
				- bs->cur.bit - bs->pad_bits;

		/* nothing left; also catches a cursor that already points into
		 * the padding, where the remainder computes as negative */
		if (bits <= 0) {
			*out = 0;
			return 0;
		}

		/* get the high bits */
		val = 0;
		n = (bs->cur.bit + bits + 7) >> 3;
		/* n may be at most 9, if cur.bit + bits > 64 */
		/* which means this copies at most 8 byte */
		if (n) {
			memcpy(&val, bs->cur.b+1, n - 1);
			val = le64_to_cpu(val) << (8 - bs->cur.bit);
		}

		/* we still need the low bits */
		val |= bs->cur.b[0] >> bs->cur.bit;

		/* and mask out bits we don't want */
		val &= ~0ULL >> (64 - bits);

		bitstream_cursor_advance(&bs->cur, bits);
		*out = val;

		return bits;
	}

	/* Encode @in as VLI code word and append it to @bs.
	 *
	 * return values
	 *  > 0: number of bits successfully stored in bitstream
	 * -ENOBUFS @bs is full
	 * -EINVAL input zero (invalid)
	 * -EOVERFLOW input too large for this vli code (invalid)
	 */
	static inline int vli_encode_bits(struct bitstream *bs, u64 in)
	{
		u64 codeword;
		const int len = __vli_encode_bits(&codeword, in);

		/* not positive: pass the result of the encoder on unchanged */
		return len > 0 ? bitstream_put_bits(bs, codeword, len) : len;
	}

	#endif