| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * 842 Software Compression |
| * |
| * Copyright (C) 2015 Dan Streetman, IBM Corp |
| * |
| * See 842.h for details of the 842 compressed format. |
| */ |
| |
| #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| #define MODULE_NAME "842_compress" |
| |
| #include <linux/hashtable.h> |
| |
| #include "842.h" |
| #include "842_debugfs.h" |
| |
| #define SW842_HASHTABLE8_BITS (10) |
| #define SW842_HASHTABLE4_BITS (11) |
| #define SW842_HASHTABLE2_BITS (10) |
| |
| /* By default, we allow compressing input buffers of any length, but we must |
| * use the non-standard "short data" template so the decompressor can correctly |
| * reproduce the uncompressed data buffer at the right length. However, the |
| * hardware 842 decompressor will not recognize the "short data" template, and |
| * will fail to decompress any compressed buffer containing it (I have no idea |
| * why anyone would want to use software to compress and hardware to decompress |
| * but that's beside the point). This parameter forces the compression |
| * function to simply reject any input buffer that isn't a multiple of 8 bytes |
| * long, instead of using the "short data" template, so that all compressed |
| * buffers produced by this function will be decompressible by the 842 hardware |
| * decompressor. Unless you have a specific need for that, leave this disabled |
| * so that any length buffer can be compressed. |
| */ |
| static bool sw842_strict; |
| module_param_named(strict, sw842_strict, bool, 0644); |
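| /* Note: with mode 0644 the value can also be changed at runtime through |
| * sysfs, normally /sys/module/<module>/parameters/strict (the exact module |
| * directory depends on how this file is built into the kernel). |
| */ |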
| |
| static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */ |
| { I8, N0, N0, N0, 0x19 }, /* 8 */ |
| { I4, I4, N0, N0, 0x18 }, /* 18 */ |
| { I4, I2, I2, N0, 0x17 }, /* 25 */ |
| { I2, I2, I4, N0, 0x13 }, /* 25 */ |
| { I2, I2, I2, I2, 0x12 }, /* 32 */ |
| { I4, I2, D2, N0, 0x16 }, /* 33 */ |
| { I4, D2, I2, N0, 0x15 }, /* 33 */ |
| { I2, D2, I4, N0, 0x0e }, /* 33 */ |
| { D2, I2, I4, N0, 0x09 }, /* 33 */ |
| { I2, I2, I2, D2, 0x11 }, /* 40 */ |
| { I2, I2, D2, I2, 0x10 }, /* 40 */ |
| { I2, D2, I2, I2, 0x0d }, /* 40 */ |
| { D2, I2, I2, I2, 0x08 }, /* 40 */ |
| { I4, D4, N0, N0, 0x14 }, /* 41 */ |
| { D4, I4, N0, N0, 0x04 }, /* 41 */ |
| { I2, I2, D4, N0, 0x0f }, /* 48 */ |
| { I2, D2, I2, D2, 0x0c }, /* 48 */ |
| { I2, D4, I2, N0, 0x0b }, /* 48 */ |
| { D2, I2, I2, D2, 0x07 }, /* 48 */ |
| { D2, I2, D2, I2, 0x06 }, /* 48 */ |
| { D4, I2, I2, N0, 0x03 }, /* 48 */ |
| { I2, D2, D4, N0, 0x0a }, /* 56 */ |
| { D2, I2, D4, N0, 0x05 }, /* 56 */ |
| { D4, I2, D2, N0, 0x02 }, /* 56 */ |
| { D4, D2, I2, N0, 0x01 }, /* 56 */ |
| { D8, N0, N0, N0, 0x00 }, /* 64 */ |
| }; |
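| /* Each row above lists the four operand actions followed by the 5-bit |
| * template opcode; the trailing comment gives the total operand size in |
| * bits. For example, { I4, I2, I2, N0, 0x17 } encodes one 4-byte index |
| * (9 bits) plus two 2-byte indexes (8 bits each) = 25 operand bits written |
| * after the 5-bit opcode 0x17. The rows are ordered by increasing encoded |
| * size, so the first template that matches in process_next() is also the |
| * cheapest, with the raw { D8 } template as the final fallback. |
| */ |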
| |
| struct sw842_hlist_node8 { |
| struct hlist_node node; |
| u64 data; |
| u8 index; |
| }; |
| |
| struct sw842_hlist_node4 { |
| struct hlist_node node; |
| u32 data; |
| u16 index; |
| }; |
| |
| struct sw842_hlist_node2 { |
| struct hlist_node node; |
| u16 data; |
| u8 index; |
| }; |
| |
| #define INDEX_NOT_FOUND (-1) |
| #define INDEX_NOT_CHECKED (-2) |
| |
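| /* Per-call working state (lives in the caller-provided wmem buffer): the |
| * current input/output positions, the current 8-byte input block broken |
| * into one 8-byte, two 4-byte and four 2-byte chunks (data8/data4/data2), |
| * the lazily evaluated index lookups for those chunks (index8/index4/index2, |
| * see check_index()), and hash tables over ring buffers of recently seen |
| * chunk values (node8/node4/node2). |
| */ |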
| struct sw842_param { |
| u8 *in; |
| u8 *instart; |
| u64 ilen; |
| u8 *out; |
| u64 olen; |
| u8 bit; |
| u64 data8[1]; |
| u32 data4[2]; |
| u16 data2[4]; |
| int index8[1]; |
| int index4[2]; |
| int index2[4]; |
| DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS); |
| DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS); |
| DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS); |
| struct sw842_hlist_node8 node8[1 << I8_BITS]; |
| struct sw842_hlist_node4 node4[1 << I4_BITS]; |
| struct sw842_hlist_node2 node2[1 << I2_BITS]; |
| }; |
| |
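| /* Read a b-bit big-endian value from byte offset o of the current input |
| * position (b is 16, 32 or 64). |
| */ |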
| #define get_input_data(p, o, b) \ |
| be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o)))) |
| |
| #define init_hashtable_nodes(p, b) do { \ |
| int _i; \ |
| hash_init((p)->htable##b); \ |
| for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) { \ |
| (p)->node##b[_i].index = _i; \ |
| (p)->node##b[_i].data = 0; \ |
| INIT_HLIST_NODE(&(p)->node##b[_i].node); \ |
| } \ |
| } while (0) |
| |
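| /* find_index() looks up the current data##b[n] chunk in hash table b and, |
| * if an earlier block contained the same value, records that block's ring |
| * index in index##b[n]; the macro evaluates to true on a hit. check_index() |
| * memoizes the result so each chunk is hashed at most once per 8-byte block |
| * (the indexes start out as INDEX_NOT_CHECKED in process_next()). |
| */ |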
| #define find_index(p, b, n) ({ \ |
| struct sw842_hlist_node##b *_n; \ |
| p->index##b[n] = INDEX_NOT_FOUND; \ |
| hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \ |
| if (p->data##b[n] == _n->data) { \ |
| p->index##b[n] = _n->index; \ |
| break; \ |
| } \ |
| } \ |
| p->index##b[n] >= 0; \ |
| }) |
| |
| #define check_index(p, b, n) \ |
| ((p)->index##b[n] == INDEX_NOT_CHECKED \ |
| ? find_index(p, b, n) \ |
| : (p)->index##b[n] >= 0) |
| |
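| /* Recycle ring slot (i)+(d) for the current data##b[d] chunk: unlink the |
| * node from whatever bucket it was hashed under before and re-add it keyed |
| * by the new value, so stale values age out of the tables and an index |
| * always refers to one of the most recently stored chunks. |
| */ |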
| #define replace_hash(p, b, i, d) do { \ |
| struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)]; \ |
| hash_del(&_n->node); \ |
| _n->data = (p)->data##b[d]; \ |
| pr_debug("add hash index%x %x pos %x data %lx\n", b, \ |
| (unsigned int)_n->index, \ |
| (unsigned int)((p)->in - (p)->instart), \ |
| (unsigned long)_n->data); \ |
| hash_add((p)->htable##b, &_n->node, _n->data); \ |
| } while (0) |
| |
| static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe }; |
| |
| static int add_bits(struct sw842_param *p, u64 d, u8 n); |
| |
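| /* Write the n-bit value d as two smaller add_bits() calls: the upper n - s |
| * bits first, then the low s bits, so a single value can span the split |
| * points handled in add_bits() below. |
| */ |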
| static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s) |
| { |
| int ret; |
| |
| if (n <= s) |
| return -EINVAL; |
| |
| ret = add_bits(p, d >> s, n - s); |
| if (ret) |
| return ret; |
| return add_bits(p, d & GENMASK_ULL(s - 1, 0), s); |
| } |
| |
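| /* Append the low n bits of d to the output stream. p->bit is the number of |
| * bits already used in the current output byte; bmask[b] keeps those high |
| * bits when the byte is rewritten. A worked example: with p->bit == 3 and |
| * n == 5, bits == 8 and s == 0, so the new bits exactly fill the current |
| * byte: *out = (*out & 0xe0) | d. With p->bit == 2 and n == 13, bits == 15 |
| * and s == 1, so d is shifted left by one and written as a 16-bit |
| * big-endian store on top of the two preserved bits. |
| */ |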
| static int add_bits(struct sw842_param *p, u64 d, u8 n) |
| { |
| int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits; |
| u64 o; |
| u8 *out = p->out; |
| |
| pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d); |
| |
| if (n > 64) |
| return -EINVAL; |
| |
| /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0), |
| * or if fewer than 8 output bytes remain and the write would otherwise |
| * run past the end of the buffer |
| */ |
| if (bits > 64) |
| return __split_add_bits(p, d, n, 32); |
| else if (p->olen < 8 && bits > 32 && bits <= 56) |
| return __split_add_bits(p, d, n, 16); |
| else if (p->olen < 4 && bits > 16 && bits <= 24) |
| return __split_add_bits(p, d, n, 8); |
| |
| if (DIV_ROUND_UP(bits, 8) > p->olen) |
| return -ENOSPC; |
| |
| o = *out & bmask[b]; |
| d <<= s; |
| |
| if (bits <= 8) |
| *out = o | d; |
| else if (bits <= 16) |
| put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out); |
| else if (bits <= 24) |
| put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out); |
| else if (bits <= 32) |
| put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out); |
| else if (bits <= 40) |
| put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out); |
| else if (bits <= 48) |
| put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out); |
| else if (bits <= 56) |
| put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out); |
| else |
| put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out); |
| |
| p->bit += n; |
| |
| if (p->bit > 7) { |
| p->out += p->bit / 8; |
| p->olen -= p->bit / 8; |
| p->bit %= 8; |
| } |
| |
| return 0; |
| } |
| |
| static int add_template(struct sw842_param *p, u8 c) |
| { |
| int ret, i, b = 0; |
| u8 *t = comp_ops[c]; |
| bool inv = false; |
| |
| if (c >= OPS_MAX) |
| return -EINVAL; |
| |
| pr_debug("template %x\n", t[4]); |
| |
| ret = add_bits(p, t[4], OP_BITS); |
| if (ret) |
| return ret; |
| |
| for (i = 0; i < 4; i++) { |
| pr_debug("op %x\n", t[i]); |
| |
| switch (t[i] & OP_AMOUNT) { |
| case OP_AMOUNT_8: |
| if (b) |
| inv = true; |
| else if (t[i] & OP_ACTION_INDEX) |
| ret = add_bits(p, p->index8[0], I8_BITS); |
| else if (t[i] & OP_ACTION_DATA) |
| ret = add_bits(p, p->data8[0], 64); |
| else |
| inv = true; |
| break; |
| case OP_AMOUNT_4: |
| if (b == 2 && t[i] & OP_ACTION_DATA) |
| ret = add_bits(p, get_input_data(p, 2, 32), 32); |
| else if (b != 0 && b != 4) |
| inv = true; |
| else if (t[i] & OP_ACTION_INDEX) |
| ret = add_bits(p, p->index4[b >> 2], I4_BITS); |
| else if (t[i] & OP_ACTION_DATA) |
| ret = add_bits(p, p->data4[b >> 2], 32); |
| else |
| inv = true; |
| break; |
| case OP_AMOUNT_2: |
| if (b != 0 && b != 2 && b != 4 && b != 6) |
| inv = true; |
| else if (t[i] & OP_ACTION_INDEX) |
| ret = add_bits(p, p->index2[b >> 1], I2_BITS); |
| else if (t[i] & OP_ACTION_DATA) |
| ret = add_bits(p, p->data2[b >> 1], 16); |
| else |
| inv = true; |
| break; |
| case OP_AMOUNT_0: |
| inv = (b != 8) || !(t[i] & OP_ACTION_NOOP); |
| break; |
| default: |
| inv = true; |
| break; |
| } |
| |
| if (ret) |
| return ret; |
| |
| if (inv) { |
| pr_err("Invalid templ %x op %d : %x %x %x %x\n", |
| c, i, t[0], t[1], t[2], t[3]); |
| return -EINVAL; |
| } |
| |
| b += t[i] & OP_AMOUNT; |
| } |
| |
| if (b != 8) { |
| pr_err("Invalid template %x len %x : %x %x %x %x\n", |
| c, b, t[0], t[1], t[2], t[3]); |
| return -EINVAL; |
| } |
| |
| if (sw842_template_counts) |
| atomic_inc(&template_count[t[4]]); |
| |
| return 0; |
| } |
| |
| static int add_repeat_template(struct sw842_param *p, u8 r) |
| { |
| int ret; |
| |
| /* repeat param is 0-based */ |
| if (!r || --r > REPEAT_BITS_MAX) |
| return -EINVAL; |
| |
| ret = add_bits(p, OP_REPEAT, OP_BITS); |
| if (ret) |
| return ret; |
| |
| ret = add_bits(p, r, REPEAT_BITS); |
| if (ret) |
| return ret; |
| |
| if (sw842_template_counts) |
| atomic_inc(&template_repeat_count); |
| |
| return 0; |
| } |
| |
| static int add_short_data_template(struct sw842_param *p, u8 b) |
| { |
| int ret, i; |
| |
| if (!b || b > SHORT_DATA_BITS_MAX) |
| return -EINVAL; |
| |
| ret = add_bits(p, OP_SHORT_DATA, OP_BITS); |
| if (ret) |
| return ret; |
| |
| ret = add_bits(p, b, SHORT_DATA_BITS); |
| if (ret) |
| return ret; |
| |
| for (i = 0; i < b; i++) { |
| ret = add_bits(p, p->in[i], 8); |
| if (ret) |
| return ret; |
| } |
| |
| if (sw842_template_counts) |
| atomic_inc(&template_short_data_count); |
| |
| return 0; |
| } |
| |
| static int add_zeros_template(struct sw842_param *p) |
| { |
| int ret = add_bits(p, OP_ZEROS, OP_BITS); |
| |
| if (ret) |
| return ret; |
| |
| if (sw842_template_counts) |
| atomic_inc(&template_zeros_count); |
| |
| return 0; |
| } |
| |
| static int add_end_template(struct sw842_param *p) |
| { |
| int ret = add_bits(p, OP_END, OP_BITS); |
| |
| if (ret) |
| return ret; |
| |
| if (sw842_template_counts) |
| atomic_inc(&template_end_count); |
| |
| return 0; |
| } |
| |
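| /* Return true if template c can encode the current block, i.e. every index |
| * operand in the template finds a matching earlier chunk (data operands can |
| * always be emitted and are not checked). b tracks the byte position within |
| * the 8-byte block so the right data2/data4 chunk is checked. |
| */ |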
| static bool check_template(struct sw842_param *p, u8 c) |
| { |
| u8 *t = comp_ops[c]; |
| int i, match, b = 0; |
| |
| if (c >= OPS_MAX) |
| return false; |
| |
| for (i = 0; i < 4; i++) { |
| if (t[i] & OP_ACTION_INDEX) { |
| if (t[i] & OP_AMOUNT_2) |
| match = check_index(p, 2, b >> 1); |
| else if (t[i] & OP_AMOUNT_4) |
| match = check_index(p, 4, b >> 2); |
| else if (t[i] & OP_AMOUNT_8) |
| match = check_index(p, 8, 0); |
| else |
| return false; |
| if (!match) |
| return false; |
| } |
| |
| b += t[i] & OP_AMOUNT; |
| } |
| |
| return true; |
| } |
| |
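| /* Load the next 8 input bytes as big-endian values at every granularity: |
| * e.g. for input bytes b0..b7, data8[0] covers b0..b7, data4[1] covers |
| * b4..b7 and data2[3] covers b6..b7. |
| */ |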
| static void get_next_data(struct sw842_param *p) |
| { |
| p->data8[0] = get_input_data(p, 0, 64); |
| p->data4[0] = get_input_data(p, 0, 32); |
| p->data4[1] = get_input_data(p, 4, 32); |
| p->data2[0] = get_input_data(p, 0, 16); |
| p->data2[1] = get_input_data(p, 2, 16); |
| p->data2[2] = get_input_data(p, 4, 16); |
| p->data2[3] = get_input_data(p, 6, 16); |
| } |
| |
| /* Update the hashtable entries. |
| * Only call this after finding/adding the current template; the dataN |
| * fields for the current 8 byte block must already be updated. |
| */ |
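| /* The ring slot that gets recycled is derived from the block's position in |
| * the input: e.g. with I8_BITS == 8 the 8-byte table cycles through 256 |
| * slots, so index n8 always refers to one of the 256 most recently seen |
| * 8-byte chunks. |
| */ |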
| static void update_hashtables(struct sw842_param *p) |
| { |
| u64 pos = p->in - p->instart; |
| u64 n8 = (pos >> 3) % (1 << I8_BITS); |
| u64 n4 = (pos >> 2) % (1 << I4_BITS); |
| u64 n2 = (pos >> 1) % (1 << I2_BITS); |
| |
| replace_hash(p, 8, n8, 0); |
| replace_hash(p, 4, n4, 0); |
| replace_hash(p, 4, n4, 1); |
| replace_hash(p, 2, n2, 0); |
| replace_hash(p, 2, n2, 1); |
| replace_hash(p, 2, n2, 2); |
| replace_hash(p, 2, n2, 3); |
| } |
| |
| /* Find the next template to use and add it; the p->dataN fields must |
| * already be set for the current 8 byte block. |
| */ |
| static int process_next(struct sw842_param *p) |
| { |
| int ret, i; |
| |
| p->index8[0] = INDEX_NOT_CHECKED; |
| p->index4[0] = INDEX_NOT_CHECKED; |
| p->index4[1] = INDEX_NOT_CHECKED; |
| p->index2[0] = INDEX_NOT_CHECKED; |
| p->index2[1] = INDEX_NOT_CHECKED; |
| p->index2[2] = INDEX_NOT_CHECKED; |
| p->index2[3] = INDEX_NOT_CHECKED; |
| |
| /* check up to OPS_MAX - 1; last op is our fallback */ |
| for (i = 0; i < OPS_MAX - 1; i++) { |
| if (check_template(p, i)) |
| break; |
| } |
| |
| ret = add_template(p, i); |
| if (ret) |
| return ret; |
| |
| return 0; |
| } |
| |
| /** |
| * sw842_compress |
| * |
| * Compress the uncompressed buffer of length @ilen at @in to the output buffer |
| * @out, using no more than @olen bytes, using the 842 compression format. |
| * |
| * Returns: 0 on success, error on failure. The @olen parameter |
| * will contain the number of output bytes written on success, or |
| * 0 on error. |
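| * |
| * @wmem must point to at least SW842_MEM_COMPRESS bytes of scratch memory |
| * (see the BUILD_BUG_ON below). A minimal usage sketch, where src, src_len, |
| * dst and dst_size are caller-provided placeholders: |
| * |
| * u8 *wmem = kmalloc(SW842_MEM_COMPRESS, GFP_KERNEL); |
| * unsigned int olen = dst_size; |
| * int err = sw842_compress(src, src_len, dst, &olen, wmem); |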
| */ |
| int sw842_compress(const u8 *in, unsigned int ilen, |
| u8 *out, unsigned int *olen, void *wmem) |
| { |
| struct sw842_param *p = (struct sw842_param *)wmem; |
| int ret; |
| u64 last, next, pad, total; |
| u8 repeat_count = 0; |
| u32 crc; |
| |
| BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS); |
| |
| init_hashtable_nodes(p, 8); |
| init_hashtable_nodes(p, 4); |
| init_hashtable_nodes(p, 2); |
| |
| p->in = (u8 *)in; |
| p->instart = p->in; |
| p->ilen = ilen; |
| p->out = out; |
| p->olen = *olen; |
| p->bit = 0; |
| |
| total = p->olen; |
| |
| *olen = 0; |
| |
| /* if using strict mode, we can only compress a multiple of 8 */ |
| if (sw842_strict && (ilen % 8)) { |
| pr_err("Using strict mode, can't compress len %d\n", ilen); |
| return -EINVAL; |
| } |
| |
| /* let's compress at least 8 bytes, mkay? */ |
| if (unlikely(ilen < 8)) |
| goto skip_comp; |
| |
| /* make initial 'last' different so we don't match the first time */ |
| last = ~get_unaligned((u64 *)p->in); |
| |
| while (p->ilen > 7) { |
| next = get_unaligned((u64 *)p->in); |
| |
| /* must get the next data, as we need to update the hashtable |
| * entries with the new data every time |
| */ |
| get_next_data(p); |
| |
| /* we don't care about endianness in last or next; |
| * we're just comparing 8 bytes to another 8 bytes, |
| * they're both the same endianness |
| */ |
| if (next == last) { |
| /* repeat count bits are 0-based, so we stop at +1 */ |
| if (++repeat_count <= REPEAT_BITS_MAX) |
| goto repeat; |
| } |
| if (repeat_count) { |
| ret = add_repeat_template(p, repeat_count); |
| repeat_count = 0; |
| if (next == last) /* reached max repeat bits */ |
| goto repeat; |
| } |
| |
| if (next == 0) |
| ret = add_zeros_template(p); |
| else |
| ret = process_next(p); |
| |
| if (ret) |
| return ret; |
| |
| repeat: |
| last = next; |
| update_hashtables(p); |
| p->in += 8; |
| p->ilen -= 8; |
| } |
| |
| if (repeat_count) { |
| ret = add_repeat_template(p, repeat_count); |
| if (ret) |
| return ret; |
| } |
| |
| skip_comp: |
| if (p->ilen > 0) { |
| ret = add_short_data_template(p, p->ilen); |
| if (ret) |
| return ret; |
| |
| p->in += p->ilen; |
| p->ilen = 0; |
| } |
| |
| ret = add_end_template(p); |
| if (ret) |
| return ret; |
| |
| /* |
| * crc(0:31) is appended to the output starting with the next |
| * bit after the End of stream template. |
| * nx842 calculates the CRC over the data in big-endian format, so do |
| * the same here so that sw842 decompression can handle data compressed |
| * by either implementation. |
| */ |
| crc = crc32_be(0, in, ilen); |
| ret = add_bits(p, crc, CRC_BITS); |
| if (ret) |
| return ret; |
| |
| if (p->bit) { |
| p->out++; |
| p->olen--; |
| p->bit = 0; |
| } |
| |
| /* pad compressed length to multiple of 8 */ |
| pad = (8 - ((total - p->olen) % 8)) % 8; |
| if (pad) { |
| if (pad > p->olen) /* we were so close! */ |
| return -ENOSPC; |
| memset(p->out, 0, pad); |
| p->out += pad; |
| p->olen -= pad; |
| } |
| |
| if (unlikely((total - p->olen) > UINT_MAX)) |
| return -ENOSPC; |
| |
| *olen = total - p->olen; |
| |
| return 0; |
| } |
| EXPORT_SYMBOL_GPL(sw842_compress); |
| |
| static int __init sw842_init(void) |
| { |
| if (sw842_template_counts) |
| sw842_debugfs_create(); |
| |
| return 0; |
| } |
| module_init(sw842_init); |
| |
| static void __exit sw842_exit(void) |
| { |
| if (sw842_template_counts) |
| sw842_debugfs_remove(); |
| } |
| module_exit(sw842_exit); |
| |
| MODULE_LICENSE("GPL"); |
| MODULE_DESCRIPTION("Software 842 Compressor"); |
| MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); |