Kent Overstreet | e5baf3d | 2022-10-21 19:20:09 -0400 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #ifndef _BCACHEFS_BKEY_CMP_H |
| 3 | #define _BCACHEFS_BKEY_CMP_H |
| 4 | |
| 5 | #include "bkey.h" |
| 6 | |
| 7 | #ifdef CONFIG_X86_64 |
/*
 * x86-64 implementation of the packed key bit comparison.
 *
 * @l, @r:	 first 64-bit word of each key to compare; the loop walks
 *		 both pointers towards lower addresses, 8 bytes at a time
 * @nr_key_bits: total number of key bits to compare
 *
 * Words are compared as unsigned 64-bit values and the first differing word
 * decides the result. In the final word only the top (nr_key_bits % 64) bits
 * take part in the comparison.
 *
 * Returns 1 if l > r, -1 if l < r, 0 if all compared bits are equal.
 */
static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
				  unsigned nr_key_bits)
{
	/*
	 * d0..d3 are never read afterwards: they exist only as output
	 * operands so the compiler knows the registers they are bound to are
	 * modified by the asm.
	 */
	long d0, d1, d2, d3;
	int cmp;

	/*
	 * We shouldn't need asm for this, but gcc does not do a good job on
	 * the equivalent C:
	 */

	asm(".intel_syntax noprefix;"
	    /* Clear eax/edx: seta/setb below only write their low bytes. */
	    "xor eax, eax;"
	    "xor edx, edx;"
	    /*
	     * Loop head: load the current word of each key, then charge 64
	     * bits against the remaining count in ecx. If fewer than 64 bits
	     * were left the count goes negative and we handle the tail at 2.
	     * Note the loads happen before the count check, so a word is
	     * read even when no bits remain (it is then shifted out
	     * entirely at 2).
	     */
	    "1:;"
	    "mov r8, [rdi];"
	    "mov r9, [rsi];"
	    "sub ecx, 64;"
	    "jl 2f;"

	    /* Whole word in play: any difference decides the result. */
	    "cmp r8, r9;"
	    "jnz 3f;"

	    /* Words are equal: step both pointers down one u64 and repeat. */
	    "lea rdi, [rdi - 8];"
	    "lea rsi, [rsi - 8];"
	    "jmp 1b;"

	    /*
	     * Tail: with rem = bits remaining before the sub (0..63), ecx is
	     * rem - 64, so after "not" cl holds 63 - rem. Shifting right by
	     * 1 and then by cl drops 64 - rem low bits in total, leaving
	     * only the top rem bits of each word, while every individual
	     * shift count stays in 0..63. For rem == 0 both words become 0
	     * and compare equal.
	     */
	    "2:;"
	    "not ecx;"
	    "shr r8, 1;"
	    "shr r9, 1;"
	    "shr r8, cl;"
	    "shr r9, cl;"
	    "cmp r8, r9;"

	    /*
	     * Convert the flags of the last cmp into the return value:
	     * eax = (l word above r word) - (l word below r word), unsigned.
	     */
	    "3:\n"
	    "seta al;"
	    "setb dl;"
	    "sub eax, edx;"
	    ".att_syntax prefix;"
	    /*
	     * Outputs: rdi, rsi, rdx and rcx as early-clobber dummies, and
	     * the result in eax.
	     */
	    : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
	    /*
	     * Inputs, tied to the outputs above by operand number:
	     * l -> rdi, r -> rsi, nr_key_bits -> rcx.
	     */
	    : "0" (l), "1" (r), "3" (nr_key_bits)
	    /*
	     * r8/r9 hold the loaded words. The asm reads through rdi/rsi
	     * without listing any memory operands, which the "memory"
	     * clobber accounts for.
	     */
	    : "r8", "r9", "cc", "memory");

	return cmp;
}
| 51 | #else |
| 52 | static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, |
| 53 | unsigned nr_key_bits) |
| 54 | { |
| 55 | u64 l_v, r_v; |
| 56 | |
| 57 | if (!nr_key_bits) |
| 58 | return 0; |
| 59 | |
| 60 | /* for big endian, skip past header */ |
| 61 | nr_key_bits += high_bit_offset; |
| 62 | l_v = *l & (~0ULL >> high_bit_offset); |
| 63 | r_v = *r & (~0ULL >> high_bit_offset); |
| 64 | |
| 65 | while (1) { |
| 66 | if (nr_key_bits < 64) { |
| 67 | l_v >>= 64 - nr_key_bits; |
| 68 | r_v >>= 64 - nr_key_bits; |
| 69 | nr_key_bits = 0; |
| 70 | } else { |
| 71 | nr_key_bits -= 64; |
| 72 | } |
| 73 | |
| 74 | if (!nr_key_bits || l_v != r_v) |
| 75 | break; |
| 76 | |
| 77 | l = next_word(l); |
| 78 | r = next_word(r); |
| 79 | |
| 80 | l_v = *l; |
| 81 | r_v = *r; |
| 82 | } |
| 83 | |
| 84 | return cmp_int(l_v, r_v); |
| 85 | } |
| 86 | #endif |
| 87 | |
| 88 | static inline __pure __flatten |
| 89 | int __bch2_bkey_cmp_packed_format_checked_inlined(const struct bkey_packed *l, |
| 90 | const struct bkey_packed *r, |
| 91 | const struct btree *b) |
| 92 | { |
| 93 | const struct bkey_format *f = &b->format; |
| 94 | int ret; |
| 95 | |
| 96 | EBUG_ON(!bkey_packed(l) || !bkey_packed(r)); |
| 97 | EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f)); |
| 98 | |
| 99 | ret = __bkey_cmp_bits(high_word(f, l), |
| 100 | high_word(f, r), |
| 101 | b->nr_key_bits); |
| 102 | |
| 103 | EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l), |
| 104 | bkey_unpack_pos(b, r))); |
| 105 | return ret; |
| 106 | } |
| 107 | |
| 108 | static inline __pure __flatten |
| 109 | int bch2_bkey_cmp_packed_inlined(const struct btree *b, |
| 110 | const struct bkey_packed *l, |
| 111 | const struct bkey_packed *r) |
| 112 | { |
| 113 | struct bkey unpacked; |
| 114 | |
| 115 | if (likely(bkey_packed(l) && bkey_packed(r))) |
| 116 | return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b); |
| 117 | |
| 118 | if (bkey_packed(l)) { |
| 119 | __bkey_unpack_key_format_checked(b, &unpacked, l); |
| 120 | l = (void *) &unpacked; |
| 121 | } else if (bkey_packed(r)) { |
| 122 | __bkey_unpack_key_format_checked(b, &unpacked, r); |
| 123 | r = (void *) &unpacked; |
| 124 | } |
| 125 | |
| 126 | return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p); |
| 127 | } |
| 128 | |
| 129 | #endif /* _BCACHEFS_BKEY_CMP_H */ |