s390/bpf: implement bpf_tail_call() helper
bpf_tail_call() arguments:
- ctx......: Context pointer
- jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table
- index....: Index in the jump table
In this implementation s390x JIT does stack unwinding and jumps into the
callee program prologue. Caller and callee use the same stack.
With this patch a tail call generates the following code on s390x:
if (index >= array->map.max_entries)
goto out
000003ff8001c7e4: e31030100016 llgf %r1,16(%r3)
000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828
if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
goto out;
000003ff8001c7f0: a7080001 lhi %r0,1
000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15)
000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828
prog = array->prog[index];
if (prog == NULL)
goto out;
000003ff8001c800: eb140003000d sllg %r1,%r4,3
000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1)
000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828
Restore registers before calling function
000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15)
000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15)
goto *(prog->bpf_func + tail_call_start);
000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0)
000003ff8001c824: 47f01006 bc 15,6(%r1)
Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
index de156ba..f6498ee 100644
--- a/arch/s390/net/bpf_jit.h
+++ b/arch/s390/net/bpf_jit.h
@@ -28,6 +28,9 @@
* | old backchain | |
* +---------------+ |
* | r15 - r6 | |
+ * +---------------+ |
+ * | 4 byte align | |
+ * | tail_call_cnt | |
* BFP -> +===============+ |
* | | |
* | BPF stack | |
@@ -46,14 +49,17 @@
* R15 -> +---------------+ + low
*
* We get 160 bytes stack space from calling function, but only use
- * 11 * 8 byte (old backchain + r15 - r6) for storing registers.
+ * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt.
*/
#define STK_SPACE (MAX_BPF_STACK + 8 + 4 + 4 + 160)
-#define STK_160_UNUSED (160 - 11 * 8)
+#define STK_160_UNUSED (160 - 12 * 8)
#define STK_OFF (STK_SPACE - STK_160_UNUSED)
#define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */
#define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */
+#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
+#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
+
/* Offset to skip condition code check */
#define OFF_OK 4