1
0
Fork 0
mirror of https://git.openwrt.org/openwrt/openwrt.git synced 2024-05-12 18:46:21 +02:00

kernel: backport a rewrite of the mips eBPF JIT implementation

This adds support for eBPF JIT for 32 bit targets and significantly improves
correctness.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
Felix Fietkau 2021-10-13 17:49:31 +02:00
parent 0eed96ca5d
commit f2e1e156c0
22 changed files with 9497 additions and 10 deletions

View File

@ -0,0 +1,65 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:02 +0200
Subject: [PATCH] MIPS: uasm: Enable muhu opcode for MIPS R6
Enable the 'muhu' instruction, complementing the existing 'mulu', needed
to implement a MIPS32 BPF JIT.
Also fix a typo in the existing definition of 'dmulu'.
Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
This patch is a dependency for my 32-bit MIPS eBPF JIT.
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
---
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -145,6 +145,7 @@ Ip_u1(_mtlo);
Ip_u3u1u2(_mul);
Ip_u1u2(_multu);
Ip_u3u1u2(_mulu);
+Ip_u3u1u2(_muhu);
Ip_u3u1u2(_nor);
Ip_u3u1u2(_or);
Ip_u2u1u3(_ori);
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -90,7 +90,7 @@ static const struct insn insn_table[insn
RS | RT | RD},
[insn_dmtc0] = {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET},
[insn_dmultu] = {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT},
- [insn_dmulu] = {M(spec_op, 0, 0, 0, dmult_dmul_op, dmultu_op),
+ [insn_dmulu] = {M(spec_op, 0, 0, 0, dmultu_dmulu_op, dmultu_op),
RS | RT | RD},
[insn_drotr] = {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE},
[insn_drotr32] = {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE},
@@ -150,6 +150,8 @@ static const struct insn insn_table[insn
[insn_mtlo] = {M(spec_op, 0, 0, 0, 0, mtlo_op), RS},
[insn_mulu] = {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op),
RS | RT | RD},
+ [insn_muhu] = {M(spec_op, 0, 0, 0, multu_muhu_op, multu_op),
+ RS | RT | RD},
#ifndef CONFIG_CPU_MIPSR6
[insn_mul] = {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
#else
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -59,7 +59,7 @@ enum opcode {
insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld,
insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi,
insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0,
- insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_nor,
+ insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_muhu, insn_nor,
insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc,
insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll,
insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra,
@@ -344,6 +344,7 @@ I_u1(_mtlo)
I_u3u1u2(_mul)
I_u1u2(_multu)
I_u3u1u2(_mulu)
+I_u3u1u2(_muhu)
I_u3u1u2(_nor)
I_u3u1u2(_or)
I_u2u1u3(_ori)

View File

@ -0,0 +1,31 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:03 +0200
Subject: [PATCH] mips: uasm: Add workaround for Loongson-2F nop CPU errata
This patch implements a workaround for the Loongson-2F nop in generated,
code, if the existing option CONFIG_CPU_NOP_WORKAROUND is set. Before,
the binutils option -mfix-loongson2f-nop was enabled, but no workaround
was done when emitting MIPS code. Now, the nop pseudo instruction is
emitted as "or ax,ax,zero" instead of the default "sll zero,zero,0". This
is consistent with the workaround implemented by binutils.
Link: https://sourceware.org/legacy-ml/binutils/2009-11/msg00387.html
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -249,7 +249,11 @@ static inline void uasm_l##lb(struct uas
#define uasm_i_bnezl(buf, rs, off) uasm_i_bnel(buf, rs, 0, off)
#define uasm_i_ehb(buf) uasm_i_sll(buf, 0, 0, 3)
#define uasm_i_move(buf, a, b) UASM_i_ADDU(buf, a, 0, b)
+#ifdef CONFIG_CPU_NOP_WORKAROUNDS
+#define uasm_i_nop(buf) uasm_i_or(buf, 1, 1, 0)
+#else
#define uasm_i_nop(buf) uasm_i_sll(buf, 0, 0, 0)
+#endif
#define uasm_i_ssnop(buf) uasm_i_sll(buf, 0, 0, 1)
static inline void uasm_i_drotr_safe(u32 **p, unsigned int a1,

View File

@ -0,0 +1,1005 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:05 +0200
Subject: [PATCH] mips: bpf: Add new eBPF JIT for 64-bit MIPS
This is an implementation on of an eBPF JIT for 64-bit MIPS III-V and
MIPS64r1-r6. It uses the same framework introduced by the 32-bit JIT.
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
---
create mode 100644 arch/mips/net/bpf_jit_comp64.c
--- /dev/null
+++ b/arch/mips/net/bpf_jit_comp64.c
@@ -0,0 +1,991 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Just-In-Time compiler for eBPF bytecode on MIPS.
+ * Implementation of JIT functions for 64-bit CPUs.
+ *
+ * Copyright (c) 2021 Anyfi Networks AB.
+ * Author: Johan Almbladh <johan.almbladh@gmail.com>
+ *
+ * Based on code and ideas from
+ * Copyright (c) 2017 Cavium, Inc.
+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
+ * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <asm/cpu-features.h>
+#include <asm/isa-rev.h>
+#include <asm/uasm.h>
+
+#include "bpf_jit_comp.h"
+
+/* MIPS t0-t3 are not available in the n64 ABI */
+#undef MIPS_R_T0
+#undef MIPS_R_T1
+#undef MIPS_R_T2
+#undef MIPS_R_T3
+
+/* Stack is 16-byte aligned in n64 ABI */
+#define MIPS_STACK_ALIGNMENT 16
+
+/* Extra 64-bit eBPF registers used by JIT */
+#define JIT_REG_TC (MAX_BPF_JIT_REG + 0)
+#define JIT_REG_ZX (MAX_BPF_JIT_REG + 1)
+
+/* Number of prologue bytes to skip when doing a tail call */
+#define JIT_TCALL_SKIP 4
+
+/* Callee-saved CPU registers that the JIT must preserve */
+#define JIT_CALLEE_REGS \
+ (BIT(MIPS_R_S0) | \
+ BIT(MIPS_R_S1) | \
+ BIT(MIPS_R_S2) | \
+ BIT(MIPS_R_S3) | \
+ BIT(MIPS_R_S4) | \
+ BIT(MIPS_R_S5) | \
+ BIT(MIPS_R_S6) | \
+ BIT(MIPS_R_S7) | \
+ BIT(MIPS_R_GP) | \
+ BIT(MIPS_R_FP) | \
+ BIT(MIPS_R_RA))
+
+/* Caller-saved CPU registers available for JIT use */
+#define JIT_CALLER_REGS \
+ (BIT(MIPS_R_A5) | \
+ BIT(MIPS_R_A6) | \
+ BIT(MIPS_R_A7))
+/*
+ * Mapping of 64-bit eBPF registers to 64-bit native MIPS registers.
+ * MIPS registers t4 - t7 may be used by the JIT as temporary registers.
+ * MIPS registers t8 - t9 are reserved for single-register common functions.
+ */
+static const u8 bpf2mips64[] = {
+ /* Return value from in-kernel function, and exit value from eBPF */
+ [BPF_REG_0] = MIPS_R_V0,
+ /* Arguments from eBPF program to in-kernel function */
+ [BPF_REG_1] = MIPS_R_A0,
+ [BPF_REG_2] = MIPS_R_A1,
+ [BPF_REG_3] = MIPS_R_A2,
+ [BPF_REG_4] = MIPS_R_A3,
+ [BPF_REG_5] = MIPS_R_A4,
+ /* Callee-saved registers that in-kernel function will preserve */
+ [BPF_REG_6] = MIPS_R_S0,
+ [BPF_REG_7] = MIPS_R_S1,
+ [BPF_REG_8] = MIPS_R_S2,
+ [BPF_REG_9] = MIPS_R_S3,
+ /* Read-only frame pointer to access the eBPF stack */
+ [BPF_REG_FP] = MIPS_R_FP,
+ /* Temporary register for blinding constants */
+ [BPF_REG_AX] = MIPS_R_AT,
+ /* Tail call count register, caller-saved */
+ [JIT_REG_TC] = MIPS_R_A5,
+ /* Constant for register zero-extension */
+ [JIT_REG_ZX] = MIPS_R_V1,
+};
+
+/*
+ * MIPS 32-bit operations on 64-bit registers generate a sign-extended
+ * result. However, the eBPF ISA mandates zero-extension, so we rely on the
+ * verifier to add that for us (emit_zext_ver). In addition, ALU arithmetic
+ * operations, right shift and byte swap require properly sign-extended
+ * operands or the result is unpredictable. We emit explicit sign-extensions
+ * in those cases.
+ */
+
+/* Sign extension */
+static void emit_sext(struct jit_context *ctx, u8 dst, u8 src)
+{
+ emit(ctx, sll, dst, src, 0);
+ clobber_reg(ctx, dst);
+}
+
+/* Zero extension */
+static void emit_zext(struct jit_context *ctx, u8 dst)
+{
+ if (cpu_has_mips64r2 || cpu_has_mips64r6) {
+ emit(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+ } else {
+ emit(ctx, and, dst, dst, bpf2mips64[JIT_REG_ZX]);
+ access_reg(ctx, JIT_REG_ZX); /* We need the ZX register */
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* Zero extension, if verifier does not do it for us */
+static void emit_zext_ver(struct jit_context *ctx, u8 dst)
+{
+ if (!ctx->program->aux->verifier_zext)
+ emit_zext(ctx, dst);
+}
+
+/* dst = imm (64-bit) */
+static void emit_mov_i64(struct jit_context *ctx, u8 dst, u64 imm64)
+{
+ if (imm64 >= 0xffffffffffff8000ULL || imm64 < 0x8000ULL) {
+ emit(ctx, daddiu, dst, MIPS_R_ZERO, (s16)imm64);
+ } else if (imm64 >= 0xffffffff80000000ULL ||
+ (imm64 < 0x80000000 && imm64 > 0xffff)) {
+ emit(ctx, lui, dst, (s16)(imm64 >> 16));
+ emit(ctx, ori, dst, dst, (u16)imm64 & 0xffff);
+ } else {
+ u8 acc = MIPS_R_ZERO;
+ int k;
+
+ for (k = 0; k < 4; k++) {
+ u16 half = imm64 >> (48 - 16 * k);
+
+ if (acc == dst)
+ emit(ctx, dsll, dst, dst, 16);
+
+ if (half) {
+ emit(ctx, ori, dst, acc, half);
+ acc = dst;
+ }
+ }
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* ALU immediate operation (64-bit) */
+static void emit_alu_i64(struct jit_context *ctx, u8 dst, s32 imm, u8 op)
+{
+ switch (BPF_OP(op)) {
+ /* dst = dst | imm */
+ case BPF_OR:
+ emit(ctx, ori, dst, dst, (u16)imm);
+ break;
+ /* dst = dst ^ imm */
+ case BPF_XOR:
+ emit(ctx, xori, dst, dst, (u16)imm);
+ break;
+ /* dst = -dst */
+ case BPF_NEG:
+ emit(ctx, dsubu, dst, MIPS_R_ZERO, dst);
+ break;
+ /* dst = dst << imm */
+ case BPF_LSH:
+ emit(ctx, dsll_safe, dst, dst, imm);
+ break;
+ /* dst = dst >> imm */
+ case BPF_RSH:
+ emit(ctx, dsrl_safe, dst, dst, imm);
+ break;
+ /* dst = dst >> imm (arithmetic) */
+ case BPF_ARSH:
+ emit(ctx, dsra_safe, dst, dst, imm);
+ break;
+ /* dst = dst + imm */
+ case BPF_ADD:
+ emit(ctx, daddiu, dst, dst, imm);
+ break;
+ /* dst = dst - imm */
+ case BPF_SUB:
+ emit(ctx, daddiu, dst, dst, -imm);
+ break;
+ default:
+ /* Width-generic operations */
+ emit_alu_i(ctx, dst, imm, op);
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* ALU register operation (64-bit) */
+static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op)
+{
+ switch (BPF_OP(op)) {
+ /* dst = dst << src */
+ case BPF_LSH:
+ emit(ctx, dsllv, dst, dst, src);
+ break;
+ /* dst = dst >> src */
+ case BPF_RSH:
+ emit(ctx, dsrlv, dst, dst, src);
+ break;
+ /* dst = dst >> src (arithmetic) */
+ case BPF_ARSH:
+ emit(ctx, dsrav, dst, dst, src);
+ break;
+ /* dst = dst + src */
+ case BPF_ADD:
+ emit(ctx, daddu, dst, dst, src);
+ break;
+ /* dst = dst - src */
+ case BPF_SUB:
+ emit(ctx, dsubu, dst, dst, src);
+ break;
+ /* dst = dst * src */
+ case BPF_MUL:
+ if (cpu_has_mips64r6) {
+ emit(ctx, dmulu, dst, dst, src);
+ } else {
+ emit(ctx, dmultu, dst, src);
+ emit(ctx, mflo, dst);
+ }
+ break;
+ /* dst = dst / src */
+ case BPF_DIV:
+ if (cpu_has_mips64r6) {
+ emit(ctx, ddivu_r6, dst, dst, src);
+ } else {
+ emit(ctx, ddivu, dst, src);
+ emit(ctx, mflo, dst);
+ }
+ break;
+ /* dst = dst % src */
+ case BPF_MOD:
+ if (cpu_has_mips64r6) {
+ emit(ctx, dmodu, dst, dst, src);
+ } else {
+ emit(ctx, ddivu, dst, src);
+ emit(ctx, mfhi, dst);
+ }
+ break;
+ default:
+ /* Width-generic operations */
+ emit_alu_r(ctx, dst, src, op);
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* Swap sub words in a register double word */
+static void emit_swap_r64(struct jit_context *ctx, u8 dst, u8 mask, u32 bits)
+{
+ u8 tmp = MIPS_R_T9;
+
+ emit(ctx, and, tmp, dst, mask); /* tmp = dst & mask */
+ emit(ctx, dsll, tmp, tmp, bits); /* tmp = tmp << bits */
+ emit(ctx, dsrl, dst, dst, bits); /* dst = dst >> bits */
+ emit(ctx, and, dst, dst, mask); /* dst = dst & mask */
+ emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */
+}
+
+/* Swap bytes and truncate a register double word, word or half word */
+static void emit_bswap_r64(struct jit_context *ctx, u8 dst, u32 width)
+{
+ switch (width) {
+ /* Swap bytes in a double word */
+ case 64:
+ if (cpu_has_mips64r2 || cpu_has_mips64r6) {
+ emit(ctx, dsbh, dst, dst);
+ emit(ctx, dshd, dst, dst);
+ } else {
+ u8 t1 = MIPS_R_T6;
+ u8 t2 = MIPS_R_T7;
+
+ emit(ctx, dsll32, t2, dst, 0); /* t2 = dst << 32 */
+ emit(ctx, dsrl32, dst, dst, 0); /* dst = dst >> 32 */
+ emit(ctx, or, dst, dst, t2); /* dst = dst | t2 */
+
+ emit(ctx, ori, t2, MIPS_R_ZERO, 0xffff);
+ emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */
+ emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */
+ emit_swap_r64(ctx, dst, t1, 16);/* dst = swap16(dst) */
+
+ emit(ctx, lui, t2, 0xff); /* t2 = 0x00ff0000 */
+ emit(ctx, ori, t2, t2, 0xff); /* t2 = t2 | 0x00ff */
+ emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */
+ emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */
+ emit_swap_r64(ctx, dst, t1, 8); /* dst = swap8(dst) */
+ }
+ break;
+ /* Swap bytes in a half word */
+ /* Swap bytes in a word */
+ case 32:
+ case 16:
+ emit_sext(ctx, dst, dst);
+ emit_bswap_r(ctx, dst, width);
+ if (cpu_has_mips64r2 || cpu_has_mips64r6)
+ emit_zext(ctx, dst);
+ break;
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* Truncate a register double word, word or half word */
+static void emit_trunc_r64(struct jit_context *ctx, u8 dst, u32 width)
+{
+ switch (width) {
+ case 64:
+ break;
+ /* Zero-extend a word */
+ case 32:
+ emit_zext(ctx, dst);
+ break;
+ /* Zero-extend a half word */
+ case 16:
+ emit(ctx, andi, dst, dst, 0xffff);
+ break;
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* Load operation: dst = *(size*)(src + off) */
+static void emit_ldx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size)
+{
+ switch (size) {
+ /* Load a byte */
+ case BPF_B:
+ emit(ctx, lbu, dst, off, src);
+ break;
+ /* Load a half word */
+ case BPF_H:
+ emit(ctx, lhu, dst, off, src);
+ break;
+ /* Load a word */
+ case BPF_W:
+ emit(ctx, lwu, dst, off, src);
+ break;
+ /* Load a double word */
+ case BPF_DW:
+ emit(ctx, ld, dst, off, src);
+ break;
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* Store operation: *(size *)(dst + off) = src */
+static void emit_stx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size)
+{
+ switch (size) {
+ /* Store a byte */
+ case BPF_B:
+ emit(ctx, sb, src, off, dst);
+ break;
+ /* Store a half word */
+ case BPF_H:
+ emit(ctx, sh, src, off, dst);
+ break;
+ /* Store a word */
+ case BPF_W:
+ emit(ctx, sw, src, off, dst);
+ break;
+ /* Store a double word */
+ case BPF_DW:
+ emit(ctx, sd, src, off, dst);
+ break;
+ }
+}
+
+/* Atomic read-modify-write */
+static void emit_atomic_r64(struct jit_context *ctx,
+ u8 dst, u8 src, s16 off, u8 code)
+{
+ u8 t1 = MIPS_R_T6;
+ u8 t2 = MIPS_R_T7;
+
+ emit(ctx, lld, t1, off, dst);
+ switch (code) {
+ case BPF_ADD:
+ emit(ctx, daddu, t2, t1, src);
+ break;
+ case BPF_AND:
+ emit(ctx, and, t2, t1, src);
+ break;
+ case BPF_OR:
+ emit(ctx, or, t2, t1, src);
+ break;
+ case BPF_XOR:
+ emit(ctx, xor, t2, t1, src);
+ break;
+ }
+ emit(ctx, scd, t2, off, dst);
+ emit(ctx, beqz, t2, -16);
+ emit(ctx, nop); /* Delay slot */
+}
+
+/* Function call */
+static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
+{
+ u8 zx = bpf2mips64[JIT_REG_ZX];
+ u8 tmp = MIPS_R_T6;
+ bool fixed;
+ u64 addr;
+
+ /* Decode the call address */
+ if (bpf_jit_get_func_addr(ctx->program, insn, false,
+ &addr, &fixed) < 0)
+ return -1;
+ if (!fixed)
+ return -1;
+
+ /* Push caller-saved registers on stack */
+ push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
+
+ /* Emit function call */
+ emit_mov_i64(ctx, tmp, addr);
+ emit(ctx, jalr, MIPS_R_RA, tmp);
+ emit(ctx, nop); /* Delay slot */
+
+ /* Restore caller-saved registers */
+ pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
+
+ /* Re-initialize the JIT zero-extension register if accessed */
+ if (ctx->accessed & BIT(JIT_REG_ZX)) {
+ emit(ctx, daddiu, zx, MIPS_R_ZERO, -1);
+ emit(ctx, dsrl32, zx, zx, 0);
+ }
+
+ clobber_reg(ctx, MIPS_R_RA);
+ clobber_reg(ctx, MIPS_R_V0);
+ clobber_reg(ctx, MIPS_R_V1);
+ return 0;
+}
+
+/* Function tail call */
+static int emit_tail_call(struct jit_context *ctx)
+{
+ u8 ary = bpf2mips64[BPF_REG_2];
+ u8 ind = bpf2mips64[BPF_REG_3];
+ u8 tcc = bpf2mips64[JIT_REG_TC];
+ u8 tmp = MIPS_R_T6;
+ int off;
+
+ /*
+ * Tail call:
+ * eBPF R1 - function argument (context ptr), passed in a0-a1
+ * eBPF R2 - ptr to object with array of function entry points
+ * eBPF R3 - array index of function to be called
+ */
+
+ /* if (ind >= ary->map.max_entries) goto out */
+ off = offsetof(struct bpf_array, map.max_entries);
+ if (off > 0x7fff)
+ return -1;
+ emit(ctx, lwu, tmp, off, ary); /* tmp = ary->map.max_entrs*/
+ emit(ctx, sltu, tmp, ind, tmp); /* tmp = ind < t1 */
+ emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/
+
+ /* if (--TCC < 0) goto out */
+ emit(ctx, daddiu, tcc, tcc, -1); /* tcc-- (delay slot) */
+ emit(ctx, bltz, tcc, get_offset(ctx, 1)); /* PC += off(1) if tcc < 0 */
+ /* (next insn delay slot) */
+ /* prog = ary->ptrs[ind] */
+ off = offsetof(struct bpf_array, ptrs);
+ if (off > 0x7fff)
+ return -1;
+ emit(ctx, dsll, tmp, ind, 3); /* tmp = ind << 3 */
+ emit(ctx, daddu, tmp, tmp, ary); /* tmp += ary */
+ emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */
+
+ /* if (prog == 0) goto out */
+ emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/
+ emit(ctx, nop); /* Delay slot */
+
+ /* func = prog->bpf_func + 8 (prologue skip offset) */
+ off = offsetof(struct bpf_prog, bpf_func);
+ if (off > 0x7fff)
+ return -1;
+ emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */
+ emit(ctx, daddiu, tmp, tmp, JIT_TCALL_SKIP); /* tmp += skip (4) */
+
+ /* goto func */
+ build_epilogue(ctx, tmp);
+ access_reg(ctx, JIT_REG_TC);
+ return 0;
+}
+
+/*
+ * Stack frame layout for a JITed program (stack grows down).
+ *
+ * Higher address : Previous stack frame :
+ * +===========================+ <--- MIPS sp before call
+ * | Callee-saved registers, |
+ * | including RA and FP |
+ * +---------------------------+ <--- eBPF FP (MIPS fp)
+ * | Local eBPF variables |
+ * | allocated by program |
+ * +---------------------------+
+ * | Reserved for caller-saved |
+ * | registers |
+ * Lower address +===========================+ <--- MIPS sp
+ */
+
+/* Build program prologue to set up the stack and registers */
+void build_prologue(struct jit_context *ctx)
+{
+ u8 fp = bpf2mips64[BPF_REG_FP];
+ u8 tc = bpf2mips64[JIT_REG_TC];
+ u8 zx = bpf2mips64[JIT_REG_ZX];
+ int stack, saved, locals, reserved;
+
+ /*
+ * The first instruction initializes the tail call count register.
+ * On a tail call, the calling function jumps into the prologue
+ * after this instruction.
+ */
+ emit(ctx, addiu, tc, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT + 1, 0xffff));
+
+ /* === Entry-point for tail calls === */
+
+ /*
+ * If the eBPF frame pointer and tail call count registers were
+ * accessed they must be preserved. Mark them as clobbered here
+ * to save and restore them on the stack as needed.
+ */
+ if (ctx->accessed & BIT(BPF_REG_FP))
+ clobber_reg(ctx, fp);
+ if (ctx->accessed & BIT(JIT_REG_TC))
+ clobber_reg(ctx, tc);
+ if (ctx->accessed & BIT(JIT_REG_ZX))
+ clobber_reg(ctx, zx);
+
+ /* Compute the stack space needed for callee-saved registers */
+ saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u64);
+ saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);
+
+ /* Stack space used by eBPF program local data */
+ locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);
+
+ /*
+ * If we are emitting function calls, reserve extra stack space for
+ * caller-saved registers needed by the JIT. The required space is
+ * computed automatically during resource usage discovery (pass 1).
+ */
+ reserved = ctx->stack_used;
+
+ /* Allocate the stack frame */
+ stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
+ if (stack)
+ emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack);
+
+ /* Store callee-saved registers on stack */
+ push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
+
+ /* Initialize the eBPF frame pointer if accessed */
+ if (ctx->accessed & BIT(BPF_REG_FP))
+ emit(ctx, daddiu, fp, MIPS_R_SP, stack - saved);
+
+ /* Initialize the ePF JIT zero-extension register if accessed */
+ if (ctx->accessed & BIT(JIT_REG_ZX)) {
+ emit(ctx, daddiu, zx, MIPS_R_ZERO, -1);
+ emit(ctx, dsrl32, zx, zx, 0);
+ }
+
+ ctx->saved_size = saved;
+ ctx->stack_size = stack;
+}
+
+/* Build the program epilogue to restore the stack and registers */
+void build_epilogue(struct jit_context *ctx, int dest_reg)
+{
+ /* Restore callee-saved registers from stack */
+ pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
+ ctx->stack_size - ctx->saved_size);
+
+ /* Release the stack frame */
+ if (ctx->stack_size)
+ emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
+
+ /* Jump to return address and sign-extend the 32-bit return value */
+ emit(ctx, jr, dest_reg);
+ emit(ctx, sll, MIPS_R_V0, MIPS_R_V0, 0); /* Delay slot */
+}
+
+/* Build one eBPF instruction */
+int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
+{
+ u8 dst = bpf2mips64[insn->dst_reg];
+ u8 src = bpf2mips64[insn->src_reg];
+ u8 code = insn->code;
+ s16 off = insn->off;
+ s32 imm = insn->imm;
+ s32 val, rel;
+ u8 alu, jmp;
+
+ switch (code) {
+ /* ALU operations */
+ /* dst = imm */
+ case BPF_ALU | BPF_MOV | BPF_K:
+ emit_mov_i(ctx, dst, imm);
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = src */
+ case BPF_ALU | BPF_MOV | BPF_X:
+ if (imm == 1) {
+ /* Special mov32 for zext */
+ emit_zext(ctx, dst);
+ } else {
+ emit_mov_r(ctx, dst, src);
+ emit_zext_ver(ctx, dst);
+ }
+ break;
+ /* dst = -dst */
+ case BPF_ALU | BPF_NEG:
+ emit_sext(ctx, dst, dst);
+ emit_alu_i(ctx, dst, 0, BPF_NEG);
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = dst & imm */
+ /* dst = dst | imm */
+ /* dst = dst ^ imm */
+ /* dst = dst << imm */
+ case BPF_ALU | BPF_OR | BPF_K:
+ case BPF_ALU | BPF_AND | BPF_K:
+ case BPF_ALU | BPF_XOR | BPF_K:
+ case BPF_ALU | BPF_LSH | BPF_K:
+ if (!valid_alu_i(BPF_OP(code), imm)) {
+ emit_mov_i(ctx, MIPS_R_T4, imm);
+ emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
+ } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
+ emit_alu_i(ctx, dst, val, alu);
+ }
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = dst >> imm */
+ /* dst = dst >> imm (arithmetic) */
+ /* dst = dst + imm */
+ /* dst = dst - imm */
+ /* dst = dst * imm */
+ /* dst = dst / imm */
+ /* dst = dst % imm */
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU | BPF_ARSH | BPF_K:
+ case BPF_ALU | BPF_ADD | BPF_K:
+ case BPF_ALU | BPF_SUB | BPF_K:
+ case BPF_ALU | BPF_MUL | BPF_K:
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ if (!valid_alu_i(BPF_OP(code), imm)) {
+ emit_sext(ctx, dst, dst);
+ emit_mov_i(ctx, MIPS_R_T4, imm);
+ emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
+ } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
+ emit_sext(ctx, dst, dst);
+ emit_alu_i(ctx, dst, val, alu);
+ }
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = dst & src */
+ /* dst = dst | src */
+ /* dst = dst ^ src */
+ /* dst = dst << src */
+ case BPF_ALU | BPF_AND | BPF_X:
+ case BPF_ALU | BPF_OR | BPF_X:
+ case BPF_ALU | BPF_XOR | BPF_X:
+ case BPF_ALU | BPF_LSH | BPF_X:
+ emit_alu_r(ctx, dst, src, BPF_OP(code));
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = dst >> src */
+ /* dst = dst >> src (arithmetic) */
+ /* dst = dst + src */
+ /* dst = dst - src */
+ /* dst = dst * src */
+ /* dst = dst / src */
+ /* dst = dst % src */
+ case BPF_ALU | BPF_RSH | BPF_X:
+ case BPF_ALU | BPF_ARSH | BPF_X:
+ case BPF_ALU | BPF_ADD | BPF_X:
+ case BPF_ALU | BPF_SUB | BPF_X:
+ case BPF_ALU | BPF_MUL | BPF_X:
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU | BPF_MOD | BPF_X:
+ emit_sext(ctx, dst, dst);
+ emit_sext(ctx, MIPS_R_T4, src);
+ emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = imm (64-bit) */
+ case BPF_ALU64 | BPF_MOV | BPF_K:
+ emit_mov_i(ctx, dst, imm);
+ break;
+ /* dst = src (64-bit) */
+ case BPF_ALU64 | BPF_MOV | BPF_X:
+ emit_mov_r(ctx, dst, src);
+ break;
+ /* dst = -dst (64-bit) */
+ case BPF_ALU64 | BPF_NEG:
+ emit_alu_i64(ctx, dst, 0, BPF_NEG);
+ break;
+ /* dst = dst & imm (64-bit) */
+ /* dst = dst | imm (64-bit) */
+ /* dst = dst ^ imm (64-bit) */
+ /* dst = dst << imm (64-bit) */
+ /* dst = dst >> imm (64-bit) */
+ /* dst = dst >> imm ((64-bit, arithmetic) */
+ /* dst = dst + imm (64-bit) */
+ /* dst = dst - imm (64-bit) */
+ /* dst = dst * imm (64-bit) */
+ /* dst = dst / imm (64-bit) */
+ /* dst = dst % imm (64-bit) */
+ case BPF_ALU64 | BPF_AND | BPF_K:
+ case BPF_ALU64 | BPF_OR | BPF_K:
+ case BPF_ALU64 | BPF_XOR | BPF_K:
+ case BPF_ALU64 | BPF_LSH | BPF_K:
+ case BPF_ALU64 | BPF_RSH | BPF_K:
+ case BPF_ALU64 | BPF_ARSH | BPF_K:
+ case BPF_ALU64 | BPF_ADD | BPF_K:
+ case BPF_ALU64 | BPF_SUB | BPF_K:
+ case BPF_ALU64 | BPF_MUL | BPF_K:
+ case BPF_ALU64 | BPF_DIV | BPF_K:
+ case BPF_ALU64 | BPF_MOD | BPF_K:
+ if (!valid_alu_i(BPF_OP(code), imm)) {
+ emit_mov_i(ctx, MIPS_R_T4, imm);
+ emit_alu_r64(ctx, dst, MIPS_R_T4, BPF_OP(code));
+ } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
+ emit_alu_i64(ctx, dst, val, alu);
+ }
+ break;
+ /* dst = dst & src (64-bit) */
+ /* dst = dst | src (64-bit) */
+ /* dst = dst ^ src (64-bit) */
+ /* dst = dst << src (64-bit) */
+ /* dst = dst >> src (64-bit) */
+ /* dst = dst >> src (64-bit, arithmetic) */
+ /* dst = dst + src (64-bit) */
+ /* dst = dst - src (64-bit) */
+ /* dst = dst * src (64-bit) */
+ /* dst = dst / src (64-bit) */
+ /* dst = dst % src (64-bit) */
+ case BPF_ALU64 | BPF_AND | BPF_X:
+ case BPF_ALU64 | BPF_OR | BPF_X:
+ case BPF_ALU64 | BPF_XOR | BPF_X:
+ case BPF_ALU64 | BPF_LSH | BPF_X:
+ case BPF_ALU64 | BPF_RSH | BPF_X:
+ case BPF_ALU64 | BPF_ARSH | BPF_X:
+ case BPF_ALU64 | BPF_ADD | BPF_X:
+ case BPF_ALU64 | BPF_SUB | BPF_X:
+ case BPF_ALU64 | BPF_MUL | BPF_X:
+ case BPF_ALU64 | BPF_DIV | BPF_X:
+ case BPF_ALU64 | BPF_MOD | BPF_X:
+ emit_alu_r64(ctx, dst, src, BPF_OP(code));
+ break;
+ /* dst = htole(dst) */
+ /* dst = htobe(dst) */
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ if (BPF_SRC(code) ==
+#ifdef __BIG_ENDIAN
+ BPF_FROM_LE
+#else
+ BPF_FROM_BE
+#endif
+ )
+ emit_bswap_r64(ctx, dst, imm);
+ else
+ emit_trunc_r64(ctx, dst, imm);
+ break;
+ /* dst = imm64 */
+ case BPF_LD | BPF_IMM | BPF_DW:
+ emit_mov_i64(ctx, dst, (u32)imm | ((u64)insn[1].imm << 32));
+ return 1;
+ /* LDX: dst = *(size *)(src + off) */
+ case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ emit_ldx(ctx, dst, src, off, BPF_SIZE(code));
+ break;
+ /* ST: *(size *)(dst + off) = imm */
+ case BPF_ST | BPF_MEM | BPF_W:
+ case BPF_ST | BPF_MEM | BPF_H:
+ case BPF_ST | BPF_MEM | BPF_B:
+ case BPF_ST | BPF_MEM | BPF_DW:
+ emit_mov_i(ctx, MIPS_R_T4, imm);
+ emit_stx(ctx, dst, MIPS_R_T4, off, BPF_SIZE(code));
+ break;
+ /* STX: *(size *)(dst + off) = src */
+ case BPF_STX | BPF_MEM | BPF_W:
+ case BPF_STX | BPF_MEM | BPF_H:
+ case BPF_STX | BPF_MEM | BPF_B:
+ case BPF_STX | BPF_MEM | BPF_DW:
+ emit_stx(ctx, dst, src, off, BPF_SIZE(code));
+ break;
+ /* Speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+ /* Atomics */
+ case BPF_STX | BPF_XADD | BPF_W:
+ case BPF_STX | BPF_XADD | BPF_DW:
+ switch (imm) {
+ case BPF_ADD:
+ case BPF_AND:
+ case BPF_OR:
+ case BPF_XOR:
+ if (BPF_SIZE(code) == BPF_DW) {
+ emit_atomic_r64(ctx, dst, src, off, imm);
+ } else { /* 32-bit, no fetch */
+ emit_sext(ctx, MIPS_R_T4, src);
+ emit_atomic_r(ctx, dst, MIPS_R_T4, off, imm);
+ }
+ break;
+ default:
+ goto notyet;
+ }
+ break;
+ /* PC += off if dst == src */
+ /* PC += off if dst != src */
+ /* PC += off if dst & src */
+ /* PC += off if dst > src */
+ /* PC += off if dst >= src */
+ /* PC += off if dst < src */
+ /* PC += off if dst <= src */
+ /* PC += off if dst > src (signed) */
+ /* PC += off if dst >= src (signed) */
+ /* PC += off if dst < src (signed) */
+ /* PC += off if dst <= src (signed) */
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JSET | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
+ if (off == 0)
+ break;
+ setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
+ emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */
+ emit_sext(ctx, MIPS_R_T5, src); /* Sign-extended src */
+ emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp);
+ if (finish_jmp(ctx, jmp, off) < 0)
+ goto toofar;
+ break;
+ /* PC += off if dst == imm */
+ /* PC += off if dst != imm */
+ /* PC += off if dst & imm */
+ /* PC += off if dst > imm */
+ /* PC += off if dst >= imm */
+ /* PC += off if dst < imm */
+ /* PC += off if dst <= imm */
+ /* PC += off if dst > imm (signed) */
+ /* PC += off if dst >= imm (signed) */
+ /* PC += off if dst < imm (signed) */
+ /* PC += off if dst <= imm (signed) */
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ if (off == 0)
+ break;
+ setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
+ emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */
+ if (valid_jmp_i(jmp, imm)) {
+ emit_jmp_i(ctx, MIPS_R_T4, imm, rel, jmp);
+ } else {
+ /* Move large immediate to register, sign-extended */
+ emit_mov_i(ctx, MIPS_R_T5, imm);
+ emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp);
+ }
+ if (finish_jmp(ctx, jmp, off) < 0)
+ goto toofar;
+ break;
+ /* PC += off if dst == src */
+ /* PC += off if dst != src */
+ /* PC += off if dst & src */
+ /* PC += off if dst > src */
+ /* PC += off if dst >= src */
+ /* PC += off if dst < src */
+ /* PC += off if dst <= src */
+ /* PC += off if dst > src (signed) */
+ /* PC += off if dst >= src (signed) */
+ /* PC += off if dst < src (signed) */
+ /* PC += off if dst <= src (signed) */
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ if (off == 0)
+ break;
+ setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
+ emit_jmp_r(ctx, dst, src, rel, jmp);
+ if (finish_jmp(ctx, jmp, off) < 0)
+ goto toofar;
+ break;
+ /* PC += off if dst == imm */
+ /* PC += off if dst != imm */
+ /* PC += off if dst & imm */
+ /* PC += off if dst > imm */
+ /* PC += off if dst >= imm */
+ /* PC += off if dst < imm */
+ /* PC += off if dst <= imm */
+ /* PC += off if dst > imm (signed) */
+ /* PC += off if dst >= imm (signed) */
+ /* PC += off if dst < imm (signed) */
+ /* PC += off if dst <= imm (signed) */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ if (off == 0)
+ break;
+ setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
+ if (valid_jmp_i(jmp, imm)) {
+ emit_jmp_i(ctx, dst, imm, rel, jmp);
+ } else {
+ /* Move large immediate to register */
+ emit_mov_i(ctx, MIPS_R_T4, imm);
+ emit_jmp_r(ctx, dst, MIPS_R_T4, rel, jmp);
+ }
+ if (finish_jmp(ctx, jmp, off) < 0)
+ goto toofar;
+ break;
+ /* PC += off */
+ case BPF_JMP | BPF_JA:
+ if (off == 0)
+ break;
+ if (emit_ja(ctx, off) < 0)
+ goto toofar;
+ break;
+ /* Tail call */
+ case BPF_JMP | BPF_TAIL_CALL:
+ if (emit_tail_call(ctx) < 0)
+ goto invalid;
+ break;
+ /* Function call */
+ case BPF_JMP | BPF_CALL:
+ if (emit_call(ctx, insn) < 0)
+ goto invalid;
+ break;
+ /* Function return */
+ case BPF_JMP | BPF_EXIT:
+ /*
+ * Optimization: when last instruction is EXIT
+ * simply continue to epilogue.
+ */
+ if (ctx->bpf_index == ctx->program->len - 1)
+ break;
+ if (emit_exit(ctx) < 0)
+ goto toofar;
+ break;
+
+ default:
+invalid:
+ pr_err_once("unknown opcode %02x\n", code);
+ return -EINVAL;
+notyet:
+ pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+ return -EFAULT;
+toofar:
+ pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
+ ctx->bpf_index, code);
+ return -E2BIG;
+ }
+ return 0;
+}

View File

@ -0,0 +1,120 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:06 +0200
Subject: [PATCH] mips: bpf: Add JIT workarounds for CPU errata
This patch adds workarounds for the following CPU errata to the MIPS
eBPF JIT, if enabled in the kernel configuration.
- R10000 ll/sc weak ordering
- Loongson-3 ll/sc weak ordering
- Loongson-2F jump hang
The Loongson-2F nop errata is implemented in uasm, which the JIT uses,
so no additional mitigations are needed for that.
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
--- a/arch/mips/net/bpf_jit_comp.c
+++ b/arch/mips/net/bpf_jit_comp.c
@@ -404,6 +404,7 @@ void emit_alu_r(struct jit_context *ctx,
/* Atomic read-modify-write (32-bit) */
void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code)
{
+ LLSC_sync(ctx);
emit(ctx, ll, MIPS_R_T9, off, dst);
switch (code) {
case BPF_ADD:
@@ -420,18 +421,19 @@ void emit_atomic_r(struct jit_context *c
break;
}
emit(ctx, sc, MIPS_R_T8, off, dst);
- emit(ctx, beqz, MIPS_R_T8, -16);
+ emit(ctx, LLSC_beqz, MIPS_R_T8, -16 - LLSC_offset);
emit(ctx, nop); /* Delay slot */
}
/* Atomic compare-and-exchange (32-bit) */
void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off)
{
+ LLSC_sync(ctx);
emit(ctx, ll, MIPS_R_T9, off, dst);
emit(ctx, bne, MIPS_R_T9, res, 12);
emit(ctx, move, MIPS_R_T8, src); /* Delay slot */
emit(ctx, sc, MIPS_R_T8, off, dst);
- emit(ctx, beqz, MIPS_R_T8, -20);
+ emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset);
emit(ctx, move, res, MIPS_R_T9); /* Delay slot */
clobber_reg(ctx, res);
}
--- a/arch/mips/net/bpf_jit_comp.h
+++ b/arch/mips/net/bpf_jit_comp.h
@@ -87,7 +87,7 @@ struct jit_context {
};
/* Emit the instruction if the JIT memory space has been allocated */
-#define emit(ctx, func, ...) \
+#define __emit(ctx, func, ...) \
do { \
if ((ctx)->target != NULL) { \
u32 *p = &(ctx)->target[ctx->jit_index]; \
@@ -95,6 +95,30 @@ do { \
} \
(ctx)->jit_index++; \
} while (0)
+#define emit(...) __emit(__VA_ARGS__)
+
+/* Workaround for R10000 ll/sc errata */
+#ifdef CONFIG_WAR_R10000
+#define LLSC_beqz beqzl
+#else
+#define LLSC_beqz beqz
+#endif
+
+/* Workaround for Loongson-3 ll/sc errata */
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
+#define LLSC_sync(ctx) emit(ctx, sync, 0)
+#define LLSC_offset 4
+#else
+#define LLSC_sync(ctx)
+#define LLSC_offset 0
+#endif
+
+/* Workaround for Loongson-2F jump errata */
+#ifdef CONFIG_CPU_JUMP_WORKAROUNDS
+#define JALR_MASK 0xffffffffcfffffffULL
+#else
+#define JALR_MASK (~0ULL)
+#endif
/*
* Mark a BPF register as accessed, it needs to be
--- a/arch/mips/net/bpf_jit_comp64.c
+++ b/arch/mips/net/bpf_jit_comp64.c
@@ -375,6 +375,7 @@ static void emit_atomic_r64(struct jit_c
u8 t1 = MIPS_R_T6;
u8 t2 = MIPS_R_T7;
+ LLSC_sync(ctx);
emit(ctx, lld, t1, off, dst);
switch (code) {
case BPF_ADD:
@@ -391,7 +392,7 @@ static void emit_atomic_r64(struct jit_c
break;
}
emit(ctx, scd, t2, off, dst);
- emit(ctx, beqz, t2, -16);
+ emit(ctx, LLSC_beqz, t2, -16 - LLSC_offset);
emit(ctx, nop); /* Delay slot */
}
@@ -414,7 +415,7 @@ static int emit_call(struct jit_context
push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
/* Emit function call */
- emit_mov_i64(ctx, tmp, addr);
+ emit_mov_i64(ctx, tmp, addr & JALR_MASK);
emit(ctx, jalr, MIPS_R_RA, tmp);
emit(ctx, nop); /* Delay slot */

View File

@ -0,0 +1,61 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:07 +0200
Subject: [PATCH] mips: bpf: Enable eBPF JITs
This patch enables the new eBPF JITs for 32-bit and 64-bit MIPS. It also
disables the old cBPF JIT to so cBPF programs are converted to use the
new JIT.
Workarounds for R4000 CPU errata are not implemented by the JIT, so the
JIT is disabled if any of those workarounds are configured.
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
---
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3294,6 +3294,7 @@ S: Supported
F: arch/arm64/net/
BPF JIT for MIPS (32-BIT AND 64-BIT)
+M: Johan Almbladh <johan.almbladh@anyfinetworks.com>
M: Paul Burton <paulburton@kernel.org>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -49,7 +49,6 @@ config MIPS
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
select HAVE_ASM_MODVERSIONS
- select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS
select HAVE_CONTEXT_TRACKING
select HAVE_TIF_NOHZ
select HAVE_C_RECORDMCOUNT
@@ -57,7 +56,10 @@ config MIPS
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
- select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2
+ select HAVE_EBPF_JIT if !CPU_MICROMIPS && \
+ !CPU_DADDI_WORKAROUNDS && \
+ !CPU_R4000_WORKAROUNDS && \
+ !CPU_R4400_WORKAROUNDS
select HAVE_EXIT_THREAD
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
--- a/arch/mips/net/Makefile
+++ b/arch/mips/net/Makefile
@@ -2,9 +2,10 @@
# MIPS networking code
obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o
+obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o
ifeq ($(CONFIG_32BIT),y)
- obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o bpf_jit_comp32.o
+ obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp32.o
else
- obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
+ obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp64.o
endif

View File

@ -0,0 +1,387 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:08 +0200
Subject: [PATCH] mips: bpf: Remove old BPF JIT implementations
This patch removes the old 32-bit cBPF and 64-bit eBPF JIT implementations.
They are replaced by a new eBPF implementation that supports both 32-bit
and 64-bit MIPS CPUs.
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
---
delete mode 100644 arch/mips/net/bpf_jit.c
delete mode 100644 arch/mips/net/bpf_jit.h
delete mode 100644 arch/mips/net/bpf_jit_asm.S
delete mode 100644 arch/mips/net/ebpf_jit.c
--- a/arch/mips/net/bpf_jit.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Just-In-Time compiler for BPF filters on MIPS
- *
- * Copyright (c) 2014 Imagination Technologies Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- */
-
-#ifndef BPF_JIT_MIPS_OP_H
-#define BPF_JIT_MIPS_OP_H
-
-/* Registers used by JIT */
-#define MIPS_R_ZERO 0
-#define MIPS_R_V0 2
-#define MIPS_R_A0 4
-#define MIPS_R_A1 5
-#define MIPS_R_T4 12
-#define MIPS_R_T5 13
-#define MIPS_R_T6 14
-#define MIPS_R_T7 15
-#define MIPS_R_S0 16
-#define MIPS_R_S1 17
-#define MIPS_R_S2 18
-#define MIPS_R_S3 19
-#define MIPS_R_S4 20
-#define MIPS_R_S5 21
-#define MIPS_R_S6 22
-#define MIPS_R_S7 23
-#define MIPS_R_SP 29
-#define MIPS_R_RA 31
-
-/* Conditional codes */
-#define MIPS_COND_EQ 0x1
-#define MIPS_COND_GE (0x1 << 1)
-#define MIPS_COND_GT (0x1 << 2)
-#define MIPS_COND_NE (0x1 << 3)
-#define MIPS_COND_ALL (0x1 << 4)
-/* Conditionals on X register or K immediate */
-#define MIPS_COND_X (0x1 << 5)
-#define MIPS_COND_K (0x1 << 6)
-
-#define r_ret MIPS_R_V0
-
-/*
- * Use 2 scratch registers to avoid pipeline interlocks.
- * There is no overhead during epilogue and prologue since
- * any of the $s0-$s6 registers will only be preserved if
- * they are going to actually be used.
- */
-#define r_skb_hl MIPS_R_S0 /* skb header length */
-#define r_skb_data MIPS_R_S1 /* skb actual data */
-#define r_off MIPS_R_S2
-#define r_A MIPS_R_S3
-#define r_X MIPS_R_S4
-#define r_skb MIPS_R_S5
-#define r_M MIPS_R_S6
-#define r_skb_len MIPS_R_S7
-#define r_s0 MIPS_R_T4 /* scratch reg 1 */
-#define r_s1 MIPS_R_T5 /* scratch reg 2 */
-#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */
-#define r_tmp MIPS_R_T7 /* No need to preserve this */
-#define r_zero MIPS_R_ZERO
-#define r_sp MIPS_R_SP
-#define r_ra MIPS_R_RA
-
-#ifndef __ASSEMBLY__
-
-/* Declare ASM helpers */
-
-#define DECLARE_LOAD_FUNC(func) \
- extern u8 func(unsigned long *skb, int offset); \
- extern u8 func##_negative(unsigned long *skb, int offset); \
- extern u8 func##_positive(unsigned long *skb, int offset)
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-
-#endif
-
-#endif /* BPF_JIT_MIPS_OP_H */
--- a/arch/mips/net/bpf_jit_asm.S
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF
- * compiler.
- *
- * Copyright (C) 2015 Imagination Technologies Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2 of the License.
- */
-
-#include <asm/asm.h>
-#include <asm/isa-rev.h>
-#include <asm/regdef.h>
-#include "bpf_jit.h"
-
-/* ABI
- *
- * r_skb_hl skb header length
- * r_skb_data skb data
- * r_off(a1) offset register
- * r_A BPF register A
- * r_X PF register X
- * r_skb(a0) *skb
- * r_M *scratch memory
- * r_skb_le skb length
- * r_s0 Scratch register 0
- * r_s1 Scratch register 1
- *
- * On entry:
- * a0: *skb
- * a1: offset (imm or imm + X)
- *
- * All non-BPF-ABI registers are free for use. On return, we only
- * care about r_ret. The BPF-ABI registers are assumed to remain
- * unmodified during the entire filter operation.
- */
-
-#define skb a0
-#define offset a1
-#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */
-
- /* We know better :) so prevent assembler reordering etc */
- .set noreorder
-
-#define is_offset_negative(TYPE) \
- /* If offset is negative we have more work to do */ \
- slti t0, offset, 0; \
- bgtz t0, bpf_slow_path_##TYPE##_neg; \
- /* Be careful what follows in DS. */
-
-#define is_offset_in_header(SIZE, TYPE) \
- /* Reading from header? */ \
- addiu $r_s0, $r_skb_hl, -SIZE; \
- slt t0, $r_s0, offset; \
- bgtz t0, bpf_slow_path_##TYPE; \
-
-LEAF(sk_load_word)
- is_offset_negative(word)
-FEXPORT(sk_load_word_positive)
- is_offset_in_header(4, word)
- /* Offset within header boundaries */
- PTR_ADDU t1, $r_skb_data, offset
- .set reorder
- lw $r_A, 0(t1)
- .set noreorder
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- wsbh t0, $r_A
- rotr $r_A, t0, 16
-# else
- sll t0, $r_A, 24
- srl t1, $r_A, 24
- srl t2, $r_A, 8
- or t0, t0, t1
- andi t2, t2, 0xff00
- andi t1, $r_A, 0xff00
- or t0, t0, t2
- sll t1, t1, 8
- or $r_A, t0, t1
-# endif
-#endif
- jr $r_ra
- move $r_ret, zero
- END(sk_load_word)
-
-LEAF(sk_load_half)
- is_offset_negative(half)
-FEXPORT(sk_load_half_positive)
- is_offset_in_header(2, half)
- /* Offset within header boundaries */
- PTR_ADDU t1, $r_skb_data, offset
- lhu $r_A, 0(t1)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- wsbh $r_A, $r_A
-# else
- sll t0, $r_A, 8
- srl t1, $r_A, 8
- andi t0, t0, 0xff00
- or $r_A, t0, t1
-# endif
-#endif
- jr $r_ra
- move $r_ret, zero
- END(sk_load_half)
-
-LEAF(sk_load_byte)
- is_offset_negative(byte)
-FEXPORT(sk_load_byte_positive)
- is_offset_in_header(1, byte)
- /* Offset within header boundaries */
- PTR_ADDU t1, $r_skb_data, offset
- lbu $r_A, 0(t1)
- jr $r_ra
- move $r_ret, zero
- END(sk_load_byte)
-
-/*
- * call skb_copy_bits:
- * (prototype in linux/skbuff.h)
- *
- * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len)
- *
- * o32 mandates we leave 4 spaces for argument registers in case
- * the callee needs to use them. Even though we don't care about
- * the argument registers ourselves, we need to allocate that space
- * to remain ABI compliant since the callee may want to use that space.
- * We also allocate 2 more spaces for $r_ra and our return register (*to).
- *
- * n64 is a bit different. The *caller* will allocate the space to preserve
- * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no
- * good reason but it does not matter that much really.
- *
- * (void *to) is returned in r_s0
- *
- */
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-#define DS_OFFSET(SIZE) (4 * SZREG)
-#else
-#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE))
-#endif
-#define bpf_slow_path_common(SIZE) \
- /* Quick check. Are we within reasonable boundaries? */ \
- LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \
- sltu $r_s0, offset, $r_s1; \
- beqz $r_s0, fault; \
- /* Load 4th argument in DS */ \
- LONG_ADDIU a3, zero, SIZE; \
- PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
- PTR_LA t0, skb_copy_bits; \
- PTR_S $r_ra, (5 * SZREG)($r_sp); \
- /* Assign low slot to a2 */ \
- PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \
- jalr t0; \
- /* Reset our destination slot (DS but it's ok) */ \
- INT_S zero, (4 * SZREG)($r_sp); \
- /* \
- * skb_copy_bits returns 0 on success and -EFAULT \
- * on error. Our data live in a2. Do not bother with \
- * our data if an error has been returned. \
- */ \
- /* Restore our frame */ \
- PTR_L $r_ra, (5 * SZREG)($r_sp); \
- INT_L $r_s0, (4 * SZREG)($r_sp); \
- bltz v0, fault; \
- PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
- move $r_ret, zero; \
-
-NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
- bpf_slow_path_common(4)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- wsbh t0, $r_s0
- jr $r_ra
- rotr $r_A, t0, 16
-# else
- sll t0, $r_s0, 24
- srl t1, $r_s0, 24
- srl t2, $r_s0, 8
- or t0, t0, t1
- andi t2, t2, 0xff00
- andi t1, $r_s0, 0xff00
- or t0, t0, t2
- sll t1, t1, 8
- jr $r_ra
- or $r_A, t0, t1
-# endif
-#else
- jr $r_ra
- move $r_A, $r_s0
-#endif
-
- END(bpf_slow_path_word)
-
-NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
- bpf_slow_path_common(2)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- jr $r_ra
- wsbh $r_A, $r_s0
-# else
- sll t0, $r_s0, 8
- andi t1, $r_s0, 0xff00
- andi t0, t0, 0xff00
- srl t1, t1, 8
- jr $r_ra
- or $r_A, t0, t1
-# endif
-#else
- jr $r_ra
- move $r_A, $r_s0
-#endif
-
- END(bpf_slow_path_half)
-
-NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp)
- bpf_slow_path_common(1)
- jr $r_ra
- move $r_A, $r_s0
-
- END(bpf_slow_path_byte)
-
-/*
- * Negative entry points
- */
- .macro bpf_is_end_of_data
- li t0, SKF_LL_OFF
- /* Reading link layer data? */
- slt t1, offset, t0
- bgtz t1, fault
- /* Be careful what follows in DS. */
- .endm
-/*
- * call skb_copy_bits:
- * (prototype in linux/filter.h)
- *
- * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
- * int k, unsigned int size)
- *
- * see above (bpf_slow_path_common) for ABI restrictions
- */
-#define bpf_negative_common(SIZE) \
- PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
- PTR_LA t0, bpf_internal_load_pointer_neg_helper; \
- PTR_S $r_ra, (5 * SZREG)($r_sp); \
- jalr t0; \
- li a2, SIZE; \
- PTR_L $r_ra, (5 * SZREG)($r_sp); \
- /* Check return pointer */ \
- beqz v0, fault; \
- PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
- /* Preserve our pointer */ \
- move $r_s0, v0; \
- /* Set return value */ \
- move $r_ret, zero; \
-
-bpf_slow_path_word_neg:
- bpf_is_end_of_data
-NESTED(sk_load_word_negative, (6 * SZREG), $r_sp)
- bpf_negative_common(4)
- jr $r_ra
- lw $r_A, 0($r_s0)
- END(sk_load_word_negative)
-
-bpf_slow_path_half_neg:
- bpf_is_end_of_data
-NESTED(sk_load_half_negative, (6 * SZREG), $r_sp)
- bpf_negative_common(2)
- jr $r_ra
- lhu $r_A, 0($r_s0)
- END(sk_load_half_negative)
-
-bpf_slow_path_byte_neg:
- bpf_is_end_of_data
-NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp)
- bpf_negative_common(1)
- jr $r_ra
- lbu $r_A, 0($r_s0)
- END(sk_load_byte_negative)
-
-fault:
- jr $r_ra
- addiu $r_ret, zero, 1

View File

@ -25,7 +25,7 @@ Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2078,7 +2078,7 @@ config CPU_MIPS32
@@ -2080,7 +2080,7 @@ config CPU_MIPS32
config CPU_MIPS64
bool
default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R5 || \

View File

@ -0,0 +1,65 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:02 +0200
Subject: [PATCH] MIPS: uasm: Enable muhu opcode for MIPS R6
Enable the 'muhu' instruction, complementing the existing 'mulu', needed
to implement a MIPS32 BPF JIT.
Also fix a typo in the existing definition of 'dmulu'.
Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
This patch is a dependency for my 32-bit MIPS eBPF JIT.
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
---
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -145,6 +145,7 @@ Ip_u1(_mtlo);
Ip_u3u1u2(_mul);
Ip_u1u2(_multu);
Ip_u3u1u2(_mulu);
+Ip_u3u1u2(_muhu);
Ip_u3u1u2(_nor);
Ip_u3u1u2(_or);
Ip_u2u1u3(_ori);
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -90,7 +90,7 @@ static const struct insn insn_table[insn
RS | RT | RD},
[insn_dmtc0] = {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET},
[insn_dmultu] = {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT},
- [insn_dmulu] = {M(spec_op, 0, 0, 0, dmult_dmul_op, dmultu_op),
+ [insn_dmulu] = {M(spec_op, 0, 0, 0, dmultu_dmulu_op, dmultu_op),
RS | RT | RD},
[insn_drotr] = {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE},
[insn_drotr32] = {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE},
@@ -150,6 +150,8 @@ static const struct insn insn_table[insn
[insn_mtlo] = {M(spec_op, 0, 0, 0, 0, mtlo_op), RS},
[insn_mulu] = {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op),
RS | RT | RD},
+ [insn_muhu] = {M(spec_op, 0, 0, 0, multu_muhu_op, multu_op),
+ RS | RT | RD},
#ifndef CONFIG_CPU_MIPSR6
[insn_mul] = {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
#else
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -59,7 +59,7 @@ enum opcode {
insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld,
insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi,
insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0,
- insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_nor,
+ insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_muhu, insn_nor,
insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc,
insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll,
insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra,
@@ -344,6 +344,7 @@ I_u1(_mtlo)
I_u3u1u2(_mul)
I_u1u2(_multu)
I_u3u1u2(_mulu)
+I_u3u1u2(_muhu)
I_u3u1u2(_nor)
I_u3u1u2(_or)
I_u2u1u3(_ori)

View File

@ -0,0 +1,31 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:03 +0200
Subject: [PATCH] mips: uasm: Add workaround for Loongson-2F nop CPU errata
This patch implements a workaround for the Loongson-2F nop in generated,
code, if the existing option CONFIG_CPU_NOP_WORKAROUND is set. Before,
the binutils option -mfix-loongson2f-nop was enabled, but no workaround
was done when emitting MIPS code. Now, the nop pseudo instruction is
emitted as "or ax,ax,zero" instead of the default "sll zero,zero,0". This
is consistent with the workaround implemented by binutils.
Link: https://sourceware.org/legacy-ml/binutils/2009-11/msg00387.html
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -249,7 +249,11 @@ static inline void uasm_l##lb(struct uas
#define uasm_i_bnezl(buf, rs, off) uasm_i_bnel(buf, rs, 0, off)
#define uasm_i_ehb(buf) uasm_i_sll(buf, 0, 0, 3)
#define uasm_i_move(buf, a, b) UASM_i_ADDU(buf, a, 0, b)
+#ifdef CONFIG_CPU_NOP_WORKAROUNDS
+#define uasm_i_nop(buf) uasm_i_or(buf, 1, 1, 0)
+#else
#define uasm_i_nop(buf) uasm_i_sll(buf, 0, 0, 0)
+#endif
#define uasm_i_ssnop(buf) uasm_i_sll(buf, 0, 0, 1)
static inline void uasm_i_drotr_safe(u32 **p, unsigned int a1,

View File

@ -0,0 +1,1005 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:05 +0200
Subject: [PATCH] mips: bpf: Add new eBPF JIT for 64-bit MIPS
This is an implementation on of an eBPF JIT for 64-bit MIPS III-V and
MIPS64r1-r6. It uses the same framework introduced by the 32-bit JIT.
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
---
create mode 100644 arch/mips/net/bpf_jit_comp64.c
--- /dev/null
+++ b/arch/mips/net/bpf_jit_comp64.c
@@ -0,0 +1,991 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Just-In-Time compiler for eBPF bytecode on MIPS.
+ * Implementation of JIT functions for 64-bit CPUs.
+ *
+ * Copyright (c) 2021 Anyfi Networks AB.
+ * Author: Johan Almbladh <johan.almbladh@gmail.com>
+ *
+ * Based on code and ideas from
+ * Copyright (c) 2017 Cavium, Inc.
+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
+ * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <asm/cpu-features.h>
+#include <asm/isa-rev.h>
+#include <asm/uasm.h>
+
+#include "bpf_jit_comp.h"
+
+/* MIPS t0-t3 are not available in the n64 ABI */
+#undef MIPS_R_T0
+#undef MIPS_R_T1
+#undef MIPS_R_T2
+#undef MIPS_R_T3
+
+/* Stack is 16-byte aligned in n64 ABI */
+#define MIPS_STACK_ALIGNMENT 16
+
+/* Extra 64-bit eBPF registers used by JIT */
+#define JIT_REG_TC (MAX_BPF_JIT_REG + 0)
+#define JIT_REG_ZX (MAX_BPF_JIT_REG + 1)
+
+/* Number of prologue bytes to skip when doing a tail call */
+#define JIT_TCALL_SKIP 4
+
+/* Callee-saved CPU registers that the JIT must preserve */
+#define JIT_CALLEE_REGS \
+ (BIT(MIPS_R_S0) | \
+ BIT(MIPS_R_S1) | \
+ BIT(MIPS_R_S2) | \
+ BIT(MIPS_R_S3) | \
+ BIT(MIPS_R_S4) | \
+ BIT(MIPS_R_S5) | \
+ BIT(MIPS_R_S6) | \
+ BIT(MIPS_R_S7) | \
+ BIT(MIPS_R_GP) | \
+ BIT(MIPS_R_FP) | \
+ BIT(MIPS_R_RA))
+
+/* Caller-saved CPU registers available for JIT use */
+#define JIT_CALLER_REGS \
+ (BIT(MIPS_R_A5) | \
+ BIT(MIPS_R_A6) | \
+ BIT(MIPS_R_A7))
+/*
+ * Mapping of 64-bit eBPF registers to 64-bit native MIPS registers.
+ * MIPS registers t4 - t7 may be used by the JIT as temporary registers.
+ * MIPS registers t8 - t9 are reserved for single-register common functions.
+ */
+static const u8 bpf2mips64[] = {
+ /* Return value from in-kernel function, and exit value from eBPF */
+ [BPF_REG_0] = MIPS_R_V0,
+ /* Arguments from eBPF program to in-kernel function */
+ [BPF_REG_1] = MIPS_R_A0,
+ [BPF_REG_2] = MIPS_R_A1,
+ [BPF_REG_3] = MIPS_R_A2,
+ [BPF_REG_4] = MIPS_R_A3,
+ [BPF_REG_5] = MIPS_R_A4,
+ /* Callee-saved registers that in-kernel function will preserve */
+ [BPF_REG_6] = MIPS_R_S0,
+ [BPF_REG_7] = MIPS_R_S1,
+ [BPF_REG_8] = MIPS_R_S2,
+ [BPF_REG_9] = MIPS_R_S3,
+ /* Read-only frame pointer to access the eBPF stack */
+ [BPF_REG_FP] = MIPS_R_FP,
+ /* Temporary register for blinding constants */
+ [BPF_REG_AX] = MIPS_R_AT,
+ /* Tail call count register, caller-saved */
+ [JIT_REG_TC] = MIPS_R_A5,
+ /* Constant for register zero-extension */
+ [JIT_REG_ZX] = MIPS_R_V1,
+};
+
+/*
+ * MIPS 32-bit operations on 64-bit registers generate a sign-extended
+ * result. However, the eBPF ISA mandates zero-extension, so we rely on the
+ * verifier to add that for us (emit_zext_ver). In addition, ALU arithmetic
+ * operations, right shift and byte swap require properly sign-extended
+ * operands or the result is unpredictable. We emit explicit sign-extensions
+ * in those cases.
+ */
+
+/* Sign extension */
+static void emit_sext(struct jit_context *ctx, u8 dst, u8 src)
+{
+ emit(ctx, sll, dst, src, 0);
+ clobber_reg(ctx, dst);
+}
+
+/* Zero extension */
+static void emit_zext(struct jit_context *ctx, u8 dst)
+{
+ if (cpu_has_mips64r2 || cpu_has_mips64r6) {
+ emit(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+ } else {
+ emit(ctx, and, dst, dst, bpf2mips64[JIT_REG_ZX]);
+ access_reg(ctx, JIT_REG_ZX); /* We need the ZX register */
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* Zero extension, if verifier does not do it for us */
+static void emit_zext_ver(struct jit_context *ctx, u8 dst)
+{
+ if (!ctx->program->aux->verifier_zext)
+ emit_zext(ctx, dst);
+}
+
+/* dst = imm (64-bit) */
+static void emit_mov_i64(struct jit_context *ctx, u8 dst, u64 imm64)
+{
+ if (imm64 >= 0xffffffffffff8000ULL || imm64 < 0x8000ULL) {
+ emit(ctx, daddiu, dst, MIPS_R_ZERO, (s16)imm64);
+ } else if (imm64 >= 0xffffffff80000000ULL ||
+ (imm64 < 0x80000000 && imm64 > 0xffff)) {
+ emit(ctx, lui, dst, (s16)(imm64 >> 16));
+ emit(ctx, ori, dst, dst, (u16)imm64 & 0xffff);
+ } else {
+ u8 acc = MIPS_R_ZERO;
+ int k;
+
+ for (k = 0; k < 4; k++) {
+ u16 half = imm64 >> (48 - 16 * k);
+
+ if (acc == dst)
+ emit(ctx, dsll, dst, dst, 16);
+
+ if (half) {
+ emit(ctx, ori, dst, acc, half);
+ acc = dst;
+ }
+ }
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* ALU immediate operation (64-bit) */
+static void emit_alu_i64(struct jit_context *ctx, u8 dst, s32 imm, u8 op)
+{
+ switch (BPF_OP(op)) {
+ /* dst = dst | imm */
+ case BPF_OR:
+ emit(ctx, ori, dst, dst, (u16)imm);
+ break;
+ /* dst = dst ^ imm */
+ case BPF_XOR:
+ emit(ctx, xori, dst, dst, (u16)imm);
+ break;
+ /* dst = -dst */
+ case BPF_NEG:
+ emit(ctx, dsubu, dst, MIPS_R_ZERO, dst);
+ break;
+ /* dst = dst << imm */
+ case BPF_LSH:
+ emit(ctx, dsll_safe, dst, dst, imm);
+ break;
+ /* dst = dst >> imm */
+ case BPF_RSH:
+ emit(ctx, dsrl_safe, dst, dst, imm);
+ break;
+ /* dst = dst >> imm (arithmetic) */
+ case BPF_ARSH:
+ emit(ctx, dsra_safe, dst, dst, imm);
+ break;
+ /* dst = dst + imm */
+ case BPF_ADD:
+ emit(ctx, daddiu, dst, dst, imm);
+ break;
+ /* dst = dst - imm */
+ case BPF_SUB:
+ emit(ctx, daddiu, dst, dst, -imm);
+ break;
+ default:
+ /* Width-generic operations */
+ emit_alu_i(ctx, dst, imm, op);
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* ALU register operation (64-bit) */
+static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op)
+{
+ switch (BPF_OP(op)) {
+ /* dst = dst << src */
+ case BPF_LSH:
+ emit(ctx, dsllv, dst, dst, src);
+ break;
+ /* dst = dst >> src */
+ case BPF_RSH:
+ emit(ctx, dsrlv, dst, dst, src);
+ break;
+ /* dst = dst >> src (arithmetic) */
+ case BPF_ARSH:
+ emit(ctx, dsrav, dst, dst, src);
+ break;
+ /* dst = dst + src */
+ case BPF_ADD:
+ emit(ctx, daddu, dst, dst, src);
+ break;
+ /* dst = dst - src */
+ case BPF_SUB:
+ emit(ctx, dsubu, dst, dst, src);
+ break;
+ /* dst = dst * src */
+ case BPF_MUL:
+ if (cpu_has_mips64r6) {
+ emit(ctx, dmulu, dst, dst, src);
+ } else {
+ emit(ctx, dmultu, dst, src);
+ emit(ctx, mflo, dst);
+ }
+ break;
+ /* dst = dst / src */
+ case BPF_DIV:
+ if (cpu_has_mips64r6) {
+ emit(ctx, ddivu_r6, dst, dst, src);
+ } else {
+ emit(ctx, ddivu, dst, src);
+ emit(ctx, mflo, dst);
+ }
+ break;
+ /* dst = dst % src */
+ case BPF_MOD:
+ if (cpu_has_mips64r6) {
+ emit(ctx, dmodu, dst, dst, src);
+ } else {
+ emit(ctx, ddivu, dst, src);
+ emit(ctx, mfhi, dst);
+ }
+ break;
+ default:
+ /* Width-generic operations */
+ emit_alu_r(ctx, dst, src, op);
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* Swap sub words in a register double word */
+static void emit_swap_r64(struct jit_context *ctx, u8 dst, u8 mask, u32 bits)
+{
+ u8 tmp = MIPS_R_T9;
+
+ emit(ctx, and, tmp, dst, mask); /* tmp = dst & mask */
+ emit(ctx, dsll, tmp, tmp, bits); /* tmp = tmp << bits */
+ emit(ctx, dsrl, dst, dst, bits); /* dst = dst >> bits */
+ emit(ctx, and, dst, dst, mask); /* dst = dst & mask */
+ emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */
+}
+
+/* Swap bytes and truncate a register double word, word or half word */
+static void emit_bswap_r64(struct jit_context *ctx, u8 dst, u32 width)
+{
+ switch (width) {
+ /* Swap bytes in a double word */
+ case 64:
+ if (cpu_has_mips64r2 || cpu_has_mips64r6) {
+ emit(ctx, dsbh, dst, dst);
+ emit(ctx, dshd, dst, dst);
+ } else {
+ u8 t1 = MIPS_R_T6;
+ u8 t2 = MIPS_R_T7;
+
+ emit(ctx, dsll32, t2, dst, 0); /* t2 = dst << 32 */
+ emit(ctx, dsrl32, dst, dst, 0); /* dst = dst >> 32 */
+ emit(ctx, or, dst, dst, t2); /* dst = dst | t2 */
+
+ emit(ctx, ori, t2, MIPS_R_ZERO, 0xffff);
+ emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */
+ emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */
+ emit_swap_r64(ctx, dst, t1, 16);/* dst = swap16(dst) */
+
+ emit(ctx, lui, t2, 0xff); /* t2 = 0x00ff0000 */
+ emit(ctx, ori, t2, t2, 0xff); /* t2 = t2 | 0x00ff */
+ emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */
+ emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */
+ emit_swap_r64(ctx, dst, t1, 8); /* dst = swap8(dst) */
+ }
+ break;
+ /* Swap bytes in a half word */
+ /* Swap bytes in a word */
+ case 32:
+ case 16:
+ emit_sext(ctx, dst, dst);
+ emit_bswap_r(ctx, dst, width);
+ if (cpu_has_mips64r2 || cpu_has_mips64r6)
+ emit_zext(ctx, dst);
+ break;
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* Truncate a register double word, word or half word */
+static void emit_trunc_r64(struct jit_context *ctx, u8 dst, u32 width)
+{
+ switch (width) {
+ case 64:
+ break;
+ /* Zero-extend a word */
+ case 32:
+ emit_zext(ctx, dst);
+ break;
+ /* Zero-extend a half word */
+ case 16:
+ emit(ctx, andi, dst, dst, 0xffff);
+ break;
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* Load operation: dst = *(size*)(src + off) */
+static void emit_ldx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size)
+{
+ switch (size) {
+ /* Load a byte */
+ case BPF_B:
+ emit(ctx, lbu, dst, off, src);
+ break;
+ /* Load a half word */
+ case BPF_H:
+ emit(ctx, lhu, dst, off, src);
+ break;
+ /* Load a word */
+ case BPF_W:
+ emit(ctx, lwu, dst, off, src);
+ break;
+ /* Load a double word */
+ case BPF_DW:
+ emit(ctx, ld, dst, off, src);
+ break;
+ }
+ clobber_reg(ctx, dst);
+}
+
+/* Store operation: *(size *)(dst + off) = src */
+static void emit_stx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size)
+{
+ switch (size) {
+ /* Store a byte */
+ case BPF_B:
+ emit(ctx, sb, src, off, dst);
+ break;
+ /* Store a half word */
+ case BPF_H:
+ emit(ctx, sh, src, off, dst);
+ break;
+ /* Store a word */
+ case BPF_W:
+ emit(ctx, sw, src, off, dst);
+ break;
+ /* Store a double word */
+ case BPF_DW:
+ emit(ctx, sd, src, off, dst);
+ break;
+ }
+}
+
+/* Atomic read-modify-write */
+static void emit_atomic_r64(struct jit_context *ctx,
+ u8 dst, u8 src, s16 off, u8 code)
+{
+ u8 t1 = MIPS_R_T6;
+ u8 t2 = MIPS_R_T7;
+
+ emit(ctx, lld, t1, off, dst);
+ switch (code) {
+ case BPF_ADD:
+ emit(ctx, daddu, t2, t1, src);
+ break;
+ case BPF_AND:
+ emit(ctx, and, t2, t1, src);
+ break;
+ case BPF_OR:
+ emit(ctx, or, t2, t1, src);
+ break;
+ case BPF_XOR:
+ emit(ctx, xor, t2, t1, src);
+ break;
+ }
+ emit(ctx, scd, t2, off, dst);
+ emit(ctx, beqz, t2, -16);
+ emit(ctx, nop); /* Delay slot */
+}
+
+/* Function call */
+static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
+{
+ u8 zx = bpf2mips64[JIT_REG_ZX];
+ u8 tmp = MIPS_R_T6;
+ bool fixed;
+ u64 addr;
+
+ /* Decode the call address */
+ if (bpf_jit_get_func_addr(ctx->program, insn, false,
+ &addr, &fixed) < 0)
+ return -1;
+ if (!fixed)
+ return -1;
+
+ /* Push caller-saved registers on stack */
+ push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
+
+ /* Emit function call */
+ emit_mov_i64(ctx, tmp, addr);
+ emit(ctx, jalr, MIPS_R_RA, tmp);
+ emit(ctx, nop); /* Delay slot */
+
+ /* Restore caller-saved registers */
+ pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
+
+ /* Re-initialize the JIT zero-extension register if accessed */
+ if (ctx->accessed & BIT(JIT_REG_ZX)) {
+ emit(ctx, daddiu, zx, MIPS_R_ZERO, -1);
+ emit(ctx, dsrl32, zx, zx, 0);
+ }
+
+ clobber_reg(ctx, MIPS_R_RA);
+ clobber_reg(ctx, MIPS_R_V0);
+ clobber_reg(ctx, MIPS_R_V1);
+ return 0;
+}
+
+/* Function tail call */
+static int emit_tail_call(struct jit_context *ctx)
+{
+ u8 ary = bpf2mips64[BPF_REG_2];
+ u8 ind = bpf2mips64[BPF_REG_3];
+ u8 tcc = bpf2mips64[JIT_REG_TC];
+ u8 tmp = MIPS_R_T6;
+ int off;
+
+ /*
+ * Tail call:
+ * eBPF R1 - function argument (context ptr), passed in a0-a1
+ * eBPF R2 - ptr to object with array of function entry points
+ * eBPF R3 - array index of function to be called
+ */
+
+ /* if (ind >= ary->map.max_entries) goto out */
+ off = offsetof(struct bpf_array, map.max_entries);
+ if (off > 0x7fff)
+ return -1;
+ emit(ctx, lwu, tmp, off, ary); /* tmp = ary->map.max_entrs*/
+ emit(ctx, sltu, tmp, ind, tmp); /* tmp = ind < t1 */
+ emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/
+
+ /* if (--TCC < 0) goto out */
+ emit(ctx, daddiu, tcc, tcc, -1); /* tcc-- (delay slot) */
+ emit(ctx, bltz, tcc, get_offset(ctx, 1)); /* PC += off(1) if tcc < 0 */
+ /* (next insn delay slot) */
+ /* prog = ary->ptrs[ind] */
+ off = offsetof(struct bpf_array, ptrs);
+ if (off > 0x7fff)
+ return -1;
+ emit(ctx, dsll, tmp, ind, 3); /* tmp = ind << 3 */
+ emit(ctx, daddu, tmp, tmp, ary); /* tmp += ary */
+ emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */
+
+ /* if (prog == 0) goto out */
+ emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/
+ emit(ctx, nop); /* Delay slot */
+
+ /* func = prog->bpf_func + 8 (prologue skip offset) */
+ off = offsetof(struct bpf_prog, bpf_func);
+ if (off > 0x7fff)
+ return -1;
+ emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */
+ emit(ctx, daddiu, tmp, tmp, JIT_TCALL_SKIP); /* tmp += skip (4) */
+
+ /* goto func */
+ build_epilogue(ctx, tmp);
+ access_reg(ctx, JIT_REG_TC);
+ return 0;
+}
+
+/*
+ * Stack frame layout for a JITed program (stack grows down).
+ *
+ * Higher address : Previous stack frame :
+ * +===========================+ <--- MIPS sp before call
+ * | Callee-saved registers, |
+ * | including RA and FP |
+ * +---------------------------+ <--- eBPF FP (MIPS fp)
+ * | Local eBPF variables |
+ * | allocated by program |
+ * +---------------------------+
+ * | Reserved for caller-saved |
+ * | registers |
+ * Lower address +===========================+ <--- MIPS sp
+ */
+
+/* Build program prologue to set up the stack and registers */
+void build_prologue(struct jit_context *ctx)
+{
+ u8 fp = bpf2mips64[BPF_REG_FP];
+ u8 tc = bpf2mips64[JIT_REG_TC];
+ u8 zx = bpf2mips64[JIT_REG_ZX];
+ int stack, saved, locals, reserved;
+
+ /*
+ * The first instruction initializes the tail call count register.
+ * On a tail call, the calling function jumps into the prologue
+ * after this instruction.
+ */
+ emit(ctx, addiu, tc, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT + 1, 0xffff));
+
+ /* === Entry-point for tail calls === */
+
+ /*
+ * If the eBPF frame pointer and tail call count registers were
+ * accessed they must be preserved. Mark them as clobbered here
+ * to save and restore them on the stack as needed.
+ */
+ if (ctx->accessed & BIT(BPF_REG_FP))
+ clobber_reg(ctx, fp);
+ if (ctx->accessed & BIT(JIT_REG_TC))
+ clobber_reg(ctx, tc);
+ if (ctx->accessed & BIT(JIT_REG_ZX))
+ clobber_reg(ctx, zx);
+
+ /* Compute the stack space needed for callee-saved registers */
+ saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u64);
+ saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);
+
+ /* Stack space used by eBPF program local data */
+ locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);
+
+ /*
+ * If we are emitting function calls, reserve extra stack space for
+ * caller-saved registers needed by the JIT. The required space is
+ * computed automatically during resource usage discovery (pass 1).
+ */
+ reserved = ctx->stack_used;
+
+ /* Allocate the stack frame */
+ stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
+ if (stack)
+ emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack);
+
+ /* Store callee-saved registers on stack */
+ push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
+
+ /* Initialize the eBPF frame pointer if accessed */
+ if (ctx->accessed & BIT(BPF_REG_FP))
+ emit(ctx, daddiu, fp, MIPS_R_SP, stack - saved);
+
+ /* Initialize the ePF JIT zero-extension register if accessed */
+ if (ctx->accessed & BIT(JIT_REG_ZX)) {
+ emit(ctx, daddiu, zx, MIPS_R_ZERO, -1);
+ emit(ctx, dsrl32, zx, zx, 0);
+ }
+
+ ctx->saved_size = saved;
+ ctx->stack_size = stack;
+}
+
+/* Build the program epilogue to restore the stack and registers */
+void build_epilogue(struct jit_context *ctx, int dest_reg)
+{
+ /* Restore callee-saved registers from stack */
+ pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
+ ctx->stack_size - ctx->saved_size);
+
+ /* Release the stack frame */
+ if (ctx->stack_size)
+ emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
+
+ /* Jump to return address and sign-extend the 32-bit return value */
+ emit(ctx, jr, dest_reg);
+ emit(ctx, sll, MIPS_R_V0, MIPS_R_V0, 0); /* Delay slot */
+}
+
+/* Build one eBPF instruction */
+int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
+{
+ u8 dst = bpf2mips64[insn->dst_reg];
+ u8 src = bpf2mips64[insn->src_reg];
+ u8 code = insn->code;
+ s16 off = insn->off;
+ s32 imm = insn->imm;
+ s32 val, rel;
+ u8 alu, jmp;
+
+ switch (code) {
+ /* ALU operations */
+ /* dst = imm */
+ case BPF_ALU | BPF_MOV | BPF_K:
+ emit_mov_i(ctx, dst, imm);
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = src */
+ case BPF_ALU | BPF_MOV | BPF_X:
+ if (imm == 1) {
+ /* Special mov32 for zext */
+ emit_zext(ctx, dst);
+ } else {
+ emit_mov_r(ctx, dst, src);
+ emit_zext_ver(ctx, dst);
+ }
+ break;
+ /* dst = -dst */
+ case BPF_ALU | BPF_NEG:
+ emit_sext(ctx, dst, dst);
+ emit_alu_i(ctx, dst, 0, BPF_NEG);
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = dst & imm */
+ /* dst = dst | imm */
+ /* dst = dst ^ imm */
+ /* dst = dst << imm */
+ case BPF_ALU | BPF_OR | BPF_K:
+ case BPF_ALU | BPF_AND | BPF_K:
+ case BPF_ALU | BPF_XOR | BPF_K:
+ case BPF_ALU | BPF_LSH | BPF_K:
+ if (!valid_alu_i(BPF_OP(code), imm)) {
+ emit_mov_i(ctx, MIPS_R_T4, imm);
+ emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
+ } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
+ emit_alu_i(ctx, dst, val, alu);
+ }
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = dst >> imm */
+ /* dst = dst >> imm (arithmetic) */
+ /* dst = dst + imm */
+ /* dst = dst - imm */
+ /* dst = dst * imm */
+ /* dst = dst / imm */
+ /* dst = dst % imm */
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU | BPF_ARSH | BPF_K:
+ case BPF_ALU | BPF_ADD | BPF_K:
+ case BPF_ALU | BPF_SUB | BPF_K:
+ case BPF_ALU | BPF_MUL | BPF_K:
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ if (!valid_alu_i(BPF_OP(code), imm)) {
+ emit_sext(ctx, dst, dst);
+ emit_mov_i(ctx, MIPS_R_T4, imm);
+ emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
+ } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
+ emit_sext(ctx, dst, dst);
+ emit_alu_i(ctx, dst, val, alu);
+ }
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = dst & src */
+ /* dst = dst | src */
+ /* dst = dst ^ src */
+ /* dst = dst << src */
+ case BPF_ALU | BPF_AND | BPF_X:
+ case BPF_ALU | BPF_OR | BPF_X:
+ case BPF_ALU | BPF_XOR | BPF_X:
+ case BPF_ALU | BPF_LSH | BPF_X:
+ emit_alu_r(ctx, dst, src, BPF_OP(code));
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = dst >> src */
+ /* dst = dst >> src (arithmetic) */
+ /* dst = dst + src */
+ /* dst = dst - src */
+ /* dst = dst * src */
+ /* dst = dst / src */
+ /* dst = dst % src */
+ case BPF_ALU | BPF_RSH | BPF_X:
+ case BPF_ALU | BPF_ARSH | BPF_X:
+ case BPF_ALU | BPF_ADD | BPF_X:
+ case BPF_ALU | BPF_SUB | BPF_X:
+ case BPF_ALU | BPF_MUL | BPF_X:
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU | BPF_MOD | BPF_X:
+ emit_sext(ctx, dst, dst);
+ emit_sext(ctx, MIPS_R_T4, src);
+ emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
+ emit_zext_ver(ctx, dst);
+ break;
+ /* dst = imm (64-bit) */
+ case BPF_ALU64 | BPF_MOV | BPF_K:
+ emit_mov_i(ctx, dst, imm);
+ break;
+ /* dst = src (64-bit) */
+ case BPF_ALU64 | BPF_MOV | BPF_X:
+ emit_mov_r(ctx, dst, src);
+ break;
+ /* dst = -dst (64-bit) */
+ case BPF_ALU64 | BPF_NEG:
+ emit_alu_i64(ctx, dst, 0, BPF_NEG);
+ break;
+ /* dst = dst & imm (64-bit) */
+ /* dst = dst | imm (64-bit) */
+ /* dst = dst ^ imm (64-bit) */
+ /* dst = dst << imm (64-bit) */
+ /* dst = dst >> imm (64-bit) */
+ /* dst = dst >> imm ((64-bit, arithmetic) */
+ /* dst = dst + imm (64-bit) */
+ /* dst = dst - imm (64-bit) */
+ /* dst = dst * imm (64-bit) */
+ /* dst = dst / imm (64-bit) */
+ /* dst = dst % imm (64-bit) */
+ case BPF_ALU64 | BPF_AND | BPF_K:
+ case BPF_ALU64 | BPF_OR | BPF_K:
+ case BPF_ALU64 | BPF_XOR | BPF_K:
+ case BPF_ALU64 | BPF_LSH | BPF_K:
+ case BPF_ALU64 | BPF_RSH | BPF_K:
+ case BPF_ALU64 | BPF_ARSH | BPF_K:
+ case BPF_ALU64 | BPF_ADD | BPF_K:
+ case BPF_ALU64 | BPF_SUB | BPF_K:
+ case BPF_ALU64 | BPF_MUL | BPF_K:
+ case BPF_ALU64 | BPF_DIV | BPF_K:
+ case BPF_ALU64 | BPF_MOD | BPF_K:
+ if (!valid_alu_i(BPF_OP(code), imm)) {
+ emit_mov_i(ctx, MIPS_R_T4, imm);
+ emit_alu_r64(ctx, dst, MIPS_R_T4, BPF_OP(code));
+ } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
+ emit_alu_i64(ctx, dst, val, alu);
+ }
+ break;
+ /* dst = dst & src (64-bit) */
+ /* dst = dst | src (64-bit) */
+ /* dst = dst ^ src (64-bit) */
+ /* dst = dst << src (64-bit) */
+ /* dst = dst >> src (64-bit) */
+ /* dst = dst >> src (64-bit, arithmetic) */
+ /* dst = dst + src (64-bit) */
+ /* dst = dst - src (64-bit) */
+ /* dst = dst * src (64-bit) */
+ /* dst = dst / src (64-bit) */
+ /* dst = dst % src (64-bit) */
+ case BPF_ALU64 | BPF_AND | BPF_X:
+ case BPF_ALU64 | BPF_OR | BPF_X:
+ case BPF_ALU64 | BPF_XOR | BPF_X:
+ case BPF_ALU64 | BPF_LSH | BPF_X:
+ case BPF_ALU64 | BPF_RSH | BPF_X:
+ case BPF_ALU64 | BPF_ARSH | BPF_X:
+ case BPF_ALU64 | BPF_ADD | BPF_X:
+ case BPF_ALU64 | BPF_SUB | BPF_X:
+ case BPF_ALU64 | BPF_MUL | BPF_X:
+ case BPF_ALU64 | BPF_DIV | BPF_X:
+ case BPF_ALU64 | BPF_MOD | BPF_X:
+ emit_alu_r64(ctx, dst, src, BPF_OP(code));
+ break;
+ /* dst = htole(dst) */
+ /* dst = htobe(dst) */
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ if (BPF_SRC(code) ==
+#ifdef __BIG_ENDIAN
+ BPF_FROM_LE
+#else
+ BPF_FROM_BE
+#endif
+ )
+ emit_bswap_r64(ctx, dst, imm);
+ else
+ emit_trunc_r64(ctx, dst, imm);
+ break;
+ /* dst = imm64 */
+ case BPF_LD | BPF_IMM | BPF_DW:
+ emit_mov_i64(ctx, dst, (u32)imm | ((u64)insn[1].imm << 32));
+ return 1;
+ /* LDX: dst = *(size *)(src + off) */
+ case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ emit_ldx(ctx, dst, src, off, BPF_SIZE(code));
+ break;
+ /* ST: *(size *)(dst + off) = imm */
+ case BPF_ST | BPF_MEM | BPF_W:
+ case BPF_ST | BPF_MEM | BPF_H:
+ case BPF_ST | BPF_MEM | BPF_B:
+ case BPF_ST | BPF_MEM | BPF_DW:
+ emit_mov_i(ctx, MIPS_R_T4, imm);
+ emit_stx(ctx, dst, MIPS_R_T4, off, BPF_SIZE(code));
+ break;
+ /* STX: *(size *)(dst + off) = src */
+ case BPF_STX | BPF_MEM | BPF_W:
+ case BPF_STX | BPF_MEM | BPF_H:
+ case BPF_STX | BPF_MEM | BPF_B:
+ case BPF_STX | BPF_MEM | BPF_DW:
+ emit_stx(ctx, dst, src, off, BPF_SIZE(code));
+ break;
+ /* Speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+ /* Atomics */
+ case BPF_STX | BPF_XADD | BPF_W:
+ case BPF_STX | BPF_XADD | BPF_DW:
+ switch (imm) {
+ case BPF_ADD:
+ case BPF_AND:
+ case BPF_OR:
+ case BPF_XOR:
+ if (BPF_SIZE(code) == BPF_DW) {
+ emit_atomic_r64(ctx, dst, src, off, imm);
+ } else { /* 32-bit, no fetch */
+ emit_sext(ctx, MIPS_R_T4, src);
+ emit_atomic_r(ctx, dst, MIPS_R_T4, off, imm);
+ }
+ break;
+ default:
+ goto notyet;
+ }
+ break;
+ /* PC += off if dst == src */
+ /* PC += off if dst != src */
+ /* PC += off if dst & src */
+ /* PC += off if dst > src */
+ /* PC += off if dst >= src */
+ /* PC += off if dst < src */
+ /* PC += off if dst <= src */
+ /* PC += off if dst > src (signed) */
+ /* PC += off if dst >= src (signed) */
+ /* PC += off if dst < src (signed) */
+ /* PC += off if dst <= src (signed) */
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JSET | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
+ if (off == 0)
+ break;
+ setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
+ emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */
+ emit_sext(ctx, MIPS_R_T5, src); /* Sign-extended src */
+ emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp);
+ if (finish_jmp(ctx, jmp, off) < 0)
+ goto toofar;
+ break;
+ /* PC += off if dst == imm */
+ /* PC += off if dst != imm */
+ /* PC += off if dst & imm */
+ /* PC += off if dst > imm */
+ /* PC += off if dst >= imm */
+ /* PC += off if dst < imm */
+ /* PC += off if dst <= imm */
+ /* PC += off if dst > imm (signed) */
+ /* PC += off if dst >= imm (signed) */
+ /* PC += off if dst < imm (signed) */
+ /* PC += off if dst <= imm (signed) */
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ if (off == 0)
+ break;
+ setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
+ emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */
+ if (valid_jmp_i(jmp, imm)) {
+ emit_jmp_i(ctx, MIPS_R_T4, imm, rel, jmp);
+ } else {
+ /* Move large immediate to register, sign-extended */
+ emit_mov_i(ctx, MIPS_R_T5, imm);
+ emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp);
+ }
+ if (finish_jmp(ctx, jmp, off) < 0)
+ goto toofar;
+ break;
+ /* PC += off if dst == src */
+ /* PC += off if dst != src */
+ /* PC += off if dst & src */
+ /* PC += off if dst > src */
+ /* PC += off if dst >= src */
+ /* PC += off if dst < src */
+ /* PC += off if dst <= src */
+ /* PC += off if dst > src (signed) */
+ /* PC += off if dst >= src (signed) */
+ /* PC += off if dst < src (signed) */
+ /* PC += off if dst <= src (signed) */
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ if (off == 0)
+ break;
+ setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
+ emit_jmp_r(ctx, dst, src, rel, jmp);
+ if (finish_jmp(ctx, jmp, off) < 0)
+ goto toofar;
+ break;
+ /* PC += off if dst == imm */
+ /* PC += off if dst != imm */
+ /* PC += off if dst & imm */
+ /* PC += off if dst > imm */
+ /* PC += off if dst >= imm */
+ /* PC += off if dst < imm */
+ /* PC += off if dst <= imm */
+ /* PC += off if dst > imm (signed) */
+ /* PC += off if dst >= imm (signed) */
+ /* PC += off if dst < imm (signed) */
+ /* PC += off if dst <= imm (signed) */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ if (off == 0)
+ break;
+ setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
+ if (valid_jmp_i(jmp, imm)) {
+ emit_jmp_i(ctx, dst, imm, rel, jmp);
+ } else {
+ /* Move large immediate to register */
+ emit_mov_i(ctx, MIPS_R_T4, imm);
+ emit_jmp_r(ctx, dst, MIPS_R_T4, rel, jmp);
+ }
+ if (finish_jmp(ctx, jmp, off) < 0)
+ goto toofar;
+ break;
+ /* PC += off */
+ case BPF_JMP | BPF_JA:
+ if (off == 0)
+ break;
+ if (emit_ja(ctx, off) < 0)
+ goto toofar;
+ break;
+ /* Tail call */
+ case BPF_JMP | BPF_TAIL_CALL:
+ if (emit_tail_call(ctx) < 0)
+ goto invalid;
+ break;
+ /* Function call */
+ case BPF_JMP | BPF_CALL:
+ if (emit_call(ctx, insn) < 0)
+ goto invalid;
+ break;
+ /* Function return */
+ case BPF_JMP | BPF_EXIT:
+ /*
+ * Optimization: when last instruction is EXIT
+ * simply continue to epilogue.
+ */
+ if (ctx->bpf_index == ctx->program->len - 1)
+ break;
+ if (emit_exit(ctx) < 0)
+ goto toofar;
+ break;
+
+ default:
+invalid:
+ pr_err_once("unknown opcode %02x\n", code);
+ return -EINVAL;
+notyet:
+ pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+ return -EFAULT;
+toofar:
+ pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
+ ctx->bpf_index, code);
+ return -E2BIG;
+ }
+ return 0;
+}

View File

@ -0,0 +1,120 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:06 +0200
Subject: [PATCH] mips: bpf: Add JIT workarounds for CPU errata
This patch adds workarounds for the following CPU errata to the MIPS
eBPF JIT, if enabled in the kernel configuration.
- R10000 ll/sc weak ordering
- Loongson-3 ll/sc weak ordering
- Loongson-2F jump hang
The Loongson-2F nop errata is implemented in uasm, which the JIT uses,
so no additional mitigations are needed for that.
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
--- a/arch/mips/net/bpf_jit_comp.c
+++ b/arch/mips/net/bpf_jit_comp.c
@@ -404,6 +404,7 @@ void emit_alu_r(struct jit_context *ctx,
/* Atomic read-modify-write (32-bit) */
void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code)
{
+ LLSC_sync(ctx);
emit(ctx, ll, MIPS_R_T9, off, dst);
switch (code) {
case BPF_ADD:
@@ -420,18 +421,19 @@ void emit_atomic_r(struct jit_context *c
break;
}
emit(ctx, sc, MIPS_R_T8, off, dst);
- emit(ctx, beqz, MIPS_R_T8, -16);
+ emit(ctx, LLSC_beqz, MIPS_R_T8, -16 - LLSC_offset);
emit(ctx, nop); /* Delay slot */
}
/* Atomic compare-and-exchange (32-bit) */
void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off)
{
+ LLSC_sync(ctx);
emit(ctx, ll, MIPS_R_T9, off, dst);
emit(ctx, bne, MIPS_R_T9, res, 12);
emit(ctx, move, MIPS_R_T8, src); /* Delay slot */
emit(ctx, sc, MIPS_R_T8, off, dst);
- emit(ctx, beqz, MIPS_R_T8, -20);
+ emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset);
emit(ctx, move, res, MIPS_R_T9); /* Delay slot */
clobber_reg(ctx, res);
}
--- a/arch/mips/net/bpf_jit_comp.h
+++ b/arch/mips/net/bpf_jit_comp.h
@@ -87,7 +87,7 @@ struct jit_context {
};
/* Emit the instruction if the JIT memory space has been allocated */
-#define emit(ctx, func, ...) \
+#define __emit(ctx, func, ...) \
do { \
if ((ctx)->target != NULL) { \
u32 *p = &(ctx)->target[ctx->jit_index]; \
@@ -95,6 +95,30 @@ do { \
} \
(ctx)->jit_index++; \
} while (0)
+#define emit(...) __emit(__VA_ARGS__)
+
+/* Workaround for R10000 ll/sc errata */
+#ifdef CONFIG_WAR_R10000
+#define LLSC_beqz beqzl
+#else
+#define LLSC_beqz beqz
+#endif
+
+/* Workaround for Loongson-3 ll/sc errata */
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
+#define LLSC_sync(ctx) emit(ctx, sync, 0)
+#define LLSC_offset 4
+#else
+#define LLSC_sync(ctx)
+#define LLSC_offset 0
+#endif
+
+/* Workaround for Loongson-2F jump errata */
+#ifdef CONFIG_CPU_JUMP_WORKAROUNDS
+#define JALR_MASK 0xffffffffcfffffffULL
+#else
+#define JALR_MASK (~0ULL)
+#endif
/*
* Mark a BPF register as accessed, it needs to be
--- a/arch/mips/net/bpf_jit_comp64.c
+++ b/arch/mips/net/bpf_jit_comp64.c
@@ -375,6 +375,7 @@ static void emit_atomic_r64(struct jit_c
u8 t1 = MIPS_R_T6;
u8 t2 = MIPS_R_T7;
+ LLSC_sync(ctx);
emit(ctx, lld, t1, off, dst);
switch (code) {
case BPF_ADD:
@@ -391,7 +392,7 @@ static void emit_atomic_r64(struct jit_c
break;
}
emit(ctx, scd, t2, off, dst);
- emit(ctx, beqz, t2, -16);
+ emit(ctx, LLSC_beqz, t2, -16 - LLSC_offset);
emit(ctx, nop); /* Delay slot */
}
@@ -414,7 +415,7 @@ static int emit_call(struct jit_context
push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
/* Emit function call */
- emit_mov_i64(ctx, tmp, addr);
+ emit_mov_i64(ctx, tmp, addr & JALR_MASK);
emit(ctx, jalr, MIPS_R_RA, tmp);
emit(ctx, nop); /* Delay slot */

View File

@ -0,0 +1,54 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:07 +0200
Subject: [PATCH] mips: bpf: Enable eBPF JITs
This patch enables the new eBPF JITs for 32-bit and 64-bit MIPS. It also
disables the old cBPF JIT to so cBPF programs are converted to use the
new JIT.
Workarounds for R4000 CPU errata are not implemented by the JIT, so the
JIT is disabled if any of those workarounds are configured.
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
---
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3099,6 +3099,7 @@ S: Supported
F: arch/arm64/net/
BPF JIT for MIPS (32-BIT AND 64-BIT)
+M: Johan Almbladh <johan.almbladh@anyfinetworks.com>
M: Paul Burton <paulburton@kernel.org>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -46,8 +46,10 @@ config MIPS
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
select HAVE_ASM_MODVERSIONS
- select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS
- select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2
+ select HAVE_EBPF_JIT if !CPU_MICROMIPS && \
+ !CPU_DADDI_WORKAROUNDS && \
+ !CPU_R4000_WORKAROUNDS && \
+ !CPU_R4400_WORKAROUNDS
select HAVE_CONTEXT_TRACKING
select HAVE_COPY_THREAD_TLS
select HAVE_C_RECORDMCOUNT
--- a/arch/mips/net/Makefile
+++ b/arch/mips/net/Makefile
@@ -2,9 +2,10 @@
# MIPS networking code
obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o
+obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o
ifeq ($(CONFIG_32BIT),y)
- obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o bpf_jit_comp32.o
+ obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp32.o
else
- obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
+ obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp64.o
endif

View File

@ -0,0 +1,387 @@
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Tue, 5 Oct 2021 18:54:08 +0200
Subject: [PATCH] mips: bpf: Remove old BPF JIT implementations
This patch removes the old 32-bit cBPF and 64-bit eBPF JIT implementations.
They are replaced by a new eBPF implementation that supports both 32-bit
and 64-bit MIPS CPUs.
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
---
delete mode 100644 arch/mips/net/bpf_jit.c
delete mode 100644 arch/mips/net/bpf_jit.h
delete mode 100644 arch/mips/net/bpf_jit_asm.S
delete mode 100644 arch/mips/net/ebpf_jit.c
--- a/arch/mips/net/bpf_jit.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Just-In-Time compiler for BPF filters on MIPS
- *
- * Copyright (c) 2014 Imagination Technologies Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- */
-
-#ifndef BPF_JIT_MIPS_OP_H
-#define BPF_JIT_MIPS_OP_H
-
-/* Registers used by JIT */
-#define MIPS_R_ZERO 0
-#define MIPS_R_V0 2
-#define MIPS_R_A0 4
-#define MIPS_R_A1 5
-#define MIPS_R_T4 12
-#define MIPS_R_T5 13
-#define MIPS_R_T6 14
-#define MIPS_R_T7 15
-#define MIPS_R_S0 16
-#define MIPS_R_S1 17
-#define MIPS_R_S2 18
-#define MIPS_R_S3 19
-#define MIPS_R_S4 20
-#define MIPS_R_S5 21
-#define MIPS_R_S6 22
-#define MIPS_R_S7 23
-#define MIPS_R_SP 29
-#define MIPS_R_RA 31
-
-/* Conditional codes */
-#define MIPS_COND_EQ 0x1
-#define MIPS_COND_GE (0x1 << 1)
-#define MIPS_COND_GT (0x1 << 2)
-#define MIPS_COND_NE (0x1 << 3)
-#define MIPS_COND_ALL (0x1 << 4)
-/* Conditionals on X register or K immediate */
-#define MIPS_COND_X (0x1 << 5)
-#define MIPS_COND_K (0x1 << 6)
-
-#define r_ret MIPS_R_V0
-
-/*
- * Use 2 scratch registers to avoid pipeline interlocks.
- * There is no overhead during epilogue and prologue since
- * any of the $s0-$s6 registers will only be preserved if
- * they are going to actually be used.
- */
-#define r_skb_hl MIPS_R_S0 /* skb header length */
-#define r_skb_data MIPS_R_S1 /* skb actual data */
-#define r_off MIPS_R_S2
-#define r_A MIPS_R_S3
-#define r_X MIPS_R_S4
-#define r_skb MIPS_R_S5
-#define r_M MIPS_R_S6
-#define r_skb_len MIPS_R_S7
-#define r_s0 MIPS_R_T4 /* scratch reg 1 */
-#define r_s1 MIPS_R_T5 /* scratch reg 2 */
-#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */
-#define r_tmp MIPS_R_T7 /* No need to preserve this */
-#define r_zero MIPS_R_ZERO
-#define r_sp MIPS_R_SP
-#define r_ra MIPS_R_RA
-
-#ifndef __ASSEMBLY__
-
-/* Declare ASM helpers */
-
-#define DECLARE_LOAD_FUNC(func) \
- extern u8 func(unsigned long *skb, int offset); \
- extern u8 func##_negative(unsigned long *skb, int offset); \
- extern u8 func##_positive(unsigned long *skb, int offset)
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-
-#endif
-
-#endif /* BPF_JIT_MIPS_OP_H */
--- a/arch/mips/net/bpf_jit_asm.S
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF
- * compiler.
- *
- * Copyright (C) 2015 Imagination Technologies Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2 of the License.
- */
-
-#include <asm/asm.h>
-#include <asm/isa-rev.h>
-#include <asm/regdef.h>
-#include "bpf_jit.h"
-
-/* ABI
- *
- * r_skb_hl skb header length
- * r_skb_data skb data
- * r_off(a1) offset register
- * r_A BPF register A
- * r_X PF register X
- * r_skb(a0) *skb
- * r_M *scratch memory
- * r_skb_le skb length
- * r_s0 Scratch register 0
- * r_s1 Scratch register 1
- *
- * On entry:
- * a0: *skb
- * a1: offset (imm or imm + X)
- *
- * All non-BPF-ABI registers are free for use. On return, we only
- * care about r_ret. The BPF-ABI registers are assumed to remain
- * unmodified during the entire filter operation.
- */
-
-#define skb a0
-#define offset a1
-#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */
-
- /* We know better :) so prevent assembler reordering etc */
- .set noreorder
-
-#define is_offset_negative(TYPE) \
- /* If offset is negative we have more work to do */ \
- slti t0, offset, 0; \
- bgtz t0, bpf_slow_path_##TYPE##_neg; \
- /* Be careful what follows in DS. */
-
-#define is_offset_in_header(SIZE, TYPE) \
- /* Reading from header? */ \
- addiu $r_s0, $r_skb_hl, -SIZE; \
- slt t0, $r_s0, offset; \
- bgtz t0, bpf_slow_path_##TYPE; \
-
-LEAF(sk_load_word)
- is_offset_negative(word)
-FEXPORT(sk_load_word_positive)
- is_offset_in_header(4, word)
- /* Offset within header boundaries */
- PTR_ADDU t1, $r_skb_data, offset
- .set reorder
- lw $r_A, 0(t1)
- .set noreorder
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- wsbh t0, $r_A
- rotr $r_A, t0, 16
-# else
- sll t0, $r_A, 24
- srl t1, $r_A, 24
- srl t2, $r_A, 8
- or t0, t0, t1
- andi t2, t2, 0xff00
- andi t1, $r_A, 0xff00
- or t0, t0, t2
- sll t1, t1, 8
- or $r_A, t0, t1
-# endif
-#endif
- jr $r_ra
- move $r_ret, zero
- END(sk_load_word)
-
-LEAF(sk_load_half)
- is_offset_negative(half)
-FEXPORT(sk_load_half_positive)
- is_offset_in_header(2, half)
- /* Offset within header boundaries */
- PTR_ADDU t1, $r_skb_data, offset
- lhu $r_A, 0(t1)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- wsbh $r_A, $r_A
-# else
- sll t0, $r_A, 8
- srl t1, $r_A, 8
- andi t0, t0, 0xff00
- or $r_A, t0, t1
-# endif
-#endif
- jr $r_ra
- move $r_ret, zero
- END(sk_load_half)
-
-LEAF(sk_load_byte)
- is_offset_negative(byte)
-FEXPORT(sk_load_byte_positive)
- is_offset_in_header(1, byte)
- /* Offset within header boundaries */
- PTR_ADDU t1, $r_skb_data, offset
- lbu $r_A, 0(t1)
- jr $r_ra
- move $r_ret, zero
- END(sk_load_byte)
-
-/*
- * call skb_copy_bits:
- * (prototype in linux/skbuff.h)
- *
- * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len)
- *
- * o32 mandates we leave 4 spaces for argument registers in case
- * the callee needs to use them. Even though we don't care about
- * the argument registers ourselves, we need to allocate that space
- * to remain ABI compliant since the callee may want to use that space.
- * We also allocate 2 more spaces for $r_ra and our return register (*to).
- *
- * n64 is a bit different. The *caller* will allocate the space to preserve
- * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no
- * good reason but it does not matter that much really.
- *
- * (void *to) is returned in r_s0
- *
- */
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-#define DS_OFFSET(SIZE) (4 * SZREG)
-#else
-#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE))
-#endif
-#define bpf_slow_path_common(SIZE) \
- /* Quick check. Are we within reasonable boundaries? */ \
- LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \
- sltu $r_s0, offset, $r_s1; \
- beqz $r_s0, fault; \
- /* Load 4th argument in DS */ \
- LONG_ADDIU a3, zero, SIZE; \
- PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
- PTR_LA t0, skb_copy_bits; \
- PTR_S $r_ra, (5 * SZREG)($r_sp); \
- /* Assign low slot to a2 */ \
- PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \
- jalr t0; \
- /* Reset our destination slot (DS but it's ok) */ \
- INT_S zero, (4 * SZREG)($r_sp); \
- /* \
- * skb_copy_bits returns 0 on success and -EFAULT \
- * on error. Our data live in a2. Do not bother with \
- * our data if an error has been returned. \
- */ \
- /* Restore our frame */ \
- PTR_L $r_ra, (5 * SZREG)($r_sp); \
- INT_L $r_s0, (4 * SZREG)($r_sp); \
- bltz v0, fault; \
- PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
- move $r_ret, zero; \
-
-NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
- bpf_slow_path_common(4)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- wsbh t0, $r_s0
- jr $r_ra
- rotr $r_A, t0, 16
-# else
- sll t0, $r_s0, 24
- srl t1, $r_s0, 24
- srl t2, $r_s0, 8
- or t0, t0, t1
- andi t2, t2, 0xff00
- andi t1, $r_s0, 0xff00
- or t0, t0, t2
- sll t1, t1, 8
- jr $r_ra
- or $r_A, t0, t1
-# endif
-#else
- jr $r_ra
- move $r_A, $r_s0
-#endif
-
- END(bpf_slow_path_word)
-
-NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
- bpf_slow_path_common(2)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- jr $r_ra
- wsbh $r_A, $r_s0
-# else
- sll t0, $r_s0, 8
- andi t1, $r_s0, 0xff00
- andi t0, t0, 0xff00
- srl t1, t1, 8
- jr $r_ra
- or $r_A, t0, t1
-# endif
-#else
- jr $r_ra
- move $r_A, $r_s0
-#endif
-
- END(bpf_slow_path_half)
-
-NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp)
- bpf_slow_path_common(1)
- jr $r_ra
- move $r_A, $r_s0
-
- END(bpf_slow_path_byte)
-
-/*
- * Negative entry points
- */
- .macro bpf_is_end_of_data
- li t0, SKF_LL_OFF
- /* Reading link layer data? */
- slt t1, offset, t0
- bgtz t1, fault
- /* Be careful what follows in DS. */
- .endm
-/*
- * call skb_copy_bits:
- * (prototype in linux/filter.h)
- *
- * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
- * int k, unsigned int size)
- *
- * see above (bpf_slow_path_common) for ABI restrictions
- */
-#define bpf_negative_common(SIZE) \
- PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
- PTR_LA t0, bpf_internal_load_pointer_neg_helper; \
- PTR_S $r_ra, (5 * SZREG)($r_sp); \
- jalr t0; \
- li a2, SIZE; \
- PTR_L $r_ra, (5 * SZREG)($r_sp); \
- /* Check return pointer */ \
- beqz v0, fault; \
- PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
- /* Preserve our pointer */ \
- move $r_s0, v0; \
- /* Set return value */ \
- move $r_ret, zero; \
-
-bpf_slow_path_word_neg:
- bpf_is_end_of_data
-NESTED(sk_load_word_negative, (6 * SZREG), $r_sp)
- bpf_negative_common(4)
- jr $r_ra
- lw $r_A, 0($r_s0)
- END(sk_load_word_negative)
-
-bpf_slow_path_half_neg:
- bpf_is_end_of_data
-NESTED(sk_load_half_negative, (6 * SZREG), $r_sp)
- bpf_negative_common(2)
- jr $r_ra
- lhu $r_A, 0($r_s0)
- END(sk_load_half_negative)
-
-bpf_slow_path_byte_neg:
- bpf_is_end_of_data
-NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp)
- bpf_negative_common(1)
- jr $r_ra
- lbu $r_A, 0($r_s0)
- END(sk_load_byte_negative)
-
-fault:
- jr $r_ra
- addiu $r_ret, zero, 1

View File

@ -192,7 +192,7 @@ Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17584,6 +17584,14 @@ L: linux-gpio@vger.kernel.org
@@ -17585,6 +17585,14 @@ L: linux-gpio@vger.kernel.org
S: Maintained
F: drivers/gpio/gpio-ws16c48.c

View File

@ -10,7 +10,7 @@ Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1165,6 +1165,10 @@ config MIPS_MSC
@@ -1167,6 +1167,10 @@ config MIPS_MSC
config SYNC_R4K
bool

View File

@ -10,7 +10,7 @@ Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1159,6 +1159,10 @@ config SYNC_R4K
@@ -1161,6 +1161,10 @@ config SYNC_R4K
config MIPS_MACHINE
def_bool n

View File

@ -9,7 +9,7 @@ Acked-by: Rob Landley <rob@landley.net>
---
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1080,9 +1080,6 @@ config FW_ARC
@@ -1082,9 +1082,6 @@ config FW_ARC
config ARCH_MAY_HAVE_PC_FDC
bool
@ -19,7 +19,7 @@ Acked-by: Rob Landley <rob@landley.net>
config CEVT_BCM1480
bool
@@ -3172,6 +3169,18 @@ choice
@@ -3174,6 +3171,18 @@ choice
bool "Extend builtin kernel arguments with bootloader arguments"
endchoice

View File

@ -25,7 +25,7 @@ Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2037,7 +2037,8 @@ config CPU_MIPS32
@@ -2039,7 +2039,8 @@ config CPU_MIPS32
config CPU_MIPS64
bool

View File

@ -9,7 +9,7 @@ Acked-by: Rob Landley <rob@landley.net>
---
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1069,9 +1069,6 @@ config FW_ARC
@@ -1071,9 +1071,6 @@ config FW_ARC
config ARCH_MAY_HAVE_PC_FDC
bool
@ -19,7 +19,7 @@ Acked-by: Rob Landley <rob@landley.net>
config CEVT_BCM1480
bool
@@ -3044,6 +3041,18 @@ choice
@@ -3046,6 +3043,18 @@ choice
bool "Extend builtin kernel arguments with bootloader arguments"
endchoice

View File

@ -16,7 +16,7 @@ Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -626,6 +626,7 @@ config RALINK
@@ -628,6 +628,7 @@ config RALINK
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_LITTLE_ENDIAN
select SYS_SUPPORTS_MIPS16