diff --git a/ext/opcache/jit/ir/.gitignore b/ext/opcache/jit/ir/.gitignore index 7a37a4fd0593..367a68671bc0 100644 --- a/ext/opcache/jit/ir/.gitignore +++ b/ext/opcache/jit/ir/.gitignore @@ -20,3 +20,6 @@ tests/**/*.log win32/vcpkg win32/build_* + +fuzz/build/ +fuzz/corpus/ diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c index a02332e0d39c..f6a0cb60af98 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -161,6 +161,8 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted case IR_CHAR: if (insn->val.c == '\\') { fprintf(f, "'\\\\'"); + } else if (insn->val.c == '\'') { + fprintf(f, "'\\\''"); } else if (insn->val.c >= ' ') { fprintf(f, "'%c'", insn->val.c); } else if (insn->val.c == '\t') { @@ -283,6 +285,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted #define ir_op_kind_src IR_OPND_CONTROL #define ir_op_kind_reg IR_OPND_CONTROL_DEP #define ir_op_kind_ret IR_OPND_CONTROL_REF +#define ir_op_kind_grd IR_OPND_CONTROL_GUARD #define ir_op_kind_str IR_OPND_STR #define ir_op_kind_num IR_OPND_NUM #define ir_op_kind_fld IR_OPND_STR @@ -1843,7 +1846,7 @@ int ir_mem_unprotect(void *ptr, size_t size) int ir_mem_flush(void *ptr, size_t size) { - return 1; + return FlushInstructionCache(GetCurrentProcess(), ptr, size) == TRUE ? 
1 : 0; } #else @@ -2168,7 +2171,10 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(const ir_ctx *ctx, ir_ref ref, i if (!(proto->flags & (IR_CONST_FUNC|IR_PURE_FUNC))) { break; } - } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_VSTORE) { + } else if (insn->op == IR_MERGE + || insn->op == IR_LOOP_BEGIN + || insn->op == IR_VSTORE + || (insn->op == IR_BEGIN && insn->op2)) { return IR_UNUSED; } ref = insn->op1; @@ -2233,7 +2239,10 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(const ir_ctx *ctx, ir_ref ref, if (!(proto->flags & (IR_CONST_FUNC|IR_PURE_FUNC))) { break; } - } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_STORE) { + } else if (insn->op == IR_MERGE + || insn->op == IR_LOOP_BEGIN + || insn->op == IR_STORE + || (insn->op == IR_BEGIN && insn->op2)) { break; } ref = insn->op1; @@ -2326,7 +2335,15 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_store_i(ir_ctx *ctx, ir_ref ref, ir_ref } } else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) { guarded = 1; - } else if (insn->op >= IR_START || insn->op == IR_CALL) { + } else if (insn->op >= IR_START) { + if (insn->op == IR_BEGIN && insn->op1 && !insn->op2) { + /* skip END */ + ref = insn->op1; + insn = &ctx->ir_base[ref]; + } else { + break; + } + } else if (insn->op == IR_CALL) { break; } next = ref; @@ -2407,7 +2424,15 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vstore_i(ir_ctx *ctx, ir_ref ref, ir_re } } else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) { guarded = 1; - } else if (insn->op >= IR_START || insn->op == IR_CALL || insn->op == IR_LOAD || insn->op == IR_STORE) { + } else if (insn->op >= IR_START) { + if (insn->op == IR_BEGIN && insn->op1 && !insn->op2) { + /* skip END */ + ref = insn->op1; + insn = &ctx->ir_base[ref]; + } else { + break; + } + } else if (insn->op == IR_CALL || insn->op == IR_LOAD || insn->op == IR_STORE) { break; } next = ref; @@ -2422,6 +2447,37 @@ ir_ref ir_find_aliasing_vstore(ir_ctx *ctx, ir_ref ref, 
ir_ref var, ir_ref val) } /* IR Construction API */ +static ir_ref ir_last_guard(ir_ctx *ctx) +{ + ir_ref ref; + ir_insn *insn; + + IR_ASSERT(ctx->control); + ref = ctx->control; + while (1) { + insn = &ctx->ir_base[ref]; + if (IR_IS_BB_START(insn->op) || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) { + if (insn->op == IR_START) ref = IR_UNUSED; + break; + } + ref = insn->op1; + } + return ref; +} + +ir_ref _ir_DIV(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2) +{ + ir_ref guard = (IR_IS_TYPE_FP(type) || (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 != 0)) ? + IR_UNUSED : ir_last_guard(ctx); + return ir_fold3(ctx, IR_OPT(IR_DIV, type), op1, op2, guard); +} + +ir_ref _ir_MOD(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2) +{ + ir_ref guard = (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 != 0) ? + IR_UNUSED : ir_last_guard(ctx); + return ir_fold3(ctx, IR_OPT(IR_MOD, type), op1, op2, guard); +} ir_ref _ir_PARAM(ir_ctx *ctx, ir_type type, const char* name, ir_ref num) { diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index b0a96b511bd0..01db4ecf6b15 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -210,6 +210,7 @@ typedef enum _ir_type { * arg - argument reference CALL/TAILCALL/CARG->CARG * src - reference to a previous control region (IF, IF_TRUE, IF_FALSE, MERGE, LOOP_BEGIN, LOOP_END, RETURN) * reg - data-control dependency on region (PHI, VAR, PARAM) + * grd - optional data-control dependency guard (DIV, MOD) * ret - reference to a previous RETURN instruction (RETURN) * str - string: variable/argument name (VAR, PARAM, CALL, TAILCALL) * num - number: argument number (PARAM) @@ -265,8 +266,8 @@ typedef enum _ir_type { _(ADD, d2C, def, def, ___) /* addition */ \ _(SUB, d2, def, def, ___) /* subtraction (must be ADD+1) */ \ _(MUL, d2C, def, def, ___) /* multiplication */ \ - _(DIV, d2, def, def, ___) /* division */ \ - _(MOD, d2, def, def, ___) /* modulo */ \ + _(DIV, d3, def, def, grd) /* division */ \ + _(MOD, 
d3, def, def, grd) /* modulo */ \ _(NEG, d1, def, ___, ___) /* change sign */ \ _(ABS, d1, def, ___, ___) /* absolute value */ \ /* (LDEXP, MIN, MAX, FPMATH) */ \ @@ -383,6 +384,14 @@ typedef enum _ir_type { _(RETURN, T2X1, src, def, ret) /* function return */ \ _(UNREACHABLE, T1X2, src, ___, ret) /* unreachable (tailcall, etc) */ \ \ + /* inline assembler */ \ + _(ASM, xN, src, def, def) /* GCC inline assembler */ \ + /* op2 - asm template string */ \ + /* op3 - asm constraint string */ \ + /* opN - asm input argument */ \ + _(ASM_OUT, x1, src, ___, ___) /* ASM data output projection */ \ + _(ASM_GOTO, E1, src, ___, ___) /* ASM goto (bb end after ASM) */ \ + \ /* deoptimization helper */ \ _(EXITCALL, x2, src, def, ___) /* save CPU regs and call op2 */ \ diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index bdf6b027b9fe..fc4bb84f1e05 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -402,6 +402,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; const ir_proto_t *proto; const ir_call_conv_dsc *cc; + ir_ref next; constraints->def_reg = IR_REG_NONE; constraints->hints_count = 0; @@ -562,11 +563,13 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; } - if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { - insn = &ctx->ir_base[insn->op2]; - if (IR_IS_SYM_CONST(insn->op) || !aarch64_may_encode_imm12(insn->val.u64)) { - constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); - n++; + if (IR_IS_CONST_REF(insn->op2)) { + if (insn->op1 != insn->op2) { + insn = &ctx->ir_base[insn->op2]; + if (IR_IS_SYM_CONST(insn->op) || !aarch64_may_encode_imm12(insn->val.u64)) { + constraints->tmp_regs[n] = 
IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } } } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); @@ -751,6 +754,10 @@ get_arg_hints: break; case IR_SNAPSHOT: flags = 0; + next = ir_next_control(ctx, ref); + if (ctx->ir_base[next].op == IR_GUARD || ctx->ir_base[next].op == IR_GUARD_NOT) { + flags = IR_EXTEND_INPUTS_TO_NEXT; + } break; case IR_VA_START: flags = IR_OP2_MUST_BE_IN_REG; @@ -1199,10 +1206,6 @@ binop_fp: if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) { op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) { - // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP -//??? && (insn->op2 == ref - 1 || -//??? (insn->op2 == ctx->prev_ref[ref] - 1 -//??? && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) { if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; return IR_GUARD_CMP_INT; @@ -1265,6 +1268,12 @@ binop_fp: return IR_FUSED | IR_ARGVAL; case IR_NOP: return IR_SKIPPED | IR_NOP; + case IR_ASM: + case IR_ASM_OUT: + case IR_ASM_GOTO: + fprintf(stderr, "ERROR: IR_ASM is not implemented yet\n"); + exit(1); + return IR_SKIPPED | IR_NOP; default: break; } @@ -4996,7 +5005,8 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); | .addr &addr - if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { + if (ctx->ir_base[bb->start].op1 == def + && ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { bb->flags |= IR_BB_EMPTY; } continue; diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h index 084216a06343..9492945b1362 100644 --- a/ext/opcache/jit/ir/ir_builder.h +++ b/ext/opcache/jit/ir/ir_builder.h @@ -118,31 +118,31 @@ extern "C" { 
#define ir_MUL_D(_op1, _op2) ir_BINARY_OP_D(IR_MUL, (_op1), (_op2)) #define ir_MUL_F(_op1, _op2) ir_BINARY_OP_F(IR_MUL, (_op1), (_op2)) -#define ir_DIV(_type, _op1, _op2) ir_BINARY_OP(IR_DIV, (_type), (_op1), (_op2)) -#define ir_DIV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_DIV, (_op1), (_op2)) -#define ir_DIV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_DIV, (_op1), (_op2)) -#define ir_DIV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_DIV, (_op1), (_op2)) -#define ir_DIV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_DIV, (_op1), (_op2)) -#define ir_DIV_A(_op1, _op2) ir_BINARY_OP_A(IR_DIV, (_op1), (_op2)) -#define ir_DIV_C(_op1, _op2) ir_BINARY_OP_C(IR_DIV, (_op1), (_op2)) -#define ir_DIV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_DIV, (_op1), (_op2)) -#define ir_DIV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_DIV, (_op1), (_op2)) -#define ir_DIV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_DIV, (_op1), (_op2)) -#define ir_DIV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_DIV, (_op1), (_op2)) +#define ir_DIV(_type, _op1, _op2) _ir_DIV(_ir_CTX, (_type), (_op1), (_op2)) +#define ir_DIV_U8(_op1, _op2) ir_DIV(IR_U8, (_op1), (_op2)) +#define ir_DIV_U16(_op1, _op2) ir_DIV(IR_U16, (_op1), (_op2)) +#define ir_DIV_U32(_op1, _op2) ir_DIV(IR_U32, (_op1), (_op2)) +#define ir_DIV_U64(_op1, _op2) ir_DIV(IR_U64, (_op1), (_op2)) +#define ir_DIV_A(_op1, _op2) ir_DIV(IR_ADDR, (_op1), (_op2)) +#define ir_DIV_C(_op1, _op2) ir_DIV(IR_CHAR, (_op1), (_op2)) +#define ir_DIV_I8(_op1, _op2) ir_DIV(IR_I8, (_op1), (_op2)) +#define ir_DIV_I16(_op1, _op2) ir_DIV(IR_I16, (_op1), (_op2)) +#define ir_DIV_I32(_op1, _op2) ir_DIV(IR_I32, (_op1), (_op2)) +#define ir_DIV_I64(_op1, _op2) ir_DIV(IR_I64, (_op1), (_op2)) #define ir_DIV_D(_op1, _op2) ir_BINARY_OP_D(IR_DIV, (_op1), (_op2)) #define ir_DIV_F(_op1, _op2) ir_BINARY_OP_F(IR_DIV, (_op1), (_op2)) -#define ir_MOD(_type, _op1, _op2) ir_BINARY_OP(IR_MOD, (_type), (_op1), (_op2)) -#define ir_MOD_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MOD, (_op1), (_op2)) -#define ir_MOD_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MOD, (_op1), (_op2)) 
-#define ir_MOD_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MOD, (_op1), (_op2)) -#define ir_MOD_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MOD, (_op1), (_op2)) -#define ir_MOD_A(_op1, _op2) ir_BINARY_OP_A(IR_MOD, (_op1), (_op2)) -#define ir_MOD_C(_op1, _op2) ir_BINARY_OP_C(IR_MOD, (_op1), (_op2)) -#define ir_MOD_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MOD, (_op1), (_op2)) -#define ir_MOD_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MOD, (_op1), (_op2)) -#define ir_MOD_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MOD, (_op1), (_op2)) -#define ir_MOD_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MOD, (_op1), (_op2)) +#define ir_MOD(_type, _op1, _op2) _ir_MOD(_ir_CTX, (_type), (_op1), (_op2)) +#define ir_MOD_U8(_op1, _op2) ir_MOD(IR_U8, (_op1), (_op2)) +#define ir_MOD_U16(_op1, _op2) ir_MOD(IR_U16, (_op1), (_op2)) +#define ir_MOD_U32(_op1, _op2) ir_MOD(IR_U32, (_op1), (_op2)) +#define ir_MOD_U64(_op1, _op2) ir_MOD(IR_U64, (_op1), (_op2)) +#define ir_MOD_A(_op1, _op2) ir_MOD(IR_ADDR, (_op1), (_op2)) +#define ir_MOD_C(_op1, _op2) ir_MOD(IR_CHAR, (_op1), (_op2)) +#define ir_MOD_I8(_op1, _op2) ir_MOD(IR_I8, (_op1), (_op2)) +#define ir_MOD_I16(_op1, _op2) ir_MOD(IR_I16, (_op1), (_op2)) +#define ir_MOD_I32(_op1, _op2) ir_MOD(IR_I32, (_op1), (_op2)) +#define ir_MOD_I64(_op1, _op2) ir_MOD(IR_I64, (_op1), (_op2)) #define ir_NEG(_type, _op1) ir_UNARY_OP(IR_NEG, (_type), (_op1)) #define ir_NEG_C(_op1) ir_UNARY_OP_C(IR_NEG, (_op1)) @@ -633,6 +633,8 @@ extern "C" { #define ir_MERGE_WITH_EMPTY_TRUE(_if) do {ir_ref end = ir_END(); ir_IF_TRUE(_if); ir_MERGE_2(end, ir_END());} while (0) #define ir_MERGE_WITH_EMPTY_FALSE(_if) do {ir_ref end = ir_END(); ir_IF_FALSE(_if); ir_MERGE_2(end, ir_END());} while (0) +ir_ref _ir_DIV(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2); +ir_ref _ir_MOD(ir_ctx *ctx, ir_type type, ir_ref op1, ir_ref op2); ir_ref _ir_ADD_OFFSET(ir_ctx *ctx, ir_ref addr, uintptr_t offset); ir_ref _ir_PHI_2(ir_ctx *ctx, ir_type type, ir_ref src1, ir_ref src2); ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, 
ir_ref *inputs); diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c index 40041004c561..92042ea8cbb5 100644 --- a/ext/opcache/jit/ir/ir_cfg.c +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -1502,6 +1502,23 @@ static bool ir_is_merged_loop_back_edge(ir_ctx *ctx, uint32_t hdr, uint32_t b) } #endif +static bool ir_should_align_loop(ir_ctx *ctx, ir_chain *chains, uint32_t b, ir_block *bb) +{ + uint32_t n = bb->predecessors_count; + uint32_t *p = ctx->cfg_edges + bb->predecessors; + + for (; n > 0; p++, n--) { + uint32_t pred = *p; + if (chains[pred].head) { + if (ir_chain_head(chains, pred) == b) return 1; + } else { + if (ir_should_align_loop(ctx, chains, b, &ctx->cfg_blocks[pred])) return 1; + } + } + + return 0; +} + static int ir_schedule_blocks_bottom_up(ir_ctx *ctx) { uint32_t max_edges_count = ctx->cfg_edges_count / 2; @@ -1862,7 +1879,7 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx) if (chains[b].head == b) { bb = &ctx->cfg_blocks[b]; if (bb->loop_depth) { - if ((bb->flags & IR_BB_LOOP_HEADER) || ir_chain_head(chains, bb->loop_header) == b) { + if (ir_should_align_loop(ctx, chains, b, bb)) { bb->flags |= IR_BB_ALIGN_LOOP; } } diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c index ee951291b1b0..e1be7f6544df 100644 --- a/ext/opcache/jit/ir/ir_check.c +++ b/ext/opcache/jit/ir/ir_check.c @@ -148,6 +148,12 @@ bool ir_check(const ir_ctx *ctx) bool ok = 1; ir_check_ctx check_ctx; + if (ctx->insns_count < 1 || ctx->ir_base[1].op != IR_START) { + fprintf(stderr, "ir_base[1].op invalid opcode (%d)\n", + (ctx->insns_count < 1) ? 
IR_NOP : ctx->ir_base[0].op); + ok = 0; + } + check_ctx.arena = NULL; check_ctx.use_set = NULL; check_ctx.input_set = NULL; @@ -297,6 +303,14 @@ bool ir_check(const ir_ctx *ctx) ok = 0; } break; + case IR_OPND_CONTROL_GUARD: + if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_BB_START) + && use_insn->op != IR_GUARD + && use_insn->op != IR_GUARD_NOT) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be BB_START or GUARD\n", i, j, use); + ok = 0; + } + break; default: fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) of unsupported kind\n", i, j, use); ok = 0; @@ -306,6 +320,8 @@ bool ir_check(const ir_ctx *ctx) /* pass (function returns void) */ } else if (insn->op == IR_BEGIN && j == 1) { /* pass (start of unreachable basic block) */ + } else if (IR_OPND_KIND(flags, j) == IR_OPND_CONTROL_GUARD) { + /* reference to control guard is optional */ } else if (IR_OPND_KIND(flags, j) != IR_OPND_CONTROL_REF && (insn->op != IR_SNAPSHOT || j == 1)) { fprintf(stderr, "ir_base[%d].ops[%d] missing reference (%d)\n", i, j, use); @@ -413,6 +429,7 @@ bool ir_check(const ir_ctx *ctx) } break; case IR_IGOTO: + case IR_ASM_GOTO: break; default: /* skip data references */ @@ -464,6 +481,10 @@ bool ir_check(const ir_ctx *ctx) // if (!ok) { // ir_dump_codegen(ctx, stderr); // } + +#ifndef IR_CHECK_NO_ABORT IR_ASSERT(ok); +#endif + return ok; } diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c index 037003f021a7..3b34294d1c76 100644 --- a/ext/opcache/jit/ir/ir_dump.c +++ b/ext/opcache/jit/ir/ir_dump.c @@ -142,6 +142,7 @@ void ir_dump_dot(const ir_ctx *ctx, const char *name, const char *comments, FILE break; case IR_OPND_CONTROL_DEP: case IR_OPND_CONTROL_REF: + case IR_OPND_CONTROL_GUARD: fprintf(f, "\tn%d -> n%d [style=dashed,dir=back,weight=%d];\n", ref, i, REF_WEIGHT); break; case IR_OPND_LABEL_REF: @@ -650,6 +651,7 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f) case IR_OPND_CONTROL: case IR_OPND_CONTROL_DEP: case IR_OPND_CONTROL_REF: + case 
IR_OPND_CONTROL_GUARD: fprintf(f, "%sl_%d", first ? "(" : ", ", ref); first = 0; break; @@ -680,6 +682,8 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f) } else if (opnd_kind == IR_OPND_NUM) { fprintf(f, "%s%d", first ? "(" : ", ", ref); first = 0; + } else if (opnd_kind == IR_OPND_CONTROL_GUARD) { + /* skip */ } else if (j != n && (IR_IS_REF_OPND_KIND(opnd_kind) || (opnd_kind == IR_OPND_UNUSED && p[n-j]))) { fprintf(f, "%snull", first ? "(" : ", "); diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h index 136bbb0e08e1..cbe049be932c 100644 --- a/ext/opcache/jit/ir/ir_fold.h +++ b/ext/opcache/jit/ir/ir_fold.h @@ -1679,44 +1679,6 @@ IR_FOLD(EQ(SEXT, C_I16)) IR_FOLD(EQ(SEXT, C_I32)) IR_FOLD(EQ(SEXT, C_I64)) IR_FOLD(EQ(SEXT, C_ADDR)) -{ - if (ctx->use_lists && ctx->use_lists[op1_insn->op1].count != 1) { - /* pass */ - } else if (op2_insn->val.u64 == 0 && ctx->ir_base[op1_insn->op1].type == IR_BOOL) { - opt = IR_OPT(IR_NOT, IR_BOOL); - op1 = op1_insn->op1; - op2 = IR_UNUSED; - IR_FOLD_RESTART; - } else { - ir_type type = ctx->ir_base[op1_insn->op1].type; - - if (op1_insn->op == IR_ZEXT - && (op2_insn->val.u64 >> (ir_type_size[type] * 8)) != 0) { - IR_FOLD_NEXT; - } - if (IR_IS_TYPE_SIGNED(type)) { - switch (ir_type_size[type]) { - case 1: val.i64 = op2_insn->val.i8; break; - case 2: val.i64 = op2_insn->val.i16; break; - case 4: val.i64 = op2_insn->val.i32; break; - default: val.u64 = op2_insn->val.u64; break; - } - } else { - switch (ir_type_size[type]) { - case 1: val.u64 = op2_insn->val.u8; break; - case 2: val.u64 = op2_insn->val.u16; break; - case 4: val.u64 = op2_insn->val.u32; break; - default: val.u64 = op2_insn->val.u64; break; - } - } - op1 = op1_insn->op1; - op2 = ir_const(ctx, val, type); - IR_FOLD_RESTART; - } - - IR_FOLD_NEXT; -} - IR_FOLD(NE(ZEXT, C_U16)) IR_FOLD(NE(ZEXT, C_U32)) IR_FOLD(NE(ZEXT, C_U64)) @@ -1731,16 +1693,93 @@ IR_FOLD(NE(SEXT, C_I16)) IR_FOLD(NE(SEXT, C_I32)) IR_FOLD(NE(SEXT, C_I64)) IR_FOLD(NE(SEXT, C_ADDR)) -{ - 
if (ctx->use_lists && ctx->use_lists[op1_insn->op1].count != 1) { +IR_FOLD(ULT(ZEXT, C_U16)) +IR_FOLD(ULT(ZEXT, C_U32)) +IR_FOLD(ULT(ZEXT, C_U64)) +IR_FOLD(ULT(ZEXT, C_I16)) +IR_FOLD(ULT(ZEXT, C_I32)) +IR_FOLD(ULT(ZEXT, C_I64)) +IR_FOLD(ULT(ZEXT, C_ADDR)) +IR_FOLD(UGE(ZEXT, C_U16)) +IR_FOLD(UGE(ZEXT, C_U32)) +IR_FOLD(UGE(ZEXT, C_U64)) +IR_FOLD(UGE(ZEXT, C_I16)) +IR_FOLD(UGE(ZEXT, C_I32)) +IR_FOLD(UGE(ZEXT, C_I64)) +IR_FOLD(UGE(ZEXT, C_ADDR)) +IR_FOLD(ULE(ZEXT, C_U16)) +IR_FOLD(ULE(ZEXT, C_U32)) +IR_FOLD(ULE(ZEXT, C_U64)) +IR_FOLD(ULE(ZEXT, C_I16)) +IR_FOLD(ULE(ZEXT, C_I32)) +IR_FOLD(ULE(ZEXT, C_I64)) +IR_FOLD(ULE(ZEXT, C_ADDR)) +IR_FOLD(UGT(ZEXT, C_U16)) +IR_FOLD(UGT(ZEXT, C_U32)) +IR_FOLD(UGT(ZEXT, C_U64)) +IR_FOLD(UGT(ZEXT, C_I16)) +IR_FOLD(UGT(ZEXT, C_I32)) +IR_FOLD(UGT(ZEXT, C_I64)) +IR_FOLD(UGT(ZEXT, C_ADDR)) +IR_FOLD(LT(SEXT, C_U16)) +IR_FOLD(LT(SEXT, C_U32)) +IR_FOLD(LT(SEXT, C_U64)) +IR_FOLD(LT(SEXT, C_I16)) +IR_FOLD(LT(SEXT, C_I32)) +IR_FOLD(LT(SEXT, C_I64)) +IR_FOLD(LT(SEXT, C_ADDR)) +IR_FOLD(GE(SEXT, C_U16)) +IR_FOLD(GE(SEXT, C_U32)) +IR_FOLD(GE(SEXT, C_U64)) +IR_FOLD(GE(SEXT, C_I16)) +IR_FOLD(GE(SEXT, C_I32)) +IR_FOLD(GE(SEXT, C_I64)) +IR_FOLD(GE(SEXT, C_ADDR)) +IR_FOLD(LE(SEXT, C_U16)) +IR_FOLD(LE(SEXT, C_U32)) +IR_FOLD(LE(SEXT, C_U64)) +IR_FOLD(LE(SEXT, C_I16)) +IR_FOLD(LE(SEXT, C_I32)) +IR_FOLD(LE(SEXT, C_I64)) +IR_FOLD(LE(SEXT, C_ADDR)) +IR_FOLD(GT(SEXT, C_U16)) +IR_FOLD(GT(SEXT, C_U32)) +IR_FOLD(GT(SEXT, C_U64)) +IR_FOLD(GT(SEXT, C_I16)) +IR_FOLD(GT(SEXT, C_I32)) +IR_FOLD(GT(SEXT, C_I64)) +IR_FOLD(GT(SEXT, C_ADDR)) +{ + if (ctx->use_lists && ctx->use_lists[op1].count != 1) { /* pass */ - } else if (op2_insn->val.u64 == 0 && ctx->ir_base[op1_insn->op1].type == IR_BOOL) { - IR_FOLD_COPY(op1_insn->op1); } else { ir_type type = ctx->ir_base[op1_insn->op1].type; - if (op1_insn->op == IR_ZEXT - && (op2_insn->val.u64 >> (ir_type_size[type] * 8)) != 0) { + if (type == IR_BOOL && op2_insn->val.u64 == 0) { + if ((opt & IR_OPT_OP_MASK) == IR_EQ) { + opt = 
IR_OPT(IR_NOT, IR_BOOL); + op1 = op1_insn->op1; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } else if ((opt & IR_OPT_OP_MASK) == IR_NE) { + IR_FOLD_COPY(op1_insn->op1); + } + } + if ((op2_insn->val.u64 >> (ir_type_size[type] * 8)) != 0 + && (op1_insn->op != IR_SEXT || (op2_insn->val.i64 >> (ir_type_size[type] * 8)) != -1)) { + if ((opt & IR_OPT_OP_MASK) == IR_EQ + || (opt & IR_OPT_OP_MASK) == IR_UGT + || (opt & IR_OPT_OP_MASK) == IR_UGE) { + IR_FOLD_COPY(IR_FALSE); + } else if ((opt & IR_OPT_OP_MASK) == IR_NE + || (opt & IR_OPT_OP_MASK) == IR_ULT + || (opt & IR_OPT_OP_MASK) == IR_ULE) { + IR_FOLD_COPY(IR_TRUE); + } else if ((opt & IR_OPT_OP_MASK) == IR_GT || (opt & IR_OPT_OP_MASK) == IR_GE) { + IR_FOLD_COPY(op2_insn->val.i64 >= 0 ? IR_FALSE : IR_TRUE); + } else if ((opt & IR_OPT_OP_MASK) == IR_LT || (opt & IR_OPT_OP_MASK) == IR_LE) { + IR_FOLD_COPY(op2_insn->val.i64 >= 0 ? IR_TRUE : IR_FALSE); + } IR_FOLD_NEXT; } if (IR_IS_TYPE_SIGNED(type)) { @@ -1765,6 +1804,43 @@ IR_FOLD(NE(SEXT, C_ADDR)) IR_FOLD_NEXT; } +IR_FOLD(EQ(ZEXT, ZEXT)) +IR_FOLD(NE(ZEXT, ZEXT)) +IR_FOLD(ULT(ZEXT, ZEXT)) +IR_FOLD(UGE(ZEXT, ZEXT)) +IR_FOLD(ULE(ZEXT, ZEXT)) +IR_FOLD(UGT(ZEXT, ZEXT)) +IR_FOLD(EQ(SEXT, SEXT)) +IR_FOLD(NE(SEXT, SEXT)) +IR_FOLD(LT(SEXT, SEXT)) +IR_FOLD(GE(SEXT, SEXT)) +IR_FOLD(LE(SEXT, SEXT)) +IR_FOLD(GT(SEXT, SEXT)) +{ + if (ctx->ir_base[op1_insn->op1].type == ctx->ir_base[op2_insn->op1].type + && (!ctx->use_lists || (ctx->use_lists[op1].count == 1 && ctx->use_lists[op2].count == 1))) { + op1 = op1_insn->op1; + op2 = op2_insn->op1; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(LT(ZEXT, ZEXT)) +IR_FOLD(GE(ZEXT, ZEXT)) +IR_FOLD(LE(ZEXT, ZEXT)) +IR_FOLD(GT(ZEXT, ZEXT)) +{ + if (ctx->ir_base[op1_insn->op1].type == ctx->ir_base[op2_insn->op1].type + && (!ctx->use_lists || (ctx->use_lists[op1].count == 1 && ctx->use_lists[op2].count == 1))) { + op1 = op1_insn->op1; + op2 = op2_insn->op1; + opt += 4; /* LT -> ULT, ... 
*/ + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + IR_FOLD(NOT(EQ)) IR_FOLD(NOT(NE)) IR_FOLD(NOT(LT)) diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index 1b45eb834ce6..b194eeb81770 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -148,7 +148,7 @@ static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca) } #if IR_GCM_SPLIT -/* Partially Dead Code Elimination through splitting the node and sunking the clones +/* Partially Dead Code Elimination through splitting the node and sinking the clones * * This code is based on the Benedikt Meurer's idea first implemented in V8. * See: https://codereview.chromium.org/899433005 @@ -309,6 +309,7 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) struct { ir_ref ref; uint32_t block; + uint32_t lca; uint32_t use_count; uint32_t use; } *clones = ir_mem_malloc(sizeof(*clones) * use_list->count); @@ -344,8 +345,11 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) clone = clones_count++; ir_hashtab_add(&hash, j, clone); clones[clone].block = j; + clones[clone].lca = i; clones[clone].use_count = 0; clones[clone].use = (uint32_t)-1; + } else { + clones[clone].lca = ir_gcm_find_lca(ctx, clones[clone].lca, i); } uses[uses_count].ref = use; uses[uses_count].block = i; @@ -367,8 +371,11 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) clone = clones_count++; ir_hashtab_add(&hash, j, clone); clones[clone].block = j; + clones[clone].lca = i; clones[clone].use_count = 0; clones[clone].use = -1; + } else { + clones[clone].lca = ir_gcm_find_lca(ctx, clones[clone].lca, i); } uses[uses_count].ref = use; uses[uses_count].block = i; @@ -378,6 +385,42 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) } } + /* Select best blocks to insert clones */ + for (i = 0; i < clones_count; i++) { + uint32_t b0 = clones[i].block; + uint32_t lca = clones[i].lca; + + if 
(b0 != lca) { + ir_block *bb = &ctx->cfg_blocks[lca]; + uint32_t loop_depth = bb->loop_depth; + + if (loop_depth) { + uint32_t b; + uint32_t best; + + best = b = lca; + do { + b = bb->dom_parent; + bb = &ctx->cfg_blocks[b]; + if (bb->loop_depth < loop_depth) { + if (!bb->loop_depth) { + best = b; + break; + } + loop_depth = bb->loop_depth; + best = b; + } + } while (b != b0); + lca = best; + } + clones[i].block = lca; + } + } + + // TODO: instead of inserting clone into the block where the expression is partially available, + // we should insert PHI and the actual clones into the block sources where it's not available + // (similar to SSAPRE) + #ifdef IR_DEBUG if (ctx->flags & IR_DEBUG_GCM_SPLIT) { for (i = 0; i < clones_count; i++) { @@ -1170,11 +1213,11 @@ int ir_schedule(ir_ctx *ctx) ir_ref use = *p; ir_insn *use_insn = &ctx->ir_base[use]; if (!_xlat[use] && ctx->cfg_map[use]) { - IR_ASSERT(ctx->cfg_map[use] == b); if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR || use_insn->op == IR_PI || use_insn->op == IR_PHI) { + IR_ASSERT(ctx->cfg_map[use] == b); if (_prev[use] != phis) { /* remove "use" */ _prev[_next[use]] = _prev[use]; diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index 9e3a3a171b46..3e1051ca3379 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -949,10 +949,11 @@ IR_ALWAYS_INLINE bool ir_ref_is_true(const ir_ctx *ctx, ir_ref ref) #define IR_OPND_LABEL_REF 0x3 #define IR_OPND_CONTROL_DEP 0x4 #define IR_OPND_CONTROL_REF 0x5 -#define IR_OPND_STR 0x6 -#define IR_OPND_NUM 0x7 -#define IR_OPND_PROB 0x8 -#define IR_OPND_PROTO 0x9 +#define IR_OPND_CONTROL_GUARD 0x6 +#define IR_OPND_STR 0x7 +#define IR_OPND_NUM 0x8 +#define IR_OPND_PROB 0x9 +#define IR_OPND_PROTO 0xa #define IR_OP_FLAGS(op_flags, op1_flags, op2_flags, op3_flags) \ ((op_flags) | ((op1_flags) << 20) | ((op2_flags) << 24) | ((op3_flags) << 28)) @@ -966,7 +967,7 @@ IR_ALWAYS_INLINE bool ir_ref_is_true(const ir_ctx *ctx,
ir_ref ref) (((flags) >> (16 + (4 * (((i) > 3) ? 3 : (i))))) & 0xf) #define IR_IS_REF_OPND_KIND(kind) \ - ((kind) >= IR_OPND_DATA && (kind) <= IR_OPND_CONTROL_REF) + ((kind) >= IR_OPND_DATA && (kind) <= IR_OPND_CONTROL_GUARD) IR_ALWAYS_INLINE ir_ref ir_operands_count(const ir_ctx *ctx, const ir_insn *insn) { @@ -1223,6 +1224,7 @@ typedef struct _ir_use_pos ir_use_pos; #define IR_USE_SHOULD_BE_IN_REG (1<<1) #define IR_DEF_REUSES_OP1_REG (1<<2) #define IR_DEF_CONFLICTS_WITH_INPUT_REGS (1<<3) +#define IR_EXTEND_INPUTS_TO_NEXT (1<<4) /* used for SNAPSHOT followed by GUARD */ #define IR_FUSED_USE (1<<6) #define IR_PHI_USE (1<<7) diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index aff9aa7bab3f..f22e06083786 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -799,6 +799,34 @@ int ir_compute_live_ranges(ir_ctx *ctx) ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0); continue; } + } else if (def_flags & IR_EXTEND_INPUTS_TO_NEXT) { + ir_ref next = ir_next_control(ctx, ref); + ir_live_pos use_pos; + + IR_ASSERT(insn->op == IR_SNAPSHOT); + j = 2; + p = insn->ops + 2; + for (; j <= insn->inputs_count; j++, p++) { + ir_ref input = *p; + uint32_t v; + + if (input > 0) { + v = ctx->vregs[input]; + IR_ASSERT(v); + use_pos = IR_USE_LIVE_POS_FROM_REF(next); + if (!ir_bitset_in(live, v)) { + /* live.add(opd) */ + ir_bitset_incl(live, v); + /* intervals[opd].addRange(b.from, op.id) */ + ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); + } else { + ival = ctx->live_intervals[v]; + } + use_pos = IR_USE_LIVE_POS_FROM_REF(ref); + ir_add_use(ctx, ival, j, use_pos, IR_REG_NONE, 0, IR_UNUSED); + } + } + continue; } IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)))); @@ -1418,6 +1446,34 @@ int ir_compute_live_ranges(ir_ctx *ctx) ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0); 
continue; } + } else if (def_flags & IR_EXTEND_INPUTS_TO_NEXT) { + ir_ref next = ir_next_control(ctx, ref); + ir_live_pos use_pos; + + IR_ASSERT(insn->op == IR_SNAPSHOT); + j = 2; + p = insn->ops + 2; + for (; j <= insn->inputs_count; j++, p++) { + ir_ref input = *p; + uint32_t v; + + if (input > 0) { + v = ctx->vregs[input]; + IR_ASSERT(v); + use_pos = IR_USE_LIVE_POS_FROM_REF(next); + if (!IS_LIVE_IN_BLOCK(v, b)) { + /* live.add(opd) */ + SET_LIVE_IN_BLOCK(v, b); + /* intervals[opd].addRange(b.from, op.id) */ + ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); + } else { + ival = ctx->live_intervals[v]; + } + use_pos = IR_USE_LIVE_POS_FROM_REF(ref); + ir_add_use(ctx, ival, j, use_pos, IR_REG_NONE, 0, IR_UNUSED); + } + } + continue; } IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)))); @@ -3004,6 +3060,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li { ir_live_pos nextUsePos[IR_REG_NUM]; ir_live_pos blockPos[IR_REG_NUM]; + int score, best_score, scores[IR_REG_NUM]; int i, reg; ir_live_pos pos, next_use_pos; ir_live_interval *other, *prev; @@ -3032,6 +3089,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) { nextUsePos[i] = 0x7fffffff; blockPos[i] = 0x7fffffff; + scores[i] = 0; } } else { available = IR_REGSET_GP; @@ -3050,6 +3108,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) { nextUsePos[i] = 0x7fffffff; blockPos[i] = 0x7fffffff; + scores[i] = 0; } } @@ -3080,6 +3139,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); if (pos < nextUsePos[reg]) { nextUsePos[reg] = pos; + /* Prefer splitting interval that was already splitted before */ + scores[reg] = (other->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) ? 
1 : 0; } } } @@ -3100,6 +3161,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li IR_REGSET_FOREACH(regset, reg) { if (overlap < nextUsePos[reg]) { nextUsePos[reg] = overlap; + scores[reg] = 0; } if (overlap < blockPos[reg]) { blockPos[reg] = overlap; @@ -3113,6 +3175,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) { if (overlap < nextUsePos[reg]) { nextUsePos[reg] = overlap; + scores[reg] = 0; } if (overlap < blockPos[reg]) { blockPos[reg] = overlap; @@ -3122,6 +3185,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); if (pos < nextUsePos[reg]) { nextUsePos[reg] = pos; + /* Prefer splitting interval that was already splitted before */ + scores[reg] = (other->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) ? 1 : 0; } } } @@ -3141,12 +3206,17 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li /* reg = register with highest nextUsePos */ pos = nextUsePos[reg]; + best_score = (scores[reg] << 28) + nextUsePos[reg]; tmp_regset = available; IR_REGSET_EXCL(tmp_regset, reg); IR_REGSET_FOREACH(tmp_regset, i) { if (nextUsePos[i] > pos) { pos = nextUsePos[i]; + } + score = (scores[i] << 28) + nextUsePos[i]; + if (score > best_score) { reg = i; + best_score = score; } } IR_REGSET_FOREACH_END(); diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c index 3f1d943c6870..8b3f3b5c6b5d 100644 --- a/ext/opcache/jit/ir/ir_save.c +++ b/ext/opcache/jit/ir/ir_save.c @@ -283,7 +283,7 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f) n = ir_operands_count(ctx, insn); if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) && n != 2) { fprintf(f, "/%d", n); - } else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL) && n != 2) { + } else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL || insn->op == IR_ASM) && 
n != 2) { fprintf(f, "/%d", n - 2); } else if (insn->op == IR_PHI && n != 3) { fprintf(f, "/%d", n - 1); @@ -321,6 +321,7 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f) case IR_OPND_CONTROL: case IR_OPND_CONTROL_DEP: case IR_OPND_CONTROL_REF: + case IR_OPND_CONTROL_GUARD: fprintf(f, "%sl_%d", first ? "(" : ", ", ref); first = 0; break; @@ -352,6 +353,8 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f) } else if (opnd_kind == IR_OPND_NUM) { fprintf(f, "%s%d", first ? "(" : ", ", ref); first = 0; + } else if (opnd_kind == IR_OPND_CONTROL_GUARD) { + /* skip */ } else if (j != n && (IR_IS_REF_OPND_KIND(opnd_kind) || (opnd_kind == IR_OPND_UNUSED && p[n-j]))) { fprintf(f, "%snull", first ? "(" : ", "); diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 921790fd92bd..f2b8616e2afa 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -609,6 +609,10 @@ static IR_NEVER_INLINE void ir_sccp_analyze(const ir_ctx *ctx, ir_sccp_val *_val IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags)); n = IR_INPUT_EDGES_COUNT(flags); + if (insn->op == IR_DIV || insn->op == IR_MOD) { + /* skip data-control guard edge */ + n--; + } for (p = insn->ops + 1; n > 0; p++, n--) { ir_ref input = *p; if (input > 0) { @@ -1419,7 +1423,7 @@ static ir_ref ir_iter_find_cse(const ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_r if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) { use_list = &ctx->use_lists[op2]; } - if (!IR_IS_CONST_REF(op3) && (!use_list || use_list->count > ctx->use_lists[op3].count)) { + if (op3 > 0 && (!use_list || use_list->count > ctx->use_lists[op3].count)) { use_list = &ctx->use_lists[op3]; } if (use_list) { @@ -1907,6 +1911,46 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use, insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref, worklist); } insn->type = type; + if (IR_IS_TYPE_SIGNED(type)) { + ir_insn *cond = &ctx->ir_base[insn->op1]; + if 
(cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE) { + if (cond->op1 == insn->op2 && cond->op2 == insn->op3) { + insn->op = (cond->op == IR_LT || cond->op == IR_LE) ? IR_MIN : IR_MAX; + ir_use_list_remove_one(ctx, insn->op1, ref); + ir_bitqueue_add(worklist, insn->op1); + insn->op1 = insn->op2; + insn->op2 = insn->op3; + insn->op3 = IR_UNUSED; + } else if (cond->op1 == insn->op3 && cond->op2 == insn->op1) { + insn->op = (cond->op == IR_LT || cond->op == IR_LE) ? IR_MAX : IR_MIN; + ir_use_list_remove_one(ctx, insn->op1, ref); + ir_bitqueue_add(worklist, insn->op1); + insn->op1 = insn->op2; + insn->op2 = insn->op3; + insn->op3 = IR_UNUSED; + } + } + } else { + IR_ASSERT(IR_IS_TYPE_UNSIGNED(type)); + ir_insn *cond = &ctx->ir_base[insn->op1]; + if (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE) { + if (cond->op1 == insn->op2 && cond->op2 == insn->op3) { + insn->op = (cond->op == IR_ULT || cond->op == IR_ULE) ? IR_MIN : IR_MAX; + ir_use_list_remove_one(ctx, insn->op1, ref); + ir_bitqueue_add(worklist, insn->op1); + insn->op1 = insn->op2; + insn->op2 = insn->op3; + insn->op3 = IR_UNUSED; + } else if (cond->op1 == insn->op3 && cond->op2 == insn->op1) { + insn->op = (cond->op == IR_ULT || cond->op == IR_ULE) ? 
IR_MAX : IR_MIN; + ir_use_list_remove_one(ctx, insn->op1, ref); + ir_bitqueue_add(worklist, insn->op1); + insn->op1 = insn->op2; + insn->op2 = insn->op3; + insn->op3 = IR_UNUSED; + } + } + } return ref; case IR_PHI: for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) { @@ -1995,7 +2039,7 @@ static uint32_t _ir_estimated_control(const ir_ctx *ctx, ir_ref val, ir_ref loop const ir_ref *p; ir_ref n, input, result, ctrl; - if (IR_IS_CONST_REF(val)) { + if (val <= 0) { /* constant or IR_UNUSED */ return 1; /* IR_START */ } @@ -2129,14 +2173,14 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref const ir_insn *use_insn = &ctx->ir_base[use]; if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) { - if (use_insn->op1 == phi_ref) { + if (use_insn->op1 == op_ref) { if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) { return 0; } if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) { continue; } - } else if (use_insn->op2 == phi_ref) { + } else if (use_insn->op2 == op_ref) { if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) { return 0; } @@ -2521,6 +2565,52 @@ static bool ir_is_zero(const ir_ctx *ctx, ir_ref ref) && ctx->ir_base[ref].val.u32 == 0; } +static bool ir_fix_min_max_const(ir_ctx *ctx, ir_insn *cond, ir_ref ref) +{ + if (cond->op == IR_ULE) { + /* (x <= 3 ? 4 : x) => (x < 4 ? 4 : x) => max(x, 4) */ + /* (x <= 3 ? x : 4) => (x < 4 ? x : 4) => min(x, 4) */ + if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op) + && !IR_IS_SYM_CONST(ctx->ir_base[ref].op) + && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 - 1 + && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) { + cond->op2 = ref; + return 1; + } + } else if (cond->op == IR_UGE) { + /* (x >= 3 ? 2 : x) => (x > 2 ? 2 : x) => min(x, 2) */ + /* (x >= 3 ? x : 2) => (x > 2 ? 
x : 2) => max(x, 2) */ + if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op) + && !IR_IS_SYM_CONST(ctx->ir_base[ref].op) + && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 + 1 + && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) { + cond->op2 = ref; + return 1; + } + } else if (cond->op == IR_LE) { + /* (x <= 3 ? 4 : x) => (x < 4 ? 4 : x) => max(x, 4) */ + /* (x <= 3 ? x : 4) => (x < 4 ? x : 4) => min(x, 4) */ + if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op) + && !IR_IS_SYM_CONST(ctx->ir_base[ref].op) + && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 - 1 + && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) { + cond->op2 = ref; + return 1; + } + } else if (cond->op == IR_GE) { + /* (x >= 3 ? 2 : x) => (x > 2 ? 2 : x) => min(x, 2) */ + /* (x >= 3 ? x : 2) => (x > 2 ? x : 2) => max(x, 2) */ + if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op) + && !IR_IS_SYM_CONST(ctx->ir_base[ref].op) + && ctx->ir_base[cond->op2].val.i64 == ctx->ir_base[ref].val.i64 + 1 + && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) { + cond->op2 = ref; + return 1; + } + } + return 0; +} + static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist) { IR_ASSERT(insn->inputs_count == 3); @@ -2560,8 +2650,18 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re } if (is_cmp - && ((insn->op2 == cond->op1 && insn->op3 == cond->op2) - || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) { + && ((insn->op2 == cond->op1 + && (insn->op3 == cond->op2 + || (IR_IS_CONST_REF(cond->op2) + && (IR_IS_CONST_REF(insn->op3) + && IR_IS_TYPE_INT(insn->type) + && ir_fix_min_max_const(ctx, cond, insn->op3))))) + || (insn->op3 == cond->op1 + && (insn->op2 == cond->op2 + || (IR_IS_CONST_REF(cond->op2) + && (IR_IS_CONST_REF(insn->op2) + && IR_IS_TYPE_INT(insn->type) + && ir_fix_min_max_const(ctx, cond, insn->op2))))))) { /* MAX/MIN * * prev prev @@ -2612,14 +2712,14 @@ 
static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re next->op1 = root->op1; ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref); - if (!IR_IS_CONST_REF(insn->op1)) { - ir_use_list_remove_one(ctx, insn->op1, cond_ref); - } - if (!IR_IS_CONST_REF(insn->op2)) { - ir_use_list_remove_one(ctx, insn->op2, cond_ref); - } if (ctx->use_lists[cond_ref].count == 1) { + if (!IR_IS_CONST_REF(insn->op1)) { + ir_use_list_remove_one(ctx, insn->op1, cond_ref); + } + if (!IR_IS_CONST_REF(insn->op2)) { + ir_use_list_remove_one(ctx, insn->op2, cond_ref); + } MAKE_NOP(cond); CLEAR_USES(cond_ref); } else { ir_use_list_remove_one(ctx, cond_ref, root_ref); @@ -2705,11 +2805,11 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re next->op1 = root->op1; ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref); ir_use_list_remove_one(ctx, insn->op1, neg_ref); - if (!IR_IS_CONST_REF(insn->op1)) { - ir_use_list_remove_one(ctx, insn->op1, cond_ref); - } if (ctx->use_lists[cond_ref].count == 1) { + if (!IR_IS_CONST_REF(insn->op1)) { + ir_use_list_remove_one(ctx, insn->op1, cond_ref); + } MAKE_NOP(cond); CLEAR_USES(cond_ref); } else { ir_use_list_remove_one(ctx, cond_ref, root_ref); @@ -2727,7 +2827,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re } return 1; - } else if (insn->op2 <= cond_ref && insn->op3 <= cond_ref + } else if (insn->op2 <= root_ref && insn->op3 <= root_ref && cond->op != IR_OVERFLOW // TODO: temporary disable IF-conversion for RLOAD. // We don't track anti-dependencies in GCM and Local Scheduling. 
@@ -3437,6 +3537,13 @@ static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref con } } + if (condition_insn->op == IR_SHL && IR_IS_CONST_REF(condition_insn->op1)) { + ir_insn *val_insn = &ctx->ir_base[condition_insn->op1]; + if (!IR_IS_SYM_CONST(val_insn->op) && val_insn->val.u64 == 1) { + return IR_TRUE; + } + } + while ((condition_insn->op == IR_BITCAST || condition_insn->op == IR_ZEXT || condition_insn->op == IR_SEXT) diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 9cd41c37ffef..ca42001a8816 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1273,6 +1273,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; const ir_proto_t *proto; const ir_call_conv_dsc *cc; + ir_ref next; constraints->def_reg = IR_REG_NONE; constraints->hints_count = 0; @@ -1345,9 +1346,11 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; op2_const: insn = &ctx->ir_base[ref]; - if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { - constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); - n++; + if (IR_IS_CONST_REF(insn->op2)) { + if (insn->op1 != insn->op2) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; @@ -1712,6 +1715,10 @@ get_arg_hints: break; case IR_SNAPSHOT: flags = 0; + next = ir_next_control(ctx, ref); + if (ctx->ir_base[next].op == IR_GUARD || ctx->ir_base[next].op == IR_GUARD_NOT) { + flags = IR_EXTEND_INPUTS_TO_NEXT; + } break; case IR_VA_START: flags = IR_OP2_MUST_BE_IN_REG; @@ 
-3078,10 +3085,6 @@ store_int: if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) { op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) { - // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP -//??? && (insn->op2 == ref - 1 || -//??? (insn->op2 == ctx->prev_ref[ref] - 1 -//??? && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) { if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { if (IR_IS_CONST_REF(op2_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op) @@ -3262,6 +3265,12 @@ store_int: return IR_FUSED | IR_ARGVAL; case IR_NOP: return IR_SKIPPED | IR_NOP; + case IR_ASM: + case IR_ASM_OUT: + case IR_ASM_GOTO: + fprintf(stderr, "ERROR: IR_ASM is not implemented yet\n"); + exit(1); + return IR_SKIPPED | IR_NOP; default: break; } @@ -9429,7 +9438,8 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); | .aword &addr - if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { + if (ctx->ir_base[bb->start].op1 == def + && ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { bb->flags |= IR_BB_EMPTY; } continue; diff --git a/ext/opcache/jit/zend_jit_trace.c b/ext/opcache/jit/zend_jit_trace.c index 4b3cb663686d..727022472bf5 100644 --- a/ext/opcache/jit/zend_jit_trace.c +++ b/ext/opcache/jit/zend_jit_trace.c @@ -8568,7 +8568,7 @@ int ZEND_FASTCALL zend_jit_trace_hot_side(zend_execute_data *execute_data, uint3 do { ex = ex->prev_execute_data; n++; - } while (ex && zend_jit_traces[root].op_array != &ex->func->op_array); + } while (ex && (!ex->func || zend_jit_traces[root].op_array != &ex->func->op_array)); if (ex && n <= ZEND_JIT_TRACE_MAX_RET_DEPTH) { ret_depth = n; }