Diffstat (limited to 'py')
-rw-r--r-- | py/argcheck.c | 4
-rw-r--r-- | py/asmarm.c | 114
-rw-r--r-- | py/asmarm.h | 51
-rw-r--r-- | py/asmbase.c | 2
-rw-r--r-- | py/asmrv32.c | 73
-rw-r--r-- | py/asmrv32.h | 64
-rw-r--r-- | py/asmthumb.c | 115
-rw-r--r-- | py/asmthumb.h | 118
-rw-r--r-- | py/asmx64.h | 27
-rw-r--r-- | py/asmx86.h | 27
-rw-r--r-- | py/asmxtensa.c | 178
-rw-r--r-- | py/asmxtensa.h | 70
-rw-r--r-- | py/bc.c | 2
-rw-r--r-- | py/binary.c | 2
-rw-r--r-- | py/dynruntime.h | 4
-rw-r--r-- | py/dynruntime.mk | 7
-rw-r--r-- | py/emitinlinextensa.c | 211
-rw-r--r-- | py/emitnative.c | 202
-rw-r--r-- | py/emitndebug.c | 4
-rw-r--r-- | py/misc.h | 9
-rw-r--r-- | py/modio.c | 9
-rw-r--r-- | py/modmath.c | 7
-rw-r--r-- | py/mpconfig.h | 18
-rw-r--r-- | py/mpprint.c | 13
-rw-r--r-- | py/mpprint.h | 17
-rw-r--r-- | py/mpz.c | 16
-rw-r--r-- | py/nativeglue.h | 2
-rw-r--r-- | py/nlr.c | 2
-rw-r--r-- | py/nlr.h | 6
-rw-r--r-- | py/nlraarch64.c | 2
-rw-r--r-- | py/nlrmips.c | 2
-rw-r--r-- | py/nlrpowerpc.c | 4
-rw-r--r-- | py/nlrrv32.c | 2
-rw-r--r-- | py/nlrrv64.c | 2
-rw-r--r-- | py/nlrthumb.c | 2
-rw-r--r-- | py/nlrx64.c | 2
-rw-r--r-- | py/nlrx86.c | 2
-rw-r--r-- | py/nlrxtensa.c | 2
-rw-r--r-- | py/obj.h | 20
-rw-r--r-- | py/objarray.c | 31
-rw-r--r-- | py/objfloat.c | 12
-rw-r--r-- | py/objint.c | 5
-rw-r--r-- | py/objlist.c | 101
-rw-r--r-- | py/objstr.c | 13
-rw-r--r-- | py/parsenum.c | 34
-rw-r--r-- | py/persistentcode.c | 18
-rw-r--r-- | py/repl.c | 4
-rw-r--r-- | py/runtime.c | 69
-rw-r--r-- | py/runtime.h | 36
-rw-r--r-- | py/scheduler.c | 26
50 files changed, 1136 insertions, 627 deletions
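
The hunks below share one convention worth calling out: the per-port load/store emitters now take an operation_size argument (0 = byte, 1 = halfword, 2 = word), element offsets become byte offsets via offset << operation_size, and the assorted ad-hoc FIT_*/SIGNED_FIT* checks are replaced by MP_FIT_UNSIGNED/MP_FIT_SIGNED from py/misc.h. The py/misc.h hunk itself is not shown on this page, so the definitions in this standalone sketch are reconstructions consistent with how the macros are used below, not a verbatim copy:

    #include <assert.h>
    #include <stdint.h>

    // Sketch only: plausible MP_FIT_* definitions inferred from their uses below.
    #define MP_FIT_UNSIGNED(bits, value) (((value) & (~0U << (bits))) == 0)
    #define MP_FIT_SIGNED(bits, value) \
        (MP_FIT_UNSIGNED((bits) - 1, (value)) || \
        (((value) & (~0U << ((bits) - 1))) == (~0U << ((bits) - 1))))

    int main(void) {
        // A halfword (operation_size == 1) element offset of 300 is byte offset 600:
        // too big for a Thumb T1 imm5 field, fine for a T3 imm12, and within the
        // signed 12-bit range of an RV32 I-type immediate.
        uint32_t offset = 300;
        assert(!MP_FIT_UNSIGNED(5, offset));
        assert(MP_FIT_UNSIGNED(12, offset << 1));
        assert(MP_FIT_SIGNED(12, -600));
        return 0;
    }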
diff --git a/py/argcheck.c b/py/argcheck.c
index 35b116ec0d..298c19bcfd 100644
--- a/py/argcheck.c
+++ b/py/argcheck.c
@@ -137,12 +137,12 @@ void mp_arg_parse_all_kw_array(size_t n_pos, size_t n_kw, const mp_obj_t *args,
     mp_arg_parse_all(n_pos, args, &kw_args, n_allowed, allowed, out_vals);
 }
 
-NORETURN void mp_arg_error_terse_mismatch(void) {
+MP_NORETURN void mp_arg_error_terse_mismatch(void) {
     mp_raise_TypeError(MP_ERROR_TEXT("argument num/types mismatch"));
 }
 
 #if MICROPY_CPYTHON_COMPAT
-NORETURN void mp_arg_error_unimpl_kw(void) {
+MP_NORETURN void mp_arg_error_unimpl_kw(void) {
     mp_raise_NotImplementedError(MP_ERROR_TEXT("keyword argument(s) not implemented - use normal args instead"));
 }
 #endif
diff --git a/py/asmarm.c b/py/asmarm.c
index 6fa751b32e..15bc73b61e 100644
--- a/py/asmarm.c
+++ b/py/asmarm.c
@@ -36,7 +36,7 @@
 
 #include "py/asmarm.h"
 
-#define SIGNED_FIT24(x) (((x) & 0xff800000) == 0) || (((x) & 0xff000000) == 0xff000000)
+#define REG_TEMP ASM_ARM_REG_R8
 
 // Insert word into instruction flow
 static void emit(asm_arm_t *as, uint op) {
@@ -171,8 +171,8 @@ void asm_arm_entry(asm_arm_t *as, int num_locals) {
         if (as->stack_adjust < 0x100) {
             emit_al(as, asm_arm_op_sub_imm(ASM_ARM_REG_SP, ASM_ARM_REG_SP, as->stack_adjust));
         } else {
-            asm_arm_mov_reg_i32_optimised(as, ASM_ARM_REG_R8, as->stack_adjust);
-            emit_al(as, asm_arm_op_sub_reg(ASM_ARM_REG_SP, ASM_ARM_REG_SP, ASM_ARM_REG_R8));
+            asm_arm_mov_reg_i32_optimised(as, REG_TEMP, as->stack_adjust);
+            emit_al(as, asm_arm_op_sub_reg(ASM_ARM_REG_SP, ASM_ARM_REG_SP, REG_TEMP));
         }
     }
 }
@@ -182,8 +182,8 @@ void asm_arm_exit(asm_arm_t *as) {
         if (as->stack_adjust < 0x100) {
             emit_al(as, asm_arm_op_add_imm(ASM_ARM_REG_SP, ASM_ARM_REG_SP, as->stack_adjust));
         } else {
-            asm_arm_mov_reg_i32_optimised(as, ASM_ARM_REG_R8, as->stack_adjust);
-            emit_al(as, asm_arm_op_add_reg(ASM_ARM_REG_SP, ASM_ARM_REG_SP, ASM_ARM_REG_R8));
+            asm_arm_mov_reg_i32_optimised(as, REG_TEMP, as->stack_adjust);
+            emit_al(as, asm_arm_op_add_reg(ASM_ARM_REG_SP, ASM_ARM_REG_SP, REG_TEMP));
         }
     }
 }
@@ -293,10 +293,10 @@ void asm_arm_orr_reg_reg_reg(asm_arm_t *as, uint rd, uint rn, uint rm) {
 
 void asm_arm_mov_reg_local_addr(asm_arm_t *as, uint rd, int local_num) {
     if (local_num >= 0x40) {
-        // mov r8, #local_num*4
-        // add rd, sp, r8
-        asm_arm_mov_reg_i32_optimised(as, ASM_ARM_REG_R8, local_num << 2);
-        emit_al(as, asm_arm_op_add_reg(rd, ASM_ARM_REG_SP, ASM_ARM_REG_R8));
+        // mov temp, #local_num*4
+        // add rd, sp, temp
+        asm_arm_mov_reg_i32_optimised(as, REG_TEMP, local_num << 2);
+        emit_al(as, asm_arm_op_add_reg(rd, ASM_ARM_REG_SP, REG_TEMP));
     } else {
         // add rd, sp, #local_num*4
         emit_al(as, asm_arm_op_add_imm(rd, ASM_ARM_REG_SP, local_num << 2));
@@ -333,14 +333,22 @@ void asm_arm_asr_reg_reg(asm_arm_t *as, uint rd, uint rs) {
     emit_al(as, 0x1a00050 | (rd << 12) | (rs << 8) | rd);
 }
 
-void asm_arm_ldr_reg_reg(asm_arm_t *as, uint rd, uint rn, uint byte_offset) {
-    // ldr rd, [rn, #off]
-    emit_al(as, 0x5900000 | (rn << 16) | (rd << 12) | byte_offset);
+void asm_arm_ldr_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset) {
+    if (byte_offset < 0x1000) {
+        // ldr rd, [rn, #off]
+        emit_al(as, 0x5900000 | (rn << 16) | (rd << 12) | byte_offset);
+    } else {
+        // mov temp, #off
+        // ldr rd, [rn, temp]
+        asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
+        emit_al(as, 0x7900000 | (rn << 16) | (rd << 12) | REG_TEMP);
+    }
 }
 
-void asm_arm_ldrh_reg_reg(asm_arm_t *as, uint rd, uint rn) {
-    // ldrh rd, [rn]
-    emit_al(as, 0x1d000b0 | (rn << 16) | (rd << 12));
+void asm_arm_ldrh_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
+    // ldrh doesn't support scaled register index
+    emit_al(as, 0x1a00080 | (REG_TEMP << 12) | rn); // mov temp, rn, lsl #1
+    emit_al(as, 0x19000b0 | (rm << 16) | (rd << 12) | REG_TEMP); // ldrh rd, [rm, temp]
 }
 
 void asm_arm_ldrh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset) {
@@ -348,31 +356,69 @@ void asm_arm_ldrh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offs
         // ldrh rd, [rn, #off]
         emit_al(as, 0x1d000b0 | (rn << 16) | (rd << 12) | ((byte_offset & 0xf0) << 4) | (byte_offset & 0xf));
     } else {
-        // mov r8, #off
-        // ldrh rd, [rn, r8]
-        asm_arm_mov_reg_i32_optimised(as, ASM_ARM_REG_R8, byte_offset);
-        emit_al(as, 0x19000b0 | (rn << 16) | (rd << 12) | ASM_ARM_REG_R8);
+        // mov temp, #off
+        // ldrh rd, [rn, temp]
+        asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
+        emit_al(as, 0x19000b0 | (rn << 16) | (rd << 12) | REG_TEMP);
     }
 }
 
-void asm_arm_ldrb_reg_reg(asm_arm_t *as, uint rd, uint rn) {
-    // ldrb rd, [rn]
-    emit_al(as, 0x5d00000 | (rn << 16) | (rd << 12));
+void asm_arm_ldrb_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
+    // ldrb rd, [rm, rn]
+    emit_al(as, 0x7d00000 | (rm << 16) | (rd << 12) | rn);
+}
+
+void asm_arm_ldrb_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset) {
+    if (byte_offset < 0x1000) {
+        // ldrb rd, [rn, #off]
+        emit_al(as, 0x5d00000 | (rn << 16) | (rd << 12) | byte_offset);
+    } else {
+        // mov temp, #off
+        // ldrb rd, [rn, temp]
+        asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
+        emit_al(as, 0x7d00000 | (rn << 16) | (rd << 12) | REG_TEMP);
+    }
 }
 
-void asm_arm_str_reg_reg(asm_arm_t *as, uint rd, uint rm, uint byte_offset) {
-    // str rd, [rm, #off]
-    emit_al(as, 0x5800000 | (rm << 16) | (rd << 12) | byte_offset);
+void asm_arm_ldr_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
+    // ldr rd, [rm, rn, lsl #2]
+    emit_al(as, 0x7900100 | (rm << 16) | (rd << 12) | rn);
 }
 
-void asm_arm_strh_reg_reg(asm_arm_t *as, uint rd, uint rm) {
-    // strh rd, [rm]
-    emit_al(as, 0x1c000b0 | (rm << 16) | (rd << 12));
+void asm_arm_str_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset) {
+    if (byte_offset < 0x1000) {
+        // str rd, [rm, #off]
+        emit_al(as, 0x5800000 | (rm << 16) | (rd << 12) | byte_offset);
+    } else {
+        // mov temp, #off
+        // str rd, [rm, temp]
+        asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
+        emit_al(as, 0x7800000 | (rm << 16) | (rd << 12) | REG_TEMP);
+    }
 }
 
-void asm_arm_strb_reg_reg(asm_arm_t *as, uint rd, uint rm) {
-    // strb rd, [rm]
-    emit_al(as, 0x5c00000 | (rm << 16) | (rd << 12));
+void asm_arm_strh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset) {
+    if (byte_offset < 0x100) {
+        // strh rd, [rn, #off]
+        emit_al(as, 0x1c000b0 | (rn << 16) | (rd << 12) | ((byte_offset & 0xf0) << 4) | (byte_offset & 0xf));
+    } else {
+        // mov temp, #off
+        // strh rd, [rn, temp]
+        asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
+        emit_al(as, 0x18000b0 | (rn << 16) | (rd << 12) | REG_TEMP);
+    }
+}
+
+void asm_arm_strb_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset) {
+    if (byte_offset < 0x1000) {
+        // strb rd, [rm, #off]
+        emit_al(as, 0x5c00000 | (rm << 16) | (rd << 12) | byte_offset);
+    } else {
+        // mov temp, #off
+        // strb rd, [rm, temp]
+        asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
+        emit_al(as, 0x7c00000 | (rm << 16) | (rd << 12) | REG_TEMP);
+    }
 }
 
 void asm_arm_str_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
@@ -382,8 +428,8 @@ void asm_arm_str_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
 
 void asm_arm_strh_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
     // strh doesn't support scaled register index
-    emit_al(as, 0x1a00080 | (ASM_ARM_REG_R8 << 12) | rn); // mov r8, rn, lsl #1
-    emit_al(as, 0x18000b0 | (rm << 16) | (rd << 12) | ASM_ARM_REG_R8); // strh rd, [rm, r8]
+    emit_al(as, 0x1a00080 | (REG_TEMP << 12) | rn); // mov temp, rn, lsl #1
+    emit_al(as, 0x18000b0 | (rm << 16) | (rd << 12) | REG_TEMP); // strh rd, [rm, temp]
 }
 
 void asm_arm_strb_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
@@ -398,7 +444,7 @@ void asm_arm_bcc_label(asm_arm_t *as, int cond, uint label) {
     rel -= 8; // account for instruction prefetch, PC is 8 bytes ahead of this instruction
     rel >>= 2; // in ARM mode the branch target is 32-bit aligned, so the 2 LSB are omitted
 
-    if (SIGNED_FIT24(rel)) {
+    if (MP_FIT_SIGNED(24, rel)) {
         emit(as, cond | 0xa000000 | (rel & 0xffffff));
     } else {
         printf("asm_arm_bcc: branch does not fit in 24 bits\n");
diff --git a/py/asmarm.h b/py/asmarm.h
index 4a4253aef6..0d68812145 100644
--- a/py/asmarm.h
+++ b/py/asmarm.h
@@ -109,13 +109,18 @@ void asm_arm_lsr_reg_reg(asm_arm_t *as, uint rd, uint rs);
 void asm_arm_asr_reg_reg(asm_arm_t *as, uint rd, uint rs);
 
 // memory
-void asm_arm_ldr_reg_reg(asm_arm_t *as, uint rd, uint rn, uint byte_offset);
-void asm_arm_ldrh_reg_reg(asm_arm_t *as, uint rd, uint rn);
+void asm_arm_ldr_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset);
 void asm_arm_ldrh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset);
-void asm_arm_ldrb_reg_reg(asm_arm_t *as, uint rd, uint rn);
-void asm_arm_str_reg_reg(asm_arm_t *as, uint rd, uint rm, uint byte_offset);
-void asm_arm_strh_reg_reg(asm_arm_t *as, uint rd, uint rm);
-void asm_arm_strb_reg_reg(asm_arm_t *as, uint rd, uint rm);
+void asm_arm_ldrb_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset);
+void asm_arm_str_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset);
+void asm_arm_strh_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset);
+void asm_arm_strb_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset);
+
+// load from array
+void asm_arm_ldr_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn);
+void asm_arm_ldrh_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn);
+void asm_arm_ldrb_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn);
+
 // store to array
 void asm_arm_str_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn);
 void asm_arm_strh_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn);
@@ -202,18 +207,28 @@ void asm_arm_bx_reg(asm_arm_t *as, uint reg_src);
 #define ASM_SUB_REG_REG(as, reg_dest, reg_src) asm_arm_sub_reg_reg_reg((as), (reg_dest), (reg_dest), (reg_src))
 #define ASM_MUL_REG_REG(as, reg_dest, reg_src) asm_arm_mul_reg_reg_reg((as), (reg_dest), (reg_dest), (reg_src))
 
-#define ASM_LOAD_REG_REG(as, reg_dest, reg_base) asm_arm_ldr_reg_reg((as), (reg_dest), (reg_base), 0)
-#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_arm_ldr_reg_reg((as), (reg_dest), (reg_base), 4 * (word_offset))
-#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_arm_ldrb_reg_reg((as), (reg_dest), (reg_base))
-#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_arm_ldrh_reg_reg((as), (reg_dest), (reg_base))
-#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_arm_ldrh_reg_reg_offset((as), (reg_dest), (reg_base), 2 * (uint16_offset))
-#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_arm_ldr_reg_reg((as), (reg_dest), (reg_base), 0)
-
-#define ASM_STORE_REG_REG(as, reg_value, reg_base) asm_arm_str_reg_reg((as), (reg_value), (reg_base), 0)
-#define ASM_STORE_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_arm_str_reg_reg((as), (reg_dest), (reg_base), 4 * (word_offset))
-#define ASM_STORE8_REG_REG(as, reg_value, reg_base) asm_arm_strb_reg_reg((as), (reg_value), (reg_base))
-#define ASM_STORE16_REG_REG(as, reg_value, reg_base) asm_arm_strh_reg_reg((as), (reg_value), (reg_base))
-#define ASM_STORE32_REG_REG(as, reg_value, reg_base) asm_arm_str_reg_reg((as), (reg_value), (reg_base), 0)
+#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), (word_offset))
+#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) ASM_LOAD8_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD8_REG_REG_OFFSET(as, reg_dest, reg_base, byte_offset) asm_arm_ldrb_reg_reg_offset((as), (reg_dest), (reg_base), (byte_offset))
+#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) ASM_LOAD16_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, halfword_offset) asm_arm_ldrh_reg_reg_offset((as), (reg_dest), (reg_base), 2 * (halfword_offset))
+#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_arm_ldr_reg_reg_offset((as), (reg_dest), (reg_base), 4 * (word_offset))
+
+#define ASM_STORE_REG_REG_OFFSET(as, reg_value, reg_base, word_offset) ASM_STORE32_REG_REG_OFFSET((as), (reg_value), (reg_base), (word_offset))
+#define ASM_STORE8_REG_REG(as, reg_value, reg_base) ASM_STORE8_REG_REG_OFFSET((as), (reg_value), (reg_base), 0)
+#define ASM_STORE8_REG_REG_OFFSET(as, reg_value, reg_base, byte_offset) asm_arm_strb_reg_reg_offset((as), (reg_value), (reg_base), (byte_offset))
+#define ASM_STORE16_REG_REG(as, reg_value, reg_base) ASM_STORE16_REG_REG_OFFSET((as), (reg_value), (reg_base), 0)
+#define ASM_STORE16_REG_REG_OFFSET(as, reg_value, reg_base, halfword_offset) asm_arm_strh_reg_reg_offset((as), (reg_value), (reg_base), 2 * (halfword_offset))
+#define ASM_STORE32_REG_REG(as, reg_value, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_value), (reg_base), 0)
+#define ASM_STORE32_REG_REG_OFFSET(as, reg_value, reg_base, word_offset) asm_arm_str_reg_reg_offset((as), (reg_value), (reg_base), 4 * (word_offset))
+
+#define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldrb_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))
+#define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldrh_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))
+#define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldr_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))
+#define ASM_STORE8_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_strb_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
+#define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_strh_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
+#define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_str_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
 
 #endif // GENERIC_ASM_API
diff --git a/py/asmbase.c b/py/asmbase.c
index 3fce543a7f..f1b823fa36 100644
--- a/py/asmbase.c
+++ b/py/asmbase.c
@@ -53,7 +53,7 @@ void mp_asm_base_start_pass(mp_asm_base_t *as, int pass) {
     } else {
         // allocating executable RAM is platform specific
        MP_PLAT_ALLOC_EXEC(as->code_offset, (void **)&as->code_base, &as->code_size);
-        assert(as->code_base != NULL);
+        assert(as->code_size == 0 || as->code_base != NULL);
     }
     as->pass = pass;
     as->suppress = false;
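
Each asmarm.c hunk above has the same fallback shape: an offset that fits the addressing mode is encoded directly, otherwise it is materialised into REG_TEMP (r8) and the register-offset form of the instruction is emitted. This standalone sketch reproduces the direct-encoding arithmetic for a word load; the 0xe0000000 AL condition prefix is an assumption about what emit_al() adds, since that helper's body is outside this diff:

    #include <stdint.h>
    #include <stdio.h>

    // Sketch of the "ldr rd, [rn, #off]" word built by asm_arm_ldr_reg_reg_offset()
    // for byte_offset < 0x1000, with an assumed AL (always) condition of 0xe0000000.
    static uint32_t arm_ldr_imm(uint32_t rd, uint32_t rn, uint32_t byte_offset) {
        return 0xe0000000 | 0x5900000 | (rn << 16) | (rd << 12) | byte_offset;
    }

    int main(void) {
        // ldr r0, [r1, #4] -> 0xe5910004
        printf("0x%08x\n", arm_ldr_imm(0, 1, 4));
        return 0;
    }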
diff --git a/py/asmrv32.c b/py/asmrv32.c
index c24d05a138..158b552191 100644
--- a/py/asmrv32.c
+++ b/py/asmrv32.c
@@ -450,18 +450,24 @@ void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t
     asm_rv32_opcode_cadd(state, rd, ASM_RV32_REG_SP);
 }
 
-void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
-    mp_int_t scaled_offset = offset * sizeof(ASM_WORD_SIZE);
+static const uint8_t RV32_LOAD_OPCODE_TABLE[3] = {
+    0x04, 0x05, 0x02
+};
 
-    if (scaled_offset >= 0 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs) && FIT_UNSIGNED(scaled_offset, 6)) {
+void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, int32_t offset, mp_uint_t operation_size) {
+    assert(operation_size <= 2 && "Operation size value out of range.");
+
+    int32_t scaled_offset = offset << operation_size;
+
+    if (scaled_offset >= 0 && operation_size == 2 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs) && MP_FIT_UNSIGNED(6, scaled_offset)) {
         // c.lw rd', offset(rs')
         asm_rv32_opcode_clw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset);
         return;
     }
 
-    if (FIT_SIGNED(scaled_offset, 12)) {
-        // lw rd, offset(rs)
-        asm_rv32_opcode_lw(state, rd, rs, scaled_offset);
+    if (MP_FIT_SIGNED(12, scaled_offset)) {
+        // lbu|lhu|lw rd, offset(rs)
+        asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0x03, RV32_LOAD_OPCODE_TABLE[operation_size], rd, rs, scaled_offset));
         return;
     }
 
@@ -469,12 +475,12 @@ void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_
     mp_uint_t lower = 0;
     split_immediate(scaled_offset, &upper, &lower);
 
-    // lui rd, HI(offset)       ; Or c.lui if possible
-    // c.add rd, rs
-    // lw rd, LO(offset)(rd)
+    // lui rd, HI(offset)           ; Or c.lui if possible
+    // c.add rd, rs
+    // lbu|lhu|lw rd, LO(offset)(rd)
     load_upper_immediate(state, rd, upper);
     asm_rv32_opcode_cadd(state, rd, rs);
-    asm_rv32_opcode_lw(state, rd, rd, lower);
+    asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0x03, RV32_LOAD_OPCODE_TABLE[operation_size], rd, rd, lower));
 }
 
 void asm_rv32_emit_jump(asm_rv32_t *state, mp_uint_t label) {
@@ -497,12 +503,20 @@ void asm_rv32_emit_jump(asm_rv32_t *state, mp_uint_t label) {
     asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, REG_TEMP2, lower);
 }
 
-void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
-    mp_int_t scaled_offset = offset * ASM_WORD_SIZE;
+void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, int32_t offset, mp_uint_t operation_size) {
+    assert(operation_size <= 2 && "Operation size value out of range.");
+
+    int32_t scaled_offset = offset << operation_size;
 
-    if (FIT_SIGNED(scaled_offset, 12)) {
-        // sw rd, offset(rs)
-        asm_rv32_opcode_sw(state, rd, rs, scaled_offset);
+    if (scaled_offset >= 0 && operation_size == 2 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs) && MP_FIT_UNSIGNED(6, scaled_offset)) {
+        // c.sw rd', offset(rs')
+        asm_rv32_opcode_csw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset);
+        return;
+    }
+
+    if (MP_FIT_SIGNED(12, scaled_offset)) {
+        // sb|sh|sw rd, offset(rs)
+        asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_S(0x23, operation_size, rs, rd, scaled_offset));
         return;
     }
 
@@ -510,12 +524,12 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint
     mp_uint_t lower = 0;
     split_immediate(scaled_offset, &upper, &lower);
 
-    // lui temporary, HI(offset) ; Or c.lui if possible
-    // c.add temporary, rs
-    // sw rd, LO(offset)(temporary)
+    // lui temporary, HI(offset)    ; Or c.lui if possible
+    // c.add temporary, rs
+    // sb|sh|sw rd, LO(offset)(temporary)
     load_upper_immediate(state, REG_TEMP2, upper);
     asm_rv32_opcode_cadd(state, REG_TEMP2, rs);
-    asm_rv32_opcode_sw(state, rd, REG_TEMP2, lower);
+    asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_S(0x23, operation_size, REG_TEMP2, rd, lower));
 }
 
 void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t label) {
@@ -530,27 +544,6 @@ void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t labe
     asm_rv32_opcode_addi(state, rd, rd, lower);
 }
 
-void asm_rv32_emit_load16_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
-    mp_int_t scaled_offset = offset * sizeof(uint16_t);
-
-    if (FIT_SIGNED(scaled_offset, 12)) {
-        // lhu rd, offset(rs)
-        asm_rv32_opcode_lhu(state, rd, rs, scaled_offset);
-        return;
-    }
-
-    mp_uint_t upper = 0;
-    mp_uint_t lower = 0;
-    split_immediate(scaled_offset, &upper, &lower);
-
-    // lui rd, HI(offset)       ; Or c.lui if possible
-    // c.add rd, rs
-    // lhu rd, LO(offset)(rd)
-    load_upper_immediate(state, rd, upper);
-    asm_rv32_opcode_cadd(state, rd, rs);
-    asm_rv32_opcode_lhu(state, rd, rd, lower);
-}
-
 void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs) {
     if (rs == rd) {
         // c.li rd, 0
diff --git a/py/asmrv32.h b/py/asmrv32.h
index b09f48eb12..99c2226ef3 100644
--- a/py/asmrv32.h
+++ b/py/asmrv32.h
@@ -709,14 +709,13 @@ void asm_rv32_emit_call_ind(asm_rv32_t *state, mp_uint_t index);
 void asm_rv32_emit_jump(asm_rv32_t *state, mp_uint_t label);
 void asm_rv32_emit_jump_if_reg_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t label);
 void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_t label);
-void asm_rv32_emit_load16_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset);
-void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset);
+void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, int32_t offset, mp_uint_t operation_size);
 void asm_rv32_emit_mov_local_reg(asm_rv32_t *state, mp_uint_t local, mp_uint_t rs);
 void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local);
 void asm_rv32_emit_mov_reg_local(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local);
 void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t label);
 void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs);
-void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_uint_t base, mp_int_t offset);
+void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_uint_t base, int32_t offset, mp_uint_t operation_size);
 
 #define ASM_T asm_rv32_t
 #define ASM_ENTRY(state, labels) asm_rv32_entry(state, labels)
@@ -732,12 +731,13 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_
 #define ASM_JUMP_IF_REG_NONZERO(state, rs, label, bool_test) asm_rv32_emit_jump_if_reg_nonzero(state, rs, label)
 #define ASM_JUMP_IF_REG_ZERO(state, rs, label, bool_test) asm_rv32_emit_jump_if_reg_eq(state, rs, ASM_RV32_REG_ZERO, label)
 #define ASM_JUMP_REG(state, rs) asm_rv32_opcode_cjr(state, rs)
-#define ASM_LOAD16_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load16_reg_reg_offset(state, rd, rs, offset)
-#define ASM_LOAD16_REG_REG(state, rd, rs) asm_rv32_opcode_lhu(state, rd, rs, 0)
-#define ASM_LOAD32_REG_REG(state, rd, rs) ASM_LOAD_REG_REG_OFFSET(state, rd, rs, 0)
-#define ASM_LOAD8_REG_REG(state, rd, rs) asm_rv32_opcode_lbu(state, rd, rs, 0)
-#define ASM_LOAD_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset)
-#define ASM_LOAD_REG_REG(state, rd, rs) ASM_LOAD32_REG_REG(state, rd, rs)
+#define ASM_LOAD_REG_REG_OFFSET(state, rd, rs, offset) ASM_LOAD32_REG_REG_OFFSET(state, rd, rs, offset)
+#define ASM_LOAD8_REG_REG(state, rd, rs) ASM_LOAD8_REG_REG_OFFSET(state, rd, rs, 0)
+#define ASM_LOAD16_REG_REG(state, rd, rs) ASM_LOAD16_REG_REG_OFFSET(state, rd, rs, 0)
+#define ASM_LOAD32_REG_REG(state, rd, rs) ASM_LOAD32_REG_REG_OFFSET(state, rd, rs, 0)
+#define ASM_LOAD8_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset, 0)
+#define ASM_LOAD16_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset, 1)
+#define ASM_LOAD32_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset, 2)
 #define ASM_LSL_REG_REG(state, rd, rs) asm_rv32_opcode_sll(state, rd, rd, rs)
 #define ASM_LSR_REG_REG(state, rd, rs) asm_rv32_opcode_srl(state, rd, rd, rs)
 #define ASM_MOV_LOCAL_REG(state, local, rs) asm_rv32_emit_mov_local_reg(state, local, rs)
@@ -750,14 +750,50 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_
 #define ASM_NEG_REG(state, rd) asm_rv32_opcode_sub(state, rd, ASM_RV32_REG_ZERO, rd)
 #define ASM_NOT_REG(state, rd) asm_rv32_opcode_xori(state, rd, rd, -1)
 #define ASM_OR_REG_REG(state, rd, rs) asm_rv32_opcode_or(state, rd, rd, rs)
-#define ASM_STORE16_REG_REG(state, rs1, rs2) asm_rv32_opcode_sh(state, rs1, rs2, 0)
-#define ASM_STORE32_REG_REG(state, rs1, rs2) ASM_STORE_REG_REG_OFFSET(state, rs1, rs2, 0)
-#define ASM_STORE8_REG_REG(state, rs1, rs2) asm_rv32_opcode_sb(state, rs1, rs2, 0)
-#define ASM_STORE_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset)
-#define ASM_STORE_REG_REG(state, rs1, rs2) ASM_STORE32_REG_REG(state, rs1, rs2)
+#define ASM_STORE_REG_REG_OFFSET(state, rd, rs, offset) ASM_STORE32_REG_REG_OFFSET(state, rd, rs, offset)
+#define ASM_STORE8_REG_REG(state, rs1, rs2) ASM_STORE8_REG_REG_OFFSET(state, rs1, rs2, 0)
+#define ASM_STORE16_REG_REG(state, rs1, rs2) ASM_STORE16_REG_REG_OFFSET(state, rs1, rs2, 0)
+#define ASM_STORE32_REG_REG(state, rs1, rs2) ASM_STORE32_REG_REG_OFFSET(state, rs1, rs2, 0)
+#define ASM_STORE8_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset, 0)
+#define ASM_STORE16_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset, 1)
+#define ASM_STORE32_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset, 2)
 #define ASM_SUB_REG_REG(state, rd, rs) asm_rv32_opcode_sub(state, rd, rd, rs)
 #define ASM_XOR_REG_REG(state, rd, rs) asm_rv32_emit_optimised_xor(state, rd, rs)
 #define ASM_CLR_REG(state, rd)
 
+#define ASM_LOAD8_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_lbu(state, rd, rs1, 0); \
+    } while (0)
+#define ASM_LOAD16_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_slli(state, rs2, rs2, 1); \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_lhu(state, rd, rs1, 0); \
+    } while (0)
+#define ASM_LOAD32_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_slli(state, rs2, rs2, 2); \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_lw(state, rd, rs1, 0); \
+    } while (0)
+#define ASM_STORE8_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_sb(state, rd, rs1, 0); \
+    } while (0)
+#define ASM_STORE16_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_slli(state, rs2, rs2, 1); \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_sh(state, rd, rs1, 0); \
+    } while (0)
+#define ASM_STORE32_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_slli(state, rs2, rs2, 2); \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_sw(state, rd, rs1, 0); \
+    } while (0)
+
 #endif
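
The RV32 emitters above now build raw opcodes through RV32_ENCODE_TYPE_I/RV32_ENCODE_TYPE_S instead of one helper per instruction, with funct3 looked up in RV32_LOAD_OPCODE_TABLE (0x04 = lbu, 0x05 = lhu, 0x02 = lw) or taken directly from operation_size for stores. RV32_ENCODE_TYPE_I is not defined in this diff; the encoder below is a stand-in following the standard I-type layout (imm[11:0], rs1, funct3, rd, opcode):

    #include <stdint.h>
    #include <stdio.h>

    // Stand-in for RV32_ENCODE_TYPE_I, assuming the standard RISC-V I-type layout.
    static uint32_t rv32_type_i(uint32_t op, uint32_t funct3, uint32_t rd, uint32_t rs, uint32_t imm) {
        return op | (rd << 7) | (funct3 << 12) | (rs << 15) | ((imm & 0xfff) << 20);
    }

    int main(void) {
        // Halfword load (operation_size 1 -> funct3 0x05): lhu a0, 0(a1) -> 0x0005d503
        printf("0x%08x\n", rv32_type_i(0x03, 0x05, 10, 11, 0));
        return 0;
    }

Note also that the ASM_LOAD*/ASM_STORE*_REG_REG_REG macros above scale rs2 in place with slli and add it into rs1, so both registers are clobbered; callers must treat them as dead after the access.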
diff --git a/py/asmthumb.c b/py/asmthumb.c
index 420815e802..18c3db9e4e 100644
--- a/py/asmthumb.c
+++ b/py/asmthumb.c
@@ -37,7 +37,6 @@
 #include "py/asmthumb.h"
 #include "py/misc.h"
 
-#define UNSIGNED_FIT5(x) ((uint32_t)(x) < 32)
 #define UNSIGNED_FIT7(x) ((uint32_t)(x) < 128)
 #define UNSIGNED_FIT8(x) (((x) & 0xffffff00) == 0)
 #define UNSIGNED_FIT16(x) (((x) & 0xffff0000) == 0)
@@ -52,12 +51,6 @@
 #define OP_SUB_W_RRI_HI(reg_src) (0xf2a0 | (reg_src))
 #define OP_SUB_W_RRI_LO(reg_dest, imm11) ((imm11 << 4 & 0x7000) | reg_dest << 8 | (imm11 & 0xff))
 
-#define OP_LDR_W_HI(reg_base) (0xf8d0 | (reg_base))
-#define OP_LDR_W_LO(reg_dest, imm12) ((reg_dest) << 12 | (imm12))
-
-#define OP_LDRH_W_HI(reg_base) (0xf8b0 | (reg_base))
-#define OP_LDRH_W_LO(reg_dest, imm12) ((reg_dest) << 12 | (imm12))
-
 static inline byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int n) {
     return mp_asm_base_get_cur_to_write_bytes(&as->base, n);
 }
@@ -432,11 +425,6 @@ void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label) {
     asm_thumb_add_reg_reg(as, rlo_dest, ASM_THUMB_REG_R15); // 2 bytes
 }
 
-// ARMv7-M only
-static inline void asm_thumb_ldr_reg_reg_i12(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset) {
-    asm_thumb_op32(as, OP_LDR_W_HI(reg_base), OP_LDR_W_LO(reg_dest, word_offset * 4));
-}
-
 // emits code for: reg_dest = reg_base + offset << offset_shift
 static void asm_thumb_add_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint reg_base, uint offset, uint offset_shift) {
     if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8) {
@@ -450,12 +438,12 @@ static void asm_thumb_add_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint re
             asm_thumb_lsl_rlo_rlo_i5(as, reg_dest, reg_dest, offset_shift);
             asm_thumb_add_rlo_rlo_rlo(as, reg_dest, reg_dest, reg_base);
         } else if (reg_dest != reg_base) {
-            asm_thumb_mov_rlo_i16(as, reg_dest, offset << offset_shift);
-            asm_thumb_add_rlo_rlo_rlo(as, reg_dest, reg_dest, reg_dest);
+            asm_thumb_mov_reg_i32_optimised(as, reg_dest, offset << offset_shift);
+            asm_thumb_add_rlo_rlo_rlo(as, reg_dest, reg_dest, reg_base);
         } else {
             uint reg_other = reg_dest ^ 7;
             asm_thumb_op16(as, OP_PUSH_RLIST((1 << reg_other)));
-            asm_thumb_mov_rlo_i16(as, reg_other, offset << offset_shift);
+            asm_thumb_mov_reg_i32_optimised(as, reg_other, offset << offset_shift);
             asm_thumb_add_rlo_rlo_rlo(as, reg_dest, reg_dest, reg_other);
             asm_thumb_op16(as, OP_POP_RLIST((1 << reg_other)));
         }
@@ -464,30 +452,48 @@ static void asm_thumb_add_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint re
     }
 }
 
-void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset) {
-    if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8 && UNSIGNED_FIT5(word_offset)) {
-        asm_thumb_ldr_rlo_rlo_i5(as, reg_dest, reg_base, word_offset);
-    } else if (asm_thumb_allow_armv7m(as)) {
-        asm_thumb_ldr_reg_reg_i12(as, reg_dest, reg_base, word_offset);
+#define OP_LDR_STR_W_HI(operation_size, reg) ((0xf880 | (operation_size) << 5) | (reg))
+#define OP_LDR_STR_W_LO(reg, imm12) (((reg) << 12) | (imm12))
+
+#define OP_LDR 0x01
+#define OP_STR 0x00
+
+#define OP_LDR_W 0x10
+#define OP_STR_W 0x00
+
+static const uint8_t OP_LDR_STR_TABLE[3] = {
+    0x0E, 0x10, 0x0C
+};
+
+void asm_thumb_load_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint reg_base, uint offset, uint operation_size) {
+    assert(operation_size <= 2 && "Operation size out of range.");
+
+    if (MP_FIT_UNSIGNED(5, offset) && (reg_dest < ASM_THUMB_REG_R8) && (reg_base < ASM_THUMB_REG_R8)) {
+        // Can use T1 encoding
+        asm_thumb_op16(as, ((OP_LDR_STR_TABLE[operation_size] | OP_LDR) << 11) | (offset << 6) | (reg_base << 3) | reg_dest);
+    } else if (asm_thumb_allow_armv7m(as) && MP_FIT_UNSIGNED(12, offset << operation_size)) {
+        // Can use T3 encoding
+        asm_thumb_op32(as, (OP_LDR_STR_W_HI(operation_size, reg_base) | OP_LDR_W), OP_LDR_STR_W_LO(reg_dest, (offset << operation_size)));
     } else {
-        asm_thumb_add_reg_reg_offset(as, reg_dest, reg_base, word_offset - 31, 2);
-        asm_thumb_ldr_rlo_rlo_i5(as, reg_dest, reg_dest, 31);
+        // Must use the generic sequence
+        asm_thumb_add_reg_reg_offset(as, reg_dest, reg_base, offset - 31, operation_size);
+        asm_thumb_op16(as, ((OP_LDR_STR_TABLE[operation_size] | OP_LDR) << 11) | (31 << 6) | (reg_dest << 3) | (reg_dest));
     }
 }
 
-// ARMv7-M only
-static inline void asm_thumb_ldrh_reg_reg_i12(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset) {
-    asm_thumb_op32(as, OP_LDRH_W_HI(reg_base), OP_LDRH_W_LO(reg_dest, uint16_offset * 2));
-}
+void asm_thumb_store_reg_reg_offset(asm_thumb_t *as, uint reg_src, uint reg_base, uint offset, uint operation_size) {
+    assert(operation_size <= 2 && "Operation size out of range.");
 
-void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset) {
-    if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8 && UNSIGNED_FIT5(uint16_offset)) {
-        asm_thumb_ldrh_rlo_rlo_i5(as, reg_dest, reg_base, uint16_offset);
-    } else if (asm_thumb_allow_armv7m(as)) {
-        asm_thumb_ldrh_reg_reg_i12(as, reg_dest, reg_base, uint16_offset);
+    if (MP_FIT_UNSIGNED(5, offset) && (reg_src < ASM_THUMB_REG_R8) && (reg_base < ASM_THUMB_REG_R8)) {
+        // Can use T1 encoding
+        asm_thumb_op16(as, ((OP_LDR_STR_TABLE[operation_size] | OP_STR) << 11) | (offset << 6) | (reg_base << 3) | reg_src);
+    } else if (asm_thumb_allow_armv7m(as) && MP_FIT_UNSIGNED(12, offset << operation_size)) {
+        // Can use T3 encoding
+        asm_thumb_op32(as, (OP_LDR_STR_W_HI(operation_size, reg_base) | OP_STR_W), OP_LDR_STR_W_LO(reg_src, (offset << operation_size)));
     } else {
-        asm_thumb_add_reg_reg_offset(as, reg_dest, reg_base, uint16_offset - 31, 1);
-        asm_thumb_ldrh_rlo_rlo_i5(as, reg_dest, reg_dest, 31);
+        // Must use the generic sequence
+        asm_thumb_add_reg_reg_offset(as, reg_base, reg_base, offset - 31, operation_size);
+        asm_thumb_op16(as, ((OP_LDR_STR_TABLE[operation_size] | OP_STR) << 11) | (31 << 6) | (reg_base << 3) | reg_src);
     }
 }
 
@@ -495,6 +501,7 @@ void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint r
 #define OP_BW_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff))
 #define OP_BW_LO(byte_offset) (0xb800 | (((byte_offset) >> 1) & 0x07ff))
 
+// In Thumb1 mode, this may clobber r1.
 void asm_thumb_b_label(asm_thumb_t *as, uint label) {
     mp_uint_t dest = get_label_dest(as, label);
     mp_int_t rel = dest - as->base.code_offset;
@@ -514,19 +521,40 @@ void asm_thumb_b_label(asm_thumb_t *as, uint label) {
     if (asm_thumb_allow_armv7m(as)) {
         asm_thumb_op32(as, OP_BW_HI(rel), OP_BW_LO(rel));
     } else {
+        // this code path has to be the same instruction size irrespective of the value of rel
+        bool need_align = as->base.code_offset & 2u;
         if (SIGNED_FIT12(rel)) {
-            // this code path has to be the same number of instructions irrespective of rel
             asm_thumb_op16(as, OP_B_N(rel));
-        } else {
             asm_thumb_op16(as, ASM_THUMB_OP_NOP);
-            if (dest != (mp_uint_t)-1) {
-                // we have an actual branch > 12 bits; this is not handled yet
-                mp_raise_NotImplementedError(MP_ERROR_TEXT("native method too big"));
+            asm_thumb_op16(as, ASM_THUMB_OP_NOP);
+            asm_thumb_op16(as, ASM_THUMB_OP_NOP);
+            if (need_align) {
+                asm_thumb_op16(as, ASM_THUMB_OP_NOP);
+            }
+        } else {
+            // do a large jump using:
+            //     (nop)
+            //     ldr r1, [pc, _data]
+            //     add pc, r1
+            // _data: .word rel
+            //
+            // note: can't use r0 as a temporary because native code can have the return value
+            // in that register and use a large jump to get to the exit point of the function
+
+            rel -= 2; // account for the "ldr r1, [pc, _data]"
+            if (need_align) {
+                asm_thumb_op16(as, ASM_THUMB_OP_NOP);
+                rel -= 2; // account for this nop
             }
+            asm_thumb_ldr_rlo_pcrel_i8(as, ASM_THUMB_REG_R1, 0);
+            asm_thumb_add_reg_reg(as, ASM_THUMB_REG_R15, ASM_THUMB_REG_R1);
+            asm_thumb_op16(as, rel & 0xffff);
+            asm_thumb_op16(as, rel >> 16);
         }
     }
 }
 
+// In Thumb1 mode, this may clobber r1.
 void asm_thumb_bcc_label(asm_thumb_t *as, int cond, uint label) {
     mp_uint_t dest = get_label_dest(as, label);
     mp_int_t rel = dest - as->base.code_offset;
@@ -547,8 +575,15 @@ void asm_thumb_bcc_label(asm_thumb_t *as, int cond, uint label) {
         asm_thumb_op32(as, OP_BCC_W_HI(cond, rel), OP_BCC_W_LO(rel));
     } else {
         // reverse the sense of the branch to jump over a longer branch
-        asm_thumb_op16(as, OP_BCC_N(cond ^ 1, 0));
+        size_t code_offset_start = as->base.code_offset;
+        byte *c = asm_thumb_get_cur_to_write_bytes(as, 2);
         asm_thumb_b_label(as, label);
+        size_t bytes_to_skip = as->base.code_offset - code_offset_start;
+        uint16_t op = OP_BCC_N(cond ^ 1, bytes_to_skip - 4);
+        if (c != NULL) {
+            c[0] = op;
+            c[1] = op >> 8;
+        }
     }
 }
 
@@ -569,7 +604,7 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel) {
 
 void asm_thumb_bl_ind(asm_thumb_t *as, uint fun_id, uint reg_temp) {
     // Load ptr to function from table, indexed by fun_id, then call it
-    asm_thumb_ldr_reg_reg_i12_optimised(as, reg_temp, ASM_THUMB_REG_FUN_TABLE, fun_id);
+    asm_thumb_load_reg_reg_offset(as, reg_temp, ASM_THUMB_REG_FUN_TABLE, fun_id, 2);
     asm_thumb_op16(as, OP_BLX(reg_temp));
 }
diff --git a/py/asmthumb.h b/py/asmthumb.h
index a9e68d7adb..9cd9d32d83 100644
--- a/py/asmthumb.h
+++ b/py/asmthumb.h
@@ -251,6 +251,50 @@ static inline void asm_thumb_bx_reg(asm_thumb_t *as, uint r_src) {
     asm_thumb_format_5(as, ASM_THUMB_FORMAT_5_BX, 0, r_src);
 }
 
+// FORMAT 7: load/store with register offset
+// FORMAT 8: load/store sign-extended byte/halfword
+
+#define ASM_THUMB_FORMAT_7_LDR (0x5800)
+#define ASM_THUMB_FORMAT_7_STR (0x5000)
+#define ASM_THUMB_FORMAT_7_WORD_TRANSFER (0x0000)
+#define ASM_THUMB_FORMAT_7_BYTE_TRANSFER (0x0400)
+#define ASM_THUMB_FORMAT_8_LDRH (0x5A00)
+#define ASM_THUMB_FORMAT_8_STRH (0x5200)
+
+#define ASM_THUMB_FORMAT_7_8_ENCODE(op, rlo_dest, rlo_base, rlo_index) \
+    ((op) | ((rlo_index) << 6) | ((rlo_base) << 3) | ((rlo_dest)))
+
+static inline void asm_thumb_format_7_8(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_base, uint rlo_index) {
+    assert(rlo_dest < ASM_THUMB_REG_R8);
+    assert(rlo_base < ASM_THUMB_REG_R8);
+    assert(rlo_index < ASM_THUMB_REG_R8);
+    asm_thumb_op16(as, ASM_THUMB_FORMAT_7_8_ENCODE(op, rlo_dest, rlo_base, rlo_index));
+}
+
+static inline void asm_thumb_ldrb_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint rlo_index) {
+    asm_thumb_format_7_8(as, ASM_THUMB_FORMAT_7_LDR | ASM_THUMB_FORMAT_7_BYTE_TRANSFER, rlo_dest, rlo_base, rlo_index);
+}
+
+static inline void asm_thumb_ldrh_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint rlo_index) {
+    asm_thumb_format_7_8(as, ASM_THUMB_FORMAT_8_LDRH, rlo_dest, rlo_base, rlo_index);
+}
+
+static inline void asm_thumb_ldr_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint rlo_index) {
+    asm_thumb_format_7_8(as, ASM_THUMB_FORMAT_7_LDR | ASM_THUMB_FORMAT_7_WORD_TRANSFER, rlo_dest, rlo_base, rlo_index);
+}
+
+static inline void asm_thumb_strb_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint rlo_index) {
+    asm_thumb_format_7_8(as, ASM_THUMB_FORMAT_7_STR | ASM_THUMB_FORMAT_7_BYTE_TRANSFER, rlo_src, rlo_base, rlo_index);
+}
+
+static inline void asm_thumb_strh_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint rlo_index) {
+    asm_thumb_format_7_8(as, ASM_THUMB_FORMAT_8_STRH, rlo_dest, rlo_base, rlo_index);
+}
+
+static inline void asm_thumb_str_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint rlo_index) {
+    asm_thumb_format_7_8(as, ASM_THUMB_FORMAT_7_STR | ASM_THUMB_FORMAT_7_WORD_TRANSFER, rlo_src, rlo_base, rlo_index);
+}
+
 // FORMAT 9: load/store with immediate offset
 // For word transfers the offset must be aligned, and >>2
 
@@ -273,24 +317,6 @@ static inline void asm_thumb_format_9_10(asm_thumb_t *as, uint op, uint rlo_dest
     asm_thumb_op16(as, ASM_THUMB_FORMAT_9_10_ENCODE(op, rlo_dest, rlo_base, offset));
 }
 
-static inline void asm_thumb_str_rlo_rlo_i5(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint word_offset) {
-    asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_9_STR | ASM_THUMB_FORMAT_9_WORD_TRANSFER, rlo_src, rlo_base, word_offset);
-}
-static inline void asm_thumb_strb_rlo_rlo_i5(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint byte_offset) {
-    asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_9_STR | ASM_THUMB_FORMAT_9_BYTE_TRANSFER, rlo_src, rlo_base, byte_offset);
-}
-static inline void asm_thumb_strh_rlo_rlo_i5(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint uint16_offset) {
-    asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_10_STRH, rlo_src, rlo_base, uint16_offset);
-}
-static inline void asm_thumb_ldr_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint word_offset) {
-    asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_9_LDR | ASM_THUMB_FORMAT_9_WORD_TRANSFER, rlo_dest, rlo_base, word_offset);
-}
-static inline void asm_thumb_ldrb_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint byte_offset) {
-    asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_9_LDR | ASM_THUMB_FORMAT_9_BYTE_TRANSFER, rlo_dest, rlo_base, byte_offset);
-}
-static inline void asm_thumb_ldrh_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint uint16_offset) {
-    asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_10_LDRH, rlo_dest, rlo_base, uint16_offset);
-}
 static inline void asm_thumb_lsl_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_src, uint shift) {
     asm_thumb_format_1(as, ASM_THUMB_FORMAT_1_LSL, rlo_dest, rlo_src, shift);
 }
@@ -338,8 +364,10 @@ void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num); //
 void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint rlo_dest, int local_num); // convenience
 void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label);
 
-void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset); // convenience
-void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset); // convenience
+// Generate optimised load dest, [src, #offset] sequence
+void asm_thumb_load_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint reg_base, uint offset, uint operation_size);
+// Generate optimised store src, [dest, #offset] sequence
+void asm_thumb_store_reg_reg_offset(asm_thumb_t *as, uint reg_src, uint reg_base, uint offset, uint operation_size);
 
 void asm_thumb_b_label(asm_thumb_t *as, uint label); // convenience: picks narrow or wide branch
 void asm_thumb_bcc_label(asm_thumb_t *as, int cc, uint label); // convenience: picks narrow or wide branch
@@ -418,18 +446,44 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel);
 #define ASM_SUB_REG_REG(as, reg_dest, reg_src) asm_thumb_sub_rlo_rlo_rlo((as), (reg_dest), (reg_dest), (reg_src))
 #define ASM_MUL_REG_REG(as, reg_dest, reg_src) asm_thumb_format_4((as), ASM_THUMB_FORMAT_4_MUL, (reg_dest), (reg_src))
 
-#define ASM_LOAD_REG_REG(as, reg_dest, reg_base) asm_thumb_ldr_rlo_rlo_i5((as), (reg_dest), (reg_base), 0)
-#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_thumb_ldr_reg_reg_i12_optimised((as), (reg_dest), (reg_base), (word_offset))
-#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_thumb_ldrb_rlo_rlo_i5((as), (reg_dest), (reg_base), 0)
-#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_thumb_ldrh_rlo_rlo_i5((as), (reg_dest), (reg_base), 0)
-#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_thumb_ldrh_reg_reg_i12_optimised((as), (reg_dest), (reg_base), (uint16_offset))
-#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_thumb_ldr_rlo_rlo_i5((as), (reg_dest), (reg_base), 0)
-
-#define ASM_STORE_REG_REG(as, reg_src, reg_base) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
-#define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), (word_offset))
-#define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_thumb_strb_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
-#define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_thumb_strh_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
-#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
+#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), (word_offset))
+#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) ASM_LOAD8_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD8_REG_REG_OFFSET(as, reg_dest, reg_base, byte_offset) asm_thumb_load_reg_reg_offset((as), (reg_dest), (reg_base), (byte_offset), 0)
+#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) ASM_LOAD16_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, halfword_offset) asm_thumb_load_reg_reg_offset((as), (reg_dest), (reg_base), (halfword_offset), 1)
+#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_thumb_load_reg_reg_offset((as), (reg_dest), (reg_base), (word_offset), 2)
+
+#define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), (word_offset))
+#define ASM_STORE8_REG_REG(as, reg_src, reg_base) ASM_STORE8_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
+#define ASM_STORE8_REG_REG_OFFSET(as, reg_src, reg_base, byte_offset) asm_thumb_store_reg_reg_offset((as), (reg_src), (reg_base), (byte_offset), 0)
+#define ASM_STORE16_REG_REG(as, reg_src, reg_base) ASM_STORE16_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
+#define ASM_STORE16_REG_REG_OFFSET(as, reg_src, reg_base, halfword_offset) asm_thumb_store_reg_reg_offset((as), (reg_src), (reg_base), (halfword_offset), 1)
+#define ASM_STORE32_REG_REG(as, reg_src, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
+#define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_thumb_store_reg_reg_offset((as), (reg_src), (reg_base), (word_offset), 2)
+
+#define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_thumb_ldrb_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index))
+#define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
+    do { \
+        asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 1); \
+        asm_thumb_ldrh_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index)); \
+    } while (0)
+#define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
+    do { \
+        asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 2); \
+        asm_thumb_ldr_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index)); \
+    } while (0)
+#define ASM_STORE8_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_thumb_strb_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index))
+#define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) \
+    do { \
+        asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 1); \
+        asm_thumb_strh_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)); \
+    } while (0)
+#define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) \
+    do { \
+        asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 2); \
+        asm_thumb_str_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)); \
+    } while (0)
 
 #endif // GENERIC_ASM_API
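
The table-driven Thumb encodings can be checked in isolation: OP_LDR_STR_TABLE holds the top five opcode bits for byte/halfword/word transfers, with OP_LDR OR'd in for loads. This standalone sketch reproduces the T1 (16-bit) path of asm_thumb_load_reg_reg_offset() using the table values from the hunk above; the rest of the program is illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    static const uint8_t OP_LDR_STR_TABLE[3] = {0x0E, 0x10, 0x0C};
    #define OP_LDR 0x01

    // T1 encoding: opcode[15:11] | imm5[10:6] | Rn[5:3] | Rt[2:0]
    static uint16_t thumb_t1_load(uint32_t rt, uint32_t rn, uint32_t imm5, uint32_t operation_size) {
        return ((OP_LDR_STR_TABLE[operation_size] | OP_LDR) << 11) | (imm5 << 6) | (rn << 3) | rt;
    }

    int main(void) {
        // ldr r0, [r1, #8]: word offset 2, operation_size 2 -> 0x6888
        printf("0x%04x\n", thumb_t1_load(0, 1, 2, 2));
        return 0;
    }

As with the RV32 variants, the ASM_LOAD16/32_REG_REG_REG and ASM_STORE16/32_REG_REG_REG macros above shift reg_index in place before the register-offset access, so the index register is clobbered.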
diff --git a/py/asmx64.h b/py/asmx64.h
index c63e31797e..f2fb5da180 100644
--- a/py/asmx64.h
+++ b/py/asmx64.h
@@ -205,18 +205,21 @@ void asm_x64_call_ind(asm_x64_t *as, size_t fun_id, int temp_r32);
 #define ASM_SUB_REG_REG(as, reg_dest, reg_src) asm_x64_sub_r64_r64((as), (reg_dest), (reg_src))
 #define ASM_MUL_REG_REG(as, reg_dest, reg_src) asm_x64_mul_r64_r64((as), (reg_dest), (reg_src))
 
-#define ASM_LOAD_REG_REG(as, reg_dest, reg_base) asm_x64_mov_mem64_to_r64((as), (reg_base), 0, (reg_dest))
-#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_x64_mov_mem64_to_r64((as), (reg_base), 8 * (word_offset), (reg_dest))
-#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_x64_mov_mem8_to_r64zx((as), (reg_base), 0, (reg_dest))
-#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_x64_mov_mem16_to_r64zx((as), (reg_base), 0, (reg_dest))
-#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_x64_mov_mem16_to_r64zx((as), (reg_base), 2 * (uint16_offset), (reg_dest))
-#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_x64_mov_mem32_to_r64zx((as), (reg_base), 0, (reg_dest))
-
-#define ASM_STORE_REG_REG(as, reg_src, reg_base) asm_x64_mov_r64_to_mem64((as), (reg_src), (reg_base), 0)
-#define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_x64_mov_r64_to_mem64((as), (reg_src), (reg_base), 8 * (word_offset))
-#define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_x64_mov_r8_to_mem8((as), (reg_src), (reg_base), 0)
-#define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_x64_mov_r16_to_mem16((as), (reg_src), (reg_base), 0)
-#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_x64_mov_r32_to_mem32((as), (reg_src), (reg_base), 0)
+#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, qword_offset) asm_x64_mov_mem64_to_r64((as), (reg_base), 8 * (qword_offset), (reg_dest))
+#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) ASM_LOAD8_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD8_REG_REG_OFFSET(as, reg_dest, reg_base, byte_offset) asm_x64_mov_mem8_to_r64zx((as), (reg_base), (byte_offset), (reg_dest))
+#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) ASM_LOAD16_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_x64_mov_mem16_to_r64zx((as), (reg_base), 2 * (word_offset), (reg_dest))
+#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, dword_offset) asm_x64_mov_mem32_to_r64zx((as), (reg_base), 4 * (dword_offset), (reg_dest))
+
+#define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, qword_offset) asm_x64_mov_r64_to_mem64((as), (reg_src), (reg_base), 8 * (qword_offset))
+#define ASM_STORE8_REG_REG(as, reg_src, reg_base) ASM_STORE8_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
+#define ASM_STORE8_REG_REG_OFFSET(as, reg_src, reg_base, byte_offset) asm_x64_mov_r8_to_mem8((as), (reg_src), (reg_base), (byte_offset))
+#define ASM_STORE16_REG_REG(as, reg_src, reg_base) ASM_STORE16_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
+#define ASM_STORE16_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_x64_mov_r16_to_mem16((as), (reg_src), (reg_base), 2 * (word_offset))
+#define ASM_STORE32_REG_REG(as, reg_src, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
+#define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, dword_offset) asm_x64_mov_r32_to_mem32((as), (reg_src), (reg_base), 4 * (dword_offset))
 
 #endif // GENERIC_ASM_API
diff --git a/py/asmx86.h b/py/asmx86.h
index 027d44151e..2cec38ed45 100644
--- a/py/asmx86.h
+++ b/py/asmx86.h
@@ -200,18 +200,21 @@ void asm_x86_call_ind(asm_x86_t *as, size_t fun_id, mp_uint_t n_args, int temp_r
 #define ASM_SUB_REG_REG(as, reg_dest, reg_src) asm_x86_sub_r32_r32((as), (reg_dest), (reg_src))
 #define ASM_MUL_REG_REG(as, reg_dest, reg_src) asm_x86_mul_r32_r32((as), (reg_dest), (reg_src))
 
-#define ASM_LOAD_REG_REG(as, reg_dest, reg_base) asm_x86_mov_mem32_to_r32((as), (reg_base), 0, (reg_dest))
-#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_x86_mov_mem32_to_r32((as), (reg_base), 4 * (word_offset), (reg_dest))
-#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_x86_mov_mem8_to_r32zx((as), (reg_base), 0, (reg_dest))
-#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_x86_mov_mem16_to_r32zx((as), (reg_base), 0, (reg_dest))
-#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_x86_mov_mem16_to_r32zx((as), (reg_base), 2 * (uint16_offset), (reg_dest))
-#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_x86_mov_mem32_to_r32((as), (reg_base), 0, (reg_dest))
-
-#define ASM_STORE_REG_REG(as, reg_src, reg_base) asm_x86_mov_r32_to_mem32((as), (reg_src), (reg_base), 0)
-#define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_x86_mov_r32_to_mem32((as), (reg_src), (reg_base), 4 * (word_offset))
-#define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_x86_mov_r8_to_mem8((as), (reg_src), (reg_base), 0)
-#define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_x86_mov_r16_to_mem16((as), (reg_src), (reg_base), 0)
-#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_x86_mov_r32_to_mem32((as), (reg_src), (reg_base), 0)
+#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, dword_offset) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), (dword_offset))
+#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) ASM_LOAD8_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD8_REG_REG_OFFSET(as, reg_dest, reg_base, byte_offset) asm_x86_mov_mem8_to_r32zx((as), (reg_base), (byte_offset), (reg_dest))
+#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) ASM_LOAD16_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_x86_mov_mem16_to_r32zx((as), (reg_base), 2 * (word_offset), (reg_dest))
+#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, dword_offset) asm_x86_mov_mem32_to_r32((as), (reg_base), 4 * (dword_offset), (reg_dest))
+
+#define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, dword_offset) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), (dword_offset))
+#define ASM_STORE8_REG_REG(as, reg_src, reg_base) ASM_STORE8_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
+#define ASM_STORE8_REG_REG_OFFSET(as, reg_src, reg_base, byte_offset) asm_x86_mov_r8_to_mem8((as), (reg_src), (reg_base), (byte_offset))
+#define ASM_STORE16_REG_REG(as, reg_src, reg_base) ASM_STORE16_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
+#define ASM_STORE16_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_x86_mov_r16_to_mem16((as), (reg_src), (reg_base), 2 * (word_offset))
+#define ASM_STORE32_REG_REG(as, reg_src, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
+#define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, dword_offset) asm_x86_mov_r32_to_mem32((as), (reg_src), (reg_base), 4 * (dword_offset))
 
 #endif // GENERIC_ASM_API
diff --git a/py/asmxtensa.c b/py/asmxtensa.c
index 0fbe351dcf..bc3e717d9f 100644
--- a/py/asmxtensa.c
+++ b/py/asmxtensa.c
@@ -34,9 +34,20 @@
 
 #include "py/asmxtensa.h"
 
+#if N_XTENSAWIN
+#define REG_TEMP ASM_XTENSA_REG_TEMPORARY_WIN
+#else
+#define REG_TEMP ASM_XTENSA_REG_TEMPORARY
+#endif
+
 #define WORD_SIZE (4)
+#define SIGNED_FIT6(x) ((((x) & 0xffffffe0) == 0) || (((x) & 0xffffffe0) == 0xffffffe0))
 #define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
 #define SIGNED_FIT12(x) ((((x) & 0xfffff800) == 0) || (((x) & 0xfffff800) == 0xfffff800))
+#define SIGNED_FIT18(x) ((((x) & 0xfffe0000) == 0) || (((x) & 0xfffe0000) == 0xfffe0000))
+
+#define ET_OUT_OF_RANGE MP_ERROR_TEXT("ERROR: xtensa %q out of range")
+#define ET_NOT_ALIGNED MP_ERROR_TEXT("ERROR: %q %q not word-aligned")
 
 void asm_xtensa_end_pass(asm_xtensa_t *as) {
     as->num_const = as->cur_const;
@@ -47,9 +58,9 @@ void asm_xtensa_end_pass(asm_xtensa_t *as) {
     if (as->base.pass == MP_ASM_PASS_EMIT) {
         uint8_t *d = as->base.code_base;
         printf("XTENSA ASM:");
-        for (int i = 0; i < ((as->base.code_size + 15) & ~15); ++i) {
+        for (size_t i = 0; i < ((as->base.code_size + 15) & ~15); ++i) {
             if (i % 16 == 0) {
-                printf("\n%08x:", (uint32_t)&d[i]);
+                printf("\n%p:", &d[i]);
             }
             if (i % 2 == 0) {
                 printf(" ");
@@ -62,10 +73,12 @@ void asm_xtensa_end_pass(asm_xtensa_t *as) {
 }
 
 void asm_xtensa_entry(asm_xtensa_t *as, int num_locals) {
-    // jump over the constants
-    asm_xtensa_op_j(as, as->num_const * WORD_SIZE + 4 - 4);
-    mp_asm_base_get_cur_to_write_bytes(&as->base, 1); // padding/alignment byte
-    as->const_table = (uint32_t *)mp_asm_base_get_cur_to_write_bytes(&as->base, as->num_const * 4);
+    if (as->num_const > 0) {
+        // jump over the constants
+        asm_xtensa_op_j(as, as->num_const * WORD_SIZE + 4 - 4);
+        mp_asm_base_get_cur_to_write_bytes(&as->base, 1); // padding/alignment byte
+        as->const_table = (uint32_t *)mp_asm_base_get_cur_to_write_bytes(&as->base, as->num_const * 4);
+    }
 
     // adjust the stack-pointer to store a0, a12, a13, a14, a15 and locals, 16-byte aligned
     as->stack_adjust = (((ASM_XTENSA_NUM_REGS_SAVED + num_locals) * WORD_SIZE) + 15) & ~15;
@@ -146,22 +159,60 @@ void asm_xtensa_j_label(asm_xtensa_t *as, uint label) {
     asm_xtensa_op_j(as, rel);
 }
 
+static bool calculate_branch_displacement(asm_xtensa_t *as, uint label, ptrdiff_t *displacement) {
+    assert(displacement != NULL && "Displacement pointer is NULL");
+
+    uint32_t label_offset = get_label_dest(as, label);
+    *displacement = (ptrdiff_t)(label_offset - as->base.code_offset - 4);
+    return (label_offset != (uint32_t)-1) && (*displacement < 0);
+}
+
 void asm_xtensa_bccz_reg_label(asm_xtensa_t *as, uint cond, uint reg, uint label) {
-    uint32_t dest = get_label_dest(as, label);
-    int32_t rel = dest - as->base.code_offset - 4;
-    if (as->base.pass == MP_ASM_PASS_EMIT && !SIGNED_FIT12(rel)) {
-        printf("ERROR: xtensa bccz out of range\n");
+    ptrdiff_t rel = 0;
+    bool can_emit_short_jump = calculate_branch_displacement(as, label, &rel);
+
+    if (can_emit_short_jump && SIGNED_FIT12(rel)) {
+        // Backwards BCCZ opcodes with an offset that fits in 12 bits can
+        // be emitted without any change.
+        asm_xtensa_op_bccz(as, cond, reg, rel);
+        return;
     }
-    asm_xtensa_op_bccz(as, cond, reg, rel);
+
+    // Range is effectively extended to 18 bits, as a more complex jump code
+    // sequence is emitted.
+    if (as->base.pass == MP_ASM_PASS_EMIT && !SIGNED_FIT18(rel - 6)) {
+        mp_raise_msg_varg(&mp_type_RuntimeError, ET_OUT_OF_RANGE, MP_QSTR_bccz);
+    }
+
+    // ~BCCZ skip ; +0 <- Condition is flipped here (EQ -> NE, etc.)
+    // J addr     ; +3
+    // skip:      ; +6
+    asm_xtensa_op_bccz(as, cond ^ 1, reg, 6 - 4);
+    asm_xtensa_op_j(as, rel - 3);
 }
 
 void asm_xtensa_bcc_reg_reg_label(asm_xtensa_t *as, uint cond, uint reg1, uint reg2, uint label) {
-    uint32_t dest = get_label_dest(as, label);
-    int32_t rel = dest - as->base.code_offset - 4;
-    if (as->base.pass == MP_ASM_PASS_EMIT && !SIGNED_FIT8(rel)) {
-        printf("ERROR: xtensa bcc out of range\n");
+    ptrdiff_t rel = 0;
+    bool can_emit_short_jump = calculate_branch_displacement(as, label, &rel);
+
+    if (can_emit_short_jump && SIGNED_FIT8(rel)) {
+        // Backwards BCC opcodes with an offset that fits in 8 bits can
+        // be emitted without any change.
+        asm_xtensa_op_bcc(as, cond, reg1, reg2, rel);
+        return;
+    }
+
+    // Range is effectively extended to 18 bits, as a more complex jump code
+    // sequence is emitted.
+    if (as->base.pass == MP_ASM_PASS_EMIT && !SIGNED_FIT18(rel - 6)) {
+        mp_raise_msg_varg(&mp_type_RuntimeError, ET_OUT_OF_RANGE, MP_QSTR_bcc);
     }
-    asm_xtensa_op_bcc(as, cond, reg1, reg2, rel);
+
+    // ~BCC skip ; +0 <- Condition is flipped here (EQ -> NE, etc.)
+    // J addr    ; +3
+    // skip:     ; +6
+    asm_xtensa_op_bcc(as, cond ^ 8, reg1, reg2, 6 - 4);
+    asm_xtensa_op_j(as, rel - 3);
 }
 
 // convenience function; reg_dest must be different from reg_src[12]
@@ -179,6 +230,8 @@ size_t asm_xtensa_mov_reg_i32(asm_xtensa_t *as, uint reg_dest, uint32_t i32) {
     // store the constant in the table
     if (as->const_table != NULL) {
        as->const_table[as->cur_const] = i32;
+    } else {
+        assert((as->base.pass != MP_ASM_PASS_EMIT) && "Constants table was not built.");
     }
     ++as->cur_const;
     return loc;
@@ -240,17 +293,53 @@ void asm_xtensa_l32i_optimised(asm_xtensa_t *as, uint reg_dest, uint reg_base, u
     } else if (word_offset < 256) {
         asm_xtensa_op_l32i(as, reg_dest, reg_base, word_offset);
     } else {
-        mp_raise_msg(&mp_type_RuntimeError, MP_ERROR_TEXT("asm overflow"));
+        asm_xtensa_mov_reg_i32_optimised(as, reg_dest, word_offset * 4);
+        asm_xtensa_op_add_n(as, reg_dest, reg_base, reg_dest);
+        asm_xtensa_op_l32i_n(as, reg_dest, reg_dest, 0);
     }
 }
 
-void asm_xtensa_s32i_optimised(asm_xtensa_t *as, uint reg_src, uint reg_base, uint word_offset) {
-    if (word_offset < 16) {
-        asm_xtensa_op_s32i_n(as, reg_src, reg_base, word_offset);
-    } else if (word_offset < 256) {
-        asm_xtensa_op_s32i(as, reg_src, reg_base, word_offset);
+void asm_xtensa_load_reg_reg_offset(asm_xtensa_t *as, uint reg_dest, uint reg_base, uint offset, uint operation_size) {
+    assert(operation_size <= 2 && "Operation size value out of range.");
+
+    if (operation_size == 2 && MP_FIT_UNSIGNED(4, offset)) {
+        asm_xtensa_op_l32i_n(as, reg_dest, reg_base, offset);
+        return;
+    }
+
+    if (MP_FIT_UNSIGNED(8, offset)) {
+        asm_xtensa_op24(as, ASM_XTENSA_ENCODE_RRI8(2, operation_size, reg_base, reg_dest, offset));
+        return;
+    }
+
+    asm_xtensa_mov_reg_i32_optimised(as, reg_dest, offset << operation_size);
+    asm_xtensa_op_add_n(as, reg_dest, reg_base, reg_dest);
+    if (operation_size == 2) {
+        asm_xtensa_op_l32i_n(as, reg_dest, reg_dest, 0);
     } else {
-        mp_raise_msg(&mp_type_RuntimeError, MP_ERROR_TEXT("asm overflow"));
+        asm_xtensa_op24(as, ASM_XTENSA_ENCODE_RRI8(2, operation_size, reg_dest, reg_dest, 0));
+    }
+}
+
+void asm_xtensa_store_reg_reg_offset(asm_xtensa_t *as, uint reg_src, uint reg_base, uint offset, uint operation_size) {
+    assert(operation_size <= 2 && "Operation size value out of range.");
+
+    if (operation_size == 2 && MP_FIT_UNSIGNED(4, offset)) {
+        asm_xtensa_op_s32i_n(as, reg_src, reg_base, offset);
+        return;
+    }
+
+    if (MP_FIT_UNSIGNED(8, offset)) {
+        asm_xtensa_op24(as, ASM_XTENSA_ENCODE_RRI8(2, 0x04 | operation_size, reg_base, reg_src, offset));
+        return;
+    }
+
+    asm_xtensa_mov_reg_i32_optimised(as, REG_TEMP, offset << operation_size);
+    asm_xtensa_op_add_n(as, REG_TEMP, reg_base, REG_TEMP);
+    if (operation_size == 2) {
+        asm_xtensa_op_s32i_n(as, reg_src, REG_TEMP, 0);
+    } else {
+        asm_xtensa_op24(as, ASM_XTENSA_ENCODE_RRI8(2, 0x04 | operation_size, REG_TEMP, reg_src, 0));
     }
 }
 
@@ -264,4 +353,47 @@ void asm_xtensa_call_ind_win(asm_xtensa_t *as, uint idx) {
     asm_xtensa_op_callx8(as, ASM_XTENSA_REG_A8);
 }
 
+void asm_xtensa_bit_branch(asm_xtensa_t *as, mp_uint_t reg, mp_uint_t bit, mp_uint_t label, mp_uint_t condition) {
+    uint32_t dest = get_label_dest(as, label);
+    int32_t rel = dest - as->base.code_offset - 4;
+    if (as->base.pass == MP_ASM_PASS_EMIT && !SIGNED_FIT8(rel)) {
+        mp_raise_msg_varg(&mp_type_RuntimeError, ET_OUT_OF_RANGE, MP_QSTR_bit_branch);
+    }
+    asm_xtensa_op24(as, ASM_XTENSA_ENCODE_RRI8(7, condition | ((bit >> 4) & 0x01), reg, bit & 0x0F, rel & 0xFF));
+}
+
+void asm_xtensa_call0(asm_xtensa_t *as, mp_uint_t label) {
+    uint32_t dest = get_label_dest(as, label);
+    int32_t rel = dest - as->base.code_offset - 3;
+    if (as->base.pass == MP_ASM_PASS_EMIT) {
+        if ((dest & 0x03) != 0) {
+            mp_raise_msg_varg(&mp_type_RuntimeError, ET_NOT_ALIGNED, MP_QSTR_call0, MP_QSTR_target);
+        }
+        if ((rel & 0x03) != 0) {
+            mp_raise_msg_varg(&mp_type_RuntimeError, ET_NOT_ALIGNED, MP_QSTR_call0, MP_QSTR_location);
+        }
+        if (!SIGNED_FIT18(rel)) {
+            mp_raise_msg_varg(&mp_type_RuntimeError, ET_OUT_OF_RANGE, MP_QSTR_call0);
+        }
+    }
+    asm_xtensa_op_call0(as, rel);
+}
+
+void asm_xtensa_l32r(asm_xtensa_t *as, mp_uint_t reg, mp_uint_t label) {
+    uint32_t dest = get_label_dest(as, label);
+    int32_t rel = dest - as->base.code_offset;
+    if (as->base.pass == MP_ASM_PASS_EMIT) {
+        if ((dest & 0x03) != 0) {
+            mp_raise_msg_varg(&mp_type_RuntimeError, ET_NOT_ALIGNED, MP_QSTR_l32r, MP_QSTR_target);
+        }
+        if ((rel & 0x03) != 0) {
+            mp_raise_msg_varg(&mp_type_RuntimeError, ET_NOT_ALIGNED, MP_QSTR_l32r, MP_QSTR_location);
+        }
+        if (!SIGNED_FIT18(rel) || (rel >= 0)) {
+            mp_raise_msg_varg(&mp_type_RuntimeError, ET_OUT_OF_RANGE, MP_QSTR_l32r);
+        }
+    }
+    asm_xtensa_op_l32r(as, reg, as->base.code_offset, dest);
+}
+
 #endif // MICROPY_EMIT_XTENSA || MICROPY_EMIT_INLINE_XTENSA || MICROPY_EMIT_XTENSAWIN
diff --git a/py/asmxtensa.h b/py/asmxtensa.h
index d2f37bf828..7f113ca12e 100644
--- a/py/asmxtensa.h
+++ b/py/asmxtensa.h
@@ -64,9 +64,11 @@
 #define ASM_XTENSA_REG_A14 (14)
 #define ASM_XTENSA_REG_A15 (15)
 
-// for bccz
+// for bccz and bcci
 #define ASM_XTENSA_CCZ_EQ (0)
 #define ASM_XTENSA_CCZ_NE (1)
+#define ASM_XTENSA_CCZ_LT (2)
+#define ASM_XTENSA_CCZ_GE (3)
 
 // for bcc and setcc
 #define ASM_XTENSA_CC_NONE (0)
@@ -291,15 +293,24 @@ void asm_xtensa_mov_local_reg(asm_xtensa_t *as, int local_num, uint reg_src);
 void asm_xtensa_mov_reg_local(asm_xtensa_t *as, uint reg_dest, int local_num);
 void asm_xtensa_mov_reg_local_addr(asm_xtensa_t *as, uint reg_dest, int local_num);
 void asm_xtensa_mov_reg_pcrel(asm_xtensa_t *as, uint reg_dest, uint label);
-void asm_xtensa_l32i_optimised(asm_xtensa_t *as, uint reg_dest, uint reg_base, uint word_offset);
-void asm_xtensa_s32i_optimised(asm_xtensa_t *as, uint reg_src, uint reg_base, uint word_offset);
+void asm_xtensa_load_reg_reg_offset(asm_xtensa_t *as, uint reg_dest, uint reg_base, uint offset, uint operation_size);
+void asm_xtensa_store_reg_reg_offset(asm_xtensa_t *as, uint reg_src, uint reg_base, uint offset, uint operation_size);
 void asm_xtensa_call_ind(asm_xtensa_t *as, uint idx);
 void asm_xtensa_call_ind_win(asm_xtensa_t *as, uint idx);
+void asm_xtensa_bit_branch(asm_xtensa_t *as, mp_uint_t reg, mp_uint_t bit, mp_uint_t label, mp_uint_t condition);
+void asm_xtensa_immediate_branch(asm_xtensa_t *as, mp_uint_t reg, mp_uint_t immediate, mp_uint_t label, mp_uint_t cond);
+void asm_xtensa_call0(asm_xtensa_t *as, mp_uint_t label);
+void asm_xtensa_l32r(asm_xtensa_t *as, mp_uint_t reg, mp_uint_t label);
 
 // Holds a pointer to mp_fun_table
 #define ASM_XTENSA_REG_FUN_TABLE ASM_XTENSA_REG_A15
 #define ASM_XTENSA_REG_FUN_TABLE_WIN ASM_XTENSA_REG_A7
 
+// Internal temporary register (currently aliased to REG_ARG_5 for xtensa,
+// and to REG_TEMP2 for xtensawin).
+#define ASM_XTENSA_REG_TEMPORARY ASM_XTENSA_REG_A6 +#define ASM_XTENSA_REG_TEMPORARY_WIN ASM_XTENSA_REG_A12 + #if GENERIC_ASM_API // The following macros provide a (mostly) arch-independent API to @@ -407,16 +418,51 @@ void asm_xtensa_call_ind_win(asm_xtensa_t *as, uint idx); #define ASM_SUB_REG_REG(as, reg_dest, reg_src) asm_xtensa_op_sub((as), (reg_dest), (reg_dest), (reg_src)) #define ASM_MUL_REG_REG(as, reg_dest, reg_src) asm_xtensa_op_mull((as), (reg_dest), (reg_dest), (reg_src)) -#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_xtensa_l32i_optimised((as), (reg_dest), (reg_base), (word_offset)) -#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_xtensa_op_l8ui((as), (reg_dest), (reg_base), 0) -#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_xtensa_op_l16ui((as), (reg_dest), (reg_base), 0) -#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_xtensa_op_l16ui((as), (reg_dest), (reg_base), (uint16_offset)) -#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_xtensa_op_l32i_n((as), (reg_dest), (reg_base), 0) +#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), (word_offset)) +#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) ASM_LOAD8_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0) +#define ASM_LOAD8_REG_REG_OFFSET(as, reg_dest, reg_base, byte_offset) asm_xtensa_load_reg_reg_offset((as), (reg_dest), (reg_base), (byte_offset), 0) +#define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) \ + do { \ + asm_xtensa_op_add_n((as), (reg_base), (reg_index), (reg_base)); \ + asm_xtensa_op_l8ui((as), (reg_dest), (reg_base), 0); \ + } while (0) +#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) ASM_LOAD16_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0) +#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, halfword_offset) asm_xtensa_load_reg_reg_offset((as), (reg_dest), (reg_base), (halfword_offset), 1) +#define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) \ + do { \ + asm_xtensa_op_addx2((as), (reg_base), (reg_index), (reg_base)); \ + asm_xtensa_op_l16ui((as), (reg_dest), (reg_base), 0); \ + } while (0) +#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0) +#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_xtensa_load_reg_reg_offset((as), (reg_dest), (reg_base), (word_offset), 2) +#define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) \ + do { \ + asm_xtensa_op_addx4((as), (reg_base), (reg_index), (reg_base)); \ + asm_xtensa_op_l32i_n((as), (reg_dest), (reg_base), 0); \ + } while (0) -#define ASM_STORE_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_xtensa_s32i_optimised((as), (reg_dest), (reg_base), (word_offset)) -#define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_xtensa_op_s8i((as), (reg_src), (reg_base), 0) -#define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_xtensa_op_s16i((as), (reg_src), (reg_base), 0) -#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_xtensa_op_s32i_n((as), (reg_src), (reg_base), 0) +#define ASM_STORE_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) ASM_STORE32_REG_REG_OFFSET((as), (reg_dest), (reg_base), (word_offset)) +#define ASM_STORE8_REG_REG(as, reg_src, reg_base) ASM_STORE8_REG_REG_OFFSET((as), (reg_src), (reg_base), 0) +#define ASM_STORE8_REG_REG_OFFSET(as, reg_src, reg_base, byte_offset) asm_xtensa_store_reg_reg_offset((as), (reg_src), (reg_base), (byte_offset), 0) +#define 
ASM_STORE8_REG_REG_REG(as, reg_val, reg_base, reg_index) \ + do { \ + asm_xtensa_op_add_n((as), (reg_base), (reg_index), (reg_base)); \ + asm_xtensa_op_s8i((as), (reg_val), (reg_base), 0); \ + } while (0) +#define ASM_STORE16_REG_REG(as, reg_src, reg_base) ASM_STORE16_REG_REG_OFFSET((as), (reg_src), (reg_base), 0) +#define ASM_STORE16_REG_REG_OFFSET(as, reg_src, reg_base, halfword_offset) asm_xtensa_store_reg_reg_offset((as), (reg_src), (reg_base), (halfword_offset), 1) +#define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) \ + do { \ + asm_xtensa_op_addx2((as), (reg_base), (reg_index), (reg_base)); \ + asm_xtensa_op_s16i((as), (reg_val), (reg_base), 0); \ + } while (0) +#define ASM_STORE32_REG_REG(as, reg_src, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), 0) +#define ASM_STORE32_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_xtensa_store_reg_reg_offset((as), (reg_dest), (reg_base), (word_offset), 2) +#define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) \ + do { \ + asm_xtensa_op_addx4((as), (reg_base), (reg_index), (reg_base)); \ + asm_xtensa_op_s32i_n((as), (reg_val), (reg_base), 0); \ + } while (0) #endif // GENERIC_ASM_API @@ -88,7 +88,7 @@ const byte *mp_decode_uint_skip(const byte *ptr) { return ptr; } -static NORETURN void fun_pos_args_mismatch(mp_obj_fun_bc_t *f, size_t expected, size_t given) { +static MP_NORETURN void fun_pos_args_mismatch(mp_obj_fun_bc_t *f, size_t expected, size_t given) { #if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_TERSE // generic message, used also for other argument issues (void)f; diff --git a/py/binary.c b/py/binary.c index 4fc8f751ad..48d3421bca 100644 --- a/py/binary.c +++ b/py/binary.c @@ -196,7 +196,7 @@ static float mp_decode_half_float(uint16_t hf) { ++e; } - fpu.i = ((hf & 0x8000) << 16) | (e << 23) | (m << 13); + fpu.i = ((hf & 0x8000u) << 16) | (e << 23) | (m << 13); return fpu.f; } diff --git a/py/dynruntime.h b/py/dynruntime.h index c93111bbd4..0e438da4b7 100644 --- a/py/dynruntime.h +++ b/py/dynruntime.h @@ -70,7 +70,7 @@ #define m_realloc(ptr, new_num_bytes) (m_realloc_dyn((ptr), (new_num_bytes))) #define m_realloc_maybe(ptr, new_num_bytes, allow_move) (m_realloc_maybe_dyn((ptr), (new_num_bytes), (allow_move))) -static NORETURN inline void m_malloc_fail_dyn(size_t num_bytes) { +static MP_NORETURN inline void m_malloc_fail_dyn(size_t num_bytes) { mp_fun_table.raise_msg( mp_fun_table.load_global(MP_QSTR_MemoryError), "memory allocation failed"); @@ -295,7 +295,7 @@ static inline mp_obj_t mp_obj_new_exception_arg1_dyn(const mp_obj_type_t *exc_ty return mp_call_function_n_kw(MP_OBJ_FROM_PTR(exc_type), 1, 0, &args[0]); } -static NORETURN inline void mp_raise_dyn(mp_obj_t o) { +static MP_NORETURN inline void mp_raise_dyn(mp_obj_t o) { mp_fun_table.raise(o); for (;;) { } diff --git a/py/dynruntime.mk b/py/dynruntime.mk index 1ef521bd9a..030728cfc9 100644 --- a/py/dynruntime.mk +++ b/py/dynruntime.mk @@ -63,14 +63,14 @@ else ifeq ($(ARCH),armv6m) # thumb CROSS = arm-none-eabi- CFLAGS_ARCH += -mthumb -mcpu=cortex-m0 -MICROPY_FLOAT_IMPL ?= none +MICROPY_FLOAT_IMPL ?= float else ifeq ($(ARCH),armv7m) # thumb CROSS = arm-none-eabi- CFLAGS_ARCH += -mthumb -mcpu=cortex-m3 -MICROPY_FLOAT_IMPL ?= none +MICROPY_FLOAT_IMPL ?= float else ifeq ($(ARCH),armv7emsp) @@ -172,6 +172,9 @@ endif endif MPY_LD_FLAGS += $(addprefix -l, $(LIBGCC_PATH) $(LIBM_PATH)) endif +ifneq ($(MPY_EXTERN_SYM_FILE),) +MPY_LD_FLAGS += --externs "$(realpath $(MPY_EXTERN_SYM_FILE))" +endif CFLAGS += $(CFLAGS_EXTRA) diff --git 
a/py/emitinlinextensa.c b/py/emitinlinextensa.c index fed259cfc6..d0eb3d566f 100644 --- a/py/emitinlinextensa.c +++ b/py/emitinlinextensa.c @@ -173,7 +173,7 @@ static int get_arg_label(emit_inline_asm_t *emit, const char *op, mp_parse_node_ #define RRI8_B (2) typedef struct _opcode_table_3arg_t { - uint16_t name; // actually a qstr, which should fit in 16 bits + qstr_short_t name; uint8_t type; uint8_t a0 : 4; uint8_t a1 : 4; @@ -187,6 +187,13 @@ static const opcode_table_3arg_t opcode_table_3arg[] = { {MP_QSTR_add, RRR, 0, 8}, {MP_QSTR_sub, RRR, 0, 12}, {MP_QSTR_mull, RRR, 2, 8}, + {MP_QSTR_addx2, RRR, 0, 9}, + {MP_QSTR_addx4, RRR, 0, 10}, + {MP_QSTR_addx8, RRR, 0, 11}, + {MP_QSTR_subx2, RRR, 0, 13}, + {MP_QSTR_subx4, RRR, 0, 14}, + {MP_QSTR_subx8, RRR, 0, 15}, + {MP_QSTR_src, RRR, 1, 8}, // load/store/addi opcodes: reg, reg, imm // upper nibble of type encodes the range of the immediate arg @@ -208,21 +215,62 @@ static const opcode_table_3arg_t opcode_table_3arg[] = { {MP_QSTR_bge, RRI8_B, ASM_XTENSA_CC_GE, 0}, {MP_QSTR_bgeu, RRI8_B, ASM_XTENSA_CC_GEU, 0}, {MP_QSTR_blt, RRI8_B, ASM_XTENSA_CC_LT, 0}, + {MP_QSTR_bltu, RRI8_B, ASM_XTENSA_CC_LTU, 0}, {MP_QSTR_bnall, RRI8_B, ASM_XTENSA_CC_NALL, 0}, {MP_QSTR_bne, RRI8_B, ASM_XTENSA_CC_NE, 0}, {MP_QSTR_bnone, RRI8_B, ASM_XTENSA_CC_NONE, 0}, }; +// The index of the first four qstrs matches the CCZ condition value to be +// embedded into the opcode. +static const qstr_short_t BCCZ_OPCODES[] = { + MP_QSTR_beqz, MP_QSTR_bnez, MP_QSTR_bltz, MP_QSTR_bgez, + MP_QSTR_beqz_n, MP_QSTR_bnez_n +}; + +#if MICROPY_EMIT_INLINE_XTENSA_UNCOMMON_OPCODES +typedef struct _single_opcode_t { + qstr_short_t name; + uint16_t value; +} single_opcode_t; + +static const single_opcode_t NOARGS_OPCODES[] = { + {MP_QSTR_dsync, 0x2030}, + {MP_QSTR_esync, 0x2020}, + {MP_QSTR_extw, 0x20D0}, + {MP_QSTR_ill, 0x0000}, + {MP_QSTR_isync, 0x2000}, + {MP_QSTR_memw, 0x20C0}, + {MP_QSTR_rsync, 0x2010}, +}; +#endif + static void emit_inline_xtensa_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_args, mp_parse_node_t *pn_args) { size_t op_len; const char *op_str = (const char *)qstr_data(op, &op_len); if (n_args == 0) { - if (op == MP_QSTR_ret_n) { + if (op == MP_QSTR_ret_n || op == MP_QSTR_ret) { asm_xtensa_op_ret_n(&emit->as); - } else { - goto unknown_op; + return; + } else if (op == MP_QSTR_nop) { + asm_xtensa_op24(&emit->as, 0x20F0); + return; + } else if (op == MP_QSTR_nop_n) { + asm_xtensa_op16(&emit->as, 0xF03D); + return; } + #if MICROPY_EMIT_INLINE_XTENSA_UNCOMMON_OPCODES + for (size_t index = 0; index < MP_ARRAY_SIZE(NOARGS_OPCODES); index++) { + const single_opcode_t *opcode = &NOARGS_OPCODES[index]; + if (op == opcode->name) { + asm_xtensa_op24(&emit->as, opcode->value); + return; + } + } + #endif + + goto unknown_op; } else if (n_args == 1) { if (op == MP_QSTR_callx0) { @@ -234,19 +282,45 @@ static void emit_inline_xtensa_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_ } else if (op == MP_QSTR_jx) { uint r0 = get_arg_reg(emit, op_str, pn_args[0]); asm_xtensa_op_jx(&emit->as, r0); + } else if (op == MP_QSTR_ssl) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + asm_xtensa_op_ssl(&emit->as, r0); + } else if (op == MP_QSTR_ssr) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + asm_xtensa_op_ssr(&emit->as, r0); + } else if (op == MP_QSTR_ssai) { + mp_uint_t sa = get_arg_i(emit, op_str, pn_args[0], 0, 31); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 0, 4, 4, sa & 0x0F, (sa >> 4) & 0x01)); + } else if (op == MP_QSTR_ssa8b) { + mp_uint_t r0 = 
get_arg_reg(emit, op_str, pn_args[0]); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 0, 4, 3, r0, 0)); + } else if (op == MP_QSTR_ssa8l) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 0, 4, 2, r0, 0)); + } else if (op == MP_QSTR_call0) { + mp_uint_t label = get_arg_label(emit, op_str, pn_args[0]); + asm_xtensa_call0(&emit->as, label); + #if MICROPY_EMIT_INLINE_XTENSA_UNCOMMON_OPCODES + } else if (op == MP_QSTR_fsync) { + mp_uint_t imm3 = get_arg_i(emit, op_str, pn_args[0], 0, 7); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 0, 0, 2, 8 | imm3, 0)); + } else if (op == MP_QSTR_ill_n) { + asm_xtensa_op16(&emit->as, 0xF06D); + #endif } else { goto unknown_op; } } else if (n_args == 2) { uint r0 = get_arg_reg(emit, op_str, pn_args[0]); - if (op == MP_QSTR_beqz) { - int label = get_arg_label(emit, op_str, pn_args[1]); - asm_xtensa_bccz_reg_label(&emit->as, ASM_XTENSA_CCZ_EQ, r0, label); - } else if (op == MP_QSTR_bnez) { - int label = get_arg_label(emit, op_str, pn_args[1]); - asm_xtensa_bccz_reg_label(&emit->as, ASM_XTENSA_CCZ_NE, r0, label); - } else if (op == MP_QSTR_mov || op == MP_QSTR_mov_n) { + for (size_t index = 0; index < MP_ARRAY_SIZE(BCCZ_OPCODES); index++) { + if (op == BCCZ_OPCODES[index]) { + mp_uint_t label = get_arg_label(emit, op_str, pn_args[1]); + asm_xtensa_bccz_reg_label(&emit->as, index & 0x03, r0, label); + return; + } + } + if (op == MP_QSTR_mov || op == MP_QSTR_mov_n) { // we emit mov.n for both "mov" and "mov_n" opcodes uint r1 = get_arg_reg(emit, op_str, pn_args[1]); asm_xtensa_op_mov_n(&emit->as, r0, r1); @@ -254,7 +328,53 @@ static void emit_inline_xtensa_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_ // for convenience we emit l32r if the integer doesn't fit in movi uint32_t imm = get_arg_i(emit, op_str, pn_args[1], 0, 0); asm_xtensa_mov_reg_i32(&emit->as, r0, imm); - } else { + } else if (op == MP_QSTR_abs_) { + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 0, 6, r0, 1, r1)); + } else if (op == MP_QSTR_neg) { + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 0, 6, r0, 0, r1)); + } else if (op == MP_QSTR_sll) { + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 1, 10, r0, r1, 0)); + } else if (op == MP_QSTR_sra) { + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 1, 11, r0, 0, r1)); + } else if (op == MP_QSTR_srl) { + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 1, 9, r0, 0, r1)); + } else if (op == MP_QSTR_nsa) { + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 0, 4, 14, r1, r0)); + } else if (op == MP_QSTR_nsau) { + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 0, 4, 15, r1, r0)); + } else if (op == MP_QSTR_l32r) { + mp_uint_t label = get_arg_label(emit, op_str, pn_args[1]); + asm_xtensa_l32r(&emit->as, r0, label); + } else if (op == MP_QSTR_movi_n) { + mp_int_t imm = get_arg_i(emit, op_str, pn_args[1], -32, 95); + asm_xtensa_op_movi_n(&emit->as, r0, imm); + } else + #if MICROPY_EMIT_INLINE_XTENSA_UNCOMMON_OPCODES + if (op == MP_QSTR_rsr) { + mp_uint_t sr = get_arg_i(emit, op_str, pn_args[1], 0, 255); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RSR(0, 3, 0, 
sr, r0)); + } else if (op == MP_QSTR_rur) { + mp_uint_t imm8 = get_arg_i(emit, op_str, pn_args[1], 0, 255); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 3, 14, r0, (imm8 >> 4) & 0x0F, imm8 & 0x0F)); + } else if (op == MP_QSTR_wsr) { + mp_uint_t sr = get_arg_i(emit, op_str, pn_args[1], 0, 255); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RSR(0, 3, 1, sr, r0)); + } else if (op == MP_QSTR_wur) { + mp_uint_t sr = get_arg_i(emit, op_str, pn_args[1], 0, 255); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RSR(0, 3, 15, sr, r0)); + } else if (op == MP_QSTR_xsr) { + mp_uint_t sr = get_arg_i(emit, op_str, pn_args[1], 0, 255); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RSR(0, 1, 6, sr, r0)); + } else + #endif + { goto unknown_op; } @@ -288,7 +408,72 @@ static void emit_inline_xtensa_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_ return; } } - goto unknown_op; + + if (op == MP_QSTR_add_n) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + mp_uint_t r2 = get_arg_reg(emit, op_str, pn_args[2]); + asm_xtensa_op16(&emit->as, ASM_XTENSA_ENCODE_RRRN(10, r0, r1, r2)); + } else if (op == MP_QSTR_addi_n) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + mp_int_t imm4 = get_arg_i(emit, op_str, pn_args[2], -1, 15); + asm_xtensa_op16(&emit->as, ASM_XTENSA_ENCODE_RRRN(11, r0, r1, (imm4 != 0 ? imm4 : -1))); + } else if (op == MP_QSTR_addmi) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + mp_int_t imm8 = get_arg_i(emit, op_str, pn_args[2], -128 * 256, 127 * 256); + if ((imm8 & 0xFF) != 0) { + emit_inline_xtensa_error_exc(emit, mp_obj_new_exception_msg_varg(&mp_type_SyntaxError, MP_ERROR_TEXT("%d is not a multiple of %d"), imm8, 256)); + } else { + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRI8(2, 13, r1, r0, imm8 >> 8)); + } + } else if (op == MP_QSTR_bbci) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + mp_uint_t bit = get_arg_i(emit, op_str, pn_args[1], 0, 31); + mp_int_t label = get_arg_label(emit, op_str, pn_args[2]); + asm_xtensa_bit_branch(&emit->as, r0, bit, label, 6); + } else if (op == MP_QSTR_bbsi) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + mp_uint_t bit = get_arg_i(emit, op_str, pn_args[1], 0, 31); + mp_uint_t label = get_arg_label(emit, op_str, pn_args[2]); + asm_xtensa_bit_branch(&emit->as, r0, bit, label, 14); + } else if (op == MP_QSTR_slli) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + mp_uint_t bits = 32 - get_arg_i(emit, op_str, pn_args[2], 1, 31); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 1, 0 | ((bits >> 4) & 0x01), r0, r1, bits & 0x0F)); + } else if (op == MP_QSTR_srai) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + mp_uint_t bits = get_arg_i(emit, op_str, pn_args[2], 0, 31); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 1, 2 | ((bits >> 4) & 0x01), r0, bits & 0x0F, r1)); + } else if (op == MP_QSTR_srli) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + mp_uint_t bits = get_arg_i(emit, op_str, pn_args[2], 0, 15); + asm_xtensa_op24(&emit->as, ASM_XTENSA_ENCODE_RRR(0, 1, 4, r0, bits, r1)); + } else if (op == MP_QSTR_l32i_n) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + mp_uint_t r1 = get_arg_reg(emit, op_str, 
pn_args[1]); + mp_uint_t imm = get_arg_i(emit, op_str, pn_args[2], 0, 60); + if ((imm & 0x03) != 0) { + emit_inline_xtensa_error_exc(emit, mp_obj_new_exception_msg_varg(&mp_type_SyntaxError, MP_ERROR_TEXT("%d is not a multiple of %d"), imm, 4)); + } else { + asm_xtensa_op_l32i_n(&emit->as, r0, r1, imm >> 2); + } + } else if (op == MP_QSTR_s32i_n) { + mp_uint_t r0 = get_arg_reg(emit, op_str, pn_args[0]); + mp_uint_t r1 = get_arg_reg(emit, op_str, pn_args[1]); + mp_uint_t imm = get_arg_i(emit, op_str, pn_args[2], 0, 60); + if ((imm & 0x03) != 0) { + emit_inline_xtensa_error_exc(emit, mp_obj_new_exception_msg_varg(&mp_type_SyntaxError, MP_ERROR_TEXT("%d is not a multiple of %d"), imm, 4)); + } else { + asm_xtensa_op_s32i_n(&emit->as, r0, r1, imm >> 2); + } + } else { + goto unknown_op; + } } else { goto unknown_op; diff --git a/py/emitnative.c b/py/emitnative.c index 1aab0a9eb7..f3ab483e8a 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -180,12 +180,6 @@ static const uint8_t reg_local_table[MAX_REGS_FOR_LOCAL_VARS] = {REG_LOCAL_1, RE *emit->error_slot = mp_obj_new_exception_msg_varg(&mp_type_ViperTypeError, __VA_ARGS__); \ } while (0) -#if N_RV32 -#define FIT_SIGNED(value, bits) \ - ((((value) & ~((1U << ((bits) - 1)) - 1)) == 0) || \ - (((value) & ~((1U << ((bits) - 1)) - 1)) == ~((1U << ((bits) - 1)) - 1))) -#endif - typedef enum { STACK_VALUE, STACK_REG, @@ -1537,87 +1531,50 @@ static void emit_native_load_subscr(emit_t *emit) { switch (vtype_base) { case VTYPE_PTR8: { // pointer to 8-bit memory - // TODO optimise to use thumb ldrb r1, [r2, r3] + #ifdef ASM_LOAD8_REG_REG_OFFSET + ASM_LOAD8_REG_REG_OFFSET(emit->as, REG_RET, reg_base, index_value); + #else if (index_value != 0) { // index is non-zero - #if N_THUMB - if (index_value > 0 && index_value < 32) { - asm_thumb_ldrb_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value); - break; - } - #elif N_RV32 - if (FIT_SIGNED(index_value, 12)) { - asm_rv32_opcode_lbu(emit->as, REG_RET, reg_base, index_value); - break; - } - #elif N_XTENSA || N_XTENSAWIN - if (index_value > 0 && index_value < 256) { - asm_xtensa_op_l8ui(emit->as, REG_RET, reg_base, index_value); - break; - } - #endif need_reg_single(emit, reg_index, 0); ASM_MOV_REG_IMM(emit->as, reg_index, index_value); ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add index to base reg_base = reg_index; } ASM_LOAD8_REG_REG(emit->as, REG_RET, reg_base); // load from (base+index) + #endif break; } case VTYPE_PTR16: { // pointer to 16-bit memory + #ifdef ASM_LOAD16_REG_REG_OFFSET + ASM_LOAD16_REG_REG_OFFSET(emit->as, REG_RET, reg_base, index_value); + #else if (index_value != 0) { // index is a non-zero immediate - #if N_THUMB - if (index_value > 0 && index_value < 32) { - asm_thumb_ldrh_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value); - break; - } - #elif N_RV32 - if (FIT_SIGNED(index_value, 11)) { - asm_rv32_opcode_lhu(emit->as, REG_RET, reg_base, index_value << 1); - break; - } - #elif N_XTENSA || N_XTENSAWIN - if (index_value > 0 && index_value < 256) { - asm_xtensa_op_l16ui(emit->as, REG_RET, reg_base, index_value); - break; - } - #endif need_reg_single(emit, reg_index, 0); ASM_MOV_REG_IMM(emit->as, reg_index, index_value << 1); ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add 2*index to base reg_base = reg_index; } ASM_LOAD16_REG_REG(emit->as, REG_RET, reg_base); // load from (base+2*index) + #endif break; } case VTYPE_PTR32: { // pointer to 32-bit memory + #ifdef ASM_LOAD32_REG_REG_OFFSET + ASM_LOAD32_REG_REG_OFFSET(emit->as, REG_RET, reg_base, index_value); + #else if 
(index_value != 0) { // index is a non-zero immediate - #if N_THUMB - if (index_value > 0 && index_value < 32) { - asm_thumb_ldr_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value); - break; - } - #elif N_RV32 - if (FIT_SIGNED(index_value, 10)) { - asm_rv32_opcode_lw(emit->as, REG_RET, reg_base, index_value << 2); - break; - } - #elif N_XTENSA || N_XTENSAWIN - if (index_value > 0 && index_value < 256) { - asm_xtensa_l32i_optimised(emit->as, REG_RET, reg_base, index_value); - break; - } - #endif need_reg_single(emit, reg_index, 0); ASM_MOV_REG_IMM(emit->as, reg_index, index_value << 2); ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add 4*index to base reg_base = reg_index; } ASM_LOAD32_REG_REG(emit->as, REG_RET, reg_base); // load from (base+4*index) + #endif break; } default: @@ -1638,40 +1595,36 @@ static void emit_native_load_subscr(emit_t *emit) { switch (vtype_base) { case VTYPE_PTR8: { // pointer to 8-bit memory - // TODO optimise to use thumb ldrb r1, [r2, r3] + #ifdef ASM_LOAD8_REG_REG_REG + ASM_LOAD8_REG_REG_REG(emit->as, REG_RET, REG_ARG_1, reg_index); + #else ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_LOAD8_REG_REG(emit->as, REG_RET, REG_ARG_1); // store value to (base+index) + #endif break; } case VTYPE_PTR16: { // pointer to 16-bit memory - #if N_XTENSA || N_XTENSAWIN - asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1); - asm_xtensa_op_l16ui(emit->as, REG_RET, REG_ARG_1, 0); - break; - #endif + #ifdef ASM_LOAD16_REG_REG_REG + ASM_LOAD16_REG_REG_REG(emit->as, REG_RET, REG_ARG_1, reg_index); + #else ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_LOAD16_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+2*index) + #endif break; } case VTYPE_PTR32: { // pointer to word-size memory - #if N_RV32 - asm_rv32_opcode_slli(emit->as, REG_TEMP2, reg_index, 2); - asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2); - asm_rv32_opcode_lw(emit->as, REG_RET, REG_ARG_1, 0); - break; - #elif N_XTENSA || N_XTENSAWIN - asm_xtensa_op_addx4(emit->as, REG_ARG_1, reg_index, REG_ARG_1); - asm_xtensa_op_l32i_n(emit->as, REG_RET, REG_ARG_1, 0); - break; - #endif + #ifdef ASM_LOAD32_REG_REG_REG + ASM_LOAD32_REG_REG_REG(emit->as, REG_RET, REG_ARG_1, reg_index); + #else ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_LOAD32_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+4*index) + #endif break; } default: @@ -1814,92 +1767,47 @@ static void emit_native_store_subscr(emit_t *emit) { switch (vtype_base) { case VTYPE_PTR8: { // pointer to 8-bit memory - // TODO optimise to use thumb strb r1, [r2, r3] + #ifdef ASM_STORE8_REG_REG_OFFSET + ASM_STORE8_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value); + #else if (index_value != 0) { // index is non-zero - #if N_THUMB - if (index_value > 0 && index_value < 32) { - asm_thumb_strb_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value); - break; - } - #elif N_RV32 - if (FIT_SIGNED(index_value, 12)) { - asm_rv32_opcode_sb(emit->as, reg_value, reg_base, index_value); - break; - } - #elif N_XTENSA || N_XTENSAWIN - if (index_value > 0 && index_value < 256) { - asm_xtensa_op_s8i(emit->as, REG_RET, reg_base, index_value); - break; - } - #endif ASM_MOV_REG_IMM(emit->as, 
reg_index, index_value); - #if N_ARM - asm_arm_strb_reg_reg_reg(emit->as, reg_value, reg_base, reg_index); - return; - #endif ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add index to base reg_base = reg_index; } ASM_STORE8_REG_REG(emit->as, reg_value, reg_base); // store value to (base+index) + #endif break; } case VTYPE_PTR16: { // pointer to 16-bit memory + #ifdef ASM_STORE16_REG_REG_OFFSET + ASM_STORE16_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value); + #else if (index_value != 0) { // index is a non-zero immediate - #if N_THUMB - if (index_value > 0 && index_value < 32) { - asm_thumb_strh_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value); - break; - } - #elif N_RV32 - if (FIT_SIGNED(index_value, 11)) { - asm_rv32_opcode_sh(emit->as, reg_value, reg_base, index_value << 1); - break; - } - #elif N_XTENSA || N_XTENSAWIN - if (index_value > 0 && index_value < 256) { - asm_xtensa_op_s16i(emit->as, REG_RET, reg_base, index_value); - break; - } - #endif ASM_MOV_REG_IMM(emit->as, reg_index, index_value << 1); ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add 2*index to base reg_base = reg_index; } ASM_STORE16_REG_REG(emit->as, reg_value, reg_base); // store value to (base+2*index) + #endif break; } case VTYPE_PTR32: { // pointer to 32-bit memory + #ifdef ASM_STORE32_REG_REG_OFFSET + ASM_STORE32_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value); + #else if (index_value != 0) { // index is a non-zero immediate - #if N_THUMB - if (index_value > 0 && index_value < 32) { - asm_thumb_str_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value); - break; - } - #elif N_RV32 - if (FIT_SIGNED(index_value, 10)) { - asm_rv32_opcode_sw(emit->as, reg_value, reg_base, index_value << 2); - break; - } - #elif N_XTENSA || N_XTENSAWIN - if (index_value > 0 && index_value < 256) { - asm_xtensa_s32i_optimised(emit->as, REG_RET, reg_base, index_value); - break; - } - #elif N_ARM - ASM_MOV_REG_IMM(emit->as, reg_index, index_value); - asm_arm_str_reg_reg_reg(emit->as, reg_value, reg_base, reg_index); - return; - #endif ASM_MOV_REG_IMM(emit->as, reg_index, index_value << 2); ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add 4*index to base reg_base = reg_index; } ASM_STORE32_REG_REG(emit->as, reg_value, reg_base); // store value to (base+4*index) + #endif break; } default: @@ -1930,50 +1838,36 @@ static void emit_native_store_subscr(emit_t *emit) { switch (vtype_base) { case VTYPE_PTR8: { // pointer to 8-bit memory - // TODO optimise to use thumb strb r1, [r2, r3] - #if N_ARM - asm_arm_strb_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index); - break; - #endif + #ifdef ASM_STORE8_REG_REG_REG + ASM_STORE8_REG_REG_REG(emit->as, reg_value, REG_ARG_1, reg_index); + #else ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_STORE8_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+index) + #endif break; } case VTYPE_PTR16: { // pointer to 16-bit memory - #if N_ARM - asm_arm_strh_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index); - break; - #elif N_XTENSA || N_XTENSAWIN - asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1); - asm_xtensa_op_s16i(emit->as, reg_value, REG_ARG_1, 0); - break; - #endif + #ifdef ASM_STORE16_REG_REG_REG + ASM_STORE16_REG_REG_REG(emit->as, reg_value, REG_ARG_1, reg_index); + #else ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_STORE16_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+2*index) + 
#endif break; } case VTYPE_PTR32: { // pointer to 32-bit memory - #if N_ARM - asm_arm_str_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index); - break; - #elif N_RV32 - asm_rv32_opcode_slli(emit->as, REG_TEMP2, reg_index, 2); - asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2); - asm_rv32_opcode_sw(emit->as, reg_value, REG_ARG_1, 0); - break; - #elif N_XTENSA || N_XTENSAWIN - asm_xtensa_op_addx4(emit->as, REG_ARG_1, reg_index, REG_ARG_1); - asm_xtensa_op_s32i_n(emit->as, reg_value, REG_ARG_1, 0); - break; - #endif + #ifdef ASM_STORE32_REG_REG_REG + ASM_STORE32_REG_REG_REG(emit->as, reg_value, REG_ARG_1, reg_index); + #else ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_STORE32_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+4*index) + #endif break; } default: diff --git a/py/emitndebug.c b/py/emitndebug.c index bd896a75c8..c068a9a9a1 100644 --- a/py/emitndebug.c +++ b/py/emitndebug.c @@ -251,8 +251,6 @@ static void asm_debug_setcc_reg_reg_reg(asm_debug_t *as, int op, int reg1, int r #define ASM_MUL_REG_REG(as, reg_dest, reg_src) \ asm_debug_reg_reg(as, "mul", reg_dest, reg_src) -#define ASM_LOAD_REG_REG(as, reg_dest, reg_base) \ - asm_debug_reg_reg(as, "load", reg_dest, reg_base) #define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) \ asm_debug_reg_reg_offset(as, "load", reg_dest, reg_base, word_offset) #define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) \ @@ -264,8 +262,6 @@ static void asm_debug_setcc_reg_reg_reg(asm_debug_t *as, int op, int reg1, int r #define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) \ asm_debug_reg_reg(as, "load32", reg_dest, reg_base) -#define ASM_STORE_REG_REG(as, reg_src, reg_base) \ - asm_debug_reg_reg(as, "store", reg_src, reg_base) #define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) \ asm_debug_reg_reg_offset(as, "store", reg_src, reg_base, word_offset) #define ASM_STORE8_REG_REG(as, reg_src, reg_base) \ @@ -105,7 +105,7 @@ void *m_realloc(void *ptr, size_t new_num_bytes); void *m_realloc_maybe(void *ptr, size_t new_num_bytes, bool allow_move); void m_free(void *ptr); #endif -NORETURN void m_malloc_fail(size_t num_bytes); +MP_NORETURN void m_malloc_fail(size_t num_bytes); #if MICROPY_TRACKED_ALLOC // These alloc/free functions track the pointers in a linked list so the GC does not reclaim @@ -390,11 +390,16 @@ static inline uint32_t mp_popcount(uint32_t x) { x = x - ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); x = (x + (x >> 4)) & 0x0F0F0F0F; - return x * 0x01010101; + return (x * 0x01010101) >> 24; } #endif #endif +#define MP_FIT_UNSIGNED(bits, value) (((value) & (~0U << (bits))) == 0) +#define MP_FIT_SIGNED(bits, value) \ + (MP_FIT_UNSIGNED(((bits) - 1), (value)) || \ + (((value) & (~0U << ((bits) - 1))) == (~0U << ((bits) - 1)))) + // mp_int_t can be larger than long, i.e. Windows 64-bit, nan-box variants static inline uint32_t mp_clz_mpi(mp_int_t x) { #ifdef __XC16__ diff --git a/py/modio.c b/py/modio.c index d3e563dbcf..9aeb42d30a 100644 --- a/py/modio.c +++ b/py/modio.c @@ -169,12 +169,13 @@ static mp_obj_t bufwriter_flush(mp_obj_t self_in) { int err; mp_uint_t out_sz = mp_stream_write_exactly(self->stream, self->buf, self->len, &err); (void)out_sz; - // TODO: try to recover from a case of non-blocking stream, e.g. 
move - // remaining chunk to the beginning of buffer. - assert(out_sz == self->len); - self->len = 0; if (err != 0) { mp_raise_OSError(err); + } else { + // TODO: try to recover from a case of non-blocking stream, e.g. move + // remaining chunk to the beginning of buffer. + assert(out_sz == self->len); + self->len = 0; } } diff --git a/py/modmath.c b/py/modmath.c index 4d51a28d0b..919a8ccd9d 100644 --- a/py/modmath.c +++ b/py/modmath.c @@ -37,7 +37,7 @@ #define MP_PI_4 MICROPY_FLOAT_CONST(0.78539816339744830962) #define MP_3_PI_4 MICROPY_FLOAT_CONST(2.35619449019234492885) -static NORETURN void math_error(void) { +static MP_NORETURN void math_error(void) { mp_raise_ValueError(MP_ERROR_TEXT("math domain error")); } @@ -161,6 +161,11 @@ MATH_FUN_2(atan2, atan2) MATH_FUN_1_TO_INT(ceil, ceil) // copysign(x, y) static mp_float_t MICROPY_FLOAT_C_FUN(copysign_func)(mp_float_t x, mp_float_t y) { + #if MICROPY_PY_MATH_COPYSIGN_FIX_NAN + if (isnan(y)) { + y = 0.0; + } + #endif return MICROPY_FLOAT_C_FUN(copysign)(x, y); } MATH_FUN_2(copysign, copysign_func) diff --git a/py/mpconfig.h b/py/mpconfig.h index 9001b8983b..94b8453003 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -406,6 +406,11 @@ #define MICROPY_EMIT_INLINE_XTENSA (0) #endif +// Whether to support uncommon Xtensa inline assembler opcodes +#ifndef MICROPY_EMIT_INLINE_XTENSA_UNCOMMON_OPCODES +#define MICROPY_EMIT_INLINE_XTENSA_UNCOMMON_OPCODES (0) +#endif + // Whether to emit Xtensa-Windowed native code #ifndef MICROPY_EMIT_XTENSAWIN #define MICROPY_EMIT_XTENSAWIN (0) @@ -1318,6 +1323,11 @@ typedef double mp_float_t; #define MICROPY_PY___FILE__ (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_CORE_FEATURES) #endif +// Whether to process __all__ when importing all public symbols from module +#ifndef MICROPY_MODULE___ALL__ +#define MICROPY_MODULE___ALL__ (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_BASIC_FEATURES) +#endif + // Whether to provide mem-info related functions in micropython module #ifndef MICROPY_PY_MICROPYTHON_MEM_INFO #define MICROPY_PY_MICROPYTHON_MEM_INFO (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES) @@ -2115,8 +2125,12 @@ typedef double mp_float_t; #endif // INT_FMT // Modifier for function which doesn't return -#ifndef NORETURN -#define NORETURN __attribute__((noreturn)) +#ifndef MP_NORETURN +#define MP_NORETURN __attribute__((noreturn)) +#endif + +#if !MICROPY_PREVIEW_VERSION_2 +#define NORETURN MP_NORETURN #endif // Modifier for weak functions diff --git a/py/mpprint.c b/py/mpprint.c index 291e4145ff..86dbfad05e 100644 --- a/py/mpprint.c +++ b/py/mpprint.c @@ -58,7 +58,7 @@ int mp_print_str(const mp_print_t *print, const char *str) { return len; } -int mp_print_strn(const mp_print_t *print, const char *str, size_t len, int flags, char fill, int width) { +int mp_print_strn(const mp_print_t *print, const char *str, size_t len, unsigned int flags, char fill, int width) { int left_pad = 0; int right_pad = 0; int pad = width - len; @@ -201,7 +201,7 @@ static int mp_print_int(const mp_print_t *print, mp_uint_t x, int sgn, int base, return len; } -int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, int base, int base_char, int flags, char fill, int width, int prec) { +int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, unsigned int base, int base_char, int flags, char fill, int width, int prec) { // These are the only values for "base" that are required to be supported by this // function, since Python only allows the user to format integers in these bases. // If needed this function could be generalised to handle other values. 
@@ -248,10 +248,7 @@ int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, int base, int base_char int prefix_len = prefix - prefix_buf; prefix = prefix_buf; - char comma = '\0'; - if (flags & PF_FLAG_SHOW_COMMA) { - comma = ','; - } + char comma = flags >> PF_FLAG_SEP_POS; // The size of this buffer is rather arbitrary. If it's not large // enough, a dynamic one will be allocated. @@ -340,7 +337,7 @@ int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, int base, int base_char } #if MICROPY_PY_BUILTINS_FLOAT -int mp_print_float(const mp_print_t *print, mp_float_t f, char fmt, int flags, char fill, int width, int prec) { +int mp_print_float(const mp_print_t *print, mp_float_t f, char fmt, unsigned int flags, char fill, int width, int prec) { char buf[32]; char sign = '\0'; int chrs = 0; @@ -416,8 +413,6 @@ int mp_vprintf(const mp_print_t *print, const char *fmt, va_list args) { flags |= PF_FLAG_SHOW_SIGN; } else if (*fmt == ' ') { flags |= PF_FLAG_SPACE_SIGN; - } else if (*fmt == '!') { - flags |= PF_FLAG_NO_TRAILZ; } else if (*fmt == '0') { flags |= PF_FLAG_PAD_AFTER_SIGN; fill = '0'; diff --git a/py/mpprint.h b/py/mpprint.h index 8383ea8579..583f00bda8 100644 --- a/py/mpprint.h +++ b/py/mpprint.h @@ -31,13 +31,12 @@ #define PF_FLAG_LEFT_ADJUST (0x001) #define PF_FLAG_SHOW_SIGN (0x002) #define PF_FLAG_SPACE_SIGN (0x004) -#define PF_FLAG_NO_TRAILZ (0x008) -#define PF_FLAG_SHOW_PREFIX (0x010) -#define PF_FLAG_SHOW_COMMA (0x020) -#define PF_FLAG_PAD_AFTER_SIGN (0x040) -#define PF_FLAG_CENTER_ADJUST (0x080) -#define PF_FLAG_ADD_PERCENT (0x100) -#define PF_FLAG_SHOW_OCTAL_LETTER (0x200) +#define PF_FLAG_SHOW_PREFIX (0x008) +#define PF_FLAG_PAD_AFTER_SIGN (0x010) +#define PF_FLAG_CENTER_ADJUST (0x020) +#define PF_FLAG_ADD_PERCENT (0x040) +#define PF_FLAG_SHOW_OCTAL_LETTER (0x080) +#define PF_FLAG_SEP_POS (9) // must be above all the above PF_FLAGs #if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES #define MP_PYTHON_PRINTER &mp_sys_stdout_print @@ -69,9 +68,9 @@ extern const mp_print_t mp_sys_stdout_print; #endif int mp_print_str(const mp_print_t *print, const char *str); -int mp_print_strn(const mp_print_t *print, const char *str, size_t len, int flags, char fill, int width); +int mp_print_strn(const mp_print_t *print, const char *str, size_t len, unsigned int flags, char fill, int width); #if MICROPY_PY_BUILTINS_FLOAT -int mp_print_float(const mp_print_t *print, mp_float_t f, char fmt, int flags, char fill, int width, int prec); +int mp_print_float(const mp_print_t *print, mp_float_t f, char fmt, unsigned int flags, char fill, int width, int prec); #endif int mp_printf(const mp_print_t *print, const char *fmt, ...); @@ -1537,7 +1537,8 @@ mp_int_t mpz_hash(const mpz_t *z) { mp_uint_t val = 0; mpz_dig_t *d = z->dig + z->len; - while (d-- > z->dig) { + while (d > z->dig) { + d--; val = (val << DIG_SIZE) | *d; } @@ -1552,11 +1553,12 @@ bool mpz_as_int_checked(const mpz_t *i, mp_int_t *value) { mp_uint_t val = 0; mpz_dig_t *d = i->dig + i->len; - while (d-- > i->dig) { + while (d > i->dig) { if (val > (~(MP_OBJ_WORD_MSBIT_HIGH) >> DIG_SIZE)) { // will overflow return false; } + d--; val = (val << DIG_SIZE) | *d; } @@ -1577,11 +1579,12 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) { mp_uint_t val = 0; mpz_dig_t *d = i->dig + i->len; - while (d-- > i->dig) { + while (d > i->dig) { if (val > (~(MP_OBJ_WORD_MSBIT_HIGH) >> (DIG_SIZE - 1))) { // will overflow return false; } + d--; val = (val << DIG_SIZE) | *d; } @@ -1642,7 +1645,8 @@ mp_float_t mpz_as_float(const mpz_t *i) { mp_float_t 
val = 0; mpz_dig_t *d = i->dig + i->len; - while (d-- > i->dig) { + while (d > i->dig) { + d--; val = val * DIG_BASE + *d; } @@ -1672,6 +1676,8 @@ size_t mpz_as_str_inpl(const mpz_t *i, unsigned int base, const char *prefix, ch size_t ilen = i->len; + int n_comma = (base == 10) ? 3 : 4; + char *s = str; if (ilen == 0) { if (prefix) { @@ -1717,7 +1723,7 @@ size_t mpz_as_str_inpl(const mpz_t *i, unsigned int base, const char *prefix, ch break; } } - if (!done && comma && (s - last_comma) == 3) { + if (!done && comma && (s - last_comma) == n_comma) { *s++ = comma; last_comma = s; } diff --git a/py/nativeglue.h b/py/nativeglue.h index e96fd7b66a..2c7923c56d 100644 --- a/py/nativeglue.h +++ b/py/nativeglue.h @@ -143,7 +143,7 @@ typedef struct _mp_fun_table_t { int (*printf_)(const mp_print_t *print, const char *fmt, ...); int (*vprintf_)(const mp_print_t *print, const char *fmt, va_list args); #if defined(__GNUC__) - NORETURN // Only certain compilers support no-return attributes in function pointer declarations + MP_NORETURN // Only certain compilers support no-return attributes in function pointer declarations #endif void (*raise_msg)(const mp_obj_type_t *exc_type, mp_rom_error_text_t msg); const mp_obj_type_t *(*obj_get_type)(mp_const_obj_t o_in); @@ -81,7 +81,7 @@ void nlr_call_jump_callbacks(nlr_buf_t *nlr) { } #if MICROPY_ENABLE_VM_ABORT -NORETURN void nlr_jump_abort(void) { +MP_NORETURN void nlr_jump_abort(void) { MP_STATE_THREAD(nlr_top) = MP_STATE_VM(nlr_abort); nlr_jump(NULL); } @@ -177,18 +177,18 @@ unsigned int nlr_push(nlr_buf_t *); unsigned int nlr_push_tail(nlr_buf_t *top); void nlr_pop(void); -NORETURN void nlr_jump(void *val); +MP_NORETURN void nlr_jump(void *val); #if MICROPY_ENABLE_VM_ABORT #define nlr_set_abort(buf) MP_STATE_VM(nlr_abort) = buf #define nlr_get_abort() MP_STATE_VM(nlr_abort) -NORETURN void nlr_jump_abort(void); +MP_NORETURN void nlr_jump_abort(void); #endif // This must be implemented by a port. It's called by nlr_jump // if no nlr buf has been pushed. It must not return, but rather // should bail out with a fatal error. 
-NORETURN void nlr_jump_fail(void *val); +MP_NORETURN void nlr_jump_fail(void *val); // use nlr_raise instead of nlr_jump so that debugging is easier #ifndef MICROPY_DEBUG_NLR diff --git a/py/nlraarch64.c b/py/nlraarch64.c index d6d87ebc50..3318004b5e 100644 --- a/py/nlraarch64.c +++ b/py/nlraarch64.c @@ -56,7 +56,7 @@ __asm( #endif ); -NORETURN void nlr_jump(void *val) { +MP_NORETURN void nlr_jump(void *val) { MP_NLR_JUMP_HEAD(val, top) MP_STATIC_ASSERT(offsetof(nlr_buf_t, regs) == 16); // asm assumes it diff --git a/py/nlrmips.c b/py/nlrmips.c index cba52b16a2..5c55db7e26 100644 --- a/py/nlrmips.c +++ b/py/nlrmips.c @@ -57,7 +57,7 @@ __asm( ".end nlr_push \n" ); -NORETURN void nlr_jump(void *val) { +MP_NORETURN void nlr_jump(void *val) { MP_NLR_JUMP_HEAD(val, top) __asm( "move $4, %0 \n" diff --git a/py/nlrpowerpc.c b/py/nlrpowerpc.c index 8a69fe1eec..cf140400e6 100644 --- a/py/nlrpowerpc.c +++ b/py/nlrpowerpc.c @@ -78,7 +78,7 @@ unsigned int nlr_push(nlr_buf_t *nlr) { return 0; } -NORETURN void nlr_jump(void *val) { +MP_NORETURN void nlr_jump(void *val) { MP_NLR_JUMP_HEAD(val, top) __asm__ volatile ( @@ -167,7 +167,7 @@ unsigned int nlr_push(nlr_buf_t *nlr) { return 0; } -NORETURN void nlr_jump(void *val) { +MP_NORETURN void nlr_jump(void *val) { MP_NLR_JUMP_HEAD(val, top) __asm__ volatile ( diff --git a/py/nlrrv32.c b/py/nlrrv32.c index 9a12ede400..565a8629db 100644 --- a/py/nlrrv32.c +++ b/py/nlrrv32.c @@ -50,7 +50,7 @@ __attribute__((naked)) unsigned int nlr_push(nlr_buf_t *nlr) { ); } -NORETURN void nlr_jump(void *val) { +MP_NORETURN void nlr_jump(void *val) { MP_NLR_JUMP_HEAD(val, top) __asm volatile ( "add x10, x0, %0 \n" // Load nlr_buf address. diff --git a/py/nlrrv64.c b/py/nlrrv64.c index e7ba79797b..b7d1467b8f 100644 --- a/py/nlrrv64.c +++ b/py/nlrrv64.c @@ -50,7 +50,7 @@ __attribute__((naked)) unsigned int nlr_push(nlr_buf_t *nlr) { ); } -NORETURN void nlr_jump(void *val) { +MP_NORETURN void nlr_jump(void *val) { MP_NLR_JUMP_HEAD(val, top) __asm volatile ( "add x10, x0, %0 \n" // Load nlr_buf address. 
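
For orientation: every nlr_jump implementation above becomes MP_NORETURN because
control transfers straight back to the matching nlr_push frame and never returns to
the raise site. A hedged usage sketch of the standard push/pop/jump pattern (not code
from this diff):

    #include "py/nlr.h"
    #include "py/runtime.h"

    static void protected_call(void) {
        nlr_buf_t nlr;
        if (nlr_push(&nlr) == 0) {
            // Protected region: a raise here goes through nlr_jump(), which
            // lands in the else-branch below instead of returning.
            mp_raise_ValueError(MP_ERROR_TEXT("example"));
            nlr_pop(); // must balance nlr_push on the non-raising path
        } else {
            // nlr.ret_val carries the object handed to nlr_jump (the exception).
            mp_obj_t exc = MP_OBJ_FROM_PTR(nlr.ret_val);
            (void)exc; // handle or re-raise here
        }
    }

nlr_pop() is unreachable in this particular sketch because the raise always jumps; it
is kept to show the pairing that every successful nlr_push requires.
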
diff --git a/py/nlrthumb.c b/py/nlrthumb.c index e7b24f242b..8546308a3d 100644 --- a/py/nlrthumb.c +++ b/py/nlrthumb.c @@ -100,7 +100,7 @@ __attribute__((naked)) unsigned int nlr_push(nlr_buf_t *nlr) { #endif } -NORETURN void nlr_jump(void *val) { +MP_NORETURN void nlr_jump(void *val) { MP_NLR_JUMP_HEAD(val, top) __asm volatile ( diff --git a/py/nlrx64.c b/py/nlrx64.c index d1ad91ff7d..51224729fc 100644 --- a/py/nlrx64.c +++ b/py/nlrx64.c @@ -100,7 +100,7 @@ unsigned int nlr_push(nlr_buf_t *nlr) { #endif } -NORETURN void nlr_jump(void *val) { +MP_NORETURN void nlr_jump(void *val) { MP_NLR_JUMP_HEAD(val, top) __asm volatile ( diff --git a/py/nlrx86.c b/py/nlrx86.c index 085e30d203..26bf0dc6cc 100644 --- a/py/nlrx86.c +++ b/py/nlrx86.c @@ -78,7 +78,7 @@ unsigned int nlr_push(nlr_buf_t *nlr) { #endif } -NORETURN void nlr_jump(void *val) { +MP_NORETURN void nlr_jump(void *val) { MP_NLR_JUMP_HEAD(val, top) __asm volatile ( diff --git a/py/nlrxtensa.c b/py/nlrxtensa.c index ff7af6edee..2d1bf35e38 100644 --- a/py/nlrxtensa.c +++ b/py/nlrxtensa.c @@ -55,7 +55,7 @@ unsigned int nlr_push(nlr_buf_t *nlr) { return 0; // needed to silence compiler warning } -NORETURN void nlr_jump(void *val) { +MP_NORETURN void nlr_jump(void *val) { MP_NLR_JUMP_HEAD(val, top) __asm volatile ( @@ -184,13 +184,15 @@ static inline bool mp_obj_is_small_int(mp_const_obj_t o) { #define MP_OBJ_NEW_SMALL_INT(small_int) ((mp_obj_t)((((mp_uint_t)(small_int)) << 1) | 1)) #if MICROPY_PY_BUILTINS_FLOAT -#define MP_OBJ_NEW_CONST_FLOAT(f) MP_ROM_PTR((mp_obj_t)((((((uint64_t)f) & ~3) | 2) + 0x80800000) & 0xffffffff)) +#include <math.h> +// note: MP_OBJ_NEW_CONST_FLOAT should be a MP_ROM_PTR but that macro isn't available yet +#define MP_OBJ_NEW_CONST_FLOAT(f) ((mp_obj_t)((((((uint64_t)f) & ~3) | 2) + 0x80800000) & 0xffffffff)) #define mp_const_float_e MP_OBJ_NEW_CONST_FLOAT(0x402df854) #define mp_const_float_pi MP_OBJ_NEW_CONST_FLOAT(0x40490fdb) +#define mp_const_float_nan MP_OBJ_NEW_CONST_FLOAT(0x7fc00000) #if MICROPY_PY_MATH_CONSTANTS #define mp_const_float_tau MP_OBJ_NEW_CONST_FLOAT(0x40c90fdb) #define mp_const_float_inf MP_OBJ_NEW_CONST_FLOAT(0x7f800000) -#define mp_const_float_nan MP_OBJ_NEW_CONST_FLOAT(0xffc00000) #endif static inline bool mp_obj_is_float(mp_const_obj_t o) { @@ -207,6 +209,10 @@ static inline mp_float_t mp_obj_float_get(mp_const_obj_t o) { return num.f; } static inline mp_obj_t mp_obj_new_float(mp_float_t f) { + if (isnan(f)) { + // prevent creation of bad nanboxed pointers via array.array or struct + return mp_const_float_nan; + } union { mp_float_t f; mp_uint_t u; @@ -257,12 +263,13 @@ static inline bool mp_obj_is_immediate_obj(mp_const_obj_t o) { #error MICROPY_OBJ_REPR_D requires MICROPY_FLOAT_IMPL_DOUBLE #endif +#include <math.h> #define mp_const_float_e {((mp_obj_t)((uint64_t)0x4005bf0a8b145769 + 0x8004000000000000))} #define mp_const_float_pi {((mp_obj_t)((uint64_t)0x400921fb54442d18 + 0x8004000000000000))} +#define mp_const_float_nan {((mp_obj_t)((uint64_t)0x7ff8000000000000 + 0x8004000000000000))} #if MICROPY_PY_MATH_CONSTANTS #define mp_const_float_tau {((mp_obj_t)((uint64_t)0x401921fb54442d18 + 0x8004000000000000))} #define mp_const_float_inf {((mp_obj_t)((uint64_t)0x7ff0000000000000 + 0x8004000000000000))} -#define mp_const_float_nan {((mp_obj_t)((uint64_t)0xfff8000000000000 + 0x8004000000000000))} #endif static inline bool mp_obj_is_float(mp_const_obj_t o) { @@ -276,6 +283,13 @@ static inline mp_float_t mp_obj_float_get(mp_const_obj_t o) { return num.f; } static inline mp_obj_t 
mp_obj_new_float(mp_float_t f) { + if (isnan(f)) { + // prevent creation of bad nanboxed pointers via array.array or struct + struct { + uint64_t r; + } num = mp_const_float_nan; + return num.r; + } union { mp_float_t f; uint64_t r; diff --git a/py/objarray.c b/py/objarray.c index 9d68aa7061..1fd0269390 100644 --- a/py/objarray.c +++ b/py/objarray.c @@ -113,6 +113,19 @@ static mp_obj_array_t *array_new(char typecode, size_t n) { #endif #if MICROPY_PY_BUILTINS_BYTEARRAY || MICROPY_PY_ARRAY +static void array_extend_impl(mp_obj_array_t *array, mp_obj_t arg, char typecode, size_t len) { + mp_obj_t iterable = mp_getiter(arg, NULL); + mp_obj_t item; + size_t i = 0; + while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) { + if (len == 0) { + array_append(MP_OBJ_FROM_PTR(array), item); + } else { + mp_binary_set_val_array(typecode, array->items, i++, item); + } + } +} + static mp_obj_t array_construct(char typecode, mp_obj_t initializer) { // bytearrays can be raw-initialised from anything with the buffer protocol // other arrays can only be raw-initialised from bytes and bytearray objects @@ -142,18 +155,7 @@ static mp_obj_t array_construct(char typecode, mp_obj_t initializer) { } mp_obj_array_t *array = array_new(typecode, len); - - mp_obj_t iterable = mp_getiter(initializer, NULL); - mp_obj_t item; - size_t i = 0; - while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) { - if (len == 0) { - array_append(MP_OBJ_FROM_PTR(array), item); - } else { - mp_binary_set_val_array(typecode, array->items, i++, item); - } - } - + array_extend_impl(array, initializer, typecode, len); return MP_OBJ_FROM_PTR(array); } #endif @@ -413,7 +415,10 @@ static mp_obj_t array_extend(mp_obj_t self_in, mp_obj_t arg_in) { // allow to extend by anything that has the buffer protocol (extension to CPython) mp_buffer_info_t arg_bufinfo; - mp_get_buffer_raise(arg_in, &arg_bufinfo, MP_BUFFER_READ); + if (!mp_get_buffer(arg_in, &arg_bufinfo, MP_BUFFER_READ)) { + array_extend_impl(self, arg_in, 0, 0); + return mp_const_none; + } size_t sz = mp_binary_get_size('@', self->typecode, NULL); diff --git a/py/objfloat.c b/py/objfloat.c index 0728fce315..81b0daa620 100644 --- a/py/objfloat.c +++ b/py/objfloat.c @@ -34,6 +34,11 @@ #if MICROPY_PY_BUILTINS_FLOAT +// Workaround a bug in Windows SDK version 10.0.26100.0, where NAN is no longer constant. +#if defined(_MSC_VER) && !defined(_UCRT_NOISY_NAN) +#define _UCRT_NOISY_NAN +#endif + #include <math.h> #include "py/formatfloat.h" @@ -47,13 +52,6 @@ #define M_PI (3.14159265358979323846) #endif -// Workaround a bug in recent MSVC where NAN is no longer constant. -// (By redefining back to the previous MSVC definition of NAN) -#if defined(_MSC_VER) && _MSC_VER >= 1942 -#undef NAN -#define NAN (-(float)(((float)(1e+300 * 1e+300)) * 0.0F)) -#endif - typedef struct _mp_obj_float_t { mp_obj_base_t base; mp_float_t value; diff --git a/py/objint.c b/py/objint.c index 4be6009a44..87d8a27852 100644 --- a/py/objint.c +++ b/py/objint.c @@ -209,7 +209,7 @@ static const uint8_t log_base2_floor[] = { size_t mp_int_format_size(size_t num_bits, int base, const char *prefix, char comma) { assert(2 <= base && base <= 16); size_t num_digits = num_bits / log_base2_floor[base - 1] + 1; - size_t num_commas = comma ? num_digits / 3 : 0; + size_t num_commas = comma ? (base == 10 ? num_digits / 3 : num_digits / 4): 0; size_t prefix_len = prefix ? 
strlen(prefix) : 0; return num_digits + num_commas + prefix_len + 2; // +1 for sign, +1 for null byte } @@ -251,6 +251,7 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co sign = '-'; } + int n_comma = (base == 10) ? 3 : 4; size_t needed_size = mp_int_format_size(sizeof(fmt_int_t) * 8, base, prefix, comma); if (needed_size > *buf_size) { *buf = m_new(char, needed_size); @@ -275,7 +276,7 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co c += '0'; } *(--b) = c; - if (comma && num != 0 && b > str && (last_comma - b) == 3) { + if (comma && num != 0 && b > str && (last_comma - b) == n_comma) { *(--b) = comma; last_comma = b; } diff --git a/py/objlist.c b/py/objlist.c index 9a88de3892..71414a849f 100644 --- a/py/objlist.c +++ b/py/objlist.c @@ -159,76 +159,63 @@ static mp_obj_t list_binary_op(mp_binary_op_t op, mp_obj_t lhs, mp_obj_t rhs) { } static mp_obj_t list_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { - if (value == MP_OBJ_NULL) { - // delete - #if MICROPY_PY_BUILTINS_SLICE - if (mp_obj_is_type(index, &mp_type_slice)) { - mp_obj_list_t *self = MP_OBJ_TO_PTR(self_in); - mp_bound_slice_t slice; - if (!mp_seq_get_fast_slice_indexes(self->len, index, &slice)) { - mp_raise_NotImplementedError(NULL); + #if MICROPY_PY_BUILTINS_SLICE + if (mp_obj_is_type(index, &mp_type_slice)) { + mp_obj_list_t *self = MP_OBJ_TO_PTR(self_in); + mp_bound_slice_t slice; + bool fast = mp_seq_get_fast_slice_indexes(self->len, index, &slice); + if (value == MP_OBJ_SENTINEL) { + // load + if (!fast) { + return mp_seq_extract_slice(self->items, &slice); } - - mp_int_t len_adj = slice.start - slice.stop; - assert(len_adj <= 0); - mp_seq_replace_slice_no_grow(self->items, self->len, slice.start, slice.stop, self->items /*NULL*/, 0, sizeof(*self->items)); + mp_obj_list_t *res = list_new(slice.stop - slice.start); + mp_seq_copy(res->items, self->items + slice.start, res->len, mp_obj_t); + return MP_OBJ_FROM_PTR(res); + } + // assign/delete + if (value == MP_OBJ_NULL) { + // delete is equivalent to slice assignment of an empty sequence + value = mp_const_empty_tuple; + } + if (!fast) { + mp_raise_NotImplementedError(NULL); + } + size_t value_len; + mp_obj_t *value_items; + mp_obj_get_array(value, &value_len, &value_items); + mp_int_t len_adj = value_len - (slice.stop - slice.start); + if (len_adj > 0) { + if (self->len + len_adj > self->alloc) { + // TODO: Might optimize memory copies here by checking if block can + // be grown inplace or not + self->items = m_renew(mp_obj_t, self->items, self->alloc, self->len + len_adj); + self->alloc = self->len + len_adj; + } + mp_seq_replace_slice_grow_inplace(self->items, self->len, + slice.start, slice.stop, value_items, value_len, len_adj, sizeof(*self->items)); + } else { + mp_seq_replace_slice_no_grow(self->items, self->len, + slice.start, slice.stop, value_items, value_len, sizeof(*self->items)); // Clear "freed" elements at the end of list mp_seq_clear(self->items, self->len + len_adj, self->len, sizeof(*self->items)); - self->len += len_adj; - return mp_const_none; + // TODO: apply allocation policy re: alloc_size } - #endif + self->len += len_adj; + return mp_const_none; + } + #endif + if (value == MP_OBJ_NULL) { + // delete mp_obj_t args[2] = {self_in, index}; list_pop(2, args); return mp_const_none; } else if (value == MP_OBJ_SENTINEL) { // load mp_obj_list_t *self = MP_OBJ_TO_PTR(self_in); - #if MICROPY_PY_BUILTINS_SLICE - if (mp_obj_is_type(index, &mp_type_slice)) { - mp_bound_slice_t slice; - 
if (!mp_seq_get_fast_slice_indexes(self->len, index, &slice)) { - return mp_seq_extract_slice(self->items, &slice); - } - mp_obj_list_t *res = list_new(slice.stop - slice.start); - mp_seq_copy(res->items, self->items + slice.start, res->len, mp_obj_t); - return MP_OBJ_FROM_PTR(res); - } - #endif size_t index_val = mp_get_index(self->base.type, self->len, index, false); return self->items[index_val]; } else { - #if MICROPY_PY_BUILTINS_SLICE - if (mp_obj_is_type(index, &mp_type_slice)) { - mp_obj_list_t *self = MP_OBJ_TO_PTR(self_in); - size_t value_len; - mp_obj_t *value_items; - mp_obj_get_array(value, &value_len, &value_items); - mp_bound_slice_t slice_out; - if (!mp_seq_get_fast_slice_indexes(self->len, index, &slice_out)) { - mp_raise_NotImplementedError(NULL); - } - mp_int_t len_adj = value_len - (slice_out.stop - slice_out.start); - if (len_adj > 0) { - if (self->len + len_adj > self->alloc) { - // TODO: Might optimize memory copies here by checking if block can - // be grown inplace or not - self->items = m_renew(mp_obj_t, self->items, self->alloc, self->len + len_adj); - self->alloc = self->len + len_adj; - } - mp_seq_replace_slice_grow_inplace(self->items, self->len, - slice_out.start, slice_out.stop, value_items, value_len, len_adj, sizeof(*self->items)); - } else { - mp_seq_replace_slice_no_grow(self->items, self->len, - slice_out.start, slice_out.stop, value_items, value_len, sizeof(*self->items)); - // Clear "freed" elements at the end of list - mp_seq_clear(self->items, self->len + len_adj, self->len, sizeof(*self->items)); - // TODO: apply allocation policy re: alloc_size - } - self->len += len_adj; - return mp_const_none; - } - #endif mp_obj_list_store(self_in, index, value); return mp_const_none; } diff --git a/py/objstr.c b/py/objstr.c index a160ab415c..c81fc682fd 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -40,7 +40,7 @@ static mp_obj_t str_modulo_format(mp_obj_t pattern, size_t n_args, const mp_obj_ #endif static mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf); -static NORETURN void bad_implicit_conversion(mp_obj_t self_in); +static MP_NORETURN void bad_implicit_conversion(mp_obj_t self_in); static mp_obj_t mp_obj_new_str_type_from_vstr(const mp_obj_type_t *type, vstr_t *vstr); @@ -1001,7 +1001,7 @@ static mp_obj_t arg_as_int(mp_obj_t arg) { #endif #if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_TERSE -static NORETURN void terse_str_format_value_error(void) { +static MP_NORETURN void terse_str_format_value_error(void) { mp_raise_ValueError(MP_ERROR_TEXT("bad format string")); } #else @@ -1184,7 +1184,7 @@ static vstr_t mp_obj_str_format_helper(const char *str, const char *top, int *ar int width = -1; int precision = -1; char type = '\0'; - int flags = 0; + unsigned int flags = 0; if (format_spec) { // The format specifier (from http://docs.python.org/2/library/string.html#formatspec) @@ -1229,8 +1229,9 @@ static vstr_t mp_obj_str_format_helper(const char *str, const char *top, int *ar } } s = str_to_int(s, stop, &width); - if (*s == ',') { - flags |= PF_FLAG_SHOW_COMMA; + if (*s == ',' || *s == '_') { + MP_STATIC_ASSERT((unsigned)'_' << PF_FLAG_SEP_POS >> PF_FLAG_SEP_POS == '_'); + flags |= (unsigned)*s << PF_FLAG_SEP_POS; s++; } if (*s == '.') { @@ -2357,7 +2358,7 @@ bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) { } } -static NORETURN void bad_implicit_conversion(mp_obj_t self_in) { +static MP_NORETURN void bad_implicit_conversion(mp_obj_t self_in) { #if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_TERSE 
mp_raise_TypeError(MP_ERROR_TEXT("can't convert to str implicitly")); #else diff --git a/py/parsenum.c b/py/parsenum.c index 3281eb4b85..7e6695fbfc 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -36,7 +36,7 @@ #include <math.h> #endif -static NORETURN void raise_exc(mp_obj_t exc, mp_lexer_t *lex) { +static MP_NORETURN void raise_exc(mp_obj_t exc, mp_lexer_t *lex) { // if lex!=NULL then the parser called us and we need to convert the // exception's type from ValueError to SyntaxError and add traceback info if (lex != NULL) { @@ -227,13 +227,13 @@ mp_obj_t mp_parse_num_float(const char *str, size_t len, bool allow_imag, mp_lex const char *top = str + len; mp_float_t dec_val = 0; - bool dec_neg = false; #if MICROPY_PY_BUILTINS_COMPLEX unsigned int real_imag_state = REAL_IMAG_STATE_START; mp_float_t dec_real = 0; -parse_start: +parse_start:; #endif + bool dec_neg = false; // skip leading space for (; str < top && unichar_isspace(*str); str++) { @@ -252,24 +252,18 @@ parse_start: const char *str_val_start = str; // determine what the string is - if (str < top && (str[0] | 0x20) == 'i') { - // string starts with 'i', should be 'inf' or 'infinity' (case insensitive) - if (str + 2 < top && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'f') { - // inf - str += 3; - dec_val = (mp_float_t)INFINITY; - if (str + 4 < top && (str[0] | 0x20) == 'i' && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'i' && (str[3] | 0x20) == 't' && (str[4] | 0x20) == 'y') { - // infinity - str += 5; - } - } - } else if (str < top && (str[0] | 0x20) == 'n') { - // string starts with 'n', should be 'nan' (case insensitive) - if (str + 2 < top && (str[1] | 0x20) == 'a' && (str[2] | 0x20) == 'n') { - // NaN - str += 3; - dec_val = MICROPY_FLOAT_C_FUN(nan)(""); + if (str + 2 < top && (str[0] | 0x20) == 'i' && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'f') { + // 'inf' or 'infinity' (case insensitive) + str += 3; + dec_val = (mp_float_t)INFINITY; + if (str + 4 < top && (str[0] | 0x20) == 'i' && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'i' && (str[3] | 0x20) == 't' && (str[4] | 0x20) == 'y') { + // infinity + str += 5; } + } else if (str + 2 < top && (str[0] | 0x20) == 'n' && (str[1] | 0x20) == 'a' && (str[2] | 0x20) == 'n') { + // 'nan' (case insensitive) + str += 3; + dec_val = MICROPY_FLOAT_C_FUN(nan)(""); } else { // string should be a decimal number parse_dec_in_t in = PARSE_DEC_IN_INTG; diff --git a/py/persistentcode.c b/py/persistentcode.c index 2a42b904bc..6ec0717f94 100644 --- a/py/persistentcode.c +++ b/py/persistentcode.c @@ -72,6 +72,8 @@ typedef struct _bytecode_prelude_t { static int read_byte(mp_reader_t *reader); static size_t read_uint(mp_reader_t *reader); +#if MICROPY_EMIT_MACHINE_CODE + #if MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA || MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA // An mp_obj_list_t that tracks native text/BSS/rodata to prevent the GC from reclaiming them. @@ -86,8 +88,6 @@ static void track_root_pointer(void *ptr) { #endif -#if MICROPY_EMIT_MACHINE_CODE - typedef struct _reloc_info_t { mp_reader_t *reader; mp_module_context_t *context; @@ -415,15 +415,17 @@ static mp_raw_code_t *load_raw_code(mp_reader_t *reader, mp_module_context_t *co // Relocate and commit code to executable address space reloc_info_t ri = {reader, context, rodata, bss}; + #if MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA + if (native_scope_flags & MP_SCOPE_FLAG_VIPERRELOC) { + // Track the function data memory so it's not reclaimed by the GC. 
+ track_root_pointer(fun_data);
+ }
+ #endif
 #if defined(MP_PLAT_COMMIT_EXEC)
 void *opt_ri = (native_scope_flags & MP_SCOPE_FLAG_VIPERRELOC) ? &ri : NULL;
 fun_data = MP_PLAT_COMMIT_EXEC(fun_data, fun_data_len, opt_ri);
 #else
 if (native_scope_flags & MP_SCOPE_FLAG_VIPERRELOC) {
- #if MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA
- // Track the function data memory so it's not reclaimed by the GC.
- track_root_pointer(fun_data);
- #endif
 // Do the relocations.
 mp_native_relocate(&ri, fun_data, (uintptr_t)fun_data);
 }
@@ -757,7 +759,7 @@ static void bit_vector_clear(bit_vector_t *self) {
 static bool bit_vector_is_set(bit_vector_t *self, size_t index) {
 const size_t bits_size = sizeof(*self->bits) * MP_BITS_PER_BYTE;
 return index / bits_size < self->alloc
- && (self->bits[index / bits_size] & (1 << (index % bits_size))) != 0;
+ && (self->bits[index / bits_size] & ((uintptr_t)1 << (index % bits_size))) != 0;
 }
 static void bit_vector_set(bit_vector_t *self, size_t index) {
@@ -768,7 +770,7 @@ static void bit_vector_set(bit_vector_t *self, size_t index) {
 self->bits = m_renew(uintptr_t, self->bits, self->alloc, new_alloc);
 self->alloc = new_alloc;
 }
- self->bits[index / bits_size] |= 1 << (index % bits_size);
+ self->bits[index / bits_size] |= (uintptr_t)1 << (index % bits_size);
 }
 typedef struct _mp_opcode_t {
diff --git a/py/repl.c b/py/repl.c
--- a/py/repl.c
+++ b/py/repl.c
@@ -218,6 +218,10 @@ static void print_completions(const mp_print_t *print,
 for (qstr q = q_first; q <= q_last; ++q) {
 size_t d_len;
 const char *d_str = (const char *)qstr_data(q, &d_len);
+ // filter out words that begin with underscore unless there's already a partial match
+ if (s_len == 0 && d_str[0] == '_') {
+ continue;
+ }
 if (s_len <= d_len && strncmp(s_start, d_str, s_len) == 0) {
 if (test_qstr(obj, q)) {
 int gap = (line_len + WORD_SLOT_LEN - 1) / WORD_SLOT_LEN * WORD_SLOT_LEN - line_len;
diff --git a/py/runtime.c b/py/runtime.c
index 58819819ad..90587a010a 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -123,7 +123,7 @@ void mp_init(void) {
 MP_STATE_VM(mp_module_builtins_override_dict) = NULL;
 #endif
- #if MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA || MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA
+ #if MICROPY_EMIT_MACHINE_CODE && (MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA || MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA)
 MP_STATE_VM(persistent_code_root_pointers) = MP_OBJ_NULL;
 #endif
@@ -1247,6 +1247,19 @@ void mp_load_method(mp_obj_t base, qstr attr, mp_obj_t *dest) {
 mp_raise_msg_varg(&mp_type_AttributeError,
 MP_ERROR_TEXT("type object '%q' has no attribute '%q'"),
 ((mp_obj_type_t *)MP_OBJ_TO_PTR(base))->name, attr);
+ #if MICROPY_MODULE___ALL__ && MICROPY_ERROR_REPORTING >= MICROPY_ERROR_REPORTING_DETAILED
+ } else if (mp_obj_is_type(base, &mp_type_module)) {
+ // report errors in __all__ as done by CPython
+ mp_obj_t dest_name[2];
+ qstr module_name = MP_QSTR_;
+ mp_load_method_maybe(base, MP_QSTR___name__, dest_name);
+ if (mp_obj_is_qstr(dest_name[0])) {
+ module_name = mp_obj_str_get_qstr(dest_name[0]);
+ }
+ mp_raise_msg_varg(&mp_type_AttributeError,
+ MP_ERROR_TEXT("module '%q' has no attribute '%q'"),
+ module_name, attr);
+ #endif
 } else {
 mp_raise_msg_varg(&mp_type_AttributeError,
 MP_ERROR_TEXT("'%s' object has no attribute '%q'"),
@@ -1593,8 +1606,28 @@ mp_obj_t mp_import_from(mp_obj_t module, qstr name) {
 void mp_import_all(mp_obj_t module) {
 DEBUG_printf("import all %p\n", module);
- // TODO: Support __all__
 mp_map_t *map = &mp_obj_module_get_globals(module)->map;
+
+ #if MICROPY_MODULE___ALL__
+ mp_map_elem_t *elem = mp_map_lookup(map, MP_OBJ_NEW_QSTR(MP_QSTR___all__), MP_MAP_LOOKUP);
+ if (elem != NULL) { + // When __all__ is defined, we must explicitly load all specified + // symbols, possibly invoking the module __getattr__ function + size_t len; + mp_obj_t *items; + mp_obj_get_array(elem->value, &len, &items); + for (size_t i = 0; i < len; i++) { + qstr qname = mp_obj_str_get_qstr(items[i]); + mp_obj_t dest[2]; + mp_load_method(module, qname, dest); + mp_store_name(qname, dest[0]); + } + return; + } + #endif + + // By default, the set of public names includes all names found in the module's + // namespace which do not begin with an underscore character ('_') for (size_t i = 0; i < map->alloc; i++) { if (mp_map_slot_is_filled(map, i)) { // Entry in module global scope may be generated programmatically @@ -1649,7 +1682,7 @@ mp_obj_t mp_parse_compile_execute(mp_lexer_t *lex, mp_parse_input_kind_t parse_i #endif // MICROPY_ENABLE_COMPILER -NORETURN void m_malloc_fail(size_t num_bytes) { +MP_NORETURN void m_malloc_fail(size_t num_bytes) { DEBUG_printf("memory allocation failed, allocating %u bytes\n", (uint)num_bytes); #if MICROPY_ENABLE_GC if (gc_is_locked()) { @@ -1662,25 +1695,25 @@ NORETURN void m_malloc_fail(size_t num_bytes) { #if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NONE -NORETURN void mp_raise_type(const mp_obj_type_t *exc_type) { +MP_NORETURN void mp_raise_type(const mp_obj_type_t *exc_type) { nlr_raise(mp_obj_new_exception(exc_type)); } -NORETURN void mp_raise_ValueError_no_msg(void) { +MP_NORETURN void mp_raise_ValueError_no_msg(void) { mp_raise_type(&mp_type_ValueError); } -NORETURN void mp_raise_TypeError_no_msg(void) { +MP_NORETURN void mp_raise_TypeError_no_msg(void) { mp_raise_type(&mp_type_TypeError); } -NORETURN void mp_raise_NotImplementedError_no_msg(void) { +MP_NORETURN void mp_raise_NotImplementedError_no_msg(void) { mp_raise_type(&mp_type_NotImplementedError); } #else -NORETURN void mp_raise_msg(const mp_obj_type_t *exc_type, mp_rom_error_text_t msg) { +MP_NORETURN void mp_raise_msg(const mp_obj_type_t *exc_type, mp_rom_error_text_t msg) { if (msg == NULL) { nlr_raise(mp_obj_new_exception(exc_type)); } else { @@ -1688,7 +1721,7 @@ NORETURN void mp_raise_msg(const mp_obj_type_t *exc_type, mp_rom_error_text_t ms } } -NORETURN void mp_raise_msg_varg(const mp_obj_type_t *exc_type, mp_rom_error_text_t fmt, ...) { +MP_NORETURN void mp_raise_msg_varg(const mp_obj_type_t *exc_type, mp_rom_error_text_t fmt, ...) 
{ va_list args; va_start(args, fmt); mp_obj_t exc = mp_obj_new_exception_msg_vlist(exc_type, fmt, args); @@ -1696,25 +1729,25 @@ NORETURN void mp_raise_msg_varg(const mp_obj_type_t *exc_type, mp_rom_error_text nlr_raise(exc); } -NORETURN void mp_raise_ValueError(mp_rom_error_text_t msg) { +MP_NORETURN void mp_raise_ValueError(mp_rom_error_text_t msg) { mp_raise_msg(&mp_type_ValueError, msg); } -NORETURN void mp_raise_TypeError(mp_rom_error_text_t msg) { +MP_NORETURN void mp_raise_TypeError(mp_rom_error_text_t msg) { mp_raise_msg(&mp_type_TypeError, msg); } -NORETURN void mp_raise_NotImplementedError(mp_rom_error_text_t msg) { +MP_NORETURN void mp_raise_NotImplementedError(mp_rom_error_text_t msg) { mp_raise_msg(&mp_type_NotImplementedError, msg); } #endif -NORETURN void mp_raise_type_arg(const mp_obj_type_t *exc_type, mp_obj_t arg) { +MP_NORETURN void mp_raise_type_arg(const mp_obj_type_t *exc_type, mp_obj_t arg) { nlr_raise(mp_obj_new_exception_arg1(exc_type, arg)); } -NORETURN void mp_raise_StopIteration(mp_obj_t arg) { +MP_NORETURN void mp_raise_StopIteration(mp_obj_t arg) { if (arg == MP_OBJ_NULL) { mp_raise_type(&mp_type_StopIteration); } else { @@ -1722,7 +1755,7 @@ NORETURN void mp_raise_StopIteration(mp_obj_t arg) { } } -NORETURN void mp_raise_TypeError_int_conversion(mp_const_obj_t arg) { +MP_NORETURN void mp_raise_TypeError_int_conversion(mp_const_obj_t arg) { #if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_TERSE (void)arg; mp_raise_TypeError(MP_ERROR_TEXT("can't convert to int")); @@ -1732,11 +1765,11 @@ NORETURN void mp_raise_TypeError_int_conversion(mp_const_obj_t arg) { #endif } -NORETURN void mp_raise_OSError(int errno_) { +MP_NORETURN void mp_raise_OSError(int errno_) { mp_raise_type_arg(&mp_type_OSError, MP_OBJ_NEW_SMALL_INT(errno_)); } -NORETURN void mp_raise_OSError_with_filename(int errno_, const char *filename) { +MP_NORETURN void mp_raise_OSError_with_filename(int errno_, const char *filename) { vstr_t vstr; vstr_init(&vstr, 32); vstr_printf(&vstr, "can't open %s", filename); @@ -1746,7 +1779,7 @@ NORETURN void mp_raise_OSError_with_filename(int errno_, const char *filename) { } #if MICROPY_STACK_CHECK || MICROPY_ENABLE_PYSTACK -NORETURN void mp_raise_recursion_depth(void) { +MP_NORETURN void mp_raise_recursion_depth(void) { mp_raise_type_arg(&mp_type_RuntimeError, MP_OBJ_NEW_QSTR(MP_QSTR_maximum_space_recursion_space_depth_space_exceeded)); } #endif diff --git a/py/runtime.h b/py/runtime.h index ffbc3972a3..a93488e2cd 100644 --- a/py/runtime.h +++ b/py/runtime.h @@ -128,7 +128,7 @@ void mp_event_wait_indefinite(void); void mp_event_wait_ms(mp_uint_t timeout_ms); // extra printing method specifically for mp_obj_t's which are integral type -int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, int base, int base_char, int flags, char fill, int width, int prec); +int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, unsigned base, int base_char, int flags, char fill, int width, int prec); void mp_arg_check_num_sig(size_t n_args, size_t n_kw, uint32_t sig); static inline void mp_arg_check_num(size_t n_args, size_t n_kw, size_t n_args_min, size_t n_args_max, bool takes_kw) { @@ -136,8 +136,8 @@ static inline void mp_arg_check_num(size_t n_args, size_t n_kw, size_t n_args_mi } void mp_arg_parse_all(size_t n_pos, const mp_obj_t *pos, mp_map_t *kws, size_t n_allowed, const mp_arg_t *allowed, mp_arg_val_t *out_vals); void mp_arg_parse_all_kw_array(size_t n_pos, size_t n_kw, const mp_obj_t *args, size_t n_allowed, const mp_arg_t *allowed, mp_arg_val_t *out_vals); 
-NORETURN void mp_arg_error_terse_mismatch(void); -NORETURN void mp_arg_error_unimpl_kw(void); +MP_NORETURN void mp_arg_error_terse_mismatch(void); +MP_NORETURN void mp_arg_error_unimpl_kw(void); static inline mp_obj_dict_t *mp_locals_get(void) { return MP_STATE_THREAD(dict_locals); @@ -246,10 +246,10 @@ mp_obj_t mp_import_from(mp_obj_t module, qstr name); void mp_import_all(mp_obj_t module); #if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NONE -NORETURN void mp_raise_type(const mp_obj_type_t *exc_type); -NORETURN void mp_raise_ValueError_no_msg(void); -NORETURN void mp_raise_TypeError_no_msg(void); -NORETURN void mp_raise_NotImplementedError_no_msg(void); +MP_NORETURN void mp_raise_type(const mp_obj_type_t *exc_type); +MP_NORETURN void mp_raise_ValueError_no_msg(void); +MP_NORETURN void mp_raise_TypeError_no_msg(void); +MP_NORETURN void mp_raise_NotImplementedError_no_msg(void); #define mp_raise_msg(exc_type, msg) mp_raise_type(exc_type) #define mp_raise_msg_varg(exc_type, ...) mp_raise_type(exc_type) #define mp_raise_ValueError(msg) mp_raise_ValueError_no_msg() @@ -257,19 +257,19 @@ NORETURN void mp_raise_NotImplementedError_no_msg(void); #define mp_raise_NotImplementedError(msg) mp_raise_NotImplementedError_no_msg() #else #define mp_raise_type(exc_type) mp_raise_msg(exc_type, NULL) -NORETURN void mp_raise_msg(const mp_obj_type_t *exc_type, mp_rom_error_text_t msg); -NORETURN void mp_raise_msg_varg(const mp_obj_type_t *exc_type, mp_rom_error_text_t fmt, ...); -NORETURN void mp_raise_ValueError(mp_rom_error_text_t msg); -NORETURN void mp_raise_TypeError(mp_rom_error_text_t msg); -NORETURN void mp_raise_NotImplementedError(mp_rom_error_text_t msg); +MP_NORETURN void mp_raise_msg(const mp_obj_type_t *exc_type, mp_rom_error_text_t msg); +MP_NORETURN void mp_raise_msg_varg(const mp_obj_type_t *exc_type, mp_rom_error_text_t fmt, ...); +MP_NORETURN void mp_raise_ValueError(mp_rom_error_text_t msg); +MP_NORETURN void mp_raise_TypeError(mp_rom_error_text_t msg); +MP_NORETURN void mp_raise_NotImplementedError(mp_rom_error_text_t msg); #endif -NORETURN void mp_raise_type_arg(const mp_obj_type_t *exc_type, mp_obj_t arg); -NORETURN void mp_raise_StopIteration(mp_obj_t arg); -NORETURN void mp_raise_TypeError_int_conversion(mp_const_obj_t arg); -NORETURN void mp_raise_OSError(int errno_); -NORETURN void mp_raise_OSError_with_filename(int errno_, const char *filename); -NORETURN void mp_raise_recursion_depth(void); +MP_NORETURN void mp_raise_type_arg(const mp_obj_type_t *exc_type, mp_obj_t arg); +MP_NORETURN void mp_raise_StopIteration(mp_obj_t arg); +MP_NORETURN void mp_raise_TypeError_int_conversion(mp_const_obj_t arg); +MP_NORETURN void mp_raise_OSError(int errno_); +MP_NORETURN void mp_raise_OSError_with_filename(int errno_, const char *filename); +MP_NORETURN void mp_raise_recursion_depth(void); #if MICROPY_BUILTIN_METHOD_CHECK_SELF_ARG #undef mp_check_self diff --git a/py/scheduler.c b/py/scheduler.c index 2170b9577e..d4cdb59efb 100644 --- a/py/scheduler.c +++ b/py/scheduler.c @@ -88,17 +88,21 @@ static inline void mp_sched_run_pending(void) { #if MICROPY_SCHEDULER_STATIC_NODES // Run all pending C callbacks. 
- while (MP_STATE_VM(sched_head) != NULL) {
- mp_sched_node_t *node = MP_STATE_VM(sched_head);
- MP_STATE_VM(sched_head) = node->next;
- if (MP_STATE_VM(sched_head) == NULL) {
- MP_STATE_VM(sched_tail) = NULL;
- }
- mp_sched_callback_t callback = node->callback;
- node->callback = NULL;
- MICROPY_END_ATOMIC_SECTION(atomic_state);
- callback(node);
- atomic_state = MICROPY_BEGIN_ATOMIC_SECTION();
+ mp_sched_node_t *original_tail = MP_STATE_VM(sched_tail);
+ if (original_tail != NULL) {
+ mp_sched_node_t *node;
+ do {
+ node = MP_STATE_VM(sched_head);
+ MP_STATE_VM(sched_head) = node->next;
+ if (MP_STATE_VM(sched_head) == NULL) {
+ MP_STATE_VM(sched_tail) = NULL;
+ }
+ mp_sched_callback_t callback = node->callback;
+ node->callback = NULL;
+ MICROPY_END_ATOMIC_SECTION(atomic_state);
+ callback(node);
+ atomic_state = MICROPY_BEGIN_ATOMIC_SECTION();
+ } while (node != original_tail); // Don't execute any callbacks scheduled during this run
 }
 #endif
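
A note on the digit-grouping changes in py/objint.c and py/objstr.c above: the format-spec parser now accepts both ',' and '_' as a separator (stored in the print flags via PF_FLAG_SEP_POS), and mp_obj_int_formatted groups digits in threes for base 10 but in fours for other bases, in line with CPython's PEP 515 grouping. A minimal standalone sketch of that grouping rule, where format_grouped is an illustrative helper and not a MicroPython API:

#include <stdio.h>
#include <string.h>

// Format `num` in `base`, inserting `sep` every 3 digits (base 10) or
// every 4 digits (other bases), mirroring the n_comma logic in the patch.
static void format_grouped(unsigned long num, unsigned base, char sep, char *out) {
    char tmp[80];
    char *b = tmp + sizeof(tmp);
    *(--b) = '\0';
    char *last_sep = b;
    int group = (base == 10) ? 3 : 4;
    do {
        int d = num % base;
        num /= base;
        *(--b) = (d < 10) ? ('0' + d) : ('a' + d - 10);
        if (sep && num != 0 && (last_sep - b) == group) {
            *(--b) = sep;
            last_sep = b;
        }
    } while (num != 0);
    strcpy(out, b);
}

int main(void) {
    char buf[80];
    format_grouped(1234567, 10, '_', buf);   // prints 1_234_567
    puts(buf);
    format_grouped(0xDEADBEEF, 16, '_', buf); // prints dead_beef
    puts(buf);
    return 0;
}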
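The py/objlist.c rewrite above unifies slice load, slice assignment, and slice deletion into one path by treating deletion as assignment of an empty sequence. The underlying memory operation is an in-place splice; below is a simplified sketch on a plain int array, where splice is an illustrative helper that assumes the array already has capacity for any growth (the real code guarantees this with m_renew):

#include <stdio.h>
#include <string.h>

// Replace items[start:stop] with the n elements at src, shifting the tail.
// Deleting a slice is simply the n == 0 case.
static size_t splice(int *items, size_t len, size_t start, size_t stop,
                     const int *src, size_t n) {
    memmove(items + start + n, items + stop, (len - stop) * sizeof(int));
    if (n > 0) {
        memcpy(items + start, src, n * sizeof(int));
    }
    return len + n - (stop - start);
}

int main(void) {
    int a[8] = {0, 1, 2, 3, 4, 5};
    size_t len = 6;
    len = splice(a, len, 1, 4, NULL, 0); // like "del a[1:4]" -> 0 4 5
    for (size_t i = 0; i < len; i++) {
        printf("%d ", a[i]);
    }
    putchar('\n');
    return 0;
}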
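In the py/parsenum.c change above, the 'inf' and 'nan' branches are folded into two flat conditions using the (c | 0x20) idiom: OR-ing an ASCII letter with 0x20 maps 'A'..'Z' onto 'a'..'z' and leaves lowercase letters unchanged, so each byte can be compared directly against a lowercase literal. A small sketch, with match3_ci as an illustrative helper:

#include <stdio.h>
#include <stdbool.h>

// Case-insensitive match of a 3-letter lowercase literal at s, using the
// same bounds check (s + 2 < end) and `| 0x20` trick as the patch.
static bool match3_ci(const char *s, const char *end, const char *lit) {
    return s + 2 < end
           && (s[0] | 0x20) == lit[0]
           && (s[1] | 0x20) == lit[1]
           && (s[2] | 0x20) == lit[2];
}

int main(void) {
    const char *a = "Infinity";
    const char *b = "NAN5"; // one byte of lookahead past the literal
    printf("%d\n", match3_ci(a, a + 8, "inf")); // 1
    printf("%d\n", match3_ci(b, b + 4, "nan")); // 1
    return 0;
}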
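The bit_vector changes in py/persistentcode.c widen the shifted constant to the word type. Since self->bits is an array of uintptr_t, a plain 1 (a 32-bit int on LP64 platforms) shifted by index % 64 is undefined behaviour once the shift amount reaches the width of int; casting to (uintptr_t)1 makes the shift full-width. A compact illustration:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    size_t index = 40; // bit position beyond the width of a 32-bit int
    size_t bits_size = sizeof(uintptr_t) * 8; // 64 on LP64 targets
    // Undefined on such targets: the constant 1 is only a 32-bit int.
    // uintptr_t bad = 1 << (index % bits_size);
    // Well-defined: widen the constant before shifting.
    uintptr_t word = (uintptr_t)1 << (index % bits_size);
    printf("0x%llx\n", (unsigned long long)word); // 0x10000000000
    return 0;
}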
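Finally, the py/scheduler.c change bounds each run of pending C callbacks: the tail is snapshotted before draining, so a callback that queues new work cannot extend the current pass and starve the VM; anything scheduled during the run waits for the next pass. A stripped-down sketch without the atomic sections, where node_t, push, and run_pending are illustrative stand-ins for the mp_sched_node machinery:

#include <stdio.h>
#include <stddef.h>

typedef struct node {
    struct node *next;
    void (*callback)(struct node *);
} node_t;

static node_t *head, *tail;

static void push(node_t *n) {
    n->next = NULL;
    if (tail) { tail->next = n; } else { head = n; }
    tail = n;
}

// Drain only the nodes queued when the run started: nodes pushed by a
// callback are left for the next invocation, as in the patch.
static void run_pending(void) {
    node_t *original_tail = tail;
    if (original_tail == NULL) {
        return;
    }
    node_t *n;
    do {
        n = head;
        head = n->next;
        if (head == NULL) {
            tail = NULL;
        }
        void (*cb)(node_t *) = n->callback;
        n->callback = NULL;
        cb(n);
    } while (n != original_tail);
}

static node_t extra;
static void hello(node_t *n) { printf("ran %p\n", (void *)n); }
static void resched(node_t *n) {
    printf("ran %p, queueing another node\n", (void *)n);
    extra.callback = hello;
    push(&extra); // queued mid-run; deferred to the next pass
}

int main(void) {
    node_t a = { .callback = resched };
    push(&a);
    run_pending(); // runs only `a`
    run_pending(); // runs `extra` on the next pass
    return 0;
}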