summaryrefslogtreecommitdiffstatshomepage
path: root/py
diff options
context:
space:
mode:
Diffstat (limited to 'py')
-rw-r--r--py/.gitignore1
-rw-r--r--py/Makefile50
-rw-r--r--py/asmthumb.c421
-rw-r--r--py/asmthumb.h60
-rw-r--r--py/asmx64.c621
-rw-r--r--py/asmx64.h76
-rw-r--r--py/bc.c272
-rw-r--r--py/bc.h97
-rw-r--r--py/compile.c2510
-rw-r--r--py/compile.h1
-rw-r--r--py/emit.h120
-rw-r--r--py/emitbc.c692
-rw-r--r--py/emitcommon.c171
-rw-r--r--py/emitcpy.c834
-rw-r--r--py/emitthumb.c673
-rw-r--r--py/emitx64.c680
-rw-r--r--py/grammar.h300
-rw-r--r--py/lexer.c677
-rw-r--r--py/lexer.h141
-rw-r--r--py/lexerfile.c23
-rw-r--r--py/machine.h4
-rw-r--r--py/main.c58
-rw-r--r--py/malloc.c56
-rw-r--r--py/misc.c84
-rw-r--r--py/misc.h91
-rw-r--r--py/parse.c565
-rw-r--r--py/parse.h54
-rw-r--r--py/qstr.c56
-rw-r--r--py/runtime.c944
-rw-r--r--py/runtime.h121
-rw-r--r--py/scope.c218
-rw-r--r--py/scope.h58
32 files changed, 10729 insertions, 0 deletions
diff --git a/py/.gitignore b/py/.gitignore
new file mode 100644
index 0000000000..5761abcfdf
--- /dev/null
+++ b/py/.gitignore
@@ -0,0 +1 @@
+*.o
diff --git a/py/Makefile b/py/Makefile
new file mode 100644
index 0000000000..669453dba9
--- /dev/null
+++ b/py/Makefile
@@ -0,0 +1,50 @@
+# Builds the 'py' executable from the C and assembly sources in this directory.
+CC = gcc
+# -std=gnu99 alone selects the C dialect (an additional -ansi asks for C90 and
+# is overridden by the later -std option, so it is not passed).
+CFLAGS = -Wall -std=gnu99 -Os #-DNDEBUG
+LDFLAGS =
+
+# NOTE(review): emitx64v2.c and runtime1.s are not among the files listed in the
+# diffstat that introduces this Makefile -- confirm they exist in the tree.
+SRC = \
+	malloc.c \
+	misc.c \
+	qstr.c \
+	lexer.c \
+	lexerfile.c \
+	parse.c \
+	scope.c \
+	compile.c \
+	emitcommon.c \
+	emitcpy.c \
+	emitbc.c \
+	asmx64.c \
+	emitx64v2.c \
+	emitthumb.c \
+	asmthumb.c \
+	runtime.c \
+	bc.c \
+	main.c
+
+SRC_ASM = \
+	runtime1.s
+
+OBJ = $(SRC:.c=.o) $(SRC_ASM:.s=.o)
+LIB =
+PROG = py
+
+$(PROG): $(OBJ)
+	$(CC) -o $@ $(OBJ) $(LIB) $(LDFLAGS)
+
+# these two files are compiled with -O3 instead of the default -Os
+runtime.o: runtime.c
+	$(CC) $(CFLAGS) -O3 -c -o $@ $<
+
+bc.o: bc.c
+	$(CC) $(CFLAGS) -O3 -c -o $@ $<
+
+# extra header dependencies
+parse.o: grammar.h
+compile.o: grammar.h
+emitcpy.o: emit.h
+emitbc.o: emit.h
+emitx64.o: emit.h
+emitx64v2.o: emit.h
+emitthumb.o: emit.h
+
+# 'clean' is not a file; -f keeps it from failing when nothing has been built yet
+.PHONY: clean
+clean:
+	/bin/rm -f $(OBJ) $(PROG)
diff --git a/py/asmthumb.c b/py/asmthumb.c
new file mode 100644
index 0000000000..ea7547d4b1
--- /dev/null
+++ b/py/asmthumb.c
@@ -0,0 +1,421 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+#include "misc.h"
+#include "machine.h"
+#include "asmthumb.h"
+
+// Range checks for immediates.  Every macro expands to one fully parenthesised
+// expression so that it can be combined safely with operators such as ! and &&.
+#define UNSIGNED_FIT8(x) (((x) & 0xffffff00) == 0)
+#define UNSIGNED_FIT16(x) (((x) & 0xffff0000) == 0)
+#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
+#define SIGNED_FIT9(x) ((((x) & 0xffffff00) == 0) || (((x) & 0xffffff00) == 0xffffff00))
+#define SIGNED_FIT12(x) ((((x) & 0xfffff800) == 0) || (((x) & 0xfffff800) == 0xfffff800))
+
+// State of one Thumb assembler instance.
+struct _asm_thumb_t {
+ // pass currently being run, as given to asm_thumb_start_pass
+ int pass;
+ // byte offset of the next instruction within the code of the current pass
+ uint code_offset;
+ // total size of the code in bytes, recorded at the end of pass 2
+ uint code_size;
+ // buffer that receives the code; allocated at the end of pass 2
+ byte *code_base;
+ // scratch area that absorbs the emitted bytes during passes before pass 3
+ byte dummy_data[8];
+
+ // id that the next call to asm_thumb_label_new returns; reset to 1 at the start of each pass
+ int next_label;
+ // number of entries in label_offsets, computed at the end of pass 1
+ int max_num_labels;
+ // code offset of each label, indexed by label id; -1 while not yet assigned
+ int *label_offsets;
+ // the following three are set by asm_thumb_entry
+ int num_locals;
+ uint push_reglist;
+ uint stack_adjust;
+};
+
+// Allocate a new Thumb assembler with every field in a defined initial state.
+// The result is released with asm_thumb_free.
+asm_thumb_t *asm_thumb_new(void) {
+    asm_thumb_t *as = m_new(asm_thumb_t, 1);
+
+    // Clear the whole structure so that fields which are normally filled in later
+    // (next_label, max_num_labels, push_reglist, stack_adjust, dummy_data) are never
+    // read uninitialised, e.g. if asm_thumb_exit runs without asm_thumb_entry.
+    memset(as, 0, sizeof(*as));
+    as->code_base = NULL;
+    as->label_offsets = NULL;
+
+    return as;
+}
+
+// Release an assembler created by asm_thumb_new.
+//  as:        the assembler to destroy
+//  free_code: if true the buffer holding the generated code is released as well,
+//             otherwise it is left allocated for the caller
+void asm_thumb_free(asm_thumb_t *as, bool free_code) {
+    if (free_code) {
+        m_free(as->code_base);
+    }
+    // the label table (allocated at the end of pass 1) is only needed while
+    // assembling, so it is always released together with the assembler
+    if (as->label_offsets != NULL) {
+        m_free(as->label_offsets);
+    }
+    m_free(as);
+}
+
+// Begin pass number 'pass': rewind the output offset and restart label numbering.
+void asm_thumb_start_pass(asm_thumb_t *as, int pass) {
+    as->pass = pass;
+    as->code_offset = 0;
+    as->next_label = 1;
+
+    switch (pass) {
+        case ASM_THUMB_PASS_1:
+            // the number of labels is counted afresh during this pass
+            as->max_num_labels = 0;
+            break;
+
+        case ASM_THUMB_PASS_2:
+            // setting every byte to 0xff makes each entry -1, i.e. "not yet assigned"
+            memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
+            break;
+
+        default:
+            break;
+    }
+}
+
+// Finish the current pass and allocate what the next one needs:
+// the label table after pass 1, the code buffer after pass 2.
+void asm_thumb_end_pass(asm_thumb_t *as) {
+    switch (as->pass) {
+        case ASM_THUMB_PASS_1:
+            // one table entry for every label id handed out during this pass
+            if (as->max_num_labels < as->next_label) {
+                as->max_num_labels = as->next_label;
+            }
+            as->label_offsets = m_new(int, as->max_num_labels);
+            break;
+
+        case ASM_THUMB_PASS_2:
+            // the offset reached in this pass is the size of the code in bytes
+            as->code_size = as->code_offset;
+            as->code_base = m_new(byte, as->code_size);
+            printf("code_size: %u\n", as->code_size);
+            break;
+
+        default:
+            break;
+    }
+}
+
+// all functions must go through this one to emit bytes
+// Advances the code offset by num_bytes_to_write and returns where those bytes are
+// to be stored: the scratch area before pass 3, the real code buffer in pass 3.
+static byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int num_bytes_to_write) {
+    byte *dest;
+    if (as->pass >= ASM_THUMB_PASS_3) {
+        assert(as->code_offset + num_bytes_to_write <= as->code_size);
+        dest = as->code_base + as->code_offset;
+    } else {
+        dest = as->dummy_data;
+    }
+    as->code_offset += num_bytes_to_write;
+    return dest;
+}
+
+// Size of the generated code in bytes (recorded at the end of pass 2).
+uint asm_thumb_get_code_size(asm_thumb_t *as) {
+    uint num_bytes = as->code_size;
+    return num_bytes;
+}
+
+// Address of the generated code, suitable for calling.
+void *asm_thumb_get_code(asm_thumb_t *as) {
+    machine_uint_t addr = (machine_uint_t)as->code_base;
+    // the low bit of the address is set to indicate that it's thumb code
+    return (void *)(addr | 1);
+}
+
+/*
+static void asm_thumb_write_byte_1(asm_thumb_t *as, byte b1) {
+ byte *c = asm_thumb_get_cur_to_write_bytes(as, 1);
+ c[0] = b1;
+}
+*/
+
+// Emit one 16-bit instruction.
+static void asm_thumb_write_op16(asm_thumb_t *as, uint op) {
+    byte *dest = asm_thumb_get_cur_to_write_bytes(as, 2);
+    // stored little endian: low byte first
+    *dest++ = op;
+    *dest = op >> 8;
+}
+
+// Emit one 32-bit instruction given as two halfwords; op1 is stored before op2.
+static void asm_thumb_write_op32(asm_thumb_t *as, uint op1, uint op2) {
+    byte *dest = asm_thumb_get_cur_to_write_bytes(as, 4);
+    // each halfword is stored little endian
+    *dest++ = op1;
+    *dest++ = op1 >> 8;
+    *dest++ = op2;
+    *dest = op2 >> 8;
+}
+
+/*
+#define IMM32_L0(x) ((x) & 0xff)
+#define IMM32_L1(x) (((x) >> 8) & 0xff)
+#define IMM32_L2(x) (((x) >> 16) & 0xff)
+#define IMM32_L3(x) (((x) >> 24) & 0xff)
+
+static void asm_thumb_write_word32(asm_thumb_t *as, int w32) {
+ byte *c = asm_thumb_get_cur_to_write_bytes(as, 4);
+ c[0] = IMM32_L0(w32);
+ c[1] = IMM32_L1(w32);
+ c[2] = IMM32_L2(w32);
+ c[3] = IMM32_L3(w32);
+}
+*/
+
+// rlolist is a bit map indicating desired lo-registers
+#define OP_PUSH_RLIST(rlolist) (0xb400 | (rlolist))
+#define OP_PUSH_RLIST_LR(rlolist) (0xb400 | 0x0100 | (rlolist))
+#define OP_POP_RLIST(rlolist) (0xbc00 | (rlolist))
+#define OP_POP_RLIST_PC(rlolist) (0xbc00 | 0x0100 | (rlolist))
+
+#define OP_ADD_SP(num_words) (0xb000 | (num_words))
+#define OP_SUB_SP(num_words) (0xb080 | (num_words))
+
+// Emit the function prologue: push registers (and lr) and, if needed, lower sp so
+// that there is room for num_locals locals and the stack stays 8-byte aligned.
+// The chosen register list and stack adjustment are remembered for asm_thumb_exit.
+void asm_thumb_entry(asm_thumb_t *as, int num_locals) {
+    if (num_locals < 0) {
+        num_locals = 0;
+    }
+
+    // r0 is never part of the list because it's used for the return value
+    uint reglist;
+    uint stack_adjust = 0;
+    if (num_locals <= 1) {
+        // r1, r4-r7
+        reglist = 0xf2;
+    } else {
+        // r1-r7
+        reglist = 0xfe;
+        if (num_locals > 3) {
+            // words needed beyond the first 3 locals, rounded up to an even number
+            stack_adjust = ((num_locals - 3) + 1) & (~1);
+        }
+    }
+
+    asm_thumb_write_op16(as, OP_PUSH_RLIST_LR(reglist));
+    if (stack_adjust > 0) {
+        asm_thumb_write_op16(as, OP_SUB_SP(stack_adjust));
+    }
+
+    as->num_locals = num_locals;
+    as->stack_adjust = stack_adjust;
+    as->push_reglist = reglist;
+}
+
+// Emit the function epilogue matching asm_thumb_entry: undo the stack adjustment,
+// then pop the saved registers together with pc.
+void asm_thumb_exit(asm_thumb_t *as) {
+    const uint adjust = as->stack_adjust;
+    if (adjust != 0) {
+        asm_thumb_write_op16(as, OP_ADD_SP(adjust));
+    }
+    const uint reglist = as->push_reglist;
+    asm_thumb_write_op16(as, OP_POP_RLIST_PC(reglist));
+}
+
+// Hand out the next unused label id.
+int asm_thumb_label_new(asm_thumb_t *as) {
+    int label = as->next_label;
+    as->next_label = label + 1;
+    return label;
+}
+
+// Bind 'label' to the current code offset.  Nothing is recorded in pass 1.
+void asm_thumb_label_assign(asm_thumb_t *as, int label) {
+    if (as->pass <= ASM_THUMB_PASS_1) {
+        return;
+    }
+
+    assert(label < as->max_num_labels);
+    switch (as->pass) {
+        case ASM_THUMB_PASS_2:
+            // record the offset; a label may be assigned only once
+            assert(as->label_offsets[label] == -1);
+            as->label_offsets[label] = as->code_offset;
+            break;
+
+        case ASM_THUMB_PASS_3:
+            // the offset must be the same as the one recorded in pass 2
+            assert(as->label_offsets[label] == as->code_offset);
+            break;
+
+        default:
+            break;
+    }
+}
+
+// Emit "movs rlo_dest, #i8".
+// the i8 value will be zero extended into the r32 register!
+// NOTE(review): i8 is or-ed into the opcode without masking, so callers presumably
+// pass 0..255 only -- confirm.
+void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8) {
+    assert(rlo_dest < REG_R8);
+    uint op = 0x2000 | (rlo_dest << 8) | i8;
+    asm_thumb_write_op16(as, op);
+}
+
+// Emit "movw reg_dest, #i16" (load_hi_half false) or "movt reg_dest, #i16"
+// (load_hi_half true).  Only the low 16 bits of i16 are encoded.
+// if loading lo half, the i16 value will be zero extended into the r32 register!
+void asm_thumb_mov_i16_to_reg(asm_thumb_t *as, int i16, uint reg_dest, bool load_hi_half) {
+    assert(reg_dest < REG_R15);
+
+    // Work on an unsigned 16-bit copy: asm_thumb_mov_reg_i32 passes a full 32-bit
+    // value here, and left-shifting a negative or overflowing signed int is undefined.
+    uint imm16 = (uint)i16 & 0xffff;
+
+    // base opcode: movt writes the top half, movw the bottom half
+    uint op = load_hi_half ? 0xf2c0 : 0xf240;
+
+    // the immediate is scattered over the two halfwords as imm4:i:imm3:imm8
+    uint op_hi = op | ((imm16 >> 1) & 0x0400) | ((imm16 >> 12) & 0xf);
+    uint op_lo = ((imm16 << 4) & 0x7000) | (reg_dest << 8) | (imm16 & 0xff);
+    asm_thumb_write_op32(as, op_hi, op_lo);
+}
+
+// Load the 32-bit constant i32 into reg_dest, always using 8 bytes of code.
+void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32) {
+    // movw, movt does it in 8 bytes
+    // ldr [pc, #], dw does it in 6 bytes, but we might not reach to end of code for dw
+    machine_uint_t lo_half = i32;
+    machine_uint_t hi_half = i32 >> 16;
+
+    asm_thumb_mov_i16_to_reg(as, lo_half, reg_dest, false);
+    asm_thumb_mov_i16_to_reg(as, hi_half, reg_dest, true);
+}
+
+// Load the constant i32 into reg_dest using the shortest of the available encodings.
+void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32) {
+    // 8-bit value into a lo register: single 16-bit instruction
+    if (reg_dest < 8 && UNSIGNED_FIT8(i32)) {
+        asm_thumb_mov_reg_i8(as, reg_dest, i32);
+        return;
+    }
+    // 16-bit value: a single move of the low half (which zero extends)
+    if (UNSIGNED_FIT16(i32)) {
+        asm_thumb_mov_i16_to_reg(as, i32, reg_dest, false);
+        return;
+    }
+    // anything else needs both halves
+    asm_thumb_mov_reg_i32(as, reg_dest, i32);
+}
+
+// Emit a register to register move; either register may be a lo or a hi register.
+void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src) {
+    // source: low 3 bits of the register number in bits 3-5, bit 6 set for r8 and above
+    uint src_bits = (reg_src < 8) ? (reg_src << 3) : (0x40 | ((reg_src - 8) << 3));
+    // destination: low 3 bits in bits 0-2, bit 7 set for r8 and above
+    uint dest_bits = (reg_dest < 8) ? reg_dest : (0x80 | (reg_dest - 8));
+    asm_thumb_write_op16(as, 0x4600 | src_bits | dest_bits);
+}
+
+#define OP_STR_TO_SP_OFFSET(rlo_dest, word_offset) (0x9000 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
+#define OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset) (0x9800 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
+
+// Store lo register rlo_src into local variable number local_num (sp-relative str).
+void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num, uint rlo_src) {
+    assert(rlo_src < REG_R8);
+    // the highest numbered local is at word offset 0 from sp
+    int word_offset = as->num_locals - local_num - 1;
+    assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0);
+    uint op = OP_STR_TO_SP_OFFSET(rlo_src, word_offset);
+    asm_thumb_write_op16(as, op);
+}
+
+// Load local variable number local_num into lo register rlo_dest (sp-relative ldr).
+void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num) {
+    assert(rlo_dest < REG_R8);
+    // the highest numbered local is at word offset 0 from sp
+    int word_offset = as->num_locals - local_num - 1;
+    assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0);
+    uint op = OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset);
+    asm_thumb_write_op16(as, op);
+}
+
+// Not implemented: meant to load the address of local local_num into reg_dest.
+// Asserts when reached; a placeholder halfword is emitted so the code size is defined.
+void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num) {
+    assert(0);
+    // see format 12, load address
+    const uint placeholder_op = 0x0000;
+    asm_thumb_write_op16(as, placeholder_op);
+}
+
+#define OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b) (0x1800 | ((rlo_src_b) << 6) | ((rlo_src_a) << 3) | (rlo_dest))
+
+// Emit an add of lo registers rlo_src_a and rlo_src_b with the result in rlo_dest.
+void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b) {
+    uint op = OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b);
+    asm_thumb_write_op16(as, op);
+}
+
+#define OP_CMP_REG_REG(rlo_a, rlo_b) (0x4280 | ((rlo_b) << 3) | (rlo_a))
+
+// Emit a compare of lo register rlo_a with lo register rlo_b.
+void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b) {
+    uint op = OP_CMP_REG_REG(rlo_a, rlo_b);
+    asm_thumb_write_op16(as, op);
+}
+
+// Emit an "ite ge" instruction (fixed opcode 0xbfac).
+void asm_thumb_ite_ge(asm_thumb_t *as) {
+    const uint op_ite_ge = 0xbfac;
+    asm_thumb_write_op16(as, op_ite_ge);
+}
+
+#define OP_B(byte_offset) (0xe000 | (((byte_offset) >> 1) & 0x07ff))
+// this could be wrong, because it should have a range of +/- 16MiB...
+#define OP_BW_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff))
+#define OP_BW_LO(byte_offset) (0xb800 | (((byte_offset) >> 1) & 0x07ff))
+
+// Emit an unconditional branch to 'label'.  Nothing is emitted in pass 1.
+// A backward branch whose offset fits in 12 bits uses the 16-bit encoding; every
+// other branch (including all forward ones) uses the 32-bit encoding, so that the
+// instruction has the same size in pass 2 and pass 3.
+void asm_thumb_b_label(asm_thumb_t *as, int label) {
+    if (as->pass <= ASM_THUMB_PASS_1) {
+        return;
+    }
+
+    int dest = as->label_offsets[label];
+    int rel = dest - as->code_offset;
+    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction
+
+    // dest is -1 while the label is still unassigned, i.e. for a forward branch in pass 2
+    bool is_short_backward = dest >= 0 && rel <= -4 && (SIGNED_FIT12(rel));
+    if (is_short_backward) {
+        asm_thumb_write_op16(as, OP_B(rel));
+    } else {
+        asm_thumb_write_op32(as, OP_BW_HI(rel), OP_BW_LO(rel));
+    }
+}
+
+#define OP_CMP_REG_IMM(rlo, i8) (0x2800 | ((rlo) << 8) | (i8))
+// all these bit arithmetics need coverage testing!
+#define OP_BEQ(byte_offset) (0xd000 | (((byte_offset) >> 1) & 0x00ff))
+#define OP_BEQW_HI(byte_offset) (0xf000 | (((byte_offset) >> 10) & 0x0400) | (((byte_offset) >> 14) & 0x003f))
+#define OP_BEQW_LO(byte_offset) (0x8000 | ((byte_offset) & 0x2000) | (((byte_offset) >> 1) & 0x0fff))
+
+// Emit "compare rlo with 0" followed by a branch-if-equal to 'label'.
+// The compare is emitted in every pass, the branch only after pass 1.  A backward
+// branch whose offset fits in 9 bits uses the 16-bit encoding; every other branch
+// (including all forward ones) uses the 32-bit encoding.
+void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label) {
+    assert(rlo < REG_R8);
+
+    // compare reg with 0
+    asm_thumb_write_op16(as, OP_CMP_REG_IMM(rlo, 0));
+
+    if (as->pass <= ASM_THUMB_PASS_1) {
+        return;
+    }
+
+    // branch if equal
+    int dest = as->label_offsets[label];
+    int rel = dest - as->code_offset;
+    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction
+
+    // dest is -1 while the label is still unassigned, i.e. for a forward branch in pass 2
+    bool is_short_backward = dest >= 0 && rel <= -4 && (SIGNED_FIT9(rel));
+    if (is_short_backward) {
+        asm_thumb_write_op16(as, OP_BEQ(rel));
+    } else {
+        asm_thumb_write_op32(as, OP_BEQW_HI(rel), OP_BEQW_LO(rel));
+    }
+}
+
+#define OP_BLX(reg) (0x4780 | ((reg) << 3))
+#define OP_SVC(arg) (0xdf00 | (arg))
+#define OP_LDR_FROM_BASE_OFFSET(rlo_dest, rlo_base, word_offset) (0x6800 | (((word_offset) << 6) & 0x07c0) | ((rlo_base) << 3) | (rlo_dest))
+
+// Emit an indirect call to function number fun_id.
+// The target address is loaded into reg_temp from the word at word offset fun_id
+// from r7, and reg_temp is then used by blx.  fun_ptr is not used by this sequence.
+// TODO make this use less bytes
+// Two other sequences are possible but not used here: loading fun_ptr as an
+// immediate into reg_temp before the blx (not relocatable), or "svc #fun_id".
+void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp) {
+    // ldr reg_temp, [r7, #offset]
+    uint op_load = OP_LDR_FROM_BASE_OFFSET(reg_temp, REG_R7, fun_id);
+    asm_thumb_write_op16(as, op_load);
+
+    // blx reg_temp
+    uint op_call = OP_BLX(reg_temp);
+    asm_thumb_write_op16(as, op_call);
+}
diff --git a/py/asmthumb.h b/py/asmthumb.h
new file mode 100644
index 0000000000..d3ffb9a003
--- /dev/null
+++ b/py/asmthumb.h
@@ -0,0 +1,60 @@
+// Interface of the Thumb assembler implemented in asmthumb.c.
+// This header uses bool, uint and machine_uint_t without declaring them, so the
+// including file has to provide them first.
+#ifndef PY_ASMTHUMB_H
+#define PY_ASMTHUMB_H
+
+// pass numbers for asm_thumb_start_pass
+#define ASM_THUMB_PASS_1 (1)
+#define ASM_THUMB_PASS_2 (2)
+#define ASM_THUMB_PASS_3 (3)
+
+// register numbers
+#define REG_R0 (0)
+#define REG_R1 (1)
+#define REG_R2 (2)
+#define REG_R3 (3)
+#define REG_R4 (4)
+#define REG_R5 (5)
+#define REG_R6 (6)
+#define REG_R7 (7)
+#define REG_R8 (8)
+#define REG_R9 (9)
+#define REG_R10 (10)
+#define REG_R11 (11)
+#define REG_R12 (12)
+#define REG_R13 (13)
+#define REG_R14 (14)
+#define REG_R15 (15)
+#define REG_LR (REG_R14)
+
+// registers used for the return value and the first four arguments
+#define REG_RET REG_R0
+#define REG_ARG_1 REG_R0
+#define REG_ARG_2 REG_R1
+#define REG_ARG_3 REG_R2
+#define REG_ARG_4 REG_R3
+
+typedef struct _asm_thumb_t asm_thumb_t;
+
+// creation, destruction and pass control
+asm_thumb_t *asm_thumb_new(void);
+void asm_thumb_free(asm_thumb_t *as, bool free_code);
+void asm_thumb_start_pass(asm_thumb_t *as, int pass);
+void asm_thumb_end_pass(asm_thumb_t *as);
+uint asm_thumb_get_code_size(asm_thumb_t *as);
+void *asm_thumb_get_code(asm_thumb_t *as);
+
+// function prologue and epilogue
+void asm_thumb_entry(asm_thumb_t *as, int num_locals);
+void asm_thumb_exit(asm_thumb_t *as);
+
+// labels
+int asm_thumb_label_new(asm_thumb_t *as);
+void asm_thumb_label_assign(asm_thumb_t *as, int label);
+
+// argument order follows ARM, in general dest is first
+
+void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8_src);
+void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32_src);
+void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32_src);
+void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src);
+void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num_dest, uint rlo_src);
+void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num);
+void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num);
+
+void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b);
+void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b);
+void asm_thumb_ite_ge(asm_thumb_t *as);
+
+// branches and calls
+void asm_thumb_b_label(asm_thumb_t *as, int label);
+void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label);
+void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp);
+
+#endif // PY_ASMTHUMB_H
diff --git a/py/asmx64.c b/py/asmx64.c
new file mode 100644
index 0000000000..59c8113bc4
--- /dev/null
+++ b/py/asmx64.c
@@ -0,0 +1,621 @@
+#include <stdio.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include "misc.h"
+#include "asmx64.h"
+
+/* all offsets are measured in multiples of 8 bytes */
+#define WORD_SIZE (8)
+
+#define OPCODE_NOP (0x90)
+#define OPCODE_PUSH_R64 (0x50)
+#define OPCODE_PUSH_I64 (0x68)
+#define OPCODE_PUSH_M64 (0xff) /* /6 */
+#define OPCODE_POP_R64 (0x58)
+#define OPCODE_RET (0xc3)
+#define OPCODE_MOV_I8_TO_R8 (0xb0) /* +rb */
+#define OPCODE_MOV_I64_TO_R64 (0xb8)
+#define OPCODE_MOV_I32_TO_RM32 (0xc7)
+#define OPCODE_MOV_R64_TO_RM64 (0x89)
+#define OPCODE_MOV_RM64_TO_R64 (0x8b)
+#define OPCODE_LEA_MEM_TO_R64 (0x8d) /* /r */
+#define OPCODE_XOR_R64_TO_RM64 (0x31) /* /r */
+#define OPCODE_ADD_R64_TO_RM64 (0x01)
+#define OPCODE_ADD_I32_TO_RM32 (0x81) /* /0 */
+#define OPCODE_ADD_I8_TO_RM32 (0x83) /* /0 */
+#define OPCODE_SUB_R64_FROM_RM64 (0x29)
+#define OPCODE_SUB_I32_FROM_RM64 (0x81) /* /5 */
+#define OPCODE_SUB_I8_FROM_RM64 (0x83) /* /5 */
+#define OPCODE_SHL_RM32_BY_I8 (0xc1) /* /4 */
+#define OPCODE_SHR_RM32_BY_I8 (0xc1) /* /5 */
+#define OPCODE_SAR_RM32_BY_I8 (0xc1) /* /7 */
+#define OPCODE_CMP_I32_WITH_RM32 (0x81) /* /7 */
+#define OPCODE_CMP_I8_WITH_RM32 (0x83) /* /7 */
+#define OPCODE_CMP_R64_WITH_RM64 (0x39)
+#define OPCODE_CMP_RM32_WITH_R32 (0x3b)
+#define OPCODE_TEST_R8_WITH_RM8 (0x84) /* /r */
+#define OPCODE_JMP_REL8 (0xeb)
+#define OPCODE_JMP_REL32 (0xe9)
+#define OPCODE_JCC_REL8 (0x70) /* | jcc type */
+#define OPCODE_JCC_REL32_A (0x0f)
+#define OPCODE_JCC_REL32_B (0x80) /* | jcc type */
+#define OPCODE_SETCC_RM8_A (0x0f)
+#define OPCODE_SETCC_RM8_B (0x90) /* | jcc type, /0 */
+#define OPCODE_CALL_REL32 (0xe8)
+#define OPCODE_CALL_RM32 (0xff) /* /2 */
+#define OPCODE_LEAVE (0xc9)
+
+#define MODRM_R64(x) ((x) << 3)
+#define MODRM_RM_DISP0 (0x00)
+#define MODRM_RM_DISP8 (0x40)
+#define MODRM_RM_DISP32 (0x80)
+#define MODRM_RM_REG (0xc0)
+#define MODRM_RM_R64(x) (x)
+
+#define REX_PREFIX (0x40)
+#define REX_W (0x08) // width
+#define REX_R (0x04) // register
+#define REX_X (0x02) // index
+#define REX_B (0x01) // base
+
+#define IMM32_L0(x) ((x) & 0xff)
+#define IMM32_L1(x) (((x) >> 8) & 0xff)
+#define IMM32_L2(x) (((x) >> 16) & 0xff)
+#define IMM32_L3(x) (((x) >> 24) & 0xff)
+#define IMM64_L4(x) (((x) >> 32) & 0xff)
+#define IMM64_L5(x) (((x) >> 40) & 0xff)
+#define IMM64_L6(x) (((x) >> 48) & 0xff)
+#define IMM64_L7(x) (((x) >> 56) & 0xff)
+
+// Range checks for immediates.  Every macro expands to one fully parenthesised
+// expression so that it can be combined safely with operators such as ! and &&.
+#define UNSIGNED_FIT8(x) (((x) & 0xffffffffffffff00) == 0)
+#define UNSIGNED_FIT32(x) (((x) & 0xffffffff00000000) == 0)
+#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
+
+// State of one x64 assembler instance.
+struct _asm_x64_t {
+ // pass currently being run, as given to asm_x64_start_pass
+ int pass;
+ // byte offset of the next instruction within the code of the current pass
+ uint code_offset;
+ // total size of the code in bytes, recorded at the end of pass 2
+ uint code_size;
+ // buffer that receives the code; allocated at the end of pass 2
+ byte *code_base;
+ // scratch area that absorbs the emitted bytes during passes before pass 3
+ byte dummy_data[8];
+
+ // id that the next call to asm_x64_label_new returns; reset to 1 at the start of each pass
+ int next_label;
+ // number of entries in label_offsets, computed at the end of pass 1
+ int max_num_labels;
+ // code offset of each label, indexed by label id; -1 while not yet assigned
+ int *label_offsets;
+};
+
+// for allocating memory, see src/v8/src/platform-linux.cc
+// Map a private anonymous region of memory that is readable and writable.
+//  req_size:   number of bytes wanted; rounded up to a multiple of 4096
+//  alloc_size: receives the rounded-up size that was mapped, or 0 on failure
+//  is_exec:    if true the region is executable as well
+// Returns the start of the region, or NULL if it could not be mapped.
+void *alloc_mem(uint req_size, uint *alloc_size, bool is_exec) {
+    req_size = (req_size + 0xfff) & (~0xfff);
+
+    int prot = PROT_READ | PROT_WRITE;
+    if (is_exec) {
+        prot |= PROT_EXEC;
+    }
+
+    void *ptr = mmap(NULL, req_size, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (ptr == MAP_FAILED) {
+        // the assert disappears when compiled with -DNDEBUG, so make sure that
+        // MAP_FAILED is never handed back to the caller as if it were memory
+        assert(0);
+        *alloc_size = 0;
+        return NULL;
+    }
+
+    *alloc_size = req_size;
+    return ptr;
+}
+
+// Allocate a new x64 assembler with every field in a defined initial state.
+// The result is released with asm_x64_free.
+asm_x64_t *asm_x64_new(void) {
+    asm_x64_t *as = m_new(asm_x64_t, 1);
+
+    // Clear the whole structure so that fields which are normally filled in later
+    // (next_label, max_num_labels, dummy_data) are never read uninitialised.
+    memset(as, 0, sizeof(*as));
+    as->code_base = NULL;
+    as->label_offsets = NULL;
+
+    return as;
+}
+
+// Release an assembler created by asm_x64_new.
+//  as:        the assembler to destroy
+//  free_code: if true the buffer holding the generated code is released as well,
+//             otherwise it is left allocated for the caller
+void asm_x64_free(asm_x64_t *as, bool free_code) {
+    if (free_code) {
+        m_free(as->code_base);
+    }
+    // the label table (allocated at the end of pass 1) is only needed while
+    // assembling, so it is always released together with the assembler
+    if (as->label_offsets != NULL) {
+        m_free(as->label_offsets);
+    }
+    m_free(as);
+}
+
+// Begin pass number 'pass': rewind the output offset and restart label numbering.
+void asm_x64_start_pass(asm_x64_t *as, int pass) {
+    as->pass = pass;
+    as->code_offset = 0;
+    as->next_label = 1;
+
+    switch (pass) {
+        case ASM_X64_PASS_1:
+            // the number of labels is counted afresh during this pass
+            as->max_num_labels = 0;
+            break;
+
+        case ASM_X64_PASS_2:
+            // setting every byte to 0xff makes each entry -1, i.e. "not yet assigned"
+            memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
+            break;
+
+        default:
+            break;
+    }
+}
+
+// Finish the current pass and allocate what the next one needs:
+// the label table after pass 1, the code buffer after pass 2.
+void asm_x64_end_pass(asm_x64_t *as) {
+    switch (as->pass) {
+        case ASM_X64_PASS_1:
+            // one table entry for every label id handed out during this pass
+            if (as->max_num_labels < as->next_label) {
+                as->max_num_labels = as->next_label;
+            }
+            as->label_offsets = m_new(int, as->max_num_labels);
+            break;
+
+        case ASM_X64_PASS_2:
+            // the offset reached in this pass is the size of the code in bytes
+            as->code_size = as->code_offset;
+            as->code_base = m_new(byte, as->code_size);
+            printf("code_size: %u\n", as->code_size);
+            break;
+
+        default:
+            break;
+    }
+}
+
+// all functions must go through this one to emit bytes
+// Advances the code offset by num_bytes_to_write and returns where those bytes are
+// to be stored: the scratch area before pass 3, the real code buffer in pass 3.
+static byte *asm_x64_get_cur_to_write_bytes(asm_x64_t *as, int num_bytes_to_write) {
+    byte *dest;
+    if (as->pass >= ASM_X64_PASS_3) {
+        assert(as->code_offset + num_bytes_to_write <= as->code_size);
+        dest = as->code_base + as->code_offset;
+    } else {
+        dest = as->dummy_data;
+    }
+    as->code_offset += num_bytes_to_write;
+    return dest;
+}
+
+// Size of the generated code in bytes (recorded at the end of pass 2).
+uint asm_x64_get_code_size(asm_x64_t *as) {
+    uint num_bytes = as->code_size;
+    return num_bytes;
+}
+
+// Start of the buffer holding the generated code.
+void *asm_x64_get_code(asm_x64_t *as) {
+    byte *code = as->code_base;
+    return code;
+}
+
+// Emit a single byte.
+static void asm_x64_write_byte_1(asm_x64_t *as, byte b1) {
+    byte *dest = asm_x64_get_cur_to_write_bytes(as, 1);
+    *dest = b1;
+}
+
+// Emit two bytes, b1 first.
+static void asm_x64_write_byte_2(asm_x64_t *as, byte b1, byte b2) {
+    byte *dest = asm_x64_get_cur_to_write_bytes(as, 2);
+    *dest++ = b1;
+    *dest = b2;
+}
+
+// Emit three bytes, in the order b1, b2, b3.
+static void asm_x64_write_byte_3(asm_x64_t *as, byte b1, byte b2, byte b3) {
+    byte *dest = asm_x64_get_cur_to_write_bytes(as, 3);
+    *dest++ = b1;
+    *dest++ = b2;
+    *dest = b3;
+}
+
+// Emit the 32-bit value w32 as 4 bytes, least significant byte first.
+static void asm_x64_write_word32(asm_x64_t *as, int w32) {
+    byte *dest = asm_x64_get_cur_to_write_bytes(as, 4);
+    for (int i = 0; i < 4; i++) {
+        dest[i] = (w32 >> (8 * i)) & 0xff;
+    }
+}
+
+// Emit the 64-bit value w64 as 8 bytes, least significant byte first.
+static void asm_x64_write_word64(asm_x64_t *as, int64_t w64) {
+    byte *dest = asm_x64_get_cur_to_write_bytes(as, 8);
+    for (int i = 0; i < 8; i++) {
+        dest[i] = (w64 >> (8 * i)) & 0xff;
+    }
+}
+
+/* unused
+static void asm_x64_write_word32_to(asm_x64_t* as, int offset, int w32) {
+ byte* c;
+ assert(offset + 4 <= as->code_size);
+ c = as->code_base + offset;
+ c[0] = IMM32_L0(w32);
+ c[1] = IMM32_L1(w32);
+ c[2] = IMM32_L2(w32);
+ c[3] = IMM32_L3(w32);
+}
+*/
+
+// Emit the ModRM byte and displacement for the memory operand [disp_r64 + disp_offset],
+// with r64 in the reg field.  The shortest displacement form that fits is chosen:
+// none, 8 bits or 32 bits.  The base register must not be rsp.
+static void asm_x64_write_r64_disp(asm_x64_t *as, int r64, int disp_r64, int disp_offset) {
+    assert(disp_r64 != REG_RSP);
+
+    // the reg field and the base register are the same in all three forms
+    int modrm = MODRM_R64(r64) | MODRM_RM_R64(disp_r64);
+
+    // the form without displacement is not used when the base is rbp
+    if (disp_offset == 0 && disp_r64 != REG_RBP) {
+        asm_x64_write_byte_1(as, modrm | MODRM_RM_DISP0);
+        return;
+    }
+
+    if (SIGNED_FIT8(disp_offset)) {
+        asm_x64_write_byte_2(as, modrm | MODRM_RM_DISP8, IMM32_L0(disp_offset));
+        return;
+    }
+
+    asm_x64_write_byte_1(as, modrm | MODRM_RM_DISP32);
+    asm_x64_write_word32(as, disp_offset);
+}
+
+// Emit a one-byte nop.
+void asm_x64_nop(asm_x64_t *as) {
+    const int opcode = OPCODE_NOP;
+    asm_x64_write_byte_1(as, opcode);
+}
+
+// Emit a push of register src_r64; the register number is part of the opcode byte.
+void asm_x64_push_r64(asm_x64_t *as, int src_r64) {
+    int opcode = OPCODE_PUSH_R64 | src_r64;
+    asm_x64_write_byte_1(as, opcode);
+}
+
+// Emit a push of the immediate src_i32.
+void asm_x64_push_i32(asm_x64_t *as, int src_i32) {
+    const int opcode = OPCODE_PUSH_I64;
+    asm_x64_write_byte_1(as, opcode);
+    // only 32 bits are encoded; will be sign extended to 64 bits
+    asm_x64_write_word32(as, src_i32);
+}
+
+// Emit a push of the memory operand [src_r64 + src_offset].
+void asm_x64_push_disp(asm_x64_t *as, int src_r64, int src_offset) {
+    // this opcode takes the value 6 in the reg field of the ModRM byte (/6)
+    const int opcode_ext = 6;
+    asm_x64_write_byte_1(as, OPCODE_PUSH_M64);
+    asm_x64_write_r64_disp(as, opcode_ext, src_r64, src_offset);
+}
+
+// Emit a pop into register dest_r64; the register number is part of the opcode byte.
+void asm_x64_pop_r64(asm_x64_t *as, int dest_r64) {
+    int opcode = OPCODE_POP_R64 | dest_r64;
+    asm_x64_write_byte_1(as, opcode);
+}
+
+// Emit a ret.
+static void asm_x64_ret(asm_x64_t *as) {
+    const int opcode = OPCODE_RET;
+    asm_x64_write_byte_1(as, opcode);
+}
+
+// Emit a 32-bit register to register move from src_r32 to dest_r32.
+void asm_x64_mov_r32_to_r32(asm_x64_t *as, int src_r32, int dest_r32) {
+    // no REX prefix is emitted: defaults to 32 bit operation
+    int modrm = MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);
+    asm_x64_write_byte_2(as, OPCODE_MOV_R64_TO_RM64, modrm);
+}
+
+// Emit a 64-bit register to register move from src_r64 to dest_r64.
+void asm_x64_mov_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
+    // the REX.W prefix selects the 64 bit operation
+    int rex = REX_PREFIX | REX_W;
+    int modrm = MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
+    asm_x64_write_byte_3(as, rex, OPCODE_MOV_R64_TO_RM64, modrm);
+}
+
+// Emit a 64-bit store of register src_r64 to memory at [dest_r64 + dest_disp].
+void asm_x64_mov_r64_to_disp(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp) {
+    // the REX.W prefix selects the 64 bit operation
+    int rex = REX_PREFIX | REX_W;
+    asm_x64_write_byte_2(as, rex, OPCODE_MOV_R64_TO_RM64);
+    asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp);
+}
+
+// Emit a 64-bit load of memory at [src_r64 + src_disp] into register dest_r64.
+void asm_x64_mov_disp_to_r64(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
+    // the REX.W prefix selects the 64 bit operation
+    int rex = REX_PREFIX | REX_W;
+    asm_x64_write_byte_2(as, rex, OPCODE_MOV_RM64_TO_R64);
+    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
+}
+
+// Emit a lea of the address src_r64 + src_disp into register dest_r64.
+void asm_x64_lea_disp_to_r64(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
+    // the REX.W prefix selects the 64 bit operation
+    int rex = REX_PREFIX | REX_W;
+    asm_x64_write_byte_2(as, rex, OPCODE_LEA_MEM_TO_R64);
+    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
+}
+
+// Emit a move of the 8-bit immediate src_i8 into the 8-bit register numbered dest_r64;
+// the register number is part of the opcode byte.
+void asm_x64_mov_i8_to_r8(asm_x64_t *as, int src_i8, int dest_r64) {
+    int opcode = OPCODE_MOV_I8_TO_R8 | dest_r64;
+    asm_x64_write_byte_2(as, opcode, src_i8);
+}
+
+// Emit a move of the 32-bit immediate src_i32 into register dest_r64 (5 bytes).
+void asm_x64_mov_i32_to_r64(asm_x64_t *as, int src_i32, int dest_r64) {
+    // without a REX prefix the cpu defaults to i32 to r64, with zero extension
+    int opcode = OPCODE_MOV_I64_TO_R64 | dest_r64;
+    asm_x64_write_byte_1(as, opcode);
+    asm_x64_write_word32(as, src_i32);
+}
+
+// Emit a move of the 64-bit immediate src_i64 into register dest_r64 (10 bytes).
+void asm_x64_mov_i64_to_r64(asm_x64_t *as, int64_t src_i64, int dest_r64) {
+    // cpu defaults to i32 to r64
+    // to mov i64 to r64 need to use REX prefix
+    int rex = REX_PREFIX | REX_W;
+    int opcode = OPCODE_MOV_I64_TO_R64 | dest_r64;
+    asm_x64_write_byte_2(as, rex, opcode);
+    asm_x64_write_word64(as, src_i64);
+}
+
+// Emit a move of the immediate src_i64 into register dest_r64, using the short
+// 32-bit form when the upper 32 bits of the value are all zero.
+void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64) {
+    if (!UNSIGNED_FIT32(src_i64)) {
+        // 10 bytes
+        asm_x64_mov_i64_to_r64(as, src_i64, dest_r64);
+        return;
+    }
+    // 5 bytes
+    asm_x64_mov_i32_to_r64(as, src_i64 & 0xffffffff, dest_r64);
+}
+
+// Not implemented: meant to store the immediate src_i32 to memory at [dest_r32 + dest_disp].
+// Asserts when reached.  Only the opcode and the immediate are emitted; the bytes
+// describing the memory operand are missing.
+void asm_x64_mov_i32_to_disp(asm_x64_t *as, int src_i32, int dest_r32, int dest_disp) {
+    assert(0);
+    const int opcode = OPCODE_MOV_I32_TO_RM32;
+    asm_x64_write_byte_1(as, opcode);
+    // TODO: the ModRM/displacement bytes for dest_r32 and dest_disp belong here
+    asm_x64_write_word32(as, src_i32);
+}
+
+// Emit a 64-bit xor of register src_r64 into register dest_r64.
+void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
+    int rex = REX_PREFIX | REX_W;
+    int modrm = MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
+    asm_x64_write_byte_3(as, rex, OPCODE_XOR_R64_TO_RM64, modrm);
+}
+
+// Emit a 64-bit add of register src_r64 to register dest_r64.
+void asm_x64_add_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
+    int rex = REX_PREFIX | REX_W;
+    int modrm = MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
+    asm_x64_write_byte_3(as, rex, OPCODE_ADD_R64_TO_RM64, modrm);
+}
+
+// Emit a 32-bit add of the immediate src_i32 to register dest_r32, using the
+// one-byte immediate form when the value fits in a signed 8 bits.
+void asm_x64_add_i32_to_r32(asm_x64_t *as, int src_i32, int dest_r32) {
+    assert(dest_r32 != REG_RSP); // in this case i think src_i32 must be 64 bits
+
+    // both forms take the value 0 in the reg field of the ModRM byte (/0)
+    int modrm = MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);
+
+    if (SIGNED_FIT8(src_i32)) {
+        asm_x64_write_byte_2(as, OPCODE_ADD_I8_TO_RM32, modrm);
+        asm_x64_write_byte_1(as, src_i32 & 0xff);
+        return;
+    }
+
+    asm_x64_write_byte_2(as, OPCODE_ADD_I32_TO_RM32, modrm);
+    asm_x64_write_word32(as, src_i32);
+}
+
+// Emit a 32-bit subtract of register src_r32 from register dest_r32.
+void asm_x64_sub_r32_from_r32(asm_x64_t *as, int src_r32, int dest_r32) {
+    // no REX prefix is emitted: defaults to 32 bit operation
+    int modrm = MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);
+    asm_x64_write_byte_2(as, OPCODE_SUB_R64_FROM_RM64, modrm);
+}
+
+// Emit a 64-bit subtract of register src_r64 from register dest_r64.
+void asm_x64_sub_r64_from_r64(asm_x64_t *as, int src_r64, int dest_r64) {
+    // the REX.W prefix selects the 64 bit operation
+    int rex = REX_PREFIX | REX_W;
+    int modrm = MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
+    asm_x64_write_byte_3(as, rex, OPCODE_SUB_R64_FROM_RM64, modrm);
+}
+
+// Emit a 32-bit subtract of the immediate src_i32 from register dest_r32, using the
+// one-byte immediate form when the value fits in a signed 8 bits.
+void asm_x64_sub_i32_from_r32(asm_x64_t *as, int src_i32, int dest_r32) {
+    // no REX prefix is emitted: defaults to 32 bit operation
+    // both forms take the value 5 in the reg field of the ModRM byte (/5)
+    int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);
+
+    if (SIGNED_FIT8(src_i32)) {
+        asm_x64_write_byte_2(as, OPCODE_SUB_I8_FROM_RM64, modrm);
+        asm_x64_write_byte_1(as, src_i32 & 0xff);
+        return;
+    }
+
+    asm_x64_write_byte_2(as, OPCODE_SUB_I32_FROM_RM64, modrm);
+    asm_x64_write_word32(as, src_i32);
+}
+
+// Emit a 64-bit subtract of the immediate src_i32 from register dest_r64, using the
+// one-byte immediate form when the value fits in a signed 8 bits.
+void asm_x64_sub_i32_from_r64(asm_x64_t *as, int src_i32, int dest_r64) {
+    // the REX.W prefix selects the 64 bit operation
+    int rex = REX_PREFIX | REX_W;
+    // both forms take the value 5 in the reg field of the ModRM byte (/5)
+    int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
+
+    if (SIGNED_FIT8(src_i32)) {
+        asm_x64_write_byte_3(as, rex, OPCODE_SUB_I8_FROM_RM64, modrm);
+        asm_x64_write_byte_1(as, src_i32 & 0xff);
+        return;
+    }
+
+    asm_x64_write_byte_3(as, rex, OPCODE_SUB_I32_FROM_RM64, modrm);
+    asm_x64_write_word32(as, src_i32);
+}
+
+/* shifts not tested */
+// Emit a 32-bit left shift of register r32 by the immediate count imm.
+void asm_x64_shl_r32_by_imm(asm_x64_t *as, int r32, int imm) {
+    // the value 4 in the reg field of the ModRM byte selects this shift (/4)
+    int modrm = MODRM_R64(4) | MODRM_RM_REG | MODRM_RM_R64(r32);
+    asm_x64_write_byte_2(as, OPCODE_SHL_RM32_BY_I8, modrm);
+    asm_x64_write_byte_1(as, imm);
+}
+
+// Emit a 32-bit logical right shift of register r32 by the immediate count imm (not tested).
+void asm_x64_shr_r32_by_imm(asm_x64_t *as, int r32, int imm) {
+    // the value 5 in the reg field of the ModRM byte selects this shift (/5)
+    int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(r32);
+    asm_x64_write_byte_2(as, OPCODE_SHR_RM32_BY_I8, modrm);
+    asm_x64_write_byte_1(as, imm);
+}
+
+// Emit a 32-bit arithmetic right shift of register r32 by the immediate count imm (not tested).
+void asm_x64_sar_r32_by_imm(asm_x64_t *as, int r32, int imm) {
+    // the value 7 in the reg field of the ModRM byte selects this shift (/7)
+    int modrm = MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(r32);
+    asm_x64_write_byte_2(as, OPCODE_SAR_RM32_BY_I8, modrm);
+    asm_x64_write_byte_1(as, imm);
+}
+
+// Emit a 64-bit compare; src_r64_a goes in the reg field and src_r64_b in the rm field.
+void asm_x64_cmp_r64_with_r64(asm_x64_t *as, int src_r64_a, int src_r64_b) {
+    int rex = REX_PREFIX | REX_W;
+    int modrm = MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b);
+    asm_x64_write_byte_3(as, rex, OPCODE_CMP_R64_WITH_RM64, modrm);
+}
+
+// Not implemented: meant to compare register src_r32_a with memory at [src_r32_b + src_disp_b].
+// Asserts when reached.  Only the opcode is emitted; the operand bytes are missing.
+void asm_x64_cmp_r32_with_disp(asm_x64_t *as, int src_r32_a, int src_r32_b, int src_disp_b) {
+    assert(0);
+    const int opcode = OPCODE_CMP_R64_WITH_RM64;
+    asm_x64_write_byte_1(as, opcode);
+    // TODO: the ModRM/displacement bytes for src_r32_a, src_r32_b and src_disp_b belong here
+}
+
+// Not implemented: meant to compare memory at [src_r32_a + src_disp_a] with register src_r32_b.
+// Asserts when reached.  Only the opcode is emitted; the operand bytes are missing.
+void asm_x64_cmp_disp_with_r32(asm_x64_t *as, int src_r32_a, int src_disp_a, int src_r32_b) {
+    assert(0);
+    const int opcode = OPCODE_CMP_RM32_WITH_R32;
+    asm_x64_write_byte_1(as, opcode);
+    // TODO: the ModRM/displacement bytes for src_r32_b, src_r32_a and src_disp_a belong here
+}
+
+// Emit a 32-bit compare of register src_r32 with the immediate src_i32, using the
+// one-byte immediate form when the value fits in a signed 8 bits.
+void asm_x64_cmp_i32_with_r32(asm_x64_t *as, int src_i32, int src_r32) {
+    // both forms take the value 7 in the reg field of the ModRM byte (/7)
+    int modrm = MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(src_r32);
+
+    if (SIGNED_FIT8(src_i32)) {
+        asm_x64_write_byte_2(as, OPCODE_CMP_I8_WITH_RM32, modrm);
+        asm_x64_write_byte_1(as, src_i32 & 0xff);
+        return;
+    }
+
+    asm_x64_write_byte_2(as, OPCODE_CMP_I32_WITH_RM32, modrm);
+    asm_x64_write_word32(as, src_i32);
+}
+
+// Emit an 8-bit test; src_r64_a goes in the reg field and src_r64_b in the rm field.
+void asm_x64_test_r8_with_r8(asm_x64_t *as, int src_r64_a, int src_r64_b) {
+    int modrm = MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b);
+    asm_x64_write_byte_2(as, OPCODE_TEST_R8_WITH_RM8, modrm);
+}
+
+void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8) {
+    // two-byte opcode whose second byte carries the condition code, then a ModRM byte selecting dest_r8
+    int opcode_b = OPCODE_SETCC_RM8_B | jcc_type;
+    int modrm = MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r8);
+    asm_x64_write_byte_3(as, OPCODE_SETCC_RM8_A, opcode_b, modrm);
+}
+
+int asm_x64_label_new(asm_x64_t* as) {
+    // hand out the current label number and advance the counter for the next caller
+    int label = as->next_label;
+    as->next_label += 1;
+    return label;
+}
+
+void asm_x64_label_assign(asm_x64_t* as, int label) {
+    // nothing is recorded on the first pass
+    if (as->pass <= ASM_X64_PASS_1) {
+        return;
+    }
+
+    assert(label < as->max_num_labels);
+
+    if (as->pass == ASM_X64_PASS_2) {
+        // second pass: record where the label lands; it must not have been assigned before
+        assert(as->label_offsets[label] == -1);
+        as->label_offsets[label] = as->code_offset;
+    } else if (as->pass == ASM_X64_PASS_3) {
+        // third pass: the label must land exactly where the second pass put it
+        assert(as->label_offsets[label] == as->code_offset);
+    }
+}
+
+void asm_x64_jmp_label(asm_x64_t* as, int label) {
+    // no jump is emitted on the first pass
+    if (as->pass <= ASM_X64_PASS_1) {
+        return;
+    }
+
+    int dest = as->label_offsets[label];
+    int rel = dest - as->code_offset;
+
+    if (dest >= 0 && rel < 0) {
+        // backwards jump to an already-assigned label: try the 2-byte form,
+        // whose displacement is measured from the end of that 2-byte instruction
+        int rel8 = rel - 2;
+        if (SIGNED_FIT8(rel8)) {
+            asm_x64_write_byte_2(as, OPCODE_JMP_REL8, rel8 & 0xff);
+            return;
+        }
+    }
+
+    // forwards jump (size must be assumed large) or out-of-range backwards jump:
+    // 5-byte form, displacement measured from the end of the instruction
+    asm_x64_write_byte_1(as, OPCODE_JMP_REL32);
+    asm_x64_write_word32(as, rel - 5);
+}
+
+void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label) {
+    // no jump is emitted on the first pass
+    if (as->pass <= ASM_X64_PASS_1) {
+        return;
+    }
+
+    int dest = as->label_offsets[label];
+    int rel = dest - as->code_offset;
+
+    if (dest >= 0 && rel < 0) {
+        // backwards jump to an already-assigned label: try the 2-byte form,
+        // whose displacement is measured from the end of that 2-byte instruction
+        int rel8 = rel - 2;
+        if (SIGNED_FIT8(rel8)) {
+            asm_x64_write_byte_2(as, OPCODE_JCC_REL8 | jcc_type, rel8 & 0xff);
+            return;
+        }
+    }
+
+    // forwards jump (size must be assumed large) or out-of-range backwards jump:
+    // 6-byte form (two opcode bytes + 32-bit displacement)
+    asm_x64_write_byte_2(as, OPCODE_JCC_REL32_A, OPCODE_JCC_REL32_B | jcc_type);
+    asm_x64_write_word32(as, rel - 6);
+}
+
+void asm_x64_entry(asm_x64_t* as, int num_locals) {
+    // save the caller's RBP and make RBP the base of the new frame
+    asm_x64_push_r64(as, REG_RBP);
+    asm_x64_mov_r64_to_r64(as, REG_RSP, REG_RBP);
+
+    // a negative count is treated as zero; the count is then made odd so that,
+    // together with the RBX push below, the stack is aligned on a 16 byte boundary
+    int n_slots = (num_locals < 0 ? 0 : num_locals) | 1;
+    asm_x64_sub_i32_from_r64(as, n_slots * WORD_SIZE, REG_RSP);
+
+    asm_x64_push_r64(as, REG_RBX);
+}
+
+// Emits the function epilogue matching asm_x64_entry: restores RBX (pushed last by the
+// entry sequence), emits the LEAVE opcode, then a return.
+void asm_x64_exit(asm_x64_t* as) {
+ asm_x64_pop_r64(as, REG_RBX);
+ asm_x64_write_byte_1(as, OPCODE_LEAVE);
+ asm_x64_ret(as);
+}
+
+// Disabled: always asserts. The code after the assert would push the word located
+// at RBP + 8 + src_arg_num * WORD_SIZE.
+void asm_x64_push_arg(asm_x64_t* as, int src_arg_num) {
+ assert(0);
+ asm_x64_push_disp(as, REG_RBP, 8 + src_arg_num * WORD_SIZE);
+}
+
+// Unimplemented stub: always asserts and emits nothing (the intended body is commented out).
+void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32) {
+ assert(0);
+ //asm_x64_mov_disp_to_r32(as, REG_RBP, 8 + src_arg_num * WORD_SIZE, dest_r32);
+}
+
+// Unimplemented stub: always asserts and emits nothing (the intended body is commented out).
+void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num) {
+ assert(0);
+ //asm_x64_mov_r32_to_disp(as, src_r32, REG_RBP, 8 + dest_arg_num * WORD_SIZE);
+}
+
+// Displacement of local number local_num relative to the frame base register:
+// local 0 sits one word below the base, local 1 two words below, and so on.
+static int asm_x64_local_offset_from_ebp(int local_num)
+{
+    int words_below_base = local_num + 1;
+    return -words_below_base * WORD_SIZE;
+}
+
+void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64) {
+    // load the local from its RBP-relative slot into dest_r64
+    int disp = asm_x64_local_offset_from_ebp(src_local_num);
+    asm_x64_mov_disp_to_r64(as, REG_RBP, disp, dest_r64);
+}
+
+void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num) {
+    // store src_r64 into the local's RBP-relative slot
+    int disp = asm_x64_local_offset_from_ebp(dest_local_num);
+    asm_x64_mov_r64_to_disp(as, src_r64, REG_RBP, disp);
+}
+
+void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64) {
+    int disp = asm_x64_local_offset_from_ebp(local_num);
+
+    if (disp != 0) {
+        // general case: dest_r64 = RBP + disp, computed with a single LEA
+        asm_x64_lea_disp_to_r64(as, REG_RBP, disp, dest_r64);
+        return;
+    }
+
+    // zero displacement: the address is RBP itself, so a plain register move suffices
+    asm_x64_mov_r64_to_r64(as, REG_RBP, dest_r64);
+}
+
+void asm_x64_push_local(asm_x64_t* as, int local_num) {
+    // push the value held in the local's RBP-relative slot
+    int disp = asm_x64_local_offset_from_ebp(local_num);
+    asm_x64_push_disp(as, REG_RBP, disp);
+}
+
+// Pushes the address of local number local_num onto the machine stack.
+// temp_r64 is a scratch register that is overwritten with that address.
+void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r64)
+{
+    // Build the full 64-bit address with the same helper that
+    // asm_x64_mov_local_addr_to_r64 callers use (an RBP-relative LEA).
+    // The previous sequence copied RBP and then adjusted it with
+    // asm_x64_add_i32_to_r32; that helper is named as a 32-bit operation, and a
+    // 32-bit add on x86-64 clears the upper half of the destination register,
+    // which would corrupt a stack address above 4GiB.
+    asm_x64_mov_local_addr_to_r64(as, local_num, temp_r64);
+    asm_x64_push_r64(as, temp_r64);
+}
+
+/*
+ can't use these because code might be relocated when resized
+
+void asm_x64_call(asm_x64_t* as, void* func)
+{
+ asm_x64_sub_i32_from_r32(as, 8, REG_RSP);
+ asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
+ asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
+ asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP);
+}
+
+void asm_x64_call_i1(asm_x64_t* as, void* func, int i1)
+{
+ asm_x64_sub_i32_from_r32(as, 8, REG_RSP);
+ asm_x64_sub_i32_from_r32(as, 12, REG_RSP);
+ asm_x64_push_i32(as, i1);
+ asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
+ asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
+ asm_x64_add_i32_to_r32(as, 16, REG_RSP);
+ asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP);
+}
+*/
+
+// Emits a call to the function at address ptr, encoded as a relative call
+// (opcode byte + 32-bit displacement). temp_r64 is not used by the active code;
+// it was only needed by the commented-out register-indirect variant below.
+// NOTE(review): the displacement is written as a 32-bit word, so this presumably
+// relies on ptr lying within +/-2GiB of the generated code -- confirm.
+void asm_x64_call_ind(asm_x64_t* as, void *ptr, int temp_r64) {
+ /*
+ asm_x64_mov_i64_to_r64_optimised(as, (int64_t)ptr, temp_r64);
+ asm_x64_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R64(2) | MODRM_RM_REG | MODRM_RM_R64(temp_r64));
+ */
+ // this reduces code size by 2 bytes per call, but doesn't seem to speed it up at all
+ asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
+ // displacement is relative to code_base + code_offset + 4, i.e. 4 bytes past the
+ // position at which the displacement word itself is about to be written
+ asm_x64_write_word32(as, ptr - (void*)(as->code_base + as->code_offset + 4));
+}
diff --git a/py/asmx64.h b/py/asmx64.h
new file mode 100644
index 0000000000..4871dbff8a
--- /dev/null
+++ b/py/asmx64.h
@@ -0,0 +1,76 @@
+#define ASM_X64_PASS_1 (1)
+#define ASM_X64_PASS_2 (2)
+#define ASM_X64_PASS_3 (3)
+
+#define REG_RAX (0)
+#define REG_RCX (1)
+#define REG_RDX (2)
+#define REG_RBX (3)
+#define REG_RSP (4)
+#define REG_RBP (5)
+#define REG_RSI (6)
+#define REG_RDI (7)
+
+// condition codes, used for jcc and setcc (despite their j-name!)
+#define JCC_JB (0x2) // below, unsigned
+#define JCC_JZ (0x4)
+#define JCC_JE (0x4)
+#define JCC_JNZ (0x5)
+#define JCC_JNE (0x5)
+#define JCC_JL (0xc) // less, signed
+
+#define REG_RET REG_RAX
+#define REG_ARG_1 REG_RDI
+#define REG_ARG_2 REG_RSI
+#define REG_ARG_3 REG_RDX
+
+typedef struct _asm_x64_t asm_x64_t;
+
+asm_x64_t* asm_x64_new();
+void asm_x64_free(asm_x64_t* as, bool free_code);
+void asm_x64_start_pass(asm_x64_t *as, int pass);
+void asm_x64_end_pass(asm_x64_t *as);
+uint asm_x64_get_code_size(asm_x64_t* as);
+void* asm_x64_get_code(asm_x64_t* as);
+
+void asm_x64_nop(asm_x64_t* as);
+void asm_x64_push_r64(asm_x64_t* as, int src_r64);
+void asm_x64_push_i32(asm_x64_t* as, int src_i32); // will be sign extended to 64 bits
+void asm_x64_push_disp(asm_x64_t* as, int src_r32, int src_offset);
+void asm_x64_pop_r64(asm_x64_t* as, int dest_r64);
+void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
+void asm_x64_mov_r32_to_disp(asm_x64_t* as, int src_r32, int dest_r32, int dest_disp);
+void asm_x64_mov_disp_to_r32(asm_x64_t* as, int src_r32, int src_disp, int dest_r32);
+void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64);
+void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64);
+void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp);
+void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64);
+void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64);
+void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
+void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32);
+void asm_x64_sub_r32_from_r32(asm_x64_t* as, int src_r32, int dest_r32);
+void asm_x64_sub_i32_from_r32(asm_x64_t* as, int src_i32, int dest_r32);
+void asm_x64_shl_r32_by_imm(asm_x64_t* as, int r32, int imm);
+void asm_x64_shr_r32_by_imm(asm_x64_t* as, int r32, int imm);
+void asm_x64_sar_r32_by_imm(asm_x64_t* as, int r32, int imm);
+void asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b);
+void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b);
+void asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b);
+void asm_x64_cmp_i32_with_r32(asm_x64_t* as, int src_i32, int src_r32);
+void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b);
+void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8);
+int asm_x64_label_new(asm_x64_t* as);
+void asm_x64_label_assign(asm_x64_t* as, int label);
+void asm_x64_jmp_label(asm_x64_t* as, int label);
+void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label);
+void asm_x64_entry(asm_x64_t* as, int num_locals);
+void asm_x64_exit(asm_x64_t* as);
+void asm_x64_push_arg(asm_x64_t* as, int src_arg_num);
+void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32);
+void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num);
+void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64);
+void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num);
+void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64);
+void asm_x64_push_local(asm_x64_t* as, int local_num);
+void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r32);
+void asm_x64_call_ind(asm_x64_t* as, void* ptr, int temp_r32);
diff --git a/py/bc.c b/py/bc.c
new file mode 100644
index 0000000000..1edd911ab6
--- /dev/null
+++ b/py/bc.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "machine.h"
+#include "runtime.h"
+#include "bc.h"
+
+// Operand decoding: read one byte from ip; if it is greater than 127 the operand is
+// two bytes long, formed from the low 6 bits of the first byte (high part) and the
+// whole of the next byte (low part). Both macros advance ip past the bytes consumed
+// and expect a local named unum / qstr to be in scope.
+#define DECODE_UINT do { unum = *ip++; if (unum > 127) { unum = ((unum & 0x3f) << 8) | (*ip++); } } while (0)
+#define DECODE_QSTR do { qstr = *ip++; if (qstr > 127) { qstr = ((qstr & 0x3f) << 8) | (*ip++); } } while (0)
+// Value stack access: the stack grows downwards, so PUSH pre-decrements sp and POP post-increments it.
+#define PUSH(val) *--sp = (val)
+#define POP() (*sp++)
+
+// Interprets the byte code starting at code and returns the value produced by
+// PYBC_RETURN_VALUE (or py_const_none after reporting an unimplemented opcode).
+//   code   - start of the byte code; jump operands are offsets from this pointer
+//   len    - not referenced by this function
+//   args   - n_args argument values; the first three go to fast0..fast2 and the
+//            rest to fastn[] (at most 4 of those, enforced only by an assert)
+// NOTE(review): the value stack is a fixed 10-entry array and no opcode checks for
+// overflow or underflow; this presumably relies on the compiler limiting stack depth.
+py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args) {
+ byte *ip = code;
+ py_obj_t stack[10];
+ py_obj_t *sp = &stack[10]; // stack grows down, sp points to top of stack
+ machine_uint_t unum;
+ machine_int_t snum;
+ qstr qstr;
+ py_obj_t obj1, obj2;
+ py_obj_t fast0 = NULL, fast1 = NULL, fast2 = NULL, fastn[4] = {NULL, NULL, NULL, NULL};
+
+ // init args
+ for (int i = 0; i < n_args; i++) {
+ if (i == 0) {
+ fast0 = args[0];
+ } else if (i == 1) {
+ fast1 = args[1];
+ } else if (i == 2) {
+ fast2 = args[2];
+ } else {
+ assert(i - 3 < 4);
+ fastn[i - 3] = args[i];
+ }
+ }
+
+ // execute byte code
+ for (;;) {
+ int op = *ip++;
+ switch (op) {
+ case PYBC_LOAD_CONST_FALSE:
+ PUSH(py_const_false);
+ break;
+
+ case PYBC_LOAD_CONST_NONE:
+ PUSH(py_const_none);
+ break;
+
+ case PYBC_LOAD_CONST_TRUE:
+ PUSH(py_const_true);
+ break;
+
+ case PYBC_LOAD_CONST_SMALL_INT:
+ // operand is a 16-bit little-endian value, sign-extended here; the pushed
+ // object is the integer shifted left by one with the low bit set
+ snum = ip[0] | (ip[1] << 8);
+ if (snum & 0x8000) {
+ snum |= ~0xffff;
+ }
+ ip += 2;
+ PUSH((py_obj_t)(snum << 1 | 1));
+ break;
+
+ case PYBC_LOAD_CONST_ID:
+ DECODE_QSTR;
+ PUSH(rt_load_const_str(qstr)); // TODO
+ break;
+
+ case PYBC_LOAD_CONST_STRING:
+ DECODE_QSTR;
+ PUSH(rt_load_const_str(qstr));
+ break;
+
+ case PYBC_LOAD_FAST_0:
+ PUSH(fast0);
+ break;
+
+ case PYBC_LOAD_FAST_1:
+ PUSH(fast1);
+ break;
+
+ case PYBC_LOAD_FAST_2:
+ PUSH(fast2);
+ break;
+
+ case PYBC_LOAD_FAST_N:
+ // NOTE(review): the index unum - 3 into fastn[4] is not range-checked
+ DECODE_UINT;
+ PUSH(fastn[unum - 3]);
+ break;
+
+ case PYBC_LOAD_NAME:
+ DECODE_QSTR;
+ PUSH(rt_load_name(qstr));
+ break;
+
+ case PYBC_LOAD_GLOBAL:
+ DECODE_QSTR;
+ PUSH(rt_load_global(qstr));
+ break;
+
+ case PYBC_LOAD_ATTR:
+ // replaces the top of stack with the loaded attribute
+ DECODE_QSTR;
+ *sp = rt_load_attr(*sp, qstr);
+ break;
+
+ case PYBC_LOAD_METHOD:
+ // makes room for one extra entry; rt_load_method is given the object (now at sp[1])
+ // and the address sp to write its results to
+ DECODE_QSTR;
+ sp -= 1;
+ rt_load_method(sp[1], qstr, sp);
+ break;
+
+ case PYBC_LOAD_BUILD_CLASS:
+ PUSH(rt_load_build_class());
+ break;
+
+ case PYBC_STORE_FAST_0:
+ fast0 = POP();
+ break;
+
+ case PYBC_STORE_FAST_1:
+ fast1 = POP();
+ break;
+
+ case PYBC_STORE_FAST_2:
+ fast2 = POP();
+ break;
+
+ case PYBC_STORE_FAST_N:
+ // NOTE(review): the index unum - 3 into fastn[4] is not range-checked
+ DECODE_UINT;
+ fastn[unum - 3] = POP();
+ break;
+
+ case PYBC_STORE_NAME:
+ DECODE_QSTR;
+ rt_store_name(qstr, POP());
+ break;
+
+ case PYBC_STORE_SUBSCR:
+ // consumes the top three stack entries
+ rt_store_subscr(sp[1], sp[0], sp[2]);
+ sp += 3;
+ break;
+
+ case PYBC_DUP_TOP:
+ obj1 = *sp;
+ PUSH(obj1);
+ break;
+
+ case PYBC_DUP_TOP_TWO:
+ // duplicates the top two entries, preserving their order
+ sp -= 2;
+ sp[0] = sp[2];
+ sp[1] = sp[3];
+ break;
+
+ case PYBC_POP_TOP:
+ ++sp;
+ break;
+
+ case PYBC_ROT_THREE:
+ // moves the top entry down to third position; the two below it move up by one
+ obj1 = sp[0];
+ sp[0] = sp[1];
+ sp[1] = sp[2];
+ sp[2] = obj1;
+ break;
+
+ case PYBC_JUMP:
+ DECODE_UINT;
+ ip = code + unum;
+ break;
+
+ case PYBC_POP_JUMP_IF_FALSE:
+ DECODE_UINT;
+ if (!rt_is_true(POP())) {
+ ip = code + unum;
+ }
+ break;
+
+ case PYBC_SETUP_LOOP:
+ // operand is decoded (to skip over it) and otherwise ignored
+ DECODE_UINT;
+ break;
+
+ case PYBC_POP_BLOCK:
+ break;
+
+ case PYBC_BINARY_OP:
+ // operand byte selects the operation; rhs is popped, lhs is replaced by the result
+ unum = *ip++;
+ obj2 = POP();
+ obj1 = *sp;
+ *sp = rt_binary_op(unum, obj1, obj2);
+ break;
+
+ case PYBC_COMPARE_OP:
+ unum = *ip++;
+ obj2 = POP();
+ obj1 = *sp;
+ *sp = rt_compare_op(unum, obj1, obj2);
+ break;
+
+ case PYBC_BUILD_LIST:
+ // the unum items on top of the stack are replaced by the single list object
+ DECODE_UINT;
+ obj1 = rt_build_list(unum, sp);
+ sp += unum - 1;
+ *sp = obj1;
+ break;
+
+ case PYBC_BUILD_MAP:
+ DECODE_UINT;
+ PUSH(rt_build_map(unum));
+ break;
+
+ case PYBC_STORE_MAP:
+ // pops two entries; the map stays on the stack as the new top (sp[0])
+ sp += 2;
+ rt_store_map(sp[0], sp[-2], sp[-1]);
+ break;
+
+ case PYBC_BUILD_SET:
+ DECODE_UINT;
+ obj1 = rt_build_set(unum, sp);
+ sp += unum - 1;
+ *sp = obj1;
+ break;
+
+ case PYBC_MAKE_FUNCTION:
+ DECODE_UINT;
+ PUSH(rt_make_function_from_id(unum));
+ break;
+
+ case PYBC_CALL_FUNCTION:
+ // low byte of the operand is the positional count; keyword arguments are not supported.
+ // Arguments are popped and the callable's slot is overwritten with the result.
+ DECODE_UINT;
+ assert((unum & 0xff00) == 0); // n_keyword
+ // switch on n_positional
+ if ((unum & 0xff) == 0) {
+ *sp = rt_call_function_0(*sp);
+ } else if ((unum & 0xff) == 1) {
+ obj1 = *sp++; // the single argument
+ *sp = rt_call_function_1(*sp, obj1);
+ } else if ((unum & 0xff) == 2) {
+ obj2 = *sp++; // the second argument
+ obj1 = *sp++; // the first argument
+ *sp = rt_call_function_2(*sp, obj1, obj2);
+ } else {
+ assert(0);
+ }
+ break;
+
+ case PYBC_CALL_METHOD:
+ DECODE_UINT;
+ assert((unum & 0xff00) == 0); // n_keyword
+ // switch on n_positional
+ if ((unum & 0xff) == 0) {
+ obj1 = *sp++; // the self object (or NULL)
+ *sp = rt_call_method_1(*sp, obj1);
+ } else if ((unum & 0xff) == 1) {
+ // NOTE(review): unlike the zero-argument case this goes through rt_call_function_2,
+ // so a NULL self would be passed on as the first argument -- confirm this is intended
+ obj2 = *sp++; // the first argument
+ obj1 = *sp++; // the self object (or NULL)
+ *sp = rt_call_function_2(*sp, obj1, obj2);
+ } else {
+ assert(0);
+ }
+ break;
+
+ case PYBC_RETURN_VALUE:
+ return *sp;
+
+ default:
+ printf("code %p, offset %u, byte code 0x%02x not implemented\n", code, (uint)(ip - code), op);
+ assert(0);
+ return py_const_none;
+ }
+ }
+}
diff --git a/py/bc.h b/py/bc.h
new file mode 100644
index 0000000000..f09843a960
--- /dev/null
+++ b/py/bc.h
@@ -0,0 +1,97 @@
+#define PYBC_LOAD_CONST_FALSE (0x10)
+#define PYBC_LOAD_CONST_NONE (0x11)
+#define PYBC_LOAD_CONST_TRUE (0x12)
+#define PYBC_LOAD_CONST_SMALL_INT (0x13) // int
+#define PYBC_LOAD_CONST_INT (0x14) // qstr
+#define PYBC_LOAD_CONST_DEC (0x15) // qstr
+#define PYBC_LOAD_CONST_ID (0x16) // qstr
+#define PYBC_LOAD_CONST_BYTES (0x17) // qstr
+#define PYBC_LOAD_CONST_STRING (0x18) // qstr
+
+#define PYBC_LOAD_FAST_0 (0x20)
+#define PYBC_LOAD_FAST_1 (0x21)
+#define PYBC_LOAD_FAST_2 (0x22)
+#define PYBC_LOAD_FAST_N (0x23) // uint
+#define PYBC_LOAD_NAME (0x24) // qstr
+#define PYBC_LOAD_GLOBAL (0x25) // qstr
+#define PYBC_LOAD_ATTR (0x26) // qstr
+#define PYBC_LOAD_METHOD (0x27) // qstr
+#define PYBC_LOAD_BUILD_CLASS (0x28)
+
+#define PYBC_STORE_FAST_0 (0x30)
+#define PYBC_STORE_FAST_1 (0x31)
+#define PYBC_STORE_FAST_2 (0x32)
+#define PYBC_STORE_FAST_N (0x33) // uint
+#define PYBC_STORE_NAME (0x34) // qstr
+#define PYBC_STORE_GLOBAL (0x35) // qstr
+#define PYBC_STORE_ATTR (0x36) // qstr
+#define PYBC_STORE_LOCALS (0x37)
+#define PYBC_STORE_SUBSCR (0x38)
+
+#define PYBC_DELETE_FAST_N (0x39) // uint
+#define PYBC_DELETE_NAME (0x3a) // qstr
+#define PYBC_DELETE_GLOBAL (0x3b) // qstr
+#define PYBC_DELETE_DEREF (0x3c) // qstr
+#define PYBC_DELETE_ATTR (0x3d) // qstr
+#define PYBC_DELETE_SUBSCR (0x3e)
+
+#define PYBC_DUP_TOP (0x40)
+#define PYBC_DUP_TOP_TWO (0x41)
+#define PYBC_POP_TOP (0x42)
+#define PYBC_ROT_TWO (0x43)
+#define PYBC_ROT_THREE (0x44)
+#define PYBC_JUMP (0x45) // pos
+#define PYBC_POP_JUMP_IF_TRUE (0x46) // pos
+#define PYBC_POP_JUMP_IF_FALSE (0x47) // pos
+#define PYBC_JUMP_IF_TRUE_OR_POP (0x48) // pos
+#define PYBC_JUMP_IF_FALSE_OR_POP (0x49) // pos
+#define PYBC_SETUP_LOOP (0x4a) // pos
+#define PYBC_BREAK_LOOP (0x4b) // pos
+#define PYBC_CONTINUE_LOOP (0x4c) // pos
+#define PYBC_SETUP_WITH (0x4d) // pos
+#define PYBC_WITH_CLEANUP (0x4e)
+#define PYBC_SETUP_EXCEPT (0x4f) // pos
+#define PYBC_SETUP_FINALLY (0x50) // pos
+#define PYBC_END_FINALLY (0x51)
+#define PYBC_GET_ITER (0x52)
+#define PYBC_FOR_ITER (0x53) // pos
+#define PYBC_POP_BLOCK (0x54)
+#define PYBC_POP_EXCEPT (0x55)
+
+#define PYBC_UNARY_OP (0x60) // byte
+#define PYBC_BINARY_OP (0x61) // byte
+#define PYBC_COMPARE_OP (0x62) // byte
+
+#define PYBC_BUILD_TUPLE (0x70) // uint
+#define PYBC_BUILD_LIST (0x71) // uint
+#define PYBC_LIST_APPEND (0x72) // uint
+#define PYBC_BUILD_MAP (0x73) // uint
+#define PYBC_STORE_MAP (0x74)
+#define PYBC_MAP_ADD (0x75) // uint
+#define PYBC_BUILD_SET (0x76) // uint
+#define PYBC_SET_ADD (0x77) // uint
+#define PYBC_BUILD_SLICE (0x78) // uint
+#define PYBC_UNPACK_SEQUENCE (0x79) // uint
+#define PYBC_UNPACK_EX (0x7a) // uint
+
+#define PYBC_RETURN_VALUE (0x80)
+#define PYBC_RAISE_VARARGS (0x81) // uint
+#define PYBC_YIELD_VALUE (0x82)
+#define PYBC_YIELD_FROM (0x83)
+
+#define PYBC_MAKE_FUNCTION (0x90) // uint
+#define PYBC_MAKE_CLOSURE (0x91) // uint?
+#define PYBC_CALL_FUNCTION (0x92) // uint
+#define PYBC_CALL_FUNCTION_VAR (0x93) // uint
+#define PYBC_CALL_FUNCTION_KW (0x94) // uint
+#define PYBC_CALL_FUNCTION_VAR_KW (0x95) // uint
+#define PYBC_CALL_METHOD (0x96) // uint
+#define PYBC_CALL_METHOD_VAR (0x97) // uint
+#define PYBC_CALL_METHOD_KW (0x98) // uint
+#define PYBC_CALL_METHOD_VAR_KW (0x99) // uint
+
+#define PYBC_IMPORT_NAME (0xe0)
+#define PYBC_IMPORT_FROM (0xe1)
+#define PYBC_IMPORT_STAR (0xe2)
+
+py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args);
diff --git a/py/compile.c b/py/compile.c
new file mode 100644
index 0000000000..0e6ce4443b
--- /dev/null
+++ b/py/compile.c
@@ -0,0 +1,2510 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "scope.h"
+#include "compile.h"
+#include "runtime.h"
+#include "emit.h"
+
+// TODO need to mangle __attr names
+
+typedef enum {
+ PN_none = 0,
+#define DEF_RULE(rule, comp, kind, arg...) PN_##rule,
+#include "grammar.h"
+#undef DEF_RULE
+ PN_maximum_number_of,
+} pn_kind_t;
+
+#define EMIT(fun, arg...) (emit_##fun(comp->emit, ##arg))
+
+// State carried through compilation; a pointer to it (named comp) is what the EMIT macro expects in scope.
+typedef struct _compiler_t {
+ // interned names; qstr___class__ is passed to emit_common_load_id when loading an identifier
+ qstr qstr___class__;
+ qstr qstr___locals__;
+ qstr qstr___name__;
+ qstr qstr___module__;
+ qstr qstr___qualname__;
+ qstr qstr___doc__;
+ qstr qstr_assertion_error;
+
+ // current pass, forwarded to the emit_common_* helpers
+ pass_kind_t pass;
+
+ int break_label;
+ int continue_label;
+ int except_nest_level;
+
+ int n_arg_keyword;
+ bool have_star_arg;
+ bool have_dbl_star_arg;
+ bool have_bare_star;
+ int param_pass;
+ int param_pass_num_dict_params;
+ int param_pass_num_default_params;
+
+ // scope_head: first scope of the singly linked list (via scope->next) that scope_new_and_link appends to
+ // scope_cur: becomes the parent of any scope created by scope_new_and_link
+ scope_t *scope_head;
+ scope_t *scope_cur;
+
+ // emitter that every EMIT(...) call is dispatched to
+ emitter_t *emit;
+} compiler_t;
+
+// Folds constant small-integer sub-expressions of the parse tree rooted at pn.
+//
+// Children of a struct node are folded first and written back into the node in
+// place; then, if the node itself is a shift, additive, multiplicative, unary or
+// power expression whose operands are small-int leaves, a new small-int leaf
+// holding the computed value is returned instead of the node. Any node that
+// cannot be folded is returned unchanged, which is always safe because the
+// expression is then evaluated at run time.
+//
+// Arithmetic is done in C int and can overflow (see the XXX notes).
+py_parse_node_t fold_constants(py_parse_node_t pn) {
+    if (PY_PARSE_NODE_IS_STRUCT(pn)) {
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+        int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+
+        // fold arguments first
+        for (int i = 0; i < n; i++) {
+            pns->nodes[i] = fold_constants(pns->nodes[i]);
+        }
+
+        switch (PY_PARSE_NODE_STRUCT_KIND(pns)) {
+            case PN_shift_expr:
+                if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
+                    int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+                    int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
+                    // a shift count that is negative or not smaller than the width of int is
+                    // undefined in C; such expressions are left unfolded for the runtime
+                    bool count_ok = arg1 >= 0 && arg1 < (int)(8 * sizeof(int));
+                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_DBL_LESS)) {
+                        if (count_ok) {
+                            pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 << arg1); // XXX can overflow; enabled only to compare with CPython
+                        }
+                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_DBL_MORE)) {
+                        if (count_ok) {
+                            pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 >> arg1);
+                        }
+                    } else {
+                        // shouldn't happen
+                        assert(0);
+                    }
+                }
+                break;
+
+            case PN_arith_expr:
+                // XXX can overflow; enabled only to compare with CPython
+                if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
+                    int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+                    int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
+                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_PLUS)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 + arg1);
+                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_MINUS)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 - arg1);
+                    } else {
+                        // shouldn't happen
+                        assert(0);
+                    }
+                }
+                break;
+
+            case PN_term:
+                // XXX can overflow; enabled only to compare with CPython
+                if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
+                    int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+                    int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
+                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_STAR)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 * arg1);
+                    }
+                    // every other multiplicative operator (true division and any
+                    // remaining term operators) is deliberately not folded; the node
+                    // is left as it is instead of asserting, so that a valid
+                    // expression on two small ints cannot abort the compiler
+                }
+                break;
+
+            case PN_factor_2:
+                if (PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[1])) {
+                    machine_int_t arg = PY_PARSE_NODE_LEAF_ARG(pns->nodes[1]);
+                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_PLUS)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg);
+                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_MINUS)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, -arg);
+                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_TILDE)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, ~arg);
+                    } else {
+                        // shouldn't happen
+                        assert(0);
+                    }
+                }
+                break;
+
+            case PN_power:
+                // XXX can overflow; enabled only to compare with CPython
+                // nodes[0] is the base, nodes[1] must be empty, and nodes[2] must be a
+                // struct node (checked before it is dereferenced) wrapping the exponent
+                if (PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_NULL(pns->nodes[1]) && !PY_PARSE_NODE_IS_NULL(pns->nodes[2]) && PY_PARSE_NODE_IS_STRUCT(pns->nodes[2])) {
+                    py_parse_node_struct_t* pns2 = (py_parse_node_struct_t*)pns->nodes[2];
+                    if (PY_PARSE_NODE_IS_SMALL_INT(pns2->nodes[0])) {
+                        int power = PY_PARSE_NODE_LEAF_ARG(pns2->nodes[0]);
+                        if (power >= 0) {
+                            // repeated multiplication; negative exponents are left for the runtime
+                            int ans = 1;
+                            int base = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+                            for (; power > 0; power--) {
+                                ans *= base;
+                            }
+                            pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, ans);
+                        }
+                    }
+                }
+                break;
+
+            default:
+                // all other node kinds are returned as they are
+                break;
+        }
+    }
+
+    return pn;
+}
+
+void compile_node(compiler_t *comp, py_parse_node_t pn);
+
+// Creates a scope of the given kind for pn, parents it to the compiler's current
+// scope, and appends it to the end of the compiler's list of scopes.
+scope_t *scope_new_and_link(compiler_t *comp, scope_kind_t kind, py_parse_node_t pn) {
+    scope_t *scope = scope_new(kind, pn);
+    scope->parent = comp->scope_cur;
+    scope->next = NULL;
+
+    // walk to the first empty link (the head itself if the list is empty) and hook the scope in there
+    scope_t **link = &comp->scope_head;
+    while (*link != NULL) {
+        link = &(*link)->next;
+    }
+    *link = scope;
+
+    return scope;
+}
+
+// Number of items pn stands for when viewed as a list of kind pn_kind:
+// 0 for a null node, the child count for a struct node of that kind, 1 for anything else.
+int list_len(py_parse_node_t pn, int pn_kind) {
+    if (PY_PARSE_NODE_IS_NULL(pn)) {
+        return 0;
+    }
+    if (PY_PARSE_NODE_IS_LEAF(pn)) {
+        return 1;
+    }
+
+    py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+    return (PY_PARSE_NODE_STRUCT_KIND(pns) != pn_kind) ? 1 : PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+}
+
+// Calls f once per child if pn is a struct node of kind pn_list_kind; otherwise
+// calls f once on pn itself, unless pn is null, in which case f is not called.
+void apply_to_single_or_list(compiler_t *comp, py_parse_node_t pn, int pn_list_kind, void (*f)(compiler_t*, py_parse_node_t)) {
+    bool is_list = PY_PARSE_NODE_IS_STRUCT(pn) && PY_PARSE_NODE_STRUCT_KIND((py_parse_node_struct_t*)pn) == pn_list_kind;
+
+    if (is_list) {
+        py_parse_node_struct_t *list = (py_parse_node_struct_t*)pn;
+        int count = PY_PARSE_NODE_STRUCT_NUM_NODES(list);
+        for (int idx = 0; idx < count; idx++) {
+            f(comp, list->nodes[idx]);
+        }
+        return;
+    }
+
+    if (!PY_PARSE_NODE_IS_NULL(pn)) {
+        f(comp, pn);
+    }
+}
+
+// Views *pn as a list of kind pn_kind. Sets *nodes to the first item and returns the
+// item count: (NULL, 0) for a null node, the node's own children for a struct of
+// that kind, and otherwise pn itself as a one-element list.
+int list_get(py_parse_node_t *pn, int pn_kind, py_parse_node_t **nodes) {
+    if (PY_PARSE_NODE_IS_NULL(*pn)) {
+        *nodes = NULL;
+        return 0;
+    }
+
+    if (!PY_PARSE_NODE_IS_LEAF(*pn)) {
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)(*pn);
+        if (PY_PARSE_NODE_STRUCT_KIND(pns) == pn_kind) {
+            *nodes = pns->nodes;
+            return PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+        }
+    }
+
+    // a leaf, or a struct of some other kind: a single item
+    *nodes = pn;
+    return 1;
+}
+
+// Compile handler that intentionally emits nothing for the given node.
+void compile_do_nothing(compiler_t *comp, py_parse_node_struct_t *pns) {
+}
+
+// Compiles each child of pns in order, first to last.
+void compile_generic_all_nodes(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int remaining = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    for (py_parse_node_t *child = pns->nodes; remaining > 0; child++, remaining--) {
+        compile_node(comp, *child);
+    }
+}
+
+// A tuple element counts as constant when it is a leaf node other than an identifier.
+bool c_tuple_is_const(py_parse_node_t pn) {
+    if (PY_PARSE_NODE_IS_LEAF(pn)) {
+        if (PY_PARSE_NODE_IS_ID(pn)) {
+            return false;
+        }
+        return true;
+    }
+    return false;
+}
+
+// Emits the verbatim text of one constant tuple element. pn must be a leaf that
+// is not an identifier (see c_tuple_is_const): small ints are emitted as ints,
+// integer/decimal literals as their source string, strings and bytes as quoted
+// strings, and the False/None/True keyword tokens by name.
+void c_tuple_emit_const(compiler_t *comp, py_parse_node_t pn) {
+    assert(PY_PARSE_NODE_IS_LEAF(pn));
+    int arg = PY_PARSE_NODE_LEAF_ARG(pn);
+    switch (PY_PARSE_NODE_LEAF_KIND(pn)) {
+        case PY_PARSE_NODE_ID:
+            // identifiers are never constant; the break keeps a build with asserts
+            // disabled from falling through into the small-int case
+            assert(0);
+            break;
+        case PY_PARSE_NODE_SMALL_INT: EMIT(load_const_verbatim_int, arg); break;
+        case PY_PARSE_NODE_INTEGER: EMIT(load_const_verbatim_str, qstr_str(arg)); break;
+        case PY_PARSE_NODE_DECIMAL: EMIT(load_const_verbatim_str, qstr_str(arg)); break;
+        case PY_PARSE_NODE_STRING: EMIT(load_const_verbatim_quoted_str, arg, false); break;
+        case PY_PARSE_NODE_BYTES: EMIT(load_const_verbatim_quoted_str, arg, true); break;
+        case PY_PARSE_NODE_TOKEN:
+            switch (arg) {
+                case PY_TOKEN_KW_FALSE: EMIT(load_const_verbatim_str, "False"); break;
+                case PY_TOKEN_KW_NONE: EMIT(load_const_verbatim_str, "None"); break;
+                case PY_TOKEN_KW_TRUE: EMIT(load_const_verbatim_str, "True"); break;
+                default: assert(0); break;
+            }
+            break;
+        default: assert(0); break;
+    }
+}
+
+// funnelling all tuple creations through this function and all this constant stuff is purely to agree with CPython
+// Emits code that builds a tuple from an optional leading element pn (may be null)
+// followed by the children of pns_list (may be NULL). When there is at least one
+// element and all of them are constant, the tuple is emitted as one verbatim
+// constant; otherwise each element is compiled and build_tuple is emitted.
+void c_tuple(compiler_t *comp, py_parse_node_t pn, py_parse_node_struct_t *pns_list) {
+    bool have_head = !PY_PARSE_NODE_IS_NULL(pn);
+    int n_list = 0;
+    if (pns_list != NULL) {
+        n_list = PY_PARSE_NODE_STRUCT_NUM_NODES(pns_list);
+    }
+    int total = n_list + (have_head ? 1 : 0);
+
+    // every element has to be constant for the verbatim form to be usable
+    bool all_const = true;
+    if (have_head) {
+        all_const = c_tuple_is_const(pn);
+    }
+    for (int i = 0; all_const && i < n_list; i++) {
+        all_const = c_tuple_is_const(pns_list->nodes[i]);
+    }
+
+    if (total == 0 || !all_const) {
+        // general case: evaluate the elements in order, then build the tuple at run time
+        if (have_head) {
+            compile_node(comp, pn);
+        }
+        for (int i = 0; i < n_list; i++) {
+            compile_node(comp, pns_list->nodes[i]);
+        }
+        EMIT(build_tuple, total);
+        return;
+    }
+
+    // constant case: write out "(a, b, c)", or "(a,)" for a single element
+    int n_emitted = 0;
+    EMIT(load_const_verbatim_start);
+    EMIT(load_const_verbatim_str, "(");
+    if (have_head) {
+        c_tuple_emit_const(comp, pn);
+        n_emitted += 1;
+    }
+    for (int i = 0; i < n_list; i++) {
+        if (n_emitted > 0) {
+            EMIT(load_const_verbatim_str, ", ");
+        }
+        c_tuple_emit_const(comp, pns_list->nodes[i]);
+        n_emitted += 1;
+    }
+    if (total == 1) {
+        EMIT(load_const_verbatim_str, ",)");
+    } else {
+        EMIT(load_const_verbatim_str, ")");
+    }
+    EMIT(load_const_verbatim_end);
+}
+
+// Compiles pns as a tuple whose elements are all of its children, by delegating to
+// c_tuple with no separate leading element. The commented-out code is the direct
+// (non-delegating) form of the same thing.
+void compile_generic_tuple(compiler_t *comp, py_parse_node_struct_t *pns) {
+ // a simple tuple expression
+ /*
+ int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+ for (int i = 0; i < n; i++) {
+ compile_node(comp, pns->nodes[i]);
+ }
+ EMIT(build_tuple, n);
+ */
+ c_tuple(comp, PY_PARSE_NODE_NULL, pns);
+}
+
+// Returns whether pn is the False keyword token.
+bool node_is_const_false(py_parse_node_t pn) {
+ return PY_PARSE_NODE_IS_TOKEN_KIND(pn, PY_TOKEN_KW_FALSE);
+ // untested: || (PY_PARSE_NODE_IS_SMALL_INT(pn) && PY_PARSE_NODE_LEAF_ARG(pn) == 1);
+ // NOTE(review): if the disabled clause above is ever enabled, it presumably needs to
+ // compare against 0 rather than 1 (1 is what node_is_const_true accepts)
+}
+
+// Returns whether pn is the True keyword token or the small-int constant 1.
+bool node_is_const_true(py_parse_node_t pn) {
+    if (PY_PARSE_NODE_IS_TOKEN_KIND(pn, PY_TOKEN_KW_TRUE)) {
+        return true;
+    }
+    return PY_PARSE_NODE_IS_SMALL_INT(pn) && PY_PARSE_NODE_LEAF_ARG(pn) == 1;
+}
+
+// having c_if_cond_2 and the is_nested variable is purely to match with CPython, which doesn't fully optimise not's
+// Emits code that jumps to label when the truth value of condition pn equals jump_if,
+// and falls through otherwise.
+//   - constant False/True conditions emit either an unconditional jump or nothing
+//   - or_test / and_test nodes are compiled with short-circuit jumps, recursing with is_nested set
+//   - a not_test_2 node is folded into the jump sense, but only at the outermost level (!is_nested)
+//   - anything else is compiled normally, followed by a conditional pop-and-jump
+void c_if_cond_2(compiler_t *comp, py_parse_node_t pn, bool jump_if, int label, bool is_nested) {
+ if (node_is_const_false(pn)) {
+ if (jump_if == false) {
+ EMIT(jump, label);
+ }
+ return;
+ } else if (node_is_const_true(pn)) {
+ if (jump_if == true) {
+ EMIT(jump, label);
+ }
+ return;
+ } else if (PY_PARSE_NODE_IS_STRUCT(pn)) {
+ py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+ int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+ if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_or_test) {
+ if (jump_if == false) {
+ // jump to label only if every operand is false: any true operand among the
+ // first n-1 skips to label2 (the fall-through point) instead
+ int label2 = EMIT(label_new);
+ for (int i = 0; i < n - 1; i++) {
+ c_if_cond_2(comp, pns->nodes[i], true, label2, true);
+ }
+ c_if_cond_2(comp, pns->nodes[n - 1], false, label, true);
+ EMIT(label_assign, label2);
+ } else {
+ // jump to label as soon as any operand is true
+ for (int i = 0; i < n; i++) {
+ c_if_cond_2(comp, pns->nodes[i], true, label, true);
+ }
+ }
+ return;
+ } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_and_test) {
+ if (jump_if == false) {
+ // jump to label as soon as any operand is false
+ for (int i = 0; i < n; i++) {
+ c_if_cond_2(comp, pns->nodes[i], false, label, true);
+ }
+ } else {
+ // jump to label only if every operand is true: any false operand among the
+ // first n-1 skips to label2 (the fall-through point) instead
+ int label2 = EMIT(label_new);
+ for (int i = 0; i < n - 1; i++) {
+ c_if_cond_2(comp, pns->nodes[i], false, label2, true);
+ }
+ c_if_cond_2(comp, pns->nodes[n - 1], true, label, true);
+ EMIT(label_assign, label2);
+ }
+ return;
+ } else if (!is_nested && PY_PARSE_NODE_STRUCT_KIND(pns) == PN_not_test_2) {
+ // "not x": test x with the opposite jump sense
+ c_if_cond_2(comp, pns->nodes[0], !jump_if, label, true);
+ return;
+ }
+ }
+
+ // nothing special, fall back to default compiling for node and jump
+ compile_node(comp, pn);
+ if (jump_if == false) {
+ EMIT(pop_jump_if_false, label);
+ } else {
+ EMIT(pop_jump_if_true, label);
+ }
+}
+
+// Entry point for compiling a condition: emits a jump to label taken when the truth
+// value of pn equals jump_if. Starts c_if_cond_2 at the outermost (non-nested) level.
+void c_if_cond(compiler_t *comp, py_parse_node_t pn, bool jump_if, int label) {
+ c_if_cond_2(comp, pn, jump_if, label, false);
+}
+
+typedef enum { ASSIGN_STORE, ASSIGN_AUG_LOAD, ASSIGN_AUG_STORE } assign_kind_t;
+void c_assign(compiler_t *comp, py_parse_node_t pn, assign_kind_t kind);
+
+// Emits the code for an assignment whose target is a PN_power node, i.e. a base
+// expression (nodes[0]) followed by trailers (nodes[1]); the last trailer decides
+// whether this is a subscript store or an attribute store.
+//   ASSIGN_STORE     - evaluate base and leading trailers, then store
+//   ASSIGN_AUG_LOAD  - evaluate them, duplicate the operands and load the current value
+//   ASSIGN_AUG_STORE - evaluate nothing; rotate the stack and store
+// Assignment to a call trailer is reported as a SyntaxError via printf.
+void c_assign_power(compiler_t *comp, py_parse_node_struct_t *pns, assign_kind_t assign_kind) {
+ if (assign_kind != ASSIGN_AUG_STORE) {
+ compile_node(comp, pns->nodes[0]);
+ }
+
+ if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
+ py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1];
+ if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_power_trailers) {
+ // several trailers: all but the last are compiled as ordinary loads,
+ // and pns1 is then narrowed to the final trailer
+ int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1);
+ if (assign_kind != ASSIGN_AUG_STORE) {
+ for (int i = 0; i < n - 1; i++) {
+ compile_node(comp, pns1->nodes[i]);
+ }
+ }
+ assert(PY_PARSE_NODE_IS_STRUCT(pns1->nodes[n - 1]));
+ pns1 = (py_parse_node_struct_t*)pns1->nodes[n - 1];
+ }
+ if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_paren) {
+ printf("SyntaxError: can't assign to function call\n");
+ return;
+ } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_bracket) {
+ if (assign_kind == ASSIGN_AUG_STORE) {
+ EMIT(rot_three);
+ EMIT(store_subscr);
+ } else {
+ // compile the index expression, then either load the current element
+ // (keeping a copy of the two operands for the later store) or store directly
+ compile_node(comp, pns1->nodes[0]);
+ if (assign_kind == ASSIGN_AUG_LOAD) {
+ EMIT(dup_top_two);
+ EMIT(binary_op, RT_BINARY_OP_SUBSCR);
+ } else {
+ EMIT(store_subscr);
+ }
+ }
+ } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_period) {
+ assert(PY_PARSE_NODE_IS_ID(pns1->nodes[0]));
+ if (assign_kind == ASSIGN_AUG_LOAD) {
+ // keep a copy of the object for the later store, then load the attribute
+ EMIT(dup_top);
+ EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0]));
+ } else {
+ if (assign_kind == ASSIGN_AUG_STORE) {
+ EMIT(rot_two);
+ }
+ EMIT(store_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0]));
+ }
+ } else {
+ // shouldn't happen
+ assert(0);
+ }
+ } else {
+ // shouldn't happen
+ assert(0);
+ }
+
+ if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) {
+ // SyntaxError, cannot assign
+ assert(0);
+ }
+}
+
+// Emits the code that unpacks the value on top of the stack into the n assignment
+// targets in nodes. At most one target may be a starred expression; a second one
+// is reported via printf and nothing further is emitted.
+void c_assign_tuple(compiler_t *comp, int n, py_parse_node_t *nodes) {
+    assert(n >= 0);
+
+    // look for a starred target; the first one found selects unpack_ex
+    int star_pos = -1;
+    for (int i = 0; i < n; i++) {
+        if (PY_PARSE_NODE_IS_STRUCT_KIND(nodes[i], PN_star_expr)) {
+            if (star_pos >= 0) {
+                printf("SyntaxError: two starred expressions in assignment\n");
+                return;
+            }
+            EMIT(unpack_ex, i, n - i - 1);
+            star_pos = i;
+        }
+    }
+
+    // no starred target: plain fixed-length unpack
+    if (star_pos < 0) {
+        EMIT(unpack_sequence, n);
+    }
+
+    // assign each unpacked value in order; the starred target is assigned through its inner node
+    for (int i = 0; i < n; i++) {
+        py_parse_node_t target = nodes[i];
+        if (i == star_pos) {
+            target = ((py_parse_node_struct_t*)nodes[i])->nodes[0];
+        }
+        c_assign(comp, target, ASSIGN_STORE);
+    }
+}
+
+// assigns top of stack to pn
+//
+// assign_kind selects a plain store or one half of an augmented assignment.
+// Names and PN_power targets accept every kind; tuple/list style targets only
+// accept ASSIGN_STORE and otherwise report "illegal expression for augmented
+// assignment".  Errors are reported by printing to stdout.
+void c_assign(compiler_t *comp, py_parse_node_t pn, assign_kind_t assign_kind) {
+    tail_recursion:
+    if (PY_PARSE_NODE_IS_NULL(pn)) {
+        assert(0);
+    } else if (PY_PARSE_NODE_IS_LEAF(pn)) {
+        if (PY_PARSE_NODE_IS_ID(pn)) {
+            // lhs is a simple name
+            int arg = PY_PARSE_NODE_LEAF_ARG(pn);
+            switch (assign_kind) {
+                case ASSIGN_STORE:
+                case ASSIGN_AUG_STORE:
+                    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, arg);
+                    break;
+                case ASSIGN_AUG_LOAD:
+                    emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, arg);
+                    break;
+            }
+        } else {
+            printf("SyntaxError: can't assign to literal\n");
+            return;
+        }
+    } else {
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+        switch (PY_PARSE_NODE_STRUCT_KIND(pns)) {
+            case PN_power:
+                // lhs is an index or attribute
+                c_assign_power(comp, pns, assign_kind);
+                break;
+
+            case PN_testlist_star_expr:
+            case PN_exprlist:
+                // lhs is a tuple
+                if (assign_kind != ASSIGN_STORE) {
+                    goto bad_aug;
+                }
+                c_assign_tuple(comp, PY_PARSE_NODE_STRUCT_NUM_NODES(pns), pns->nodes);
+                break;
+
+            case PN_atom_paren:
+                // lhs is something in parenthesis
+                if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) {
+                    // empty tuple
+                    printf("SyntaxError: can't assign to ()\n");
+                    return;
+                } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) {
+                    // a parenthesised sequence is a tuple target, so (like the
+                    // bare-tuple and bracket cases) it cannot be augmented
+                    if (assign_kind != ASSIGN_STORE) {
+                        goto bad_aug;
+                    }
+                    pns = (py_parse_node_struct_t*)pns->nodes[0];
+                    goto testlist_comp;
+                } else {
+                    // parenthesis around 1 item, is just that item
+                    pn = pns->nodes[0];
+                    goto tail_recursion;
+                }
+                break;
+
+            case PN_atom_bracket:
+                // lhs is something in brackets
+                if (assign_kind != ASSIGN_STORE) {
+                    goto bad_aug;
+                }
+                if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) {
+                    // empty list, assignment allowed
+                    c_assign_tuple(comp, 0, NULL);
+                } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) {
+                    pns = (py_parse_node_struct_t*)pns->nodes[0];
+                    goto testlist_comp;
+                } else {
+                    // brackets around 1 item
+                    c_assign_tuple(comp, 1, &pns->nodes[0]);
+                }
+                break;
+
+            default:
+                printf("unknown assign, %u\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns));
+                assert(0);
+        }
+        return;
+
+        testlist_comp:
+        // lhs is a sequence; pns is the PN_testlist_comp node
+        if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
+            py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[1];
+            if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3b) {
+                // sequence of one item, with trailing comma
+                assert(PY_PARSE_NODE_IS_NULL(pns2->nodes[0]));
+                c_assign_tuple(comp, 1, &pns->nodes[0]);
+            } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3c) {
+                // sequence of many items
+                // TODO call c_assign_tuple instead
+                int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns2);
+                EMIT(unpack_sequence, 1 + n);
+                c_assign(comp, pns->nodes[0], ASSIGN_STORE);
+                for (int i = 0; i < n; i++) {
+                    c_assign(comp, pns2->nodes[i], ASSIGN_STORE);
+                }
+            } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_comp_for) {
+                // the second node (not pns itself, which is always a
+                // testlist_comp here) tells us this is a comprehension
+                // TODO not implemented
+                assert(0);
+            } else {
+                // sequence with 2 items
+                goto sequence_with_2_items;
+            }
+        } else {
+            // sequence with 2 items
+            sequence_with_2_items:
+            c_assign_tuple(comp, 2, pns->nodes);
+        }
+        return;
+    }
+    return;
+
+    bad_aug:
+    printf("SyntaxError: illegal expression for augmented assignment\n");
+}
+
+// stuff for lambda and comprehensions and generators
+//
+// Emits the code that creates the function object for this_scope.  Outside of
+// module scope every free variable of this_scope is first loaded as a closure
+// cell; if there is at least one, the cells are packed into a tuple and
+// make_closure is emitted, otherwise make_function is emitted.
+void close_over_variables_etc(compiler_t *comp, scope_t *this_scope, int n_dict_params, int n_default_params) {
+    int num_free = 0;
+
+    // make closed over variables, if any
+    if (comp->scope_cur->kind != SCOPE_MODULE) {
+        for (int k = 0; k < this_scope->id_info_len; k++) {
+            id_info_t *info = &this_scope->id_info[k];
+            if (info->kind != ID_INFO_KIND_FREE) {
+                continue;
+            }
+            EMIT(load_closure, info->qstr);
+            num_free++;
+        }
+    }
+
+    // nothing captured: an ordinary function
+    if (num_free == 0) {
+        EMIT(make_function, this_scope, n_dict_params, n_default_params);
+        return;
+    }
+
+    // something captured: bundle the cells and make a closure
+    EMIT(build_tuple, num_free);
+    EMIT(make_closure, this_scope, n_dict_params, n_default_params);
+}
+
+// Handles one entry of a function's parameter list during the default-value
+// passes driven by compile_funcdef_helper.  A bare star sets
+// comp->have_bare_star.  A named parameter with a default is counted as a
+// dict parameter (after a bare star; compiled when param_pass == 1) or as a
+// default parameter (before it; compiled when param_pass == 2).
+void compile_funcdef_param(compiler_t *comp, py_parse_node_t pn) {
+    assert(PY_PARSE_NODE_IS_STRUCT(pn));
+    py_parse_node_struct_t *param = (py_parse_node_struct_t*)pn;
+    int param_kind = PY_PARSE_NODE_STRUCT_KIND(param);
+
+    if (param_kind == PN_typedargslist_star) {
+        if (PY_PARSE_NODE_IS_NULL(param->nodes[0])) {
+            // bare star
+            comp->have_bare_star = true;
+        }
+        return;
+    }
+
+    if (param_kind != PN_typedargslist_name) {
+        return;
+    }
+
+    py_parse_node_t default_value = param->nodes[2];
+    if (PY_PARSE_NODE_IS_NULL(default_value)) {
+        // no default value, nothing to emit
+        return;
+    }
+
+    // in CPython, None (and True, False?) as default parameters are loaded with LOAD_NAME; don't understand why
+    if (!comp->have_bare_star) {
+        // positional default
+        comp->param_pass_num_default_params += 1;
+        if (comp->param_pass == 2) {
+            compile_node(comp, default_value);
+        }
+        return;
+    }
+
+    // default that follows a bare star: emitted as a name/value pair
+    comp->param_pass_num_dict_params += 1;
+    if (comp->param_pass == 1) {
+        EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(param->nodes[0]));
+        compile_node(comp, default_value);
+    }
+}
+
+// leaves function object on stack
+// returns function name
+//
+// The function's scope is created on PASS_1 and cached in pns->nodes[4] so
+// that every pass uses the same scope object.
+qstr compile_funcdef_helper(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (comp->pass == PASS_1) {
+        // create a new scope for this function and remember it for later passes
+        scope_t *new_scope = scope_new_and_link(comp, SCOPE_FUNCTION, (py_parse_node_t)pns);
+        pns->nodes[4] = (py_parse_node_t)new_scope;
+    }
+
+    // save variables (probably don't need to do this, since we can't have nested definitions..?)
+    bool saved_bare_star = comp->have_bare_star;
+    int saved_param_pass = comp->param_pass;
+    int saved_num_dict = comp->param_pass_num_dict_params;
+    int saved_num_default = comp->param_pass_num_default_params;
+
+    // compile default parameters in two sweeps over the parameter list:
+    // sweep 1 does any default parameters after bare star,
+    // sweep 2 does any default parameters before bare star
+    for (int sweep = 1; sweep <= 2; sweep++) {
+        comp->have_bare_star = false;
+        comp->param_pass = sweep;
+        comp->param_pass_num_dict_params = 0;
+        comp->param_pass_num_default_params = 0;
+        apply_to_single_or_list(comp, pns->nodes[1], PN_typedargslist, compile_funcdef_param);
+    }
+
+    // get the scope for this function and make the function object
+    scope_t *func_scope = (scope_t*)pns->nodes[4];
+    close_over_variables_etc(comp, func_scope, comp->param_pass_num_dict_params, comp->param_pass_num_default_params);
+
+    // restore variables
+    comp->have_bare_star = saved_bare_star;
+    comp->param_pass = saved_param_pass;
+    comp->param_pass_num_dict_params = saved_num_dict;
+    comp->param_pass_num_default_params = saved_num_default;
+
+    // return its name (the 'f' in "def f(...):")
+    return func_scope->simple_name;
+}
+
+// leaves class object on stack
+// returns class name
+//
+// The class scope is created on PASS_1 and cached in pns->nodes[3].  The
+// emitted call passes the class body function and the class name, followed by
+// any parent classes.
+qstr compile_classdef_helper(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (comp->pass == PASS_1) {
+        // create a new scope for this class and remember it for later passes
+        scope_t *new_scope = scope_new_and_link(comp, SCOPE_CLASS, (py_parse_node_t)pns);
+        pns->nodes[3] = (py_parse_node_t)new_scope;
+    }
+
+    EMIT(load_build_class);
+
+    // compile the class body function, then push the class name
+    scope_t *class_scope = (scope_t*)pns->nodes[3];
+    close_over_variables_etc(comp, class_scope, 0, 0);
+    EMIT(load_const_id, class_scope->simple_name);
+
+    // two fixed arguments (body function and name), plus parent classes if any
+    int n_call_args = 2;
+    py_parse_node_t parents = pns->nodes[1];
+    if (!PY_PARSE_NODE_IS_NULL(parents)) {
+        // TODO what if we have, eg, *a or **a in the parent list?
+        compile_node(comp, parents);
+        n_call_args += list_len(parents, PN_arglist);
+    }
+    EMIT(call_function, n_call_args, 0, false, false);
+
+    // return its name (the 'C' in "class C(...):")
+    return class_scope->simple_name;
+}
+
+// Compiles a decorated funcdef or classdef: loads every decorator, builds the
+// function/class object, applies the decorators with one single-argument call
+// each, and stores the result under the function/class name.
+void compile_decorated(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // get the list of decorators
+    py_parse_node_t *decorators;
+    int n_decorators = list_get(&pns->nodes[0], PN_decorators, &decorators);
+
+    // load each decorator
+    for (int d = 0; d < n_decorators; d++) {
+        assert(PY_PARSE_NODE_IS_STRUCT_KIND(decorators[d], PN_decorator)); // should be
+        py_parse_node_struct_t *decorator = (py_parse_node_struct_t*)decorators[d];
+
+        // the decorator's dotted name: first part is loaded, the rest are attribute lookups
+        py_parse_node_t *name_parts;
+        int n_parts = list_get(&decorator->nodes[0], PN_dotted_name, &name_parts);
+        compile_node(comp, name_parts[0]);
+        for (int p = 1; p < n_parts; p++) {
+            EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(name_parts[p]));
+        }
+
+        if (!PY_PARSE_NODE_IS_NULL(decorator->nodes[1])) {
+            // first call the function with these arguments
+            compile_node(comp, decorator->nodes[1]);
+        }
+    }
+
+    // compile the body (funcdef or classdef) and get its name
+    py_parse_node_struct_t *body = (py_parse_node_struct_t*)pns->nodes[1];
+    qstr name = 0;
+    switch (PY_PARSE_NODE_STRUCT_KIND(body)) {
+        case PN_funcdef:
+            name = compile_funcdef_helper(comp, body);
+            break;
+        case PN_classdef:
+            name = compile_classdef_helper(comp, body);
+            break;
+        default:
+            // shouldn't happen
+            assert(0);
+            break;
+    }
+
+    // call each decorator
+    for (int d = 0; d < n_decorators; d++) {
+        EMIT(call_function, 1, 0, false, false);
+    }
+
+    // store func/class object into name
+    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, name);
+}
+
+// Compiles an undecorated "def": builds the function object via
+// compile_funcdef_helper and stores it under the function's own name.
+void compile_funcdef(compiler_t *comp, py_parse_node_struct_t *pns) {
+ qstr fname = compile_funcdef_helper(comp, pns);
+ // store function object into function name
+ emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, fname);
+}
+
+// Compiles deletion of a single "del" target.
+// Supported targets: a name, a PN_power node ending in a subscript or an
+// attribute, and a parenthesised target or sequence of targets (handled
+// recursively).  Anything else trips an assert.
+void c_del_stmt(compiler_t *comp, py_parse_node_t pn) {
+    if (PY_PARSE_NODE_IS_ID(pn)) {
+        emit_common_delete_id(comp->pass, comp->scope_cur, comp->emit, PY_PARSE_NODE_LEAF_ARG(pn));
+    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_power)) {
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+
+        compile_node(comp, pns->nodes[0]); // base of the power node
+
+        if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
+            py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1];
+            if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_power_trailers) {
+                // several trailers: compile all but the last, which decides the kind of delete
+                int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1);
+                for (int i = 0; i < n - 1; i++) {
+                    compile_node(comp, pns1->nodes[i]);
+                }
+                assert(PY_PARSE_NODE_IS_STRUCT(pns1->nodes[n - 1]));
+                pns1 = (py_parse_node_struct_t*)pns1->nodes[n - 1];
+            }
+            if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_paren) {
+                // SyntaxError: can't delete a function call
+                assert(0);
+            } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_bracket) {
+                compile_node(comp, pns1->nodes[0]);
+                EMIT(delete_subscr);
+            } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_period) {
+                assert(PY_PARSE_NODE_IS_ID(pns1->nodes[0]));
+                EMIT(delete_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0]));
+            } else {
+                // shouldn't happen
+                assert(0);
+            }
+        } else {
+            // shouldn't happen
+            assert(0);
+        }
+
+        if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) {
+            // SyntaxError, cannot delete
+            assert(0);
+        }
+    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_atom_paren)) {
+        pn = ((py_parse_node_struct_t*)pn)->nodes[0];
+        if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_testlist_comp)) {
+            py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+            // TODO perhaps factorise testlist_comp code with other uses of PN_testlist_comp
+
+            if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
+                py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1];
+                if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_testlist_comp_3b) {
+                    // sequence of one item, with trailing comma
+                    assert(PY_PARSE_NODE_IS_NULL(pns1->nodes[0]));
+                    c_del_stmt(comp, pns->nodes[0]);
+                } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_testlist_comp_3c) {
+                    // sequence of many items
+                    int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1);
+                    c_del_stmt(comp, pns->nodes[0]);
+                    for (int i = 0; i < n; i++) {
+                        c_del_stmt(comp, pns1->nodes[i]);
+                    }
+                } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_comp_for) {
+                    // the second node (not pns itself, which is always a
+                    // testlist_comp here) tells us this is a comprehension
+                    // TODO not implemented; can't del comprehension?
+                    assert(0);
+                } else {
+                    // sequence with 2 items
+                    goto sequence_with_2_items;
+                }
+            } else {
+                // sequence with 2 items
+                sequence_with_2_items:
+                c_del_stmt(comp, pns->nodes[0]);
+                c_del_stmt(comp, pns->nodes[1]);
+            }
+        } else {
+            // tuple with 1 element
+            c_del_stmt(comp, pn);
+        }
+    } else {
+        // not implemented
+        assert(0);
+    }
+}
+
+// Compiles a "del" statement by applying c_del_stmt to its target, or to each
+// target when nodes[0] is a PN_exprlist.
+void compile_del_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+ apply_to_single_or_list(comp, pns->nodes[0], PN_exprlist, c_del_stmt);
+}
+
+// Compiles "break" as a break_loop to the current loop's break label.
+// A break_label of 0 is treated as "not inside a loop" and reported.
+// NOTE(review): break_loop is still emitted after the error is printed,
+// unlike the other errors in this file which return — confirm whether
+// that is intended.
+void compile_break_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+ if (comp->break_label == 0) {
+ printf("ERROR: cannot break from here\n");
+ }
+ EMIT(break_loop, comp->break_label);
+}
+
+// Compiles "continue".  Inside a try/except body (except_nest_level > 0) a
+// continue_loop is emitted; otherwise a plain jump to the continue label.
+// A continue_label of 0 is treated as "not inside a loop" and reported.
+// NOTE(review): code is still emitted after the error is printed — confirm
+// whether that is intended.
+void compile_continue_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+ if (comp->continue_label == 0) {
+ printf("ERROR: cannot continue from here\n");
+ }
+ if (comp->except_nest_level > 0) {
+ EMIT(continue_loop, comp->continue_label);
+ } else {
+ EMIT(jump, comp->continue_label);
+ }
+}
+
+// Compiles "return": pushes None for a bare return, otherwise the value.
+// Returning an if-expression is special-cased so that each arm gets its own
+// return_value, to match CPython's optimisation.
+void compile_return_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t value = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_NULL(value)) {
+        // bare return
+        EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+    } else if (!PY_PARSE_NODE_IS_STRUCT_KIND(value, PN_test_if_expr)) {
+        // ordinary expression
+        compile_node(comp, value);
+    } else {
+        // return <a> if <cond> else <b>
+        py_parse_node_struct_t *if_expr = (py_parse_node_struct_t*)value;
+        py_parse_node_struct_t *if_else = (py_parse_node_struct_t*)if_expr->nodes[1];
+
+        int else_label = EMIT(label_new);
+        c_if_cond(comp, if_else->nodes[0], false, else_label); // condition
+        compile_node(comp, if_expr->nodes[0]); // success value
+        EMIT(return_value);
+        EMIT(label_assign, else_label);
+        compile_node(comp, if_else->nodes[1]); // failure value
+    }
+
+    EMIT(return_value);
+}
+
+// Compiles a yield used as a statement: compiles nodes[0], then pops the
+// value it leaves on the stack.
+void compile_yield_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+ compile_node(comp, pns->nodes[0]);
+ EMIT(pop_top);
+}
+
+// Compiles "raise", "raise x" and "raise x from y"; the argument of the
+// emitted raise_varargs is the number of values pushed (0, 1 or 2).
+void compile_raise_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t arg = pns->nodes[0];
+    int n_values;
+
+    if (PY_PARSE_NODE_IS_NULL(arg)) {
+        // raise
+        n_values = 0;
+    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(arg, PN_raise_stmt_arg)) {
+        // raise x from y
+        py_parse_node_struct_t *x_from_y = (py_parse_node_struct_t*)arg;
+        compile_node(comp, x_from_y->nodes[0]);
+        compile_node(comp, x_from_y->nodes[1]);
+        n_values = 2;
+    } else {
+        // raise x
+        compile_node(comp, arg);
+        n_values = 1;
+    }
+
+    EMIT(raise_varargs, n_values);
+}
+
+// q1 holds the base, q2 the full name
+// eg a -> q1=q2=a
+// a.b.c -> q1=a, q2=a.b.c
+// For "x as y", q1 is y.  Emits import_name for the full name; for an aliased
+// dotted name it also emits a load_attr per trailing component.
+void do_import_name(compiler_t *comp, py_parse_node_t pn, qstr *q1, qstr *q2) {
+    bool has_alias = false;
+    if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_dotted_as_name)) {
+        // a name of the form x as y; unwrap it
+        py_parse_node_struct_t *as_name = (py_parse_node_struct_t*)pn;
+        *q1 = PY_PARSE_NODE_LEAF_ARG(as_name->nodes[1]);
+        pn = as_name->nodes[0];
+        has_alias = true;
+    }
+
+    if (PY_PARSE_NODE_IS_ID(pn)) {
+        // just a simple name
+        *q2 = PY_PARSE_NODE_LEAF_ARG(pn);
+        if (!has_alias) {
+            *q1 = *q2;
+        }
+        EMIT(import_name, *q2);
+        return;
+    }
+
+    if (!PY_PARSE_NODE_IS_STRUCT(pn)) {
+        // TODO not implemented
+        assert(0);
+        return;
+    }
+
+    py_parse_node_struct_t *dotted = (py_parse_node_struct_t*)pn;
+    if (PY_PARSE_NODE_STRUCT_KIND(dotted) != PN_dotted_name) {
+        // TODO not implemented
+        assert(0);
+        return;
+    }
+
+    // a name of the form a.b.c
+    if (!has_alias) {
+        *q1 = PY_PARSE_NODE_LEAF_ARG(dotted->nodes[0]);
+    }
+
+    // total length: every component plus one '.' between neighbours
+    int n_parts = PY_PARSE_NODE_STRUCT_NUM_NODES(dotted);
+    int total_len = n_parts - 1;
+    for (int k = 0; k < n_parts; k++) {
+        total_len += strlen(qstr_str(PY_PARSE_NODE_LEAF_ARG(dotted->nodes[k])));
+    }
+
+    // join the components with '.'
+    char *full_name = m_new(char, total_len + 1);
+    full_name[0] = 0;
+    for (int k = 0; k < n_parts; k++) {
+        if (k > 0) {
+            strcat(full_name, ".");
+        }
+        strcat(full_name, qstr_str(PY_PARSE_NODE_LEAF_ARG(dotted->nodes[k])));
+    }
+    *q2 = qstr_from_str_take(full_name);
+    EMIT(import_name, *q2);
+
+    if (has_alias) {
+        for (int k = 1; k < n_parts; k++) {
+            EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(dotted->nodes[k]));
+        }
+    }
+}
+
+// Compiles one module of an "import" statement: pushes small int 0 and None,
+// emits the import via do_import_name, then stores the result under the base
+// name (q1) that do_import_name reports.
+void compile_dotted_as_name(compiler_t *comp, py_parse_node_t pn) {
+ EMIT(load_const_small_int, 0); // ??
+ EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+ qstr q1, q2;
+ do_import_name(comp, pn, &q1, &q2);
+ emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, q1);
+}
+
+// Compiles "import a, b.c as d, ...": applies compile_dotted_as_name to
+// nodes[0], or to each entry when it is a PN_dotted_as_names list.
+void compile_import_name(compiler_t *comp, py_parse_node_struct_t *pns) {
+ apply_to_single_or_list(comp, pns->nodes[0], PN_dotted_as_names, compile_dotted_as_name);
+}
+
+// Compiles "from <module> import *" and "from <module> import a, b as c".
+// The imported names are emitted as one verbatim tuple constant before the
+// module itself is imported.
+void compile_import_from(compiler_t *comp, py_parse_node_struct_t *pns) {
+    qstr unused_base, module_name;
+
+    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_STAR)) {
+        // from <module> import *
+        EMIT(load_const_small_int, 0); // what's this for??
+        EMIT(load_const_verbatim_start);
+        EMIT(load_const_verbatim_str, "('*',)");
+        EMIT(load_const_verbatim_end);
+        do_import_name(comp, pns->nodes[0], &unused_base, &module_name);
+        EMIT(import_star);
+        return;
+    }
+
+    // from <module> import <name> [as <alias>], ...
+    py_parse_node_t *items;
+    int n_items = list_get(&pns->nodes[1], PN_import_as_names, &items);
+
+    // build the tuple of imported names as verbatim text
+    EMIT(load_const_small_int, 0); // what's this for??
+    EMIT(load_const_verbatim_start);
+    EMIT(load_const_verbatim_str, "(");
+    for (int k = 0; k < n_items; k++) {
+        assert(PY_PARSE_NODE_IS_STRUCT_KIND(items[k], PN_import_as_name));
+        py_parse_node_struct_t *item = (py_parse_node_struct_t*)items[k];
+        qstr imported = PY_PARSE_NODE_LEAF_ARG(item->nodes[0]); // should be id
+        if (k > 0) {
+            EMIT(load_const_verbatim_str, ", ");
+        }
+        EMIT(load_const_verbatim_str, "'");
+        EMIT(load_const_verbatim_str, qstr_str(imported));
+        EMIT(load_const_verbatim_str, "'");
+    }
+    if (n_items == 1) {
+        // a one-element tuple needs its trailing comma
+        EMIT(load_const_verbatim_str, ",");
+    }
+    EMIT(load_const_verbatim_str, ")");
+    EMIT(load_const_verbatim_end);
+
+    do_import_name(comp, pns->nodes[0], &unused_base, &module_name);
+
+    // fetch each name from the module and bind it, under its alias if one was given
+    for (int k = 0; k < n_items; k++) {
+        assert(PY_PARSE_NODE_IS_STRUCT_KIND(items[k], PN_import_as_name));
+        py_parse_node_struct_t *item = (py_parse_node_struct_t*)items[k];
+        qstr imported = PY_PARSE_NODE_LEAF_ARG(item->nodes[0]); // should be id
+        EMIT(import_from, imported);
+        if (!PY_PARSE_NODE_IS_NULL(item->nodes[1])) {
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, PY_PARSE_NODE_LEAF_ARG(item->nodes[1]));
+        } else {
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, imported);
+        }
+    }
+    EMIT(pop_top);
+}
+
+// Compiles "global a, b, ...": declares each listed name global in the
+// current scope.  nodes[0] is either a single leaf or a struct of leaves.
+void compile_global_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t names = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_LEAF(names)) {
+        // a single name
+        emit_common_declare_global(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(names));
+        return;
+    }
+
+    // several names
+    py_parse_node_struct_t *name_list = (py_parse_node_struct_t*)names;
+    int count = PY_PARSE_NODE_STRUCT_NUM_NODES(name_list);
+    for (int k = 0; k < count; k++) {
+        emit_common_declare_global(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(name_list->nodes[k]));
+    }
+}
+
+// Compiles "nonlocal a, b, ...": declares each listed name nonlocal in the
+// current scope.  nodes[0] is either a single leaf or a struct of leaves.
+void compile_nonlocal_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t names = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_LEAF(names)) {
+        // a single name
+        emit_common_declare_nonlocal(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(names));
+        return;
+    }
+
+    // several names
+    py_parse_node_struct_t *name_list = (py_parse_node_struct_t*)names;
+    int count = PY_PARSE_NODE_STRUCT_NUM_NODES(name_list);
+    for (int k = 0; k < count; k++) {
+        emit_common_declare_nonlocal(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(name_list->nodes[k]));
+    }
+}
+
+// Compiles "assert cond[, msg]": jumps past the raise when cond is true,
+// otherwise loads the assertion-error name, calls it with msg when one is
+// given, and raises the result.
+void compile_assert_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int passed_label = EMIT(label_new);
+    c_if_cond(comp, pns->nodes[0], true, passed_label);
+
+    emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr_assertion_error);
+
+    py_parse_node_t message = pns->nodes[1];
+    if (!PY_PARSE_NODE_IS_NULL(message)) {
+        // assertion message
+        compile_node(comp, message);
+        EMIT(call_function, 1, 0, false, false);
+    }
+
+    EMIT(raise_varargs, 1);
+    EMIT(label_assign, passed_label);
+}
+
+// Compiles if / elif... / else.  Every test jumps to its own fail label when
+// false; every block ends with a jump to the shared end label unless the last
+// thing emitted was a return_value (simple optimisation to align with CPython).
+void compile_if_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // TODO proper and/or short circuiting
+
+    int end_label = EMIT(label_new);
+
+    // the leading "if"
+    int fail_label = EMIT(label_new);
+    c_if_cond(comp, pns->nodes[0], false, fail_label); // if condition
+    compile_node(comp, pns->nodes[1]); // if block
+    // jump over elif/else blocks; done even when there are none, to align with CPython
+    if (!emit_last_emit_was_return_value(comp->emit)) {
+        EMIT(jump, end_label);
+    }
+    EMIT(label_assign, fail_label);
+
+    // gather the elif clauses: nodes[2] is null, a single clause, or a list of clauses
+    py_parse_node_t *elif_clauses = NULL;
+    int n_elif = 0;
+    if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) {
+        py_parse_node_struct_t *elif_node = (py_parse_node_struct_t*)pns->nodes[2];
+        if (PY_PARSE_NODE_STRUCT_KIND(elif_node) == PN_if_stmt_elif_list) {
+            // multiple elif blocks
+            elif_clauses = elif_node->nodes;
+            n_elif = PY_PARSE_NODE_STRUCT_NUM_NODES(elif_node);
+        } else {
+            // a single elif block
+            elif_clauses = &pns->nodes[2];
+            n_elif = 1;
+        }
+    }
+
+    // compile elif blocks
+    for (int k = 0; k < n_elif; k++) {
+        py_parse_node_struct_t *clause = (py_parse_node_struct_t*)elif_clauses[k];
+        fail_label = EMIT(label_new);
+        c_if_cond(comp, clause->nodes[0], false, fail_label); // elif condition
+        compile_node(comp, clause->nodes[1]); // elif block
+        if (!emit_last_emit_was_return_value(comp->emit)) {
+            EMIT(jump, end_label);
+        }
+        EMIT(label_assign, fail_label);
+    }
+
+    // compile else block
+    compile_node(comp, pns->nodes[3]); // can be null
+
+    EMIT(label_assign, end_label);
+}
+
+// Compiles "while cond: body [else: ...]".
+// The enclosing loop's break/continue labels are saved on entry and restored
+// before the else block is compiled, so that break/continue inside the else
+// block refer to the outer loop.
+void compile_while_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+ int old_break_label = comp->break_label;
+ int old_continue_label = comp->continue_label;
+
+ int done_label = EMIT(label_new);
+ int end_label = EMIT(label_new);
+ int break_label = EMIT(label_new);
+ int continue_label = EMIT(label_new);
+
+ comp->break_label = break_label;
+ comp->continue_label = continue_label;
+
+ EMIT(setup_loop, end_label);
+ EMIT(label_assign, continue_label);
+ c_if_cond(comp, pns->nodes[0], false, done_label); // condition
+ compile_node(comp, pns->nodes[1]); // body
+ // no jump back is needed when the body's last emitted op was a return
+ if (!emit_last_emit_was_return_value(comp->emit)) {
+ EMIT(jump, continue_label);
+ }
+ EMIT(label_assign, done_label);
+
+ // break/continue apply to outer loop (if any) in the else block
+ comp->break_label = old_break_label;
+ comp->continue_label = old_continue_label;
+
+ // CPython does not emit POP_BLOCK if the condition was a constant; don't understand why
+ // this is a small hack to agree with CPython
+ if (!node_is_const_true(pns->nodes[0])) {
+ EMIT(pop_block);
+ }
+
+ compile_node(comp, pns->nodes[2]); // else
+
+ EMIT(label_assign, break_label);
+ EMIT(label_assign, end_label);
+}
+
+// Compiles "for target in iterable: body [else: ...]".
+// nodes[0] is the target, nodes[1] the iterable, nodes[2] the body and
+// nodes[3] the else block.  "continue" jumps back to the for_iter; the
+// enclosing loop's labels are restored before the else block is compiled.
+void compile_for_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+ int old_break_label = comp->break_label;
+ int old_continue_label = comp->continue_label;
+
+ int for_label = EMIT(label_new);
+ int pop_label = EMIT(label_new);
+ int end_label = EMIT(label_new);
+
+ int break_label = EMIT(label_new);
+
+ comp->continue_label = for_label;
+ comp->break_label = break_label;
+
+ EMIT(setup_loop, end_label);
+ compile_node(comp, pns->nodes[1]); // iterator
+ EMIT(get_iter);
+ EMIT(label_assign, for_label);
+ EMIT(for_iter, pop_label);
+ c_assign(comp, pns->nodes[0], ASSIGN_STORE); // variable
+ compile_node(comp, pns->nodes[2]); // body
+ // no jump back is needed when the body's last emitted op was a return
+ if (!emit_last_emit_was_return_value(comp->emit)) {
+ EMIT(jump, for_label);
+ }
+ EMIT(label_assign, pop_label);
+ EMIT(for_iter_end);
+
+ // break/continue apply to outer loop (if any) in the else block
+ comp->break_label = old_break_label;
+ comp->continue_label = old_continue_label;
+
+ EMIT(pop_block);
+
+ compile_node(comp, pns->nodes[3]); // else (not tested)
+
+ EMIT(label_assign, break_label);
+ EMIT(label_assign, end_label);
+}
+
+// Compiles try/except[/else].
+//   pn_body    - the try block
+//   n_except   - number of handlers in pn_excepts (each a PN_try_stmt_except)
+//   pn_else    - the else block, may be null
+// comp->except_nest_level is raised while the body and handlers are compiled
+// (compile_continue_stmt reads it) and is lowered again on every exit path.
+// A bare "except:" that is not the last handler prints a SyntaxError and
+// abandons the statement.
+void compile_try_except(compiler_t *comp, py_parse_node_t pn_body, int n_except, py_parse_node_t *pn_excepts, py_parse_node_t pn_else) {
+    // this function is a bit of a hack at the moment
+    // don't understand how the stack works with exceptions, so we force it to return to the correct value
+
+    // setup code
+    int stack_size = EMIT(get_stack_size);
+    int l1 = EMIT(label_new);
+    int success_label = EMIT(label_new);
+    comp->except_nest_level += 1; // for correct handling of continue
+    EMIT(setup_except, l1);
+    compile_node(comp, pn_body); // body
+    EMIT(pop_block);
+    EMIT(jump, success_label);
+    EMIT(label_assign, l1);
+    int l2 = EMIT(label_new);
+
+    for (int i = 0; i < n_except; i++) {
+        assert(PY_PARSE_NODE_IS_STRUCT_KIND(pn_excepts[i], PN_try_stmt_except)); // should be
+        py_parse_node_struct_t *pns_except = (py_parse_node_struct_t*)pn_excepts[i];
+
+        qstr qstr_exception_local = 0;
+        int end_finally_label = EMIT(label_new);
+
+        if (PY_PARSE_NODE_IS_NULL(pns_except->nodes[0])) {
+            // this is a catch all exception handler
+            if (i + 1 != n_except) {
+                printf("SyntaxError: default 'except:' must be last\n");
+                // undo the increment above, otherwise every later 'continue'
+                // would be compiled as if it were inside this try block
+                comp->except_nest_level -= 1;
+                return;
+            }
+        } else {
+            // this exception handler requires a match to a certain type of exception
+            py_parse_node_t pns_exception_expr = pns_except->nodes[0];
+            if (PY_PARSE_NODE_IS_STRUCT(pns_exception_expr)) {
+                py_parse_node_struct_t *pns3 = (py_parse_node_struct_t*)pns_exception_expr;
+                if (PY_PARSE_NODE_STRUCT_KIND(pns3) == PN_try_stmt_as_name) {
+                    // handler binds the exception to a local
+                    pns_exception_expr = pns3->nodes[0];
+                    qstr_exception_local = PY_PARSE_NODE_LEAF_ARG(pns3->nodes[1]);
+                }
+            }
+            EMIT(dup_top);
+            compile_node(comp, pns_exception_expr);
+            EMIT(compare_op, RT_COMPARE_OP_EXCEPTION_MATCH);
+            EMIT(pop_jump_if_false, end_finally_label);
+        }
+
+        EMIT(pop_top);
+
+        if (qstr_exception_local == 0) {
+            EMIT(pop_top);
+        } else {
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local);
+        }
+
+        EMIT(pop_top);
+
+        // when the exception is bound to a name, the handler body is wrapped in
+        // a finally block that clears and deletes that name afterwards
+        int l3 = 0; // only meaningful when qstr_exception_local != 0
+        if (qstr_exception_local != 0) {
+            l3 = EMIT(label_new);
+            EMIT(setup_finally, l3);
+        }
+        compile_node(comp, pns_except->nodes[1]);
+        if (qstr_exception_local != 0) {
+            EMIT(pop_block);
+        }
+        EMIT(pop_except);
+        if (qstr_exception_local != 0) {
+            EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+            EMIT(label_assign, l3);
+            EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local);
+            emit_common_delete_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local);
+            EMIT(end_finally);
+        }
+        EMIT(jump, l2);
+        EMIT(label_assign, end_finally_label);
+    }
+
+    EMIT(end_finally);
+    EMIT(label_assign, success_label);
+    comp->except_nest_level -= 1;
+    compile_node(comp, pn_else); // else block, can be null
+    EMIT(label_assign, l2);
+    EMIT(set_stack_size, stack_size);
+}
+
+// Compiles try[/except/else]/finally.  With n_except == 0 only the body is
+// compiled inside the finally block (and pn_else must be null); otherwise the
+// whole try/except/else is delegated to compile_try_except.
+void compile_try_finally(compiler_t *comp, py_parse_node_t pn_body, int n_except, py_parse_node_t *pn_except, py_parse_node_t pn_else, py_parse_node_t pn_finally) {
+ // don't understand how the stack works with exceptions, so we force it to return to the correct value
+ int stack_size = EMIT(get_stack_size);
+ int l_finally_block = EMIT(label_new);
+ EMIT(setup_finally, l_finally_block);
+ if (n_except == 0) {
+ assert(PY_PARSE_NODE_IS_NULL(pn_else));
+ compile_node(comp, pn_body);
+ } else {
+ compile_try_except(comp, pn_body, n_except, pn_except, pn_else);
+ }
+ EMIT(pop_block);
+ // None is pushed just before the finally block on the normal (no exception) path
+ EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+ EMIT(label_assign, l_finally_block);
+ compile_node(comp, pn_finally);
+ EMIT(end_finally);
+ EMIT(set_stack_size, stack_size);
+}
+
+// Dispatches a try statement on the shape of nodes[1]: try-finally,
+// try-except with optional else and/or finally, or plain try-except.
+void compile_try_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (!PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
+        // shouldn't happen
+        assert(0);
+        return;
+    }
+
+    py_parse_node_struct_t *tail = (py_parse_node_struct_t*)pns->nodes[1];
+    py_parse_node_t *handlers;
+    int n_handlers;
+
+    if (PY_PARSE_NODE_STRUCT_KIND(tail) == PN_try_stmt_finally) {
+        // just try-finally
+        compile_try_finally(comp, pns->nodes[0], 0, NULL, PY_PARSE_NODE_NULL, tail->nodes[0]);
+        return;
+    }
+
+    if (PY_PARSE_NODE_STRUCT_KIND(tail) != PN_try_stmt_except_and_more) {
+        // just try-except
+        n_handlers = list_get(&pns->nodes[1], PN_try_stmt_except_list, &handlers);
+        compile_try_except(comp, pns->nodes[0], n_handlers, handlers, PY_PARSE_NODE_NULL);
+        return;
+    }
+
+    // try-except and possibly else and/or finally
+    n_handlers = list_get(&tail->nodes[0], PN_try_stmt_except_list, &handlers);
+    if (PY_PARSE_NODE_IS_NULL(tail->nodes[2])) {
+        // no finally
+        compile_try_except(comp, pns->nodes[0], n_handlers, handlers, tail->nodes[1]);
+    } else {
+        // have finally
+        compile_try_finally(comp, pns->nodes[0], n_handlers, handlers, tail->nodes[1], ((py_parse_node_struct_t*)tail->nodes[2])->nodes[0]);
+    }
+}
+
+// Compiles the n remaining context managers in nodes around body, nesting one
+// with-block per manager; with n == 0 it simply compiles body.
+void compile_with_stmt_helper(compiler_t *comp, int n, py_parse_node_t *nodes, py_parse_node_t body) {
+    if (n == 0) {
+        // no more pre-bits, compile the body of the with
+        compile_node(comp, body);
+        return;
+    }
+
+    int cleanup_label = EMIT(label_new);
+    py_parse_node_t item = nodes[0];
+    if (!PY_PARSE_NODE_IS_STRUCT_KIND(item, PN_with_item)) {
+        // this pre-bit is just an expression
+        compile_node(comp, item);
+        EMIT(setup_with, cleanup_label);
+        EMIT(pop_top);
+    } else {
+        // this pre-bit is of the form "a as b"
+        py_parse_node_struct_t *with_item = (py_parse_node_struct_t*)item;
+        compile_node(comp, with_item->nodes[0]);
+        EMIT(setup_with, cleanup_label);
+        c_assign(comp, with_item->nodes[1], ASSIGN_STORE);
+    }
+
+    // compile additional pre-bits and the body
+    compile_with_stmt_helper(comp, n - 1, nodes + 1, body);
+
+    // finish this with block
+    EMIT(pop_block);
+    EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+    EMIT(label_assign, cleanup_label);
+    EMIT(with_cleanup);
+    EMIT(end_finally);
+}
+
+// Compiles a "with" statement: nodes[0] holds the context manager item(s)
+// and nodes[1] the body.  The items are compiled as nested with-blocks by
+// compile_with_stmt_helper.
+void compile_with_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+ // get the nodes for the pre-bit of the with (the a as b, c as d, ... bit)
+ py_parse_node_t *nodes;
+ int n = list_get(&pns->nodes[0], PN_with_stmt_list, &nodes);
+ assert(n > 0);
+
+ // compile in a nested fashion
+ compile_with_stmt_helper(comp, n, nodes, pns->nodes[1]);
+}
+
+// Compile an expression statement.  nodes[0] is the left-most expression;
+// nodes[1] is null (bare expression), an augmented-assignment node, a
+// chained-assignment list (a = b = c) or a single-assignment node.
+void compile_expr_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_lhs = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_NULL(pns->nodes[1])) {
+        // just an expression; a lonely constant (a leaf that is not an id) generates no code
+        bool lonely_constant = PY_PARSE_NODE_IS_LEAF(pn_lhs) && !PY_PARSE_NODE_IS_ID(pn_lhs);
+        if (!lonely_constant) {
+            compile_node(comp, pn_lhs);
+            EMIT(pop_top); // a statement leaves nothing on the stack, so drop the result
+        }
+        return;
+    }
+
+    py_parse_node_struct_t *pns_tail = (py_parse_node_struct_t*)pns->nodes[1];
+    int tail_kind = PY_PARSE_NODE_STRUCT_KIND(pns_tail);
+
+    if (tail_kind == PN_expr_stmt_augassign) {
+        c_assign(comp, pn_lhs, ASSIGN_AUG_LOAD); // load the target
+        compile_node(comp, pns_tail->nodes[1]); // evaluate the right-hand side
+        assert(PY_PARSE_NODE_IS_TOKEN(pns_tail->nodes[0]));
+        // separate in-place ops are not strictly needed (plain binary ops would do),
+        // but each augmented-assignment token is mapped onto its in-place op here
+        switch (PY_PARSE_NODE_LEAF_ARG(pns_tail->nodes[0])) {
+            case PY_TOKEN_DEL_PIPE_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_OR);
+                break;
+            case PY_TOKEN_DEL_CARET_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_XOR);
+                break;
+            case PY_TOKEN_DEL_AMPERSAND_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_AND);
+                break;
+            case PY_TOKEN_DEL_DBL_LESS_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_LSHIFT);
+                break;
+            case PY_TOKEN_DEL_DBL_MORE_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_RSHIFT);
+                break;
+            case PY_TOKEN_DEL_PLUS_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_ADD);
+                break;
+            case PY_TOKEN_DEL_MINUS_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_SUBTRACT);
+                break;
+            case PY_TOKEN_DEL_STAR_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_MULTIPLY);
+                break;
+            case PY_TOKEN_DEL_DBL_SLASH_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_FLOOR_DIVIDE);
+                break;
+            case PY_TOKEN_DEL_SLASH_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_TRUE_DIVIDE);
+                break;
+            case PY_TOKEN_DEL_PERCENT_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_MODULO);
+                break;
+            case PY_TOKEN_DEL_DBL_STAR_EQUAL:
+                EMIT(binary_op, RT_BINARY_OP_INPLACE_POWER);
+                break;
+            default:
+                assert(0); // shouldn't happen
+        }
+        c_assign(comp, pn_lhs, ASSIGN_AUG_STORE); // store the combined value back
+        return;
+    }
+
+    if (tail_kind == PN_expr_stmt_assign_list) {
+        // chained assignment: the value to assign hangs off the last node of the list
+        int last = PY_PARSE_NODE_STRUCT_NUM_NODES(pns_tail) - 1;
+        py_parse_node_struct_t *pns_value = (py_parse_node_struct_t*)pns_tail->nodes[last];
+        compile_node(comp, pns_value->nodes[0]);
+        // following CPython, store left-most first; the targets are the lhs followed
+        // by list nodes 0..last-1, and every target but the final one needs a copy
+        py_parse_node_t pn_target = pn_lhs;
+        for (int i = 0; i <= last; i++) {
+            bool final_target = (i == last);
+            if (!final_target) {
+                EMIT(dup_top);
+            }
+            c_assign(comp, pn_target, ASSIGN_STORE);
+            if (!final_target) {
+                pn_target = ((py_parse_node_struct_t*)pns_tail->nodes[i])->nodes[0];
+            }
+        }
+        return;
+    }
+
+    if (tail_kind == PN_expr_stmt_assign) {
+        py_parse_node_t pn_rhs = pns_tail->nodes[0];
+
+        // to match CPython's optimisation, "a, b = c, d" and "a, b, c = d, e, f"
+        // are compiled without building a tuple
+        int tuple_len = 0; // 2 or 3 when the shortcut applies, otherwise 0
+        if (PY_PARSE_NODE_IS_STRUCT_KIND(pn_rhs, PN_testlist_star_expr)
+            && PY_PARSE_NODE_IS_STRUCT_KIND(pn_lhs, PN_testlist_star_expr)) {
+            int n_rhs = PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pn_rhs);
+            int n_lhs = PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pn_lhs);
+            if (n_rhs == n_lhs && (n_rhs == 2 || n_rhs == 3)) {
+                tuple_len = n_rhs;
+            }
+        }
+
+        if (tuple_len == 0) {
+            // general case: evaluate the rhs, then store into the lhs
+            compile_node(comp, pn_rhs);
+            c_assign(comp, pn_lhs, ASSIGN_STORE);
+            return;
+        }
+
+        py_parse_node_struct_t *pns_values = (py_parse_node_struct_t*)pn_rhs;
+        py_parse_node_struct_t *pns_targets = (py_parse_node_struct_t*)pn_lhs;
+        for (int k = 0; k < tuple_len; k++) {
+            compile_node(comp, pns_values->nodes[k]); // rhs values, left to right
+        }
+        // reverse the values on the stack so the stores can run left to right
+        if (tuple_len == 3) {
+            EMIT(rot_three);
+        }
+        EMIT(rot_two);
+        for (int k = 0; k < tuple_len; k++) {
+            c_assign(comp, pns_targets->nodes[k], ASSIGN_STORE); // lhs stores
+        }
+        return;
+    }
+
+    // shouldn't happen
+    assert(0);
+}
+
+// Compile a left-to-right chain of operands that are all joined by the same
+// binary operation: the first operand is compiled, then each following
+// operand is compiled and combined with the running result via binary_op.
+void c_binary_op(compiler_t *comp, py_parse_node_struct_t *pns, rt_binary_op_t binary_op) {
+    const int operand_count = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    compile_node(comp, pns->nodes[0]);
+    int idx = 1;
+    while (idx < operand_count) {
+        compile_node(comp, pns->nodes[idx]);
+        EMIT(binary_op, binary_op);
+        idx += 1;
+    }
+}
+
+// Compile a conditional expression.  nodes[0] is the value used when the
+// condition holds; nodes[1] is a PN_test_if_else node carrying the condition
+// (its nodes[0]) and the alternative value (its nodes[1]).
+void compile_test_if_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_tail = pns->nodes[1];
+    assert(PY_PARSE_NODE_IS_STRUCT_KIND(pn_tail, PN_test_if_else));
+    py_parse_node_struct_t *pns_if_else = (py_parse_node_struct_t*)pn_tail;
+
+    // record the stack size before either arm is compiled
+    int saved_stack_size = EMIT(get_stack_size);
+    int label_else = EMIT(label_new);
+    int label_done = EMIT(label_new);
+
+    // condition; presumably branches to label_else when it is false
+    c_if_cond(comp, pns_if_else->nodes[0], false, label_else);
+
+    // success value, then skip over the alternative
+    compile_node(comp, pns->nodes[0]);
+    EMIT(jump, label_done);
+
+    // failure value; it starts from the stack size recorded above
+    EMIT(label_assign, label_else);
+    EMIT(set_stack_size, saved_stack_size);
+    compile_node(comp, pns_if_else->nodes[1]);
+
+    EMIT(label_assign, label_done);
+}
+
+// Compile a lambda expression: create its scope on the first pass, then emit
+// the code that makes the lambda object.
+void compile_lambdef(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // TODO default params etc for lambda; possibly just use funcdef code
+    // (nodes[0] are the parameters and nodes[1] is the body; neither is used here)
+
+    // The scope is created once, during pass 1, and kept in nodes[2] so that
+    // this function can find it again on every later pass.
+    if (comp->pass == PASS_1) {
+        pns->nodes[2] = (py_parse_node_t)scope_new_and_link(comp, SCOPE_LAMBDA, (py_parse_node_t)pns);
+    }
+    scope_t *lambda_scope = (scope_t*)pns->nodes[2];
+
+    // make the lambda
+    close_over_variables_etc(comp, lambda_scope, 0, 0);
+}
+
+// Compile a short-circuit `or` chain: between consecutive operands a
+// jump_if_true_or_pop to the end label is emitted.
+void compile_or_test(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int label_done = EMIT(label_new);
+    int operand_count = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    for (int idx = 0; idx < operand_count; idx++) {
+        if (idx > 0) {
+            // the previous operand decides whether the rest is evaluated at all
+            EMIT(jump_if_true_or_pop, label_done);
+        }
+        compile_node(comp, pns->nodes[idx]);
+    }
+    EMIT(label_assign, label_done);
+}
+
+// Compile a short-circuit `and` chain: between consecutive operands a
+// jump_if_false_or_pop to the end label is emitted.
+void compile_and_test(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int label_done = EMIT(label_new);
+    int operand_count = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    for (int idx = 0; idx < operand_count; idx++) {
+        if (idx > 0) {
+            // the previous operand decides whether the rest is evaluated at all
+            EMIT(jump_if_false_or_pop, label_done);
+        }
+        compile_node(comp, pns->nodes[idx]);
+    }
+    EMIT(label_assign, label_done);
+}
+
+// Compile a logical negation: the operand (nodes[0]) followed by RT_UNARY_OP_NOT.
+void compile_not_test_2(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_operand = pns->nodes[0];
+    compile_node(comp, pn_operand);
+    EMIT(unary_op, RT_UNARY_OP_NOT);
+}
+
+// Compile a comparison.  The nodes alternate operand, operator, operand, ...
+// With more than one operator (a chained comparison such as a < b < c) each
+// intermediate operand is duplicated so it can serve both neighbouring
+// comparisons, and a false intermediate result jumps to a shared failure label.
+void compile_comparison(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int base_stack_size = EMIT(get_stack_size);
+    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    compile_node(comp, pns->nodes[0]);
+
+    bool chained = (num_nodes > 3);
+    int l_fail = 0;
+    if (chained) {
+        l_fail = EMIT(label_new);
+    }
+
+    for (int op_idx = 1; op_idx + 1 < num_nodes; op_idx += 2) {
+        py_parse_node_t pn_op = pns->nodes[op_idx];
+        bool more_follow = (op_idx + 2 < num_nodes);
+
+        // right-hand operand of this comparison
+        compile_node(comp, pns->nodes[op_idx + 1]);
+        if (more_follow) {
+            // keep a copy of it below the pair being compared, for the next comparison
+            EMIT(dup_top);
+            EMIT(rot_three);
+        }
+
+        if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_LESS)) {
+            EMIT(compare_op, RT_COMPARE_OP_LESS);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_MORE)) {
+            EMIT(compare_op, RT_COMPARE_OP_MORE);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_DBL_EQUAL)) {
+            EMIT(compare_op, RT_COMPARE_OP_EQUAL);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_LESS_EQUAL)) {
+            EMIT(compare_op, RT_COMPARE_OP_LESS_EQUAL);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_MORE_EQUAL)) {
+            EMIT(compare_op, RT_COMPARE_OP_MORE_EQUAL);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_NOT_EQUAL)) {
+            EMIT(compare_op, RT_COMPARE_OP_NOT_EQUAL);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_KW_IN)) {
+            EMIT(compare_op, RT_COMPARE_OP_IN);
+        } else if (PY_PARSE_NODE_IS_STRUCT(pn_op)) {
+            // the operators that are not a single token arrive as struct nodes
+            py_parse_node_struct_t *pns_op = (py_parse_node_struct_t*)pn_op;
+            int op_kind = PY_PARSE_NODE_STRUCT_KIND(pns_op);
+            if (op_kind == PN_comp_op_not_in) {
+                EMIT(compare_op, RT_COMPARE_OP_NOT_IN);
+            } else if (op_kind == PN_comp_op_is) {
+                // a null child selects IS, anything else selects IS_NOT
+                if (PY_PARSE_NODE_IS_NULL(pns_op->nodes[0])) {
+                    EMIT(compare_op, RT_COMPARE_OP_IS);
+                } else {
+                    EMIT(compare_op, RT_COMPARE_OP_IS_NOT);
+                }
+            } else {
+                // shouldn't happen
+                assert(0);
+            }
+        } else {
+            // shouldn't happen
+            assert(0);
+        }
+
+        if (more_follow) {
+            EMIT(jump_if_false_or_pop, l_fail);
+        }
+    }
+
+    if (chained) {
+        int l_end = EMIT(label_new);
+        EMIT(jump, l_end);
+        // failure path: remove the saved operand copy that sits under the result
+        EMIT(label_assign, l_fail);
+        EMIT(rot_two);
+        EMIT(pop_top);
+        EMIT(label_assign, l_end);
+        EMIT(set_stack_size, base_stack_size + 1); // force stack size
+    }
+}
+
+// Star expressions are not implemented: this always trips the assertion.
+// If assertions are compiled out, only the operand (nodes[0]) is compiled and
+// no star operation is emitted.
+void compile_star_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // TODO
+    assert(0);
+    py_parse_node_t pn_operand = pns->nodes[0];
+    compile_node(comp, pn_operand);
+    //EMIT(unary_op, "UNARY_STAR");
+}
+
+// Compile a chain of operands joined by RT_BINARY_OP_OR.
+void compile_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const rt_binary_op_t op = RT_BINARY_OP_OR;
+    c_binary_op(comp, pns, op);
+}
+
+// Compile a chain of operands joined by RT_BINARY_OP_XOR.
+void compile_xor_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const rt_binary_op_t op = RT_BINARY_OP_XOR;
+    c_binary_op(comp, pns, op);
+}
+
+// Compile a chain of operands joined by RT_BINARY_OP_AND.
+void compile_and_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const rt_binary_op_t op = RT_BINARY_OP_AND;
+    c_binary_op(comp, pns, op);
+}
+
+// Compile a shift chain.  The nodes alternate operand, operator token,
+// operand, ...; each operator is applied to the running result and the
+// operand that follows it.
+void compile_shift_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    compile_node(comp, pns->nodes[0]);
+    for (int op_idx = 1; op_idx + 1 < num_nodes; op_idx += 2) {
+        py_parse_node_t pn_op = pns->nodes[op_idx];
+        compile_node(comp, pns->nodes[op_idx + 1]);
+
+        rt_binary_op_t op;
+        if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_DBL_LESS)) {
+            op = RT_BINARY_OP_LSHIFT;
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_DBL_MORE)) {
+            op = RT_BINARY_OP_RSHIFT;
+        } else {
+            // shouldn't happen; nothing is emitted for an unknown operator
+            assert(0);
+            continue;
+        }
+        EMIT(binary_op, op);
+    }
+}
+
+// Compile an additive chain.  The nodes alternate operand, operator token,
+// operand, ...; each operator is applied to the running result and the
+// operand that follows it.
+void compile_arith_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    compile_node(comp, pns->nodes[0]);
+    for (int op_idx = 1; op_idx + 1 < num_nodes; op_idx += 2) {
+        py_parse_node_t pn_op = pns->nodes[op_idx];
+        compile_node(comp, pns->nodes[op_idx + 1]);
+
+        rt_binary_op_t op;
+        if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_PLUS)) {
+            op = RT_BINARY_OP_ADD;
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_MINUS)) {
+            op = RT_BINARY_OP_SUBTRACT;
+        } else {
+            // shouldn't happen; nothing is emitted for an unknown operator
+            assert(0);
+            continue;
+        }
+        EMIT(binary_op, op);
+    }
+}
+
+// Compile a multiplicative chain.  The nodes alternate operand, operator
+// token, operand, ...; each operator is applied to the running result and the
+// operand that follows it.
+void compile_term(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    compile_node(comp, pns->nodes[0]);
+    for (int op_idx = 1; op_idx + 1 < num_nodes; op_idx += 2) {
+        py_parse_node_t pn_op = pns->nodes[op_idx];
+        compile_node(comp, pns->nodes[op_idx + 1]);
+
+        rt_binary_op_t op;
+        if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_STAR)) {
+            op = RT_BINARY_OP_MULTIPLY;
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_DBL_SLASH)) {
+            op = RT_BINARY_OP_FLOOR_DIVIDE;
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_SLASH)) {
+            op = RT_BINARY_OP_TRUE_DIVIDE;
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_PERCENT)) {
+            op = RT_BINARY_OP_MODULO;
+        } else {
+            // shouldn't happen; nothing is emitted for an unknown operator
+            assert(0);
+            continue;
+        }
+        EMIT(binary_op, op);
+    }
+}
+
+// Compile a unary-prefixed factor: nodes[0] is the operator token and
+// nodes[1] the operand.  The operand is compiled first, then the unary op.
+void compile_factor_2(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_op = pns->nodes[0];
+    compile_node(comp, pns->nodes[1]);
+
+    if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_PLUS)) {
+        EMIT(unary_op, RT_UNARY_OP_POSITIVE);
+        return;
+    }
+    if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_MINUS)) {
+        EMIT(unary_op, RT_UNARY_OP_NEGATIVE);
+        return;
+    }
+    if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_TILDE)) {
+        EMIT(unary_op, RT_UNARY_OP_INVERT);
+        return;
+    }
+
+    // shouldn't happen
+    assert(0);
+}
+
+// Compile a call.  The thing being called is already on top of the stack;
+// pns is the parenthesised trailer whose nodes[0] holds the arguments (which
+// can be null).  is_method_call selects call_method instead of call_function.
+void compile_trailer_paren_helper(compiler_t *comp, py_parse_node_struct_t *pns, bool is_method_call) {
+    // save the argument bookkeeping of any enclosing call and start afresh
+    int saved_n_arg_keyword = comp->n_arg_keyword;
+    bool saved_have_star_arg = comp->have_star_arg;
+    bool saved_have_dbl_star_arg = comp->have_dbl_star_arg;
+    comp->n_arg_keyword = 0;
+    comp->have_star_arg = false;
+    comp->have_dbl_star_arg = false;
+
+    // compiling the arguments updates the bookkeeping fields as a side effect
+    py_parse_node_t pn_args = pns->nodes[0];
+    compile_node(comp, pn_args);
+
+    // positional count = all arguments minus keyword, *x and **x arguments
+    int n_positional = list_len(pn_args, PN_arglist) - comp->n_arg_keyword;
+    n_positional -= comp->have_star_arg ? 1 : 0;
+    n_positional -= comp->have_dbl_star_arg ? 1 : 0;
+
+    if (is_method_call) {
+        EMIT(call_method, n_positional, comp->n_arg_keyword, comp->have_star_arg, comp->have_dbl_star_arg);
+    } else {
+        EMIT(call_function, n_positional, comp->n_arg_keyword, comp->have_star_arg, comp->have_dbl_star_arg);
+    }
+
+    // hand the bookkeeping back to the enclosing call
+    comp->n_arg_keyword = saved_n_arg_keyword;
+    comp->have_star_arg = saved_have_star_arg;
+    comp->have_dbl_star_arg = saved_have_dbl_star_arg;
+}
+
+// Compile a sequence of trailers.  A ".name" trailer immediately followed by
+// a "(...)" trailer is fused into a method call (load_method + call_method,
+// an optimisation following PyPy); every other trailer is compiled on its own.
+void compile_power_trailers(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    int idx = 0;
+    while (idx < num_nodes) {
+        bool is_method_call = idx + 1 < num_nodes
+            && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[idx], PN_trailer_period)
+            && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[idx + 1], PN_trailer_paren);
+
+        if (!is_method_call) {
+            compile_node(comp, pns->nodes[idx]);
+            idx += 1;
+            continue;
+        }
+
+        // a.f(...): look the method up, then call it; both trailers are consumed
+        py_parse_node_struct_t *pns_attr = (py_parse_node_struct_t*)pns->nodes[idx];
+        py_parse_node_struct_t *pns_call = (py_parse_node_struct_t*)pns->nodes[idx + 1];
+        EMIT(load_method, PY_PARSE_NODE_LEAF_ARG(pns_attr->nodes[0]));
+        compile_trailer_paren_helper(comp, pns_call, true);
+        idx += 2;
+    }
+}
+
+// Compile the exponent (nodes[0]) and emit RT_BINARY_OP_POWER.  The base is
+// not compiled here; presumably it is already on the stack.
+void compile_power_dbl_star(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_exponent = pns->nodes[0];
+    compile_node(comp, pn_exponent);
+    EMIT(binary_op, RT_BINARY_OP_POWER);
+}
+
+// Compile a list of adjacent string literals as one verbatim constant: an
+// opening single quote, the text of each string in order, a closing quote.
+void compile_atom_string(compiler_t *comp, py_parse_node_struct_t *pns) {
+    EMIT(load_const_verbatim_start);
+    EMIT(load_const_verbatim_str, "'");
+
+    int num_pieces = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    int idx = 0;
+    while (idx < num_pieces) {
+        py_parse_node_t pn_piece = pns->nodes[idx];
+        // TODO allow concatenation of either strings or bytes, but not mixed
+        assert(PY_PARSE_NODE_IS_LEAF(pn_piece));
+        assert(PY_PARSE_NODE_LEAF_KIND(pn_piece) == PY_PARSE_NODE_STRING);
+        const char *text = qstr_str(PY_PARSE_NODE_LEAF_ARG(pn_piece));
+        EMIT(load_const_verbatim_strn, text, strlen(text));
+        idx += 1;
+    }
+
+    EMIT(load_const_verbatim_str, "'");
+    EMIT(load_const_verbatim_end);
+}
+
+// Compile a comprehension or generator expression.
+// pns must have exactly 2 nodes: the left-hand side of the comprehension and
+// a PN_comp_for node.  kind is the kind of scope created for it.
+void compile_comprehension(compiler_t *comp, py_parse_node_struct_t *pns, scope_kind_t kind) {
+    assert(PY_PARSE_NODE_STRUCT_NUM_NODES(pns) == 2);
+    assert(PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_comp_for));
+    py_parse_node_struct_t *pns_for = (py_parse_node_struct_t*)pns->nodes[1];
+
+    // The scope is created once, during pass 1, and kept in nodes[3] of the
+    // comp_for node so that this function can find it again on later passes.
+    if (comp->pass == PASS_1) {
+        pns_for->nodes[3] = (py_parse_node_t)scope_new_and_link(comp, kind, (py_parse_node_t)pns);
+    }
+    scope_t *comprehension_scope = (scope_t*)pns_for->nodes[3];
+
+    // compile the comprehension
+    close_over_variables_etc(comp, comprehension_scope, 0, 0);
+
+    // call it with one positional argument: an iterator over the source
+    compile_node(comp, pns_for->nodes[1]);
+    EMIT(get_iter);
+    EMIT(call_function, 1, 0, false, false);
+}
+
+// Compile a parenthesised atom: an empty tuple, a one-item tuple with a
+// trailing comma, a tuple of several items, a generator expression, or simply
+// a parenthesised single expression.  Tuples are built through c_tuple.
+void compile_atom_paren(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_inner = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_NULL(pn_inner)) {
+        // an empty tuple
+        c_tuple(comp, PY_PARSE_NODE_NULL, NULL);
+        return;
+    }
+
+    if (PY_PARSE_NODE_IS_STRUCT_KIND(pn_inner, PN_testlist_comp)) {
+        py_parse_node_struct_t *pns_items = (py_parse_node_struct_t*)pn_inner;
+        assert(!PY_PARSE_NODE_IS_NULL(pns_items->nodes[1]));
+
+        if (PY_PARSE_NODE_IS_STRUCT(pns_items->nodes[1])) {
+            py_parse_node_struct_t *pns_tail = (py_parse_node_struct_t*)pns_items->nodes[1];
+            int tail_kind = PY_PARSE_NODE_STRUCT_KIND(pns_tail);
+            if (tail_kind == PN_testlist_comp_3b) {
+                // tuple of one item, with trailing comma
+                assert(PY_PARSE_NODE_IS_NULL(pns_tail->nodes[0]));
+                c_tuple(comp, pns_items->nodes[0], NULL);
+                return;
+            }
+            if (tail_kind == PN_testlist_comp_3c) {
+                // tuple of many items: the first one plus everything in the tail
+                c_tuple(comp, pns_items->nodes[0], pns_tail);
+                return;
+            }
+            if (tail_kind == PN_comp_for) {
+                // generator expression
+                compile_comprehension(comp, pns_items, SCOPE_GEN_EXPR);
+                return;
+            }
+            // any other struct is just the second item of a 2-item tuple
+        }
+
+        // tuple with 2 items, both held directly by pns_items
+        c_tuple(comp, PY_PARSE_NODE_NULL, pns_items);
+        return;
+    }
+
+    // parenthesis around a single item, is just that item
+    compile_node(comp, pn_inner);
+}
+
+// Compile a bracketed atom: an empty list, a list of one or more items, or a
+// list comprehension.
+void compile_atom_bracket(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_inner = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_NULL(pn_inner)) {
+        // empty list
+        EMIT(build_list, 0);
+        return;
+    }
+
+    if (PY_PARSE_NODE_IS_STRUCT_KIND(pn_inner, PN_testlist_comp)) {
+        py_parse_node_struct_t *pns_items = (py_parse_node_struct_t*)pn_inner;
+
+        if (PY_PARSE_NODE_IS_STRUCT(pns_items->nodes[1])) {
+            py_parse_node_struct_t *pns_tail = (py_parse_node_struct_t*)pns_items->nodes[1];
+            int tail_kind = PY_PARSE_NODE_STRUCT_KIND(pns_tail);
+            if (tail_kind == PN_testlist_comp_3b) {
+                // list of one item, with trailing comma
+                assert(PY_PARSE_NODE_IS_NULL(pns_tail->nodes[0]));
+                compile_node(comp, pns_items->nodes[0]);
+                EMIT(build_list, 1);
+                return;
+            }
+            if (tail_kind == PN_testlist_comp_3c) {
+                // list of many items: the first one plus everything in the tail
+                compile_node(comp, pns_items->nodes[0]);
+                compile_generic_all_nodes(comp, pns_tail);
+                EMIT(build_list, 1 + PY_PARSE_NODE_STRUCT_NUM_NODES(pns_tail));
+                return;
+            }
+            if (tail_kind == PN_comp_for) {
+                // list comprehension
+                compile_comprehension(comp, pns_items, SCOPE_LIST_COMP);
+                return;
+            }
+            // any other struct is just the second item of a 2-item list
+        }
+
+        // list with 2 items
+        compile_node(comp, pns_items->nodes[0]);
+        compile_node(comp, pns_items->nodes[1]);
+        EMIT(build_list, 2);
+        return;
+    }
+
+    // list with 1 item
+    compile_node(comp, pn_inner);
+    EMIT(build_list, 1);
+}
+
+// Compile a brace atom: an empty dict, a dict or set display, or a dict or
+// set comprehension.  Whether a multi-element display is a dict or a set is
+// decided by its first element (key:value item => dict, plain value => set);
+// a later element of the other form is reported as an error and compilation
+// of this atom stops there.
+void compile_atom_brace(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn = pns->nodes[0];
+    if (PY_PARSE_NODE_IS_NULL(pn)) {
+        // empty dict
+        EMIT(build_map, 0);
+    } else if (PY_PARSE_NODE_IS_STRUCT(pn)) {
+        pns = (py_parse_node_struct_t*)pn;
+        if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_dictorsetmaker_item) {
+            // dict with one element
+            EMIT(build_map, 1);
+            compile_node(comp, pn);
+            EMIT(store_map);
+        } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_dictorsetmaker) {
+            assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should succeed
+            py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1];
+            if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_dictorsetmaker_list) {
+                // dict/set with multiple elements
+
+                // get tail elements (2nd, 3rd, ...)
+                py_parse_node_t *nodes;
+                int n = list_get(&pns1->nodes[0], PN_dictorsetmaker_list2, &nodes);
+
+                // first element sets whether it's a dict or set
+                bool is_dict;
+                if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_dictorsetmaker_item)) {
+                    // a dictionary
+                    EMIT(build_map, 1 + n);
+                    compile_node(comp, pns->nodes[0]);
+                    EMIT(store_map);
+                    is_dict = true;
+                } else {
+                    // a set
+                    compile_node(comp, pns->nodes[0]); // 1st value of set
+                    is_dict = false;
+                }
+
+                // process rest of elements
+                for (int i = 0; i < n; i++) {
+                    // distinct name so the outer pn is not shadowed
+                    py_parse_node_t pn_elem = nodes[i];
+                    bool is_key_value = PY_PARSE_NODE_IS_STRUCT_KIND(pn_elem, PN_dictorsetmaker_item);
+                    compile_node(comp, pn_elem);
+                    if (is_dict) {
+                        if (!is_key_value) {
+                            // newline-terminated, like every other diagnostic in this file
+                            printf("SyntaxError?: expecting key:value for dictionary\n");
+                            return;
+                        }
+                        EMIT(store_map);
+                    } else {
+                        if (is_key_value) {
+                            printf("SyntaxError?: expecting just a value for set\n");
+                            return;
+                        }
+                    }
+                }
+
+                // if it's a set, build it
+                if (!is_dict) {
+                    EMIT(build_set, 1 + n);
+                }
+            } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_comp_for) {
+                // dict/set comprehension; again the first element decides which
+                if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_dictorsetmaker_item)) {
+                    // a dictionary comprehension
+                    compile_comprehension(comp, pns, SCOPE_DICT_COMP);
+                } else {
+                    // a set comprehension
+                    compile_comprehension(comp, pns, SCOPE_SET_COMP);
+                }
+            } else {
+                // shouldn't happen
+                assert(0);
+            }
+        } else {
+            // any other struct is the single value of a one-element set
+            goto set_with_one_element;
+        }
+    } else {
+        // set with one element
+        set_with_one_element:
+        compile_node(comp, pn);
+        EMIT(build_set, 1);
+    }
+}
+
+// Compile a plain (non-method) call trailer.
+void compile_trailer_paren(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const bool is_method_call = false;
+    compile_trailer_paren_helper(comp, pns, is_method_call);
+}
+
+// Compile a subscript trailer.  The object being indexed is already on top of
+// the stack; the index (nodes[0]) is compiled and RT_BINARY_OP_SUBSCR emitted.
+void compile_trailer_bracket(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_index = pns->nodes[0];
+    compile_node(comp, pn_index);
+    EMIT(binary_op, RT_BINARY_OP_SUBSCR);
+}
+
+// Compile an attribute-access trailer.  The object whose attribute is wanted
+// is already on top of the stack; nodes[0] names the attribute.
+void compile_trailer_period(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_attr = pns->nodes[0];
+    EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(pn_attr));
+}
+
+// Compile everything after the first ':' of a slice and emit build_slice.
+// The slice start has already been compiled by the caller (shown as '?' in
+// the comments below).  pns must be a PN_subscript_3 node.
+void compile_subscript_3_helper(compiler_t *comp, py_parse_node_struct_t *pns) {
+    assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_subscript_3); // should always be
+    py_parse_node_t pn_rest = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_NULL(pn_rest)) {
+        // [?:]
+        EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+        EMIT(build_slice, 2);
+        return;
+    }
+
+    if (PY_PARSE_NODE_IS_STRUCT(pn_rest)) {
+        py_parse_node_struct_t *pns_rest = (py_parse_node_struct_t*)pn_rest;
+        int rest_kind = PY_PARSE_NODE_STRUCT_KIND(pns_rest);
+
+        if (rest_kind == PN_subscript_3c) {
+            // no stop value given, so None stands in for it
+            EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+            py_parse_node_t pn_step = pns_rest->nodes[0];
+            if (PY_PARSE_NODE_IS_NULL(pn_step)) {
+                // [?::]
+                EMIT(build_slice, 2);
+            } else {
+                // [?::x]
+                compile_node(comp, pn_step);
+                EMIT(build_slice, 3);
+            }
+            return;
+        }
+
+        if (rest_kind == PN_subscript_3d) {
+            // stop value, followed by a sliceop node that may carry a step
+            compile_node(comp, pns_rest->nodes[0]);
+            assert(PY_PARSE_NODE_IS_STRUCT(pns_rest->nodes[1])); // should always be
+            py_parse_node_struct_t *pns_sliceop = (py_parse_node_struct_t*)pns_rest->nodes[1];
+            assert(PY_PARSE_NODE_STRUCT_KIND(pns_sliceop) == PN_sliceop); // should always be
+            if (PY_PARSE_NODE_IS_NULL(pns_sliceop->nodes[0])) {
+                // [?:x:]
+                EMIT(build_slice, 2);
+            } else {
+                // [?:x:x]
+                compile_node(comp, pns_sliceop->nodes[0]);
+                EMIT(build_slice, 3);
+            }
+            return;
+        }
+        // any other struct is an ordinary stop expression
+    }
+
+    // [?:x]
+    compile_node(comp, pn_rest);
+    EMIT(build_slice, 2);
+}
+
+// Compile a slice that has an explicit start: the start (nodes[0]) is
+// compiled, then the helper handles the remainder (nodes[1]).
+void compile_subscript_2(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_start = pns->nodes[0];
+    compile_node(comp, pn_start);
+
+    py_parse_node_t pn_rest = pns->nodes[1];
+    assert(PY_PARSE_NODE_IS_STRUCT(pn_rest)); // should always be
+    compile_subscript_3_helper(comp, (py_parse_node_struct_t*)pn_rest);
+}
+
+// Compile a slice with no explicit start: None is loaded in its place and
+// the helper handles the remainder.
+void compile_subscript_3(compiler_t *comp, py_parse_node_struct_t *pns) {
+    EMIT(load_const_tok, PY_TOKEN_KW_NONE); // stands in for the missing start
+    compile_subscript_3_helper(comp, pns);
+}
+
+// Compile one key:value pair of a dict.  The value (nodes[1]) is compiled
+// before the key (nodes[0]).
+void compile_dictorsetmaker_item(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_key = pns->nodes[0];
+    py_parse_node_t pn_value = pns->nodes[1];
+    compile_node(comp, pn_value);
+    compile_node(comp, pn_key);
+}
+
+// Compile a class definition and bind the resulting class object to the
+// class's name in the current scope.
+void compile_classdef(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // the helper returns the name of the class
+    qstr class_name = compile_classdef_helper(comp, pns);
+    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, class_name);
+}
+
+// Compile a *x call argument.  Only one is allowed per call; a second one is
+// reported and not compiled.
+void compile_arglist_star(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (!comp->have_star_arg) {
+        comp->have_star_arg = true;
+        compile_node(comp, pns->nodes[0]);
+    } else {
+        printf("SyntaxError?: can't have multiple *x\n");
+    }
+}
+
+// Compile a **x call argument.  Only one is allowed per call; a second one is
+// reported and not compiled.
+void compile_arglist_dbl_star(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (!comp->have_dbl_star_arg) {
+        comp->have_dbl_star_arg = true;
+        compile_node(comp, pns->nodes[0]);
+    } else {
+        printf("SyntaxError?: can't have multiple **x\n");
+    }
+}
+
+// Compile a call argument that has a tail: either a keyword argument
+// (name=value) or a bare generator expression passed as the argument.
+void compile_argument(compiler_t *comp, py_parse_node_struct_t *pns) {
+    assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should always be
+    py_parse_node_struct_t *pns_tail = (py_parse_node_struct_t*)pns->nodes[1];
+    int tail_kind = PY_PARSE_NODE_STRUCT_KIND(pns_tail);
+
+    if (tail_kind == PN_comp_for) {
+        compile_comprehension(comp, pns, SCOPE_GEN_EXPR);
+        return;
+    }
+
+    if (tail_kind == PN_argument_3) {
+        py_parse_node_t pn_name = pns->nodes[0];
+        if (!PY_PARSE_NODE_IS_ID(pn_name)) {
+            printf("SyntaxError?: lhs of keyword argument must be an id\n");
+            return;
+        }
+        // keyword name, then its value; the enclosing call counts the keyword
+        EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(pn_name));
+        compile_node(comp, pns_tail->nodes[0]);
+        comp->n_arg_keyword += 1;
+        return;
+    }
+
+    // shouldn't happen
+    assert(0);
+}
+
+// Compile a yield expression: a bare `yield` (yields None), `yield from x`,
+// or `yield x`.  If the current scope is neither a function nor a lambda the
+// error is reported and nothing is emitted.
+void compile_yield_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // a lambda body is a function body too, so `lambda: (yield)` is legal Python
+    if (comp->scope_cur->kind != SCOPE_FUNCTION && comp->scope_cur->kind != SCOPE_LAMBDA) {
+        printf("SyntaxError: 'yield' outside function\n");
+        return;
+    }
+    if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) {
+        // bare yield: the yielded value is None
+        EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+        EMIT(yield_value);
+    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_yield_arg_from)) {
+        // yield from x: iterate over x; None is loaded before yield_from
+        pns = (py_parse_node_struct_t*)pns->nodes[0];
+        compile_node(comp, pns->nodes[0]);
+        EMIT(get_iter);
+        EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+        EMIT(yield_from);
+    } else {
+        // yield x
+        compile_node(comp, pns->nodes[0]);
+        EMIT(yield_value);
+    }
+}
+
+typedef void (*compile_function_t)(compiler_t*, py_parse_node_struct_t*); // signature shared by the per-rule compile handlers
+static compile_function_t compile_function[] = { // handler table; compile_node indexes it with PY_PARSE_NODE_STRUCT_KIND
+ NULL, // index 0 has no handler
+#define nc NULL // "nc" in a rule yields a NULL entry, which compile_node reports as "cannot be compiled"
+#define c(f) compile_##f // c(f) in a rule names the handler compile_f
+#define DEF_RULE(rule, comp, kind, arg...) comp, // each DEF_RULE contributes only its "comp" argument to the table
+#include "grammar.h" // presumably one DEF_RULE(...) per grammar rule, in rule-kind order; confirm in grammar.h
+#undef nc
+#undef c
+#undef DEF_RULE
+};
+
+// Compile any parse node.  A null node compiles to nothing, a leaf is loaded
+// according to its kind, and a struct node is dispatched to the handler
+// registered for its rule in compile_function.
+void compile_node(compiler_t *comp, py_parse_node_t pn) {
+    if (PY_PARSE_NODE_IS_NULL(pn)) {
+        // nothing to compile
+        return;
+    }
+
+    if (PY_PARSE_NODE_IS_LEAF(pn)) {
+        int leaf_arg = PY_PARSE_NODE_LEAF_ARG(pn);
+        switch (PY_PARSE_NODE_LEAF_KIND(pn)) {
+            case PY_PARSE_NODE_ID:
+                emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, leaf_arg);
+                break;
+            case PY_PARSE_NODE_SMALL_INT:
+                EMIT(load_const_small_int, leaf_arg);
+                break;
+            case PY_PARSE_NODE_INTEGER:
+                EMIT(load_const_int, leaf_arg);
+                break;
+            case PY_PARSE_NODE_DECIMAL:
+                EMIT(load_const_dec, leaf_arg);
+                break;
+            case PY_PARSE_NODE_STRING:
+                EMIT(load_const_str, leaf_arg, false);
+                break;
+            case PY_PARSE_NODE_BYTES:
+                EMIT(load_const_str, leaf_arg, true);
+                break;
+            case PY_PARSE_NODE_TOKEN:
+                EMIT(load_const_tok, leaf_arg);
+                break;
+            default:
+                assert(0);
+        }
+        return;
+    }
+
+    // a struct node: look up the handler for its rule
+    py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+    compile_function_t handler = compile_function[PY_PARSE_NODE_STRUCT_KIND(pns)];
+    if (handler != NULL) {
+        handler(comp, pns);
+        return;
+    }
+
+    // no handler is registered for this rule: dump the node and stop
+    printf("node %u cannot be compiled\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns));
+    parse_node_show(pn, 0);
+    assert(0);
+}
+
+// Register one parameter of a function or lambda with the current scope.
+// pn is the parameter's parse node; pn_name, pn_star and pn_dbl_star are the
+// node kinds that denote a named parameter, a star parameter and a
+// double-star parameter for the grammar in use.  Annotations are looked at
+// only when allow_annotations is true.  Updates the scope's parameter count
+// and flags and adds the name as a local id; a name that is already present
+// in the scope is reported.
+void compile_scope_func_lambda_param(compiler_t *comp, py_parse_node_t pn, pn_kind_t pn_name, pn_kind_t pn_star, pn_kind_t pn_dbl_star, bool allow_annotations) {
+    // TODO verify that *k and **k are last etc
+    assert(PY_PARSE_NODE_IS_STRUCT(pn));
+    py_parse_node_struct_t *pns_param = (py_parse_node_struct_t*)pn;
+    int param_kind = PY_PARSE_NODE_STRUCT_KIND(pns_param);
+
+    qstr name = 0; // stays 0 when there is no name to declare (bare star)
+    py_parse_node_t pn_annotation = PY_PARSE_NODE_NULL;
+
+    if (param_kind == pn_name) {
+        name = PY_PARSE_NODE_LEAF_ARG(pns_param->nodes[0]);
+        if (allow_annotations && !PY_PARSE_NODE_IS_NULL(pns_param->nodes[1])) {
+            // this parameter has an annotation
+            pn_annotation = pns_param->nodes[1];
+        }
+        // (the counts of default/dict parameters are calculated in compile_funcdef_param)
+        if (!comp->have_bare_star) {
+            // anything after a bare star doesn't count as a parameter
+            comp->scope_cur->num_params += 1;
+        }
+    } else if (param_kind == pn_star) {
+        py_parse_node_t pn_star_arg = pns_param->nodes[0];
+        if (PY_PARSE_NODE_IS_NULL(pn_star_arg)) {
+            // bare star
+            // TODO see http://www.python.org/dev/peps/pep-3102/
+            comp->have_bare_star = true;
+        } else if (PY_PARSE_NODE_IS_ID(pn_star_arg)) {
+            // named star
+            comp->scope_cur->flags |= SCOPE_FLAG_VARARGS;
+            name = PY_PARSE_NODE_LEAF_ARG(pn_star_arg);
+        } else if (allow_annotations && PY_PARSE_NODE_IS_STRUCT_KIND(pn_star_arg, PN_tfpdef)) {
+            // named star with annotation
+            comp->scope_cur->flags |= SCOPE_FLAG_VARARGS;
+            py_parse_node_struct_t *pns_tfpdef = (py_parse_node_struct_t*)pn_star_arg;
+            name = PY_PARSE_NODE_LEAF_ARG(pns_tfpdef->nodes[0]);
+            pn_annotation = pns_tfpdef->nodes[1];
+        } else {
+            // shouldn't happen
+            assert(0);
+        }
+    } else if (param_kind == pn_dbl_star) {
+        name = PY_PARSE_NODE_LEAF_ARG(pns_param->nodes[0]);
+        if (allow_annotations && !PY_PARSE_NODE_IS_NULL(pns_param->nodes[1])) {
+            // this parameter has an annotation
+            pn_annotation = pns_param->nodes[1];
+        }
+        comp->scope_cur->flags |= SCOPE_FLAG_VARKEYWORDS;
+    } else {
+        // TODO anything to implement?
+        assert(0);
+    }
+
+    if (name == 0) {
+        return;
+    }
+
+    if (!PY_PARSE_NODE_IS_NULL(pn_annotation)) {
+        // TODO this parameter has an annotation
+    }
+
+    // declare the parameter as a local of the current scope
+    bool newly_added;
+    id_info_t *info = scope_find_or_add_id(comp->scope_cur, name, &newly_added);
+    if (!newly_added) {
+        printf("SyntaxError?: same name used for parameter; %s\n", qstr_str(name));
+        return;
+    }
+    info->param = true;
+    info->kind = ID_INFO_KIND_LOCAL;
+}
+
+// Register one parameter of a function definition: typedargslist node kinds,
+// annotations allowed.
+void compile_scope_func_param(compiler_t *comp, py_parse_node_t pn) {
+    const bool allow_annotations = true;
+    compile_scope_func_lambda_param(comp, pn, PN_typedargslist_name, PN_typedargslist_star, PN_typedargslist_dbl_star, allow_annotations);
+}
+
+// Register one parameter of a lambda: varargslist node kinds, no annotations.
+void compile_scope_lambda_param(compiler_t *comp, py_parse_node_t pn) {
+    const bool allow_annotations = false;
+    compile_scope_func_lambda_param(comp, pn, PN_varargslist_name, PN_varargslist_star, PN_varargslist_dbl_star, allow_annotations);
+}
+
+// Compile the body of a comprehension: the chain of nested `if` and `for`
+// clauses starting at pn_iter, with pn_inner_expr evaluated innermost.
+// l_top is the label passed to c_if_cond for `if` clauses (presumably where a
+// failed condition jumps: the top of the enclosing loop).  for_depth counts
+// the for loops entered so far and is used, plus 2, as the operand of
+// list_append / map_add / set_add.
+void compile_scope_comp_iter(compiler_t *comp, py_parse_node_t pn_iter, py_parse_node_t pn_inner_expr, int l_top, int for_depth) {
+    // work through any run of `if` clauses first
+    while (!PY_PARSE_NODE_IS_NULL(pn_iter) && PY_PARSE_NODE_IS_STRUCT_KIND(pn_iter, PN_comp_if)) {
+        py_parse_node_struct_t *pns_if = (py_parse_node_struct_t*)pn_iter;
+        c_if_cond(comp, pns_if->nodes[0], false, l_top);
+        pn_iter = pns_if->nodes[1];
+    }
+
+    if (PY_PARSE_NODE_IS_NULL(pn_iter)) {
+        // no more nested if/for; compile inner expression and hand it over
+        compile_node(comp, pn_inner_expr);
+        if (comp->scope_cur->kind == SCOPE_LIST_COMP) {
+            EMIT(list_append, for_depth + 2);
+        } else if (comp->scope_cur->kind == SCOPE_DICT_COMP) {
+            EMIT(map_add, for_depth + 2);
+        } else if (comp->scope_cur->kind == SCOPE_SET_COMP) {
+            EMIT(set_add, for_depth + 2);
+        } else {
+            // any other scope kind yields the value
+            EMIT(yield_value);
+            EMIT(pop_top);
+        }
+    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn_iter, PN_comp_for)) {
+        // nested for loop: nodes[0] target, nodes[1] source, nodes[2] remaining clauses
+        py_parse_node_struct_t *pns_for = (py_parse_node_struct_t*)pn_iter;
+        compile_node(comp, pns_for->nodes[1]);
+        int l_loop_end = EMIT(label_new);
+        int l_loop_top = EMIT(label_new);
+        EMIT(get_iter);
+        EMIT(label_assign, l_loop_top);
+        EMIT(for_iter, l_loop_end);
+        c_assign(comp, pns_for->nodes[0], ASSIGN_STORE);
+        compile_scope_comp_iter(comp, pns_for->nodes[2], pn_inner_expr, l_loop_top, for_depth + 1);
+        EMIT(jump, l_loop_top);
+        EMIT(label_assign, l_loop_end);
+        EMIT(for_iter_end);
+    } else {
+        // shouldn't happen
+        assert(0);
+    }
+}
+
+// If the first statement under pn is an expression statement whose first
+// node is a string leaf, compile that string and store it into __doc__.
+// See http://www.python.org/dev/peps/pep-0257/
+void check_for_doc_string(compiler_t *comp, py_parse_node_t pn) {
+    // a file input or a block of statements: step down to its first statement
+    if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_file_input_2)
+        || PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_suite_block_stmts)) {
+        pn = ((py_parse_node_struct_t*)pn)->nodes[0];
+    }
+
+    // only an expression statement can carry a doc string
+    if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_expr_stmt)) {
+        py_parse_node_struct_t *pns_stmt = (py_parse_node_struct_t*)pn;
+        py_parse_node_t pn_first = pns_stmt->nodes[0];
+        if (PY_PARSE_NODE_IS_LEAF(pn_first) && PY_PARSE_NODE_LEAF_KIND(pn_first) == PY_PARSE_NODE_STRING) {
+            compile_node(comp, pn_first); // a doc string
+            // store doc string
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___doc__);
+        }
+    }
+}
+
+// Compile one scope for one pass.
+//
+// comp:  compiler state; comp->pass and comp->scope_cur are set from the arguments
+// scope: the scope to compile; its kind selects what is emitted
+// pass:  the pass to run; the emitter pass is started before and ended after
+//        the scope's code has been generated
+void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) {
+    comp->pass = pass;
+    comp->scope_cur = scope;
+    emit_start_pass(comp->emit, pass, scope);
+
+    // per-pass bookkeeping done before any code is generated
+    switch (comp->pass) {
+        case PASS_1:
+            scope->stack_size = 0;
+            break;
+        case PASS_3:
+            scope_print_info(scope);
+            break;
+        default:
+            break;
+    }
+
+    // compile
+    switch (scope->kind) {
+        case SCOPE_MODULE: {
+            check_for_doc_string(comp, scope->pn);
+            compile_node(comp, scope->pn);
+            // the module code ends by returning None
+            EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+            EMIT(return_value);
+            break;
+        }
+
+        case SCOPE_FUNCTION: {
+            assert(PY_PARSE_NODE_IS_STRUCT(scope->pn));
+            py_parse_node_struct_t *funcdef = (py_parse_node_struct_t*)scope->pn;
+            assert(PY_PARSE_NODE_STRUCT_KIND(funcdef) == PN_funcdef);
+
+            // pass 1 only: work out number of parameters, keywords and default
+            // parameters, and add them to the id_info array
+            if (comp->pass == PASS_1) {
+                comp->have_bare_star = false;
+                apply_to_single_or_list(comp, funcdef->nodes[1], PN_typedargslist, compile_scope_func_param);
+            }
+
+            assert(funcdef->nodes[2] == 0); // 2 is something...
+
+            compile_node(comp, funcdef->nodes[3]); // 3 is function body
+            // return None unless a return was the last thing emitted
+            if (!emit_last_emit_was_return_value(comp->emit)) {
+                EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+                EMIT(return_value);
+            }
+            break;
+        }
+
+        case SCOPE_LAMBDA: {
+            assert(PY_PARSE_NODE_IS_STRUCT(scope->pn));
+            py_parse_node_struct_t *lambdef = (py_parse_node_struct_t*)scope->pn;
+            assert(PY_PARSE_NODE_STRUCT_NUM_NODES(lambdef) == 3);
+
+            // pass 1 only: register the lambda's parameters in the id_info array
+            if (comp->pass == PASS_1) {
+                comp->have_bare_star = false;
+                apply_to_single_or_list(comp, lambdef->nodes[0], PN_varargslist, compile_scope_lambda_param);
+            }
+
+            compile_node(comp, lambdef->nodes[1]); // 1 is lambda body
+            EMIT(return_value);
+            break;
+        }
+
+        case SCOPE_LIST_COMP:
+        case SCOPE_DICT_COMP:
+        case SCOPE_SET_COMP:
+        case SCOPE_GEN_EXPR: {
+            // a bit of a hack at the moment
+
+            assert(PY_PARSE_NODE_IS_STRUCT(scope->pn));
+            py_parse_node_struct_t *comp_node = (py_parse_node_struct_t*)scope->pn;
+            assert(PY_PARSE_NODE_STRUCT_NUM_NODES(comp_node) == 2);
+            assert(PY_PARSE_NODE_IS_STRUCT_KIND(comp_node->nodes[1], PN_comp_for));
+            py_parse_node_struct_t *comp_for = (py_parse_node_struct_t*)comp_node->nodes[1];
+
+            // the scope's single parameter is named ".0"; it is loaded below
+            // and iterated over
+            qstr iter_arg = qstr_from_strn_copy(".0", 2);
+            if (comp->pass == PASS_1) {
+                bool added;
+                id_info_t *arg_info = scope_find_or_add_id(comp->scope_cur, iter_arg, &added);
+                assert(added);
+                arg_info->kind = ID_INFO_KIND_LOCAL;
+                scope->num_params = 1;
+            }
+
+            // list, dict and set comprehensions start from an empty container;
+            // a generator expression builds nothing here
+            if (scope->kind == SCOPE_LIST_COMP) {
+                EMIT(build_list, 0);
+            } else if (scope->kind == SCOPE_DICT_COMP) {
+                EMIT(build_map, 0);
+            } else if (scope->kind == SCOPE_SET_COMP) {
+                EMIT(build_set, 0);
+            }
+
+            // the loop: load ".0", then iterate until for_iter jumps to label_done
+            int label_done = EMIT(label_new);
+            int label_loop = EMIT(label_new);
+            emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, iter_arg);
+            EMIT(label_assign, label_loop);
+            EMIT(for_iter, label_done);
+            c_assign(comp, comp_for->nodes[0], ASSIGN_STORE);
+            compile_scope_comp_iter(comp, comp_for->nodes[2], comp_node->nodes[0], label_loop, 0);
+            EMIT(jump, label_loop);
+            EMIT(label_assign, label_done);
+            EMIT(for_iter_end);
+
+            // a generator expression loads None before returning
+            if (scope->kind == SCOPE_GEN_EXPR) {
+                EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+            }
+            EMIT(return_value);
+            break;
+        }
+
+        default: {
+            // anything that reaches here is expected to be a class
+            assert(scope->kind == SCOPE_CLASS);
+            assert(PY_PARSE_NODE_IS_STRUCT(scope->pn));
+            py_parse_node_struct_t *classdef = (py_parse_node_struct_t*)scope->pn;
+            assert(PY_PARSE_NODE_STRUCT_KIND(classdef) == PN_classdef);
+
+            if (comp->pass == PASS_1) {
+                bool added;
+                id_info_t *info = scope_find_or_add_id(scope, comp->qstr___class__, &added);
+                assert(added);
+                info->kind = ID_INFO_KIND_LOCAL;
+                info = scope_find_or_add_id(scope, comp->qstr___locals__, &added);
+                assert(added);
+                info->kind = ID_INFO_KIND_LOCAL;
+                info->param = true;
+                scope->num_params = 1; // __locals__ is the parameter
+            }
+
+            // class prologue: store locals, then set __module__ and __qualname__
+            emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr___locals__);
+            EMIT(store_locals);
+            emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr___name__);
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___module__);
+            EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(classdef->nodes[0])); // 0 is class name
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___qualname__);
+
+            check_for_doc_string(comp, classdef->nodes[2]);
+            compile_node(comp, classdef->nodes[2]); // 2 is class body
+
+            // return None while __class__ is still a plain local, otherwise
+            // return its closure
+            id_info_t *class_id = scope_find(scope, comp->qstr___class__);
+            assert(class_id != NULL);
+            if (class_id->kind == ID_INFO_KIND_LOCAL) {
+                EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+            } else {
+                EMIT(load_closure, comp->qstr___class__);
+            }
+            EMIT(return_value);
+            break;
+        }
+    }
+
+    emit_end_pass(comp->emit);
+}
+
+// Post-process a scope once pass 1 has seen every identifier:
+//  - in function-like scopes, turn implicit globals into explicit globals
+//  - number the locals and set scope->num_locals
+//  - compute the scope flags (existing flag bits are kept, since some are set
+//    while handling parameters)
+void compile_scope_compute_things(compiler_t *comp, scope_t *scope) {
+    bool is_class = scope->kind == SCOPE_CLASS;
+    bool in_function_range = scope->kind >= SCOPE_FUNCTION && scope->kind <= SCOPE_GEN_EXPR;
+
+    // compute num_locals, and the index of each local
+    scope->num_locals = 0;
+    for (int idx = 0; idx < scope->id_info_len; idx++) {
+        id_info_t *info = &scope->id_info[idx];
+        if (is_class && info->qstr == comp->qstr___class__) {
+            // __class__ is not counted as a local; if it's used then it becomes a ID_INFO_KIND_CELL
+            continue;
+        }
+        if (in_function_range && info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) {
+            info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
+        }
+        if (info->param || info->kind == ID_INFO_KIND_LOCAL) {
+            info->local_num = scope->num_locals;
+            scope->num_locals++;
+        }
+    }
+
+    // compute flags
+    if (scope->kind != SCOPE_MODULE) {
+        scope->flags |= SCOPE_FLAG_NEWLOCALS;
+    }
+    switch (scope->kind) {
+        case SCOPE_FUNCTION:
+        case SCOPE_LAMBDA:
+        case SCOPE_LIST_COMP:
+        case SCOPE_DICT_COMP:
+        case SCOPE_SET_COMP:
+        case SCOPE_GEN_EXPR: {
+            assert(scope->parent != NULL);
+            scope->flags |= SCOPE_FLAG_OPTIMISED;
+
+            // TODO possibly other ways it can be nested
+            bool nested = scope->parent->kind == SCOPE_FUNCTION;
+            if (!nested && scope->parent->kind == SCOPE_CLASS) {
+                nested = scope->parent->parent->kind == SCOPE_FUNCTION;
+            }
+            if (nested) {
+                scope->flags |= SCOPE_FLAG_NESTED;
+            }
+            break;
+        }
+        default:
+            break;
+    }
+
+    // NOFREE is set when no identifier is a cell or free variable
+    bool uses_free_vars = false;
+    for (int idx = 0; idx < scope->id_info_len; idx++) {
+        int kind = scope->id_info[idx].kind;
+        if (kind == ID_INFO_KIND_CELL || kind == ID_INFO_KIND_FREE) {
+            uses_free_vars = true;
+            break;
+        }
+    }
+    if (!uses_free_vars) {
+        scope->flags |= SCOPE_FLAG_NOFREE;
+    }
+}
+
+// Compile a parse tree.
+//
+// Folds constants, creates the module scope, then runs pass 1 over every
+// scope, computes locals and flags for every scope, and finally runs pass 2
+// followed by pass 3 scope by scope.
+//
+// pn: root parse node of the module
+void py_compile(py_parse_node_t pn) {
+    compiler_t *comp = m_new(compiler_t, 1);
+
+    // plain state
+    comp->scope_head = NULL;
+    comp->scope_cur = NULL;
+    comp->except_nest_level = 0;
+    comp->continue_label = 0;
+    comp->break_label = 0;
+
+    // names the compiler refers to directly (interned in the original order)
+    comp->qstr___class__ = qstr_from_strn_copy("__class__", 9);
+    comp->qstr___locals__ = qstr_from_strn_copy("__locals__", 10);
+    comp->qstr___name__ = qstr_from_strn_copy("__name__", 8);
+    comp->qstr___module__ = qstr_from_strn_copy("__module__", 10);
+    comp->qstr___qualname__ = qstr_from_strn_copy("__qualname__", 12);
+    comp->qstr___doc__ = qstr_from_strn_copy("__doc__", 7);
+    comp->qstr_assertion_error = qstr_from_strn_copy("AssertionError", 14);
+
+    // NOTE(review): emit.h declares emit_new() without a prototype and the
+    // byte code emitter defines it with no parameters -- confirm that passing
+    // an argument here is intended for every emitter.
+    comp->emit = emit_new(comp->qstr___class__);
+
+    pn = fold_constants(pn);
+    scope_new_and_link(comp, SCOPE_MODULE, pn);
+
+    scope_t *s;
+
+    // pass 1 over all scopes
+    s = comp->scope_head;
+    while (s != NULL) {
+        compile_scope(comp, s, PASS_1);
+        s = s->next;
+    }
+
+    // locals, flags
+    s = comp->scope_head;
+    while (s != NULL) {
+        compile_scope_compute_things(comp, s);
+        s = s->next;
+    }
+
+    // passes 2 and 3, one scope at a time
+    s = comp->scope_head;
+    while (s != NULL) {
+        compile_scope(comp, s, PASS_2);
+        compile_scope(comp, s, PASS_3);
+        s = s->next;
+    }
+
+    m_free(comp);
+}
diff --git a/py/compile.h b/py/compile.h
new file mode 100644
index 0000000000..339acca0c0
--- /dev/null
+++ b/py/compile.h
@@ -0,0 +1 @@
+void py_compile(py_parse_node_t pn); // compile the parse tree rooted at pn: constant folding, then passes 1-3 over every scope
diff --git a/py/emit.h b/py/emit.h
new file mode 100644
index 0000000000..8cad745dde
--- /dev/null
+++ b/py/emit.h
@@ -0,0 +1,120 @@
+//#define EMIT_DO_CPY
+#define EMIT_DO_BC // selects the byte code emitter: emitbc.c is compiled only under #ifdef EMIT_DO_BC
+//#define EMIT_DO_X64
+//#define EMIT_DO_THUMB // NOTE(review): presumably only one EMIT_DO_* should be enabled at a time -- confirm
+
+/* Notes on passes:
+ * We don't know exactly the opcodes in pass 1 because they depend on the
+ * closing over of variables (LOAD_CLOSURE, BUILD_TUPLE, MAKE_CLOSURE), which
+ * depends on determining the scope of variables in each function, and this
+ * is not known until the end of pass 1.
+ * As a consequence, we don't know the maximum stack size until the end of pass 2.
+ * This is problematic for some emitters (x64) since they need to know the maximum
+ * stack size to compile the entry to the function, and this affects code size.
+ */
+
+typedef enum { // compile passes; py_compile runs PASS_1 on every scope first, then PASS_2 and PASS_3 scope by scope
+ PASS_1 = 1, // work out id's and their kind, and number of labels
+ PASS_2 = 2, // work out stack size and code size and label offsets
+ PASS_3 = 3, // emit code
+} pass_kind_t;
+
+typedef struct _emitter_t emitter_t; // opaque here; emitbc.c defines the struct for the byte code emitter
+
+void emit_common_declare_global(pass_kind_t pass, scope_t *scope, qstr qstr); // pass 1 only: marks qstr as an explicit global in scope
+void emit_common_declare_nonlocal(pass_kind_t pass, scope_t *scope, qstr qstr); // pass 1 only: marks qstr as a free variable and closes over it in the parents
+void emit_common_load_id(pass_kind_t pass, scope_t *scope, qstr qstr___class__, emitter_t *emit, qstr qstr);
+void emit_common_store_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr);
+void emit_common_delete_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr);
+
+emitter_t *emit_new(); // NOTE(review): no prototype; py_compile passes one qstr argument while emitbc.c defines it with none -- confirm
+void emit_set_native_types(emitter_t *emit, bool do_native_types);
+void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope); // called by compile_scope before a scope's code is generated
+void emit_end_pass(emitter_t *emit); // called by compile_scope after a scope's code is generated
+bool emit_last_emit_was_return_value(emitter_t *emit); // lets the compiler skip the implicit return at the end of a function
+int emit_get_stack_size(emitter_t *emit);
+void emit_set_stack_size(emitter_t *emit, int size);
+
+int emit_label_new(emitter_t *emit); // returns a new label number
+void emit_label_assign(emitter_t *emit, int l); // binds label l to the current position in the code
+void emit_import_name(emitter_t *emit, qstr qstr);
+void emit_import_from(emitter_t *emit, qstr qstr);
+void emit_import_star(emitter_t *emit);
+void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok); // the byte code emitter accepts the False, None and True keyword tokens
+void emit_load_const_small_int(emitter_t *emit, int arg);
+void emit_load_const_int(emitter_t *emit, qstr qstr);
+void emit_load_const_dec(emitter_t *emit, qstr qstr);
+void emit_load_const_id(emitter_t *emit, qstr qstr);
+void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes);
+void emit_load_const_verbatim_start(emitter_t *emit); // the verbatim functions are unsupported by the byte code emitter (they assert)
+void emit_load_const_verbatim_int(emitter_t *emit, int val);
+void emit_load_const_verbatim_str(emitter_t *emit, const char *str);
+void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len);
+void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes);
+void emit_load_const_verbatim_end(emitter_t *emit);
+void emit_load_fast(emitter_t *emit, qstr qstr, int local_num);
+void emit_load_name(emitter_t *emit, qstr qstr);
+void emit_load_global(emitter_t *emit, qstr qstr);
+void emit_load_deref(emitter_t *emit, qstr qstr);
+void emit_load_closure(emitter_t *emit, qstr qstr);
+void emit_load_attr(emitter_t *emit, qstr qstr);
+void emit_load_method(emitter_t *emit, qstr qstr);
+void emit_load_build_class(emitter_t *emit);
+void emit_store_fast(emitter_t *emit, qstr qstr, int local_num);
+void emit_store_name(emitter_t *emit, qstr qstr);
+void emit_store_global(emitter_t *emit, qstr qstr);
+void emit_store_deref(emitter_t *emit, qstr qstr);
+void emit_store_attr(emitter_t *emit, qstr qstr);
+void emit_store_locals(emitter_t *emit);
+void emit_store_subscr(emitter_t *emit);
+void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num);
+void emit_delete_name(emitter_t *emit, qstr qstr);
+void emit_delete_global(emitter_t *emit, qstr qstr);
+void emit_delete_deref(emitter_t *emit, qstr qstr);
+void emit_delete_attr(emitter_t *emit, qstr qstr);
+void emit_delete_subscr(emitter_t *emit);
+void emit_dup_top(emitter_t *emit);
+void emit_dup_top_two(emitter_t *emit);
+void emit_pop_top(emitter_t *emit);
+void emit_rot_two(emitter_t *emit);
+void emit_rot_three(emitter_t *emit);
+void emit_jump(emitter_t *emit, int label); // label arguments are numbers obtained from emit_label_new
+void emit_pop_jump_if_true(emitter_t *emit, int label);
+void emit_pop_jump_if_false(emitter_t *emit, int label);
+void emit_jump_if_true_or_pop(emitter_t *emit, int label);
+void emit_jump_if_false_or_pop(emitter_t *emit, int label);
+void emit_setup_loop(emitter_t *emit, int label);
+void emit_break_loop(emitter_t *emit, int label);
+void emit_continue_loop(emitter_t *emit, int label);
+void emit_setup_with(emitter_t *emit, int label);
+void emit_with_cleanup(emitter_t *emit);
+void emit_setup_except(emitter_t *emit, int label);
+void emit_setup_finally(emitter_t *emit, int label);
+void emit_end_finally(emitter_t *emit);
+void emit_get_iter(emitter_t *emit); // tos = getiter(tos)
+void emit_for_iter(emitter_t *emit, int label);
+void emit_for_iter_end(emitter_t *emit); // in the byte code emitter this writes no opcode, it only adjusts the tracked stack depth
+void emit_pop_block(emitter_t *emit);
+void emit_pop_except(emitter_t *emit);
+void emit_unary_op(emitter_t *emit, rt_unary_op_t op);
+void emit_binary_op(emitter_t *emit, rt_binary_op_t op);
+void emit_compare_op(emitter_t *emit, rt_compare_op_t op);
+void emit_build_tuple(emitter_t *emit, int n_args);
+void emit_build_list(emitter_t *emit, int n_args);
+void emit_list_append(emitter_t *emit, int list_stack_index);
+void emit_build_map(emitter_t *emit, int n_args);
+void emit_store_map(emitter_t *emit);
+void emit_map_add(emitter_t *emit, int map_stack_index);
+void emit_build_set(emitter_t *emit, int n_args);
+void emit_set_add(emitter_t *emit, int set_stack_index);
+void emit_build_slice(emitter_t *emit, int n_args);
+void emit_unpack_sequence(emitter_t *emit, int n_args);
+void emit_unpack_ex(emitter_t *emit, int n_left, int n_right);
+void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params);
+void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params);
+void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg);
+void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg);
+void emit_return_value(emitter_t *emit);
+void emit_raise_varargs(emitter_t *emit, int n_args);
+void emit_yield_value(emitter_t *emit);
+void emit_yield_from(emitter_t *emit);
diff --git a/py/emitbc.c b/py/emitbc.c
new file mode 100644
index 0000000000..9d159ae605
--- /dev/null
+++ b/py/emitbc.c
@@ -0,0 +1,692 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "compile.h"
+#include "scope.h"
+#include "runtime.h"
+#include "emit.h"
+#include "bc.h"
+
+#ifdef EMIT_DO_BC
+
+struct _emitter_t { // state of the byte code emitter; py_compile creates one and uses it for every scope and pass
+ int pass; // pass being run, set by emit_start_pass
+ int next_label; // next label number handed out by emit_label_new; restarts at 1 each pass
+ int stack_size; // stack depth tracked while emitting (updated in passes 2 and 3)
+ bool last_emit_was_return_value; // true only when the most recent emit was emit_return_value
+
+ scope_t *scope; // scope being compiled in the current pass
+
+ int max_num_labels; // largest next_label seen at the end of a pass 1
+ uint *label_offsets; // code offset of each label, recorded in pass 2
+
+ uint code_offset; // bytes emitted so far in the current pass
+ uint code_size; // size of the scope's code, taken from code_offset at the end of pass 2
+ byte *code_base; // buffer allocated at the end of pass 2 and written during pass 3
+ byte dummy_data[8]; // scratch space that absorbs writes made before pass 3
+};
+
+emitter_t *emit_new() { // allocate a byte code emitter with no labels and no code buffer yet
+ emitter_t *emit = m_new(emitter_t, 1);
+ emit->max_num_labels = 0;
+ emit->label_offsets = NULL; // allocated on demand by emit_start_pass
+ emit->code_offset = 0;
+ emit->code_size = 0;
+ emit->code_base = NULL; // allocated at the end of pass 2 by emit_end_pass
+ return emit; // the pass, label, stack and scope fields are set by emit_start_pass
+}
+
+uint emit_get_code_size(emitter_t* emit) { // size in bytes recorded at the end of the last pass 2
+ return emit->code_size;
+}
+
+void* emit_get_code(emitter_t* emit) { // buffer allocated at the end of the last pass 2 (NULL before that)
+ return emit->code_base;
+}
+
+// Prepare the emitter for a new pass over `scope`.
+//
+// Resets the per-pass counters.  Pass 1 obtains the scope's unique code id;
+// later passes make sure the label offset table exists, and pass 2 also
+// fills it with the "unassigned" pattern.
+void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
+    // per-pass state
+    emit->scope = scope;
+    emit->pass = pass;
+    emit->last_emit_was_return_value = false;
+    emit->stack_size = 0;
+    emit->next_label = 1;
+    emit->code_offset = 0;
+
+    if (pass == PASS_1) {
+        scope->unique_code_id = rt_get_new_unique_code_id();
+        return;
+    }
+
+    if (pass > PASS_1) {
+        // the table is sized from the label count gathered in pass 1 and is
+        // only allocated while it does not exist yet
+        if (emit->label_offsets == NULL) {
+            emit->label_offsets = m_new(uint, emit->max_num_labels);
+        }
+        // every byte 0xff, so each entry compares equal to -1 until assigned
+        if (pass == PASS_2) {
+            memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(uint));
+        }
+    }
+}
+
+// Finish the current pass.
+//
+// Reports a tracked stack that did not return to zero, then:
+//   pass 1: remembers the largest number of labels needed so far
+//   pass 2: records the code size and allocates the code buffer
+//   pass 3: hands the finished code to the runtime
+void emit_end_pass(emitter_t *emit) {
+    if (emit->stack_size != 0) {
+        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size);
+    }
+
+    switch (emit->pass) {
+        case PASS_1:
+            // calculate number of labels needed
+            if (emit->max_num_labels < emit->next_label) {
+                emit->max_num_labels = emit->next_label;
+            }
+            break;
+
+        case PASS_2:
+            // calculate size of code in bytes
+            emit->code_size = emit->code_offset;
+            emit->code_base = m_new(byte, emit->code_size);
+            printf("code_size: %u\n", emit->code_size);
+            break;
+
+        case PASS_3:
+            rt_assign_byte_code(emit->scope->unique_code_id, emit->code_base, emit->code_size, emit->scope->num_params);
+            break;
+
+        default:
+            break;
+    }
+}
+
+// Reserve num_bytes_to_write bytes at the current code offset and return
+// where they should be written.  All functions must go through this one to
+// emit bytes.
+//
+// Before pass 3 only the offset advances and the returned pointer is the
+// emitter's scratch buffer; in pass 3 it points into the real code buffer.
+static byte* emit_get_cur_to_write_bytes(emitter_t* emit, int num_bytes_to_write) {
+    byte *dest;
+    if (emit->pass >= PASS_3) {
+        assert(emit->code_offset + num_bytes_to_write <= emit->code_size);
+        dest = emit->code_base + emit->code_offset;
+    } else {
+        dest = emit->dummy_data;
+    }
+    emit->code_offset += num_bytes_to_write;
+    return dest;
+}
+
+static void emit_write_byte_1(emitter_t* emit, byte b1) { // write a single byte (an opcode with no operand)
+ byte* c = emit_get_cur_to_write_bytes(emit, 1);
+ c[0] = b1;
+}
+
+static void emit_write_byte_1_byte(emitter_t* emit, byte b1, uint b2) { // write b1 followed by a one-byte operand
+ assert((b2 & (~0xff)) == 0); // operand must fit in 8 bits
+ byte* c = emit_get_cur_to_write_bytes(emit, 2);
+ c[0] = b1;
+ c[1] = b2;
+}
+
+static void emit_write_byte_1_int(emitter_t* emit, byte b1, int num) { // write b1 followed by num as two bytes, low byte first
+ assert((num & (~0x7fff)) == 0 || (num & (~0x7fff)) == (~0x7fff)); // num must fit in a signed 16-bit value
+ byte* c = emit_get_cur_to_write_bytes(emit, 3);
+ c[0] = b1;
+ c[1] = num;
+ c[2] = num >> 8;
+}
+
+// Write opcode b1 followed by num in a variable-length form:
+//   num <= 127   (fits in 0x7f):   one operand byte holding num
+//   num <= 16383 (fits in 0x3fff): two operand bytes, high part first with
+//                                  the 0x80 flag set, then the low byte
+// Larger numbers are not implemented/supported.
+static void emit_write_byte_1_uint(emitter_t* emit, byte b1, uint num) {
+    if (num > 16383) {
+        assert(0);
+        return;
+    }
+
+    bool single_byte = num <= 127;
+    byte *out = emit_get_cur_to_write_bytes(emit, single_byte ? 2 : 3);
+    out[0] = b1;
+    if (single_byte) {
+        out[1] = num;
+    } else {
+        out[1] = (num >> 8) | 0x80;
+        out[2] = num;
+    }
+}
+
+static void emit_write_byte_1_qstr(emitter_t* emit, byte b1, qstr qstr) { // a qstr operand is written like any other uint operand
+ emit_write_byte_1_uint(emit, b1, qstr);
+}
+
+// Write opcode b1 followed by the code offset of `label`.
+//
+// The operand is always written in the two-byte form (0x80 flag set in the
+// first operand byte), whatever its value.  Label offsets are only used in
+// pass 3 (earlier passes write 0), so an operand whose width depended on its
+// value could make an instruction longer in pass 3 than the space measured
+// for it in pass 2, which would shift every later offset and overrun the
+// code buffer.
+//
+// NOTE(review): relies on the byte code reader choosing the operand width
+// from the 0x80 flag alone, so that a small offset written in two-byte form
+// decodes to the same value -- confirm against the decoder.
+static void emit_write_byte_1_label(emitter_t* emit, byte b1, int label) {
+    uint code_offset = 0; // placeholder written before pass 3
+    if (emit->pass >= PASS_3) {
+        code_offset = emit->label_offsets[label];
+        // larger offsets not implemented/supported
+        assert(code_offset <= 16383);
+    }
+    byte* c = emit_get_cur_to_write_bytes(emit, 3);
+    c[0] = b1;
+    c[1] = (code_offset >> 8) | 0x80;
+    c[2] = code_offset;
+}
+
+bool emit_last_emit_was_return_value(emitter_t *emit) { // true when nothing has been emitted since emit_return_value
+ return emit->last_emit_was_return_value;
+}
+
+int emit_get_stack_size(emitter_t *emit) { // current tracked stack depth
+ return emit->stack_size;
+}
+
+// Override the tracked stack depth.  Ignored during pass 1, where the stack
+// is not tracked.
+void emit_set_stack_size(emitter_t *emit, int size) {
+    if (emit->pass <= PASS_1) {
+        return;
+    }
+    emit->stack_size = size;
+}
+
+// Bookkeeping done before every emit: clears the "last emit was a return"
+// marker and, after pass 1, applies stack_size_delta to the tracked stack
+// depth and raises the scope's recorded maximum if it was exceeded.
+static void emit_pre(emitter_t *emit, int stack_size_delta) {
+    emit->last_emit_was_return_value = false;
+    if (emit->pass <= PASS_1) {
+        return;
+    }
+
+    emit->stack_size += stack_size_delta;
+    if (emit->scope->stack_size < emit->stack_size) {
+        emit->scope->stack_size = emit->stack_size;
+    }
+}
+
+int emit_label_new(emitter_t *emit) { // hand out the next label number for this pass
+ return emit->next_label++;
+}
+
+// Bind label l to the current code offset.
+//
+// Pass 2 records the offset (each label may be assigned only once); pass 3
+// checks that the offset is the same as the one recorded in pass 2.
+void emit_label_assign(emitter_t *emit, int l) {
+    emit_pre(emit, 0);
+    if (emit->pass <= PASS_1) {
+        return;
+    }
+
+    assert(l < emit->max_num_labels);
+    switch (emit->pass) {
+        case PASS_2:
+            // assign label offset
+            assert(emit->label_offsets[l] == -1);
+            emit->label_offsets[l] = emit->code_offset;
+            break;
+        case PASS_3:
+            // ensure label offset has not changed from PASS_2 to PASS_3
+            assert(emit->label_offsets[l] == emit->code_offset);
+            break;
+        default:
+            break;
+    }
+}
+
+void emit_import_name(emitter_t *emit, qstr qstr) { // IMPORT_NAME with a qstr operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1_qstr(emit, PYBC_IMPORT_NAME, qstr);
+}
+
+void emit_import_from(emitter_t *emit, qstr qstr) { // IMPORT_FROM with a qstr operand; tracked stack depth +1
+ emit_pre(emit, 1);
+ emit_write_byte_1_qstr(emit, PYBC_IMPORT_FROM, qstr);
+}
+
+void emit_import_star(emitter_t *emit) { // IMPORT_STAR, no operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1(emit, PYBC_IMPORT_STAR);
+}
+
+// Load the constant for a keyword token; tracked stack depth +1.
+// Only the False, None and True tokens are accepted; anything else asserts.
+void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) {
+    emit_pre(emit, 1);
+    if (tok == PY_TOKEN_KW_FALSE) {
+        emit_write_byte_1(emit, PYBC_LOAD_CONST_FALSE);
+    } else if (tok == PY_TOKEN_KW_NONE) {
+        emit_write_byte_1(emit, PYBC_LOAD_CONST_NONE);
+    } else if (tok == PY_TOKEN_KW_TRUE) {
+        emit_write_byte_1(emit, PYBC_LOAD_CONST_TRUE);
+    } else {
+        assert(0);
+    }
+}
+
+void emit_load_const_small_int(emitter_t *emit, int arg) { // LOAD_CONST_SMALL_INT with arg as a 16-bit operand; tracked stack depth +1
+ emit_pre(emit, 1);
+ emit_write_byte_1_int(emit, PYBC_LOAD_CONST_SMALL_INT, arg);
+}
+
+void emit_load_const_int(emitter_t *emit, qstr qstr) { // LOAD_CONST_INT with a qstr operand; tracked stack depth +1
+ emit_pre(emit, 1);
+ emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_INT, qstr);
+}
+
+void emit_load_const_dec(emitter_t *emit, qstr qstr) { // LOAD_CONST_DEC with a qstr operand; tracked stack depth +1
+ emit_pre(emit, 1);
+ emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_DEC, qstr);
+}
+
+void emit_load_const_id(emitter_t *emit, qstr qstr) { // LOAD_CONST_ID with a qstr operand; tracked stack depth +1
+ emit_pre(emit, 1);
+ emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_ID, qstr);
+}
+
+// Load a string constant given by qstr; tracked stack depth +1.
+// `bytes` selects LOAD_CONST_BYTES instead of LOAD_CONST_STRING.
+void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
+    emit_pre(emit, 1);
+    byte opcode = PYBC_LOAD_CONST_STRING;
+    if (bytes) {
+        opcode = PYBC_LOAD_CONST_BYTES;
+    }
+    emit_write_byte_1_qstr(emit, opcode, qstr);
+}
+
+void emit_load_const_verbatim_start(emitter_t *emit) { // verbatim constants are unsupported by this emitter
+ emit_pre(emit, 1);
+ assert(0); // always fails when assertions are enabled
+}
+
+void emit_load_const_verbatim_int(emitter_t *emit, int val) { // unsupported by this emitter
+ assert(0);
+}
+
+void emit_load_const_verbatim_str(emitter_t *emit, const char *str) { // unsupported by this emitter
+ assert(0);
+}
+
+void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) { // unsupported by this emitter
+ assert(0);
+}
+
+void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) { // unsupported by this emitter
+ assert(0);
+}
+
+void emit_load_const_verbatim_end(emitter_t *emit) { // unsupported by this emitter
+ assert(0);
+}
+
+// Load local variable number local_num; tracked stack depth +1.
+// Locals 0, 1 and 2 have dedicated one-byte opcodes; the rest use
+// LOAD_FAST_N with the number as operand.  The qstr is not used here.
+void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) {
+    assert(local_num >= 0);
+    emit_pre(emit, 1);
+    if (local_num == 0) {
+        emit_write_byte_1(emit, PYBC_LOAD_FAST_0);
+    } else if (local_num == 1) {
+        emit_write_byte_1(emit, PYBC_LOAD_FAST_1);
+    } else if (local_num == 2) {
+        emit_write_byte_1(emit, PYBC_LOAD_FAST_2);
+    } else {
+        emit_write_byte_1_uint(emit, PYBC_LOAD_FAST_N, local_num);
+    }
+}
+
+void emit_load_name(emitter_t *emit, qstr qstr) { // LOAD_NAME with a qstr operand; tracked stack depth +1
+ emit_pre(emit, 1);
+ emit_write_byte_1_qstr(emit, PYBC_LOAD_NAME, qstr);
+}
+
+void emit_load_global(emitter_t *emit, qstr qstr) { // LOAD_GLOBAL with a qstr operand; tracked stack depth +1
+ emit_pre(emit, 1);
+ emit_write_byte_1_qstr(emit, PYBC_LOAD_GLOBAL, qstr);
+}
+
+void emit_load_deref(emitter_t *emit, qstr qstr) { // not implemented in this emitter: records a stack delta of +1, then asserts
+ emit_pre(emit, 1);
+ assert(0);
+}
+
+void emit_load_closure(emitter_t *emit, qstr qstr) { // not implemented in this emitter: records a stack delta of +1, then asserts
+ emit_pre(emit, 1);
+ assert(0);
+}
+
+void emit_load_attr(emitter_t *emit, qstr qstr) { // LOAD_ATTR with a qstr operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1_qstr(emit, PYBC_LOAD_ATTR, qstr);
+}
+
+void emit_load_method(emitter_t *emit, qstr qstr) { // LOAD_METHOD with a qstr operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1_qstr(emit, PYBC_LOAD_METHOD, qstr);
+}
+
+void emit_load_build_class(emitter_t *emit) { // LOAD_BUILD_CLASS, no operand; tracked stack depth +1
+ emit_pre(emit, 1);
+ emit_write_byte_1(emit, PYBC_LOAD_BUILD_CLASS);
+}
+
+// Store into local variable number local_num; tracked stack depth -1.
+// Locals 0, 1 and 2 have dedicated one-byte opcodes; the rest use
+// STORE_FAST_N with the number as operand.  The qstr is not used here.
+void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
+    assert(local_num >= 0);
+    emit_pre(emit, -1);
+    if (local_num == 0) {
+        emit_write_byte_1(emit, PYBC_STORE_FAST_0);
+    } else if (local_num == 1) {
+        emit_write_byte_1(emit, PYBC_STORE_FAST_1);
+    } else if (local_num == 2) {
+        emit_write_byte_1(emit, PYBC_STORE_FAST_2);
+    } else {
+        emit_write_byte_1_uint(emit, PYBC_STORE_FAST_N, local_num);
+    }
+}
+
+void emit_store_name(emitter_t *emit, qstr qstr) { // STORE_NAME with a qstr operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1_qstr(emit, PYBC_STORE_NAME, qstr);
+}
+
+void emit_store_global(emitter_t *emit, qstr qstr) { // STORE_GLOBAL with a qstr operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1_qstr(emit, PYBC_STORE_GLOBAL, qstr);
+}
+
+void emit_store_deref(emitter_t *emit, qstr qstr) { // not implemented in this emitter: records a stack delta of -1, then asserts
+ emit_pre(emit, -1);
+ assert(0);
+}
+
+void emit_store_attr(emitter_t *emit, qstr qstr) { // STORE_ATTR with a qstr operand; tracked stack depth -2
+ emit_pre(emit, -2);
+ emit_write_byte_1_qstr(emit, PYBC_STORE_ATTR, qstr);
+}
+
+void emit_store_locals(emitter_t *emit) { // STORE_LOCALS, no operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1(emit, PYBC_STORE_LOCALS);
+}
+
+void emit_store_subscr(emitter_t *emit) { // STORE_SUBSCR, no operand; tracked stack depth -3
+ emit_pre(emit, -3);
+ emit_write_byte_1(emit, PYBC_STORE_SUBSCR);
+}
+
+void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) { // DELETE_FAST_N with local_num as operand; tracked stack depth unchanged
+ assert(local_num >= 0);
+ emit_pre(emit, 0);
+ emit_write_byte_1_uint(emit, PYBC_DELETE_FAST_N, local_num);
+}
+
+void emit_delete_name(emitter_t *emit, qstr qstr) { // DELETE_NAME with a qstr operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1_qstr(emit, PYBC_DELETE_NAME, qstr);
+}
+
+void emit_delete_global(emitter_t *emit, qstr qstr) { // DELETE_GLOBAL with a qstr operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1_qstr(emit, PYBC_DELETE_GLOBAL, qstr);
+}
+
+void emit_delete_deref(emitter_t *emit, qstr qstr) { // DELETE_DEREF with a qstr operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1_qstr(emit, PYBC_DELETE_DEREF, qstr);
+}
+
+void emit_delete_attr(emitter_t *emit, qstr qstr) { // DELETE_ATTR with a qstr operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1_qstr(emit, PYBC_DELETE_ATTR, qstr);
+}
+
+void emit_delete_subscr(emitter_t *emit) { // DELETE_SUBSCR, no operand; tracked stack depth -2
+ emit_pre(emit, -2);
+ emit_write_byte_1(emit, PYBC_DELETE_SUBSCR);
+}
+
+void emit_dup_top(emitter_t *emit) { // DUP_TOP, no operand; tracked stack depth +1
+ emit_pre(emit, 1);
+ emit_write_byte_1(emit, PYBC_DUP_TOP);
+}
+
+void emit_dup_top_two(emitter_t *emit) { // DUP_TOP_TWO, no operand; tracked stack depth +2
+ emit_pre(emit, 2);
+ emit_write_byte_1(emit, PYBC_DUP_TOP_TWO);
+}
+
+void emit_pop_top(emitter_t *emit) { // POP_TOP, no operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1(emit, PYBC_POP_TOP);
+}
+
+void emit_rot_two(emitter_t *emit) { // ROT_TWO, no operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1(emit, PYBC_ROT_TWO);
+}
+
+void emit_rot_three(emitter_t *emit) { // ROT_THREE, no operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1(emit, PYBC_ROT_THREE);
+}
+
+void emit_jump(emitter_t *emit, int label) { // JUMP with the label's code offset as operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1_label(emit, PYBC_JUMP, label);
+}
+
+void emit_pop_jump_if_true(emitter_t *emit, int label) { // POP_JUMP_IF_TRUE with a label operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1_label(emit, PYBC_POP_JUMP_IF_TRUE, label);
+}
+
+void emit_pop_jump_if_false(emitter_t *emit, int label) { // POP_JUMP_IF_FALSE with a label operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1_label(emit, PYBC_POP_JUMP_IF_FALSE, label);
+}
+
+void emit_jump_if_true_or_pop(emitter_t *emit, int label) { // JUMP_IF_TRUE_OR_POP with a label operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1_label(emit, PYBC_JUMP_IF_TRUE_OR_POP, label);
+}
+
+void emit_jump_if_false_or_pop(emitter_t *emit, int label) { // JUMP_IF_FALSE_OR_POP with a label operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1_label(emit, PYBC_JUMP_IF_FALSE_OR_POP, label);
+}
+
+void emit_setup_loop(emitter_t *emit, int label) { // SETUP_LOOP with a label operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1_label(emit, PYBC_SETUP_LOOP, label);
+}
+
+void emit_break_loop(emitter_t *emit, int label) { // BREAK_LOOP with a label operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1_label(emit, PYBC_BREAK_LOOP, label);
+}
+
+void emit_continue_loop(emitter_t *emit, int label) { // CONTINUE_LOOP with a label operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1_label(emit, PYBC_CONTINUE_LOOP, label);
+}
+
+void emit_setup_with(emitter_t *emit, int label) { // SETUP_WITH with a label operand; tracked stack depth +7
+ emit_pre(emit, 7);
+ emit_write_byte_1_label(emit, PYBC_SETUP_WITH, label);
+}
+
+void emit_with_cleanup(emitter_t *emit) { // WITH_CLEANUP, no operand; tracked stack depth -7 (mirrors emit_setup_with)
+ emit_pre(emit, -7);
+ emit_write_byte_1(emit, PYBC_WITH_CLEANUP);
+}
+
+void emit_setup_except(emitter_t *emit, int label) { // SETUP_EXCEPT with a label operand; tracked stack depth +6
+ emit_pre(emit, 6);
+ emit_write_byte_1_label(emit, PYBC_SETUP_EXCEPT, label);
+}
+
+void emit_setup_finally(emitter_t *emit, int label) { // SETUP_FINALLY with a label operand; tracked stack depth +6
+ emit_pre(emit, 6);
+ emit_write_byte_1_label(emit, PYBC_SETUP_FINALLY, label);
+}
+
+void emit_end_finally(emitter_t *emit) { // END_FINALLY, no operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1(emit, PYBC_END_FINALLY);
+}
+
+void emit_get_iter(emitter_t *emit) { // GET_ITER, no operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1(emit, PYBC_GET_ITER);
+}
+
+void emit_for_iter(emitter_t *emit, int label) { // FOR_ITER with a label operand; tracked stack depth +1
+ emit_pre(emit, 1);
+ emit_write_byte_1_label(emit, PYBC_FOR_ITER, label);
+}
+
+void emit_for_iter_end(emitter_t *emit) { // writes no byte code; only lowers the tracked stack depth by 1
+ emit_pre(emit, -1);
+}
+
+void emit_pop_block(emitter_t *emit) { // POP_BLOCK, no operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1(emit, PYBC_POP_BLOCK);
+}
+
+void emit_pop_except(emitter_t *emit) { // POP_EXCEPT, no operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1(emit, PYBC_POP_EXCEPT);
+}
+
+void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { // UNARY_OP with op as a one-byte operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ emit_write_byte_1_byte(emit, PYBC_UNARY_OP, op);
+}
+
+void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { // BINARY_OP with op as a one-byte operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1_byte(emit, PYBC_BINARY_OP, op);
+}
+
+void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { // COMPARE_OP with op as a one-byte operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit_write_byte_1_byte(emit, PYBC_COMPARE_OP, op);
+}
+
+void emit_build_tuple(emitter_t *emit, int n_args) { // BUILD_TUPLE with n_args as operand
+ assert(n_args >= 0);
+ emit_pre(emit, 1 - n_args); // n_args items are replaced by one
+ emit_write_byte_1_uint(emit, PYBC_BUILD_TUPLE, n_args);
+}
+
+void emit_build_list(emitter_t *emit, int n_args) { // BUILD_LIST with n_args as operand
+ assert(n_args >= 0);
+ emit_pre(emit, 1 - n_args); // n_args items are replaced by one
+ emit_write_byte_1_uint(emit, PYBC_BUILD_LIST, n_args);
+}
+
+void emit_list_append(emitter_t *emit, int list_stack_index) { // LIST_APPEND with list_stack_index as operand; tracked stack depth -1
+ assert(list_stack_index >= 0);
+ emit_pre(emit, -1);
+ emit_write_byte_1_uint(emit, PYBC_LIST_APPEND, list_stack_index);
+}
+
+void emit_build_map(emitter_t *emit, int n_args) { // BUILD_MAP with n_args as operand
+ assert(n_args >= 0);
+ emit_pre(emit, 1); // always +1: unlike the tuple/list/set builders, n_args is not subtracted
+ emit_write_byte_1_uint(emit, PYBC_BUILD_MAP, n_args);
+}
+
+void emit_store_map(emitter_t *emit) { // STORE_MAP, no operand; tracked stack depth -2
+ emit_pre(emit, -2);
+ emit_write_byte_1(emit, PYBC_STORE_MAP);
+}
+
+void emit_map_add(emitter_t *emit, int map_stack_index) { // MAP_ADD with map_stack_index as operand; tracked stack depth -2
+ assert(map_stack_index >= 0);
+ emit_pre(emit, -2);
+ emit_write_byte_1_uint(emit, PYBC_MAP_ADD, map_stack_index);
+}
+
+void emit_build_set(emitter_t *emit, int n_args) { // BUILD_SET with n_args as operand
+ assert(n_args >= 0);
+ emit_pre(emit, 1 - n_args); // n_args items are replaced by one
+ emit_write_byte_1_uint(emit, PYBC_BUILD_SET, n_args);
+}
+
+void emit_set_add(emitter_t *emit, int set_stack_index) { // SET_ADD with set_stack_index as operand; tracked stack depth -1
+ assert(set_stack_index >= 0);
+ emit_pre(emit, -1);
+ emit_write_byte_1_uint(emit, PYBC_SET_ADD, set_stack_index);
+}
+
+void emit_build_slice(emitter_t *emit, int n_args) { // BUILD_SLICE with n_args as operand
+ assert(n_args >= 0);
+ emit_pre(emit, 1 - n_args); // n_args items are replaced by one
+ emit_write_byte_1_uint(emit, PYBC_BUILD_SLICE, n_args);
+}
+
+void emit_unpack_sequence(emitter_t *emit, int n_args) { // UNPACK_SEQUENCE with n_args as operand
+ assert(n_args >= 0);
+ emit_pre(emit, -1 + n_args); // one item is replaced by n_args
+ emit_write_byte_1_uint(emit, PYBC_UNPACK_SEQUENCE, n_args);
+}
+
+void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { // UNPACK_EX with both counts packed into one operand
+ assert(n_left >=0 && n_right >= 0);
+ emit_pre(emit, -1 + n_left + n_right + 1); // net change is n_left + n_right
+ emit_write_byte_1_uint(emit, PYBC_UNPACK_EX, n_left | (n_right << 8)); // n_left in the low 8 bits, n_right above them
+}
+
+void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { // MAKE_FUNCTION; tracked stack depth +1
+ assert(n_default_params == 0 && n_dict_params == 0); // default and dict parameters are not supported here
+ emit_pre(emit, 1);
+ emit_write_byte_1_uint(emit, PYBC_MAKE_FUNCTION, scope->unique_code_id); // operand is the code id assigned to the scope in pass 1
+}
+
+void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { // closures are not implemented in this emitter
+ assert(0); // always fails when assertions are enabled
+ emit_pre(emit, -2 - n_default_params - 2 * n_dict_params);
+ if (emit->pass == PASS_3) {
+ printf("MAKE_CLOSURE %d\n", (n_dict_params << 8) | n_default_params); // only prints; no byte code is written
+ }
+}
+
+// Emit a function call.
+//
+// The tracked stack depth drops by one per positional argument, two per
+// keyword argument and one for each of the star / double-star arguments that
+// is present.  The opcode is chosen from the four CALL_FUNCTION variants
+// according to which of those two are present.
+void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    int n_extra = (have_star_arg ? 1 : 0) + (have_dbl_star_arg ? 1 : 0);
+    emit_pre(emit, -n_positional - 2 * n_keyword - n_extra);
+
+    int op;
+    if (have_star_arg && have_dbl_star_arg) {
+        op = PYBC_CALL_FUNCTION_VAR_KW;
+    } else if (have_star_arg) {
+        op = PYBC_CALL_FUNCTION_VAR;
+    } else if (have_dbl_star_arg) {
+        op = PYBC_CALL_FUNCTION_KW;
+    } else {
+        op = PYBC_CALL_FUNCTION;
+    }
+
+    // keyword count above the low 8 bits, positional count in them
+    emit_write_byte_1_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints
+}
+
+// Emit a method call.
+//
+// The tracked stack depth drops by one per positional argument, two per
+// keyword argument and one for each of the star / double-star arguments that
+// is present.  The opcode is chosen from the four CALL_METHOD variants
+// according to which of those two are present.
+void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    int n_extra = (have_star_arg ? 1 : 0) + (have_dbl_star_arg ? 1 : 0);
+    emit_pre(emit, -n_positional - 2 * n_keyword - n_extra);
+
+    int op;
+    if (have_star_arg && have_dbl_star_arg) {
+        op = PYBC_CALL_METHOD_VAR_KW;
+    } else if (have_star_arg) {
+        op = PYBC_CALL_METHOD_VAR;
+    } else if (have_dbl_star_arg) {
+        op = PYBC_CALL_METHOD_KW;
+    } else {
+        op = PYBC_CALL_METHOD;
+    }
+
+    // keyword count above the low 8 bits, positional count in them
+    emit_write_byte_1_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints
+}
+
+void emit_return_value(emitter_t *emit) { // RETURN_VALUE, no operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ emit->last_emit_was_return_value = true; // must come after emit_pre, which clears this flag
+ emit_write_byte_1(emit, PYBC_RETURN_VALUE);
+}
+
+void emit_raise_varargs(emitter_t *emit, int n_args) { // RAISE_VARARGS with n_args as operand; tracked stack depth -n_args
+ assert(n_args >= 0);
+ emit_pre(emit, -n_args);
+ emit_write_byte_1_uint(emit, PYBC_RAISE_VARARGS, n_args);
+}
+
+void emit_yield_value(emitter_t *emit) { // YIELD_VALUE, no operand; tracked stack depth unchanged
+ emit_pre(emit, 0);
+ if (emit->pass == PASS_2) {
+ emit->scope->flags |= SCOPE_FLAG_GENERATOR; // a scope that yields is flagged as a generator (done once, in pass 2)
+ }
+ emit_write_byte_1(emit, PYBC_YIELD_VALUE);
+}
+
+void emit_yield_from(emitter_t *emit) { // YIELD_FROM, no operand; tracked stack depth -1
+ emit_pre(emit, -1);
+ if (emit->pass == PASS_2) {
+ emit->scope->flags |= SCOPE_FLAG_GENERATOR; // a scope that yields is flagged as a generator (done once, in pass 2)
+ }
+ emit_write_byte_1(emit, PYBC_YIELD_FROM);
+}
+
+#endif // EMIT_DO_BC
diff --git a/py/emitcommon.c b/py/emitcommon.c
new file mode 100644
index 0000000000..1fd8697c3a
--- /dev/null
+++ b/py/emitcommon.c
@@ -0,0 +1,171 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "scope.h"
+#include "runtime.h"
+#include "emit.h"
+
+#define EMIT(fun, arg...) (emit_##fun(emit, ##arg)) // calls emit_<fun>(emit, ...) using the `emit` variable in scope at the call site; `arg...` / `##arg` is the GNU named-variadic form
+
+// Handle a `global` declaration for `qstr` in `scope`. All work happens in
+// PASS_1; problems are reported on stdout and the declaration is dropped.
+void emit_common_declare_global(pass_kind_t pass, scope_t *scope, qstr qstr) {
+    if (pass != PASS_1) {
+        return;
+    }
+    if (scope->kind == SCOPE_MODULE) {
+        printf("SyntaxError?: can't declare global in outer code\n");
+        return;
+    }
+
+    // the name must not have been seen in this scope before the declaration
+    bool is_new;
+    id_info_t *own_info = scope_find_or_add_id(scope, qstr, &is_new);
+    if (!is_new) {
+        printf("SyntaxError?: identifier already declared something\n");
+        return;
+    }
+    own_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
+
+    // if the id exists in the global scope, set its kind to EXPLICIT_GLOBAL
+    id_info_t *global_info = scope_find_global(scope, qstr);
+    if (global_info != NULL) {
+        global_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
+    }
+}
+
+// Handle a `nonlocal` declaration for `qstr` in `scope`. All work happens in
+// PASS_1; problems are reported on stdout and the declaration is dropped.
+void emit_common_declare_nonlocal(pass_kind_t pass, scope_t *scope, qstr qstr) {
+    if (pass != PASS_1) {
+        return;
+    }
+    if (scope->kind == SCOPE_MODULE) {
+        printf("SyntaxError?: can't declare nonlocal in outer code\n");
+        return;
+    }
+
+    // the name must not have been seen in this scope before the declaration
+    bool is_new;
+    id_info_t *own_info = scope_find_or_add_id(scope, qstr, &is_new);
+    if (!is_new) {
+        printf("SyntaxError?: identifier already declared something\n");
+        return;
+    }
+
+    // a parent scope has to bind the name as a local, cell or free variable
+    id_info_t *outer_info = scope_find_local_in_parent(scope, qstr);
+    if (outer_info == NULL
+        || (outer_info->kind != ID_INFO_KIND_LOCAL
+            && outer_info->kind != ID_INFO_KIND_CELL
+            && outer_info->kind != ID_INFO_KIND_FREE)) {
+        printf("SyntaxError: no binding for nonlocal '%s' found\n", qstr_str(qstr));
+        return;
+    }
+
+    own_info->kind = ID_INFO_KIND_FREE;
+    scope_close_over_in_parents(scope, qstr);
+}
+
+void emit_common_load_id(pass_kind_t pass, scope_t *scope, qstr qstr___class__, emitter_t *emit, qstr qstr) { // resolves `qstr` in `scope` (classifying it during PASS_1) and emits the matching load op
+ id_info_t *id_info = NULL;
+ if (pass == PASS_1) {
+ // name adding/lookup
+ bool added;
+ id_info = scope_find_or_add_id(scope, qstr, &added);
+ if (added) { // first use of the name in this scope: decide its kind now
+ if (strcmp(qstr_str(qstr), "AssertionError") == 0) {
+ id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
+ // TODO how much of a hack is this?
+ } else if (strcmp(qstr_str(qstr), "super") == 0 && scope->kind == SCOPE_FUNCTION) {
+ // special case, super is a global, and also counts as use of __class__
+ id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
+ id_info_t *id_info2 = scope_find_local_in_parent(scope, qstr___class__);
+ if (id_info2 != NULL) { // only close over __class__ when a parent scope has it
+ id_info2 = scope_find_or_add_id(scope, qstr___class__, &added); // `added` is reused here for the __class__ entry
+ if (added) {
+ id_info2->kind = ID_INFO_KIND_FREE;
+ scope_close_over_in_parents(scope, qstr___class__);
+ }
+ }
+ } else {
+ id_info_t *id_info2 = scope_find_local_in_parent(scope, qstr);
+ if (id_info2 != NULL && (id_info2->kind == ID_INFO_KIND_LOCAL || id_info2->kind == ID_INFO_KIND_CELL || id_info2->kind == ID_INFO_KIND_FREE)) { // bound in a parent scope: treat as a free variable
+ id_info->kind = ID_INFO_KIND_FREE;
+ scope_close_over_in_parents(scope, qstr);
+ } else {
+ id_info->kind = ID_INFO_KIND_GLOBAL_IMPLICIT;
+ }
+ }
+ }
+ } else {
+ id_info = scope_find(scope, qstr); // later passes only look the name up
+ }
+
+ assert(id_info != NULL); // TODO can this ever fail?
+
+ // call the emit backend with the correct code
+ if (id_info == NULL || id_info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { // the NULL test only matters when assert is compiled out
+ EMIT(load_name, qstr);
+ } else if (id_info->kind == ID_INFO_KIND_GLOBAL_EXPLICIT) {
+ EMIT(load_global, qstr);
+ } else if (id_info->kind == ID_INFO_KIND_LOCAL) {
+ EMIT(load_fast, qstr, id_info->local_num);
+ } else if (id_info->kind == ID_INFO_KIND_CELL || id_info->kind == ID_INFO_KIND_FREE) {
+ EMIT(load_deref, qstr);
+ } else {
+ assert(0);
+ }
+}
+
+// Look up the id info for a name that is about to be stored to or deleted.
+// During PASS_1 the name is added to `scope` if needed and its kind decided;
+// later passes only look it up.
+static id_info_t *get_id_for_modification(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) {
+    id_info_t *info = NULL;
+
+    if (pass != PASS_1) {
+        info = scope_find(scope, qstr);
+    } else {
+        // name adding/lookup
+        bool is_new;
+        info = scope_find_or_add_id(scope, qstr, &is_new);
+        if (is_new) {
+            // module and class scopes bind by name, every other scope gets a local
+            bool binds_by_name = scope->kind == SCOPE_MODULE || scope->kind == SCOPE_CLASS;
+            if (binds_by_name) {
+                info->kind = ID_INFO_KIND_GLOBAL_IMPLICIT;
+            } else {
+                info->kind = ID_INFO_KIND_LOCAL;
+            }
+        } else if (scope->kind >= SCOPE_FUNCTION && scope->kind <= SCOPE_GEN_EXPR && info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) {
+            // rebind as a local variable
+            info->kind = ID_INFO_KIND_LOCAL;
+        }
+    }
+
+    assert(info != NULL); // TODO can this ever fail?
+
+    return info;
+}
+
+// Emit the store op that matches how `qstr` is bound in `scope`.
+void emit_common_store_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) {
+    // create/get the id info
+    id_info_t *info = get_id_for_modification(pass, scope, emit, qstr);
+
+    if (info == NULL) {
+        // only reachable when asserts are compiled out: fall back to a by-name store
+        EMIT(store_name, qstr);
+        return;
+    }
+
+    // call the emit backend with the correct code
+    switch (info->kind) {
+        case ID_INFO_KIND_GLOBAL_IMPLICIT:
+            EMIT(store_name, qstr);
+            break;
+        case ID_INFO_KIND_GLOBAL_EXPLICIT:
+            EMIT(store_global, qstr);
+            break;
+        case ID_INFO_KIND_LOCAL:
+            EMIT(store_fast, qstr, info->local_num);
+            break;
+        case ID_INFO_KIND_CELL:
+        case ID_INFO_KIND_FREE:
+            EMIT(store_deref, qstr);
+            break;
+        default:
+            assert(0);
+    }
+}
+
+// Emit the delete op that matches how `qstr` is bound in `scope`.
+void emit_common_delete_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) {
+    // create/get the id info
+    id_info_t *info = get_id_for_modification(pass, scope, emit, qstr);
+
+    if (info == NULL) {
+        // only reachable when asserts are compiled out: fall back to a by-name delete
+        EMIT(delete_name, qstr);
+        return;
+    }
+
+    // call the emit backend with the correct code
+    switch (info->kind) {
+        case ID_INFO_KIND_GLOBAL_IMPLICIT:
+            EMIT(delete_name, qstr);
+            break;
+        case ID_INFO_KIND_GLOBAL_EXPLICIT:
+            EMIT(delete_global, qstr);
+            break;
+        case ID_INFO_KIND_LOCAL:
+            EMIT(delete_fast, qstr, info->local_num);
+            break;
+        case ID_INFO_KIND_CELL:
+        case ID_INFO_KIND_FREE:
+            EMIT(delete_deref, qstr);
+            break;
+        default:
+            assert(0);
+    }
+}
diff --git a/py/emitcpy.c b/py/emitcpy.c
new file mode 100644
index 0000000000..637abd772a
--- /dev/null
+++ b/py/emitcpy.c
@@ -0,0 +1,834 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "compile.h"
+#include "scope.h"
+#include "runtime.h"
+#include "emit.h"
+
+#ifdef EMIT_DO_CPY
+
+struct _emitter_t { // state of the emitter that prints CPython-style op names
+ int pass; // current pass, set by emit_start_pass
+ int next_label; // next label id to hand out; reset to 1 at the start of each pass
+ int byte_code_offset; // running byte code offset, advanced by emit_pre
+ int stack_size; // tracked stack depth, adjusted by emit_pre
+ bool last_emit_was_return_value; // set by emit_return_value, cleared by every emit_pre
+
+ scope_t *scope; // scope being emitted in the current pass
+
+ int max_num_labels; // largest next_label seen at the end of a PASS_1
+ int *label_offsets; // byte code offset of each label, recorded during PASS_2
+};
+
+emitter_t *emit_new() { // allocates an emitter with an empty label table; the per-pass fields are set by emit_start_pass
+ emitter_t *emit = m_new(emitter_t, 1);
+ emit->max_num_labels = 0;
+ emit->label_offsets = NULL; // allocated by emit_start_pass the first time a pass after PASS_1 starts
+ return emit;
+}
+
+// Reset the per-pass emitter state for `scope`. From the pass after PASS_1
+// onwards the label table is made available; PASS_2 also marks every entry
+// as unassigned.
+void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
+    emit->scope = scope;
+    emit->pass = pass;
+    emit->last_emit_was_return_value = false;
+    emit->stack_size = 0;
+    emit->byte_code_offset = 0;
+    emit->next_label = 1;
+
+    if (pass <= PASS_1) {
+        // label offsets are not tracked during the first pass
+        return;
+    }
+
+    if (emit->label_offsets == NULL) {
+        // allocated once, sized by the label count gathered in PASS_1
+        emit->label_offsets = m_new(int, emit->max_num_labels);
+    }
+    if (pass == PASS_2) {
+        // all-ones bytes make every entry compare equal to -1, the "unassigned" value emit_label_assign checks for
+        memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(int));
+    }
+}
+
+// Finish a pass: report an unbalanced stack and, after PASS_1, remember how
+// many labels the label table must be able to hold.
+void emit_end_pass(emitter_t *emit) {
+    // check stack is back to zero size
+    int leftover = emit->stack_size;
+    if (leftover != 0) {
+        printf("ERROR: stack size not back to zero; got %d\n", leftover);
+    }
+
+    // calculate the number of labels needed
+    if (emit->pass == PASS_1 && emit->max_num_labels < emit->next_label) {
+        emit->max_num_labels = emit->next_label;
+    }
+}
+
+bool emit_last_emit_was_return_value(emitter_t *emit) { // true when nothing has been emitted since the last emit_return_value
+ return emit->last_emit_was_return_value;
+}
+
+int emit_get_stack_size(emitter_t *emit) { // returns the stack depth currently tracked by the emitter
+ return emit->stack_size;
+}
+
+void emit_set_stack_size(emitter_t *emit, int size) { // overrides the tracked stack depth; the scope's recorded maximum is not touched here
+ emit->stack_size = size;
+}
+
+// Bookkeeping done before every op: apply the stack delta, track the scope's
+// maximum stack depth (after PASS_1), clear the "last op was return" flag,
+// print the op's byte code offset in PASS_3, then advance the offset.
+static void emit_pre(emitter_t *emit, int stack_size_delta, int byte_code_size) {
+    int depth = emit->stack_size + stack_size_delta;
+    emit->stack_size = depth;
+    if (emit->pass > PASS_1 && depth > emit->scope->stack_size) {
+        emit->scope->stack_size = depth;
+    }
+
+    emit->last_emit_was_return_value = false;
+
+    // zero-sized pseudo ops (e.g. label assignment) print no offset column
+    int offset = emit->byte_code_offset;
+    if (byte_code_size > 0 && emit->pass == PASS_3) {
+        if (offset < 1000) {
+            printf("% 4d ", offset);
+        } else {
+            printf("%d ", offset);
+        }
+    }
+    emit->byte_code_offset = offset + byte_code_size;
+}
+
+// Hand out the next unused label id for the current pass.
+int emit_label_new(emitter_t *emit) {
+    int label = emit->next_label;
+    emit->next_label = label + 1;
+    return label;
+}
+
+// Bind label `l` to the current byte code offset. PASS_2 records the offset;
+// PASS_3 checks that it still matches what PASS_2 recorded.
+void emit_label_assign(emitter_t *emit, int l) {
+    emit_pre(emit, 0, 0);
+    if (emit->pass <= PASS_1) {
+        return;
+    }
+
+    assert(l < emit->max_num_labels);
+    if (emit->pass == PASS_2) {
+        // assign label offset; each label may be assigned only once
+        assert(emit->label_offsets[l] == -1);
+        emit->label_offsets[l] = emit->byte_code_offset;
+    } else if (emit->pass == PASS_3) {
+        // ensure label offset has not changed from PASS_2 to PASS_3
+        assert(emit->label_offsets[l] == emit->byte_code_offset);
+    }
+}
+
+// IMPORT_NAME: stack delta -1, 3-byte op; the text is printed in PASS_3 only.
+void emit_import_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("IMPORT_NAME %s\n", qstr_str(qstr));
+}
+
+// IMPORT_FROM: stack delta +1, 3-byte op; the text is printed in PASS_3 only.
+void emit_import_from(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("IMPORT_FROM %s\n", qstr_str(qstr));
+}
+
+// IMPORT_STAR: stack delta -1, 1-byte op; the text is printed in PASS_3 only.
+void emit_import_star(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("IMPORT_STAR\n");
+}
+
+// LOAD_CONST for the keyword constants False / None / True: stack delta +1,
+// 3-byte op. The text is printed in PASS_3 only. Any other token kind is
+// printed as "?=<kind>" so that it shows up in the listing.
+void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+
+    printf("LOAD_CONST ");
+    switch (tok) {
+        case PY_TOKEN_KW_FALSE: printf("False"); break;
+        case PY_TOKEN_KW_NONE: printf("None"); break;
+        case PY_TOKEN_KW_TRUE: printf("True"); break;
+        default:
+            // this line carries its own newline, so return before the shared one below
+            // (the assert(0) that used to follow the return could never execute)
+            printf("?=%d\n", tok);
+            return;
+    }
+    printf("\n");
+}
+
+// LOAD_CONST of a small integer: stack delta +1, 3-byte op; printed in PASS_3 only.
+void emit_load_const_small_int(emitter_t *emit, int arg) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_CONST %d\n", arg);
+}
+
+// LOAD_CONST of an integer given as text: stack delta +1, 3-byte op; printed in PASS_3 only.
+void emit_load_const_int(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_CONST %s\n", qstr_str(qstr));
+}
+
+// LOAD_CONST of a decimal given as text: stack delta +1, 3-byte op; printed in PASS_3 only.
+void emit_load_const_dec(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_CONST %s\n", qstr_str(qstr));
+}
+
+// LOAD_CONST of an identifier, printed in single quotes: stack delta +1, 3-byte op; printed in PASS_3 only.
+void emit_load_const_id(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_CONST '%s'\n", qstr_str(qstr));
+}
+
+// LOAD_CONST of a str/bytes literal: stack delta +1, 3-byte op. In PASS_3 the
+// literal is printed via emit_load_const_verbatim_quoted_str.
+void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_CONST ");
+    emit_load_const_verbatim_quoted_str(emit, qstr, bytes);
+    printf("\n");
+}
+
+// Start a LOAD_CONST whose operand text is supplied piecewise by the
+// emit_load_const_verbatim_* calls: stack delta +1, 3-byte op.
+void emit_load_const_verbatim_start(emitter_t *emit) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_CONST ");
+}
+
+// Print an integer as part of a verbatim constant (PASS_3 only); no stack or offset change.
+void emit_load_const_verbatim_int(emitter_t *emit, int val) {
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("%d", val);
+}
+
+// Print a C string as part of a verbatim constant (PASS_3 only); no stack or offset change.
+void emit_load_const_verbatim_str(emitter_t *emit, const char *str) {
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("%s", str);
+}
+
+// Print at most `len` characters of `str` as part of a verbatim constant (PASS_3 only).
+void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) {
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("%.*s", len, str);
+}
+
+// Print `qstr` as a quoted str/bytes literal (PASS_3 only). Double quotes are
+// used only when the text contains a single quote and no double quote;
+// otherwise single quotes are used and embedded single quotes are escaped.
+void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) {
+    // TODO strings should be escaped before we get here
+    if (emit->pass != PASS_3) {
+        return;
+    }
+
+    const char *text = qstr_str(qstr);
+    int n = strlen(text);
+    bool seen_single = strchr(text, '\'') != NULL;
+    bool seen_double = strchr(text, '"') != NULL;
+    bool quote_single = !seen_single || seen_double;
+
+    if (bytes) {
+        printf("b");
+    }
+
+    // opening delimiter
+    if (quote_single) {
+        printf("'");
+    } else {
+        printf("\"");
+    }
+
+    for (int i = 0; i < n; i++) {
+        char c = text[i];
+        if (c == '\n') {
+            printf("\\n");
+            continue;
+        }
+        if (c == '\\' && text[i + 1] == '\'') {
+            // an already-escaped single quote: consume both characters and keep
+            // the escape only when the delimiter is a single quote
+            i += 1;
+            if (quote_single) {
+                printf("\\'");
+            } else {
+                printf("'");
+            }
+            continue;
+        }
+        if (c == '\'' && quote_single) {
+            printf("\\'");
+            continue;
+        }
+        printf("%c", c);
+    }
+
+    // closing delimiter
+    if (quote_single) {
+        printf("'");
+    } else {
+        printf("\"");
+    }
+}
+
+// Terminate the line of a verbatim constant (PASS_3 only).
+void emit_load_const_verbatim_end(emitter_t *emit) {
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("\n");
+}
+
+// LOAD_NAME: stack delta +1, 3-byte op; the text is printed in PASS_3 only.
+void emit_load_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_NAME %s\n", qstr_str(qstr));
+}
+
+// LOAD_GLOBAL: stack delta +1, 3-byte op; the text is printed in PASS_3 only.
+void emit_load_global(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_GLOBAL %s\n", qstr_str(qstr));
+}
+
+// LOAD_FAST: stack delta +1, 3-byte op; printed in PASS_3 only.
+// `local_num` is not used by this emitter; the name is printed instead.
+void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_FAST %s\n", qstr_str(qstr));
+}
+
+// LOAD_DEREF: stack delta +1, 3-byte op; the text is printed in PASS_3 only.
+void emit_load_deref(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_DEREF %s\n", qstr_str(qstr));
+}
+
+// LOAD_CLOSURE: stack delta +1, 3-byte op; the text is printed in PASS_3 only.
+void emit_load_closure(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_CLOSURE %s\n", qstr_str(qstr));
+}
+
+// LOAD_ATTR: stack delta 0, 3-byte op; the text is printed in PASS_3 only.
+void emit_load_attr(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_ATTR %s\n", qstr_str(qstr));
+}
+
+void emit_load_method(emitter_t *emit, qstr qstr) { // this emitter has no separate method-load op; it is printed as LOAD_ATTR
+ emit_load_attr(emit, qstr);
+}
+
+// LOAD_BUILD_CLASS: stack delta +1, 1-byte op; the text is printed in PASS_3 only.
+void emit_load_build_class(emitter_t *emit) {
+    emit_pre(emit, 1, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LOAD_BUILD_CLASS\n");
+}
+
+// STORE_NAME: stack delta -1, 3-byte op; the text is printed in PASS_3 only.
+void emit_store_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("STORE_NAME %s\n", qstr_str(qstr));
+}
+
+// STORE_GLOBAL: stack delta -1, 3-byte op; the text is printed in PASS_3 only.
+void emit_store_global(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("STORE_GLOBAL %s\n", qstr_str(qstr));
+}
+
+// STORE_FAST: stack delta -1, 3-byte op; printed in PASS_3 only.
+// `local_num` is not used by this emitter; the name is printed instead.
+void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("STORE_FAST %s\n", qstr_str(qstr));
+}
+
+// STORE_DEREF: stack delta -1, 3-byte op; the text is printed in PASS_3 only.
+void emit_store_deref(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("STORE_DEREF %s\n", qstr_str(qstr));
+}
+
+// STORE_ATTR: stack delta -2, 3-byte op; the text is printed in PASS_3 only.
+void emit_store_attr(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -2, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("STORE_ATTR %s\n", qstr_str(qstr));
+}
+
+// STORE_LOCALS: stack delta -1, 1-byte op; the text is printed in PASS_3 only.
+void emit_store_locals(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("STORE_LOCALS\n");
+}
+
+// STORE_SUBSCR: stack delta -3, 1-byte op; the text is printed in PASS_3 only.
+void emit_store_subscr(emitter_t *emit) {
+    emit_pre(emit, -3, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("STORE_SUBSCR\n");
+}
+
+// DELETE_NAME: stack delta 0, 3-byte op; the text is printed in PASS_3 only.
+void emit_delete_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("DELETE_NAME %s\n", qstr_str(qstr));
+}
+
+// DELETE_GLOBAL: stack delta 0, 3-byte op; the text is printed in PASS_3 only.
+void emit_delete_global(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("DELETE_GLOBAL %s\n", qstr_str(qstr));
+}
+
+// DELETE_FAST: stack delta 0, 3-byte op; printed in PASS_3 only.
+// `local_num` is not used by this emitter; the name is printed instead.
+void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("DELETE_FAST %s\n", qstr_str(qstr));
+}
+
+// DELETE_DEREF: stack delta 0, 3-byte op; the text is printed in PASS_3 only.
+void emit_delete_deref(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("DELETE_DEREF %s\n", qstr_str(qstr));
+}
+
+// DELETE_ATTR: stack delta -1, 3-byte op; the text is printed in PASS_3 only.
+void emit_delete_attr(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("DELETE_ATTR %s\n", qstr_str(qstr));
+}
+
+// DELETE_SUBSCR: stack delta -2, 1-byte op; the text is printed in PASS_3 only.
+void emit_delete_subscr(emitter_t *emit) {
+    emit_pre(emit, -2, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("DELETE_SUBSCR\n");
+}
+
+// DUP_TOP: stack delta +1, 1-byte op; the text is printed in PASS_3 only.
+void emit_dup_top(emitter_t *emit) {
+    emit_pre(emit, 1, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("DUP_TOP\n");
+}
+
+// DUP_TOP_TWO: stack delta +2, 1-byte op; the text is printed in PASS_3 only.
+void emit_dup_top_two(emitter_t *emit) {
+    emit_pre(emit, 2, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("DUP_TOP_TWO\n");
+}
+
+// POP_TOP: stack delta -1, 1-byte op; the text is printed in PASS_3 only.
+void emit_pop_top(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("POP_TOP\n");
+}
+
+// ROT_TWO: stack delta 0, 1-byte op; the text is printed in PASS_3 only.
+void emit_rot_two(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("ROT_TWO\n");
+}
+
+// ROT_THREE: stack delta 0, 1-byte op; the text is printed in PASS_3 only.
+void emit_rot_three(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("ROT_THREE\n");
+}
+
+// Unconditional jump: stack delta 0, 3-byte op. In PASS_3 it is printed as
+// JUMP_ABSOLUTE when the label lies before the offset following this op, and
+// as JUMP_FORWARD otherwise; both forms print the label's recorded offset.
+void emit_jump(emitter_t *emit, int label) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+
+    // byte_code_offset has already been advanced past this op by emit_pre
+    int dest = emit->label_offsets[label];
+    if (dest >= emit->byte_code_offset) {
+        printf("JUMP_FORWARD %d\n", dest);
+    } else {
+        printf("JUMP_ABSOLUTE %d\n", dest);
+    }
+}
+
+// POP_JUMP_IF_TRUE: stack delta -1, 3-byte op; prints the label's recorded offset in PASS_3.
+void emit_pop_jump_if_true(emitter_t *emit, int label) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int target = emit->label_offsets[label];
+    printf("POP_JUMP_IF_TRUE %d\n", target);
+}
+
+// POP_JUMP_IF_FALSE: stack delta -1, 3-byte op; prints the label's recorded offset in PASS_3.
+void emit_pop_jump_if_false(emitter_t *emit, int label) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int target = emit->label_offsets[label];
+    printf("POP_JUMP_IF_FALSE %d\n", target);
+}
+
+// JUMP_IF_TRUE_OR_POP: stack delta -1, 3-byte op; prints the label's recorded offset in PASS_3.
+void emit_jump_if_true_or_pop(emitter_t *emit, int label) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int target = emit->label_offsets[label];
+    printf("JUMP_IF_TRUE_OR_POP %d\n", target);
+}
+
+// JUMP_IF_FALSE_OR_POP: stack delta -1, 3-byte op; prints the label's recorded offset in PASS_3.
+void emit_jump_if_false_or_pop(emitter_t *emit, int label) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int target = emit->label_offsets[label];
+    printf("JUMP_IF_FALSE_OR_POP %d\n", target);
+}
+
+// SETUP_LOOP: stack delta 0, 3-byte op; prints the label's recorded offset in PASS_3.
+void emit_setup_loop(emitter_t *emit, int label) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int target = emit->label_offsets[label];
+    printf("SETUP_LOOP %d\n", target);
+}
+
+// BREAK_LOOP: stack delta 0, 1-byte op; printed in PASS_3 only.
+// `label` is accepted for interface compatibility but never printed.
+void emit_break_loop(emitter_t *emit, int label) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("BREAK_LOOP\n"); // CPython doesn't have label
+}
+
+// CONTINUE_LOOP: stack delta 0, 3-byte op; prints the label's recorded offset in PASS_3.
+void emit_continue_loop(emitter_t *emit, int label) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int target = emit->label_offsets[label];
+    printf("CONTINUE_LOOP %d\n", target);
+}
+
+// SETUP_WITH: stack delta +7, 3-byte op; prints the label's recorded offset in PASS_3.
+void emit_setup_with(emitter_t *emit, int label) {
+    emit_pre(emit, 7, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int target = emit->label_offsets[label];
+    printf("SETUP_WITH %d\n", target);
+}
+
+// WITH_CLEANUP: stack delta -7 (the inverse of emit_setup_with), 1-byte op; printed in PASS_3 only.
+void emit_with_cleanup(emitter_t *emit) {
+    emit_pre(emit, -7, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("WITH_CLEANUP\n");
+}
+
+// SETUP_EXCEPT: stack delta +6, 3-byte op; prints the label's recorded offset in PASS_3.
+void emit_setup_except(emitter_t *emit, int label) {
+    emit_pre(emit, 6, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int target = emit->label_offsets[label];
+    printf("SETUP_EXCEPT %d\n", target);
+}
+
+// SETUP_FINALLY: stack delta +6, 3-byte op; prints the label's recorded offset in PASS_3.
+void emit_setup_finally(emitter_t *emit, int label) {
+    emit_pre(emit, 6, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int target = emit->label_offsets[label];
+    printf("SETUP_FINALLY %d\n", target);
+}
+
+// END_FINALLY: stack delta -1, 1-byte op; the text is printed in PASS_3 only.
+void emit_end_finally(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("END_FINALLY\n");
+}
+
+// GET_ITER: stack delta 0, 1-byte op; the text is printed in PASS_3 only.
+void emit_get_iter(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("GET_ITER\n");
+}
+
+// FOR_ITER: stack delta +1, 3-byte op; prints the label's recorded offset in PASS_3.
+void emit_for_iter(emitter_t *emit, int label) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int target = emit->label_offsets[label];
+    printf("FOR_ITER %d\n", target);
+}
+
+void emit_for_iter_end(emitter_t *emit) { // bookkeeping only: lowers the tracked stack depth by one and prints no op (byte code size 0)
+ emit_pre(emit, -1, 0);
+}
+
+// POP_BLOCK: stack delta 0, 1-byte op; the text is printed in PASS_3 only.
+void emit_pop_block(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("POP_BLOCK\n");
+}
+
+// POP_EXCEPT: stack delta 0, 1-byte op; the text is printed in PASS_3 only.
+void emit_pop_except(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("POP_EXCEPT\n");
+}
+
+// Unary operator: stack delta 0, 1-byte op. In PASS_3 the UNARY_* name that
+// corresponds to `op` is printed; an unknown `op` trips the assert.
+void emit_unary_op(emitter_t *emit, rt_unary_op_t op) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+
+    switch (op) {
+        case RT_UNARY_OP_NOT:
+            printf("UNARY_NOT\n");
+            break;
+        case RT_UNARY_OP_POSITIVE:
+            printf("UNARY_POSITIVE\n");
+            break;
+        case RT_UNARY_OP_NEGATIVE:
+            printf("UNARY_NEGATIVE\n");
+            break;
+        case RT_UNARY_OP_INVERT:
+            printf("UNARY_INVERT\n");
+            break;
+        default:
+            assert(0);
+    }
+}
+
+// Binary operator: stack delta -1, 1-byte op. In PASS_3 the BINARY_* or
+// INPLACE_* name that corresponds to `op` is printed; an unknown `op` trips
+// the assert.
+void emit_binary_op(emitter_t *emit, rt_binary_op_t op) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+
+    switch (op) {
+        // subscription
+        case RT_BINARY_OP_SUBSCR: printf("BINARY_SUBSCR\n"); break;
+
+        // bitwise and shift operators
+        case RT_BINARY_OP_OR: printf("BINARY_OR\n"); break;
+        case RT_BINARY_OP_XOR: printf("BINARY_XOR\n"); break;
+        case RT_BINARY_OP_AND: printf("BINARY_AND\n"); break;
+        case RT_BINARY_OP_LSHIFT: printf("BINARY_LSHIFT\n"); break;
+        case RT_BINARY_OP_RSHIFT: printf("BINARY_RSHIFT\n"); break;
+
+        // arithmetic operators
+        case RT_BINARY_OP_ADD: printf("BINARY_ADD\n"); break;
+        case RT_BINARY_OP_SUBTRACT: printf("BINARY_SUBTRACT\n"); break;
+        case RT_BINARY_OP_MULTIPLY: printf("BINARY_MULTIPLY\n"); break;
+        case RT_BINARY_OP_FLOOR_DIVIDE: printf("BINARY_FLOOR_DIVIDE\n"); break;
+        case RT_BINARY_OP_TRUE_DIVIDE: printf("BINARY_TRUE_DIVIDE\n"); break;
+        case RT_BINARY_OP_MODULO: printf("BINARY_MODULO\n"); break;
+        case RT_BINARY_OP_POWER: printf("BINARY_POWER\n"); break;
+
+        // in-place bitwise and shift operators
+        case RT_BINARY_OP_INPLACE_OR: printf("INPLACE_OR\n"); break;
+        case RT_BINARY_OP_INPLACE_XOR: printf("INPLACE_XOR\n"); break;
+        case RT_BINARY_OP_INPLACE_AND: printf("INPLACE_AND\n"); break;
+        case RT_BINARY_OP_INPLACE_LSHIFT: printf("INPLACE_LSHIFT\n"); break;
+        case RT_BINARY_OP_INPLACE_RSHIFT: printf("INPLACE_RSHIFT\n"); break;
+
+        // in-place arithmetic operators
+        case RT_BINARY_OP_INPLACE_ADD: printf("INPLACE_ADD\n"); break;
+        case RT_BINARY_OP_INPLACE_SUBTRACT: printf("INPLACE_SUBTRACT\n"); break;
+        case RT_BINARY_OP_INPLACE_MULTIPLY: printf("INPLACE_MULTIPLY\n"); break;
+        case RT_BINARY_OP_INPLACE_FLOOR_DIVIDE: printf("INPLACE_FLOOR_DIVIDE\n"); break;
+        case RT_BINARY_OP_INPLACE_TRUE_DIVIDE: printf("INPLACE_TRUE_DIVIDE\n"); break;
+        case RT_BINARY_OP_INPLACE_MODULO: printf("INPLACE_MODULO\n"); break;
+        case RT_BINARY_OP_INPLACE_POWER: printf("INPLACE_POWER\n"); break;
+
+        default: assert(0);
+    }
+}
+
+// Comparison: stack delta -1, 3-byte op. In PASS_3 "COMPARE_OP" followed by
+// the operator text for `op` is printed; an unknown `op` trips the assert.
+void emit_compare_op(emitter_t *emit, rt_compare_op_t op) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+
+    switch (op) {
+        // ordering and equality
+        case RT_COMPARE_OP_LESS: printf("COMPARE_OP <\n"); break;
+        case RT_COMPARE_OP_MORE: printf("COMPARE_OP >\n"); break;
+        case RT_COMPARE_OP_EQUAL: printf("COMPARE_OP ==\n"); break;
+        case RT_COMPARE_OP_LESS_EQUAL: printf("COMPARE_OP <=\n"); break;
+        case RT_COMPARE_OP_MORE_EQUAL: printf("COMPARE_OP >=\n"); break;
+        case RT_COMPARE_OP_NOT_EQUAL: printf("COMPARE_OP !=\n"); break;
+
+        // membership and identity
+        case RT_COMPARE_OP_IN: printf("COMPARE_OP in\n"); break;
+        case RT_COMPARE_OP_NOT_IN: printf("COMPARE_OP not in\n"); break;
+        case RT_COMPARE_OP_IS: printf("COMPARE_OP is\n"); break;
+        case RT_COMPARE_OP_IS_NOT: printf("COMPARE_OP is not\n"); break;
+
+        // exception matching
+        case RT_COMPARE_OP_EXCEPTION_MATCH: printf("COMPARE_OP exception match\n"); break;
+
+        default: assert(0);
+    }
+}
+
+// BUILD_TUPLE: stack delta 1 - n_args, 3-byte op; the text is printed in PASS_3 only.
+void emit_build_tuple(emitter_t *emit, int n_args) {
+    emit_pre(emit, 1 - n_args, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("BUILD_TUPLE %d\n", n_args);
+}
+
+// BUILD_LIST: stack delta 1 - n_args, 3-byte op; the text is printed in PASS_3 only.
+void emit_build_list(emitter_t *emit, int n_args) {
+    emit_pre(emit, 1 - n_args, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("BUILD_LIST %d\n", n_args);
+}
+
+// LIST_APPEND: stack delta -1, 3-byte op; the text is printed in PASS_3 only.
+void emit_list_append(emitter_t *emit, int list_index) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("LIST_APPEND %d\n", list_index);
+}
+
+// BUILD_MAP: stack delta +1 regardless of n_args, 3-byte op; the text is printed in PASS_3 only.
+void emit_build_map(emitter_t *emit, int n_args) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("BUILD_MAP %d\n", n_args);
+}
+
+// STORE_MAP: stack delta -2, 1-byte op; the text is printed in PASS_3 only.
+void emit_store_map(emitter_t *emit) {
+    emit_pre(emit, -2, 1);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("STORE_MAP\n");
+}
+
+// MAP_ADD: stack delta -2, 3-byte op; the text is printed in PASS_3 only.
+void emit_map_add(emitter_t *emit, int map_index) {
+    emit_pre(emit, -2, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("MAP_ADD %d\n", map_index);
+}
+
+// BUILD_SET: stack delta 1 - n_args, 3-byte op; the text is printed in PASS_3 only.
+void emit_build_set(emitter_t *emit, int n_args) {
+    emit_pre(emit, 1 - n_args, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("BUILD_SET %d\n", n_args);
+}
+
+// SET_ADD: stack delta -1, 3-byte op; the text is printed in PASS_3 only.
+void emit_set_add(emitter_t *emit, int set_index) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("SET_ADD %d\n", set_index);
+}
+
+// BUILD_SLICE: stack delta 1 - n_args, 3-byte op; the text is printed in PASS_3 only.
+void emit_build_slice(emitter_t *emit, int n_args) {
+    emit_pre(emit, 1 - n_args, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("BUILD_SLICE %d\n", n_args);
+}
+
+// UNPACK_SEQUENCE: stack delta n_args - 1, 3-byte op; the text is printed in PASS_3 only.
+void emit_unpack_sequence(emitter_t *emit, int n_args) {
+    emit_pre(emit, -1 + n_args, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("UNPACK_SEQUENCE %d\n", n_args);
+}
+
+// UNPACK_EX: 3-byte op. The stack delta is -1 + n_left + n_right + 1; the
+// printed operand has n_left in the low bits and n_right from bit 8 upwards.
+void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) {
+    emit_pre(emit, -1 + n_left + n_right + 1, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int packed = n_left | (n_right << 8);
+    printf("UNPACK_EX %d\n", packed);
+}
+
+// Function call: 3-byte op. The stack delta removes the positional args, two
+// entries per keyword arg, and one entry each for a star / double-star value.
+// In PASS_3 the matching CALL_FUNCTION* name and both counts are printed.
+void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    int n_extra = (have_star_arg ? 1 : 0) + (have_dbl_star_arg ? 1 : 0);
+    emit_pre(emit, -n_positional - 2 * n_keyword - n_extra, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+
+    if (have_star_arg && have_dbl_star_arg) {
+        printf("CALL_FUNCTION_VAR_KW");
+    } else if (have_star_arg) {
+        printf("CALL_FUNCTION_VAR");
+    } else if (have_dbl_star_arg) {
+        printf("CALL_FUNCTION_KW");
+    } else {
+        printf("CALL_FUNCTION");
+    }
+    printf(" %d, %d\n", n_positional, n_keyword);
+}
+
+void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { // this emitter has no separate method-call op; it is printed as a CALL_FUNCTION* op
+ emit_call_function(emit, n_positional, n_keyword, have_star_arg, have_dbl_star_arg);
+}
+
+// RETURN_VALUE: stack delta -1, 1-byte op; the text is printed in PASS_3 only.
+void emit_return_value(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    // must come after emit_pre, which clears this flag for every op
+    emit->last_emit_was_return_value = true;
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("RETURN_VALUE\n");
+}
+
+// RAISE_VARARGS: stack delta -n_args, 3-byte op; the text is printed in PASS_3 only.
+void emit_raise_varargs(emitter_t *emit, int n_args) {
+    emit_pre(emit, -n_args, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    printf("RAISE_VARARGS %d\n", n_args);
+}
+
+// Print the qualified-name prefix contributed by scope `s` and all of its
+// ancestors, outermost first: "<name>.<locals>." for a function scope,
+// "<name>." for a class scope, nothing for any other kind of scope.
+static void print_qualname_prefix(scope_t *s) {
+    if (s->parent != NULL) {
+        print_qualname_prefix(s->parent);
+    }
+    if (s->kind == SCOPE_FUNCTION) {
+        printf("%s.<locals>.", qstr_str(s->simple_name));
+    } else if (s->kind == SCOPE_CLASS) {
+        printf("%s.", qstr_str(s->simple_name));
+    }
+}
+
+// Emit the two LOAD_CONST ops that come before MAKE_FUNCTION / MAKE_CLOSURE:
+// the code object and then its qualified name. Each one has stack delta +1
+// and is a 3-byte op; the text is printed in PASS_3 only.
+void load_const_code_and_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass == PASS_3) {
+        printf("LOAD_CONST code %s\n", qstr_str(qstr));
+    }
+    // load qualified name
+    emit_pre(emit, 1, 3);
+    if (emit->pass == PASS_3) {
+        printf("LOAD_CONST '");
+        // the prefix is built from the scope currently being emitted and its parents;
+        // a single recursive walk replaces re-walking the parent chain once per level
+        print_qualname_prefix(emit->scope);
+        printf("%s'\n", qstr_str(qstr));
+    }
+}
+
+// MAKE_FUNCTION: first loads the code object and qualified name for `scope`,
+// then the 3-byte op itself with stack delta -1 - n_default_params - 2 * n_dict_params.
+// The printed operand has n_default_params in the low bits and n_dict_params from bit 8 upwards.
+void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
+    load_const_code_and_name(emit, scope->simple_name);
+    emit_pre(emit, -1 - n_default_params - 2 * n_dict_params, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int packed = (n_dict_params << 8) | n_default_params;
+    printf("MAKE_FUNCTION %d\n", packed);
+}
+
+// MAKE_CLOSURE: first loads the code object and qualified name for `scope`,
+// then the 3-byte op itself with stack delta -2 - n_default_params - 2 * n_dict_params.
+// The printed operand has n_default_params in the low bits and n_dict_params from bit 8 upwards.
+void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
+    load_const_code_and_name(emit, scope->simple_name);
+    emit_pre(emit, -2 - n_default_params - 2 * n_dict_params, 3);
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    int packed = (n_dict_params << 8) | n_default_params;
+    printf("MAKE_CLOSURE %d\n", packed);
+}
+
+// YIELD_VALUE: stack delta 0, 1-byte op. PASS_2 flags the scope as a
+// generator; PASS_3 prints the op.
+void emit_yield_value(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass == PASS_2) {
+        emit->scope->flags |= SCOPE_FLAG_GENERATOR;
+    } else if (emit->pass == PASS_3) {
+        printf("YIELD_VALUE\n");
+    }
+}
+
+// YIELD_FROM: stack delta -1, 1-byte op. PASS_2 flags the scope as a
+// generator; PASS_3 prints the op.
+void emit_yield_from(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass == PASS_2) {
+        emit->scope->flags |= SCOPE_FLAG_GENERATOR;
+    } else if (emit->pass == PASS_3) {
+        printf("YIELD_FROM\n");
+    }
+}
+
+#endif // EMIT_DO_CPY
diff --git a/py/emitthumb.c b/py/emitthumb.c
new file mode 100644
index 0000000000..cad6b65044
--- /dev/null
+++ b/py/emitthumb.c
@@ -0,0 +1,673 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "scope.h"
+#include "runtime.h"
+#include "emit.h"
+#include "asmthumb.h"
+
+#ifdef EMIT_DO_THUMB
+
+#define REG_LOCAL_1 (REG_R4)
+#define REG_LOCAL_2 (REG_R5)
+#define REG_LOCAL_3 (REG_R6)
+#define REG_TEMP (REG_R7)
+#define REG_LOCAL_NUM (3)
+
+// Describes a value that has been produced but whose store to its value-stack
+// slot is deferred until the next emit call decides how to consume it.
+typedef enum {
+ NEED_TO_PUSH_NOTHING, // no deferred value
+ NEED_TO_PUSH_REG, // deferred value is held in register last_reg
+ NEED_TO_PUSH_I32, // deferred value is the immediate last_i32
+} need_to_push_t;
+
+// State of the Thumb native emitter for the scope currently being compiled.
+struct _emitter_t {
+ int pass; // current pass, set by emit_start_pass
+ int stack_start; // index of the first local slot used by the value stack
+ int stack_size; // number of values stored in value-stack slots (excludes a deferred value)
+ bool last_emit_was_return_value; // set by emit_return_value, cleared by emit_pre_raw
+ need_to_push_t need_to_push; // kind of deferred value, if any
+ int last_reg; // register holding the deferred value (NEED_TO_PUSH_REG)
+ int32_t last_i32; // deferred immediate (NEED_TO_PUSH_I32)
+
+ scope_t *scope; // scope being compiled
+
+ asm_thumb_t *as; // assembler receiving the generated instructions
+ bool do_native_types; // selects the inline (non-runtime-call) paths for ints, add, compare and branch
+};
+
+// Allocate a Thumb emitter together with its assembler.
+// Native types are always enabled here; the remaining fields are set by
+// emit_start_pass before use.
+emitter_t *emit_new() {
+ emitter_t *emit = m_new(emitter_t, 1);
+ emit->as = asm_thumb_new();
+ emit->do_native_types = true;
+ return emit;
+}
+
+// Begin a compilation pass over `scope`: reset the emitter state, emit the
+// function entry sequence and move incoming arguments to their local homes.
+void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
+ emit->pass = pass;
+ emit->stack_start = 0;
+ emit->stack_size = 0;
+ emit->last_emit_was_return_value = false;
+ emit->need_to_push = NEED_TO_PUSH_NOTHING;
+ emit->scope = scope;
+ if (pass == PASS_1) {
+ // the code id is allocated only on the first pass
+ scope->unique_code_id = rt_get_new_unique_code_id();
+ }
+
+ asm_thumb_start_pass(emit->as, pass);
+
+ // entry to function
+ int num_locals = 0;
+ if (pass > PASS_1) {
+ // locals beyond the first REG_LOCAL_NUM need stack slots; the value
+ // stack slots follow them
+ num_locals = scope->num_locals - REG_LOCAL_NUM;
+ if (num_locals < 0) {
+ num_locals = 0;
+ }
+ emit->stack_start = num_locals;
+ num_locals += scope->stack_size;
+ }
+ asm_thumb_entry(emit->as, num_locals);
+
+ // initialise locals from parameters
+ // (first three go to the local registers, the fourth to stack slot 0)
+ for (int i = 0; i < scope->num_params; i++) {
+ if (i == 0) {
+ asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_1, REG_ARG_1);
+ } else if (i == 1) {
+ asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_2, REG_ARG_2);
+ } else if (i == 2) {
+ asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_3, REG_ARG_3);
+ } else if (i == 3) {
+ asm_thumb_mov_local_reg(emit->as, i - REG_LOCAL_NUM, REG_ARG_4);
+ } else {
+ // TODO not implemented
+ assert(0);
+ }
+ }
+
+ // load the address of the runtime function table into R7
+ asm_thumb_mov_reg_i32(emit->as, REG_R7, (machine_uint_t)rt_fun_table);
+}
+
+// Finish the current pass: emit the function exit if the last instruction was
+// not a return, close the assembler pass, report an unbalanced value stack,
+// and in PASS_3 register the generated machine code with the runtime.
+void emit_end_pass(emitter_t *emit) {
+    asm_thumb_t *as = emit->as;
+
+    if (!emit->last_emit_was_return_value) {
+        asm_thumb_exit(as);
+    }
+    asm_thumb_end_pass(as);
+
+    // the value stack must be empty at the end of a scope
+    const int leftover = emit->stack_size;
+    if (leftover != 0) {
+        printf("ERROR: stack size not back to zero; got %d\n", leftover);
+    }
+
+    if (emit->pass != PASS_3) {
+        return;
+    }
+    scope_t *scope = emit->scope;
+    py_fun_t code = asm_thumb_get_code(as);
+    rt_assign_native_code(scope->unique_code_id, code, asm_thumb_get_code_size(as), scope->num_params);
+}
+
+// Return whether the most recently emitted operation was emit_return_value.
+bool emit_last_emit_was_return_value(emitter_t *emit) {
+ return emit->last_emit_was_return_value;
+}
+
+// Return the emitter's current value-stack depth.
+int emit_get_stack_size(emitter_t *emit) {
+ return emit->stack_size;
+}
+
+// Overwrite the emitter's value-stack depth; no bounds check and no update of
+// the scope's maximum stack size is performed here.
+void emit_set_stack_size(emitter_t *emit, int size) {
+ emit->stack_size = size;
+}
+
+// Change the value-stack depth by stack_size_delta and, after PASS_1, record
+// the largest depth seen in the scope.
+static void adjust_stack(emitter_t *emit, int stack_size_delta) {
+    const int depth = emit->stack_size + stack_size_delta;
+    emit->stack_size = depth;
+    assert(emit->stack_size >= 0);
+
+    if (emit->pass <= PASS_1) {
+        return;
+    }
+    scope_t *scope = emit->scope;
+    if (depth > scope->stack_size) {
+        scope->stack_size = depth;
+    }
+}
+
+// Write any deferred value to its value-stack slot so that memory reflects
+// the whole stack, then clear the deferred state.
+static void stack_settle(emitter_t *emit) {
+    const need_to_push_t pending = emit->need_to_push;
+
+    if (pending == NEED_TO_PUSH_REG) {
+        asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, emit->last_reg);
+        adjust_stack(emit, 1);
+    } else if (pending == NEED_TO_PUSH_I32) {
+        // materialise the immediate in R0 before storing it
+        asm_thumb_mov_reg_i32_optimised(emit->as, REG_R0, emit->last_i32);
+        asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, REG_R0);
+        adjust_stack(emit, 1);
+    }
+    // NEED_TO_PUSH_NOTHING: nothing to store
+
+    emit->need_to_push = NEED_TO_PUSH_NOTHING;
+}
+
+// Common prologue: apply the stack delta and clear the "last emit was a
+// return" flag. Does not settle a deferred push.
+static void emit_pre_raw(emitter_t *emit, int stack_size_delta) {
+ adjust_stack(emit, stack_size_delta);
+ emit->last_emit_was_return_value = false;
+}
+
+// Prologue for operations that pop nothing: store any deferred value to the
+// stack first, then run the common prologue with a zero delta.
+static void emit_pre(emitter_t *emit) {
+ stack_settle(emit);
+ emit_pre_raw(emit, 0);
+}
+
+// Pop the top of the value stack into reg_dest. A deferred push is consumed
+// directly (register move or immediate load) without going through memory.
+static void emit_pre_pop_reg(emitter_t *emit, int reg_dest) {
+ switch (emit->need_to_push) {
+ case NEED_TO_PUSH_NOTHING:
+ // top value is in memory: load it and shrink the stack
+ asm_thumb_mov_reg_local(emit->as, reg_dest, emit->stack_start + emit->stack_size - 1);
+ emit_pre_raw(emit, -1);
+ break;
+
+ case NEED_TO_PUSH_REG:
+ // deferred value was never counted in stack_size, so the delta is 0
+ emit_pre_raw(emit, 0);
+ if (emit->last_reg != reg_dest) {
+ asm_thumb_mov_reg_reg(emit->as, reg_dest, emit->last_reg);
+ }
+ break;
+
+ case NEED_TO_PUSH_I32:
+ emit_pre_raw(emit, 0);
+ asm_thumb_mov_reg_i32_optimised(emit->as, reg_dest, emit->last_i32);
+ break;
+ }
+ emit->need_to_push = NEED_TO_PUSH_NOTHING;
+}
+
+// Pop two values: the top of stack into rega, the one below it into regb.
+static void emit_pre_pop_reg_reg(emitter_t *emit, int rega, int regb) {
+ emit_pre_pop_reg(emit, rega);
+ // after the first pop the second value is always in memory
+ asm_thumb_mov_reg_local(emit->as, regb, emit->stack_start + emit->stack_size - 1);
+ adjust_stack(emit, -1);
+}
+
+// Pop three values: top of stack into rega, then the next two (going down the
+// stack) into regb and regc.
+static void emit_pre_pop_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc) {
+    emit_pre_pop_reg(emit, rega);
+
+    // slot index of the value that is now on top (always in memory)
+    const int top_slot = emit->stack_start + emit->stack_size - 1;
+    asm_thumb_mov_reg_local(emit->as, regb, top_slot);
+    asm_thumb_mov_reg_local(emit->as, regc, top_slot - 1);
+    adjust_stack(emit, -2);
+}
+
+// Epilogue for operations that push nothing; currently a no-op.
+static void emit_post(emitter_t *emit) {
+}
+
+// Record that the result is in `reg`; the store to the value stack is
+// deferred until stack_settle or consumed directly by the next pop.
+static void emit_post_push_reg(emitter_t *emit, int reg) {
+ emit->need_to_push = NEED_TO_PUSH_REG;
+ emit->last_reg = reg;
+}
+
+// Record that the result is the immediate `i32`; no instruction is emitted
+// until the value is settled or popped.
+static void emit_post_push_i32(emitter_t *emit, int32_t i32) {
+ emit->need_to_push = NEED_TO_PUSH_I32;
+ emit->last_i32 = i32;
+}
+
+// Push two values: rega is stored to the stack immediately, regb becomes the
+// deferred top of stack.
+static void emit_post_push_reg_reg(emitter_t *emit, int rega, int regb) {
+ asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, rega);
+ emit->need_to_push = NEED_TO_PUSH_REG;
+ emit->last_reg = regb;
+ adjust_stack(emit, 1);
+}
+
+// Push three registers onto the value stack in memory; rega ends up lowest,
+// regc on top. No value is left deferred.
+static void emit_post_push_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc) {
+    const int regs[3] = { rega, regb, regc };
+    const int first_slot = emit->stack_start + emit->stack_size;
+
+    for (int k = 0; k < 3; k++) {
+        asm_thumb_mov_local_reg(emit->as, first_slot + k, regs[k]);
+    }
+    adjust_stack(emit, 3);
+}
+
+// Push four registers onto the value stack in memory; rega ends up lowest,
+// regd on top. No value is left deferred.
+static void emit_post_push_reg_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc, int regd) {
+    const int regs[4] = { rega, regb, regc, regd };
+    const int first_slot = emit->stack_start + emit->stack_size;
+
+    for (int k = 0; k < 4; k++) {
+        asm_thumb_mov_local_reg(emit->as, first_slot + k, regs[k]);
+    }
+    adjust_stack(emit, 4);
+}
+
+// Load into reg_dest the address of the current top-of-stack slot, then
+// remove n_pop values from the stack accounting.
+static void emit_get_stack_pointer_to_reg_for_pop(emitter_t *emit, int reg_dest, int n_pop) {
+ asm_thumb_mov_reg_local_addr(emit->as, reg_dest, emit->stack_start + emit->stack_size - 1);
+ adjust_stack(emit, -n_pop);
+}
+
+// Reserve n_push new stack slots and load into reg_dest the address of the
+// highest-indexed one (the new top of stack).
+static void emit_get_stack_pointer_to_reg_for_push(emitter_t *emit, int reg_dest, int n_push) {
+ asm_thumb_mov_reg_local_addr(emit->as, reg_dest, emit->stack_start + emit->stack_size + n_push - 1);
+ adjust_stack(emit, n_push);
+}
+
+// Emit an indirect call to the runtime function identified by fun_kind.
+// NOTE(review): REG_R3 is presumably a scratch register for the call
+// sequence; confirm against asm_thumb_bl_ind.
+static void emit_call(emitter_t *emit, rt_fun_kind_t fun_kind) {
+ asm_thumb_bl_ind(emit->as, rt_fun_table[fun_kind], fun_kind, REG_R3);
+}
+
+// Load the immediate arg_val into arg_reg, then call the runtime function
+// identified by fun_kind.
+static void emit_call_with_i32_arg(emitter_t *emit, rt_fun_kind_t fun_kind, int32_t arg_val, int arg_reg) {
+    asm_thumb_mov_reg_i32_optimised(emit->as, arg_reg, arg_val);
+    emit_call(emit, fun_kind);
+}
+
+int emit_label_new(emitter_t *emit) {
+ return asm_thumb_label_new(emit->as);
+}
+
+void emit_label_assign(emitter_t *emit, int l) {
+ asm_thumb_label_assign(emit->as, l);
+}
+
+void emit_import_name(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_import_from(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_import_star(emitter_t *emit) {
+ assert(0);
+}
+
+// Push the singleton object for the None / False / True keyword token as a
+// deferred immediate (the object value cast to an integer).
+void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) {
+ emit_pre(emit);
+ py_obj_t o;
+ switch (tok) {
+ case PY_TOKEN_KW_NONE: o = py_const_none; break;
+ case PY_TOKEN_KW_FALSE: o = py_const_false; break;
+ case PY_TOKEN_KW_TRUE: o = py_const_true; break;
+ default: assert(0); o = 0; // shouldn't happen
+ }
+ emit_post_push_i32(emit, (machine_uint_t)o);
+}
+
+// Push a small integer constant as a deferred immediate.
+// With native types the raw value is pushed; otherwise the value is tagged as
+// (arg << 1) | 1.
+void emit_load_const_small_int(emitter_t *emit, int arg) {
+    emit_pre(emit);
+    if (emit->do_native_types) {
+        emit_post_push_i32(emit, arg);
+    } else {
+        // shift in unsigned arithmetic: left-shifting a negative signed int
+        // is undefined behaviour in C
+        emit_post_push_i32(emit, (int32_t)(((uint32_t)arg << 1) | 1u));
+    }
+}
+
+void emit_load_const_int(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_load_const_dec(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_load_const_id(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+
+// Push a string constant by calling the RT_F_LOAD_CONST_STR runtime function
+// with the qstr as its argument. The `bytes` flag is not used here.
+void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
+ emit_pre(emit);
+ emit_call_with_i32_arg(emit, RT_F_LOAD_CONST_STR, qstr, REG_ARG_1);
+ emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_load_const_verbatim_start(emitter_t *emit) {
+ assert(0);
+}
+void emit_load_const_verbatim_int(emitter_t *emit, int val) {
+ assert(0);
+}
+void emit_load_const_verbatim_str(emitter_t *emit, const char *str) {
+ assert(0);
+}
+void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) {
+ assert(0);
+}
+void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) {
+ assert(0);
+}
+void emit_load_const_verbatim_end(emitter_t *emit) {
+ assert(0);
+}
+
+// Push the value of local variable number local_num.
+// The first REG_LOCAL_NUM locals live in REG_LOCAL_1..REG_LOCAL_3. The rest
+// live in stack slots numbered from 0, i.e. slot (local_num - REG_LOCAL_NUM),
+// which is where emit_start_pass stores the fourth parameter.
+void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) {
+    emit_pre(emit);
+    if (local_num == 0) {
+        emit_post_push_reg(emit, REG_LOCAL_1);
+    } else if (local_num == 1) {
+        emit_post_push_reg(emit, REG_LOCAL_2);
+    } else if (local_num == 2) {
+        emit_post_push_reg(emit, REG_LOCAL_3);
+    } else {
+        // was (local_num - 1), which indexes past the local slots into the
+        // value-stack area when REG_LOCAL_NUM is 3
+        asm_thumb_mov_reg_local(emit->as, REG_R0, local_num - REG_LOCAL_NUM);
+        emit_post_push_reg(emit, REG_R0);
+    }
+}
+
+// Push the result of the RT_F_LOAD_NAME runtime call for `qstr`.
+void emit_load_name(emitter_t *emit, qstr qstr) {
+ emit_pre(emit);
+ emit_call_with_i32_arg(emit, RT_F_LOAD_NAME, qstr, REG_ARG_1);
+ emit_post_push_reg(emit, REG_RET);
+}
+
+// Push the result of the RT_F_LOAD_GLOBAL runtime call for `qstr`.
+void emit_load_global(emitter_t *emit, qstr qstr) {
+ emit_pre(emit);
+ emit_call_with_i32_arg(emit, RT_F_LOAD_GLOBAL, qstr, REG_ARG_1);
+ emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_load_deref(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_load_closure(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+
+// Replace the top of stack (the base object) with the result of the
+// RT_F_LOAD_ATTR runtime call for attribute `qstr`.
+void emit_load_attr(emitter_t *emit, qstr qstr) {
+ emit_pre_pop_reg(emit, REG_ARG_1); // arg1 = base
+ emit_call_with_i32_arg(emit, RT_F_LOAD_ATTR, qstr, REG_ARG_2); // arg2 = attribute name
+ emit_post_push_reg(emit, REG_RET);
+}
+
+// Pop the base object and call RT_F_LOAD_METHOD, passing a pointer to two
+// freshly reserved stack slots as the destination; the runtime call is
+// expected to fill those slots, so nothing is pushed afterwards.
+void emit_load_method(emitter_t *emit, qstr qstr) {
+ emit_pre_pop_reg(emit, REG_ARG_1); // arg1 = base
+ emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr
+ emit_call_with_i32_arg(emit, RT_F_LOAD_METHOD, qstr, REG_ARG_2); // arg2 = method name
+}
+
+void emit_load_build_class(emitter_t *emit) {
+ assert(0);
+} // basically load __build_class__ from builtins
+
+// Pop the top of stack into local variable number local_num.
+// The first REG_LOCAL_NUM locals live in REG_LOCAL_1..REG_LOCAL_3. The rest
+// live in stack slots numbered from 0, i.e. slot (local_num - REG_LOCAL_NUM),
+// which is where emit_start_pass stores the fourth parameter.
+void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
+    if (local_num == 0) {
+        emit_pre_pop_reg(emit, REG_LOCAL_1);
+    } else if (local_num == 1) {
+        emit_pre_pop_reg(emit, REG_LOCAL_2);
+    } else if (local_num == 2) {
+        emit_pre_pop_reg(emit, REG_LOCAL_3);
+    } else {
+        emit_pre_pop_reg(emit, REG_R0);
+        // was (local_num - 1), which would overwrite a value-stack slot
+        // instead of the local's own slot when REG_LOCAL_NUM is 3
+        asm_thumb_mov_local_reg(emit->as, local_num - REG_LOCAL_NUM, REG_R0);
+    }
+    emit_post(emit);
+}
+
+// Pop the top of stack and pass it, together with the name `qstr`, to the
+// RT_F_STORE_NAME runtime function.
+void emit_store_name(emitter_t *emit, qstr qstr) {
+ emit_pre_pop_reg(emit, REG_ARG_2);
+ emit_call_with_i32_arg(emit, RT_F_STORE_NAME, qstr, REG_ARG_1); // arg1 = name
+ emit_post(emit);
+}
+
+void emit_store_global(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+
+void emit_store_deref(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_store_attr(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_store_locals(emitter_t *emit) {
+ assert(0);
+}
+
+// Pop index, base and value (top of stack first) and call RT_F_STORE_SUBSCR
+// with base, index, value as arguments 1..3. Nothing is pushed.
+void emit_store_subscr(emitter_t *emit) {
+ emit_pre_pop_reg_reg_reg(emit, REG_ARG_2, REG_ARG_1, REG_ARG_3); // index, base, value to store
+ emit_call(emit, RT_F_STORE_SUBSCR);
+}
+
+void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) {
+ assert(0);
+}
+void emit_delete_name(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_delete_global(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_delete_deref(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_delete_attr(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_delete_subscr(emitter_t *emit) {
+ assert(0);
+}
+
+// Duplicate the top of stack: pop it into R0, then push R0 twice (the second
+// copy stays deferred in the register).
+void emit_dup_top(emitter_t *emit) {
+ emit_pre_pop_reg(emit, REG_R0);
+ emit_post_push_reg_reg(emit, REG_R0, REG_R0);
+}
+
+// Duplicate the top two stack entries, preserving their order: the top is
+// popped into R0 and the one below into R1, then R1, R0, R1, R0 are pushed.
+void emit_dup_top_two(emitter_t *emit) {
+ emit_pre_pop_reg_reg(emit, REG_R0, REG_R1);
+ emit_post_push_reg_reg_reg_reg(emit, REG_R1, REG_R0, REG_R1, REG_R0);
+}
+
+// Discard the top of stack (it is popped into R0 and then ignored).
+void emit_pop_top(emitter_t *emit) {
+ emit_pre_pop_reg(emit, REG_R0);
+ emit_post(emit);
+}
+
+void emit_rot_two(emitter_t *emit) {
+ assert(0);
+}
+
+// Rotate the top three stack entries: the old top (R0) moves to the third
+// position, and the old second (R1) and third (R2) each move up by one.
+void emit_rot_three(emitter_t *emit) {
+ emit_pre_pop_reg_reg_reg(emit, REG_R0, REG_R1, REG_R2);
+ emit_post_push_reg_reg_reg(emit, REG_R0, REG_R2, REG_R1);
+}
+
+// Emit an unconditional branch to `label`, after settling any deferred push
+// so the value stack in memory is complete at the branch.
+void emit_jump(emitter_t *emit, int label) {
+ emit_pre(emit);
+ asm_thumb_b_label(emit->as, label);
+ emit_post(emit);
+}
+
+// Pop the top of stack and branch to `label` if it is false.
+// With native types the popped value itself is tested against zero; otherwise
+// the RT_F_IS_TRUE runtime function is called and its result is tested.
+void emit_pop_jump_if_false(emitter_t *emit, int label) {
+    if (emit->do_native_types) {
+        emit_pre_pop_reg(emit, REG_RET);
+    } else {
+        emit_pre_pop_reg(emit, REG_ARG_1);
+        emit_call(emit, RT_F_IS_TRUE);
+    }
+    // in both cases the value to test is now in REG_RET
+    asm_thumb_cmp_reg_bz_label(emit->as, REG_RET, label);
+    emit_post(emit);
+}
+
+void emit_pop_jump_if_true(emitter_t *emit, int label) {
+ assert(0);
+}
+void emit_jump_if_true_or_pop(emitter_t *emit, int label) {
+ assert(0);
+}
+void emit_jump_if_false_or_pop(emitter_t *emit, int label) {
+ assert(0);
+}
+
+void emit_setup_loop(emitter_t *emit, int label) {
+ emit_pre(emit);
+ emit_post(emit);
+}
+
+void emit_break_loop(emitter_t *emit, int label) {
+ assert(0);
+}
+void emit_continue_loop(emitter_t *emit, int label) {
+ assert(0);
+}
+void emit_setup_with(emitter_t *emit, int label) {
+ assert(0);
+}
+void emit_with_cleanup(emitter_t *emit) {
+ assert(0);
+}
+void emit_setup_except(emitter_t *emit, int label) {
+ assert(0);
+}
+void emit_setup_finally(emitter_t *emit, int label) {
+ assert(0);
+}
+void emit_end_finally(emitter_t *emit) {
+ assert(0);
+}
+void emit_get_iter(emitter_t *emit) {
+ assert(0);
+} // tos = getiter(tos)
+void emit_for_iter(emitter_t *emit, int label) {
+ assert(0);
+}
+void emit_for_iter_end(emitter_t *emit) {
+ assert(0);
+}
+void emit_pop_except(emitter_t *emit) {
+ assert(0);
+}
+
+// Replace the top of stack with the result of the RT_F_UNARY_OP runtime call
+// (arg1 = op, arg2 = operand).
+void emit_unary_op(emitter_t *emit, rt_unary_op_t op) {
+ emit_pre_pop_reg(emit, REG_ARG_2);
+ emit_call_with_i32_arg(emit, RT_F_UNARY_OP, op, REG_ARG_1);
+ emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_build_tuple(emitter_t *emit, int n_args) {
+ assert(0);
+}
+
+// Build a list from the top n_args stack entries: all values are first
+// settled into memory, then RT_F_BUILD_LIST is called with the count and a
+// pointer to the top-of-stack slot; the new list is pushed.
+void emit_build_list(emitter_t *emit, int n_args) {
+ emit_pre(emit);
+ emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order
+ emit_call_with_i32_arg(emit, RT_F_BUILD_LIST, n_args, REG_ARG_1);
+ emit_post_push_reg(emit, REG_RET); // new list
+}
+
+void emit_list_append(emitter_t *emit, int list_index) {
+ assert(0);
+}
+
+void emit_build_map(emitter_t *emit, int n_args) {
+ emit_pre(emit);
+ emit_call_with_i32_arg(emit, RT_F_BUILD_MAP, n_args, REG_ARG_1);
+ emit_post_push_reg(emit, REG_RET); // new map
+}
+
+// Pop key, value and map (top of stack first), call RT_F_STORE_MAP with
+// map, key, value as arguments 1..3, and push the call's result.
+void emit_store_map(emitter_t *emit) {
+ emit_pre_pop_reg_reg_reg(emit, REG_ARG_2, REG_ARG_3, REG_ARG_1); // key, value, map
+ emit_call(emit, RT_F_STORE_MAP);
+ emit_post_push_reg(emit, REG_RET); // map
+}
+
+void emit_map_add(emitter_t *emit, int map_index) {
+ assert(0);
+}
+
+void emit_build_set(emitter_t *emit, int n_args) {
+ emit_pre(emit);
+ emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order
+ emit_call_with_i32_arg(emit, RT_F_BUILD_SET, n_args, REG_ARG_1);
+ emit_post_push_reg(emit, REG_RET); // new set
+}
+
+void emit_set_add(emitter_t *emit, int set_index) {
+ assert(0);
+}
+void emit_build_slice(emitter_t *emit, int n_args) {
+ assert(0);
+}
+void emit_unpack_sequence(emitter_t *emit, int n_args) {
+ assert(0);
+}
+void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) {
+ assert(0);
+}
+
+// Push a function object created by RT_F_MAKE_FUNCTION_FROM_ID from the
+// scope's unique code id. Default and dict parameters are not supported.
+void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
+ assert(n_default_params == 0 && n_dict_params == 0);
+ emit_pre(emit);
+ emit_call_with_i32_arg(emit, RT_F_MAKE_FUNCTION_FROM_ID, scope->unique_code_id, REG_ARG_1);
+ emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
+ assert(0);
+}
+
+// Call the function on the stack with 0, 1 or 2 positional arguments and push
+// the result. Keyword, *args and **kwargs calls are not supported.
+void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);
+    switch (n_positional) {
+        case 0:
+            emit_pre_pop_reg(emit, REG_ARG_1); // the function
+            emit_call(emit, RT_F_CALL_FUNCTION_0);
+            break;
+
+        case 1:
+            emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); // the single argument, the function
+            emit_call(emit, RT_F_CALL_FUNCTION_1);
+            break;
+
+        case 2:
+            emit_pre_pop_reg_reg_reg(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the second argument, the first argument, the function
+            emit_call(emit, RT_F_CALL_FUNCTION_2);
+            break;
+
+        default:
+            assert(0);
+            break;
+    }
+    emit_post_push_reg(emit, REG_RET);
+}
+
+// Call the method/self pair on the stack with 0 or 1 positional arguments and
+// push the result. Keyword, *args and **kwargs calls are not supported.
+void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);
+    switch (n_positional) {
+        case 0:
+            emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); // the self object (or NULL), the method
+            emit_call(emit, RT_F_CALL_METHOD_1);
+            break;
+
+        case 1:
+            emit_pre_pop_reg_reg_reg(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the first argument, the self object (or NULL), the method
+            emit_call(emit, RT_F_CALL_METHOD_2);
+            break;
+
+        default:
+            assert(0);
+            break;
+    }
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_pop_block(emitter_t *emit) {
+ emit_pre(emit);
+ emit_post(emit);
+}
+
+// Pop two operands, apply a binary operation and push the result.
+// NOTE(review): the native-types path always emits an add and ignores `op`;
+// only the runtime path (RT_F_BINARY_OP) honours the requested operator.
+void emit_binary_op(emitter_t *emit, rt_binary_op_t op) {
+ if (emit->do_native_types) {
+ emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1);
+ asm_thumb_add_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, REG_ARG_2);
+ emit_post_push_reg(emit, REG_RET);
+ } else {
+ // arg1 = op, arg2 = left operand, arg3 = right operand (top of stack)
+ emit_pre_pop_reg_reg(emit, REG_ARG_3, REG_ARG_2);
+ emit_call_with_i32_arg(emit, RT_F_BINARY_OP, op, REG_ARG_1);
+ emit_post_push_reg(emit, REG_RET);
+ }
+}
+
+// Pop two operands, compare them and push the result.
+// NOTE(review): the native-types path ignores `op` and always emits the same
+// cmp / ite-ge sequence (a less-than test, per the inline comments below);
+// only the runtime path (RT_F_COMPARE_OP) honours the requested operator.
+void emit_compare_op(emitter_t *emit, rt_compare_op_t op) {
+ if (emit->do_native_types) {
+ emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1);
+ asm_thumb_cmp_reg_reg(emit->as, REG_ARG_1, REG_ARG_2);
+ asm_thumb_ite_ge(emit->as);
+ asm_thumb_mov_reg_i8(emit->as, REG_RET, 0); // if r0 >= r1
+ asm_thumb_mov_reg_i8(emit->as, REG_RET, 1); // if r0 < r1
+ emit_post_push_reg(emit, REG_RET);
+ } else {
+ // arg1 = op, arg2 = left operand, arg3 = right operand (top of stack)
+ emit_pre_pop_reg_reg(emit, REG_ARG_3, REG_ARG_2);
+ emit_call_with_i32_arg(emit, RT_F_COMPARE_OP, op, REG_ARG_1);
+ emit_post_push_reg(emit, REG_RET);
+ }
+}
+
+// Pop the top of stack into the return register and emit the function exit.
+// The flag is set after the pop because emit_pre_pop_reg clears it.
+void emit_return_value(emitter_t *emit) {
+ emit_pre_pop_reg(emit, REG_RET);
+ emit->last_emit_was_return_value = true;
+ //asm_thumb_call_ind(emit->as, 0, REG_R0); to seg fault for debugging with gdb
+ asm_thumb_exit(emit->as);
+}
+
+void emit_raise_varargs(emitter_t *emit, int n_args) {
+ assert(0);
+}
+void emit_yield_value(emitter_t *emit) {
+ assert(0);
+}
+void emit_yield_from(emitter_t *emit) {
+ assert(0);
+}
+
+#endif // EMIT_DO_THUMB
diff --git a/py/emitx64.c b/py/emitx64.c
new file mode 100644
index 0000000000..da4c7e333d
--- /dev/null
+++ b/py/emitx64.c
@@ -0,0 +1,680 @@
+/* This code is equivalent to the earlier push/pop-based x64 emitter
+ * but pre-allocates stack space and uses mov instead of push/pop
+ * instructions to access the temporary stack. It runs in similar
+ * time, but uses 3*n more bytes, where n is the number of push/pop
+ * instructions.
+ *
+ * This code is preferred because it keeps the stack aligned on a
+ * 16 byte boundary.
+ *
+ * Improvements:
+ * Doesn't call stub functions, does all the work inline.
+ * Has optimisations for loading i64s to stack.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "scope.h"
+#include "runtime.h"
+#include "emit.h"
+#include "asmx64.h"
+
+#ifdef EMIT_DO_X64
+
+#define REG_LOCAL_1 (REG_RBX)
+#define REG_LOCAL_NUM (1)
+
+// Describes a value that has been produced but whose store to its value-stack
+// slot is deferred until the next emit call decides how to consume it.
+typedef enum {
+ NEED_TO_PUSH_NOTHING, // no deferred value
+ NEED_TO_PUSH_R64, // deferred value is held in register last_r64
+ NEED_TO_PUSH_I64, // deferred value is the immediate last_i64
+} need_to_push_t;
+
+// State of the x64 native emitter for the scope currently being compiled.
+struct _emitter_t {
+ int pass; // current pass, set by emit_start_pass
+ int stack_start; // index of the first local slot used by the value stack
+ int stack_size; // number of values stored in value-stack slots (excludes a deferred value)
+ bool last_emit_was_return_value; // cleared by emit_pre_raw at the start of each operation
+ need_to_push_t need_to_push; // kind of deferred value, if any
+ int last_r64; // register holding the deferred value (NEED_TO_PUSH_R64)
+ int64_t last_i64; // deferred immediate (NEED_TO_PUSH_I64)
+
+ scope_t *scope; // scope being compiled
+
+ asm_x64_t *as; // assembler receiving the generated instructions
+ bool do_native_types; // selects the untagged-integer / inline-test paths
+};
+
+// Allocate an x64 emitter together with its assembler. Native types start
+// disabled (see emit_set_native_types); the remaining fields are set by
+// emit_start_pass before use.
+emitter_t *emit_new() {
+ emitter_t *emit = m_new(emitter_t, 1);
+ emit->as = asm_x64_new();
+ emit->do_native_types = false;
+ return emit;
+}
+
+// Enable or disable the native-types code paths for subsequent emits.
+void emit_set_native_types(emitter_t *emit, bool do_native_types) {
+ emit->do_native_types = do_native_types;
+}
+
+// Begin a compilation pass over `scope`: reset the emitter state, emit the
+// function entry sequence and move incoming arguments to their local homes.
+void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
+ emit->pass = pass;
+ emit->stack_start = 0;
+ emit->stack_size = 0;
+ emit->last_emit_was_return_value = false;
+ emit->need_to_push = NEED_TO_PUSH_NOTHING;
+ emit->scope = scope;
+ if (pass == PASS_1) {
+ // the code id is allocated only on the first pass
+ scope->unique_code_id = rt_get_new_unique_code_id();
+ }
+
+ asm_x64_start_pass(emit->as, pass);
+
+ // entry to function
+ int num_locals = 0;
+ if (pass > PASS_1) {
+ // locals beyond the first REG_LOCAL_NUM need stack slots; the value
+ // stack slots follow them
+ num_locals = scope->num_locals - REG_LOCAL_NUM;
+ if (num_locals < 0) {
+ num_locals = 0;
+ }
+ emit->stack_start = num_locals;
+ num_locals += scope->stack_size;
+ }
+ asm_x64_entry(emit->as, num_locals);
+
+ // initialise locals from parameters
+ // (first goes to the local register, the next two to stack slots 0 and 1)
+ for (int i = 0; i < scope->num_params; i++) {
+ if (i == 0) {
+ asm_x64_mov_r64_to_r64(emit->as, REG_ARG_1, REG_LOCAL_1);
+ } else if (i == 1) {
+ asm_x64_mov_r64_to_local(emit->as, REG_ARG_2, i - 1);
+ } else if (i == 2) {
+ asm_x64_mov_r64_to_local(emit->as, REG_ARG_3, i - 1);
+ } else {
+ // TODO not implemented
+ assert(0);
+ }
+ }
+}
+
+// Finish the current pass: emit the function exit if the last instruction was
+// not a return, close the assembler pass, report an unbalanced value stack,
+// and in PASS_3 register the generated machine code with the runtime.
+void emit_end_pass(emitter_t *emit) {
+ if (!emit->last_emit_was_return_value) {
+ asm_x64_exit(emit->as);
+ }
+ asm_x64_end_pass(emit->as);
+
+ // check stack is back to zero size
+ if (emit->stack_size != 0) {
+ printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size);
+ }
+
+ if (emit->pass == PASS_3) {
+ // hand the code pointer, its size and the parameter count to the runtime
+ py_fun_t f = asm_x64_get_code(emit->as);
+ rt_assign_native_code(emit->scope->unique_code_id, f, asm_x64_get_code_size(emit->as), emit->scope->num_params);
+ }
+}
+
+bool emit_last_emit_was_return_value(emitter_t *emit) {
+ return emit->last_emit_was_return_value;
+}
+
+int emit_get_stack_size(emitter_t *emit) {
+ return emit->stack_size;
+}
+
+void emit_set_stack_size(emitter_t *emit, int size) {
+ emit->stack_size = size;
+}
+
+// Change the value-stack depth by stack_size_delta and, after PASS_1, record
+// the largest depth seen in the scope.
+static void adjust_stack(emitter_t *emit, int stack_size_delta) {
+    const int depth = emit->stack_size + stack_size_delta;
+    emit->stack_size = depth;
+    assert(emit->stack_size >= 0);
+
+    if (emit->pass <= PASS_1) {
+        return;
+    }
+    scope_t *scope = emit->scope;
+    if (depth > scope->stack_size) {
+        scope->stack_size = depth;
+    }
+}
+
+// Write any deferred value to its value-stack slot so that memory reflects
+// the whole stack, then clear the deferred state.
+static void stack_settle(emitter_t *emit) {
+    const need_to_push_t pending = emit->need_to_push;
+
+    if (pending == NEED_TO_PUSH_R64) {
+        asm_x64_mov_r64_to_local(emit->as, emit->last_r64, emit->stack_start + emit->stack_size);
+        adjust_stack(emit, 1);
+    } else if (pending == NEED_TO_PUSH_I64) {
+        // materialise the immediate in RAX before storing it
+        asm_x64_mov_i64_to_r64_optimised(emit->as, emit->last_i64, REG_RAX);
+        asm_x64_mov_r64_to_local(emit->as, REG_RAX, emit->stack_start + emit->stack_size);
+        adjust_stack(emit, 1);
+    }
+    // NEED_TO_PUSH_NOTHING: nothing to store
+
+    emit->need_to_push = NEED_TO_PUSH_NOTHING;
+}
+
+static void emit_pre_raw(emitter_t *emit, int stack_size_delta) {
+ adjust_stack(emit, stack_size_delta);
+ emit->last_emit_was_return_value = false;
+}
+
+static void emit_pre(emitter_t *emit) {
+ stack_settle(emit);
+ emit_pre_raw(emit, 0);
+}
+
+// Pop the top of the value stack into register r64. A deferred push is
+// consumed directly (register move or immediate load) without going through
+// memory.
+static void emit_pre_pop_r64(emitter_t *emit, int r64) {
+ switch (emit->need_to_push) {
+ case NEED_TO_PUSH_NOTHING:
+ // top value is in memory: load it and shrink the stack
+ asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64);
+ emit_pre_raw(emit, -1);
+ break;
+
+ case NEED_TO_PUSH_R64:
+ // deferred value was never counted in stack_size, so the delta is 0
+ emit_pre_raw(emit, 0);
+ if (emit->last_r64 != r64) {
+ asm_x64_mov_r64_to_r64(emit->as, emit->last_r64, r64);
+ }
+ break;
+
+ case NEED_TO_PUSH_I64:
+ emit_pre_raw(emit, 0);
+ asm_x64_mov_i64_to_r64_optimised(emit->as, emit->last_i64, r64);
+ break;
+ }
+ emit->need_to_push = NEED_TO_PUSH_NOTHING;
+}
+
+// Pop two values: the top of stack into r64a, the one below it into r64b.
+static void emit_pre_pop_r64_r64(emitter_t *emit, int r64a, int r64b) {
+ emit_pre_pop_r64(emit, r64a);
+ // after the first pop the second value is always in memory
+ asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64b);
+ adjust_stack(emit, -1);
+}
+
+// Pop three values: top of stack into r64a, then the next two (going down the
+// stack) into r64b and r64c.
+static void emit_pre_pop_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c) {
+    emit_pre_pop_r64(emit, r64a);
+
+    // slot index of the value that is now on top (always in memory)
+    const int top_slot = emit->stack_start + emit->stack_size - 1;
+    asm_x64_mov_local_to_r64(emit->as, top_slot, r64b);
+    asm_x64_mov_local_to_r64(emit->as, top_slot - 1, r64c);
+    adjust_stack(emit, -2);
+}
+
+static void emit_post(emitter_t *emit) {
+}
+
+static void emit_post_push_r64(emitter_t *emit, int r64) {
+ emit->need_to_push = NEED_TO_PUSH_R64;
+ emit->last_r64 = r64;
+}
+
+static void emit_post_push_i64(emitter_t *emit, int64_t i64) {
+ emit->need_to_push = NEED_TO_PUSH_I64;
+ emit->last_i64 = i64;
+}
+
+// Push two values: r64a is stored to the stack immediately, r64b becomes the
+// deferred top of stack.
+static void emit_post_push_r64_r64(emitter_t *emit, int r64a, int r64b) {
+ asm_x64_mov_r64_to_local(emit->as, r64a, emit->stack_start + emit->stack_size);
+ emit->need_to_push = NEED_TO_PUSH_R64;
+ emit->last_r64 = r64b;
+ adjust_stack(emit, 1);
+}
+
+// Push three registers onto the value stack in memory; r64a ends up lowest,
+// r64c on top. No value is left deferred.
+static void emit_post_push_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c) {
+    const int regs[3] = { r64a, r64b, r64c };
+    const int first_slot = emit->stack_start + emit->stack_size;
+
+    for (int k = 0; k < 3; k++) {
+        asm_x64_mov_r64_to_local(emit->as, regs[k], first_slot + k);
+    }
+    adjust_stack(emit, 3);
+}
+
+// Push four registers onto the value stack in memory; r64a ends up lowest,
+// r64d on top. No value is left deferred.
+static void emit_post_push_r64_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c, int r64d) {
+    const int regs[4] = { r64a, r64b, r64c, r64d };
+    const int first_slot = emit->stack_start + emit->stack_size;
+
+    for (int k = 0; k < 4; k++) {
+        asm_x64_mov_r64_to_local(emit->as, regs[k], first_slot + k);
+    }
+    adjust_stack(emit, 4);
+}
+
+// Load into r64 the address of the current top-of-stack slot, then remove
+// n_pop values from the stack accounting.
+static void emit_get_stack_pointer_to_r64_for_pop(emitter_t *emit, int r64, int n_pop) {
+ asm_x64_mov_local_addr_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64);
+ adjust_stack(emit, -n_pop);
+}
+
+// Reserve n_push new stack slots and load into r64 the address of the
+// highest-indexed one (the new top of stack).
+static void emit_get_stack_pointer_to_r64_for_push(emitter_t *emit, int r64, int n_push) {
+ asm_x64_mov_local_addr_to_r64(emit->as, emit->stack_start + emit->stack_size + n_push - 1, r64);
+ adjust_stack(emit, n_push);
+}
+
+static void emit_call(emitter_t *emit, void *fun) {
+ asm_x64_call_ind(emit->as, fun, REG_RAX);
+}
+
+// Load the immediate arg_val into register arg_r64, then emit an indirect
+// call to `fun`.
+static void emit_call_with_i64_arg(emitter_t *emit, void *fun, int64_t arg_val, int arg_r64) {
+    asm_x64_mov_i64_to_r64_optimised(emit->as, arg_val, arg_r64);
+    emit_call(emit, fun);
+}
+
+int emit_label_new(emitter_t *emit) {
+ return asm_x64_label_new(emit->as);
+}
+
+void emit_label_assign(emitter_t *emit, int l) {
+ asm_x64_label_assign(emit->as, l);
+}
+
+void emit_import_name(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_import_from(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_import_star(emitter_t *emit) {
+ assert(0);
+}
+
+// Push the singleton object for the None / False / True keyword token as a
+// deferred immediate (the object value cast to an integer).
+void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) {
+    emit_pre(emit);
+    py_obj_t o;
+    switch (tok) {
+        case PY_TOKEN_KW_NONE: o = py_const_none; break;
+        case PY_TOKEN_KW_FALSE: o = py_const_false; break;
+        case PY_TOKEN_KW_TRUE: o = py_const_true; break;
+        // give `o` a defined value so it is never read uninitialised when
+        // assert is compiled out (NDEBUG); mirrors the Thumb emitter
+        default: assert(0); o = 0; // shouldn't happen
+    }
+    emit_post_push_i64(emit, (uint64_t)o);
+}
+
+// Push a small integer constant as a deferred immediate.
+// With native types the raw value is pushed; otherwise the value is tagged as
+// (arg << 1) | 1.
+void emit_load_const_small_int(emitter_t *emit, int arg) {
+    emit_pre(emit);
+    if (emit->do_native_types) {
+        emit_post_push_i64(emit, arg);
+    } else {
+        // widen before tagging: shifting in `int` is undefined for negative
+        // values and overflows for large ones; 2*arg + 1 in 64 bits is the
+        // same tagged value and cannot overflow
+        emit_post_push_i64(emit, (int64_t)arg * 2 + 1);
+    }
+}
+
+void emit_load_const_int(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_load_const_dec(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_load_const_id(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+
+void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
+ emit_pre(emit);
+ emit_call_with_i64_arg(emit, rt_load_const_str, qstr, REG_ARG_1);
+ emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_load_const_verbatim_start(emitter_t *emit) {
+ assert(0);
+}
+void emit_load_const_verbatim_int(emitter_t *emit, int val) {
+ assert(0);
+}
+void emit_load_const_verbatim_str(emitter_t *emit, const char *str) {
+ assert(0);
+}
+void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) {
+ assert(0);
+}
+void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) {
+ assert(0);
+}
+void emit_load_const_verbatim_end(emitter_t *emit) {
+ assert(0);
+}
+
+// Push the value of local variable number local_num. Local 0 lives in
+// REG_LOCAL_1; every other local is loaded from stack slot (local_num - 1)
+// into RAX first.
+void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) {
+    int value_reg = REG_LOCAL_1;
+
+    emit_pre(emit);
+    if (local_num != 0) {
+        value_reg = REG_RAX;
+        asm_x64_mov_local_to_r64(emit->as, local_num - 1, REG_RAX);
+    }
+    emit_post_push_r64(emit, value_reg);
+}
+
+void emit_load_name(emitter_t *emit, qstr qstr) {
+ emit_pre(emit);
+ emit_call_with_i64_arg(emit, rt_load_name, qstr, REG_ARG_1);
+ emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_load_global(emitter_t *emit, qstr qstr) {
+ emit_pre(emit);
+ emit_call_with_i64_arg(emit, rt_load_global, qstr, REG_ARG_1);
+ emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_load_deref(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_load_closure(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+
+// Replace the top of stack (the base object) with the result of calling
+// rt_load_attr for attribute `qstr`.
+void emit_load_attr(emitter_t *emit, qstr qstr) {
+ emit_pre_pop_r64(emit, REG_ARG_1); // arg1 = base
+ emit_call_with_i64_arg(emit, rt_load_attr, qstr, REG_ARG_2); // arg2 = attribute name
+ emit_post_push_r64(emit, REG_RET);
+}
+
+// Pop the base object and call rt_load_method, passing a pointer to two
+// freshly reserved stack slots as the destination; the runtime call is
+// expected to fill those slots, so nothing is pushed afterwards.
+void emit_load_method(emitter_t *emit, qstr qstr) {
+ emit_pre_pop_r64(emit, REG_ARG_1); // arg1 = base
+ emit_get_stack_pointer_to_r64_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr
+ emit_call_with_i64_arg(emit, rt_load_method, qstr, REG_ARG_2); // arg2 = method name
+}
+
+void emit_load_build_class(emitter_t *emit) {
+ assert(0);
+} // basically load __build_class__ from builtins
+
+// Pop the top of stack into local variable number local_num. Local 0 lives in
+// REG_LOCAL_1; every other local is written to stack slot (local_num - 1)
+// via RAX.
+void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
+    if (local_num != 0) {
+        emit_pre_pop_r64(emit, REG_RAX);
+        asm_x64_mov_r64_to_local(emit->as, REG_RAX, local_num - 1);
+    } else {
+        emit_pre_pop_r64(emit, REG_LOCAL_1);
+    }
+    emit_post(emit);
+}
+
+void emit_store_name(emitter_t *emit, qstr qstr) {
+ emit_pre_pop_r64(emit, REG_ARG_2);
+ emit_call_with_i64_arg(emit, rt_store_name, qstr, REG_ARG_1); // arg1 = name
+ emit_post(emit);
+}
+
+void emit_store_global(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+
+void emit_store_deref(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_store_attr(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_store_locals(emitter_t *emit) {
+ assert(0);
+}
+
+void emit_store_subscr(emitter_t *emit) {
+ emit_pre_pop_r64_r64_r64(emit, REG_ARG_2, REG_ARG_1, REG_ARG_3); // index, base, value to store
+ emit_call(emit, rt_store_subscr);
+}
+
+void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) {
+ assert(0);
+}
+void emit_delete_name(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_delete_global(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_delete_deref(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_delete_attr(emitter_t *emit, qstr qstr) {
+ assert(0);
+}
+void emit_delete_subscr(emitter_t *emit) {
+ assert(0);
+}
+
+void emit_dup_top(emitter_t *emit) {
+ emit_pre_pop_r64(emit, REG_RAX);
+ emit_post_push_r64_r64(emit, REG_RAX, REG_RAX);
+}
+
+void emit_dup_top_two(emitter_t *emit) { // duplicate the top two stack entries as a pair
+ emit_pre_pop_r64_r64(emit, REG_RAX, REG_RDI); // presumably RAX = top, RDI = second (pop order as in the comments of emit_call_function) -- TODO confirm
+ emit_post_push_r64_r64_r64_r64(emit, REG_RDI, REG_RAX, REG_RDI, REG_RAX); // push the (RDI, RAX) pair twice
+}
+
+void emit_pop_top(emitter_t *emit) { // discard the top stack entry
+ emit_pre_pop_r64(emit, REG_RAX); // popped into RAX; the value is not used afterwards
+ emit_post(emit);
+}
+
+void emit_rot_two(emitter_t *emit) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+
+void emit_rot_three(emitter_t *emit) { // rotate the top three stack entries
+ emit_pre_pop_r64_r64_r64(emit, REG_RAX, REG_RDI, REG_RSI); // presumably RAX = top, RDI = second, RSI = third -- TODO confirm pop order
+ emit_post_push_r64_r64_r64(emit, REG_RAX, REG_RSI, REG_RDI); // assuming pushes happen left to right: old top ends lowest, old second ends on top -- TODO confirm
+}
+
+void emit_jump(emitter_t *emit, int label) { // emit an unconditional jump to label
+ emit_pre(emit);
+ asm_x64_jmp_label(emit->as, label);
+ emit_post(emit);
+}
+
+void emit_pop_jump_if_false(emitter_t *emit, int label) { // pop the condition and jump to label when it is false
+    if (!emit->do_native_types) {
+        // object mode: call rt_is_true on the popped condition; its result in REG_RET is tested below
+        emit_pre_pop_r64(emit, REG_ARG_1);
+        emit_call(emit, rt_is_true);
+    } else {
+        emit_pre_pop_r64(emit, REG_RET); // native mode: the popped value itself is tested
+    }
+    // shared tail: jump when REG_RET tests as zero
+    // NOTE(review): the helper name suggests only the low 8 bits are tested -- confirm this is fine for native values
+    asm_x64_test_r8_with_r8(emit->as, REG_RET, REG_RET);
+    asm_x64_jcc_label(emit->as, JCC_JZ, label);
+    emit_post(emit);
+}
+
+void emit_pop_jump_if_true(emitter_t *emit, int label) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_jump_if_true_or_pop(emitter_t *emit, int label) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_jump_if_false_or_pop(emitter_t *emit, int label) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+
+void emit_setup_loop(emitter_t *emit, int label) { // no loop-specific code is emitted: only the emit_pre/emit_post pair runs, and label is unused
+ emit_pre(emit);
+ emit_post(emit);
+}
+
+void emit_break_loop(emitter_t *emit, int label) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_continue_loop(emitter_t *emit, int label) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_setup_with(emitter_t *emit, int label) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_with_cleanup(emitter_t *emit) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_setup_except(emitter_t *emit, int label) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_setup_finally(emitter_t *emit, int label) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_end_finally(emitter_t *emit) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_get_iter(emitter_t *emit) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+} // tos = getiter(tos)
+void emit_for_iter(emitter_t *emit, int label) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_for_iter_end(emitter_t *emit) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_pop_except(emitter_t *emit) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+
+void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { // emits: pop the operand, call rt_unary_op(op, operand), push the result
+ emit_pre_pop_r64(emit, REG_ARG_2); // arg2 = operand (popped top of stack)
+ emit_call_with_i64_arg(emit, rt_unary_op, op, REG_ARG_1); // arg1 = op
+ emit_post_push_r64(emit, REG_RET); // result of the call
+}
+
+void emit_build_tuple(emitter_t *emit, int n_args) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+
+void emit_build_list(emitter_t *emit, int n_args) { // emits: call rt_build_list(n_args, items ptr) and push the new list
+ emit_pre(emit);
+ emit_get_stack_pointer_to_r64_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order
+ emit_call_with_i64_arg(emit, rt_build_list, n_args, REG_ARG_1); // arg1 = n_args
+ emit_post_push_r64(emit, REG_RET); // new list
+}
+
+void emit_list_append(emitter_t *emit, int list_index) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+
+void emit_build_map(emitter_t *emit, int n_args) { // emits: call rt_build_map(n_args) and push the new map; no pop helper is used, so nothing is taken off the stack here
+ emit_pre(emit);
+ emit_call_with_i64_arg(emit, rt_build_map, n_args, REG_ARG_1); // arg1 = n_args
+ emit_post_push_r64(emit, REG_RET); // new map
+}
+
+void emit_store_map(emitter_t *emit) { // emits: pop key, value and map, call rt_store_map(map, key, value), push the call's result
+ emit_pre_pop_r64_r64_r64(emit, REG_ARG_2, REG_ARG_3, REG_ARG_1); // key, value, map
+ emit_call(emit, rt_store_map);
+ emit_post_push_r64(emit, REG_RET); // map
+}
+
+void emit_map_add(emitter_t *emit, int map_index) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+
+void emit_build_set(emitter_t *emit, int n_args) { // emits: call rt_build_set(n_args, items ptr) and push the new set
+ emit_pre(emit);
+ emit_get_stack_pointer_to_r64_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order
+ emit_call_with_i64_arg(emit, rt_build_set, n_args, REG_ARG_1); // arg1 = n_args
+ emit_post_push_r64(emit, REG_RET); // new set
+}
+
+void emit_set_add(emitter_t *emit, int set_index) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_build_slice(emitter_t *emit, int n_args) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_unpack_sequence(emitter_t *emit, int n_args) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+
+void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { // emits: call rt_make_function_from_id(scope->unique_code_id) and push the result
+ assert(n_default_params == 0 && n_dict_params == 0); // default and dict parameters are not handled by this emitter
+ emit_pre(emit);
+ emit_call_with_i64_arg(emit, rt_make_function_from_id, scope->unique_code_id, REG_ARG_1); // arg1 = unique code id of the scope
+ emit_post_push_r64(emit, REG_RET); // result of the call
+}
+
+void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+
+void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { // call a function with 0, 1 or 2 positional arguments and push the result
+    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);
+    switch (n_positional) {
+    case 0: emit_pre_pop_r64(emit, REG_ARG_1); // the function
+        emit_call(emit, rt_call_function_0);
+        break;
+    case 1: emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_ARG_1); // the single argument, the function
+        emit_call(emit, rt_call_function_1);
+        break;
+    case 2: emit_pre_pop_r64_r64_r64(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the second argument, the first argument, the function
+        emit_call(emit, rt_call_function_2);
+        break;
+    default: assert(0); // any other argument count is not supported
+    }
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { // call a method with 0 or 1 positional arguments (plus self) and push the result
+    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);
+    switch (n_positional) {
+    case 0: emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_ARG_1); // the self object (or NULL), the method
+        emit_call(emit, rt_call_method_1);
+        break;
+    case 1: emit_pre_pop_r64_r64_r64(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the first argument, the self object (or NULL), the method
+        emit_call(emit, rt_call_method_2);
+        break;
+    default: assert(0); // any other argument count is not supported
+    }
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_pop_block(emitter_t *emit) { // no block-specific code is emitted: only the emit_pre/emit_post pair runs
+ emit_pre(emit);
+ emit_post(emit);
+}
+
+void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { // pop two operands, apply op, push the result from REG_RET
+    if (!emit->do_native_types) {
+        // object mode: the runtime does the work via rt_binary_op, with op passed in arg1
+        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2);
+        emit_call_with_i64_arg(emit, rt_binary_op, op, REG_ARG_1);
+    } else {
+        assert(op == RT_BINARY_OP_ADD); // native mode only handles addition
+        emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_RET);
+        asm_x64_add_r64_to_r64(emit->as, REG_ARG_2, REG_RET);
+    }
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { // pop two operands, compare them, push the result from REG_RET
+    if (!emit->do_native_types) {
+        // object mode: the runtime does the work via rt_compare_op, with op passed in arg1
+        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2);
+        emit_call_with_i64_arg(emit, rt_compare_op, op, REG_ARG_1);
+    } else {
+        assert(op == RT_COMPARE_OP_LESS); // native mode only handles '<'
+        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2);
+        asm_x64_xor_r64_to_r64(emit->as, REG_RET, REG_RET); // zero REG_RET (xor with itself) before the 8-bit setcc writes it
+        asm_x64_cmp_r64_with_r64(emit->as, REG_ARG_3, REG_ARG_2);
+        asm_x64_setcc_r8(emit->as, JCC_JL, REG_RET);
+    }
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_return_value(emitter_t *emit) { // emits: pop the return value into RAX, then the function exit sequence
+ emit_pre_pop_r64(emit, REG_RAX); // RAX = value being returned
+ emit->last_emit_was_return_value = true; // NOTE(review): set after the emit_pre_pop_r64 call, presumably because the emit_pre* helpers reset this flag -- keep this order
+ //asm_x64_call_ind(emit->as, 0, REG_RAX); to seg fault for debugging with gdb
+ asm_x64_exit(emit->as);
+}
+
+void emit_raise_varargs(emitter_t *emit, int n_args) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_yield_value(emitter_t *emit) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+void emit_yield_from(emitter_t *emit) { // TODO: unimplemented stub for the x64 emitter; assert(0) aborts if it is reached (a no-op when NDEBUG is defined)
+ assert(0);
+}
+
+#endif // EMIT_DO_X64
diff --git a/py/grammar.h b/py/grammar.h
new file mode 100644
index 0000000000..05bb237a52
--- /dev/null
+++ b/py/grammar.h
@@ -0,0 +1,300 @@
+// rules for writing rules:
+// - zero_or_more is implemented using opt_rule around a one_or_more rule
+// - don't put opt_rule in arguments of or rule; instead, wrap the call to this or rule in opt_rule
+
+// # Start symbols for the grammar:
+// # single_input is a single interactive statement;
+// # file_input is a module or sequence of commands read from an input file;
+// # eval_input is the input for the eval() functions.
+// # NB: compound_stmt in single_input is followed by extra NEWLINE!
+// single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+// file_input: (NEWLINE | stmt)* ENDMARKER
+// eval_input: testlist NEWLINE* ENDMARKER
+
+DEF_RULE(file_input, nc, and(1), opt_rule(file_input_2)) // (NEWLINE | stmt)* written as opt_rule around a one_or_more rule, per the notes above; no ENDMARKER token is matched by this rule
+DEF_RULE(file_input_2, c(generic_all_nodes), one_or_more, rule(file_input_3))
+DEF_RULE(file_input_3, nc, or(2), tok(NEWLINE), rule(stmt)) // one element of the repetition: NEWLINE | stmt
+
+// decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+// decorators: decorator+
+// decorated: decorators (classdef | funcdef)
+// funcdef: 'def' NAME parameters ['->' test] ':' suite
+// parameters: '(' [typedargslist] ')'
+// typedargslist: tfpdef ['=' test] (',' tfpdef ['=' test])* [',' ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef
+// tfpdef: NAME [':' test]
+// varargslist: vfpdef ['=' test] (',' vfpdef ['=' test])* [',' ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef
+// vfpdef: NAME
+
+DEF_RULE(decorator, nc, and(4), tok(DEL_AT), rule(dotted_name), opt_rule(trailer_paren), tok(NEWLINE)) // the optional '(' [arglist] ')' part reuses trailer_paren
+//DEF_RULE(decorator_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) -- disabled: same body as trailer_paren, which decorator uses instead
+DEF_RULE(decorators, nc, one_or_more, rule(decorator))
+DEF_RULE(decorated, c(decorated), and(2), rule(decorators), rule(decorated_body))
+DEF_RULE(decorated_body, nc, or(2), rule(classdef), rule(funcdef))
+DEF_RULE(funcdef, c(funcdef), and(8), tok(KW_DEF), tok(NAME), tok(DEL_PAREN_OPEN), opt_rule(typedargslist), tok(DEL_PAREN_CLOSE), opt_rule(funcdef_2), tok(DEL_COLON), rule(suite)) // "parameters" is inlined here as '(' [typedargslist] ')'
+DEF_RULE(funcdef_2, nc, and(2), tok(DEL_MINUS_MORE), rule(test)) // '->' test
+// TODO typedargslist lets through more than is allowed
+DEF_RULE(typedargslist, nc, list_with_end, rule(typedargslist_item), tok(DEL_COMMA))
+DEF_RULE(typedargslist_item, nc, or(3), rule(typedargslist_name), rule(typedargslist_star), rule(typedargslist_dbl_star))
+DEF_RULE(typedargslist_name, nc, and(3), tok(NAME), opt_rule(typedargslist_colon), opt_rule(typedargslist_equal)) // tfpdef ['=' test] with tfpdef written out inline
+DEF_RULE(typedargslist_star, nc, and(2), tok(OP_STAR), opt_rule(tfpdef))
+DEF_RULE(typedargslist_dbl_star, nc, and(3), tok(OP_DBL_STAR), tok(NAME), opt_rule(typedargslist_colon)) // '**' followed by the same NAME [':' test] shape as tfpdef
+DEF_RULE(typedargslist_colon, nc, and(2), tok(DEL_COLON), rule(test))
+DEF_RULE(typedargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test))
+DEF_RULE(tfpdef, nc, and(2), tok(NAME), opt_rule(typedargslist_colon))
+// TODO varargslist lets through more than is allowed
+DEF_RULE(varargslist, nc, list_with_end, rule(varargslist_item), tok(DEL_COMMA))
+DEF_RULE(varargslist_item, nc, or(3), rule(varargslist_name), rule(varargslist_star), rule(varargslist_dbl_star))
+DEF_RULE(varargslist_name, nc, and(2), tok(NAME), opt_rule(varargslist_equal))
+DEF_RULE(varargslist_star, nc, and(2), tok(OP_STAR), opt_rule(vfpdef))
+DEF_RULE(varargslist_dbl_star, nc, and(2), tok(OP_DBL_STAR), tok(NAME))
+DEF_RULE(varargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test))
+DEF_RULE(vfpdef, nc, and(1), tok(NAME))
+
+// stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | simple_stmt
+
+DEF_RULE(stmt, nc, or(9), rule(if_stmt), rule(while_stmt), rule(for_stmt), rule(try_stmt), rule(with_stmt), rule(funcdef), rule(classdef), rule(decorated), rule(simple_stmt)) // compound statements are listed directly as alternatives; simple_stmt is the last one
+
+// simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+
+DEF_RULE(simple_stmt, nc, and(2), rule(simple_stmt_2), tok(NEWLINE)) // the statement list, then the terminating NEWLINE
+DEF_RULE(simple_stmt_2, c(generic_all_nodes), list_with_end, rule(small_stmt), tok(DEL_SEMICOLON)) // small_stmt (';' small_stmt)* [';'] -- list_with_end presumably allows the trailing separator, TODO confirm in parse.c
+
+// small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
+// expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | ('=' (yield_expr|testlist_star_expr))*)
+// testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+// augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//='
+// # For normal assignments, additional restrictions enforced by the interpreter
+
+DEF_RULE(small_stmt, nc, or(8), rule(del_stmt), rule(pass_stmt), rule(flow_stmt), rule(import_stmt), rule(global_stmt), rule(nonlocal_stmt), rule(assert_stmt), rule(expr_stmt)) // expr_stmt is the last alternative here, although the reference grammar above lists it first
+DEF_RULE(expr_stmt, c(expr_stmt), and(2), rule(testlist_star_expr), opt_rule(expr_stmt_2))
+DEF_RULE(expr_stmt_2, nc, or(2), rule(expr_stmt_augassign), rule(expr_stmt_assign_list))
+DEF_RULE(expr_stmt_augassign, nc, and(2), rule(augassign), rule(expr_stmt_6)) // uses expr_stmt_6, so this accepts testlist_star_expr where the reference grammar has testlist
+DEF_RULE(expr_stmt_assign_list, nc, one_or_more, rule(expr_stmt_assign))
+DEF_RULE(expr_stmt_assign, nc, and(2), tok(DEL_EQUAL), rule(expr_stmt_6))
+DEF_RULE(expr_stmt_6, nc, or(2), rule(yield_expr), rule(testlist_star_expr)) // (yield_expr|testlist_star_expr), shared by the augassign and '=' forms
+DEF_RULE(testlist_star_expr, c(generic_tuple), list_with_end, rule(testlist_star_expr_2), tok(DEL_COMMA))
+DEF_RULE(testlist_star_expr_2, nc, or(2), rule(star_expr), rule(test))
+DEF_RULE(augassign, nc, or(12), tok(DEL_PLUS_EQUAL), tok(DEL_MINUS_EQUAL), tok(DEL_STAR_EQUAL), tok(DEL_SLASH_EQUAL), tok(DEL_PERCENT_EQUAL), tok(DEL_AMPERSAND_EQUAL), tok(DEL_PIPE_EQUAL), tok(DEL_CARET_EQUAL), tok(DEL_DBL_LESS_EQUAL), tok(DEL_DBL_MORE_EQUAL), tok(DEL_DBL_STAR_EQUAL), tok(DEL_DBL_SLASH_EQUAL))
+
+// del_stmt: 'del' exprlist
+// pass_stmt: 'pass'
+// flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+// break_stmt: 'break'
+// continue_stmt: 'continue'
+// return_stmt: 'return' [testlist]
+// yield_stmt: yield_expr
+// raise_stmt: 'raise' [test ['from' test]]
+
+DEF_RULE(del_stmt, c(del_stmt), and(2), tok(KW_DEL), rule(exprlist))
+DEF_RULE(pass_stmt, c(generic_all_nodes), and(1), tok(KW_PASS))
+DEF_RULE(flow_stmt, nc, or(5), rule(break_stmt), rule(continue_stmt), rule(return_stmt), rule(raise_stmt), rule(yield_stmt))
+DEF_RULE(break_stmt, c(break_stmt), and(1), tok(KW_BREAK))
+DEF_RULE(continue_stmt, c(continue_stmt), and(1), tok(KW_CONTINUE))
+DEF_RULE(return_stmt, c(return_stmt), and(2), tok(KW_RETURN), opt_rule(testlist))
+DEF_RULE(yield_stmt, c(yield_stmt), and(1), rule(yield_expr))
+DEF_RULE(raise_stmt, c(raise_stmt), and(2), tok(KW_RAISE), opt_rule(raise_stmt_arg))
+DEF_RULE(raise_stmt_arg, nc, and(2), rule(test), opt_rule(raise_stmt_from)) // test ['from' test]
+DEF_RULE(raise_stmt_from, nc, and(2), tok(KW_FROM), rule(test)) // 'from' test
+
+// import_stmt: import_name | import_from
+// import_name: 'import' dotted_as_names
+// import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+) 'import' ('*' | '(' import_as_names ')' | import_as_names)
+// import_as_name: NAME ['as' NAME]
+// dotted_as_name: dotted_name ['as' NAME]
+// import_as_names: import_as_name (',' import_as_name)* [',']
+// dotted_as_names: dotted_as_name (',' dotted_as_name)*
+// dotted_name: NAME ('.' NAME)*
+// global_stmt: 'global' NAME (',' NAME)*
+// nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
+// assert_stmt: 'assert' test [',' test]
+
+DEF_RULE(import_stmt, nc, or(2), rule(import_name), rule(import_from))
+DEF_RULE(import_name, c(import_name), and(2), tok(KW_IMPORT), rule(dotted_as_names))
+DEF_RULE(import_from, c(import_from), and(4), tok(KW_FROM), rule(import_from_2), tok(KW_IMPORT), rule(import_from_3))
+DEF_RULE(import_from_2, nc, or(2), rule(dotted_name), rule(import_from_2b)) // module part: a plain dotted_name, or the leading-dots form below
+DEF_RULE(import_from_2b, nc, and(2), rule(one_or_more_period_or_ellipses), opt_rule(dotted_name)) // ('.' | '...')+ [dotted_name]
+DEF_RULE(import_from_3, nc, or(3), tok(OP_STAR), rule(import_as_names_paren), rule(import_as_names)) // '*' | '(' import_as_names ')' | import_as_names
+DEF_RULE(import_as_names_paren, nc, and(3), tok(DEL_PAREN_OPEN), rule(import_as_names), tok(DEL_PAREN_CLOSE))
+DEF_RULE(one_or_more_period_or_ellipses, nc, one_or_more, rule(period_or_ellipses))
+DEF_RULE(period_or_ellipses, nc, or(2), tok(DEL_PERIOD), tok(ELLIPSES))
+DEF_RULE(import_as_name, nc, and(2), tok(NAME), opt_rule(as_name))
+DEF_RULE(dotted_as_name, nc, and(2), rule(dotted_name), opt_rule(as_name))
+DEF_RULE(as_name, nc, and(2), tok(KW_AS), tok(NAME)) // 'as' NAME; shared by import_as_name, dotted_as_name and try_stmt_as_name
+DEF_RULE(import_as_names, nc, list_with_end, rule(import_as_name), tok(DEL_COMMA))
+DEF_RULE(dotted_as_names, nc, list, rule(dotted_as_name), tok(DEL_COMMA))
+DEF_RULE(dotted_name, nc, list, tok(NAME), tok(DEL_PERIOD))
+DEF_RULE(global_stmt, c(global_stmt), and(2), tok(KW_GLOBAL), rule(name_list))
+DEF_RULE(nonlocal_stmt, c(nonlocal_stmt), and(2), tok(KW_NONLOCAL), rule(name_list))
+DEF_RULE(name_list, nc, list, tok(NAME), tok(DEL_COMMA)) // NAME (',' NAME)*; shared by global_stmt and nonlocal_stmt
+DEF_RULE(assert_stmt, c(assert_stmt), and(3), tok(KW_ASSERT), rule(test), opt_rule(assert_stmt_extra))
+DEF_RULE(assert_stmt_extra, nc, and(2), tok(DEL_COMMA), rule(test)) // the optional ',' test part of assert
+
+// if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+// while_stmt: 'while' test ':' suite ['else' ':' suite]
+// for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+// try_stmt: 'try' ':' suite ((except_clause ':' suite)+ ['else' ':' suite] ['finally' ':' suite] | 'finally' ':' suite)
+// # NB compile.c makes sure that the default except clause is last
+// except_clause: 'except' [test ['as' NAME]]
+// with_stmt: 'with' with_item (',' with_item)* ':' suite
+// with_item: test ['as' expr]
+// suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+
+DEF_RULE(if_stmt, c(if_stmt), and(6), tok(KW_IF), rule(test), tok(DEL_COLON), rule(suite), opt_rule(if_stmt_elif_list), opt_rule(else_stmt))
+DEF_RULE(if_stmt_elif_list, nc, one_or_more, rule(if_stmt_elif)) // ('elif' test ':' suite)* as opt_rule (in if_stmt) around this one_or_more rule
+DEF_RULE(if_stmt_elif, nc, and(4), tok(KW_ELIF), rule(test), tok(DEL_COLON), rule(suite))
+DEF_RULE(while_stmt, c(while_stmt), and(5), tok(KW_WHILE), rule(test), tok(DEL_COLON), rule(suite), opt_rule(else_stmt))
+DEF_RULE(for_stmt, c(for_stmt), and(7), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(testlist), tok(DEL_COLON), rule(suite), opt_rule(else_stmt))
+DEF_RULE(try_stmt, c(try_stmt), and(4), tok(KW_TRY), tok(DEL_COLON), rule(suite), rule(try_stmt_2))
+DEF_RULE(try_stmt_2, nc, or(2), rule(try_stmt_except_and_more), rule(try_stmt_finally))
+DEF_RULE(try_stmt_except_and_more, nc, and(3), rule(try_stmt_except_list), opt_rule(else_stmt), opt_rule(try_stmt_finally))
+DEF_RULE(try_stmt_except, nc, and(4), tok(KW_EXCEPT), opt_rule(try_stmt_as_name), tok(DEL_COLON), rule(suite)) // except_clause ':' suite with except_clause written out inline
+DEF_RULE(try_stmt_as_name, nc, and(2), rule(test), opt_rule(as_name)) // test ['as' NAME]
+DEF_RULE(try_stmt_except_list, nc, one_or_more, rule(try_stmt_except))
+DEF_RULE(try_stmt_finally, nc, and(3), tok(KW_FINALLY), tok(DEL_COLON), rule(suite))
+DEF_RULE(else_stmt, nc, and(3), tok(KW_ELSE), tok(DEL_COLON), rule(suite)) // 'else' ':' suite; shared by if_stmt, while_stmt, for_stmt and try_stmt_except_and_more
+DEF_RULE(with_stmt, c(with_stmt), and(4), tok(KW_WITH), rule(with_stmt_list), tok(DEL_COLON), rule(suite))
+DEF_RULE(with_stmt_list, nc, list, rule(with_item), tok(DEL_COMMA))
+DEF_RULE(with_item, nc, and(2), rule(test), opt_rule(with_item_as))
+DEF_RULE(with_item_as, nc, and(2), tok(KW_AS), rule(expr))
+DEF_RULE(suite, nc, or(2), rule(suite_block), rule(simple_stmt)) // the indented-block form is listed before simple_stmt, the reverse of the reference grammar above
+DEF_RULE(suite_block, nc, and(4), tok(NEWLINE), tok(INDENT), rule(suite_block_stmts), tok(DEDENT))
+DEF_RULE(suite_block_stmts, c(generic_all_nodes), one_or_more, rule(stmt))
+
+// test: or_test ['if' or_test 'else' test] | lambdef
+// test_nocond: or_test | lambdef_nocond
+// lambdef: 'lambda' [varargslist] ':' test
+// lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+
+DEF_RULE(test, nc, or(2), rule(lambdef), rule(test_if_expr)) // lambdef is listed first; test_if_expr covers or_test ['if' or_test 'else' test]
+DEF_RULE(test_if_expr, c(test_if_expr), and(2), rule(or_test), opt_rule(test_if_else))
+DEF_RULE(test_if_else, nc, and(4), tok(KW_IF), rule(or_test), tok(KW_ELSE), rule(test))
+DEF_RULE(test_nocond, nc, or(2), rule(lambdef_nocond), rule(or_test))
+DEF_RULE(lambdef, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test))
+DEF_RULE(lambdef_nocond, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test_nocond)) // shares c(lambdef) with lambdef; only the body rule differs (test_nocond)
+
+// or_test: and_test ('or' and_test)*
+// and_test: not_test ('and' not_test)*
+// not_test: 'not' not_test | comparison
+// comparison: expr (comp_op expr)*
+// comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+// star_expr: '*' expr
+// expr: xor_expr ('|' xor_expr)*
+// xor_expr: and_expr ('^' and_expr)*
+// and_expr: shift_expr ('&' shift_expr)*
+// shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+// arith_expr: term (('+'|'-') term)*
+// term: factor (('*'|'/'|'%'|'//') factor)*
+// factor: ('+'|'-'|'~') factor | power
+// power: atom trailer* ['**' factor]
+
+DEF_RULE(or_test, c(or_test), list, rule(and_test), tok(KW_OR))
+DEF_RULE(and_test, c(and_test), list, rule(not_test), tok(KW_AND))
+DEF_RULE(not_test, nc, or(2), rule(not_test_2), rule(comparison))
+DEF_RULE(not_test_2, c(not_test_2), and(2), tok(KW_NOT), rule(not_test))
+DEF_RULE(comparison, c(comparison), list, rule(expr), rule(comp_op)) // a list whose separator is a rule (comp_op) rather than a single token
+DEF_RULE(comp_op, nc, or(9), tok(OP_LESS), tok(OP_MORE), tok(OP_DBL_EQUAL), tok(OP_LESS_EQUAL), tok(OP_MORE_EQUAL), tok(OP_NOT_EQUAL), tok(KW_IN), rule(comp_op_not_in), rule(comp_op_is))
+DEF_RULE(comp_op_not_in, nc, and(2), tok(KW_NOT), tok(KW_IN)) // 'not' 'in'
+DEF_RULE(comp_op_is, nc, and(2), tok(KW_IS), opt_rule(comp_op_is_not)) // 'is' ['not'] covers both 'is' and 'is' 'not'
+DEF_RULE(comp_op_is_not, nc, and(1), tok(KW_NOT))
+DEF_RULE(star_expr, c(star_expr), and(2), tok(OP_STAR), rule(expr))
+DEF_RULE(expr, c(expr), list, rule(xor_expr), tok(OP_PIPE))
+DEF_RULE(xor_expr, c(xor_expr), list, rule(and_expr), tok(OP_CARET))
+DEF_RULE(and_expr, c(and_expr), list, rule(shift_expr), tok(OP_AMPERSAND))
+DEF_RULE(shift_expr, c(shift_expr), list, rule(arith_expr), rule(shift_op))
+DEF_RULE(shift_op, nc, or(2), tok(OP_DBL_LESS), tok(OP_DBL_MORE))
+DEF_RULE(arith_expr, c(arith_expr), list, rule(term), rule(arith_op))
+DEF_RULE(arith_op, nc, or(2), tok(OP_PLUS), tok(OP_MINUS))
+DEF_RULE(term, c(term), list, rule(factor), rule(term_op))
+DEF_RULE(term_op, nc, or(4), tok(OP_STAR), tok(OP_SLASH), tok(OP_PERCENT), tok(OP_DBL_SLASH))
+DEF_RULE(factor, nc, or(2), rule(factor_2), rule(power))
+DEF_RULE(factor_2, c(factor_2), and(2), rule(factor_op), rule(factor))
+DEF_RULE(factor_op, nc, or(3), tok(OP_PLUS), tok(OP_MINUS), tok(OP_TILDE))
+DEF_RULE(power, c(generic_all_nodes), and(3), rule(atom), opt_rule(power_trailers), opt_rule(power_dbl_star)) // atom trailer* ['**' factor]
+DEF_RULE(power_trailers, c(power_trailers), one_or_more, rule(trailer))
+DEF_RULE(power_dbl_star, c(power_dbl_star), and(2), tok(OP_DBL_STAR), rule(factor))
+
+// atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False'
+// testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+// trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+
+DEF_RULE(atom, nc, or(10), tok(NAME), tok(NUMBER), rule(atom_string), tok(ELLIPSES), tok(KW_NONE), tok(KW_TRUE), tok(KW_FALSE), rule(atom_paren), rule(atom_bracket), rule(atom_brace))
+DEF_RULE(atom_string, c(atom_string), one_or_more, rule(string_or_bytes)) // STRING+ of the reference grammar; each element may also be a BYTES token
+DEF_RULE(string_or_bytes, nc, or(2), tok(STRING), tok(BYTES))
+DEF_RULE(atom_paren, c(atom_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(atom_2b), tok(DEL_PAREN_CLOSE))
+DEF_RULE(atom_2b, nc, or(2), rule(yield_expr), rule(testlist_comp))
+DEF_RULE(atom_bracket, c(atom_bracket), and(3), tok(DEL_BRACKET_OPEN), opt_rule(testlist_comp), tok(DEL_BRACKET_CLOSE))
+DEF_RULE(atom_brace, c(atom_brace), and(3), tok(DEL_BRACE_OPEN), opt_rule(dictorsetmaker), tok(DEL_BRACE_CLOSE))
+DEF_RULE(testlist_comp, nc, and(2), rule(testlist_comp_2), opt_rule(testlist_comp_3))
+DEF_RULE(testlist_comp_2, nc, or(2), rule(star_expr), rule(test))
+DEF_RULE(testlist_comp_3, nc, or(2), rule(comp_for), rule(testlist_comp_3b)) // after the first item: a comprehension, or a comma-led continuation
+DEF_RULE(testlist_comp_3b, nc, and(2), tok(DEL_COMMA), opt_rule(testlist_comp_3c))
+DEF_RULE(testlist_comp_3c, nc, list_with_end, rule(testlist_comp_2), tok(DEL_COMMA))
+DEF_RULE(trailer, nc, or(3), rule(trailer_paren), rule(trailer_bracket), rule(trailer_period))
+DEF_RULE(trailer_paren, c(trailer_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) // also referenced by the decorator rule
+DEF_RULE(trailer_bracket, c(trailer_bracket), and(3), tok(DEL_BRACKET_OPEN), rule(subscriptlist), tok(DEL_BRACKET_CLOSE))
+DEF_RULE(trailer_period, c(trailer_period), and(2), tok(DEL_PERIOD), tok(NAME))
+
+// subscriptlist: subscript (',' subscript)* [',']
+// subscript: test | [test] ':' [test] [sliceop]
+// sliceop: ':' [test]
+
+DEF_RULE(subscriptlist, c(generic_tuple), list_with_end, rule(subscript), tok(DEL_COMMA))
+DEF_RULE(subscript, nc, or(2), rule(subscript_3), rule(subscript_2)) // forms starting with ':' (subscript_3) are listed before forms starting with a test (subscript_2)
+DEF_RULE(subscript_2, c(subscript_2), and(2), rule(test), opt_rule(subscript_3)) // test, optionally followed by a ':'-led slice tail
+DEF_RULE(subscript_3, c(subscript_3), and(2), tok(DEL_COLON), opt_rule(subscript_3b)) // ':' followed by an optional remainder
+DEF_RULE(subscript_3b, nc, or(2), rule(subscript_3c), rule(subscript_3d))
+DEF_RULE(subscript_3c, nc, and(2), tok(DEL_COLON), opt_rule(test)) // second ':' directly after the first: ':' [test]
+DEF_RULE(subscript_3d, nc, and(2), rule(test), opt_rule(sliceop)) // test [sliceop]
+DEF_RULE(sliceop, nc, and(2), tok(DEL_COLON), opt_rule(test))
+
+// exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+// testlist: test (',' test)* [',']
+// dictorsetmaker: (test ':' test (comp_for | (',' test ':' test)* [','])) | (test (comp_for | (',' test)* [',']))
+
+DEF_RULE(exprlist, nc, list_with_end, rule(exprlist_2), tok(DEL_COMMA))
+DEF_RULE(exprlist_2, nc, or(2), rule(star_expr), rule(expr))
+DEF_RULE(testlist, c(generic_tuple), list_with_end, rule(test), tok(DEL_COMMA))
+// TODO dictorsetmaker lets through more than is allowed
+DEF_RULE(dictorsetmaker, nc, and(2), rule(dictorsetmaker_item), opt_rule(dictorsetmaker_tail))
+DEF_RULE(dictorsetmaker_item, c(dictorsetmaker_item), and(2), rule(test), opt_rule(dictorsetmaker_colon)) // test [':' test]: one item shape for both dict and set entries, so this grammar does not stop the two from being mixed
+DEF_RULE(dictorsetmaker_colon, nc, and(2), tok(DEL_COLON), rule(test))
+DEF_RULE(dictorsetmaker_tail, nc, or(2), rule(comp_for), rule(dictorsetmaker_list))
+DEF_RULE(dictorsetmaker_list, nc, and(2), tok(DEL_COMMA), opt_rule(dictorsetmaker_list2))
+DEF_RULE(dictorsetmaker_list2, nc, list_with_end, rule(dictorsetmaker_item), tok(DEL_COMMA))
+
+// classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+DEF_RULE(classdef, c(classdef), and(5), tok(KW_CLASS), tok(NAME), opt_rule(classdef_2), tok(DEL_COLON), rule(suite))
+DEF_RULE(classdef_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) // '(' [arglist] ')': same token sequence as trailer_paren, but declared nc
+
+// arglist: (argument ',')* (argument [','] | '*' test (',' argument)* [',' '**' test] | '**' test)
+
+// TODO arglist lets through more than is allowed, compiler needs to do further verification
+DEF_RULE(arglist, c(generic_all_nodes), list_with_end, rule(arglist_2), tok(DEL_COMMA))
+DEF_RULE(arglist_2, nc, or(3), rule(arglist_star), rule(arglist_dbl_star), rule(argument)) // every list element may be any of the three forms, so position and count of '*' / '**' are not restricted here
+DEF_RULE(arglist_star, c(arglist_star), and(2), tok(OP_STAR), rule(test))
+DEF_RULE(arglist_dbl_star, c(arglist_dbl_star), and(2), tok(OP_DBL_STAR), rule(test))
+
+// # The reason that keywords are test nodes instead of NAME is that using NAME
+// # results in an ambiguity. ast.c makes sure it's a NAME.
+// argument: test [comp_for] | test '=' test # Really [keyword '='] test
+// comp_iter: comp_for | comp_if
+// comp_for: 'for' exprlist 'in' or_test [comp_iter]
+// comp_if: 'if' test_nocond [comp_iter]
+
+DEF_RULE(argument, c(argument), and(2), rule(test), opt_rule(argument_2))
+DEF_RULE(argument_2, nc, or(2), rule(comp_for), rule(argument_3)) // optional tail after the first test: comp_for, or '=' test
+DEF_RULE(argument_3, nc, and(2), tok(DEL_EQUAL), rule(test))
+DEF_RULE(comp_iter, nc, or(2), rule(comp_for), rule(comp_if))
+DEF_RULE(comp_for, nc, and(5), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(or_test), opt_rule(comp_iter))
+DEF_RULE(comp_if, nc, and(3), tok(KW_IF), rule(test_nocond), opt_rule(comp_iter))
+
+// # not used in grammar, but may appear in "node" passed from Parser to Compiler
+// encoding_decl: NAME
+
+// yield_expr: 'yield' [yield_arg]
+// yield_arg: 'from' test | testlist
+
+DEF_RULE(yield_expr, c(yield_expr), and(2), tok(KW_YIELD), opt_rule(yield_arg))
+DEF_RULE(yield_arg, nc, or(2), rule(yield_arg_from), rule(testlist)) // 'from' test | testlist
+DEF_RULE(yield_arg_from, nc, and(2), tok(KW_FROM), rule(test))
diff --git a/py/lexer.c b/py/lexer.c
new file mode 100644
index 0000000000..9c2195ef5b
--- /dev/null
+++ b/py/lexer.c
@@ -0,0 +1,677 @@
+/* lexer.c -- simple tokeniser for Python implementation
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+
+#define TAB_SIZE (8)
+#define CHR_EOF (-1)
+
+// State of the tokeniser: the source buffer, a three character lookahead
+// window, position tracking, the indentation stack and two buffered tokens.
+struct _py_lexer_t {
+ const char *name; // (file) name of source
+ bool free; // free source when done with it
+
+ const char *src_beg; // beginning of source
+ const char *src_cur; // current location in source; points to chr0
+ const char *src_end; // end (exclusive) of source
+ unichar chr0, chr1, chr2; // current characters from source
+
+ uint line; // source line
+ uint column; // source column
+
+ uint cont_line; // continued line
+
+ int emit_dent; // number of INDENT (if > 0) or DEDENT (if < 0) tokens still to be emitted
+ int nested_bracket_level; // incremented on ( [ {, decremented on ) ] }; NEWLINE is only emitted when 0
+
+ uint alloc_indent_level; // allocated capacity of indent_level
+ uint num_indent_level; // number of entries in use in indent_level
+ uint16_t *indent_level; // stack of indentation widths; entry 0 is initialised to 0
+
+ py_token_t tok_cur; // current token, the one returned by py_lexer_cur
+ py_token_t tok_next; // token following tok_cur
+};
+
+// Compare the (length-delimited) text of a token with a null-terminated string.
+// Returns true only if every byte of the token matches and str ends exactly
+// where the token does.
+static bool py_token_is_str(const py_token_t *tok, const char *str) {
+    uint n_matched;
+
+    for (n_matched = 0; n_matched < tok->len; n_matched++) {
+        if (tok->str[n_matched] != str[n_matched]) {
+            break;
+        }
+    }
+
+    if (n_matched != tok->len) {
+        // stopped early: the token and str differ
+        return false;
+    }
+
+    // whole token matched; str must not have anything left over
+    return str[n_matched] == 0;
+}
+
+// Print a one-line debugging description of a token to stdout: its source
+// position, kind, continued-line number, string pointer and length, followed
+// by the token text with non-printable characters shown as '?'.
+void py_token_show(const py_token_t *tok) {
+    printf("(%s:%d:%d) kind:%d cont_line:%d str:%p len:%d", tok->src_name, tok->src_line, tok->src_column, tok->kind, tok->cont_line, tok->str, tok->len);
+    if (tok->str != NULL && tok->len > 0) {
+        const char *end = tok->str + tok->len;
+        printf(" ");
+        for (const char *p = tok->str; p < end; p = g_utf8_next_char(p)) {
+            unichar c = g_utf8_get_char(p);
+            if (!g_unichar_isprint(c)) {
+                printf("?");
+            } else {
+                printf("%c", c);
+            }
+        }
+    }
+    printf("\n");
+}
+
+// Print the "(name:line:column) " prefix for a message about this token,
+// without a trailing newline.
+void py_token_show_error_prefix(const py_token_t *tok) {
+ printf("(%s:%d:%d) ", tok->src_name, tok->src_line, tok->src_column);
+}
+
+// Print msg prefixed with the source position of the token, followed by a newline.
+// Always returns false.
+bool py_token_show_error(const py_token_t *tok, const char *msg) {
+ printf("(%s:%d:%d) %s\n", tok->src_name, tok->src_line, tok->src_column, msg);
+ return false;
+}
+
+// True when the current character is the end-of-input marker.
+static bool is_end(py_lexer_t *lex) {
+ return lex->chr0 == CHR_EOF;
+}
+
+// True when the current character is a line feed or a carriage return.
+static bool is_physical_newline(py_lexer_t *lex) {
+    const unichar cur = lex->chr0;
+    return cur == '\n' || cur == '\r';
+}
+
+// True when the current character equals c.
+static bool is_char(py_lexer_t *lex, char c) {
+ return lex->chr0 == c;
+}
+
+// True when the current character equals c1 or c2.
+static bool is_char_or(py_lexer_t *lex, char c1, char c2) {
+    const unichar cur = lex->chr0;
+    return cur == c1 || cur == c2;
+}
+
+// True when the current character equals any of c1, c2 or c3.
+static bool is_char_or3(py_lexer_t *lex, char c1, char c2, char c3) {
+    const unichar cur = lex->chr0;
+    return cur == c1 || cur == c2 || cur == c3;
+}
+
+/*
+static bool is_char_following(py_lexer_t *lex, char c) {
+ return lex->chr1 == c;
+}
+*/
+
+// True when the character after the current one equals c1 or c2.
+static bool is_char_following_or(py_lexer_t *lex, char c1, char c2) {
+    const unichar following = lex->chr1;
+    return following == c1 || following == c2;
+}
+
+// True when the character two positions after the current one equals c1 or c2.
+static bool is_char_following_following_or(py_lexer_t *lex, char c1, char c2) {
+    const unichar second_following = lex->chr2;
+    return second_following == c1 || second_following == c2;
+}
+
+// True when the current character equals c1 and the one after it equals c2.
+static bool is_char_and(py_lexer_t *lex, char c1, char c2) {
+    if (lex->chr0 != c1) {
+        return false;
+    }
+    return lex->chr1 == c2;
+}
+
+// True when the current character is white space according to g_unichar_isspace.
+static bool is_whitespace(py_lexer_t *lex) {
+ return g_unichar_isspace(lex->chr0);
+}
+
+// True when the current character is alphabetic according to g_unichar_isalpha.
+static bool is_letter(py_lexer_t *lex) {
+ return g_unichar_isalpha(lex->chr0);
+}
+
+// True when the current character is a digit according to g_unichar_isdigit.
+static bool is_digit(py_lexer_t *lex) {
+ return g_unichar_isdigit(lex->chr0);
+}
+
+// True when the character after the current one is a digit according to g_unichar_isdigit.
+static bool is_following_digit(py_lexer_t *lex) {
+ return g_unichar_isdigit(lex->chr1);
+}
+
+// True when the current character may start an identifier: a letter or '_'.
+// TODO UNICODE include unicode characters in definition of identifiers
+static bool is_head_of_identifier(py_lexer_t *lex) {
+    if (is_letter(lex)) {
+        return true;
+    }
+    return lex->chr0 == '_';
+}
+
+// True when the current character may continue an identifier: anything that
+// may start one, or a digit.
+// TODO UNICODE include unicode characters in definition of identifiers
+static bool is_tail_of_identifier(py_lexer_t *lex) {
+    if (is_head_of_identifier(lex)) {
+        return true;
+    }
+    return is_digit(lex);
+}
+
+// Advance the lexer by one logical character, updating line/column tracking
+// and refilling the three character lookahead window (chr0, chr1, chr2).
+//
+//  - LF and CR each start a new line; CR LF is consumed as a single new line.
+//  - A tab moves the column to the next multiple of TAB_SIZE (plus one).
+//  - If the source does not end in a newline, a single '\n' is inserted into
+//    the window before CHR_EOF.
+//  - Does nothing once chr0 is CHR_EOF.
+//
+// Source bytes are read as unsigned char so that bytes >= 0x80 enter the
+// window as values 128..255 and can never be mistaken for CHR_EOF (-1) or be
+// used as a negative index by the character classification functions.
+static void next_char(py_lexer_t *lex) {
+    if (lex->chr0 == CHR_EOF) {
+        return;
+    }
+
+    int advance = 1;
+
+    if (lex->chr0 == '\n') {
+        // LF is a new line
+        ++lex->line;
+        lex->column = 1;
+        lex->cont_line = lex->line;
+    } else if (lex->chr0 == '\r') {
+        // CR is a new line
+        ++lex->line;
+        lex->column = 1;
+        lex->cont_line = lex->line;
+        if (lex->chr1 == '\n') {
+            // CR LF is a single new line
+            advance = 2;
+        }
+    } else if (lex->chr0 == '\t') {
+        // a tab
+        lex->column = (((lex->column - 1 + TAB_SIZE) / TAB_SIZE) * TAB_SIZE) + 1;
+    } else {
+        // a character worth one column
+        ++lex->column;
+    }
+
+    for (; advance > 0; advance--) {
+        lex->chr0 = lex->chr1;
+        lex->chr1 = lex->chr2;
+        lex->src_cur++;
+        if (lex->src_cur + 2 < lex->src_end) {
+            lex->chr2 = (unsigned char)lex->src_cur[2];
+        } else if (lex->chr1 != CHR_EOF && lex->chr1 != '\n' && lex->chr1 != '\r') {
+            // last real character is not a newline: insert newline at end of file
+            lex->chr2 = '\n';
+        } else {
+            // source already ended in a newline, or the end was already reached
+            lex->chr2 = CHR_EOF;
+        }
+    }
+}
+
+// Push a new indentation width onto the indent stack, doubling the allocated
+// capacity first if the stack is full.
+void indent_push(py_lexer_t *lex, uint indent) {
+    const bool stack_is_full = lex->num_indent_level >= lex->alloc_indent_level;
+
+    if (stack_is_full) {
+        lex->alloc_indent_level *= 2;
+        lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level);
+    }
+
+    lex->indent_level[lex->num_indent_level] = indent;
+    lex->num_indent_level += 1;
+}
+
+// Return the indentation width on top of the indent stack.
+// Does not check that the stack is non-empty.
+uint indent_top(py_lexer_t *lex) {
+ return lex->indent_level[lex->num_indent_level - 1];
+}
+
+// Discard the top entry of the indent stack.
+// Does not check that the stack is non-empty.
+void indent_pop(py_lexer_t *lex) {
+ lex->num_indent_level -= 1;
+}
+
+// some tricky operator encoding:
+// <op> = begin with <op>, if this opchar matches then begin here
+// e<op> = end with <op>, if this opchar matches then end
+// E<op> = mandatory end with <op>, this opchar must match, then end
+// c<op> = continue with <op>, if this opchar matches then continue matching
+// this means if the start of two ops are the same then they are equal til the last char
+//
+// Every token spelled out by this string has one entry, in the same order, in
+// the tok_enc_kind table; the comment on each line lists the tokens it encodes.
+
+static const char *tok_enc =
+ "()[]{},:;@~" // singles
+ "<e=c<e=" // < <= << <<=
+ ">e=c>e=" // > >= >> >>=
+ "*e=c*e=" // * *= ** **=
+ "+e=" // + +=
+ "-e=e>" // - -= ->
+ "&e=" // & &=
+ "|e=" // | |=
+ "/e=c/e=" // / /= // //=
+ "%e=" // % %=
+ "^e=" // ^ ^=
+ "=e=" // = ==
+ "!E=" // !=
+ ".c.E."; // . ...
+
+// Token kind for each delimiter/operator encoded in tok_enc, in the same order
+// as they appear there (one row here per line of tok_enc).  Indexed by the
+// token index computed while matching against tok_enc.
+// TODO static assert that number of tokens is less than 256 so we can safely make this table with byte sized entries
+static const uint8_t tok_enc_kind[] = {
+ PY_TOKEN_DEL_PAREN_OPEN, PY_TOKEN_DEL_PAREN_CLOSE,
+ PY_TOKEN_DEL_BRACKET_OPEN, PY_TOKEN_DEL_BRACKET_CLOSE,
+ PY_TOKEN_DEL_BRACE_OPEN, PY_TOKEN_DEL_BRACE_CLOSE,
+ PY_TOKEN_DEL_COMMA, PY_TOKEN_DEL_COLON, PY_TOKEN_DEL_SEMICOLON, PY_TOKEN_DEL_AT, PY_TOKEN_OP_TILDE,
+
+ PY_TOKEN_OP_LESS, PY_TOKEN_OP_LESS_EQUAL, PY_TOKEN_OP_DBL_LESS, PY_TOKEN_DEL_DBL_LESS_EQUAL,
+ PY_TOKEN_OP_MORE, PY_TOKEN_OP_MORE_EQUAL, PY_TOKEN_OP_DBL_MORE, PY_TOKEN_DEL_DBL_MORE_EQUAL,
+ PY_TOKEN_OP_STAR, PY_TOKEN_DEL_STAR_EQUAL, PY_TOKEN_OP_DBL_STAR, PY_TOKEN_DEL_DBL_STAR_EQUAL,
+ PY_TOKEN_OP_PLUS, PY_TOKEN_DEL_PLUS_EQUAL,
+ PY_TOKEN_OP_MINUS, PY_TOKEN_DEL_MINUS_EQUAL, PY_TOKEN_DEL_MINUS_MORE,
+ PY_TOKEN_OP_AMPERSAND, PY_TOKEN_DEL_AMPERSAND_EQUAL,
+ PY_TOKEN_OP_PIPE, PY_TOKEN_DEL_PIPE_EQUAL,
+ PY_TOKEN_OP_SLASH, PY_TOKEN_DEL_SLASH_EQUAL, PY_TOKEN_OP_DBL_SLASH, PY_TOKEN_DEL_DBL_SLASH_EQUAL,
+ PY_TOKEN_OP_PERCENT, PY_TOKEN_DEL_PERCENT_EQUAL,
+ PY_TOKEN_OP_CARET, PY_TOKEN_DEL_CARET_EQUAL,
+ PY_TOKEN_DEL_EQUAL, PY_TOKEN_OP_DBL_EQUAL,
+ PY_TOKEN_OP_NOT_EQUAL,
+ PY_TOKEN_DEL_PERIOD, PY_TOKEN_ELLIPSES,
+};
+
+// Keyword spellings, terminated by a NULL sentinel.  A NAME token matching
+// entry i is given the kind PY_TOKEN_KW_FALSE + i, so this table
+// must have the same order as enum in lexer.h
+static const char *tok_kw[] = {
+ "False",
+ "None",
+ "True",
+ "and",
+ "as",
+ "assert",
+ "break",
+ "class",
+ "continue",
+ "def",
+ "del",
+ "elif",
+ "else",
+ "except",
+ "finally",
+ "for",
+ "from",
+ "global",
+ "if",
+ "import",
+ "in",
+ "is",
+ "lambda",
+ "nonlocal",
+ "not",
+ "or",
+ "pass",
+ "raise",
+ "return",
+ "try",
+ "while",
+ "with",
+ "yield",
+ NULL,
+};
+
+// Scan the next token from the source and store it in *tok.
+//
+// Steps, in order:
+//  1. skip white space, comments and backslash-newline continuations, noting
+//     whether a physical newline was crossed;
+//  2. record the token's source position and start pointer;
+//  3. if INDENT/DEDENT tokens are still pending (emit_dent != 0) emit one;
+//  4. otherwise, if a newline was crossed outside of brackets, emit NEWLINE and
+//     work out how many INDENT/DEDENT tokens must follow it;
+//  5. otherwise, at end of input, emit a NEWLINE that closes any open
+//     indentation levels, or END when none remain;
+//  6. otherwise scan a string/bytes literal, a name (later checked against the
+//     keyword table), a number, or a delimiter/operator.
+//
+// For string and bytes literals tok->str/tok->len cover only the text between
+// the quotes; for all other tokens they cover the characters consumed.
+//
+// A delimiter/operator whose mandatory continuation is missing (a lone '!' or
+// a '..') is reported as PY_TOKEN_INVALID.
+static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
+    bool had_physical_newline = false;
+
+    // skip white space, comments and line continuations
+    while (!is_end(lex)) {
+        if (is_physical_newline(lex)) {
+            had_physical_newline = true;
+            next_char(lex);
+        } else if (is_whitespace(lex)) {
+            next_char(lex);
+        } else if (is_char(lex, '#')) {
+            next_char(lex);
+            while (!is_end(lex) && !is_physical_newline(lex)) {
+                next_char(lex);
+            }
+            // had_physical_newline will be set on next loop
+        } else if (is_char(lex, '\\')) {
+            // backslash (outside string literals) must appear just before a physical newline
+            next_char(lex);
+            if (!is_physical_newline(lex)) {
+                // TODO SyntaxError
+                assert(0);
+            } else {
+                next_char(lex);
+            }
+        } else {
+            break;
+        }
+    }
+
+    // record where the token starts; kind and length are filled in below
+    tok->src_name = lex->name;
+    tok->src_line = lex->line;
+    tok->src_column = lex->column;
+    tok->kind = PY_TOKEN_INVALID;
+    tok->cont_line = lex->cont_line;
+    tok->str = lex->src_cur;
+    tok->len = 0;
+
+    if (lex->emit_dent < 0) {
+        tok->kind = PY_TOKEN_DEDENT;
+        lex->emit_dent += 1;
+
+    } else if (lex->emit_dent > 0) {
+        tok->kind = PY_TOKEN_INDENT;
+        lex->emit_dent -= 1;
+
+    } else if (had_physical_newline && lex->nested_bracket_level == 0
+        && tok != &lex->tok_cur // so that we don't emit a newline if file starts with a comment
+        ) {
+        tok->kind = PY_TOKEN_NEWLINE;
+
+        // the column of the first token on the new line gives its indentation
+        uint num_spaces = lex->column - 1;
+        lex->emit_dent = 0;
+        if (num_spaces == indent_top(lex)) {
+        } else if (num_spaces > indent_top(lex)) {
+            indent_push(lex, num_spaces);
+            lex->emit_dent += 1;
+        } else {
+            while (num_spaces < indent_top(lex)) {
+                indent_pop(lex);
+                lex->emit_dent -= 1;
+            }
+            if (num_spaces != indent_top(lex)) {
+                //SyntaxError
+            }
+        }
+
+    } else if (is_end(lex)) {
+        // TODO emit a newline if file does not end in one
+        if (indent_top(lex) > 0) {
+            tok->kind = PY_TOKEN_NEWLINE;
+            lex->emit_dent = 0;
+            while (indent_top(lex) > 0) {
+                indent_pop(lex);
+                lex->emit_dent -= 1;
+            }
+        } else {
+            tok->kind = PY_TOKEN_END;
+        }
+
+    } else if (is_char_or(lex, '\'', '\"')
+        || (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"'))
+        || ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r')) && is_char_following_following_or(lex, '\'', '\"'))) {
+        // a string or bytes literal
+
+        // parse type codes
+        bool is_raw = false;
+        bool is_bytes = false;
+        if (is_char(lex, 'u')) {
+            next_char(lex);
+        } else if (is_char(lex, 'b')) {
+            is_bytes = true;
+            next_char(lex);
+            if (is_char(lex, 'r')) {
+                is_raw = true;
+                next_char(lex);
+            }
+        } else if (is_char(lex, 'r')) {
+            is_raw = true;
+            next_char(lex);
+            if (is_char(lex, 'b')) {
+                is_bytes = true;
+                next_char(lex);
+            }
+        }
+
+        // set token kind
+        if (is_bytes) {
+            tok->kind = PY_TOKEN_BYTES;
+        } else {
+            tok->kind = PY_TOKEN_STRING;
+        }
+
+        // get first quoting character
+        char quote_char = '\'';
+        if (is_char(lex, '\"')) {
+            quote_char = '\"';
+        }
+        next_char(lex);
+
+        // work out if it's a single or triple quoted literal
+        int num_quotes;
+        if (is_char_and(lex, quote_char, quote_char)) {
+            // triple quotes
+            next_char(lex);
+            next_char(lex);
+            num_quotes = 3;
+        } else {
+            // single quotes
+            num_quotes = 1;
+        }
+
+        // set start of token
+        tok->str = lex->src_cur;
+
+        // parse the literal
+        // TODO proper escaping
+        int n_closing = 0;
+        while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
+            if (is_char(lex, quote_char)) {
+                n_closing += 1;
+            } else {
+                n_closing = 0;
+                if (!is_raw && is_char(lex, '\\')) {
+                    // skip the backslash so the escaped character cannot close the literal
+                    next_char(lex);
+                }
+            }
+            next_char(lex);
+        }
+
+        // check we got the required end quotes
+        if (n_closing < num_quotes) {
+            tok->kind = PY_TOKEN_LONELY_STRING_OPEN;
+        }
+
+        // set token string (byte) length
+        tok->len = lex->src_cur - tok->str - n_closing;
+
+        // we set the length, return now so it's not set incorrectly below
+        return;
+
+    } else if (is_head_of_identifier(lex)) {
+        tok->kind = PY_TOKEN_NAME;
+
+        next_char(lex);
+
+        while (!is_end(lex) && is_tail_of_identifier(lex)) {
+            next_char(lex);
+        }
+
+    } else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
+        tok->kind = PY_TOKEN_NUMBER;
+
+        next_char(lex);
+
+        // consume everything that could belong to a number; validation is left to later stages
+        while (!is_end(lex)) {
+            if (is_char_or(lex, 'e', 'E')) {
+                next_char(lex);
+                if (is_char(lex, '+') || is_char(lex, '-')) {
+                    next_char(lex);
+                }
+            } else if (is_letter(lex) || is_digit(lex) || is_char_or(lex, '_', '.')) {
+                next_char(lex);
+            } else {
+                break;
+            }
+        }
+
+    } else {
+        // search for encoded delimiter or operator
+
+        const char *t = tok_enc;
+        uint tok_enc_index = 0;
+        for (; *t != 0 && !is_char(lex, *t); t += 1) {
+            if (*t == 'e' || *t == 'c') {
+                t += 1;
+            } else if (*t == 'E') {
+                tok_enc_index -= 1;
+                t += 1;
+            }
+            tok_enc_index += 1;
+        }
+
+        next_char(lex);
+
+        if (*t == 0) {
+            // didn't match any delimiter or operator characters
+            tok->kind = PY_TOKEN_INVALID;
+
+        } else {
+            // matched a delimiter or operator character
+
+            // becomes false if a mandatory ('E') continuation character is missing
+            bool valid = true;
+
+            // get the maximum characters for a valid token
+            t += 1;
+            uint t_index = tok_enc_index;
+            for (;;) {
+                for (; *t == 'e'; t += 1) {
+                    t += 1;
+                    t_index += 1;
+                    if (is_char(lex, *t)) {
+                        next_char(lex);
+                        tok_enc_index = t_index;
+                        break;
+                    }
+                }
+
+                if (*t == 'E') {
+                    t += 1;
+                    if (is_char(lex, *t)) {
+                        next_char(lex);
+                        tok_enc_index = t_index;
+                    } else {
+                        valid = false;
+                    }
+                    break;
+                }
+
+                if (*t == 'c') {
+                    t += 1;
+                    t_index += 1;
+                    if (is_char(lex, *t)) {
+                        next_char(lex);
+                        tok_enc_index = t_index;
+                        t += 1;
+                    } else {
+                        break;
+                    }
+                } else {
+                    break;
+                }
+            }
+
+            if (!valid) {
+                // do not look the kind up in the table: that would turn the
+                // incomplete token into the complete one (e.g. '!' into '!=')
+                tok->kind = PY_TOKEN_INVALID;
+
+            } else {
+                // set token kind
+                tok->kind = tok_enc_kind[tok_enc_index];
+
+                // compute bracket level for implicit line joining
+                if (tok->kind == PY_TOKEN_DEL_PAREN_OPEN || tok->kind == PY_TOKEN_DEL_BRACKET_OPEN || tok->kind == PY_TOKEN_DEL_BRACE_OPEN) {
+                    lex->nested_bracket_level += 1;
+                } else if (tok->kind == PY_TOKEN_DEL_PAREN_CLOSE || tok->kind == PY_TOKEN_DEL_BRACKET_CLOSE || tok->kind == PY_TOKEN_DEL_BRACE_CLOSE) {
+                    lex->nested_bracket_level -= 1;
+                }
+            }
+        }
+    }
+
+    // set token string (byte) length
+    tok->len = lex->src_cur - tok->str;
+
+    // check for keywords (must be done after setting token string length)
+    if (tok->kind == PY_TOKEN_NAME) {
+        for (int i = 0; tok_kw[i] != NULL; i++) {
+            if (py_token_is_str(tok, tok_kw[i])) {
+                tok->kind = PY_TOKEN_KW_FALSE + i;
+                break;
+            }
+        }
+    }
+}
+
+// Create a lexer over the len bytes starting at str.
+//
+//   src_name  name reported in token positions; the pointer is stored, not copied
+//   str, len  the source text; it does not need to be null terminated
+//   free_str  if true the lexer takes ownership of str and frees it with m_free
+//
+// The first two tokens are scanned before returning, so py_lexer_cur is valid
+// immediately.  If the source does not end in a newline one is inserted.
+//
+// Returns NULL if memory for the lexer cannot be allocated; in that case str
+// has already been freed when free_str is true, so the caller must not free it.
+py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str) {
+    py_lexer_t *lex;
+
+    lex = m_new(py_lexer_t, 1);
+    if (lex == NULL) {
+        if (free_str) {
+            m_free((char*)str);
+        }
+        return NULL;
+    }
+
+    //lex->name = g_strdup(src_name); // TODO
+    lex->name = src_name;
+    lex->free = free_str;
+    lex->src_beg = str;
+    lex->src_cur = str;
+    lex->src_end = str + len;
+    lex->line = 1;
+    lex->column = 1;
+    lex->cont_line = lex->line;
+    lex->emit_dent = 0;
+    lex->nested_bracket_level = 0;
+    lex->alloc_indent_level = 16;
+    lex->num_indent_level = 1;
+    lex->indent_level = m_new(uint16_t, lex->alloc_indent_level);
+    if (lex->indent_level == NULL) {
+        if (free_str) {
+            m_free((char*)str);
+        }
+        m_free(lex);
+        return NULL;
+    }
+    lex->indent_level[0] = 0;
+
+    // preload characters
+    // bytes are read as unsigned char so that values >= 0x80 never become
+    // negative (0xff would otherwise be indistinguishable from CHR_EOF)
+    // TODO unicode
+    if (len == 0) {
+        lex->chr0 = '\n'; // insert newline at end of file
+        lex->chr1 = CHR_EOF;
+        lex->chr2 = CHR_EOF;
+    } else if (len == 1) {
+        lex->chr0 = (unsigned char)str[0];
+        if (lex->chr0 != '\n' && lex->chr0 != '\r') {
+            lex->chr1 = '\n'; // insert newline at end of file
+        } else {
+            lex->chr1 = CHR_EOF;
+        }
+        lex->chr2 = CHR_EOF;
+    } else if (len == 2) {
+        lex->chr0 = (unsigned char)str[0];
+        lex->chr1 = (unsigned char)str[1];
+        if (lex->chr1 != '\n' && lex->chr1 != '\r') {
+            lex->chr2 = '\n'; // insert newline at end of file
+        } else {
+            lex->chr2 = CHR_EOF;
+        }
+    } else {
+        lex->chr0 = (unsigned char)str[0];
+        lex->chr1 = (unsigned char)str[1];
+        lex->chr2 = (unsigned char)str[2];
+    }
+
+    // scan the current token and one token of lookahead
+    py_lexer_next_token_into(lex, &lex->tok_cur);
+    py_lexer_next_token_into(lex, &lex->tok_next);
+
+    return lex;
+}
+
+// Release a lexer created by py_lexer_from_str_len; NULL is accepted and ignored.
+// Frees the source buffer if the lexer owns it, the indentation stack, and the
+// lexer itself.  The source name is not freed.
+void py_lexer_free(py_lexer_t *lex) {
+    if (lex == NULL) {
+        return;
+    }
+    //m_free(lex->name);
+    if (lex->free) {
+        m_free((char*)lex->src_beg);
+    }
+    // the indent stack is allocated separately from the lexer struct
+    m_free(lex->indent_level);
+    m_free(lex);
+}
+
+// Advance by one token: the lookahead token becomes the current token and a
+// new lookahead token is scanned from the source.
+void py_lexer_to_next(py_lexer_t *lex) {
+ lex->tok_cur = lex->tok_next;
+ py_lexer_next_token_into(lex, &lex->tok_next);
+}
+
+// Return a pointer to the current token.  It points into the lexer, so its
+// contents are overwritten by the next call to py_lexer_to_next.
+const py_token_t *py_lexer_cur(const py_lexer_t *lex) {
+ return &lex->tok_cur;
+}
+
+// True when the current token has the given kind.
+bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind) {
+ return lex->tok_cur.kind == kind;
+}
+
+/*
+bool py_lexer_is_str(py_lexer_t *lex, const char *str) {
+ return py_token_is_str(&lex->tok_cur, str);
+}
+
+bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind) {
+ return lex->tok_next.kind == kind;
+}
+
+bool py_lexer_is_next_str(py_lexer_t *lex, const char *str) {
+ return py_token_is_str(&lex->tok_next, str);
+}
+
+bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind) {
+ if (py_lexer_is_kind(lex, kind)) {
+ py_lexer_to_next(lex);
+ return true;
+ }
+ return false;
+}
+
+bool py_lexer_opt_str(py_lexer_t *lex, const char *str) {
+ if (py_lexer_is_str(lex, str)) {
+ py_lexer_to_next(lex);
+ return true;
+ }
+ return false;
+}
+*/
+
+// Print msg prefixed with the position of the current token; returns the
+// result of py_token_show_error.
+bool py_lexer_show_error(py_lexer_t *lex, const char *msg) {
+ return py_token_show_error(&lex->tok_cur, msg);
+}
diff --git a/py/lexer.h b/py/lexer.h
new file mode 100644
index 0000000000..32ab48a084
--- /dev/null
+++ b/py/lexer.h
@@ -0,0 +1,141 @@
+/* lexer.h -- simple tokeniser for Python implementation
+ */
+
+#ifndef INCLUDED_LEXER_H
+#define INCLUDED_LEXER_H
+
+/* uses (byte) length instead of null termination
+ * tokens are the same - UTF-8 with (byte) length
+ */
+
+// Kinds of token produced by the lexer.  The numeric comments give the value
+// of the enumerator they are attached to.
+//
+// The PY_TOKEN_KW_* block must stay contiguous and in the same order as the
+// keyword table in lexer.c, which computes a keyword's kind as
+// PY_TOKEN_KW_FALSE plus the keyword's index in that table.
+typedef enum _py_token_kind_t {
+ PY_TOKEN_END, // 0
+
+ PY_TOKEN_INVALID,
+ PY_TOKEN_LONELY_STRING_OPEN,
+
+ PY_TOKEN_NEWLINE, // 3
+ PY_TOKEN_INDENT, // 4
+ PY_TOKEN_DEDENT, // 5
+
+ PY_TOKEN_NAME, // 6
+ PY_TOKEN_NUMBER,
+ PY_TOKEN_STRING,
+ PY_TOKEN_BYTES,
+
+ PY_TOKEN_ELLIPSES,
+
+ PY_TOKEN_KW_FALSE, // 11
+ PY_TOKEN_KW_NONE,
+ PY_TOKEN_KW_TRUE,
+ PY_TOKEN_KW_AND,
+ PY_TOKEN_KW_AS,
+ PY_TOKEN_KW_ASSERT,
+ PY_TOKEN_KW_BREAK,
+ PY_TOKEN_KW_CLASS,
+ PY_TOKEN_KW_CONTINUE,
+ PY_TOKEN_KW_DEF, // 20
+ PY_TOKEN_KW_DEL,
+ PY_TOKEN_KW_ELIF,
+ PY_TOKEN_KW_ELSE,
+ PY_TOKEN_KW_EXCEPT,
+ PY_TOKEN_KW_FINALLY,
+ PY_TOKEN_KW_FOR,
+ PY_TOKEN_KW_FROM,
+ PY_TOKEN_KW_GLOBAL,
+ PY_TOKEN_KW_IF,
+ PY_TOKEN_KW_IMPORT, // 30
+ PY_TOKEN_KW_IN,
+ PY_TOKEN_KW_IS,
+ PY_TOKEN_KW_LAMBDA,
+ PY_TOKEN_KW_NONLOCAL,
+ PY_TOKEN_KW_NOT,
+ PY_TOKEN_KW_OR,
+ PY_TOKEN_KW_PASS,
+ PY_TOKEN_KW_RAISE,
+ PY_TOKEN_KW_RETURN,
+ PY_TOKEN_KW_TRY, // 40
+ PY_TOKEN_KW_WHILE,
+ PY_TOKEN_KW_WITH,
+ PY_TOKEN_KW_YIELD,
+
+ PY_TOKEN_OP_PLUS, // 44
+ PY_TOKEN_OP_MINUS,
+ PY_TOKEN_OP_STAR,
+ PY_TOKEN_OP_DBL_STAR,
+ PY_TOKEN_OP_SLASH,
+ PY_TOKEN_OP_DBL_SLASH,
+ PY_TOKEN_OP_PERCENT,
+ PY_TOKEN_OP_LESS,
+ PY_TOKEN_OP_DBL_LESS,
+ PY_TOKEN_OP_MORE,
+ PY_TOKEN_OP_DBL_MORE, // 54
+ PY_TOKEN_OP_AMPERSAND,
+ PY_TOKEN_OP_PIPE,
+ PY_TOKEN_OP_CARET,
+ PY_TOKEN_OP_TILDE,
+ PY_TOKEN_OP_LESS_EQUAL,
+ PY_TOKEN_OP_MORE_EQUAL,
+ PY_TOKEN_OP_DBL_EQUAL,
+ PY_TOKEN_OP_NOT_EQUAL,
+
+ PY_TOKEN_DEL_PAREN_OPEN, // 63
+ PY_TOKEN_DEL_PAREN_CLOSE,
+ PY_TOKEN_DEL_BRACKET_OPEN,
+ PY_TOKEN_DEL_BRACKET_CLOSE,
+ PY_TOKEN_DEL_BRACE_OPEN,
+ PY_TOKEN_DEL_BRACE_CLOSE,
+ PY_TOKEN_DEL_COMMA,
+ PY_TOKEN_DEL_COLON,
+ PY_TOKEN_DEL_PERIOD,
+ PY_TOKEN_DEL_SEMICOLON,
+ PY_TOKEN_DEL_AT, // 73
+ PY_TOKEN_DEL_EQUAL,
+ PY_TOKEN_DEL_PLUS_EQUAL,
+ PY_TOKEN_DEL_MINUS_EQUAL,
+ PY_TOKEN_DEL_STAR_EQUAL,
+ PY_TOKEN_DEL_SLASH_EQUAL,
+ PY_TOKEN_DEL_DBL_SLASH_EQUAL,
+ PY_TOKEN_DEL_PERCENT_EQUAL,
+ PY_TOKEN_DEL_AMPERSAND_EQUAL,
+ PY_TOKEN_DEL_PIPE_EQUAL,
+ PY_TOKEN_DEL_CARET_EQUAL, // 83
+ PY_TOKEN_DEL_DBL_MORE_EQUAL,
+ PY_TOKEN_DEL_DBL_LESS_EQUAL,
+ PY_TOKEN_DEL_DBL_STAR_EQUAL,
+ PY_TOKEN_DEL_MINUS_MORE,
+} py_token_kind_t;
+
+// A single token.  str points into the source buffer and is not null
+// terminated; use len to find where the token text ends.
+typedef struct _py_token_t {
+ const char *src_name; // (file) name of source
+ uint src_line; // actual source line
+ uint src_column; // actual source column
+
+ py_token_kind_t kind; // kind of token
+ uint cont_line; // token belongs to this line in a continued line
+ const char *str; // string of token
+ uint len; // (byte) length of string of token
+} py_token_t;
+
+typedef struct _py_lexer_t py_lexer_t;
+
+void py_token_show(const py_token_t *tok);
+void py_token_show_error_prefix(const py_token_t *tok);
+bool py_token_show_error(const py_token_t *tok, const char *msg);
+
+py_lexer_t *py_lexer_from_file(const char *filename);
+py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str);
+void py_lexer_free(py_lexer_t *lex);
+void py_lexer_to_next(py_lexer_t *lex);
+const py_token_t *py_lexer_cur(const py_lexer_t *lex);
+bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
+/* unused
+bool py_lexer_is_str(py_lexer_t *lex, const char *str);
+bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind);
+bool py_lexer_is_next_str(py_lexer_t *lex, const char *str);
+bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
+bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
+*/
+bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
+
+#endif /* INCLUDED_LEXER_H */
diff --git a/py/lexerfile.c b/py/lexerfile.c
new file mode 100644
index 0000000000..74bb5a061a
--- /dev/null
+++ b/py/lexerfile.c
@@ -0,0 +1,23 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "misc.h"
+#include "lexer.h"
+
+// Create a lexer over the contents of the named file.
+//
+// The whole file is read into a buffer which the lexer then owns and frees.
+// The filename pointer is passed on as the source name and is not copied.
+//
+// Returns NULL, after printing a message, if the file cannot be opened, sized
+// or read, if it is too large for a uint length, or if memory runs out.
+py_lexer_t *py_lexer_from_file(const char *filename) {
+    // TODO abstract away file functionality
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        printf("cannot open file %s\n", filename);
+        return NULL;
+    }
+
+    // find the size of the file, then go back to its start
+    off_t end = lseek(fd, 0, SEEK_END);
+    if (end < 0 || lseek(fd, 0, SEEK_SET) < 0) {
+        printf("cannot get size of file %s\n", filename);
+        close(fd);
+        return NULL;
+    }
+    uint size = (uint)end;
+    if ((off_t)size != end) {
+        // the length does not fit in a uint
+        printf("file %s is too large\n", filename);
+        close(fd);
+        return NULL;
+    }
+
+    // m_new returns NULL for a zero size; that is only an error for non-empty files
+    char *data = m_new(char, size);
+    if (data == NULL && size > 0) {
+        close(fd);
+        return NULL;
+    }
+
+    // read may return fewer bytes than requested, so loop until done
+    uint num_read = 0;
+    while (num_read < size) {
+        ssize_t n = read(fd, data + num_read, size - num_read);
+        if (n < 0) {
+            printf("cannot read file %s\n", filename);
+            m_free(data);
+            close(fd);
+            return NULL;
+        }
+        if (n == 0) {
+            // file is shorter than reported; lex only what was read
+            break;
+        }
+        num_read += (uint)n;
+    }
+    close(fd);
+
+    return py_lexer_from_str_len(filename, data, num_read, true);
+}
diff --git a/py/machine.h b/py/machine.h
new file mode 100644
index 0000000000..fa39c8f2d0
--- /dev/null
+++ b/py/machine.h
@@ -0,0 +1,4 @@
+typedef int64_t machine_int_t; // must be pointer size
+typedef uint64_t machine_uint_t; // must be pointer size
+typedef void *machine_ptr_t; // must be of pointer size
+typedef double machine_float_t;
diff --git a/py/main.c b/py/main.c
new file mode 100644
index 0000000000..7b17c38a85
--- /dev/null
+++ b/py/main.c
@@ -0,0 +1,58 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "compile.h"
+#include "runtime.h"
+
+// Entry point: lex, parse and compile the Python source file named on the
+// command line, then run the resulting module function and print its result.
+// Returns 1 on a usage error or if the file cannot be lexed, otherwise 0.
+int main(int argc, char **argv) {
+ // NOTE(review): the runtime is initialised before the argument check, so
+ // the early returns below leave without calling rt_deinit -- confirm that
+ // this is acceptable.
+ qstr_init();
+ rt_init();
+
+ if (argc != 2) {
+ printf("usage: py <file>\n");
+ return 1;
+ }
+ py_lexer_t *lex = py_lexer_from_file(argv[1]);
+ //const char *pysrc = "def f():\n x=x+1\n print(42)\n";
+ //py_lexer_t *lex = py_lexer_from_str_len("<>", pysrc, strlen(pysrc), false);
+ if (lex == NULL) {
+ return 1;
+ }
+
+ // change 0 to 1 to dump the token stream instead of compiling
+ if (0) {
+ while (!py_lexer_is_kind(lex, PY_TOKEN_END)) {
+ py_token_show(py_lexer_cur(lex));
+ py_lexer_to_next(lex);
+ }
+ } else {
+ py_parse_node_t pn = py_parse(lex, 0);
+ //printf("----------------\n");
+ //parse_node_show(pn, 0);
+ //printf("----------------\n");
+ py_compile(pn);
+ //printf("----------------\n");
+ }
+
+ py_lexer_free(lex);
+
+ if (1) {
+ // execute it
+ // presumably id 1 is the function compiled for the module body -- TODO confirm
+ py_obj_t module_fun = rt_make_function_from_id(1);
+ if (module_fun != py_const_none) {
+ py_obj_t ret = rt_call_function_0(module_fun);
+ printf("done! got: ");
+ py_obj_print(ret);
+ printf("\n");
+ }
+ }
+
+ rt_deinit();
+
+ //printf("total bytes = %d\n", m_get_total_bytes_allocated());
+ return 0;
+}
new file mode 100644
index 0000000000..8775f68aa3
--- /dev/null
+++ b/py/malloc.c
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "misc.h"
+
+static int total_bytes_allocated = 0;
+
+// Release memory obtained from m_malloc, m_malloc0 or m_realloc.
+// Passing NULL is allowed and does nothing.
+void m_free(void *ptr) {
+    // free(NULL) is defined to be a no-op, so no separate NULL check is needed
+    free(ptr);
+}
+
+// Allocate num_bytes of uninitialised memory and add the size to the running
+// allocation total.  Returns NULL when num_bytes is 0, or when the allocation
+// fails (after printing a message).
+void *m_malloc(int num_bytes) {
+    void *mem = NULL;
+
+    if (num_bytes != 0) {
+        mem = malloc(num_bytes);
+        if (mem != NULL) {
+            total_bytes_allocated += num_bytes;
+        } else {
+            printf("could not allocate memory, allocating %d bytes\n", num_bytes);
+        }
+    }
+
+    return mem;
+}
+
+// Allocate num_bytes of zero-filled memory and add the size to the running
+// allocation total.  Returns NULL when num_bytes is 0, or when the allocation
+// fails (after printing a message).
+void *m_malloc0(int num_bytes) {
+    void *mem = NULL;
+
+    if (num_bytes != 0) {
+        mem = calloc(1, num_bytes);
+        if (mem != NULL) {
+            total_bytes_allocated += num_bytes;
+        } else {
+            printf("could not allocate memory, allocating %d bytes\n", num_bytes);
+        }
+    }
+
+    return mem;
+}
+
+// Resize the block at ptr to num_bytes and add the new size to the running
+// allocation total.  A size of 0 frees the block and returns NULL.  Returns
+// NULL if the reallocation fails (after printing a message).
+void *m_realloc(void *ptr, int num_bytes) {
+    if (num_bytes == 0) {
+        free(ptr);
+        return NULL;
+    }
+
+    void *resized = realloc(ptr, num_bytes);
+    if (resized != NULL) {
+        total_bytes_allocated += num_bytes;
+    } else {
+        printf("could not allocate memory, reallocating %d bytes\n", num_bytes);
+    }
+
+    return resized;
+}
+
+// Return the sum of the sizes of all successful m_malloc, m_malloc0 and
+// m_realloc requests so far.  Freed memory is not subtracted.
+int m_get_total_bytes_allocated() {
+ return total_bytes_allocated;
+}
diff --git a/py/misc.c b/py/misc.c
new file mode 100644
index 0000000000..a5bf8d5534
--- /dev/null
+++ b/py/misc.c
@@ -0,0 +1,84 @@
+#include <stdint.h>
+#include <string.h>
+
+#include "misc.h"
+
+// attribute flags
+#define FL_PRINT (0x01)
+#define FL_SPACE (0x02)
+#define FL_DIGIT (0x04)
+#define FL_ALPHA (0x08)
+#define FL_UPPER (0x10)
+#define FL_LOWER (0x20)
+
+// shorthand character attributes
+#define AT_PR (FL_PRINT)
+#define AT_SP (FL_SPACE | FL_PRINT)
+#define AT_DI (FL_DIGIT | FL_PRINT)
+#define AT_AL (FL_ALPHA | FL_PRINT)
+#define AT_UP (FL_UPPER | FL_ALPHA | FL_PRINT)
+#define AT_LO (FL_LOWER | FL_ALPHA | FL_PRINT)
+
+// table of attributes for ascii characters
+// Indexed by character code 0..127, eight entries per row.  Each entry is a
+// combination of the FL_* flags above.
+// NOTE(review): form feed (12) has no FL_SPACE flag although tab, LF, VT and
+// CR do -- confirm whether that is intended.
+static const uint8_t attr[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, AT_SP, AT_SP, AT_SP, 0, AT_SP, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ AT_SP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
+ AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
+ AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI,
+ AT_DI, AT_DI, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
+ AT_PR, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP,
+ AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP,
+ AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP,
+ AT_UP, AT_UP, AT_UP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
+ AT_PR, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO,
+ AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO,
+ AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO,
+ AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0
+};
+
+// Return the character at s.  Multi-byte UTF-8 sequences are not decoded yet:
+// the single byte at s is returned as a value in the range 0..255 (never
+// negative, whatever the signedness of plain char).
+unichar g_utf8_get_char(const char *s) {
+    return (unsigned char)*s;
+}
+
+// Return a pointer to the character after the one at s.  Currently this
+// always advances by exactly one byte.
+char *g_utf8_next_char(const char *s) {
+ return (char*)(s + 1);
+}
+
+// True if c is an ASCII character flagged as white space in the attr table.
+// Values outside 0..127, including negative ones such as an end-of-input
+// marker of -1, are never white space and never index the table.
+bool g_unichar_isspace(unichar c) {
+    return c >= 0 && c < 128 && (attr[c] & FL_SPACE) != 0;
+}
+
+// True if c is an ASCII character flagged as alphabetic in the attr table.
+// Values outside 0..127, including negative ones such as an end-of-input
+// marker of -1, are never alphabetic and never index the table.
+bool g_unichar_isalpha(unichar c) {
+    return c >= 0 && c < 128 && (attr[c] & FL_ALPHA) != 0;
+}
+
+// True if c is an ASCII character flagged as printable in the attr table.
+// Values outside 0..127, including negative ones such as an end-of-input
+// marker of -1, are never printable and never index the table.
+bool g_unichar_isprint(unichar c) {
+    return c >= 0 && c < 128 && (attr[c] & FL_PRINT) != 0;
+}
+
+// True if c is an ASCII character flagged as a digit in the attr table.
+// Values outside 0..127, including negative ones such as an end-of-input
+// marker of -1, are never digits and never index the table.
+bool g_unichar_isdigit(unichar c) {
+    return c >= 0 && c < 128 && (attr[c] & FL_DIGIT) != 0;
+}
+
+/*
+bool char_is_alpha_or_digit(unichar c) {
+ return c < 128 && (attr[c] & (FL_ALPHA | FL_DIGIT)) != 0;
+}
+
+bool char_is_upper(unichar c) {
+ return c < 128 && (attr[c] & FL_UPPER) != 0;
+}
+
+bool char_is_lower(unichar c) {
+ return c < 128 && (attr[c] & FL_LOWER) != 0;
+}
+*/
+
+/*
+char *g_strdup(const char *s) {
+ return strdup(s);
+}
+*/
diff --git a/py/misc.h b/py/misc.h
new file mode 100644
index 0000000000..9ba80a5c37
--- /dev/null
+++ b/py/misc.h
@@ -0,0 +1,91 @@
+// a mini library of useful types and functions
+
+#ifndef _INCLUDED_MINILIB_H
+#define _INCLUDED_MINILIB_H
+
+/** types *******************************************************/
+
+typedef int bool;
+enum {
+ false = 0,
+ true = 1
+};
+
+typedef unsigned char byte;
+typedef unsigned int uint;
+
+/** memory allocation *******************************************/
+
+#define m_new(type, num) ((type*)(m_malloc(sizeof(type) * (num))))
+#define m_new0(type, num) ((type*)(m_malloc0(sizeof(type) * (num))))
+#define m_renew(type, ptr, num) ((type*)(m_realloc((ptr), sizeof(type) * (num))))
+
+void m_free(void *ptr);
+void *m_malloc(int num_bytes);
+void *m_malloc0(int num_bytes);
+void *m_realloc(void *ptr, int num_bytes);
+
+int m_get_total_bytes_allocated();
+
+/** unichar / UTF-8 *********************************************/
+
+typedef int unichar; // TODO
+
+unichar g_utf8_get_char(const char *s);
+char *g_utf8_next_char(const char *s);
+
+bool g_unichar_isspace(unichar c);
+bool g_unichar_isalpha(unichar c);
+bool g_unichar_isprint(unichar c);
+bool g_unichar_isdigit(unichar c);
+
+//char *g_strdup(const char *s);
+
+/** blob ********************************************************/
+
+/*
+unsigned short decode_le16(byte *buf);
+unsigned int decode_le32(byte *buf);
+void encode_le16(byte *buf, unsigned short i);
+void encode_le32(byte *buf, unsigned int i);
+*/
+
+/** string ******************************************************/
+
+/*
+#define streq(s1, s2) (strcmp((s1), (s2)) == 0)
+*/
+
+/** variable string *********************************************/
+
+/*
+typedef struct _vstr_t vstr_t;
+
+vstr_t *vstr_new();
+void vstr_free(vstr_t *vstr);
+void vstr_reset(vstr_t *vstr);
+bool vstr_had_error(vstr_t *vstr);
+char *vstr_str(vstr_t *vstr);
+int vstr_len(vstr_t *vstr);
+void vstr_hint_size(vstr_t *vstr, int size);
+char *vstr_add_len(vstr_t *vstr, int len);
+void vstr_add_str(vstr_t *vstr, const char *str);
+void vstr_add_strn(vstr_t *vstr, const char *str, int len);
+void vstr_add_byte(vstr_t *vstr, byte v);
+void vstr_add_le16(vstr_t *vstr, unsigned short v);
+void vstr_add_le32(vstr_t *vstr, unsigned int v);
+void vstr_cut_tail(vstr_t *vstr, int len);
+void vstr_printf(vstr_t *vstr, const char *fmt, ...);
+*/
+
+/** unique string ***********************************************/
+
+typedef unsigned int qstr;
+
+void qstr_init();
+qstr qstr_from_str_static(const char *str);
+qstr qstr_from_str_take(char *str);
+qstr qstr_from_strn_copy(const char *str, int len);
+const char* qstr_str(qstr qstr);
+
+#endif // _INCLUDED_MINILIB_H
diff --git a/py/parse.c b/py/parse.c
new file mode 100644
index 0000000000..94a5a5d9ca
--- /dev/null
+++ b/py/parse.c
@@ -0,0 +1,565 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+
+// Encoding of rule_t.act: the bits under RULE_ACT_KIND_MASK select the action
+// kind (or / and / list) and the bits under RULE_ACT_ARG_MASK carry a small
+// per-kind argument.
+#define RULE_ACT_KIND_MASK (0xf0)
+#define RULE_ACT_ARG_MASK (0x0f)
+#define RULE_ACT_OR (0x10)
+#define RULE_ACT_AND (0x20)
+#define RULE_ACT_LIST (0x30)
+
+// Encoding of each entry of rule_t.arg[]: the bits under RULE_ARG_KIND_MASK say
+// whether the entry is a (possibly optional) token or rule, and the bits under
+// RULE_ARG_ARG_MASK hold the token kind or rule id.
+#define RULE_ARG_BLANK (0x0000)
+#define RULE_ARG_KIND_MASK (0xf000)
+#define RULE_ARG_ARG_MASK (0x0fff)
+#define RULE_ARG_TOK (0x1000)
+#define RULE_ARG_RULE (0x2000)
+#define RULE_ARG_OPT_TOK (0x3000)
+#define RULE_ARG_OPT_RULE (0x4000)
+
+// (un)comment to use rule names; for debugging
+//#define USE_RULE_NAME (1)
+
+// Static description of one grammar rule; one instance is defined per DEF_RULE
+// entry of grammar.h.
+typedef struct _rule_t {
+ byte rule_id; // RULE_xxx value identifying this rule
+ byte act; // action kind (RULE_ACT_KIND_MASK bits) or'd with a small argument (RULE_ACT_ARG_MASK bits)
+#ifdef USE_RULE_NAME
+ const char *rule_name; // stringified rule name, only present for debugging
+#endif
+ uint16_t arg[]; // rule arguments, each a RULE_ARG_xxx kind or'd with a token kind or rule id
+} rule_t;
+
+// Rule identifiers: one RULE_<name> constant is generated for every DEF_RULE
+// entry in grammar.h. RULE_none occupies id 0 and RULE_maximum_number_of is
+// one past the last generated id.
+enum {
+ RULE_none = 0,
+#define DEF_RULE(rule, comp, kind, arg...) RULE_##rule,
+#include "grammar.h"
+#undef DEF_RULE
+ RULE_maximum_number_of,
+};
+
+// Helper macros used while including grammar.h a second time, this time to
+// define one static rule_t object (rule_<name>) per DEF_RULE entry. The
+// or/and/list... macros build the act byte and the tok/rule/opt_... macros
+// build the arg[] entries. All helpers are #undef'd afterwards.
+#define or(n) (RULE_ACT_OR | n)
+#define and(n) (RULE_ACT_AND | n)
+#define one_or_more (RULE_ACT_LIST | 2)
+#define list (RULE_ACT_LIST | 1)
+#define list_with_end (RULE_ACT_LIST | 3)
+#define tok(t) (RULE_ARG_TOK | PY_TOKEN_##t)
+#define rule(r) (RULE_ARG_RULE | RULE_##r)
+#define opt_tok(t) (RULE_ARG_OPT_TOK | PY_TOKEN_##t)
+#define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r)
+#ifdef USE_RULE_NAME
+#define DEF_RULE(rule, comp, kind, arg...) static rule_t rule_##rule = { RULE_##rule, kind, #rule, { arg } };
+#else
+#define DEF_RULE(rule, comp, kind, arg...) static rule_t rule_##rule = { RULE_##rule, kind, { arg } };
+#endif
+#include "grammar.h"
+#undef or
+#undef and
+#undef list
+#undef list_with_end
+#undef tok
+#undef rule
+#undef opt_tok
+#undef opt_rule
+#undef one_or_more
+#undef DEF_RULE
+
+// Table mapping a rule id (RULE_xxx) to its rule_t description. Entry 0
+// corresponds to RULE_none and is NULL.
+static rule_t *rules[] = {
+ NULL,
+#define DEF_RULE(rule, comp, kind, arg...) &rule_##rule,
+#include "grammar.h"
+#undef DEF_RULE
+};
+
+// One entry of the parser's rule stack: a rule that still has work to do and
+// the argument position at which to resume it.
+typedef struct _rule_stack_t {
+ byte rule_id; // index into rules[]
+ int32_t arg_i; // what should be the size and signedness?
+} rule_stack_t;
+
+// Parser state: a growable stack of pending rules and a stack of parse nodes
+// produced so far.
+typedef struct _parser_t {
+ uint rule_stack_alloc; // number of entries allocated in rule_stack
+ uint rule_stack_top; // number of entries currently in use in rule_stack
+ rule_stack_t *rule_stack;
+
+ uint result_stack_top; // number of entries currently in use in result_stack
+ py_parse_node_t *result_stack; // NOTE: no capacity is recorded for this stack
+} parser_t;
+
+// Push the pair (rule, arg_i) onto the rule stack, doubling the stack's
+// capacity first when it is already full.
+static void push_rule(parser_t *parser, rule_t *rule, int arg_i) {
+    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
+        parser->rule_stack_alloc *= 2;
+        parser->rule_stack = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc);
+    }
+    rule_stack_t *slot = &parser->rule_stack[parser->rule_stack_top];
+    slot->rule_id = rule->rule_id;
+    slot->arg_i = arg_i;
+    parser->rule_stack_top += 1;
+}
+
+// Push the rule referenced by a rule argument (RULE_ARG_RULE or
+// RULE_ARG_OPT_RULE), starting at its first argument.
+static void push_rule_from_arg(parser_t *parser, uint arg) {
+    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
+    uint target_id = arg & RULE_ARG_ARG_MASK;
+    assert(target_id < RULE_maximum_number_of);
+    rule_t *target = rules[target_id];
+    push_rule(parser, target, 0);
+}
+
+// Pop the top entry of the rule stack, returning the rule description through
+// *rule and the saved argument position through *arg_i.
+static void pop_rule(parser_t *parser, rule_t **rule, uint *arg_i) {
+    parser->rule_stack_top -= 1;
+    const rule_stack_t *entry = &parser->rule_stack[parser->rule_stack_top];
+    *rule = rules[entry->rule_id];
+    *arg_i = entry->arg_i;
+}
+
+// Build a leaf parse node: the low 4 bits hold the leaf kind (one of the
+// PY_PARSE_NODE_xxx constants) and the bits above hold arg.
+py_parse_node_t py_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
+    // shift in the unsigned domain: left-shifting a negative signed value is
+    // undefined behaviour in C, whereas the unsigned shift yields the same
+    // two's complement bit pattern in a well-defined way
+    return (py_parse_node_t)kind | ((py_parse_node_t)arg << 4);
+}
+
+// running count of struct parse nodes allocated by parse_node_new_struct
+int num_parse_nodes_allocated = 0;
+
+// Allocate a struct parse node with room for num_args child nodes. The child
+// slots are left for the caller to fill in. kind_num_nodes stores the rule id
+// in its low 8 bits and the child count in the bits above.
+py_parse_node_struct_t *parse_node_new_struct(int rule_id, int num_args) {
+    py_parse_node_struct_t *node = m_malloc(sizeof(py_parse_node_struct_t) + num_args * sizeof(py_parse_node_t));
+    num_parse_nodes_allocated += 1;
+    node->kind_num_nodes = (rule_id & 0xff) | (num_args << 8);
+    node->source = 0; // TODO
+    return node;
+}
+
+// Print a parse node to stdout, preceded by `indent` copies of the indent
+// string. Struct nodes are printed recursively, each child level indented by
+// 2 more.
+void parse_node_show(py_parse_node_t pn, int indent) {
+    int remaining = indent;
+    while (remaining-- > 0) {
+        printf(" ");
+    }
+
+    if (PY_PARSE_NODE_IS_NULL(pn)) {
+        printf("NULL\n");
+        return;
+    }
+
+    if (PY_PARSE_NODE_IS_LEAF(pn)) {
+        int leaf_arg = PY_PARSE_NODE_LEAF_ARG(pn);
+        switch (PY_PARSE_NODE_LEAF_KIND(pn)) {
+            case PY_PARSE_NODE_ID:
+                printf("id(%s)\n", qstr_str(leaf_arg));
+                break;
+            case PY_PARSE_NODE_SMALL_INT:
+                printf("int(%d)\n", leaf_arg);
+                break;
+            case PY_PARSE_NODE_INTEGER:
+                printf("int(%s)\n", qstr_str(leaf_arg));
+                break;
+            case PY_PARSE_NODE_DECIMAL:
+                printf("dec(%s)\n", qstr_str(leaf_arg));
+                break;
+            case PY_PARSE_NODE_STRING:
+                printf("str(%s)\n", qstr_str(leaf_arg));
+                break;
+            case PY_PARSE_NODE_BYTES:
+                printf("bytes(%s)\n", qstr_str(leaf_arg));
+                break;
+            case PY_PARSE_NODE_TOKEN:
+                printf("tok(%d)\n", leaf_arg);
+                break;
+            default:
+                assert(0);
+        }
+        return;
+    }
+
+    // anything else is a pointer to a struct node
+    py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+    int num_children = pns->kind_num_nodes >> 8;
+#ifdef USE_RULE_NAME
+    printf("%s(%d) (n=%d)\n", rules[PY_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, PY_PARSE_NODE_STRUCT_KIND(pns), num_children);
+#else
+    printf("rule(%u) (n=%d)\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns), num_children);
+#endif
+    for (int child = 0; child < num_children; child++) {
+        parse_node_show(pns->nodes[child], indent + 2);
+    }
+}
+
+/*
+static void result_stack_show(parser_t *parser) {
+ printf("result stack, most recent first\n");
+ for (int i = parser->result_stack_top - 1; i >= 0; i--) {
+ parse_node_show(parser->result_stack[i], 0);
+ }
+}
+*/
+
+// Remove and return the most recently pushed result node; the stack must not
+// be empty.
+static py_parse_node_t pop_result(parser_t *parser) {
+    assert(parser->result_stack_top > 0);
+    parser->result_stack_top -= 1;
+    return parser->result_stack[parser->result_stack_top];
+}
+
+// Return, without removing it, the result node `pos` entries below the top of
+// the result stack (pos == 0 is the most recently pushed node).
+static py_parse_node_t peek_result(parser_t *parser, int pos) {
+    assert(parser->result_stack_top > pos);
+    const py_parse_node_t *top = &parser->result_stack[parser->result_stack_top - 1];
+    return *(top - pos);
+}
+
+// Append a node to the result stack. No capacity check is made here.
+static void push_result_node(parser_t *parser, py_parse_node_t pn) {
+    uint slot = parser->result_stack_top;
+    parser->result_stack_top = slot + 1;
+    parser->result_stack[slot] = pn;
+}
+
+// Convert the lexer's current token into a leaf parse node and push it onto
+// the result stack:
+//  - NAME tokens become PY_PARSE_NODE_ID leaves holding the interned name;
+//  - NUMBER tokens become PY_PARSE_NODE_SMALL_INT when the text is a plain
+//    integer literal (decimal, or 0o/0x/0b prefixed) whose value is at most
+//    0xffff, PY_PARSE_NODE_DECIMAL when a '.', 'e' or 'E' is met that is not
+//    a hex digit, and PY_PARSE_NODE_INTEGER (interned text) otherwise;
+//  - STRING and BYTES tokens become leaves holding the interned text;
+//  - every other token becomes a PY_PARSE_NODE_TOKEN leaf holding its kind.
+// The lexer is not advanced.
+static void push_result_token(parser_t *parser, const py_lexer_t *lex) {
+    const py_token_t *tok = py_lexer_cur(lex);
+    py_parse_node_t pn;
+    if (tok->kind == PY_TOKEN_NAME) {
+        pn = py_parse_node_new_leaf(PY_PARSE_NODE_ID, qstr_from_strn_copy(tok->str, tok->len));
+    } else if (tok->kind == PY_TOKEN_NUMBER) {
+        bool dec = false;
+        bool small_int = true;
+        int int_val = 0;
+        int len = tok->len;
+        const char *str = tok->str;
+        int base = 10;
+        int i = 0;
+        if (len >= 3 && str[0] == '0') {
+            if (str[1] == 'o' || str[1] == 'O') {
+                // octal
+                base = 8;
+                i = 2;
+            } else if (str[1] == 'x' || str[1] == 'X') {
+                // hexadecimal
+                base = 16;
+                i = 2;
+            } else if (str[1] == 'b' || str[1] == 'B') {
+                // binary
+                base = 2;
+                i = 2;
+            }
+        }
+        for (; i < len; i++) {
+            int digit;
+            if (g_unichar_isdigit(str[i]) && str[i] - '0' < base) {
+                digit = str[i] - '0';
+            } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
+                digit = str[i] - 'a' + 10;
+            } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') {
+                // upper-case hex digits A..F (the range used to start at 'F',
+                // so A..E were rejected and 'E' was mistaken for an exponent)
+                digit = str[i] - 'A' + 10;
+            } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E') {
+                dec = true;
+                break;
+            } else {
+                small_int = false;
+                break;
+            }
+            // stop accumulating once the value is already too big for a small
+            // int: it stays out of range for the check below and can never
+            // overflow the int, however long the literal is
+            if (int_val <= 0xffff) {
+                int_val = base * int_val + digit;
+            }
+        }
+        if (dec) {
+            pn = py_parse_node_new_leaf(PY_PARSE_NODE_DECIMAL, qstr_from_strn_copy(str, len));
+        } else if (small_int && -0x10000 <= int_val && int_val <= 0xffff) {
+            pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, int_val);
+        } else {
+            pn = py_parse_node_new_leaf(PY_PARSE_NODE_INTEGER, qstr_from_strn_copy(str, len));
+        }
+    } else if (tok->kind == PY_TOKEN_STRING) {
+        pn = py_parse_node_new_leaf(PY_PARSE_NODE_STRING, qstr_from_strn_copy(tok->str, tok->len));
+    } else if (tok->kind == PY_TOKEN_BYTES) {
+        pn = py_parse_node_new_leaf(PY_PARSE_NODE_BYTES, qstr_from_strn_copy(tok->str, tok->len));
+    } else {
+        pn = py_parse_node_new_leaf(PY_PARSE_NODE_TOKEN, tok->kind);
+    }
+    push_result_node(parser, pn);
+}
+
+// Pop the top num_args nodes off the result stack, wrap them (in their
+// original push order) in a new struct node for `rule`, and push that node.
+static void push_result_rule(parser_t *parser, rule_t *rule, int num_args) {
+    py_parse_node_struct_t *pns = parse_node_new_struct(rule->rule_id, num_args);
+    // the most recently pushed node is the last child, so fill from the back
+    int slot = num_args;
+    while (slot > 0) {
+        slot -= 1;
+        pns->nodes[slot] = pop_result(parser);
+    }
+    push_result_node(parser, (py_parse_node_t)pns);
+}
+
+// Parse the token stream of `lex` and return the root parse node, or
+// PY_PARSE_NODE_NULL after printing a message when a syntax error is found.
+//
+// The parser is driven by an explicit stack of (rule, argument index) pairs
+// instead of recursion. Each iteration pops one entry and continues that rule
+// according to its action kind (or / and / list). When a rule cannot match, the
+// `backtrack` flag is set and the next popped rule decides whether to try an
+// alternative, treat the failure as an absent optional, or report a syntax error.
+//
+// Notes:
+//  - wanted_rule is currently ignored: it is overwritten with RULE_file_input.
+//  - the result stack is allocated with 1000 entries and is not grown.
+//  - the parser_t and its stacks are not freed before returning.
+//  - if tokens remain after the start rule has finished, an error is printed
+//    but the parse result is still returned.
+py_parse_node_t py_parse(py_lexer_t *lex, int wanted_rule) {
+ wanted_rule = RULE_file_input;
+ parser_t *parser = m_new(parser_t, 1);
+ parser->rule_stack_alloc = 64;
+ parser->rule_stack_top = 0;
+ parser->rule_stack = m_new(rule_stack_t, parser->rule_stack_alloc);
+
+ parser->result_stack = m_new(py_parse_node_t, 1000);
+ parser->result_stack_top = 0;
+
+ push_rule(parser, rules[wanted_rule], 0);
+
+ uint n, i;
+ bool backtrack = false;
+ rule_t *rule;
+ py_token_kind_t tok_kind;
+ bool emit_rule;
+ bool had_trailing_sep;
+
+ for (;;) {
+ next_rule:
+ if (parser->rule_stack_top == 0) {
+ break;
+ }
+
+ // resume the most recently pushed rule at argument index i
+ pop_rule(parser, &rule, &i);
+ n = rule->act & RULE_ACT_ARG_MASK;
+
+ /*
+ // debugging
+ printf("depth=%d ", parser->rule_stack_top);
+ for (int j = 0; j < parser->rule_stack_top; ++j) {
+ printf(" ");
+ }
+ printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
+ */
+
+ switch (rule->act & RULE_ACT_KIND_MASK) {
+ case RULE_ACT_OR:
+ // i > 0 without backtrack means a previously tried alternative matched
+ if (i > 0 && !backtrack) {
+ goto next_rule;
+ } else {
+ backtrack = false;
+ }
+ // try every alternative except the last
+ for (; i < n - 1; ++i) {
+ switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
+ case RULE_ARG_TOK:
+ if (py_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
+ push_result_token(parser, lex);
+ py_lexer_to_next(lex);
+ goto next_rule;
+ }
+ break;
+ case RULE_ARG_RULE:
+ push_rule(parser, rule, i + 1);
+ push_rule_from_arg(parser, rule->arg[i]);
+ goto next_rule;
+ default:
+ assert(0);
+ }
+ }
+ // last alternative: this rule is not pushed back, so a failure here
+ // propagates to whichever rule is below it on the stack
+ if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
+ if (py_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
+ push_result_token(parser, lex);
+ py_lexer_to_next(lex);
+ } else {
+ backtrack = true;
+ goto next_rule;
+ }
+ } else {
+ push_rule_from_arg(parser, rule->arg[i]);
+ }
+ break;
+
+ case RULE_ACT_AND:
+
+ // failed, backtrack if we can, else syntax error
+ if (backtrack) {
+ assert(i > 0);
+ if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
+ // an optional rule that failed, so continue with next arg
+ push_result_node(parser, PY_PARSE_NODE_NULL);
+ backtrack = false;
+ } else {
+ // a mandatory rule that failed, so propagate backtrack
+ if (i > 1) {
+ // already eaten tokens so can't backtrack
+ goto syntax_error;
+ } else {
+ goto next_rule;
+ }
+ }
+ }
+
+ // progress through the rule
+ for (; i < n; ++i) {
+ switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
+ case RULE_ARG_TOK:
+ // need to match a token
+ tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
+ if (py_lexer_is_kind(lex, tok_kind)) {
+ // matched token
+ if (tok_kind == PY_TOKEN_NAME) {
+ push_result_token(parser, lex);
+ }
+ py_lexer_to_next(lex);
+ } else {
+ // failed to match token
+ if (i > 0) {
+ // already eaten tokens so can't backtrack
+ goto syntax_error;
+ } else {
+ // this rule failed, so backtrack
+ backtrack = true;
+ goto next_rule;
+ }
+ }
+ break;
+ case RULE_ARG_RULE:
+ //if (i + 1 < n) {
+ push_rule(parser, rule, i + 1);
+ //}
+ push_rule_from_arg(parser, rule->arg[i]);
+ goto next_rule;
+ case RULE_ARG_OPT_RULE:
+ push_rule(parser, rule, i + 1);
+ push_rule_from_arg(parser, rule->arg[i]);
+ goto next_rule;
+ default:
+ assert(0);
+ }
+ }
+
+ assert(i == n);
+
+ // matched the rule, so now build the corresponding parse_node
+
+ // count number of arguments for the parse_node
+ i = 0;
+ emit_rule = false;
+ for (int x = 0; x < n; ++x) {
+ if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
+ tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
+ if (tok_kind >= PY_TOKEN_NAME) {
+ emit_rule = true;
+ }
+ if (tok_kind == PY_TOKEN_NAME) {
+ // only tokens which were names are pushed to stack
+ i += 1;
+ }
+ } else {
+ // rules are always pushed
+ i += 1;
+ }
+ }
+
+ // always emit these rules, even if they have only 1 argument
+ if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
+ emit_rule = true;
+ }
+
+ // never emit these rules if they have only 1 argument
+ // NOTE: can't put atom_paren here because we need it to distinguish, for example, [a,b] from [(a,b)]
+ if (rule->rule_id == RULE_else_stmt || rule->rule_id == RULE_testlist_comp_3b || rule->rule_id == RULE_import_as_names_paren || rule->rule_id == RULE_typedargslist_colon || rule->rule_id == RULE_typedargslist_equal || rule->rule_id == RULE_dictorsetmaker_colon || rule->rule_id == RULE_classdef_2 || rule->rule_id == RULE_with_item_as || rule->rule_id == RULE_assert_stmt_extra || rule->rule_id == RULE_as_name || rule->rule_id == RULE_raise_stmt_from || rule->rule_id == RULE_vfpdef) {
+ emit_rule = false;
+ }
+
+ // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
+ if (rule->rule_id == RULE_funcdef || rule->rule_id == RULE_classdef || rule->rule_id == RULE_comp_for || rule->rule_id == RULE_lambdef || rule->rule_id == RULE_lambdef_nocond) {
+ emit_rule = true;
+ push_result_node(parser, PY_PARSE_NODE_NULL);
+ i += 1;
+ }
+
+ // count how many of the i results on the stack are not NULL nodes
+ int num_not_nil = 0;
+ for (int x = 0; x < i; ++x) {
+ if (peek_result(parser, x) != PY_PARSE_NODE_NULL) {
+ num_not_nil += 1;
+ }
+ }
+ //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil);
+ if (emit_rule) {
+ push_result_rule(parser, rule, i);
+ } else if (num_not_nil == 0) {
+ push_result_rule(parser, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
+ //result_stack_show(parser);
+ //assert(0);
+ } else if (num_not_nil == 1) {
+ // single result, leave it on stack
+ py_parse_node_t pn = PY_PARSE_NODE_NULL;
+ for (int x = 0; x < i; ++x) {
+ py_parse_node_t pn2 = pop_result(parser);
+ if (pn2 != PY_PARSE_NODE_NULL) {
+ pn = pn2;
+ }
+ }
+ push_result_node(parser, pn);
+ } else {
+ push_result_rule(parser, rule, i);
+ }
+ break;
+
+ case RULE_ACT_LIST:
+ // n=2 is: item item*
+ // n=1 is: item (sep item)*
+ // n=3 is: item (sep item)* [sep]
+ if (backtrack) {
+ list_backtrack:
+ had_trailing_sep = false;
+ if (n == 2) {
+ if (i == 1) {
+ // fail on item, first time round; propagate backtrack
+ goto next_rule;
+ } else {
+ // fail on item, in later rounds; finish with this rule
+ backtrack = false;
+ }
+ } else {
+ if (i == 1) {
+ // fail on item, first time round; propagate backtrack
+ goto next_rule;
+ } else if ((i & 1) == 1) {
+ // fail on item, in later rounds; have eaten tokens so can't backtrack
+ if (n == 3) {
+ // list allows trailing separator; finish parsing list
+ had_trailing_sep = true;
+ backtrack = false;
+ } else {
+ // list doesn't allow trailing separator; fail
+ goto syntax_error;
+ }
+ } else {
+ // fail on separator; finish parsing list
+ backtrack = false;
+ }
+ }
+ } else {
+ // this loop is only left through goto (list_backtrack or next_rule)
+ for (;;) {
+ // i & 1 & n selects arg[1] (the separator) on odd i when n is odd, else arg[0] (the item)
+ uint arg = rule->arg[i & 1 & n];
+ switch (arg & RULE_ARG_KIND_MASK) {
+ case RULE_ARG_TOK:
+ if (py_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) {
+ if (i & 1 & n) {
+ // separators which are tokens are not pushed to result stack
+ } else {
+ push_result_token(parser, lex);
+ }
+ py_lexer_to_next(lex);
+ // got element of list, so continue parsing list
+ i += 1;
+ } else {
+ // couldn't get element of list
+ i += 1;
+ backtrack = true;
+ goto list_backtrack;
+ }
+ break;
+ case RULE_ARG_RULE:
+ push_rule(parser, rule, i + 1);
+ push_rule_from_arg(parser, arg);
+ goto next_rule;
+ default:
+ assert(0);
+ }
+ }
+ }
+ assert(i >= 1);
+
+ // compute number of elements in list, result in i
+ i -= 1;
+ if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
+ // don't count separators when they are tokens
+ i = (i + 1) / 2;
+ }
+
+ if (i == 1) {
+ // list matched single item
+ if (had_trailing_sep) {
+ // if there was a trailing separator, make a list of a single item
+ push_result_rule(parser, rule, i);
+ } else {
+ // just leave single item on stack (ie don't wrap in a list)
+ }
+ } else {
+ //printf("done list %s %d %d\n", rule->rule_name, n, i);
+ push_result_rule(parser, rule, i);
+ }
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+ // rule stack is empty: the start rule has been fully processed
+ if (!py_lexer_is_kind(lex, PY_TOKEN_END)) {
+ py_lexer_show_error(lex, "unexpected token at end:");
+ py_token_show(py_lexer_cur(lex));
+ }
+ //printf("--------------\n");
+ //result_stack_show(parser);
+ assert(parser->result_stack_top == 1);
+ //printf("maximum depth: %d\n", parser->rule_stack_alloc);
+ //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
+ return parser->result_stack[0];
+
+syntax_error:
+ py_lexer_show_error(lex, "syntax error:");
+#ifdef USE_RULE_NAME
+ py_lexer_show_error(lex, rule->rule_name);
+#endif
+ py_token_show(py_lexer_cur(lex));
+ return PY_PARSE_NODE_NULL;
+}
diff --git a/py/parse.h b/py/parse.h
new file mode 100644
index 0000000000..07d553c141
--- /dev/null
+++ b/py/parse.h
@@ -0,0 +1,54 @@
+struct _py_lexer_t;
+
+// a py_parse_node_t is:
+// - 0000...0000: no node
+// - xxxx...0001: an identifier; bits 4 and above are the qstr
+// - xxxx...0011: a small integer; bits 4 and above are the signed value, 2's complement
+// - xxxx...0101: an integer; bits 4 and above are the qstr holding the value
+// - xxxx...0111: a decimal; bits 4 and above are the qstr holding the value
+// - xxxx...1001: a string; bits 4 and above are the qstr holding the value
+// - xxxx...1011: a bytes literal; bits 4 and above are the qstr holding the value
+// - xxxx...1101: a token; bits 4 and above are py_token_kind_t
+// - xxxx...xxx0: pointer to py_parse_node_struct_t
+
+#define PY_PARSE_NODE_NULL (0)
+#define PY_PARSE_NODE_ID (0x1)
+#define PY_PARSE_NODE_SMALL_INT (0x3)
+#define PY_PARSE_NODE_INTEGER (0x5)
+#define PY_PARSE_NODE_DECIMAL (0x7)
+#define PY_PARSE_NODE_STRING (0x9)
+#define PY_PARSE_NODE_BYTES (0xb)
+#define PY_PARSE_NODE_TOKEN (0xd)
+
+typedef machine_uint_t py_parse_node_t; // must be pointer size
+
+// A non-leaf parse node: a header followed by a variable number of child nodes.
+// A py_parse_node_t whose lowest bit is 0 (and which is not 0 itself) is a
+// pointer to one of these.
+typedef struct _py_parse_node_struct_t {
+ uint32_t source; // file identifier, and line number
+ uint32_t kind_num_nodes; // parse node kind, and number of nodes
+ py_parse_node_t nodes[]; // nodes
+} py_parse_node_struct_t;
+
+// macros for py_parse_node_t usage
+// some of these evaluate their argument more than once
+
+#define PY_PARSE_NODE_IS_NULL(pn) ((pn) == PY_PARSE_NODE_NULL)
+#define PY_PARSE_NODE_IS_LEAF(pn) ((pn) & 1)
+#define PY_PARSE_NODE_IS_STRUCT(pn) ((pn) != PY_PARSE_NODE_NULL && ((pn) & 1) == 0)
+#define PY_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != PY_PARSE_NODE_NULL && ((pn) & 1) == 0 && PY_PARSE_NODE_STRUCT_KIND((py_parse_node_struct_t*)(pn)) == (k))
+
+#define PY_PARSE_NODE_IS_ID(pn) (((pn) & 0xf) == PY_PARSE_NODE_ID)
+#define PY_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0xf) == PY_PARSE_NODE_SMALL_INT)
+#define PY_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0xf) == PY_PARSE_NODE_TOKEN)
+// true if pn is a token leaf whose token kind is exactly k; k is parenthesized
+// so that an expression argument (eg a conditional) is shifted as a whole
+#define PY_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (PY_PARSE_NODE_TOKEN | ((k) << 4)))
+
+#define PY_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0xf)
+// TODO should probably have int and uint versions of this macro
+#define PY_PARSE_NODE_LEAF_ARG(pn) (((machine_int_t)(pn)) >> 4)
+#define PY_PARSE_NODE_STRUCT_KIND(pns) ((pns)->kind_num_nodes & 0xff)
+#define PY_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)
+
+py_parse_node_t py_parse_node_new_leaf(machine_int_t kind, machine_int_t arg);
+
+void parse_node_show(py_parse_node_t pn, int indent);
+
+py_parse_node_t py_parse(struct _py_lexer_t *lex, int wanted_rule);
diff --git a/py/qstr.c b/py/qstr.c
new file mode 100644
index 0000000000..33d15c7e73
--- /dev/null
+++ b/py/qstr.c
@@ -0,0 +1,56 @@
+#include <assert.h>
+#include <string.h>
+
+#include "misc.h"
+
+// Table of interned strings; a qstr is an index into qstrs[].
+static int qstrs_alloc; // number of entries allocated in qstrs
+static int qstrs_len; // number of entries in use
+static const char **qstrs;
+
+// Create the interned-string table with room for 400 entries and reserve
+// qstr 0 for the string "nil".
+void qstr_init() {
+    qstrs_alloc = 400;
+    qstrs = m_new(const char*, qstrs_alloc);
+    qstrs[0] = "nil";
+    qstrs_len = 1;
+}
+
+// Append str (the pointer itself, not a copy) to the table, doubling the
+// table's capacity first if it is full, and return the new entry's index.
+static qstr qstr_add(const char *str) {
+    if (qstrs_len >= qstrs_alloc) {
+        qstrs_alloc *= 2;
+        qstrs = m_renew(const char*, qstrs, qstrs_alloc);
+    }
+    int new_index = qstrs_len;
+    qstrs[new_index] = str;
+    qstrs_len = new_index + 1;
+    return qstrs_len - 1;
+}
+
+// Intern a NUL-terminated string. If an equal string is already in the table
+// its index is returned; otherwise the pointer str itself is stored (no copy
+// is made) and the new index is returned.
+qstr qstr_from_str_static(const char *str) {
+    int idx = 0;
+    while (idx < qstrs_len) {
+        if (strcmp(qstrs[idx], str) == 0) {
+            return idx;
+        }
+        idx += 1;
+    }
+    return qstr_add(str);
+}
+
+// Intern a NUL-terminated string, taking over the buffer: when an equal
+// string is already in the table, str is released with m_free and the existing
+// index is returned; otherwise the pointer str is stored in the table.
+qstr qstr_from_str_take(char *str) {
+    int idx = 0;
+    while (idx < qstrs_len) {
+        if (strcmp(qstrs[idx], str) == 0) {
+            m_free(str);
+            return idx;
+        }
+        idx += 1;
+    }
+    return qstr_add(str);
+}
+
+// Intern the first len bytes of str. If a table entry equals those bytes
+// exactly its index is returned; otherwise a strndup'd copy is added to the
+// table and the new index is returned.
+qstr qstr_from_strn_copy(const char *str, int len) {
+    int idx = 0;
+    while (idx < qstrs_len) {
+        const char *candidate = qstrs[idx];
+        // same first len bytes, and the candidate ends right there
+        if (strncmp(candidate, str, len) == 0 && candidate[len] == '\0') {
+            return idx;
+        }
+        idx += 1;
+    }
+    return qstr_add(strndup(str, len));
+}
+
+// Return the string stored in the table for the given qstr. The index is not
+// range checked.
+const char *qstr_str(qstr qstr) {
+    const char *text = qstrs[qstr];
+    return text;
+}
diff --git a/py/runtime.c b/py/runtime.c
new file mode 100644
index 0000000000..bf2e2ee065
--- /dev/null
+++ b/py/runtime.c
@@ -0,0 +1,944 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "machine.h"
+#include "runtime.h"
+#include "bc.h"
+
+#define DEBUG_printf(args...) (void)0
+//#define DEBUG_printf(args...) printf(args)
+
+#define DEBUG_OP_printf(args...) (void)0
+//#define DEBUG_OP_printf(args...) printf(args)
+
+// enable/disable float support with this definition
+#define PY_FLOAT (1)
+
+typedef machine_int_t py_small_int_t;
+
+// Object representation helpers. A py_obj_t whose lowest bit is 1 is a small
+// integer stored in the remaining bits; otherwise it is treated as a pointer
+// to a py_obj_base_t.
+// IS_O: true if o is not a small int and its base object's kind equals k
+#define IS_O(o, k) (((((py_small_int_t)(o)) & 1) == 0) && (((py_obj_base_t*)(o))->kind == (k)))
+// IS_SMALL_INT: non-zero if the lowest bit of o is set
+#define IS_SMALL_INT(o) (((py_small_int_t)(o)) & 1)
+// FROM_SMALL_INT: recover the integer value by shifting out the tag bit
+#define FROM_SMALL_INT(o) (((py_small_int_t)(o)) >> 1)
+// TO_SMALL_INT: shift the value left by one and set the tag bit
+#define TO_SMALL_INT(o) ((py_obj_t)(((o) << 1) | 1))
+
+#ifdef PY_FLOAT
+typedef machine_float_t float_t;
+#endif
+
+// Discriminator stored in py_obj_base_t.kind; it selects which member of the
+// object's union is in use.
+typedef enum {
+ O_CONST,
+ O_STR,
+#ifdef PY_FLOAT
+ O_FLOAT,
+#endif
+ O_FUN_0,
+ O_FUN_1,
+ O_FUN_2,
+ O_FUN_N,
+ O_FUN_BC,
+ O_BOUND_METH,
+ O_LIST,
+ O_SET,
+ O_MAP,
+ O_CLASS,
+} py_obj_kind_t;
+
+// How the keys of a py_map_t are hashed and compared (see py_map_lookup_helper):
+// MAP_QSTR keys are hashed by their raw value and compared by identity only;
+// MAP_PY_OBJ keys are hashed with py_obj_hash and also compared with py_obj_equal.
+typedef enum {
+ MAP_QSTR,
+ MAP_PY_OBJ,
+} py_map_kind_t;
+
+// One slot of a map's hash table. A NULL key marks the slot as empty.
+typedef struct _py_map_elem_t {
+ py_obj_t key;
+ py_obj_t value;
+} py_map_elem_t;
+
+// Open-addressing hash table mapping keys to py_obj_t values.
+typedef struct _py_map_t {
+ py_map_kind_t kind; // how keys are hashed and compared
+ machine_uint_t alloc; // number of slots in table
+ machine_uint_t used; // number of slots that hold a key
+ py_map_elem_t *table;
+} py_map_t;
+
+// Heap representation of every object that is not a small int: a kind tag
+// followed by an anonymous union holding the kind-specific data.
+typedef struct _py_obj_base_t {
+ py_obj_kind_t kind;
+ union {
+ const char *id; // for O_CONST: the text printed for the constant
+ qstr u_str; // for O_STR: the interned string
+#ifdef PY_FLOAT
+ float_t flt; // for O_FLOAT
+#endif
+ struct { // for O_FUN_[012N]
+ void *fun;
+ int n_args;
+ } u_fun;
+ struct { // for O_FUN_BC
+ byte *code;
+ uint len;
+ int n_args;
+ } u_fun_bc;
+ struct { // for O_BOUND_METH
+ py_obj_t meth;
+ py_obj_t self;
+ } u_bound_meth;
+ struct { // for O_LIST
+ int alloc;
+ int len;
+ py_obj_t *items;
+ } u_list;
+ struct { // for O_SET
+ int alloc;
+ int used;
+ py_obj_t *table;
+ } u_set;
+ py_map_t u_map; // for O_MAP
+ /*
+ struct { // for O_MAP
+ int alloc;
+ int used;
+ py_map_elem_t *table;
+ } u_map;
+ */
+ struct { // for O_CLASS
+ py_map_t *map;
+ } u_class;
+ };
+} py_obj_base_t;
+
+py_obj_t py_const_none;
+py_obj_t py_const_false;
+py_obj_t py_const_true;
+
+py_map_t map_name;
+py_map_t map_builtins;
+
+// approximately doubling primes; made with Mathematica command: Table[Prime[Floor[(1.7)^n]], {n, 3, 24}]
+static int doubling_primes[] = {7, 19, 43, 89, 179, 347, 647, 1229, 2297, 4243, 7829, 14347, 26017, 47149, 84947, 152443, 273253, 488399, 869927, 1547173, 2745121, 4861607};
+
+// Return the first prime in doubling_primes that is >= x. When x is larger
+// than every prime in the table, x with its lowest bit set is returned instead.
+int get_doubling_prime_greater_or_equal_to(int x) {
+    const int *prime = doubling_primes;
+    const int *end = doubling_primes + sizeof(doubling_primes) / sizeof(int);
+    while (prime != end) {
+        if (*prime >= x) {
+            return *prime;
+        }
+        prime += 1;
+    }
+    // ran out of primes in the table!
+    // return something sensible, at least make it odd
+    return x | 1;
+}
+
+// Initialise an already-allocated map as empty, with a table whose size is the
+// first doubling prime >= n + 1 and whose slots are all cleared to NULL.
+void py_map_init(py_map_t *map, py_map_kind_t kind, int n) {
+    map->kind = kind;
+    map->used = 0;
+    map->alloc = get_doubling_prime_greater_or_equal_to(n + 1);
+    map->table = m_new(py_map_elem_t, map->alloc);
+    for (int slot = 0; slot < map->alloc; slot++) {
+        py_map_elem_t *elem = &map->table[slot];
+        elem->key = NULL;
+        elem->value = NULL;
+    }
+}
+
+// Allocate a new map and initialise it with py_map_init(kind, n).
+py_map_t *py_map_new(py_map_kind_t kind, int n) {
+    py_map_t *new_map = m_new(py_map_t, 1);
+    py_map_init(new_map, kind, n);
+    return new_map;
+}
+
+// Hash an object for use as a map key: a small int hashes to its value and a
+// string to its qstr. Any other object trips an assertion.
+int py_obj_hash(py_obj_t o_in) {
+    if (IS_SMALL_INT(o_in)) {
+        return FROM_SMALL_INT(o_in);
+    }
+    if (IS_O(o_in, O_STR)) {
+        py_obj_base_t *str_obj = (py_obj_base_t*)o_in;
+        return str_obj->u_str;
+    }
+    assert(0);
+    return 0;
+}
+
+// Compare two objects for equality. Identical handles are equal; two distinct
+// small ints are unequal; two strings are equal when their qstrs match. Any
+// other combination trips an assertion.
+bool py_obj_equal(py_obj_t o1, py_obj_t o2) {
+    if (o1 == o2) {
+        return true;
+    }
+    if (IS_SMALL_INT(o1) && IS_SMALL_INT(o2)) {
+        // different handles, so different values
+        return false;
+    }
+    if (IS_O(o1, O_STR) && IS_O(o2, O_STR)) {
+        qstr lhs = ((py_obj_base_t*)o1)->u_str;
+        qstr rhs = ((py_obj_base_t*)o2)->u_str;
+        return lhs == rhs;
+    }
+    assert(0);
+    return false;
+}
+
+// Find the slot for key `index` in `map`, using linear probing.
+//
+// For MAP_PY_OBJ maps the key is hashed with py_obj_hash and compared by
+// identity or py_obj_equal; for other maps the key's raw value is the hash and
+// only identity is compared. A NULL key marks an empty slot, so NULL cannot be
+// stored as a key.
+//
+// Returns the matching slot. When the key is absent: returns NULL if
+// add_if_not_found is false, otherwise claims an empty slot for the key
+// (growing and rehashing the table first if it is nearly full) and returns it;
+// the caller is expected to set the slot's value.
+py_map_elem_t* py_map_lookup_helper(py_map_t *map, py_obj_t index, bool add_if_not_found) {
+    bool is_map_py_obj = (map->kind == MAP_PY_OBJ);
+    machine_uint_t hash;
+    if (is_map_py_obj) {
+        hash = py_obj_hash(index);
+    } else {
+        hash = (machine_uint_t)index;
+    }
+    uint pos = hash % map->alloc;
+    for (;;) {
+        py_map_elem_t *elem = &map->table[pos];
+        if (elem->key == NULL) {
+            // not in table
+            if (add_if_not_found) {
+                if (map->used + 1 >= map->alloc) {
+                    // not enough room in table, rehash it
+                    int old_alloc = map->alloc;
+                    py_map_elem_t *old_table = map->table;
+                    map->alloc = get_doubling_prime_greater_or_equal_to(map->alloc + 1);
+                    map->used = 0;
+                    map->table = m_new(py_map_elem_t, map->alloc);
+                    // the new table must start out empty (as in py_map_init),
+                    // otherwise the re-insertions below would probe
+                    // uninitialised keys
+                    for (int i = 0; i < map->alloc; i++) {
+                        map->table[i].key = NULL;
+                        map->table[i].value = NULL;
+                    }
+                    for (int i = 0; i < old_alloc; i++) {
+                        if (old_table[i].key != NULL) {
+                            py_map_lookup_helper(map, old_table[i].key, true)->value = old_table[i].value;
+                        }
+                    }
+                    m_free(old_table);
+                    // restart the search for the new element
+                    pos = hash % map->alloc;
+                } else {
+                    map->used += 1;
+                    elem->key = index;
+                    return elem;
+                }
+            } else {
+                return NULL;
+            }
+        } else if (elem->key == index || (is_map_py_obj && py_obj_equal(elem->key, index))) {
+            // found it
+            if (add_if_not_found) {
+                elem->key = index;
+            }
+            return elem;
+        } else {
+            // not yet found, keep searching in this table
+            pos = (pos + 1) % map->alloc;
+        }
+    }
+}
+
+// Look up a qstr key in a map: the qstr value is widened and passed to
+// py_map_lookup_helper as the key.
+py_map_elem_t* py_qstr_map_lookup(py_map_t *map, qstr index, bool add_if_not_found) {
+    machine_uint_t raw_key = (machine_uint_t)index;
+    return py_map_lookup_helper(map, (py_obj_t)raw_key, add_if_not_found);
+}
+
+// Look up `index` in the map held by the O_MAP object `o`.
+py_map_elem_t* py_map_lookup(py_obj_t o, py_obj_t index, bool add_if_not_found) {
+    assert(IS_O(o, O_MAP));
+    py_map_t *map = &((py_obj_base_t *)o)->u_map;
+    return py_map_lookup_helper(map, index, add_if_not_found);
+}
+
+// Report whether a value can be stored as a small int. Currently a stub:
+// it returns true for every value without inspecting the argument.
+static bool fit_small_int(py_small_int_t o) {
+ return true;
+}
+
+// Allocate an O_CONST object; id is the text printed for it (the pointer is
+// stored, not copied).
+py_obj_t py_obj_new_const(const char *id) {
+    py_obj_base_t *obj = m_new(py_obj_base_t, 1);
+    obj->id = id;
+    obj->kind = O_CONST;
+    return (py_obj_t)obj;
+}
+
+// Allocate an O_STR object referring to the given interned string.
+py_obj_t py_obj_new_str(qstr qstr) {
+    py_obj_base_t *obj = m_new(py_obj_base_t, 1);
+    obj->u_str = qstr;
+    obj->kind = O_STR;
+    return (py_obj_t)obj;
+}
+
+#ifdef PY_FLOAT
+// Allocate an O_FLOAT object holding val.
+py_obj_t py_obj_new_float(float_t val) {
+    py_obj_base_t *obj = m_new(py_obj_base_t, 1);
+    obj->flt = val;
+    obj->kind = O_FLOAT;
+    return (py_obj_t)obj;
+}
+#endif
+
+// Append arg to the O_LIST object self_in, doubling the item array's capacity
+// first when it is full. Returns arg.
+py_obj_t list_append(py_obj_t self_in, py_obj_t arg) {
+    assert(IS_O(self_in, O_LIST));
+    py_obj_base_t *list = self_in;
+    if (list->u_list.len >= list->u_list.alloc) {
+        list->u_list.alloc *= 2;
+        list->u_list.items = m_renew(py_obj_t, list->u_list.items, list->u_list.alloc);
+    }
+    list->u_list.items[list->u_list.len] = arg;
+    list->u_list.len += 1;
+    return arg;
+}
+
+static qstr q_append;
+static qstr q_print;
+static qstr q_len;
+static qstr q___build_class__;
+
+// Kind of code registered under a unique code id.
+typedef enum {
+ PY_CODE_NATIVE,
+ PY_CODE_BYTE,
+} py_code_kind_t;
+
+// Record kept per unique code id (see rt_assign_native_code and
+// rt_assign_byte_code); `kind` selects the union member in use.
+typedef struct _py_code_t {
+ py_code_kind_t kind;
+ int n_args;
+ union {
+ struct {
+ py_fun_t fun; // for PY_CODE_NATIVE
+ } u_native;
+ struct {
+ byte *code; // for PY_CODE_BYTE
+ uint len;
+ } u_byte;
+ };
+} py_code_t;
+
+static int next_unique_code_id;
+static py_code_t *unique_codes;
+
+py_obj_t fun_list_append;
+
+// Builtin print(o): writes a string object's text unquoted, or any other
+// object via py_obj_print, followed by a newline. Returns None.
+py_obj_t py_builtin_print(py_obj_t o) {
+    if (!IS_O(o, O_STR)) {
+        // print the object Python style
+        py_obj_print(o);
+        printf("\n");
+        return py_const_none;
+    }
+    // special case, print string raw
+    qstr text = ((py_obj_base_t*)o)->u_str;
+    printf("%s\n", qstr_str(text));
+    return py_const_none;
+}
+
+// Builtin len(o): the number of items of a list or the number of used slots of
+// a map, as a small int. Any other object trips an assertion.
+py_obj_t py_builtin_len(py_obj_t o_in) {
+    py_small_int_t count = 0;
+    if (IS_O(o_in, O_LIST)) {
+        count = ((py_obj_base_t*)o_in)->u_list.len;
+    } else if (IS_O(o_in, O_MAP)) {
+        count = ((py_obj_base_t*)o_in)->u_map.used;
+    } else {
+        assert(0);
+    }
+    return TO_SMALL_INT(count);
+}
+
+// Builtin __build_class__: returns a new O_CLASS object with an empty
+// qstr-keyed map. Both arguments are currently unused.
+py_obj_t py_builtin___build_class__(py_obj_t o1, py_obj_t o2) {
+    py_obj_base_t *cls = m_new(py_obj_base_t, 1);
+    cls->kind = O_CLASS;
+    py_map_t *members = py_map_new(MAP_QSTR, 0);
+    cls->u_class.map = members;
+    return cls;
+}
+
+FILE *fp_native = NULL;
+
+// Initialise the runtime's global state: interned names, constant objects, the
+// name and builtin maps, the unique-code table and the native-code dump file.
+void rt_init() {
+ // intern the names the runtime itself refers to
+ q_append = qstr_from_str_static("append");
+ q_print = qstr_from_str_static("print");
+ q_len = qstr_from_str_static("len");
+ q___build_class__ = qstr_from_str_static("__build_class__");
+
+ // singleton constants; the string is what py_obj_print shows for them
+ py_const_none = py_obj_new_const("None");
+ py_const_false = py_obj_new_const("False");
+ py_const_true = py_obj_new_const("True");
+
+ py_map_init(&map_name, MAP_QSTR, 0);
+
+ // register the builtin functions
+ py_map_init(&map_builtins, MAP_QSTR, 3);
+ py_qstr_map_lookup(&map_builtins, q_print, true)->value = rt_make_function_1(py_builtin_print);
+ py_qstr_map_lookup(&map_builtins, q_len, true)->value = rt_make_function_1(py_builtin_len);
+ py_qstr_map_lookup(&map_builtins, q___build_class__, true)->value = rt_make_function_2(py_builtin___build_class__);
+
+ // code ids start at 1; the table itself is allocated on first assignment
+ next_unique_code_id = 1;
+ unique_codes = NULL;
+
+ fun_list_append = rt_make_function_2(list_append);
+
+ // every native function assigned later is also written to this file;
+ // fp_native stays NULL if the file cannot be opened
+ fp_native = fopen("out-native", "wb");
+}
+
+// Shut down the runtime: close the native-code dump file opened by rt_init,
+// if there is one.
+void rt_deinit() {
+    if (fp_native != NULL) {
+        fclose(fp_native);
+        // forget the handle so that a second rt_deinit, or a later
+        // rt_assign_native_code, never touches the closed stream
+        fp_native = NULL;
+    }
+}
+
+// Hand out the next unused code id and advance the counter.
+int rt_get_new_unique_code_id() {
+    int id = next_unique_code_id;
+    next_unique_code_id += 1;
+    return id;
+}
+
+// Register native code for a previously allocated unique code id, optionally
+// dump up to the first 128 bytes through DEBUG_printf, and append the whole
+// machine code to fp_native when that file is open.
+// The code table is allocated on the first assignment, sized by the number of
+// ids handed out so far.
+void rt_assign_native_code(int unique_code_id, py_fun_t fun, uint len, int n_args) {
+    if (unique_codes == NULL) {
+        unique_codes = m_new(py_code_t, next_unique_code_id);
+    }
+    assert(unique_code_id < next_unique_code_id);
+    py_code_t *entry = &unique_codes[unique_code_id];
+    entry->kind = PY_CODE_NATIVE;
+    entry->n_args = n_args;
+    entry->u_native.fun = fun;
+
+    DEBUG_printf("assign native code: id=%d fun=%p len=%u n_args=%d\n", unique_code_id, fun, len, n_args);
+    byte *fun_data = (byte*)(((machine_uint_t)fun) & (~1)); // need to clear lower bit in case it's thumb code
+    for (int offset = 0; offset < 128 && offset < len; offset++) {
+        // 16 bytes per line
+        if (offset > 0 && offset % 16 == 0) {
+            DEBUG_printf("\n");
+        }
+        DEBUG_printf(" %02x", fun_data[offset]);
+    }
+    DEBUG_printf("\n");
+
+    if (fp_native == NULL) {
+        return;
+    }
+    fwrite(fun_data, len, 1, fp_native);
+}
+
+// Register byte code for a previously allocated unique code id. The code
+// table is allocated on the first assignment, sized by the number of ids
+// handed out so far.
+void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args) {
+    if (unique_codes == NULL) {
+        unique_codes = m_new(py_code_t, next_unique_code_id);
+    }
+    assert(unique_code_id < next_unique_code_id);
+    py_code_t *entry = &unique_codes[unique_code_id];
+    entry->kind = PY_CODE_BYTE;
+    entry->n_args = n_args;
+    entry->u_byte.code = code;
+    entry->u_byte.len = len;
+
+    DEBUG_printf("assign byte code: id=%d code=%p len=%u n_args=%d\n", unique_code_id, code, len, n_args);
+}
+
+// Return the Python type name of an object as a static string. Object kinds
+// without a name here trip an assertion.
+const char *py_obj_get_type_str(py_obj_t o_in) {
+    if (IS_SMALL_INT(o_in)) {
+        return "int";
+    }
+    py_obj_base_t *obj = o_in;
+    switch (obj->kind) {
+        case O_CONST:
+            // the only constants are None, False and True
+            return (obj == py_const_none) ? "NoneType" : "bool";
+        case O_STR:
+            return "str";
+#ifdef PY_FLOAT
+        case O_FLOAT:
+            return "float";
+#endif
+        case O_LIST:
+            return "list";
+        case O_SET:
+            return "set";
+        case O_MAP:
+            return "dict";
+        default:
+            assert(0);
+            return "UnknownType";
+    }
+}
+
+// Print an object to stdout in Python style, without a trailing newline.
+// Containers print their elements recursively; sets and maps are printed in
+// table order. Object kinds not handled here trip an assertion.
+void py_obj_print(py_obj_t o_in) {
+    if (IS_SMALL_INT(o_in)) {
+        printf("%d", (int)FROM_SMALL_INT(o_in));
+        return;
+    }
+
+    py_obj_base_t *obj = o_in;
+    // separator written before each element; empty before the first one
+    const char *sep = "";
+    switch (obj->kind) {
+        case O_CONST:
+            printf("%s", obj->id);
+            break;
+        case O_STR:
+            // TODO need to escape chars etc
+            printf("'%s'", qstr_str(obj->u_str));
+            break;
+#ifdef PY_FLOAT
+        case O_FLOAT:
+            printf("%f", obj->flt);
+            break;
+#endif
+        case O_LIST:
+            printf("[");
+            for (int idx = 0; idx < obj->u_list.len; idx++) {
+                printf("%s", sep);
+                sep = ", ";
+                py_obj_print(obj->u_list.items[idx]);
+            }
+            printf("]");
+            break;
+        case O_SET:
+            printf("{");
+            for (int slot = 0; slot < obj->u_set.alloc; slot++) {
+                if (obj->u_set.table[slot] == NULL) {
+                    continue; // empty slot
+                }
+                printf("%s", sep);
+                sep = ", ";
+                py_obj_print(obj->u_set.table[slot]);
+            }
+            printf("}");
+            break;
+        case O_MAP:
+            printf("{");
+            for (int slot = 0; slot < obj->u_map.alloc; slot++) {
+                py_map_elem_t *elem = &obj->u_map.table[slot];
+                if (elem->key == NULL) {
+                    continue; // empty slot
+                }
+                printf("%s", sep);
+                sep = ", ";
+                py_obj_print(elem->key);
+                printf(": ");
+                py_obj_print(elem->value);
+            }
+            printf("}");
+            break;
+        default:
+            assert(0);
+    }
+}
+
+// Return the truth value (0 or 1) of arg.
+// Handles small ints (zero is false) and the constants None, False and True.
+// Any other object trips assert(0); 0 is returned if asserts are compiled out.
+int rt_is_true(py_obj_t arg) {
+    DEBUG_OP_printf("is true %p\n", arg);
+
+    if (IS_SMALL_INT(arg)) {
+        return FROM_SMALL_INT(arg) != 0;
+    }
+    if (arg == py_const_none || arg == py_const_false) {
+        return 0;
+    }
+    if (arg == py_const_true) {
+        return 1;
+    }
+
+    // truthiness of other object kinds is not implemented
+    assert(0);
+    return 0;
+}
+
+// Extract the C integer value of a small-int object.
+// Any other object trips assert(0); 0 is returned if asserts are compiled out.
+int rt_get_int(py_obj_t arg) {
+    if (!IS_SMALL_INT(arg)) {
+        assert(0);
+        return 0;
+    }
+    return FROM_SMALL_INT(arg);
+}
+
+// Create a new string object for the interned string qstr and return it.
+py_obj_t rt_load_const_str(qstr qstr) {
+    DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
+    py_obj_t str_obj = py_obj_new_str(qstr);
+    return str_obj;
+}
+
+// Look up a name and return the object bound to it.
+// The name is searched for in map_name first and then in map_builtins.
+// If it is in neither, a message is printed and assert(0) fires; when
+// asserts are compiled out py_const_none is returned instead of
+// dereferencing the NULL lookup result.
+py_obj_t rt_load_name(qstr qstr) {
+    // logic: search locals, globals, builtins
+    DEBUG_OP_printf("load %s\n", qstr_str(qstr));
+    py_map_elem_t *elem = py_qstr_map_lookup(&map_name, qstr, false);
+    if (elem == NULL) {
+        elem = py_qstr_map_lookup(&map_builtins, qstr, false);
+        if (elem == NULL) {
+            printf("name doesn't exist: %s\n", qstr_str(qstr));
+            assert(0);
+            return py_const_none;
+        }
+    }
+    return elem->value;
+}
+
+// Load a global name.  Currently this simply forwards to rt_load_name.
+py_obj_t rt_load_global(qstr qstr) {
+    // TODO
+    py_obj_t value = rt_load_name(qstr);
+    return value;
+}
+
+// Return the object bound to __build_class__ in map_builtins.
+// If the entry is missing, a message is printed and assert(0) fires; when
+// asserts are compiled out py_const_none is returned instead of
+// dereferencing the NULL lookup result.
+py_obj_t rt_load_build_class() {
+    DEBUG_OP_printf("load_build_class\n");
+    py_map_elem_t *elem = py_qstr_map_lookup(&map_builtins, q___build_class__, false);
+    if (elem == NULL) {
+        printf("name doesn't exist: __build_class__\n");
+        assert(0);
+        return py_const_none;
+    }
+    return elem->value;
+}
+
+// Bind obj to the name qstr in map_name.  The lookup is done with the
+// add-if-missing flag set, so a new entry is requested when none exists.
+void rt_store_name(qstr qstr, py_obj_t obj) {
+    DEBUG_OP_printf("store %s <- %p\n", qstr_str(qstr), obj);
+    py_map_elem_t *slot = py_qstr_map_lookup(&map_name, qstr, true);
+    slot->value = obj;
+}
+
+// Apply unary operator op to arg.
+// Not implemented: the body is an unconditional assert(0), and py_const_none
+// is returned only when asserts are compiled out.
+py_obj_t rt_unary_op(int op, py_obj_t arg) {
+ assert(0);
+ return py_const_none;
+}
+
+// Apply the binary operator op (an rt_binary_op_t value) to lhs and rhs.
+//
+// Supported combinations:
+//  - RT_BINARY_OP_SUBSCR on a list with a small-int index; negative indices
+//    count from the end and the index is range checked, matching
+//    rt_store_subscr;
+//  - add, in-place add, subtract, multiply and floor divide on two small
+//    ints (plus true divide when PY_FLOAT is defined);
+//  - add and in-place add on two strings (concatenation).
+//
+// Floor divide rounds towards negative infinity, as Python requires, rather
+// than towards zero as the C '/' operator does.
+//
+// Everything else, including a small-int result that does not fit in a small
+// int, ends in assert(0); py_const_none is returned if asserts are compiled
+// out.
+py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs) {
+    DEBUG_OP_printf("binary %d %p %p\n", op, lhs, rhs);
+    if (op == RT_BINARY_OP_SUBSCR) {
+        if (IS_O(lhs, O_LIST) && IS_SMALL_INT(rhs)) {
+            py_obj_base_t *o = lhs;
+            int idx = FROM_SMALL_INT(rhs);
+            if (idx < 0) {
+                idx += o->u_list.len;
+            }
+            if (0 <= idx && idx < o->u_list.len) {
+                return o->u_list.items[idx];
+            }
+            printf("IndexError: list index out of range\n");
+            assert(0);
+        } else {
+            assert(0);
+        }
+    } else if (IS_SMALL_INT(lhs) && IS_SMALL_INT(rhs)) {
+        py_small_int_t val;
+        switch (op) {
+            case RT_BINARY_OP_ADD:
+            case RT_BINARY_OP_INPLACE_ADD: val = FROM_SMALL_INT(lhs) + FROM_SMALL_INT(rhs); break;
+            case RT_BINARY_OP_SUBTRACT: val = FROM_SMALL_INT(lhs) - FROM_SMALL_INT(rhs); break;
+            case RT_BINARY_OP_MULTIPLY: val = FROM_SMALL_INT(lhs) * FROM_SMALL_INT(rhs); break;
+            case RT_BINARY_OP_FLOOR_DIVIDE:
+                if (FROM_SMALL_INT(rhs) == 0) {
+                    // integer division by zero is undefined behaviour in C
+                    printf("ZeroDivisionError: integer division or modulo by zero\n");
+                    assert(0);
+                    return py_const_none;
+                }
+                val = FROM_SMALL_INT(lhs) / FROM_SMALL_INT(rhs);
+                // C truncates towards zero; step down by one when the exact
+                // quotient is negative and has a fractional part
+                if (FROM_SMALL_INT(lhs) % FROM_SMALL_INT(rhs) != 0
+                    && ((FROM_SMALL_INT(lhs) < 0) != (FROM_SMALL_INT(rhs) < 0))) {
+                    val -= 1;
+                }
+                break;
+#ifdef PY_FLOAT
+            case RT_BINARY_OP_TRUE_DIVIDE: return py_obj_new_float((float_t)FROM_SMALL_INT(lhs) / (float_t)FROM_SMALL_INT(rhs));
+#endif
+            default: printf("%d\n", op); assert(0); val = 0;
+        }
+        if (fit_small_int(val)) {
+            return TO_SMALL_INT(val);
+        }
+        // result does not fit in a small int: falls through to the final assert
+    } else if (IS_O(lhs, O_STR) && IS_O(rhs, O_STR)) {
+        if (op != RT_BINARY_OP_ADD && op != RT_BINARY_OP_INPLACE_ADD) {
+            // unsupported string operator; never hand NULL to qstr_from_str_take
+            printf("%d\n", op);
+            assert(0);
+            return py_const_none;
+        }
+        const char *lhs_str = qstr_str(((py_obj_base_t*)lhs)->u_str);
+        const char *rhs_str = qstr_str(((py_obj_base_t*)rhs)->u_str);
+        // +1 for the terminating NUL
+        char *val = m_new(char, strlen(lhs_str) + strlen(rhs_str) + 1);
+        strcpy(val, lhs_str);
+        strcat(val, rhs_str);
+        // the buffer is handed over to qstr_from_str_take
+        return py_obj_new_str(qstr_from_str_take(val));
+    }
+    assert(0);
+    return py_const_none;
+}
+
+// Compare lhs with rhs using op (an rt_compare_op_t value) and return
+// py_const_true or py_const_false.
+// Only small-int operands are supported, with the operators <, >, ==, <=,
+// >= and !=.  Any other operator or operand kind trips assert(0);
+// py_const_none is returned for unsupported operand kinds if asserts are
+// compiled out.
+py_obj_t rt_compare_op(int op, py_obj_t lhs, py_obj_t rhs) {
+    DEBUG_OP_printf("compare %d %p %p\n", op, lhs, rhs);
+    if (IS_SMALL_INT(lhs) && IS_SMALL_INT(rhs)) {
+        int cmp;
+        switch (op) {
+            case RT_COMPARE_OP_LESS: cmp = FROM_SMALL_INT(lhs) < FROM_SMALL_INT(rhs); break;
+            case RT_COMPARE_OP_MORE: cmp = FROM_SMALL_INT(lhs) > FROM_SMALL_INT(rhs); break;
+            case RT_COMPARE_OP_EQUAL: cmp = FROM_SMALL_INT(lhs) == FROM_SMALL_INT(rhs); break;
+            case RT_COMPARE_OP_LESS_EQUAL: cmp = FROM_SMALL_INT(lhs) <= FROM_SMALL_INT(rhs); break;
+            case RT_COMPARE_OP_MORE_EQUAL: cmp = FROM_SMALL_INT(lhs) >= FROM_SMALL_INT(rhs); break;
+            case RT_COMPARE_OP_NOT_EQUAL: cmp = FROM_SMALL_INT(lhs) != FROM_SMALL_INT(rhs); break;
+            default: assert(0); cmp = 0;
+        }
+        if (cmp) {
+            return py_const_true;
+        } else {
+            return py_const_false;
+        }
+    }
+    assert(0);
+    return py_const_none;
+}
+
+// Build a function object from the code registered under unique_code_id.
+// Returns py_const_none when the id is outside [0, next_unique_code_id).
+// Native code becomes an O_FUN_0, O_FUN_1 or O_FUN_2 object depending on its
+// argument count; byte code becomes an O_FUN_BC object that records the code
+// pointer, its length and the argument count.
+py_obj_t rt_make_function_from_id(int unique_code_id) {
+    if (unique_code_id < 0 || unique_code_id >= next_unique_code_id) {
+        // illegal code id (a negative id would index before unique_codes)
+        return py_const_none;
+    }
+    py_code_t *c = &unique_codes[unique_code_id];
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    switch (c->kind) {
+        case PY_CODE_NATIVE:
+            switch (c->n_args) {
+                case 0: o->kind = O_FUN_0; break;
+                case 1: o->kind = O_FUN_1; break;
+                case 2: o->kind = O_FUN_2; break;
+                default: assert(0); // native functions take at most 2 arguments
+            }
+            o->u_fun.fun = c->u_native.fun;
+            break;
+        case PY_CODE_BYTE:
+            o->kind = O_FUN_BC;
+            o->u_fun_bc.code = c->u_byte.code;
+            o->u_fun_bc.len = c->u_byte.len;
+            o->u_fun_bc.n_args = c->n_args;
+            break;
+        default:
+            assert(0);
+    }
+    return o;
+}
+
+// Wrap a native function taking no arguments in a new O_FUN_0 object.
+py_obj_t rt_make_function_0(py_fun_0_t fun) {
+    py_obj_base_t *fn_obj = m_new(py_obj_base_t, 1);
+    fn_obj->u_fun.fun = fun;
+    fn_obj->kind = O_FUN_0;
+    return fn_obj;
+}
+
+// Wrap a native function taking one argument in a new O_FUN_1 object.
+py_obj_t rt_make_function_1(py_fun_1_t fun) {
+    py_obj_base_t *fn_obj = m_new(py_obj_base_t, 1);
+    fn_obj->u_fun.fun = fun;
+    fn_obj->kind = O_FUN_1;
+    return fn_obj;
+}
+
+// Wrap a native function taking two arguments in a new O_FUN_2 object.
+py_obj_t rt_make_function_2(py_fun_2_t fun) {
+    py_obj_base_t *fn_obj = m_new(py_obj_base_t, 1);
+    fn_obj->u_fun.fun = fun;
+    fn_obj->kind = O_FUN_2;
+    return fn_obj;
+}
+
+// Wrap a native function in a new O_FUN_N object, storing the argument
+// count alongside the function pointer.
+py_obj_t rt_make_function(int n_args, py_fun_t code) {
+    // assumes code is a pointer to a py_fun_t (i think this is safe...)
+    py_obj_base_t *fn_obj = m_new(py_obj_base_t, 1);
+    fn_obj->kind = O_FUN_N;
+    fn_obj->u_fun.n_args = n_args;
+    fn_obj->u_fun.fun = code;
+    return fn_obj;
+}
+
+// Call fun with no arguments and return its result.
+// Accepts native O_FUN_0 objects and byte-code functions declared with zero
+// arguments.  Anything else prints the pointer and trips assert(0).
+py_obj_t rt_call_function_0(py_obj_t fun) {
+    py_obj_base_t *callee = fun;
+
+    if (IS_O(fun, O_FUN_0)) {
+        DEBUG_OP_printf("calling native %p...\n", callee->u_fun.fun);
+        return ((py_fun_0_t)callee->u_fun.fun)();
+    }
+
+    if (IS_O(fun, O_FUN_BC)) {
+        assert(callee->u_fun_bc.n_args == 0);
+        DEBUG_OP_printf("calling byte code %p...\n", callee->u_fun_bc.code);
+        return py_execute_byte_code(callee->u_fun_bc.code, callee->u_fun_bc.len, NULL, 0);
+    }
+
+    printf("fun0:%p\n", fun);
+    assert(0);
+    return py_const_none;
+}
+
+// Call fun with one argument and return its result.
+// Accepts native O_FUN_1 objects, byte-code functions declared with one
+// argument, and bound methods (called as meth(self, arg) through
+// rt_call_function_2).  Anything else prints the pointer and trips assert(0).
+py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg) {
+    py_obj_base_t *callee = fun;
+
+    if (IS_O(fun, O_FUN_1)) {
+        DEBUG_OP_printf("calling native %p...\n", callee->u_fun.fun);
+        return ((py_fun_1_t)callee->u_fun.fun)(arg);
+    }
+
+    if (IS_O(fun, O_FUN_BC)) {
+        assert(callee->u_fun_bc.n_args == 1);
+        DEBUG_OP_printf("calling byte code %p...\n", callee->u_fun_bc.code);
+        return py_execute_byte_code(callee->u_fun_bc.code, callee->u_fun_bc.len, &arg, 1);
+    }
+
+    if (IS_O(fun, O_BOUND_METH)) {
+        // prepend the bound self object to the argument list
+        return rt_call_function_2(callee->u_bound_meth.meth, callee->u_bound_meth.self, arg);
+    }
+
+    printf("fun1:%p\n", fun);
+    assert(0);
+    return py_const_none;
+}
+
+// Call fun with two arguments and return its result.
+// Accepts native O_FUN_2 objects and byte-code functions declared with two
+// arguments.  Anything else trips assert(0).
+py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2) {
+    py_obj_base_t *callee = fun;
+
+    if (IS_O(fun, O_FUN_2)) {
+        DEBUG_OP_printf("calling native %p...\n", callee->u_fun.fun);
+        return ((py_fun_2_t)callee->u_fun.fun)(arg1, arg2);
+    }
+
+    if (IS_O(fun, O_FUN_BC)) {
+        assert(callee->u_fun_bc.n_args == 2);
+        DEBUG_OP_printf("calling byte code %p...\n", callee->u_fun_bc.code);
+        // pack the arguments into an array for the byte-code executor
+        py_obj_t packed[2] = { arg1, arg2 };
+        return py_execute_byte_code(callee->u_fun_bc.code, callee->u_fun_bc.len, packed, 2);
+    }
+
+    assert(0);
+    return py_const_none;
+}
+
+// Call a method that was loaded as a (self, fun) pair and takes no explicit
+// arguments.  A NULL self means fun is called as a plain function.
+py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self) {
+    DEBUG_OP_printf("call method %p %p\n", fun, self);
+    return (self == NULL) ? rt_call_function_0(fun)
+                          : rt_call_function_1(fun, self);
+}
+
+// Call a method that was loaded as a (self, fun) pair with one explicit
+// argument.  A NULL self means fun is called as a plain function.
+py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg) {
+    DEBUG_OP_printf("call method %p %p %p\n", fun, self, arg);
+    return (self == NULL) ? rt_call_function_1(fun, arg)
+                          : rt_call_function_2(fun, self, arg);
+}
+
+// Create a list object holding n_args elements.
+// items are in reverse order: items[n_args - 1] becomes element 0.
+// Storage for at least 4 elements is always allocated.
+py_obj_t rt_build_list(int n_args, py_obj_t *items) {
+    py_obj_base_t *list = m_new(py_obj_base_t, 1);
+    list->kind = O_LIST;
+    list->u_list.alloc = (n_args < 4) ? 4 : n_args;
+    list->u_list.len = n_args;
+    list->u_list.items = m_new(py_obj_t, list->u_list.alloc);
+
+    // copy while reversing: read from the back of items, write from the front
+    int dst = 0;
+    for (int src = n_args - 1; src >= 0; src--) {
+        list->u_list.items[dst] = items[src];
+        dst += 1;
+    }
+    return list;
+}
+
+// Look up index in the set object o_in.
+// The set is a hash table probed linearly from hash % alloc; a NULL slot
+// marks the end of a probe sequence.
+//
+// Returns the stored element that compares equal to index when there is one.
+// Otherwise, if add_if_not_found is true, index is inserted and returned;
+// if it is false, NULL is returned.
+//
+// When an insertion would leave no free slot, a larger table is allocated and
+// every existing element is re-inserted before the search restarts.
+py_obj_t py_set_lookup(py_obj_t o_in, py_obj_t index, bool add_if_not_found) {
+    assert(IS_O(o_in, O_SET));
+    py_obj_base_t *o = o_in;
+    int hash = py_obj_hash(index);
+    int pos = hash % o->u_set.alloc;
+    for (;;) {
+        py_obj_t elem = o->u_set.table[pos];
+        if (elem == NULL) {
+            // not in table
+            if (!add_if_not_found) {
+                return NULL;
+            }
+            if (o->u_set.used + 1 >= o->u_set.alloc) {
+                // not enough room in table, rehash it
+                int old_alloc = o->u_set.alloc;
+                py_obj_t *old_table = o->u_set.table;
+                o->u_set.alloc = get_doubling_prime_greater_or_equal_to(o->u_set.alloc + 1);
+                o->u_set.used = 0;
+                o->u_set.table = m_new(py_obj_t, o->u_set.alloc);
+                // empty slots are recognised by NULL, so the fresh table must
+                // be cleared before anything is inserted (as rt_build_set does)
+                for (int i = 0; i < o->u_set.alloc; i++) {
+                    o->u_set.table[i] = NULL;
+                }
+                for (int i = 0; i < old_alloc; i++) {
+                    if (old_table[i] != NULL) {
+                        py_set_lookup(o, old_table[i], true);
+                    }
+                }
+                m_free(old_table);
+                // restart the search for the new element
+                pos = hash % o->u_set.alloc;
+            } else {
+                o->u_set.used += 1;
+                o->u_set.table[pos] = index;
+                return index;
+            }
+        } else if (py_obj_equal(elem, index)) {
+            // found it
+            return elem;
+        } else {
+            // not yet found, keep searching in this table
+            pos = (pos + 1) % o->u_set.alloc;
+        }
+    }
+}
+
+// Create a set object containing the n_args objects in items.
+// The table size comes from get_doubling_prime_greater_or_equal_to(n_args + 1)
+// and every slot is cleared to NULL before the items are inserted.
+py_obj_t rt_build_set(int n_args, py_obj_t *items) {
+    py_obj_base_t *set = m_new(py_obj_base_t, 1);
+    set->kind = O_SET;
+    set->u_set.used = 0;
+    set->u_set.alloc = get_doubling_prime_greater_or_equal_to(n_args + 1);
+    set->u_set.table = m_new(py_obj_t, set->u_set.alloc);
+
+    // NULL marks an empty slot
+    for (int slot = 0; slot < set->u_set.alloc; slot++) {
+        set->u_set.table[slot] = NULL;
+    }
+
+    for (py_obj_t *item = items; item < items + n_args; item++) {
+        py_set_lookup(set, *item, true);
+    }
+    return set;
+}
+
+// Create an empty map (dict) object.  n_args is passed on to py_map_init
+// together with the MAP_PY_OBJ kind.
+py_obj_t rt_build_map(int n_args) {
+    py_obj_base_t *map_obj = m_new(py_obj_base_t, 1);
+    py_map_init(&map_obj->u_map, MAP_PY_OBJ, n_args);
+    map_obj->kind = O_MAP;
+    return map_obj;
+}
+
+// Store value under key in map (requesting a new entry if the key is
+// missing) and return the map itself.
+py_obj_t rt_store_map(py_obj_t map, py_obj_t key, py_obj_t value) {
+    assert(IS_O(map, O_MAP)); // should always be
+    py_map_elem_t *slot = py_map_lookup(map, key, true);
+    slot->value = value;
+    return map;
+}
+
+// Implement base[index] = value.
+// For a list with a small-int index, negative indices count from the end
+// and an out-of-range index trips assert(0).  For a map, the value is stored
+// under the key index.  Any other combination trips assert(0).
+void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t value) {
+    if (IS_O(base, O_LIST) && IS_SMALL_INT(index)) {
+        // list store
+        py_obj_base_t *list = base;
+        int pos = FROM_SMALL_INT(index);
+        if (pos < 0) {
+            pos += list->u_list.len;
+        }
+        if (pos < 0 || pos >= list->u_list.len) {
+            assert(0);
+            return;
+        }
+        list->u_list.items[pos] = value;
+        return;
+    }
+
+    if (IS_O(base, O_MAP)) {
+        // map store
+        py_map_lookup(base, index, true)->value = value;
+        return;
+    }
+
+    assert(0);
+}
+
+// Create an O_BOUND_METH object that pairs the method meth with the
+// object self it is bound to.
+py_obj_t build_bound_method(py_obj_t self, py_obj_t meth) {
+    py_obj_base_t *bound = m_new(py_obj_base_t, 1);
+    bound->kind = O_BOUND_METH;
+    bound->u_bound_meth.self = self;
+    bound->u_bound_meth.meth = meth;
+    return bound;
+}
+
+// Load the attribute attr of base and return it.
+//  - list.append yields a bound method wrapping fun_list_append;
+//  - for a class object the attribute is looked up in the class map.
+// A missing attribute prints a message and trips assert(0); py_const_none is
+// returned if asserts are compiled out, so a NULL lookup result is never
+// dereferenced.
+py_obj_t rt_load_attr(py_obj_t base, qstr attr) {
+    DEBUG_OP_printf("load %s\n", qstr_str(attr));
+    if (IS_O(base, O_LIST) && attr == q_append) {
+        return build_bound_method(base, fun_list_append);
+    } else if (IS_O(base, O_CLASS)) {
+        py_obj_base_t *o = base;
+        py_map_elem_t *elem = py_qstr_map_lookup(o->u_class.map, attr, false);
+        if (elem == NULL) {
+            printf("Nope! %s\n", qstr_str(attr));
+            assert(0);
+            return py_const_none;
+        }
+        return elem->value;
+    } else {
+        printf("AttributeError: '%s' object has no attribute '%s'\n", py_obj_get_type_str(base), qstr_str(attr));
+        assert(0);
+        return py_const_none;
+    }
+}
+
+// Load a method for a following method call, writing two slots of dest:
+// dest[1] receives the callable and dest[0] receives the self object, or
+// NULL when the callable needs no self prepended.
+// list.append is special-cased so that no bound-method object is created;
+// every other attribute goes through rt_load_attr.
+void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest) {
+    DEBUG_OP_printf("load method %s\n", qstr_str(attr));
+
+    if (!(IS_O(base, O_LIST) && attr == q_append)) {
+        // generic path: whatever rt_load_attr returns is called without self
+        dest[1] = rt_load_attr(base, attr);
+        dest[0] = NULL;
+        return;
+    }
+
+    dest[1] = fun_list_append;
+    dest[0] = base;
+}
+
+// Table of runtime entry points, indexed by rt_fun_kind_t.
+// Each slot is named explicitly so that an entry cannot drift out of step
+// with the enum.
+void *rt_fun_table[RT_F_NUMBER_OF] = {
+    [RT_F_LOAD_CONST_STR] = rt_load_const_str,
+    [RT_F_LOAD_NAME] = rt_load_name,
+    [RT_F_LOAD_GLOBAL] = rt_load_global,
+    [RT_F_LOAD_ATTR] = rt_load_attr,
+    [RT_F_LOAD_METHOD] = rt_load_method,
+    [RT_F_STORE_NAME] = rt_store_name,
+    [RT_F_STORE_SUBSCR] = rt_store_subscr,
+    [RT_F_IS_TRUE] = rt_is_true,
+    [RT_F_UNARY_OP] = rt_unary_op,
+    [RT_F_BUILD_LIST] = rt_build_list,
+    [RT_F_BUILD_MAP] = rt_build_map,
+    [RT_F_STORE_MAP] = rt_store_map,
+    [RT_F_BUILD_SET] = rt_build_set,
+    [RT_F_MAKE_FUNCTION_FROM_ID] = rt_make_function_from_id,
+    [RT_F_CALL_FUNCTION_0] = rt_call_function_0,
+    [RT_F_CALL_FUNCTION_1] = rt_call_function_1,
+    [RT_F_CALL_FUNCTION_2] = rt_call_function_2,
+    [RT_F_CALL_METHOD_1] = rt_call_method_1,
+    [RT_F_CALL_METHOD_2] = rt_call_method_2,
+    [RT_F_BINARY_OP] = rt_binary_op,
+    [RT_F_COMPARE_OP] = rt_compare_op,
+};
+
+/*
+void rt_f_vector(rt_fun_kind_t fun_kind) {
+ (rt_f_table[fun_kind])();
+}
+*/
diff --git a/py/runtime.h b/py/runtime.h
new file mode 100644
index 0000000000..4c842b235e
--- /dev/null
+++ b/py/runtime.h
@@ -0,0 +1,121 @@
+// Operator codes for the op argument of rt_unary_op.
+typedef enum {
+ RT_UNARY_OP_NOT,
+ RT_UNARY_OP_POSITIVE,
+ RT_UNARY_OP_NEGATIVE,
+ RT_UNARY_OP_INVERT,
+} rt_unary_op_t;
+
+// Operator codes for the op argument of rt_binary_op.
+// The plain operators come first, followed by an in-place (augmented
+// assignment) variant of each one except RT_BINARY_OP_SUBSCR.
+typedef enum {
+ RT_BINARY_OP_SUBSCR,
+ RT_BINARY_OP_OR,
+ RT_BINARY_OP_XOR,
+ RT_BINARY_OP_AND,
+ RT_BINARY_OP_LSHIFT,
+ RT_BINARY_OP_RSHIFT,
+ RT_BINARY_OP_ADD,
+ RT_BINARY_OP_SUBTRACT,
+ RT_BINARY_OP_MULTIPLY,
+ RT_BINARY_OP_FLOOR_DIVIDE,
+ RT_BINARY_OP_TRUE_DIVIDE,
+ RT_BINARY_OP_MODULO,
+ RT_BINARY_OP_POWER,
+ RT_BINARY_OP_INPLACE_OR,
+ RT_BINARY_OP_INPLACE_XOR,
+ RT_BINARY_OP_INPLACE_AND,
+ RT_BINARY_OP_INPLACE_LSHIFT,
+ RT_BINARY_OP_INPLACE_RSHIFT,
+ RT_BINARY_OP_INPLACE_ADD,
+ RT_BINARY_OP_INPLACE_SUBTRACT,
+ RT_BINARY_OP_INPLACE_MULTIPLY,
+ RT_BINARY_OP_INPLACE_FLOOR_DIVIDE,
+ RT_BINARY_OP_INPLACE_TRUE_DIVIDE,
+ RT_BINARY_OP_INPLACE_MODULO,
+ RT_BINARY_OP_INPLACE_POWER,
+} rt_binary_op_t;
+
+// Operator codes for the op argument of rt_compare_op.
+typedef enum {
+ RT_COMPARE_OP_LESS,
+ RT_COMPARE_OP_MORE,
+ RT_COMPARE_OP_EQUAL,
+ RT_COMPARE_OP_LESS_EQUAL,
+ RT_COMPARE_OP_MORE_EQUAL,
+ RT_COMPARE_OP_NOT_EQUAL,
+ RT_COMPARE_OP_IN,
+ RT_COMPARE_OP_NOT_IN,
+ RT_COMPARE_OP_IS,
+ RT_COMPARE_OP_IS_NOT,
+ RT_COMPARE_OP_EXCEPTION_MATCH,
+} rt_compare_op_t;
+
+// Identifiers for the runtime entry points.  Each value is the index of the
+// corresponding function in rt_fun_table; RT_F_NUMBER_OF is the table size
+// and must stay last.
+typedef enum {
+ RT_F_LOAD_CONST_STR = 0,
+ RT_F_LOAD_NAME,
+ RT_F_LOAD_GLOBAL,
+ RT_F_LOAD_ATTR,
+ RT_F_LOAD_METHOD,
+ RT_F_STORE_NAME,
+ RT_F_STORE_SUBSCR,
+ RT_F_IS_TRUE,
+ RT_F_UNARY_OP,
+ RT_F_BUILD_LIST,
+ RT_F_BUILD_MAP,
+ RT_F_STORE_MAP,
+ RT_F_BUILD_SET,
+ RT_F_MAKE_FUNCTION_FROM_ID,
+ RT_F_CALL_FUNCTION_0,
+ RT_F_CALL_FUNCTION_1,
+ RT_F_CALL_FUNCTION_2,
+ RT_F_CALL_METHOD_1,
+ RT_F_CALL_METHOD_2,
+ RT_F_BINARY_OP,
+ RT_F_COMPARE_OP,
+ RT_F_NUMBER_OF,
+} rt_fun_kind_t;
+
+// table of runtime entry points, indexed by rt_fun_kind_t
+extern void *rt_fun_table[RT_F_NUMBER_OF];
+
+// generic handle for any runtime object
+typedef machine_ptr_t py_obj_t; // must be of pointer size
+// native function pointer types, by number of arguments
+typedef py_obj_t (*py_fun_0_t)();
+typedef py_obj_t (*py_fun_1_t)(py_obj_t);
+typedef py_obj_t (*py_fun_2_t)(py_obj_t, py_obj_t);
+// native function pointer with an unspecified argument list
+typedef py_obj_t (*py_fun_t)();
+
+// the singleton constant objects
+extern py_obj_t py_const_none;
+extern py_obj_t py_const_false;
+extern py_obj_t py_const_true;
+
+// runtime set-up and tear-down
+void rt_init();
+void rt_deinit();
+// registration of compiled code under a unique code id
+int rt_get_new_unique_code_id();
+void rt_assign_native_code(int unique_code_id, py_fun_t f, uint len, int n_args);
+void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args);
+py_fun_t rt_get_code(qstr id);
+// object helpers
+void py_obj_print(py_obj_t o);
+int rt_is_true(py_obj_t arg);
+int rt_get_int(py_obj_t arg);
+// loading and storing names
+py_obj_t rt_load_const_str(qstr qstr);
+//py_obj_t rt_load_const_code(qstr qstr);
+py_obj_t rt_load_name(qstr qstr);
+py_obj_t rt_load_global(qstr qstr);
+py_obj_t rt_load_build_class();
+void rt_store_name(qstr qstr, py_obj_t obj);
+// operators; op is an rt_unary_op_t, rt_binary_op_t or rt_compare_op_t value
+py_obj_t rt_unary_op(int op, py_obj_t arg);
+py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs);
+py_obj_t rt_compare_op(int op, py_obj_t lhs, py_obj_t rhs);
+// creating function objects
+py_obj_t rt_make_function_from_id(int unique_code_id);
+py_obj_t rt_make_function_0(py_fun_0_t f);
+py_obj_t rt_make_function_1(py_fun_1_t f);
+py_obj_t rt_make_function_2(py_fun_2_t f);
+py_obj_t rt_make_function(int n_args, py_fun_t code);
+// calling functions and methods
+py_obj_t rt_call_function_0(py_obj_t fun);
+py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg);
+py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2);
+py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self);
+py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg);
+// building and updating containers; rt_build_list takes items in reverse order
+py_obj_t rt_build_list(int n_args, py_obj_t *items);
+py_obj_t rt_build_map(int n_args);
+py_obj_t rt_store_map(py_obj_t map, py_obj_t key, py_obj_t value);
+py_obj_t rt_build_set(int n_args, py_obj_t *items);
+void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t val);
+// attribute access; rt_load_method writes dest[0] (self or NULL) and dest[1] (callable)
+py_obj_t rt_load_attr(py_obj_t base, qstr attr);
+void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest);
diff --git a/py/scope.c b/py/scope.c
new file mode 100644
index 0000000000..a715b2b506
--- /dev/null
+++ b/py/scope.c
@@ -0,0 +1,218 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "machine.h"
+#include "parse.h"
+#include "scope.h"
+
+// Allocate and initialise a scope of the given kind for parse node pn.
+// The scope starts with no parent, no next scope and an empty identifier
+// table with room for 8 entries.  simple_name is taken from the first child
+// of pn for functions and classes, is 0 for a module, and is a fixed
+// "<...>" name for lambdas, comprehensions and generator expressions.
+// All counters, including stack_size, start at 0.
+scope_t *scope_new(scope_kind_t kind, py_parse_node_t pn) {
+    scope_t *scope = m_new(scope_t, 1);
+    scope->kind = kind;
+    scope->parent = NULL;
+    scope->next = NULL;
+    scope->pn = pn;
+    switch (kind) {
+        case SCOPE_MODULE:
+            scope->simple_name = 0;
+            break;
+        case SCOPE_FUNCTION:
+        case SCOPE_CLASS:
+            assert(PY_PARSE_NODE_IS_STRUCT(pn));
+            scope->simple_name = PY_PARSE_NODE_LEAF_ARG(((py_parse_node_struct_t*)pn)->nodes[0]);
+            break;
+        case SCOPE_LAMBDA:
+            scope->simple_name = qstr_from_str_static("<lambda>");
+            break;
+        case SCOPE_LIST_COMP:
+            scope->simple_name = qstr_from_str_static("<listcomp>");
+            break;
+        case SCOPE_DICT_COMP:
+            scope->simple_name = qstr_from_str_static("<dictcomp>");
+            break;
+        case SCOPE_SET_COMP:
+            scope->simple_name = qstr_from_str_static("<setcomp>");
+            break;
+        case SCOPE_GEN_EXPR:
+            scope->simple_name = qstr_from_str_static("<genexpr>");
+            break;
+        default:
+            assert(0);
+    }
+    scope->id_info_alloc = 8;
+    scope->id_info_len = 0;
+    scope->id_info = m_new(id_info_t, scope->id_info_alloc);
+
+    scope->flags = 0;
+    scope->num_params = 0;
+    /* not needed
+    scope->num_default_params = 0;
+    scope->num_dict_params = 0;
+    */
+    scope->num_locals = 0;
+    // scope_print_info reads stack_size, so never leave it uninitialised
+    scope->stack_size = 0;
+    scope->unique_code_id = 0;
+
+    return scope;
+}
+
+// Return the identifier entry for qstr in scope, creating it if necessary.
+// *added is set to false when an existing entry is returned and to true when
+// a new one was created.  A new entry starts with param == false, kind == 0
+// and local_num == 0.
+//
+// New entries are inserted so that the table stays sorted by name; the
+// returned pointer is only valid until the next insertion, which may move or
+// reallocate the table.
+id_info_t *scope_find_or_add_id(scope_t *scope, qstr qstr, bool *added) {
+    for (int i = 0; i < scope->id_info_len; i++) {
+        if (scope->id_info[i].qstr == qstr) {
+            *added = false;
+            return &scope->id_info[i];
+        }
+    }
+
+    // make sure we have enough memory
+    if (scope->id_info_len >= scope->id_info_alloc) {
+        scope->id_info_alloc *= 2;
+        scope->id_info = m_renew(id_info_t, scope->id_info, scope->id_info_alloc);
+    }
+
+    // sort insert into id_info array, so we are equivalent to CPython (no other reason to do it)
+    const char *new_name = qstr_str(qstr); // loop invariant, look it up once
+    id_info_t *id_info;
+    scope->id_info_len += 1;
+    for (int i = scope->id_info_len - 1;; i--) {
+        if (i == 0 || strcmp(qstr_str(scope->id_info[i - 1].qstr), new_name) < 0) {
+            // everything before slot i sorts before the new name
+            id_info = &scope->id_info[i];
+            break;
+        } else {
+            // shift the larger entry up by one to open a gap
+            scope->id_info[i] = scope->id_info[i - 1];
+        }
+    }
+
+    id_info->param = false;
+    id_info->kind = 0;
+    id_info->qstr = qstr;
+    id_info->local_num = 0; // the slot may hold a stale shifted entry or fresh memory
+    *added = true;
+    return id_info;
+}
+
+// Return the identifier entry for qstr in scope, or NULL if scope has none.
+// Only this scope is searched, not its parents.
+id_info_t *scope_find(scope_t *scope, qstr qstr) {
+    id_info_t *entry = scope->id_info;
+    for (int remaining = scope->id_info_len; remaining > 0; remaining--, entry++) {
+        if (entry->qstr == qstr) {
+            return entry;
+        }
+    }
+    return NULL;
+}
+
+// Return the identifier entry for qstr in the outermost scope (the one with
+// no parent) reachable from scope, or NULL if it has none.
+id_info_t *scope_find_global(scope_t *scope, qstr qstr) {
+    // climb to the root of the scope chain
+    scope_t *root = scope;
+    while (root->parent != NULL) {
+        root = root->parent;
+    }
+    return scope_find(root, qstr);
+}
+
+// Search the enclosing scopes of scope, innermost first, for an identifier
+// entry for qstr and return the first one found.  The outermost scope (the
+// one with no parent) is never searched.  Returns NULL if nothing is found.
+id_info_t *scope_find_local_in_parent(scope_t *scope, qstr qstr) {
+    scope_t *outer = scope->parent;
+    if (outer == NULL) {
+        return NULL;
+    }
+    while (outer->parent != NULL) {
+        id_info_t *found = scope_find(outer, qstr);
+        if (found != NULL) {
+            return found;
+        }
+        outer = outer->parent;
+    }
+    return NULL;
+}
+
+// Mark qstr as closed over in the enclosing scopes of scope.
+// Walking outwards from the parent (the outermost scope is excluded), every
+// scope that does not know the name gets a new ID_INFO_KIND_FREE entry.  The
+// walk stops at the first scope that already has an entry: a local there is
+// turned into a cell, while free and cell entries are left as they are.
+// It is an assertion failure if no enclosing scope has the name.
+void scope_close_over_in_parents(scope_t *scope, qstr qstr) {
+    assert(scope->parent != NULL); // we should have at least 1 parent
+
+    scope_t *outer = scope->parent;
+    while (outer->parent != NULL) {
+        id_info_t *entry = scope_find(outer, qstr);
+
+        if (entry != NULL) {
+            // variable is declared in this scope, so finish
+            switch (entry->kind) {
+                case ID_INFO_KIND_LOCAL:
+                    // variable local to this scope, close it over
+                    entry->kind = ID_INFO_KIND_CELL;
+                    break;
+                case ID_INFO_KIND_FREE: // variable already closed over in a parent scope
+                case ID_INFO_KIND_CELL: // variable already closed over in this scope
+                    break;
+                default:
+                    assert(0); // TODO
+            }
+            return;
+        }
+
+        // variable not declared in this scope, so declare it as free and keep searching parents
+        bool added;
+        entry = scope_find_or_add_id(outer, qstr, &added);
+        assert(added);
+        entry->kind = ID_INFO_KIND_FREE;
+
+        outer = outer->parent;
+    }
+
+    assert(0); // we should have found the variable in one of the parents
+}
+
+// Print a summary of scope s to stdout: a "code <name>" line followed by
+// its flags, parameter count, local count and stack size.
+void scope_print_info(scope_t *s) {
+    switch (s->kind) {
+        case SCOPE_MODULE:
+            printf("code <module>\n");
+            break;
+        case SCOPE_LAMBDA:
+            printf("code <lambda>\n");
+            break;
+        case SCOPE_LIST_COMP:
+            printf("code <listcomp>\n");
+            break;
+        case SCOPE_DICT_COMP:
+            printf("code <dictcomp>\n");
+            break;
+        case SCOPE_SET_COMP:
+            printf("code <setcomp>\n");
+            break;
+        case SCOPE_GEN_EXPR:
+            printf("code <genexpr>\n");
+            break;
+        default:
+            // functions and classes are printed with their own name
+            printf("code %s\n", qstr_str(s->simple_name));
+            break;
+    }
+    /*
+    printf("var global:");
+    for (int i = 0; i < s->id_info_len; i++) {
+        if (s->id_info[i].kind == ID_INFO_KIND_GLOBAL_EXPLICIT) {
+            printf(" %s", qstr_str(s->id_info[i].qstr));
+        }
+    }
+    printf("\n");
+    printf("var name:");
+    for (int i = 0; i < s->id_info_len; i++) {
+        if (s->id_info[i].kind == ID_INFO_KIND_GLOBAL_IMPLICIT) {
+            printf(" %s", qstr_str(s->id_info[i].qstr));
+        }
+    }
+    printf("\n");
+    printf("var local:");
+    for (int i = 0; i < s->id_info_len; i++) {
+        if (s->id_info[i].kind == ID_INFO_KIND_LOCAL) {
+            printf(" %s", qstr_str(s->id_info[i].qstr));
+        }
+    }
+    printf("\n");
+    printf("var free:");
+    for (int i = 0; i < s->id_info_len; i++) {
+        if (s->id_info[i].kind == ID_INFO_KIND_FREE) {
+            printf(" %s", qstr_str(s->id_info[i].qstr));
+        }
+    }
+    printf("\n");
+    */
+    printf(" flags %04x\n", s->flags);
+    printf(" argcount %d\n", s->num_params);
+    printf(" nlocals %d\n", s->num_locals);
+    printf(" stacksize %d\n", s->stack_size);
+}
diff --git a/py/scope.h b/py/scope.h
new file mode 100644
index 0000000000..2d620fb4fb
--- /dev/null
+++ b/py/scope.h
@@ -0,0 +1,58 @@
+// Classification of an identifier within a scope; stored in id_info_t.kind.
+// A freshly added identifier has kind 0, i.e. ID_INFO_KIND_GLOBAL_IMPLICIT.
+enum {
+ ID_INFO_KIND_GLOBAL_IMPLICIT,
+ ID_INFO_KIND_GLOBAL_EXPLICIT,
+ ID_INFO_KIND_LOCAL, // in a function f, written and only referenced by f
+ ID_INFO_KIND_CELL, // in a function f, read/written by children of f
+ ID_INFO_KIND_FREE, // in a function f, belongs to the parent of f
+};
+
+// Information recorded about one identifier in a scope.
+typedef struct _id_info_t {
+ bool param; // presumably true when the identifier is a parameter -- confirm against callers
+ int kind; // one of the ID_INFO_KIND_* values
+ qstr qstr; // the identifier's interned name
+ int local_num; // when it's an ID_INFO_KIND_LOCAL this is the unique number of the local
+} id_info_t;
+
+// Bit flags for scope_t.flags.
+// taken from python source, Include/code.h
+#define SCOPE_FLAG_OPTIMISED 0x0001
+#define SCOPE_FLAG_NEWLOCALS 0x0002
+#define SCOPE_FLAG_VARARGS 0x0004
+#define SCOPE_FLAG_VARKEYWORDS 0x0008
+#define SCOPE_FLAG_NESTED 0x0010
+#define SCOPE_FLAG_GENERATOR 0x0020
+/* The SCOPE_FLAG_NOFREE flag is set if there are no free or cell variables.
+ This information is redundant, but it allows a single flag test
+ to determine whether there is any extra work to be done when the
+ call frame is set up.
+*/
+#define SCOPE_FLAG_NOFREE 0x0040
+
+// scope is a "block" in Python parlance
+// the kinds of scope that scope_new can create
+typedef enum { SCOPE_MODULE, SCOPE_FUNCTION, SCOPE_LAMBDA, SCOPE_LIST_COMP, SCOPE_DICT_COMP, SCOPE_SET_COMP, SCOPE_GEN_EXPR, SCOPE_CLASS } scope_kind_t;
+// per-scope state: identity, identifier table and code-object statistics
+typedef struct _scope_t {
+ scope_kind_t kind;
+ struct _scope_t *parent; // enclosing scope; NULL for the outermost scope
+ struct _scope_t *next; // NOTE(review): looks like a link to the next scope in a list -- confirm
+ py_parse_node_t pn; // parse node this scope was created for
+ qstr simple_name; // name of the scope; 0 for a module
+ int id_info_alloc; // number of entries allocated in id_info
+ int id_info_len; // number of entries in use in id_info
+ id_info_t *id_info; // identifier table, kept sorted by name
+ int flags; // presumably a combination of SCOPE_FLAG_* bits -- confirm
+ int num_params;
+ /* not needed
+ int num_default_params;
+ int num_dict_params;
+ */
+ int num_locals;
+ int stack_size;
+ int unique_code_id;
+} scope_t;
+
+// create a new scope of the given kind for parse node pn
+scope_t *scope_new(scope_kind_t kind, py_parse_node_t pn);
+// find qstr in scope, adding it if missing; *added reports which happened
+id_info_t *scope_find_or_add_id(scope_t *scope, qstr qstr, bool *added);
+// find qstr in scope only; NULL if absent
+id_info_t *scope_find(scope_t *scope, qstr qstr);
+// find qstr in the outermost scope; NULL if absent
+id_info_t *scope_find_global(scope_t *scope, qstr qstr);
+// find qstr in the enclosing scopes, excluding the outermost; NULL if absent
+id_info_t *scope_find_local_in_parent(scope_t *scope, qstr qstr);
+// mark qstr as closed over (free/cell) in the enclosing scopes
+void scope_close_over_in_parents(scope_t *scope, qstr qstr);
+// print a summary of the scope to stdout
+void scope_print_info(scope_t *s);