103 files changed, 3223 insertions, 2671 deletions
diff --git a/py/argcheck.c b/py/argcheck.c
index 8cef10b165..9f225345d5 100644
--- a/py/argcheck.c
+++ b/py/argcheck.c
@@ -37,7 +37,7 @@ void mp_arg_check_num(size_t n_args, size_t n_kw, size_t n_args_min, size_t n_ar
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
             mp_arg_error_terse_mismatch();
         } else {
-            mp_raise_msg(&mp_type_TypeError, "function does not take keyword arguments");
+            mp_raise_TypeError("function does not take keyword arguments");
         }
     }
 
@@ -115,7 +115,7 @@ void mp_arg_parse_all(size_t n_pos, const mp_obj_t *pos, mp_map_t *kws, size_t n
             mp_arg_error_terse_mismatch();
         } else {
             // TODO better error message
-            mp_raise_msg(&mp_type_TypeError, "extra positional arguments given");
+            mp_raise_TypeError("extra positional arguments given");
         }
     }
     if (kws_found < kws->used) {
@@ -123,7 +123,7 @@ void mp_arg_parse_all(size_t n_pos, const mp_obj_t *pos, mp_map_t *kws, size_t n
             mp_arg_error_terse_mismatch();
         } else {
             // TODO better error message
-            mp_raise_msg(&mp_type_TypeError, "extra keyword arguments given");
+            mp_raise_TypeError("extra keyword arguments given");
         }
     }
 }
@@ -136,7 +136,7 @@ void mp_arg_parse_all_kw_array(size_t n_pos, size_t n_kw, const mp_obj_t *args,
 
 #if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE || _MSC_VER
 NORETURN void mp_arg_error_terse_mismatch(void) {
-    mp_raise_msg(&mp_type_TypeError, "argument num/types mismatch");
+    mp_raise_TypeError("argument num/types mismatch");
 }
 #endif
 
diff --git a/py/asmxtensa.h b/py/asmxtensa.h
index 12083252eb..7db6c0d3dc 100644
--- a/py/asmxtensa.h
+++ b/py/asmxtensa.h
@@ -73,11 +73,11 @@
 
 // macros for encoding instructions (little endian versions)
 #define ASM_XTENSA_ENCODE_RRR(op0, op1, op2, r, s, t) \
-    (((op2) << 20) | ((op1) << 16) | ((r) << 12) | ((s) << 8) | ((t) << 4) | (op0))
+    ((((uint32_t)op2) << 20) | (((uint32_t)op1) << 16) | ((r) << 12) | ((s) << 8) | ((t) << 4) | (op0))
 #define ASM_XTENSA_ENCODE_RRI4(op0, op1, r, s, t, imm4) \
     (((imm4) << 20) | ((op1) << 16) | ((r) << 12) | ((s) << 8) | ((t) << 4) | (op0))
 #define ASM_XTENSA_ENCODE_RRI8(op0, r, s, t, imm8) \
-    (((imm8) << 16) | ((r) << 12) | ((s) << 8) | ((t) << 4) | (op0))
+    ((((uint32_t)imm8) << 16) | ((r) << 12) | ((s) << 8) | ((t) << 4) | (op0))
 #define ASM_XTENSA_ENCODE_RI16(op0, t, imm16) \
     (((imm16) << 8) | ((t) << 4) | (op0))
 #define ASM_XTENSA_ENCODE_RSR(op0, op1, op2, rs, t) \
@@ -85,7 +85,7 @@
 #define ASM_XTENSA_ENCODE_CALL(op0, n, offset) \
     (((offset) << 6) | ((n) << 4) | (op0))
 #define ASM_XTENSA_ENCODE_CALLX(op0, op1, op2, r, s, m, n) \
-    (((op2) << 20) | ((op1) << 16) | ((r) << 12) | ((s) << 8) | ((m) << 6) | ((n) << 4) | (op0))
+    ((((uint32_t)op2) << 20) | (((uint32_t)op1) << 16) | ((r) << 12) | ((s) << 8) | ((m) << 6) | ((n) << 4) | (op0))
 #define ASM_XTENSA_ENCODE_BRI8(op0, r, s, m, n, imm8) \
     (((imm8) << 16) | ((r) << 12) | ((s) << 8) | ((m) << 6) | ((n) << 4) | (op0))
 #define ASM_XTENSA_ENCODE_BRI12(op0, s, m, n, imm12) \
diff --git a/py/bc.c b/py/bc.c
index 07de08fc36..fc17946839 100644
--- a/py/bc.c
+++ b/py/bc.c
@@ -54,6 +54,16 @@ mp_uint_t mp_decode_uint(const byte **ptr) {
     return unum;
 }
 
+// This function is used to help reduce stack usage at the caller, for the case when
+// the caller doesn't need to increase the ptr argument.  If ptr is a local variable
+// and the caller uses mp_decode_uint(&ptr) instead of this function, then the compiler
+// must allocate a slot on the stack for ptr, and this slot cannot be reused for
+// anything else in the function because the pointer may have been stored in a global
+// and reused later in the function.
+mp_uint_t mp_decode_uint_value(const byte *ptr) {
+    return mp_decode_uint(&ptr);
+}
+
 STATIC NORETURN void fun_pos_args_mismatch(mp_obj_fun_bc_t *f, size_t expected, size_t given) {
 #if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE
     // generic message, used also for other argument issues
@@ -86,25 +96,26 @@ STATIC void dump_args(const mp_obj_t *a, size_t sz) {
 
 // On entry code_state should be allocated somewhere (stack/heap) and
 // contain the following valid entries:
-//    - code_state->ip should contain the offset in bytes from the start of
-//      the bytecode chunk to just after n_state and n_exc_stack
-//    - code_state->n_state should be set to the state size (locals plus stack)
-void mp_setup_code_state(mp_code_state_t *code_state, mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, const mp_obj_t *args) {
+//    - code_state->fun_bc should contain a pointer to the function object
+//    - code_state->ip should contain the offset in bytes from the pointer
+//      code_state->fun_bc->bytecode to the entry n_state (0 for bytecode, non-zero for native)
+void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw, const mp_obj_t *args) {
     // This function is pretty complicated.  It's main aim is to be efficient in speed and RAM
     // usage for the common case of positional only args.
-    size_t n_state = code_state->n_state;
+
+    // get the function object that we want to set up (could be bytecode or native code)
+    mp_obj_fun_bc_t *self = code_state->fun_bc;
 
     // ip comes in as an offset into bytecode, so turn it into a true pointer
     code_state->ip = self->bytecode + (size_t)code_state->ip;
 
-    // store pointer to constant table
-    code_state->const_table = self->const_table;
-
     #if MICROPY_STACKLESS
     code_state->prev = NULL;
     #endif
 
     // get params
+    size_t n_state = mp_decode_uint(&code_state->ip);
+    mp_decode_uint(&code_state->ip); // skip n_exc_stack
     size_t scope_flags = *code_state->ip++;
     size_t n_pos_args = *code_state->ip++;
     size_t n_kwonly_args = *code_state->ip++;
@@ -168,7 +179,7 @@ void mp_setup_code_state(mp_code_state_t *code_state, mp_obj_fun_bc_t *self, siz
         }
 
         // get pointer to arg_names array
-        const mp_obj_t *arg_names = (const mp_obj_t*)code_state->const_table;
+        const mp_obj_t *arg_names = (const mp_obj_t*)self->const_table;
 
         for (size_t i = 0; i < n_kw; i++) {
             // the keys in kwargs are expected to be qstr objects
@@ -185,7 +196,12 @@ void mp_setup_code_state(mp_code_state_t *code_state, mp_obj_fun_bc_t *self, siz
             }
             // Didn't find name match with positional args
             if ((scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) == 0) {
-                mp_raise_msg(&mp_type_TypeError, "function does not take keyword arguments");
+                if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
+                    mp_raise_TypeError("unexpected keyword argument");
+                } else {
+                    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
+                        "unexpected keyword argument '%q'", MP_OBJ_QSTR_VALUE(wanted_arg_name)));
+                }
             }
             mp_obj_dict_store(dict, kwargs[2 * i], kwargs[2 * i + 1]);
 continue2:;
@@ -234,7 +250,7 @@ continue2:;
     } else {
         // no keyword arguments given
         if (n_kwonly_args != 0) {
-            mp_raise_msg(&mp_type_TypeError, "function missing keyword-only argument");
+            mp_raise_TypeError("function missing keyword-only argument");
         }
         if ((scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) != 0) {
             *var_pos_kw_args = mp_obj_new_dict(0);
@@ -244,13 +260,8 @@ continue2:;
     // get the ip and skip argument names
     const byte *ip = code_state->ip;
 
-    // store pointer to code_info and jump over it
-    {
-        code_state->code_info = ip;
-        const byte *ip2 = ip;
-        size_t code_info_size = mp_decode_uint(&ip2);
-        ip += code_info_size;
-    }
+    // jump over code info (source file and line-number mapping)
+    ip += mp_decode_uint_value(ip);
 
     // bytecode prelude: initialise closed over variables
     size_t local_num;
@@ -293,7 +304,7 @@ STATIC const byte opcode_format_table[64] = {
     OC4(U, U, U, U), // 0x0c-0x0f
     OC4(B, B, B, U), // 0x10-0x13
     OC4(V, U, Q, V), // 0x14-0x17
-    OC4(B, U, V, V), // 0x18-0x1b
+    OC4(B, V, V, Q), // 0x18-0x1b
     OC4(Q, Q, Q, Q), // 0x1c-0x1f
     OC4(B, B, V, V), // 0x20-0x23
     OC4(Q, Q, Q, B), // 0x24-0x27
diff --git a/py/bc.h b/py/bc.h
index 4707da1793..e8d4286125 100644
--- a/py/bc.h
+++ b/py/bc.h
@@ -28,6 +28,7 @@
 
 #include "py/runtime.h"
 #include "py/obj.h"
+#include "py/objfun.h"
 
 // bytecode layout:
 //
@@ -70,9 +71,12 @@ typedef struct _mp_exc_stack_t {
 } mp_exc_stack_t;
 
 typedef struct _mp_code_state_t {
-    const byte *code_info;
+    // The fun_bc entry points to the underlying function object that is being executed.
+    // It is needed to access the start of bytecode and the const_table.
+    // It is also needed to prevent the GC from reclaiming the bytecode during execution,
+    // because the ip pointer below will always point to the interior of the bytecode.
+    mp_obj_fun_bc_t *fun_bc;
     const byte *ip;
-    const mp_uint_t *const_table;
     mp_obj_t *sp;
     // bit 0 is saved currently_in_except_block value
     mp_exc_stack_t *exc_sp;
@@ -80,7 +84,6 @@ typedef struct _mp_code_state_t {
     #if MICROPY_STACKLESS
     struct _mp_code_state_t *prev;
     #endif
-    size_t n_state;
     // Variable-length
     mp_obj_t state[0];
     // Variable-length, never accessed by name, only as (void*)(state + n_state)
@@ -88,15 +91,15 @@ typedef struct _mp_code_state_t {
 } mp_code_state_t;
 
 mp_uint_t mp_decode_uint(const byte **ptr);
+mp_uint_t mp_decode_uint_value(const byte *ptr);
 
 mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp_obj_t inject_exc);
 mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t func, size_t n_args, size_t n_kw, const mp_obj_t *args);
-struct _mp_obj_fun_bc_t;
-void mp_setup_code_state(mp_code_state_t *code_state, struct _mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, const mp_obj_t *args);
+void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw, const mp_obj_t *args);
 void mp_bytecode_print(const void *descr, const byte *code, mp_uint_t len, const mp_uint_t *const_table);
-void mp_bytecode_print2(const byte *code, mp_uint_t len);
+void mp_bytecode_print2(const byte *code, size_t len, const mp_uint_t *const_table);
 const byte *mp_bytecode_print_str(const byte *ip);
-#define mp_bytecode_print_inst(code) mp_bytecode_print2(code, 1)
+#define mp_bytecode_print_inst(code, const_table) mp_bytecode_print2(code, 1, const_table)
 
 // Helper macros to access pointer with least significant bits holding flags
 #define MP_TAGPTR_PTR(x) ((void*)((uintptr_t)(x) & ~((uintptr_t)3)))
diff --git a/py/bc0.h b/py/bc0.h
index 5ff9e50a89..b5650abe41 100644
--- a/py/bc0.h
+++ b/py/bc0.h
@@ -37,12 +37,13 @@
 #define MP_BC_LOAD_CONST_OBJ     (0x17) // ptr
 #define MP_BC_LOAD_NULL          (0x18)
 
-#define MP_BC_LOAD_FAST_N        (0x1a) // uint
-#define MP_BC_LOAD_DEREF         (0x1b) // uint
-#define MP_BC_LOAD_NAME          (0x1c) // qstr
-#define MP_BC_LOAD_GLOBAL        (0x1d) // qstr
-#define MP_BC_LOAD_ATTR          (0x1e) // qstr
-#define MP_BC_LOAD_METHOD        (0x1f) // qstr
+#define MP_BC_LOAD_FAST_N        (0x19) // uint
+#define MP_BC_LOAD_DEREF         (0x1a) // uint
+#define MP_BC_LOAD_NAME          (0x1b) // qstr
+#define MP_BC_LOAD_GLOBAL        (0x1c) // qstr
+#define MP_BC_LOAD_ATTR          (0x1d) // qstr
+#define MP_BC_LOAD_METHOD        (0x1e) // qstr
+#define MP_BC_LOAD_SUPER_METHOD  (0x1f) // qstr
 #define MP_BC_LOAD_BUILD_CLASS   (0x20)
 #define MP_BC_LOAD_SUBSCR        (0x21)
 
@@ -79,6 +80,7 @@
 #define MP_BC_POP_BLOCK          (0x44)
 #define MP_BC_POP_EXCEPT         (0x45)
 #define MP_BC_UNWIND_JUMP        (0x46) // rel byte code offset, 16-bit signed, in excess; then a byte
+#define MP_BC_GET_ITER_STACK     (0x47)
 
 #define MP_BC_BUILD_TUPLE        (0x50) // uint
 #define MP_BC_BUILD_LIST         (0x51) // uint
diff --git a/py/binary.c b/py/binary.c
index d22e0f342d..34feb384ea 100644
--- a/py/binary.c
+++ b/py/binary.c
@@ -33,6 +33,7 @@
 #include "py/binary.h"
 #include "py/smallint.h"
 #include "py/objint.h"
+#include "py/runtime.h"
 
 // Helpers to work with binary-encoded data
 
@@ -100,6 +101,11 @@ size_t mp_binary_get_size(char struct_type, char val_type, mp_uint_t *palign) {
             }
         }
     }
+
+    if (size == 0) {
+        mp_raise_ValueError("bad typecode");
+    }
+
     if (palign != NULL) {
         *palign = align;
     }
@@ -321,9 +327,10 @@ void mp_binary_set_val_array(char typecode, void *p, mp_uint_t index, mp_obj_t v
             break;
         default:
             #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-            if ((typecode | 0x20) == 'q' && MP_OBJ_IS_TYPE(val_in, &mp_type_int)) {
+            if (MP_OBJ_IS_TYPE(val_in, &mp_type_int)) {
+                size_t size = mp_binary_get_size('@', typecode, NULL);
                 mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG,
-                    sizeof(long long), (byte*)&((long long*)p)[index]);
+                    size, (uint8_t*)p + index * size);
                 return;
             }
             #endif
diff --git a/py/builtin.h b/py/builtin.h
index 282eb1cc93..ec326d037b 100644
--- a/py/builtin.h
+++ b/py/builtin.h
@@ -53,6 +53,7 @@ MP_DECLARE_CONST_FUN_OBJ_3(mp_builtin_setattr_obj);
 MP_DECLARE_CONST_FUN_OBJ_0(mp_builtin_globals_obj);
 MP_DECLARE_CONST_FUN_OBJ_2(mp_builtin_hasattr_obj);
 MP_DECLARE_CONST_FUN_OBJ_1(mp_builtin_hash_obj);
+MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_help_obj);
 MP_DECLARE_CONST_FUN_OBJ_1(mp_builtin_hex_obj);
 MP_DECLARE_CONST_FUN_OBJ_1(mp_builtin_id_obj);
 MP_DECLARE_CONST_FUN_OBJ_2(mp_builtin_isinstance_obj);
@@ -72,7 +73,6 @@ MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_round_obj);
 MP_DECLARE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj);
 MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_sum_obj);
 // Defined by a port, but declared here for simplicity
-MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_help_obj);
 MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_input_obj);
 MP_DECLARE_CONST_FUN_OBJ_KW(mp_builtin_open_obj);
 
@@ -118,4 +118,6 @@ extern const mp_obj_module_t mp_module_webrepl;
 extern const mp_obj_module_t mp_module_framebuf;
 extern const mp_obj_module_t mp_module_btree;
 
+extern const char *MICROPY_PY_BUILTINS_HELP_TEXT;
+
 #endif // __MICROPY_INCLUDED_PY_BUILTIN_H__
diff --git a/py/builtinevex.c b/py/builtinevex.c
index 636f869300..d9a3833ccc 100644
--- a/py/builtinevex.c
+++ b/py/builtinevex.c
@@ -78,7 +78,7 @@ STATIC mp_obj_t mp_builtin_compile(size_t n_args, const mp_obj_t *args) {
     (void)n_args;
 
     // get the source
-    mp_uint_t str_len;
+    size_t str_len;
     const char *str = mp_obj_str_get_data(args[0], &str_len);
 
     // get the filename
@@ -95,7 +95,7 @@ STATIC mp_obj_t mp_builtin_compile(size_t n_args, const mp_obj_t *args) {
         case MP_QSTR_exec: parse_input_kind = MP_PARSE_FILE_INPUT; break;
         case MP_QSTR_eval: parse_input_kind = MP_PARSE_EVAL_INPUT; break;
         default:
-            mp_raise_msg(&mp_type_ValueError, "bad compile mode");
+            mp_raise_ValueError("bad compile mode");
     }
 
     mp_obj_code_t *code = m_new_obj(mp_obj_code_t);
@@ -128,7 +128,7 @@ STATIC mp_obj_t eval_exec_helper(size_t n_args, const mp_obj_t *args, mp_parse_i
     }
     #endif
 
-    mp_uint_t str_len;
+    size_t str_len;
     const char *str = mp_obj_str_get_data(args[0], &str_len);
 
     // create the lexer
@@ -136,9 +136,6 @@ STATIC mp_obj_t eval_exec_helper(size_t n_args, const mp_obj_t *args, mp_parse_i
     mp_lexer_t *lex;
     if (MICROPY_PY_BUILTINS_EXECFILE && parse_input_kind == MP_PARSE_SINGLE_INPUT) {
         lex = mp_lexer_new_from_file(str);
-        if (lex == NULL) {
-            nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_OSError, "could not open file '%s'", str));
-        }
         parse_input_kind = MP_PARSE_FILE_INPUT;
     } else {
         lex = mp_lexer_new_from_str_len(MP_QSTR__lt_string_gt_, str, str_len, 0);
diff --git a/py/builtinhelp.c b/py/builtinhelp.c
new file mode 100644
index 0000000000..dbcd6e00f6
--- /dev/null
+++ b/py/builtinhelp.c
@@ -0,0 +1,180 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2013-2016 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "py/builtin.h"
+#include "py/objmodule.h"
+
+#if MICROPY_PY_BUILTINS_HELP
+
+const char *mp_help_default_text =
+"Welcome to MicroPython!\n"
+"\n"
+"For online docs please visit http://docs.micropython.org/\n"
+"\n"
+"Control commands:\n"
+"  CTRL-A        -- on a blank line, enter raw REPL mode\n"
+"  CTRL-B        -- on a blank line, enter normal REPL mode\n"
+"  CTRL-C        -- interrupt a running program\n"
+"  CTRL-D        -- on a blank line, exit or do a soft reset\n"
+"  CTRL-E        -- on a blank line, enter paste mode\n"
+"\n"
+"For further help on a specific object, type help(obj)\n"
+;
+
+STATIC void mp_help_print_info_about_object(mp_obj_t name_o, mp_obj_t value) {
+    mp_print_str(MP_PYTHON_PRINTER, "  ");
+    mp_obj_print(name_o, PRINT_STR);
+    mp_print_str(MP_PYTHON_PRINTER, " -- ");
+    mp_obj_print(value, PRINT_STR);
+    mp_print_str(MP_PYTHON_PRINTER, "\n");
+}
+
+#if MICROPY_PY_BUILTINS_HELP_MODULES
+STATIC void mp_help_add_from_map(mp_obj_t list, const mp_map_t *map) {
+    for (size_t i = 0; i < map->alloc; i++) {
+        if (MP_MAP_SLOT_IS_FILLED(map, i)) {
+            mp_obj_list_append(list, map->table[i].key);
+        }
+    }
+}
+
+#if MICROPY_MODULE_FROZEN
+STATIC void mp_help_add_from_names(mp_obj_t list, const char *name) {
+    while (*name) {
+        size_t l = strlen(name);
+        // name should end in '.py' and we strip it off
+        mp_obj_list_append(list, mp_obj_new_str(name, l - 3, false));
+        name += l + 1;
+    }
+}
+#endif
+
+STATIC void mp_help_print_modules(void) {
+    mp_obj_t list = mp_obj_new_list(0, NULL);
+
+    mp_help_add_from_map(list, &mp_builtin_module_map);
+
+    #if MICROPY_MODULE_WEAK_LINKS
+    mp_help_add_from_map(list, &mp_builtin_module_weak_links_map);
+    #endif
+
+    #if MICROPY_MODULE_FROZEN_STR
+    extern const char mp_frozen_str_names[];
+    mp_help_add_from_names(list, mp_frozen_str_names);
+    #endif
+
+    #if MICROPY_MODULE_FROZEN_MPY
+    extern const char mp_frozen_mpy_names[];
+    mp_help_add_from_names(list, mp_frozen_mpy_names);
+    #endif
+
+    // sort the list so it's printed in alphabetical order
+    mp_obj_list_sort(1, &list, (mp_map_t*)&mp_const_empty_map);
+
+    // print the list of modules in a column-first order
+    #define NUM_COLUMNS (4)
+    #define COLUMN_WIDTH (18)
+    mp_uint_t len;
+    mp_obj_t *items;
+    mp_obj_list_get(list, &len, &items);
+    unsigned int num_rows = (len + NUM_COLUMNS - 1) / NUM_COLUMNS;
+    for (unsigned int i = 0; i < num_rows; ++i) {
+        unsigned int j = i;
+        for (;;) {
+            int l = mp_print_str(MP_PYTHON_PRINTER, mp_obj_str_get_str(items[j]));
+            j += num_rows;
+            if (j >= len) {
+                break;
+            }
+            int gap = COLUMN_WIDTH - l;
+            while (gap < 1) {
+                gap += COLUMN_WIDTH;
+            }
+            while (gap--) {
+                mp_print_str(MP_PYTHON_PRINTER, " ");
+            }
+        }
+        mp_print_str(MP_PYTHON_PRINTER, "\n");
+    }
+
+    // let the user know there may be other modules available from the filesystem
+    mp_print_str(MP_PYTHON_PRINTER, "Plus any modules on the filesystem\n");
+}
+#endif
+
+STATIC void mp_help_print_obj(const mp_obj_t obj) {
+    #if MICROPY_PY_BUILTINS_HELP_MODULES
+    if (obj == MP_OBJ_NEW_QSTR(MP_QSTR_modules)) {
+        mp_help_print_modules();
+        return;
+    }
+    #endif
+
+    // try to print something sensible about the given object
+    mp_print_str(MP_PYTHON_PRINTER, "object ");
+    mp_obj_print(obj, PRINT_STR);
+    mp_printf(MP_PYTHON_PRINTER, " is of type %s\n", mp_obj_get_type_str(obj));
+
+    mp_map_t *map = NULL;
+    if (MP_OBJ_IS_TYPE(obj, &mp_type_module)) {
+        map = mp_obj_dict_get_map(mp_obj_module_get_globals(obj));
+    } else {
+        mp_obj_type_t *type;
+        if (MP_OBJ_IS_TYPE(obj, &mp_type_type)) {
+            type = obj;
+        } else {
+            type = mp_obj_get_type(obj);
+        }
+        if (type->locals_dict != MP_OBJ_NULL && MP_OBJ_IS_TYPE(type->locals_dict, &mp_type_dict)) {
+            map = mp_obj_dict_get_map(type->locals_dict);
+        }
+    }
+    if (map != NULL) {
+        for (uint i = 0; i < map->alloc; i++) {
+            if (map->table[i].key != MP_OBJ_NULL) {
+                mp_help_print_info_about_object(map->table[i].key, map->table[i].value);
+            }
+        }
+    }
+}
+
+STATIC mp_obj_t mp_builtin_help(size_t n_args, const mp_obj_t *args) {
+    if (n_args == 0) {
+        // print a general help message
+        mp_print_str(MP_PYTHON_PRINTER, MICROPY_PY_BUILTINS_HELP_TEXT);
+    } else {
+        // try to print something sensible about the given object
+        mp_help_print_obj(args[0]);
+    }
+
+    return mp_const_none;
+}
+MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_help_obj, 0, 1, mp_builtin_help);
+
+#endif // MICROPY_PY_BUILTINS_HELP
diff --git a/py/builtinimport.c b/py/builtinimport.c
index 4024c5d59a..d01ebbe73f 100644
--- a/py/builtinimport.c
+++ b/py/builtinimport.c
@@ -47,14 +47,6 @@
 
 #define PATH_SEP_CHAR '/'
 
-#if MICROPY_MODULE_WEAK_LINKS
-STATIC const mp_rom_map_elem_t mp_builtin_module_weak_links_table[] = {
-    MICROPY_PORT_BUILTIN_MODULE_WEAK_LINKS
-};
-
-STATIC MP_DEFINE_CONST_MAP(mp_builtin_module_weak_links_map, mp_builtin_module_weak_links_table);
-#endif
-
 bool mp_obj_is_package(mp_obj_t module) {
     mp_obj_t dest[2];
     mp_load_method_maybe(module, MP_QSTR___path__, dest);
@@ -105,7 +97,7 @@ STATIC mp_import_stat_t stat_dir_or_file(vstr_t *path) {
 STATIC mp_import_stat_t find_file(const char *file_str, uint file_len, vstr_t *dest) {
 #if MICROPY_PY_SYS
     // extract the list of paths
-    mp_uint_t path_num;
+    size_t path_num;
     mp_obj_t *path_items;
     mp_obj_list_get(mp_sys_path, &path_num, &path_items);
 
@@ -119,7 +111,7 @@ STATIC mp_import_stat_t find_file(const char *file_str, uint file_len, vstr_t *d
         // go through each path looking for a directory or file
         for (mp_uint_t i = 0; i < path_num; i++) {
             vstr_reset(dest);
-            mp_uint_t p_len;
+            size_t p_len;
             const char *p = mp_obj_str_get_data(path_items[i], &p_len);
             if (p_len > 0) {
                 vstr_add_strn(dest, p, p_len);
@@ -139,18 +131,7 @@ STATIC mp_import_stat_t find_file(const char *file_str, uint file_len, vstr_t *d
 }
 
 #if MICROPY_ENABLE_COMPILER
-STATIC void do_load_from_lexer(mp_obj_t module_obj, mp_lexer_t *lex, const char *fname) {
-
-    if (lex == NULL) {
-        // we verified the file exists using stat, but lexer could still fail
-        if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_ImportError, "module not found");
-        } else {
-            nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ImportError,
-                "no module named '%s'", fname));
-        }
-    }
-
+STATIC void do_load_from_lexer(mp_obj_t module_obj, mp_lexer_t *lex) {
     #if MICROPY_PY___FILE__
     qstr source_name = lex->source_name;
     mp_store_attr(module_obj, MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name));
@@ -215,7 +196,7 @@ STATIC void do_load(mp_obj_t module_obj, vstr_t *file) {
     // found the filename in the list of frozen files, then load and execute it.
     #if MICROPY_MODULE_FROZEN_STR
     if (frozen_type == MP_FROZEN_STR) {
-        do_load_from_lexer(module_obj, modref, file_str);
+        do_load_from_lexer(module_obj, modref);
         return;
     }
     #endif
@@ -243,7 +224,7 @@ STATIC void do_load(mp_obj_t module_obj, vstr_t *file) {
     #if MICROPY_ENABLE_COMPILER
     {
         mp_lexer_t *lex = mp_lexer_new_from_file(file_str);
-        do_load_from_lexer(module_obj, lex, file_str);
+        do_load_from_lexer(module_obj, lex);
         return;
     }
     #endif
@@ -284,7 +265,7 @@ mp_obj_t mp_builtin___import__(size_t n_args, const mp_obj_t *args) {
         }
     }
 
-    mp_uint_t mod_len;
+    size_t mod_len;
     const char *mod_str = mp_obj_str_get_data(module_name, &mod_len);
 
     if (level != 0) {
@@ -315,7 +296,7 @@ mp_obj_t mp_builtin___import__(size_t n_args, const mp_obj_t *args) {
         DEBUG_printf("\n");
 #endif
 
-        mp_uint_t this_name_l;
+        size_t this_name_l;
         const char *this_name = mp_obj_str_get_data(this_name_q, &this_name_l);
 
         const char *p = this_name + this_name_l;
@@ -360,8 +341,7 @@ mp_obj_t mp_builtin___import__(size_t n_args, const mp_obj_t *args) {
         qstr new_mod_q = qstr_from_strn(new_mod, new_mod_l);
         DEBUG_printf("Resolved base name for relative import: '%s'\n", qstr_str(new_mod_q));
         if (new_mod_q == MP_QSTR_) {
-            // CPython raises SystemError
-            mp_raise_msg(&mp_type_ImportError, "cannot perform relative import");
+            mp_raise_ValueError("cannot perform relative import");
         }
         module_name = MP_OBJ_NEW_QSTR(new_mod_q);
         mod_str = new_mod;
@@ -449,8 +429,13 @@ mp_obj_t mp_builtin___import__(size_t n_args, const mp_obj_t *args) {
 
                 // if args[3] (fromtuple) has magic value False, set up
                 // this module for command-line "-m" option (set module's
-                // name to __main__ instead of real name).
-                if (i == mod_len && fromtuple == mp_const_false) {
+                // name to __main__ instead of real name). Do this only
+                // for *modules* however - packages never have their names
+                // replaced, instead they're -m'ed using a special __main__
+                // submodule in them. (This all apparently is done to not
+                // touch package name itself, which is important for future
+                // imports).
+                if (i == mod_len && fromtuple == mp_const_false && stat != MP_IMPORT_STAT_DIR) {
                     mp_obj_module_t *o = MP_OBJ_TO_PTR(module_obj);
                     mp_obj_dict_store(MP_OBJ_FROM_PTR(o->globals), MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR___main__));
                     #if MICROPY_CPYTHON_COMPAT
@@ -477,10 +462,10 @@ mp_obj_t mp_builtin___import__(size_t n_args, const mp_obj_t *args) {
                     path.len = orig_path_len;
                 } else { // MP_IMPORT_STAT_FILE
                     do_load(module_obj, &path);
-                    // TODO: We cannot just break here, at the very least, we must execute
-                    // trailer code below. But otherwise if there're remaining components,
-                    // that would be (??) object path within module, not modules path within FS.
-                    // break;
+                    // This should be the last component in the import path.  If there are
+                    // remaining components then it's an ImportError because the current path
+                    // (the module that was just loaded) is not a package.  This will be caught
+                    // on the next iteration because the file will not exist.
                 }
             }
             if (outer_module_obj != MP_OBJ_NULL) {
@@ -495,12 +480,6 @@ mp_obj_t mp_builtin___import__(size_t n_args, const mp_obj_t *args) {
         }
     }
 
-    if (i < mod_len) {
-        // we loaded a package, now need to load objects from within that package
-        // TODO
-        assert(0);
-    }
-
     // If fromlist is not empty, return leaf module
     if (fromtuple != mp_const_none) {
         return module_obj;
diff --git a/py/compile.c b/py/compile.c
index b84793d10a..8533e0528f 100644
--- a/py/compile.c
+++ b/py/compile.c
@@ -41,13 +41,19 @@
 // TODO need to mangle __attr names
 
 typedef enum {
+// define rules with a compile function
 #define DEF_RULE(rule, comp, kind, ...) PN_##rule,
+#define DEF_RULE_NC(rule, kind, ...)
 #include "py/grammar.h"
 #undef DEF_RULE
-    PN_maximum_number_of,
-    PN_string, // special node for non-interned string
-    PN_bytes, // special node for non-interned bytes
+#undef DEF_RULE_NC
     PN_const_object, // special node for a constant, generic Python object
+// define rules without a compile function
+#define DEF_RULE(rule, comp, kind, ...)
+#define DEF_RULE_NC(rule, kind, ...) PN_##rule,
+#include "py/grammar.h"
+#undef DEF_RULE
+#undef DEF_RULE_NC
 } pn_kind_t;
 
 #define NEED_METHOD_TABLE MICROPY_EMIT_NATIVE
@@ -109,7 +115,6 @@ typedef struct _compiler_t {
 
     uint8_t is_repl;
     uint8_t pass; // holds enum type pass_kind_t
-    uint8_t func_arg_is_super; // used to compile special case of super() function call
     uint8_t have_star;
 
     // try to keep compiler clean from nlr
@@ -450,8 +455,7 @@ STATIC void c_assign(compiler_t *comp, mp_parse_node_t pn, assign_kind_t assign_
                     break;
             }
         } else {
-            compile_syntax_error(comp, pn, "can't assign to literal");
-            return;
+            goto cannot_assign;
         }
     } else {
         // pn must be a struct
@@ -466,7 +470,7 @@ STATIC void c_assign(compiler_t *comp, mp_parse_node_t pn, assign_kind_t assign_
             case PN_exprlist:
                 // lhs is a tuple
                 if (assign_kind != ASSIGN_STORE) {
-                    goto bad_aug;
+                    goto cannot_assign;
                 }
                 c_assign_tuple(comp, MP_PARSE_NODE_NULL, MP_PARSE_NODE_STRUCT_NUM_NODES(pns), pns->nodes);
                 break;
@@ -479,7 +483,7 @@ STATIC void c_assign(compiler_t *comp, mp_parse_node_t pn, assign_kind_t assign_
                 } else {
                     assert(MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp));
                     if (assign_kind != ASSIGN_STORE) {
-                        goto bad_aug;
+                        goto cannot_assign;
                     }
                     pns = (mp_parse_node_struct_t*)pns->nodes[0];
                     goto testlist_comp;
@@ -489,7 +493,7 @@ STATIC void c_assign(compiler_t *comp, mp_parse_node_t pn, assign_kind_t assign_
             case PN_atom_bracket:
                 // lhs is something in brackets
                 if (assign_kind != ASSIGN_STORE) {
-                    goto bad_aug;
+                    goto cannot_assign;
                 }
                 if (MP_PARSE_NODE_IS_NULL(pns->nodes[0])) {
                     // empty list, assignment allowed
@@ -537,10 +541,6 @@ STATIC void c_assign(compiler_t *comp, mp_parse_node_t pn, assign_kind_t assign_
 
     cannot_assign:
     compile_syntax_error(comp, pn, "can't assign to expression");
-    return;
-
-    bad_aug:
-    compile_syntax_error(comp, pn, "illegal expression for augmented assignment");
 }
 
 // stuff for lambda and comprehensions and generators:
@@ -585,8 +585,16 @@ STATIC void close_over_variables_etc(compiler_t *comp, scope_t *this_scope, int
 }
 
 STATIC void compile_funcdef_lambdef_param(compiler_t *comp, mp_parse_node_t pn) {
-    if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_typedargslist_star)
-        || MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_varargslist_star)) {
+    // For efficiency of the code below we extract the parse-node kind here
+    int pn_kind;
+    if (MP_PARSE_NODE_IS_ID(pn)) {
+        pn_kind = -1;
+    } else {
+        assert(MP_PARSE_NODE_IS_STRUCT(pn));
+        pn_kind = MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)pn);
+    }
+
+    if (pn_kind == PN_typedargslist_star || pn_kind == PN_varargslist_star) {
         comp->have_star = true;
         /* don't need to distinguish bare from named star
         mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
@@ -597,8 +605,7 @@ STATIC void compile_funcdef_lambdef_param(compiler_t *comp, mp_parse_node_t pn)
         }
         */
 
-    } else if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_typedargslist_dbl_star)
-        || MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_varargslist_dbl_star)) {
+    } else if (pn_kind == PN_typedargslist_dbl_star || pn_kind == PN_varargslist_dbl_star) {
         // named double star
         // TODO do we need to do anything with this?
 
@@ -606,14 +613,14 @@ STATIC void compile_funcdef_lambdef_param(compiler_t *comp, mp_parse_node_t pn)
         mp_parse_node_t pn_id;
         mp_parse_node_t pn_colon;
         mp_parse_node_t pn_equal;
-        if (MP_PARSE_NODE_IS_ID(pn)) {
+        if (pn_kind == -1) {
             // this parameter is just an id
 
             pn_id = pn;
             pn_colon = MP_PARSE_NODE_NULL;
             pn_equal = MP_PARSE_NODE_NULL;
 
-        } else if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_typedargslist_name)) {
+        } else if (pn_kind == PN_typedargslist_name) {
             // this parameter has a colon and/or equal specifier
 
             mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
@@ -622,7 +629,7 @@ STATIC void compile_funcdef_lambdef_param(compiler_t *comp, mp_parse_node_t pn)
             pn_equal = pns->nodes[2];
 
         } else {
-            assert(MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_varargslist_name)); // should be
+            assert(pn_kind == PN_varargslist_name); // should be
             // this parameter has an equal specifier
 
             mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
@@ -754,7 +761,6 @@ STATIC qstr compile_classdef_helper(compiler_t *comp, mp_parse_node_struct_t *pn
     if (MP_PARSE_NODE_IS_STRUCT_KIND(parents, PN_classdef_2)) {
         parents = MP_PARSE_NODE_NULL;
     }
-    comp->func_arg_is_super = false;
     compile_trailer_paren_helper(comp, parents, false, 2);
 
     // return its name (the 'C' in class C(...):")
@@ -828,7 +834,6 @@ STATIC void compile_decorated(compiler_t *comp, mp_parse_node_struct_t *pns) {
             // nodes[1] contains arguments to the decorator function, if any
             if (!MP_PARSE_NODE_IS_NULL(pns_decorator->nodes[1])) {
                 // call the decorator function with the arguments in nodes[1]
-                comp->func_arg_is_super = false;
                 compile_node(comp, pns_decorator->nodes[1]);
             }
         }
@@ -972,7 +977,8 @@ STATIC void compile_return_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
     if (MP_PARSE_NODE_IS_NULL(pns->nodes[0])) {
         // no argument to 'return', so return None
         EMIT_ARG(load_const_tok, MP_TOKEN_KW_NONE);
-    } else if (MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_test_if_expr)) {
+    } else if (MICROPY_COMP_RETURN_IF_EXPR
+        && MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_test_if_expr)) {
         // special case when returning an if-expression; to match CPython optimisation
         mp_parse_node_struct_t *pns_test_if_expr = (mp_parse_node_struct_t*)pns->nodes[0];
         mp_parse_node_struct_t *pns_test_if_else = (mp_parse_node_struct_t*)pns_test_if_expr->nodes[1];
@@ -1421,7 +1427,7 @@ STATIC void compile_for_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
         mp_parse_node_struct_t *pns_it = (mp_parse_node_struct_t*)pns->nodes[1];
         if (MP_PARSE_NODE_IS_ID(pns_it->nodes[0])
             && MP_PARSE_NODE_LEAF_ARG(pns_it->nodes[0]) == MP_QSTR_range
-            && MP_PARSE_NODE_IS_STRUCT_KIND(pns_it->nodes[1], PN_trailer_paren)) {
+            && MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)pns_it->nodes[1]) == PN_trailer_paren) {
             mp_parse_node_t pn_range_args = ((mp_parse_node_struct_t*)pns_it->nodes[1])->nodes[0];
             mp_parse_node_t *args;
             int n_args = mp_parse_node_extract_list(&pn_range_args, PN_arglist, &args);
@@ -1443,8 +1449,9 @@ STATIC void compile_for_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
                     pn_range_start = args[0];
                     pn_range_end = args[1];
                     pn_range_step = args[2];
-                    // We need to know sign of step. This is possible only if it's constant
-                    if (!MP_PARSE_NODE_IS_SMALL_INT(pn_range_step)) {
+                    // the step must be a non-zero constant integer to do the optimisation
+                    if (!MP_PARSE_NODE_IS_SMALL_INT(pn_range_step)
+                        || MP_PARSE_NODE_LEAF_SMALL_INT(pn_range_step) == 0) {
                         optimize = false;
                     }
                 }
@@ -1475,7 +1482,7 @@ STATIC void compile_for_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
     uint pop_label = comp_next_label(comp);
 
     compile_node(comp, pns->nodes[1]); // iterator
-    EMIT(get_iter);
+    EMIT_ARG(get_iter, true);
     EMIT_ARG(label_assign, continue_label);
     EMIT_ARG(for_iter, pop_label);
     c_assign(comp, pns->nodes[0], ASSIGN_STORE); // variable
@@ -1523,7 +1530,7 @@ STATIC void compile_try_except(compiler_t *comp, mp_parse_node_t pn_body, int n_
         if (MP_PARSE_NODE_IS_NULL(pns_except->nodes[0])) {
             // this is a catch all exception handler
             if (i + 1 != n_except) {
-                compile_syntax_error(comp, pn_excepts[i], "default 'except:' must be last");
+                compile_syntax_error(comp, pn_excepts[i], "default 'except' must be last");
                 compile_decrease_except_level(comp);
                 return;
             }
@@ -1680,14 +1687,14 @@ STATIC void compile_with_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
 }
 
 STATIC void compile_yield_from(compiler_t *comp) {
-    EMIT(get_iter);
+    EMIT_ARG(get_iter, false);
     EMIT_ARG(load_const_tok, MP_TOKEN_KW_NONE);
     EMIT(yield_from);
 }
 
 #if MICROPY_PY_ASYNC_AWAIT
 STATIC void compile_await_object_method(compiler_t *comp, qstr method) {
-    EMIT_ARG(load_method, method);
+    EMIT_ARG(load_method, method, false);
     EMIT_ARG(call_method, 0, 0, 0);
     compile_yield_from(comp);
 }
@@ -1778,7 +1785,7 @@ STATIC void compile_async_with_stmt_helper(compiler_t *comp, int n, mp_parse_nod
         }
 
         compile_load_id(comp, context);
-        EMIT_ARG(load_method, MP_QSTR___aexit__);
+        EMIT_ARG(load_method, MP_QSTR___aexit__, false);
 
         EMIT_ARG(setup_except, try_exception_label);
         compile_increase_except_level(comp);
@@ -1872,8 +1879,6 @@ STATIC void compile_expr_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
         } else {
             // for non-REPL, evaluate then discard the expression
             if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0]) && !MP_PARSE_NODE_IS_ID(pns->nodes[0]))
-                || MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)
-                || MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_bytes)
                 || MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object)) {
                 // do nothing with a lonely constant
             } else {
@@ -2167,10 +2172,85 @@ STATIC void compile_factor_2(compiler_t *comp, mp_parse_node_struct_t *pns) {
 }
 
 STATIC void compile_atom_expr_normal(compiler_t *comp, mp_parse_node_struct_t *pns) {
-    // this is to handle special super() call
-    comp->func_arg_is_super = MP_PARSE_NODE_IS_ID(pns->nodes[0]) && MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]) == MP_QSTR_super;
+    // compile the subject of the expression
+    compile_node(comp, pns->nodes[0]);
+
+    // compile_atom_expr_await may call us with a NULL node
+    if (MP_PARSE_NODE_IS_NULL(pns->nodes[1])) {
+        return;
+    }
+
+    // get the array of trailers (known to be an array of PARSE_NODE_STRUCT)
+    size_t num_trail = 1;
+    mp_parse_node_struct_t **pns_trail = (mp_parse_node_struct_t**)&pns->nodes[1];
+    if (MP_PARSE_NODE_STRUCT_KIND(pns_trail[0]) == PN_atom_expr_trailers) {
+        num_trail = MP_PARSE_NODE_STRUCT_NUM_NODES(pns_trail[0]);
+        pns_trail = (mp_parse_node_struct_t**)&pns_trail[0]->nodes[0];
+    }
+
+    // the current index into the array of trailers
+    size_t i = 0;
+
+    // handle special super() call
+    if (comp->scope_cur->kind == SCOPE_FUNCTION
+        && MP_PARSE_NODE_IS_ID(pns->nodes[0])
+        && MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]) == MP_QSTR_super
+        && MP_PARSE_NODE_STRUCT_KIND(pns_trail[0]) == PN_trailer_paren
+        && MP_PARSE_NODE_IS_NULL(pns_trail[0]->nodes[0])) {
+        // at this point we have matched "super()" within a function
+
+        // load the class for super to search for a parent
+        compile_load_id(comp, MP_QSTR___class__);
+
+        // look for first argument to function (assumes it's "self")
+        bool found = false;
+        id_info_t *id = &comp->scope_cur->id_info[0];
+        for (size_t n = comp->scope_cur->id_info_len; n > 0; --n, ++id) {
+            if (id->flags & ID_FLAG_IS_PARAM) {
+                // first argument found; load it
+                compile_load_id(comp, id->qst);
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            compile_syntax_error(comp, (mp_parse_node_t)pns_trail[0],
+                "super() can't find self"); // really a TypeError
+            return;
+        }
 
-    compile_generic_all_nodes(comp, pns);
+        if (num_trail >= 3
+            && MP_PARSE_NODE_STRUCT_KIND(pns_trail[1]) == PN_trailer_period
+            && MP_PARSE_NODE_STRUCT_KIND(pns_trail[2]) == PN_trailer_paren) {
+            // optimisation for method calls super().f(...), to eliminate heap allocation
+            mp_parse_node_struct_t *pns_period = pns_trail[1];
+            mp_parse_node_struct_t *pns_paren = pns_trail[2];
+            EMIT_ARG(load_method, MP_PARSE_NODE_LEAF_ARG(pns_period->nodes[0]), true);
+            compile_trailer_paren_helper(comp, pns_paren->nodes[0], true, 0);
+            i = 3;
+        } else {
+            // a super() call
+            EMIT_ARG(call_function, 2, 0, 0);
+            i = 1;
+        }
+    }
+
+    // compile the remaining trailers
+    for (; i < num_trail; i++) {
+        if (i + 1 < num_trail
+            && MP_PARSE_NODE_STRUCT_KIND(pns_trail[i]) == PN_trailer_period
+            && MP_PARSE_NODE_STRUCT_KIND(pns_trail[i + 1]) == PN_trailer_paren) {
+            // optimisation for method calls a.f(...), following PyPy
+            mp_parse_node_struct_t *pns_period = pns_trail[i];
+            mp_parse_node_struct_t *pns_paren = pns_trail[i + 1];
+            EMIT_ARG(load_method, MP_PARSE_NODE_LEAF_ARG(pns_period->nodes[0]), false);
+            compile_trailer_paren_helper(comp, pns_paren->nodes[0], true, 0);
+            i += 1;
+        } else {
+            // node is one of: trailer_paren, trailer_bracket, trailer_period
+            compile_node(comp, (mp_parse_node_t)pns_trail[i]);
+        }
+    }
 }
 
 STATIC void compile_power(compiler_t *comp, mp_parse_node_struct_t *pns) {
@@ -2181,22 +2261,6 @@ STATIC void compile_power(compiler_t *comp, mp_parse_node_struct_t *pns) {
 STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_arglist, bool is_method_call, int n_positional_extra) {
     // function to call is on top of stack
 
-    // this is to handle special super() call
-    if (MP_PARSE_NODE_IS_NULL(pn_arglist) && comp->func_arg_is_super && comp->scope_cur->kind == SCOPE_FUNCTION) {
-        compile_load_id(comp, MP_QSTR___class__);
-        // look for first argument to function (assumes it's "self")
-        for (int i = 0; i < comp->scope_cur->id_info_len; i++) {
-            if (comp->scope_cur->id_info[i].flags & ID_FLAG_IS_PARAM) {
-                // first argument found; load it and call super
-                EMIT_LOAD_FAST(MP_QSTR_, comp->scope_cur->id_info[i].local_num);
-                EMIT_ARG(call_function, 2, 0, 0);
-                return;
-            }
-        }
-        compile_syntax_error(comp, MP_PARSE_NODE_NULL, "super() call cannot find self"); // really a TypeError
-        return;
-    }
-
     // get the list of arguments
     mp_parse_node_t *args;
     int n_args = mp_parse_node_extract_list(&pn_arglist, PN_arglist, &args);
@@ -2276,82 +2340,6 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar
     }
 }
 
-STATIC void compile_atom_expr_trailers(compiler_t *comp, mp_parse_node_struct_t *pns) {
-    int num_nodes = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
-    for (int i = 0; i < num_nodes; i++) {
-        if (i + 1 < num_nodes && MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[i], PN_trailer_period) && MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[i + 1], PN_trailer_paren)) {
-            // optimisation for method calls a.f(...), following PyPy
-            mp_parse_node_struct_t *pns_period = (mp_parse_node_struct_t*)pns->nodes[i];
-            mp_parse_node_struct_t *pns_paren = (mp_parse_node_struct_t*)pns->nodes[i + 1];
-            EMIT_ARG(load_method, MP_PARSE_NODE_LEAF_ARG(pns_period->nodes[0])); // get the method
-            compile_trailer_paren_helper(comp, pns_paren->nodes[0], true, 0);
-            i += 1;
-        } else {
-            compile_node(comp, pns->nodes[i]);
-        }
-        comp->func_arg_is_super = false;
-    }
-}
-
-STATIC void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
-    // a list of strings
-
-    // check type of list (string or bytes) and count total number of bytes
-    int n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
-    size_t n_bytes = 0;
-    int string_kind = MP_PARSE_NODE_NULL;
-    for (int i = 0; i < n; i++) {
-        int pn_kind;
-        if (MP_PARSE_NODE_IS_LEAF(pns->nodes[i])) {
-            pn_kind = MP_PARSE_NODE_LEAF_KIND(pns->nodes[i]);
-            assert(pn_kind == MP_PARSE_NODE_STRING || pn_kind == MP_PARSE_NODE_BYTES);
-            n_bytes += qstr_len(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
-        } else {
-            assert(MP_PARSE_NODE_IS_STRUCT(pns->nodes[i]));
-            mp_parse_node_struct_t *pns_string = (mp_parse_node_struct_t*)pns->nodes[i];
-            if (MP_PARSE_NODE_STRUCT_KIND(pns_string) == PN_string) {
-                pn_kind = MP_PARSE_NODE_STRING;
-            } else {
-                assert(MP_PARSE_NODE_STRUCT_KIND(pns_string) == PN_bytes);
-                pn_kind = MP_PARSE_NODE_BYTES;
-            }
-            n_bytes += pns_string->nodes[1];
-        }
-        if (i == 0) {
-            string_kind = pn_kind;
-        } else if (pn_kind != string_kind) {
-            compile_syntax_error(comp, (mp_parse_node_t)pns, "cannot mix bytes and nonbytes literals");
-            return;
-        }
-    }
-
-    // if we are not in the last pass, just load a dummy object
-    if (comp->pass != MP_PASS_EMIT) {
-        EMIT_ARG(load_const_obj, mp_const_none);
-        return;
-    }
-
-    // concatenate string/bytes
-    vstr_t vstr;
-    vstr_init_len(&vstr, n_bytes);
-    byte *s_dest = (byte*)vstr.buf;
-    for (int i = 0; i < n; i++) {
-        if (MP_PARSE_NODE_IS_LEAF(pns->nodes[i])) {
-            size_t s_len;
-            const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len);
-            memcpy(s_dest, s, s_len);
-            s_dest += s_len;
-        } else {
-            mp_parse_node_struct_t *pns_string = (mp_parse_node_struct_t*)pns->nodes[i];
-            memcpy(s_dest, (const char*)pns_string->nodes[0], pns_string->nodes[1]);
-            s_dest += pns_string->nodes[1];
-        }
-    }
-
-    // load the object
-    EMIT_ARG(load_const_obj, mp_obj_new_str_from_vstr(string_kind == MP_PARSE_NODE_STRING ? &mp_type_str : &mp_type_bytes, &vstr));
-}
-
 // pns needs to have 2 nodes, first is lhs of comprehension, second is PN_comp_for node
 STATIC void compile_comprehension(compiler_t *comp, mp_parse_node_struct_t *pns, scope_kind_t kind) {
     assert(MP_PARSE_NODE_STRUCT_NUM_NODES(pns) == 2);
@@ -2372,7 +2360,9 @@ STATIC void compile_comprehension(compiler_t *comp, mp_parse_node_struct_t *pns,
     close_over_variables_etc(comp, this_scope, 0, 0);
 
     compile_node(comp, pns_comp_for->nodes[1]); // source of the iterator
-    EMIT(get_iter);
+    if (kind == SCOPE_GEN_EXPR) {
+        EMIT_ARG(get_iter, false);
+    }
     EMIT_ARG(call_function, 1, 0, 0);
 }
 
@@ -2490,13 +2480,21 @@ STATIC void compile_atom_brace(compiler_t *comp, mp_parse_node_struct_t *pns) {
                     compile_node(comp, pn_i);
                     if (is_dict) {
                         if (!is_key_value) {
-                            compile_syntax_error(comp, (mp_parse_node_t)pns, "expecting key:value for dictionary");
+                            if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
+                                compile_syntax_error(comp, (mp_parse_node_t)pns, "invalid syntax");
+                            } else {
+                                compile_syntax_error(comp, (mp_parse_node_t)pns, "expecting key:value for dict");
+                            }
                             return;
                         }
                         EMIT(store_map);
                     } else {
                         if (is_key_value) {
-                            compile_syntax_error(comp, (mp_parse_node_t)pns, "expecting just a value for set");
+                            if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
+                                compile_syntax_error(comp, (mp_parse_node_t)pns, "invalid syntax");
+                            } else {
+                                compile_syntax_error(comp, (mp_parse_node_t)pns, "expecting just a value for set");
+                            }
                             return;
                         }
                     }
@@ -2649,45 +2647,29 @@ STATIC void compile_atom_expr_await(compiler_t *comp, mp_parse_node_struct_t *pn
 }
 #endif
 
-STATIC void compile_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
-    // only create and load the actual str object on the last pass
-    if (comp->pass != MP_PASS_EMIT) {
-        EMIT_ARG(load_const_obj, mp_const_none);
-    } else {
-        EMIT_ARG(load_const_obj, mp_obj_new_str((const char*)pns->nodes[0], pns->nodes[1], false));
-    }
-}
-
-STATIC void compile_bytes(compiler_t *comp, mp_parse_node_struct_t *pns) {
-    // only create and load the actual bytes object on the last pass
-    if (comp->pass != MP_PASS_EMIT) {
-        EMIT_ARG(load_const_obj, mp_const_none);
-    } else {
-        EMIT_ARG(load_const_obj, mp_obj_new_bytes((const byte*)pns->nodes[0], pns->nodes[1]));
-    }
-}
-
-STATIC void compile_const_object(compiler_t *comp, mp_parse_node_struct_t *pns) {
+STATIC mp_obj_t get_const_object(mp_parse_node_struct_t *pns) {
     #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
     // nodes are 32-bit pointers, but need to extract 64-bit object
-    EMIT_ARG(load_const_obj, (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
+    return (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32);
     #else
-    EMIT_ARG(load_const_obj, (mp_obj_t)pns->nodes[0]);
+    return (mp_obj_t)pns->nodes[0];
     #endif
 }
 
+STATIC void compile_const_object(compiler_t *comp, mp_parse_node_struct_t *pns) {
+    EMIT_ARG(load_const_obj, get_const_object(pns));
+}
+
 typedef void (*compile_function_t)(compiler_t*, mp_parse_node_struct_t*);
 STATIC const compile_function_t compile_function[] = {
-#define nc NULL
+// only define rules with a compile function
 #define c(f) compile_##f
 #define DEF_RULE(rule, comp, kind, ...) comp,
+#define DEF_RULE_NC(rule, kind, ...)
 #include "py/grammar.h"
-#undef nc
 #undef c
 #undef DEF_RULE
-    NULL,
-    compile_string,
-    compile_bytes,
+#undef DEF_RULE_NC
     compile_const_object,
 };
 
@@ -2741,8 +2723,8 @@ STATIC void compile_node(compiler_t *comp, mp_parse_node_t pn) {
     } else {
         mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
         EMIT_ARG(set_source_line, pns->source_line);
+        assert(MP_PARSE_NODE_STRUCT_KIND(pns) <= PN_const_object);
         compile_function_t f = compile_function[MP_PARSE_NODE_STRUCT_KIND(pns)];
-        assert(f != NULL);
         f(comp, pns);
     }
 }
@@ -2887,20 +2869,20 @@ STATIC void compile_scope_comp_iter(compiler_t *comp, mp_parse_node_struct_t *pn
             EMIT(yield_value);
             EMIT(pop_top);
         } else {
-            EMIT_ARG(store_comp, comp->scope_cur->kind, for_depth + 2);
+            EMIT_ARG(store_comp, comp->scope_cur->kind, 4 * for_depth + 5);
         }
-    } else if (MP_PARSE_NODE_IS_STRUCT_KIND(pn_iter, PN_comp_if)) {
+    } else if (MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)pn_iter) == PN_comp_if) {
         // if condition
         mp_parse_node_struct_t *pns_comp_if = (mp_parse_node_struct_t*)pn_iter;
         c_if_cond(comp, pns_comp_if->nodes[0], false, l_top);
         pn_iter = pns_comp_if->nodes[1];
         goto tail_recursion;
     } else {
-        assert(MP_PARSE_NODE_IS_STRUCT_KIND(pn_iter, PN_comp_for)); // should be
+        assert(MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)pn_iter) == PN_comp_for); // should be
         // for loop
         mp_parse_node_struct_t *pns_comp_for2 = (mp_parse_node_struct_t*)pn_iter;
         compile_node(comp, pns_comp_for2->nodes[1]);
-        EMIT(get_iter);
+        EMIT_ARG(get_iter, true);
         compile_scope_comp_iter(comp, pns_comp_for2, pn_inner_expr, for_depth + 1);
     }
 
@@ -2940,7 +2922,8 @@ STATIC void check_for_doc_string(compiler_t *comp, mp_parse_node_t pn) {
         mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
         if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0])
                 && MP_PARSE_NODE_LEAF_KIND(pns->nodes[0]) == MP_PARSE_NODE_STRING)
-            || MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)) {
+            || (MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object)
+                && MP_OBJ_IS_STR(get_const_object((mp_parse_node_struct_t*)pns->nodes[0])))) {
                 // compile the doc string
                 compile_node(comp, pns->nodes[0]);
                 // store the doc string
@@ -3070,7 +3053,19 @@ STATIC void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) {
         #endif
         }
 
-        compile_load_id(comp, qstr_arg);
+        // There are 4 slots on the stack for the iterator, and the first one is
+        // NULL to indicate that the second one points to the iterator object.
+        if (scope->kind == SCOPE_GEN_EXPR) {
+            // TODO static assert that MP_OBJ_ITER_BUF_NSLOTS == 4
+            EMIT(load_null);
+            compile_load_id(comp, qstr_arg);
+            EMIT(load_null);
+            EMIT(load_null);
+        } else {
+            compile_load_id(comp, qstr_arg);
+            EMIT_ARG(get_iter, true);
+        }
+
         compile_scope_comp_iter(comp, pns_comp_for, pns->nodes[0], 0);
 
         if (scope->kind == SCOPE_GEN_EXPR) {
diff --git a/py/emit.h b/py/emit.h
index 8359d5b61e..b935409561 100644
--- a/py/emit.h
+++ b/py/emit.h
@@ -88,7 +88,7 @@ typedef struct _emit_method_table_t {
     void (*load_const_obj)(emit_t *emit, mp_obj_t obj);
     void (*load_null)(emit_t *emit);
     void (*load_attr)(emit_t *emit, qstr qst);
-    void (*load_method)(emit_t *emit, qstr qst);
+    void (*load_method)(emit_t *emit, qstr qst, bool is_super);
     void (*load_build_class)(emit_t *emit);
     void (*load_subscr)(emit_t *emit);
     void (*store_attr)(emit_t *emit, qstr qst);
@@ -110,7 +110,7 @@ typedef struct _emit_method_table_t {
     void (*setup_except)(emit_t *emit, mp_uint_t label);
     void (*setup_finally)(emit_t *emit, mp_uint_t label);
     void (*end_finally)(emit_t *emit);
-    void (*get_iter)(emit_t *emit);
+    void (*get_iter)(emit_t *emit, bool use_stack);
     void (*for_iter)(emit_t *emit, mp_uint_t label);
     void (*for_iter_end)(emit_t *emit);
     void (*pop_block)(emit_t *emit);
@@ -205,7 +205,7 @@ void mp_emit_bc_load_const_str(emit_t *emit, qstr qst);
 void mp_emit_bc_load_const_obj(emit_t *emit, mp_obj_t obj);
 void mp_emit_bc_load_null(emit_t *emit);
 void mp_emit_bc_load_attr(emit_t *emit, qstr qst);
-void mp_emit_bc_load_method(emit_t *emit, qstr qst);
+void mp_emit_bc_load_method(emit_t *emit, qstr qst, bool is_super);
 void mp_emit_bc_load_build_class(emit_t *emit);
 void mp_emit_bc_load_subscr(emit_t *emit);
 void mp_emit_bc_store_attr(emit_t *emit, qstr qst);
@@ -228,7 +228,7 @@ void mp_emit_bc_with_cleanup(emit_t *emit, mp_uint_t label);
 void mp_emit_bc_setup_except(emit_t *emit, mp_uint_t label);
 void mp_emit_bc_setup_finally(emit_t *emit, mp_uint_t label);
 void mp_emit_bc_end_finally(emit_t *emit);
-void mp_emit_bc_get_iter(emit_t *emit);
+void mp_emit_bc_get_iter(emit_t *emit, bool use_stack);
 void mp_emit_bc_for_iter(emit_t *emit, mp_uint_t label);
 void mp_emit_bc_for_iter_end(emit_t *emit);
 void mp_emit_bc_pop_block(emit_t *emit);
diff --git a/py/emitbc.c b/py/emitbc.c
index e11c9ae94f..ec12a62c6c 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -144,10 +144,15 @@ STATIC void emit_write_code_info_bytes_lines(emit_t *emit, mp_uint_t bytes_to_sk
     //printf("  %d %d\n", bytes_to_skip, lines_to_skip);
     while (bytes_to_skip > 0 || lines_to_skip > 0) {
         mp_uint_t b, l;
-        if (lines_to_skip <= 6) {
+        if (lines_to_skip <= 6 || bytes_to_skip > 0xf) {
             // use 0b0LLBBBBB encoding
             b = MIN(bytes_to_skip, 0x1f);
-            l = MIN(lines_to_skip, 0x3);
+            if (b < bytes_to_skip) {
+                // we can't skip any lines until we skip all the bytes
+                l = 0;
+            } else {
+                l = MIN(lines_to_skip, 0x3);
+            }
             *emit_get_cur_to_write_code_info(emit, 1) = b | (l << 5);
         } else {
             // use 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
@@ -443,7 +448,19 @@ bool mp_emit_bc_last_emit_was_return_value(emit_t *emit) {
 }
 
 void mp_emit_bc_adjust_stack_size(emit_t *emit, mp_int_t delta) {
+    if (emit->pass == MP_PASS_SCOPE) {
+        return;
+    }
+    assert((mp_int_t)emit->stack_size + delta >= 0);
     emit->stack_size += delta;
+    if (emit->stack_size > emit->scope->stack_size) {
+        emit->scope->stack_size = emit->stack_size;
+    }
+    emit->last_emit_was_return_value = false;
+}
+
+static inline void emit_bc_pre(emit_t *emit, mp_int_t stack_size_delta) {
+    mp_emit_bc_adjust_stack_size(emit, stack_size_delta);
 }
 
 void mp_emit_bc_set_source_line(emit_t *emit, mp_uint_t source_line) {
@@ -466,18 +483,6 @@ void mp_emit_bc_set_source_line(emit_t *emit, mp_uint_t source_line) {
 #endif
 }
 
-STATIC void emit_bc_pre(emit_t *emit, mp_int_t stack_size_delta) {
-    if (emit->pass == MP_PASS_SCOPE) {
-        return;
-    }
-    assert((mp_int_t)emit->stack_size + stack_size_delta >= 0);
-    emit->stack_size += stack_size_delta;
-    if (emit->stack_size > emit->scope->stack_size) {
-        emit->scope->stack_size = emit->stack_size;
-    }
-    emit->last_emit_was_return_value = false;
-}
-
 void mp_emit_bc_label_assign(emit_t *emit, mp_uint_t l) {
     emit_bc_pre(emit, 0);
     if (emit->pass == MP_PASS_SCOPE) {
@@ -589,9 +594,9 @@ void mp_emit_bc_load_attr(emit_t *emit, qstr qst) {
     }
 }
 
-void mp_emit_bc_load_method(emit_t *emit, qstr qst) {
-    emit_bc_pre(emit, 1);
-    emit_write_bytecode_byte_qstr(emit, MP_BC_LOAD_METHOD, qst);
+void mp_emit_bc_load_method(emit_t *emit, qstr qst, bool is_super) {
+    emit_bc_pre(emit, 1 - 2 * is_super);
+    emit_write_bytecode_byte_qstr(emit, is_super ? MP_BC_LOAD_SUPER_METHOD : MP_BC_LOAD_METHOD, qst);
 }
 
 void mp_emit_bc_load_build_class(emit_t *emit) {
@@ -729,6 +734,10 @@ void mp_emit_bc_unwind_jump(emit_t *emit, mp_uint_t label, mp_uint_t except_dept
         if (label & MP_EMIT_BREAK_FROM_FOR) {
             // need to pop the iterator if we are breaking out of a for loop
             emit_write_bytecode_byte(emit, MP_BC_POP_TOP);
+            // also pop the iter_buf
+            for (size_t i = 0; i < MP_OBJ_ITER_BUF_NSLOTS - 1; ++i) {
+                emit_write_bytecode_byte(emit, MP_BC_POP_TOP);
+            }
         }
         emit_write_bytecode_byte_signed_label(emit, MP_BC_JUMP, label & ~MP_EMIT_BREAK_FROM_FOR);
     } else {
@@ -768,9 +777,9 @@ void mp_emit_bc_end_finally(emit_t *emit) {
     emit_write_bytecode_byte(emit, MP_BC_END_FINALLY);
 }
 
-void mp_emit_bc_get_iter(emit_t *emit) {
-    emit_bc_pre(emit, 0);
-    emit_write_bytecode_byte(emit, MP_BC_GET_ITER);
+void mp_emit_bc_get_iter(emit_t *emit, bool use_stack) {
+    emit_bc_pre(emit, use_stack ? MP_OBJ_ITER_BUF_NSLOTS - 1 : 0);
+    emit_write_bytecode_byte(emit, use_stack ? MP_BC_GET_ITER_STACK : MP_BC_GET_ITER);
 }
 
 void mp_emit_bc_for_iter(emit_t *emit, mp_uint_t label) {
@@ -779,7 +788,7 @@ void mp_emit_bc_for_iter(emit_t *emit, mp_uint_t label) {
 }
 
 void mp_emit_bc_for_iter_end(emit_t *emit) {
-    emit_bc_pre(emit, -1);
+    emit_bc_pre(emit, -MP_OBJ_ITER_BUF_NSLOTS);
 }
 
 void mp_emit_bc_pop_block(emit_t *emit) {
diff --git a/py/emitinlinethumb.c b/py/emitinlinethumb.c
index c733cf2c73..f48086b1d2 100644
--- a/py/emitinlinethumb.c
+++ b/py/emitinlinethumb.c
@@ -36,10 +36,19 @@
 #if MICROPY_EMIT_INLINE_THUMB
 
 typedef enum {
+// define rules with a compile function
 #define DEF_RULE(rule, comp, kind, ...) PN_##rule,
+#define DEF_RULE_NC(rule, kind, ...)
 #include "py/grammar.h"
 #undef DEF_RULE
-    PN_maximum_number_of,
+#undef DEF_RULE_NC
+    PN_const_object, // special node for a constant, generic Python object
+// define rules without a compile function
+#define DEF_RULE(rule, comp, kind, ...)
+#define DEF_RULE_NC(rule, kind, ...) PN_##rule,
+#include "py/grammar.h"
+#undef DEF_RULE
+#undef DEF_RULE_NC
 } pn_kind_t;
 
 struct _emit_inline_asm_t {
diff --git a/py/emitnative.c b/py/emitnative.c
index 2e18d26b4a..99adc809c7 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -85,6 +85,7 @@ STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
     [MP_F_LOAD_BUILD_CLASS] = 0,
     [MP_F_LOAD_ATTR] = 2,
     [MP_F_LOAD_METHOD] = 3,
+    [MP_F_LOAD_SUPER_METHOD] = 2,
     [MP_F_STORE_NAME] = 2,
     [MP_F_STORE_GLOBAL] = 2,
     [MP_F_STORE_ATTR] = 3,
@@ -105,8 +106,8 @@ STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
     [MP_F_NATIVE_CALL_FUNCTION_N_KW] = 3,
     [MP_F_CALL_METHOD_N_KW] = 3,
     [MP_F_CALL_METHOD_N_KW_VAR] = 3,
-    [MP_F_GETITER] = 1,
-    [MP_F_ITERNEXT] = 1,
+    [MP_F_NATIVE_GETITER] = 2,
+    [MP_F_NATIVE_ITERNEXT] = 1,
     [MP_F_NLR_PUSH] = 1,
     [MP_F_NLR_POP] = 0,
     [MP_F_NATIVE_RAISE] = 1,
@@ -385,11 +386,9 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
                 ASM_MOV_REG_REG(emit->as, REG_LOCAL_2, REG_ARG_2);
             } else if (i == 2) {
                 ASM_MOV_REG_REG(emit->as, REG_LOCAL_3, REG_ARG_3);
-            } else if (i == 3) {
-                ASM_MOV_REG_TO_LOCAL(emit->as, REG_ARG_4, i - REG_LOCAL_NUM);
             } else {
-                // TODO not implemented
-                assert(0);
+                assert(i == 3); // should be true; max 4 args is checked above
+                ASM_MOV_REG_TO_LOCAL(emit->as, REG_ARG_4, i - REG_LOCAL_NUM);
             }
         }
         #endif
@@ -409,43 +408,29 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
         #endif
 
         // prepare incoming arguments for call to mp_setup_code_state
+
         #if N_X86
-        asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_2);
-        asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_3);
-        asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_4);
-        asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_5);
-        #else
-        #if N_THUMB
-        ASM_MOV_REG_REG(emit->as, ASM_THUMB_REG_R4, REG_ARG_4);
-        #elif N_ARM
-        ASM_MOV_REG_REG(emit->as, ASM_ARM_REG_R4, REG_ARG_4);
-        #else
-        ASM_MOV_REG_REG(emit->as, REG_ARG_5, REG_ARG_4);
-        #endif
-        ASM_MOV_REG_REG(emit->as, REG_ARG_4, REG_ARG_3);
-        ASM_MOV_REG_REG(emit->as, REG_ARG_3, REG_ARG_2);
-        ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_1);
+        asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_1);
+        asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_2);
+        asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_3);
+        asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_4);
         #endif
 
+        // set code_state.fun_bc
+        ASM_MOV_REG_TO_LOCAL(emit->as, REG_ARG_1, offsetof(mp_code_state_t, fun_bc) / sizeof(uintptr_t));
+
         // set code_state.ip (offset from start of this function to prelude info)
         // XXX this encoding may change size
-        ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state_t, ip) / sizeof(mp_uint_t), REG_ARG_1);
-
-        // set code_state.n_state
-        ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->n_state, offsetof(mp_code_state_t, n_state) / sizeof(mp_uint_t), REG_ARG_1);
+        ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state_t, ip) / sizeof(uintptr_t), REG_ARG_1);
 
         // put address of code_state into first arg
         ASM_MOV_LOCAL_ADDR_TO_REG(emit->as, 0, REG_ARG_1);
 
         // call mp_setup_code_state to prepare code_state structure
         #if N_THUMB
-        asm_thumb_op16(emit->as, 0xb400 | (1 << ASM_THUMB_REG_R4)); // push 5th arg
         asm_thumb_bl_ind(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE, ASM_THUMB_REG_R4);
-        asm_thumb_op16(emit->as, 0xbc00 | (1 << REG_RET)); // pop dummy (was 5th arg)
         #elif N_ARM
-        asm_arm_push(emit->as, 1 << ASM_ARM_REG_R4); // push 5th arg
         asm_arm_bl_ind(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE, ASM_ARM_REG_R4);
-        asm_arm_pop(emit->as, 1 << REG_RET); // pop dummy (was 5th arg)
         #else
         ASM_CALL_IND(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE);
         #endif
@@ -479,6 +464,9 @@ STATIC void emit_native_end_pass(emit_t *emit) {
 
     if (!emit->do_viper_types) {
         emit->prelude_offset = mp_asm_base_get_code_pos(&emit->as->base);
+        mp_asm_base_data(&emit->as->base, 1, 0x80 | ((emit->n_state >> 7) & 0x7f));
+        mp_asm_base_data(&emit->as->base, 1, emit->n_state & 0x7f);
+        mp_asm_base_data(&emit->as->base, 1, 0); // n_exc_stack
         mp_asm_base_data(&emit->as->base, 1, emit->scope->scope_flags);
         mp_asm_base_data(&emit->as->base, 1, emit->scope->num_pos_args);
         mp_asm_base_data(&emit->as->base, 1, emit->scope->num_kwonly_args);
@@ -527,9 +515,7 @@ STATIC void emit_native_end_pass(emit_t *emit) {
     ASM_END_PASS(emit->as);
 
     // check stack is back to zero size
-    if (emit->stack_size != 0) {
-        mp_printf(&mp_plat_print, "ERROR: stack size not back to zero; got %d\n", emit->stack_size);
-    }
+    assert(emit->stack_size == 0);
 
     if (emit->pass == MP_PASS_EMIT) {
         void *f = mp_asm_base_get_code(&emit->as->base);
@@ -595,38 +581,9 @@ STATIC void emit_native_set_source_line(emit_t *emit, mp_uint_t source_line) {
     (void)source_line;
 }
 
-/*
-STATIC void emit_pre_raw(emit_t *emit, int stack_size_delta) {
-    adjust_stack(emit, stack_size_delta);
-    emit->last_emit_was_return_value = false;
-}
-*/
-
 // this must be called at start of emit functions
 STATIC void emit_native_pre(emit_t *emit) {
     emit->last_emit_was_return_value = false;
-    // settle the stack
-    /*
-    if (regs_needed != 0) {
-        for (int i = 0; i < emit->stack_size; i++) {
-            switch (emit->stack_info[i].kind) {
-                case STACK_VALUE:
-                    break;
-
-                case STACK_REG:
-                    // TODO only push reg if in regs_needed
-                    emit->stack_info[i].kind = STACK_VALUE;
-                    ASM_MOV_REG_TO_LOCAL(emit->as, emit->stack_info[i].data.u_reg, emit->stack_start + i);
-                    break;
-
-                case STACK_IMM:
-                    // don't think we ever need to push imms for settling
-                    //ASM_MOV_IMM_TO_LOCAL(emit->last_imm, emit->stack_start + i);
-                    break;
-            }
-        }
-    }
-    */
 }
 
 // depth==0 is top, depth==1 is before top, etc
@@ -867,7 +824,7 @@ STATIC void emit_get_stack_pointer_to_reg_for_pop(emit_t *emit, mp_uint_t reg_de
                     break;
                 default:
                     // not handled
-                    assert(0);
+                    mp_not_implemented("conversion to object");
             }
         }
 
@@ -977,9 +934,9 @@ STATIC void emit_native_load_const_tok(emit_t *emit, mp_token_kind_t tok) {
             case MP_TOKEN_KW_NONE: vtype = VTYPE_PTR_NONE; val = 0; break;
             case MP_TOKEN_KW_FALSE: vtype = VTYPE_BOOL; val = 0; break;
             case MP_TOKEN_KW_TRUE: vtype = VTYPE_BOOL; val = 1; break;
-            no_other_choice1:
-            case MP_TOKEN_ELLIPSIS: vtype = VTYPE_PYOBJ; val = (mp_uint_t)&mp_const_ellipsis_obj; break;
-            default: assert(0); goto no_other_choice1; // to help flow control analysis
+            default:
+                assert(tok == MP_TOKEN_ELLIPSIS);
+                vtype = VTYPE_PYOBJ; val = (mp_uint_t)&mp_const_ellipsis_obj; break;
         }
     } else {
         vtype = VTYPE_PYOBJ;
@@ -987,9 +944,9 @@ STATIC void emit_native_load_const_tok(emit_t *emit, mp_token_kind_t tok) {
             case MP_TOKEN_KW_NONE: val = (mp_uint_t)mp_const_none; break;
             case MP_TOKEN_KW_FALSE: val = (mp_uint_t)mp_const_false; break;
             case MP_TOKEN_KW_TRUE: val = (mp_uint_t)mp_const_true; break;
-            no_other_choice2:
-            case MP_TOKEN_ELLIPSIS: val = (mp_uint_t)&mp_const_ellipsis_obj; break;
-            default: assert(0); goto no_other_choice2; // to help flow control analysis
+            default:
+                assert(tok == MP_TOKEN_ELLIPSIS);
+                val = (mp_uint_t)&mp_const_ellipsis_obj; break;
         }
     }
     emit_post_push_imm(emit, vtype, val);
@@ -1011,9 +968,7 @@ STATIC void emit_native_load_const_str(emit_t *emit, qstr qst) {
     // do native array access.  For now we just load them as any other object.
     /*
     if (emit->do_viper_types) {
-        // not implemented properly
         // load a pointer to the asciiz string?
-        assert(0);
         emit_post_push_imm(emit, VTYPE_PTR, (mp_uint_t)qstr_str(qst));
     } else
     */
@@ -1111,12 +1066,18 @@ STATIC void emit_native_load_attr(emit_t *emit, qstr qst) {
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
 }
 
-STATIC void emit_native_load_method(emit_t *emit, qstr qst) {
-    vtype_kind_t vtype_base;
-    emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = base
-    assert(vtype_base == VTYPE_PYOBJ);
-    emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr
-    emit_call_with_imm_arg(emit, MP_F_LOAD_METHOD, qst, REG_ARG_2); // arg2 = method name
+STATIC void emit_native_load_method(emit_t *emit, qstr qst, bool is_super) {
+    if (is_super) {
+        emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, 3); // arg2 = dest ptr
+        emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_2, 2); // arg2 = dest ptr
+        emit_call_with_imm_arg(emit, MP_F_LOAD_SUPER_METHOD, qst, REG_ARG_1); // arg1 = method name
+    } else {
+        vtype_kind_t vtype_base;
+        emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = base
+        assert(vtype_base == VTYPE_PYOBJ);
+        emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr
+        emit_call_with_imm_arg(emit, MP_F_LOAD_METHOD, qst, REG_ARG_2); // arg2 = method name
+    }
 }
 
 STATIC void emit_native_load_build_class(emit_t *emit) {
@@ -1799,23 +1760,29 @@ STATIC void emit_native_end_finally(emit_t *emit) {
     emit_post(emit);
 }
 
-STATIC void emit_native_get_iter(emit_t *emit) {
+STATIC void emit_native_get_iter(emit_t *emit, bool use_stack) {
     // perhaps the difficult one, as we want to rewrite for loops using native code
     // in cases where we iterate over a Python object, can we use normal runtime calls?
 
     vtype_kind_t vtype;
     emit_pre_pop_reg(emit, &vtype, REG_ARG_1);
     assert(vtype == VTYPE_PYOBJ);
-    emit_call(emit, MP_F_GETITER);
-    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
+    if (use_stack) {
+        emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_2, MP_OBJ_ITER_BUF_NSLOTS);
+        emit_call(emit, MP_F_NATIVE_GETITER);
+    } else {
+        // mp_getiter will allocate the iter_buf on the heap
+        ASM_MOV_IMM_TO_REG(emit->as, 0, REG_ARG_2);
+        emit_call(emit, MP_F_NATIVE_GETITER);
+        emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
+    }
 }
 
 STATIC void emit_native_for_iter(emit_t *emit, mp_uint_t label) {
     emit_native_pre(emit);
-    vtype_kind_t vtype;
-    emit_access_stack(emit, 1, &vtype, REG_ARG_1);
-    assert(vtype == VTYPE_PYOBJ);
-    emit_call(emit, MP_F_ITERNEXT);
+    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_1, MP_OBJ_ITER_BUF_NSLOTS);
+    adjust_stack(emit, MP_OBJ_ITER_BUF_NSLOTS);
+    emit_call(emit, MP_F_NATIVE_ITERNEXT);
     ASM_MOV_IMM_TO_REG(emit->as, (mp_uint_t)MP_OBJ_STOP_ITERATION, REG_TEMP1);
     ASM_JUMP_IF_REG_EQ(emit->as, REG_RET, REG_TEMP1, label);
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
@@ -1824,7 +1791,7 @@ STATIC void emit_native_for_iter(emit_t *emit, mp_uint_t label) {
 STATIC void emit_native_for_iter_end(emit_t *emit) {
     // adjust stack counter (we get here from for_iter ending, which popped the value for us)
     emit_native_pre(emit);
-    adjust_stack(emit, -1);
+    adjust_stack(emit, -MP_OBJ_ITER_BUF_NSLOTS);
     emit_post(emit);
 }
 
@@ -1837,12 +1804,6 @@ STATIC void emit_native_pop_block(emit_t *emit) {
 
 STATIC void emit_native_pop_except(emit_t *emit) {
     (void)emit;
-    /*
-    emit_native_pre(emit);
-    emit_call(emit, MP_F_NLR_POP);
-    adjust_stack(emit, -(mp_int_t)(sizeof(nlr_buf_t) / sizeof(mp_uint_t)));
-    emit_post(emit);
-    */
 }
 
 STATIC void emit_native_unary_op(emit_t *emit, mp_unary_op_t op) {
@@ -2196,7 +2157,8 @@ STATIC void emit_native_call_function(emit_t *emit, mp_uint_t n_positional, mp_u
                 emit_post_top_set_vtype(emit, vtype_cast);
                 break;
             default:
-                assert(!"TODO: convert obj to int");
+                // this can happen when casting a cast: int(int)
+                mp_not_implemented("casting");
         }
     } else {
         assert(vtype_fun == VTYPE_PYOBJ);
@@ -2253,7 +2215,6 @@ STATIC void emit_native_return_value(emit_t *emit) {
         assert(vtype == VTYPE_PYOBJ);
     }
     emit->last_emit_was_return_value = true;
-    //ASM_BREAK_POINT(emit->as); // to insert a break-point for debugging
     ASM_EXIT(emit->as);
 }
 
@@ -2271,12 +2232,12 @@ STATIC void emit_native_raise_varargs(emit_t *emit, mp_uint_t n_args) {
 STATIC void emit_native_yield_value(emit_t *emit) {
     // not supported (for now)
     (void)emit;
-    assert(0);
+    mp_not_implemented("native yield");
 }
 STATIC void emit_native_yield_from(emit_t *emit) {
     // not supported (for now)
     (void)emit;
-    assert(0);
+    mp_not_implemented("native yield from");
 }
 
 STATIC void emit_native_start_except_handler(emit_t *emit) {
diff --git a/py/formatfloat.c b/py/formatfloat.c
index 58a423e38c..9ff80d9f63 100644
--- a/py/formatfloat.c
+++ b/py/formatfloat.c
@@ -388,10 +388,6 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
             }
             *rs = '1';
         }
-        if (fp_isless1(f) && fmt == 'f') {
-            // We rounded up to 1.0
-            prec--;
-        }
     }
 
     // verify that we did not overrun the input buffer so far
diff --git a/py/frozenmod.c b/py/frozenmod.c
index 660167eed4..1eaaf574a2 100644
--- a/py/frozenmod.c
+++ b/py/frozenmod.c
@@ -43,16 +43,16 @@ extern const char mp_frozen_str_names[];
 extern const uint32_t mp_frozen_str_sizes[];
 extern const char mp_frozen_str_content[];
 
-STATIC mp_lexer_t *mp_find_frozen_str(const char *str, size_t len) {
+// On input, *len contains size of name, on output - size of content
+const char *mp_find_frozen_str(const char *str, size_t *len) {
     const char *name = mp_frozen_str_names;
 
     size_t offset = 0;
     for (int i = 0; *name != 0; i++) {
         size_t l = strlen(name);
-        if (l == len && !memcmp(str, name, l)) {
-            qstr source = qstr_from_strn(name, l);
-            mp_lexer_t *lex = MICROPY_MODULE_FROZEN_LEXER(source, mp_frozen_str_content + offset, mp_frozen_str_sizes[i], 0);
-            return lex;
+        if (l == *len && !memcmp(str, name, l)) {
+            *len = mp_frozen_str_sizes[i];
+            return mp_frozen_str_content + offset;
         }
         name += l + 1;
         offset += mp_frozen_str_sizes[i] + 1;
@@ -60,6 +60,19 @@ STATIC mp_lexer_t *mp_find_frozen_str(const char *str, size_t len) {
     return NULL;
 }
 
+STATIC mp_lexer_t *mp_lexer_frozen_str(const char *str, size_t len) {
+    size_t name_len = len;
+    const char *content = mp_find_frozen_str(str, &len);
+
+    if (content == NULL) {
+        return NULL;
+    }
+
+    qstr source = qstr_from_strn(str, name_len);
+    mp_lexer_t *lex = MICROPY_MODULE_FROZEN_LEXER(source, content, len, 0);
+    return lex;
+}
+
 #endif
 
 #if MICROPY_MODULE_FROZEN_MPY
@@ -124,7 +137,7 @@ mp_import_stat_t mp_frozen_stat(const char *str) {
 
 int mp_find_frozen_module(const char *str, size_t len, void **data) {
     #if MICROPY_MODULE_FROZEN_STR
-    mp_lexer_t *lex = mp_find_frozen_str(str, len);
+    mp_lexer_t *lex = mp_lexer_frozen_str(str, len);
     if (lex != NULL) {
         *data = lex;
         return MP_FROZEN_STR;
diff --git a/py/frozenmod.h b/py/frozenmod.h
index f08cb5e321..4b125ff247 100644
--- a/py/frozenmod.h
+++ b/py/frozenmod.h
@@ -24,6 +24,8 @@
  * THE SOFTWARE.
  */
 
+#include "py/lexer.h"
+
 enum {
     MP_FROZEN_NONE,
     MP_FROZEN_STR,
@@ -31,4 +33,5 @@ enum {
 };
 
 int mp_find_frozen_module(const char *str, size_t len, void **data);
+const char *mp_find_frozen_str(const char *str, size_t *len);
 mp_import_stat_t mp_frozen_stat(const char *str);
diff --git a/py/gc.c b/py/gc.c
index 7ed53cfc7f..937dae44f7 100644
--- a/py/gc.c
+++ b/py/gc.c
@@ -258,18 +258,20 @@ STATIC void gc_sweep(void) {
             case AT_HEAD:
 #if MICROPY_ENABLE_FINALISER
                 if (FTB_GET(block)) {
-                    #if MICROPY_PY_THREAD
-                    // TODO need to think about reentrancy with finaliser code
-                    assert(!"finaliser with threading not implemented");
-                    #endif
                     mp_obj_base_t *obj = (mp_obj_base_t*)PTR_FROM_BLOCK(block);
                     if (obj->type != NULL) {
                         // if the object has a type then see if it has a __del__ method
                         mp_obj_t dest[2];
                         mp_load_method_maybe(MP_OBJ_FROM_PTR(obj), MP_QSTR___del__, dest);
                         if (dest[0] != MP_OBJ_NULL) {
-                            // load_method returned a method
-                            mp_call_method_n_kw(0, 0, dest);
+                            // load_method returned a method, execute it in a protected environment
+                            #if MICROPY_ENABLE_SCHEDULER
+                            mp_sched_lock();
+                            #endif
+                            mp_call_function_1_protected(dest[0], dest[1]);
+                            #if MICROPY_ENABLE_SCHEDULER
+                            mp_sched_unlock();
+                            #endif
                         }
                     }
                     // clear finaliser flag
diff --git a/py/grammar.h b/py/grammar.h
index dd21d193a1..930d96dc15 100644
--- a/py/grammar.h
+++ b/py/grammar.h
@@ -37,12 +37,12 @@
 // file_input: (NEWLINE | stmt)* ENDMARKER
 // eval_input: testlist NEWLINE* ENDMARKER
 
-DEF_RULE(single_input, nc, or(3), tok(NEWLINE), rule(simple_stmt), rule(compound_stmt))
+DEF_RULE_NC(single_input, or(3), tok(NEWLINE), rule(simple_stmt), rule(compound_stmt))
 DEF_RULE(file_input, c(generic_all_nodes), and_ident(1), opt_rule(file_input_2))
 DEF_RULE(file_input_2, c(generic_all_nodes), one_or_more, rule(file_input_3))
-DEF_RULE(file_input_3, nc, or(2), tok(NEWLINE), rule(stmt))
-DEF_RULE(eval_input, nc, and_ident(2), rule(testlist), opt_rule(eval_input_2))
-DEF_RULE(eval_input_2, nc, and(1), tok(NEWLINE))
+DEF_RULE_NC(file_input_3, or(2), tok(NEWLINE), rule(stmt))
+DEF_RULE_NC(eval_input, and_ident(2), rule(testlist), opt_rule(eval_input_2))
+DEF_RULE_NC(eval_input_2, and(1), tok(NEWLINE))
 
 // decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
 // decorators: decorator+
@@ -55,42 +55,42 @@ DEF_RULE(eval_input_2, nc, and(1), tok(NEWLINE))
 // varargslist: vfpdef ['=' test] (',' vfpdef ['=' test])* [',' ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] |  '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef
 // vfpdef: NAME
 
-DEF_RULE(decorator, nc, and(4), tok(DEL_AT), rule(dotted_name), opt_rule(trailer_paren), tok(NEWLINE))
-DEF_RULE(decorators, nc, one_or_more, rule(decorator))
+DEF_RULE_NC(decorator, and(4), tok(DEL_AT), rule(dotted_name), opt_rule(trailer_paren), tok(NEWLINE))
+DEF_RULE_NC(decorators, one_or_more, rule(decorator))
 DEF_RULE(decorated, c(decorated), and_ident(2), rule(decorators), rule(decorated_body))
 #if MICROPY_PY_ASYNC_AWAIT
-DEF_RULE(decorated_body, nc, or(3), rule(classdef), rule(funcdef), rule(async_funcdef))
-DEF_RULE(async_funcdef, nc, and(2), tok(KW_ASYNC), rule(funcdef))
+DEF_RULE_NC(decorated_body, or(3), rule(classdef), rule(funcdef), rule(async_funcdef))
+DEF_RULE_NC(async_funcdef, and(2), tok(KW_ASYNC), rule(funcdef))
 #else
-DEF_RULE(decorated_body, nc, or(2), rule(classdef), rule(funcdef))
+DEF_RULE_NC(decorated_body, or(2), rule(classdef), rule(funcdef))
 #endif
 DEF_RULE(funcdef, c(funcdef), and_blank(8), tok(KW_DEF), tok(NAME), tok(DEL_PAREN_OPEN), opt_rule(typedargslist), tok(DEL_PAREN_CLOSE), opt_rule(funcdefrettype), tok(DEL_COLON), rule(suite))
-DEF_RULE(funcdefrettype, nc, and_ident(2), tok(DEL_MINUS_MORE), rule(test))
+DEF_RULE_NC(funcdefrettype, and_ident(2), tok(DEL_MINUS_MORE), rule(test))
 // note: typedargslist lets through more than is allowed, compiler does further checks
-DEF_RULE(typedargslist, nc, list_with_end, rule(typedargslist_item), tok(DEL_COMMA))
-DEF_RULE(typedargslist_item, nc, or(3), rule(typedargslist_name), rule(typedargslist_star), rule(typedargslist_dbl_star))
-DEF_RULE(typedargslist_name, nc, and_ident(3), tok(NAME), opt_rule(typedargslist_colon), opt_rule(typedargslist_equal))
-DEF_RULE(typedargslist_star, nc, and(2), tok(OP_STAR), opt_rule(tfpdef))
-DEF_RULE(typedargslist_dbl_star, nc, and(3), tok(OP_DBL_STAR), tok(NAME), opt_rule(typedargslist_colon))
-DEF_RULE(typedargslist_colon, nc, and_ident(2), tok(DEL_COLON), rule(test))
-DEF_RULE(typedargslist_equal, nc, and_ident(2), tok(DEL_EQUAL), rule(test))
-DEF_RULE(tfpdef, nc, and(2), tok(NAME), opt_rule(typedargslist_colon))
+DEF_RULE_NC(typedargslist, list_with_end, rule(typedargslist_item), tok(DEL_COMMA))
+DEF_RULE_NC(typedargslist_item, or(3), rule(typedargslist_name), rule(typedargslist_star), rule(typedargslist_dbl_star))
+DEF_RULE_NC(typedargslist_name, and_ident(3), tok(NAME), opt_rule(typedargslist_colon), opt_rule(typedargslist_equal))
+DEF_RULE_NC(typedargslist_star, and(2), tok(OP_STAR), opt_rule(tfpdef))
+DEF_RULE_NC(typedargslist_dbl_star, and(3), tok(OP_DBL_STAR), tok(NAME), opt_rule(typedargslist_colon))
+DEF_RULE_NC(typedargslist_colon, and_ident(2), tok(DEL_COLON), rule(test))
+DEF_RULE_NC(typedargslist_equal, and_ident(2), tok(DEL_EQUAL), rule(test))
+DEF_RULE_NC(tfpdef, and(2), tok(NAME), opt_rule(typedargslist_colon))
 // note: varargslist lets through more than is allowed, compiler does further checks
-DEF_RULE(varargslist, nc, list_with_end, rule(varargslist_item), tok(DEL_COMMA))
-DEF_RULE(varargslist_item, nc, or(3), rule(varargslist_name), rule(varargslist_star), rule(varargslist_dbl_star))
-DEF_RULE(varargslist_name, nc, and_ident(2), tok(NAME), opt_rule(varargslist_equal))
-DEF_RULE(varargslist_star, nc, and(2), tok(OP_STAR), opt_rule(vfpdef))
-DEF_RULE(varargslist_dbl_star, nc, and(2), tok(OP_DBL_STAR), tok(NAME))
-DEF_RULE(varargslist_equal, nc, and_ident(2), tok(DEL_EQUAL), rule(test))
-DEF_RULE(vfpdef, nc, and_ident(1), tok(NAME))
+DEF_RULE_NC(varargslist, list_with_end, rule(varargslist_item), tok(DEL_COMMA))
+DEF_RULE_NC(varargslist_item, or(3), rule(varargslist_name), rule(varargslist_star), rule(varargslist_dbl_star))
+DEF_RULE_NC(varargslist_name, and_ident(2), tok(NAME), opt_rule(varargslist_equal))
+DEF_RULE_NC(varargslist_star, and(2), tok(OP_STAR), opt_rule(vfpdef))
+DEF_RULE_NC(varargslist_dbl_star, and(2), tok(OP_DBL_STAR), tok(NAME))
+DEF_RULE_NC(varargslist_equal, and_ident(2), tok(DEL_EQUAL), rule(test))
+DEF_RULE_NC(vfpdef, and_ident(1), tok(NAME))
 
 // stmt: compound_stmt | simple_stmt
 
-DEF_RULE(stmt, nc, or(2), rule(compound_stmt), rule(simple_stmt))
+DEF_RULE_NC(stmt, or(2), rule(compound_stmt), rule(simple_stmt))
 
 // simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
 
-DEF_RULE(simple_stmt, nc, and_ident(2), rule(simple_stmt_2), tok(NEWLINE))
+DEF_RULE_NC(simple_stmt, and_ident(2), rule(simple_stmt_2), tok(NEWLINE))
 DEF_RULE(simple_stmt_2, c(generic_all_nodes), list_with_end, rule(small_stmt), tok(DEL_SEMICOLON))
 
 // small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
@@ -99,16 +99,16 @@ DEF_RULE(simple_stmt_2, c(generic_all_nodes), list_with_end, rule(small_stmt), t
 // augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//='
 // # For normal assignments, additional restrictions enforced by the interpreter
 
-DEF_RULE(small_stmt, nc, or(8), rule(del_stmt), rule(pass_stmt), rule(flow_stmt), rule(import_stmt), rule(global_stmt), rule(nonlocal_stmt), rule(assert_stmt), rule(expr_stmt))
+DEF_RULE_NC(small_stmt, or(8), rule(del_stmt), rule(pass_stmt), rule(flow_stmt), rule(import_stmt), rule(global_stmt), rule(nonlocal_stmt), rule(assert_stmt), rule(expr_stmt))
 DEF_RULE(expr_stmt, c(expr_stmt), and(2), rule(testlist_star_expr), opt_rule(expr_stmt_2))
-DEF_RULE(expr_stmt_2, nc, or(2), rule(expr_stmt_augassign), rule(expr_stmt_assign_list))
-DEF_RULE(expr_stmt_augassign, nc, and_ident(2), rule(augassign), rule(expr_stmt_6))
-DEF_RULE(expr_stmt_assign_list, nc, one_or_more, rule(expr_stmt_assign))
-DEF_RULE(expr_stmt_assign, nc, and_ident(2), tok(DEL_EQUAL), rule(expr_stmt_6))
-DEF_RULE(expr_stmt_6, nc, or(2), rule(yield_expr), rule(testlist_star_expr))
+DEF_RULE_NC(expr_stmt_2, or(2), rule(expr_stmt_augassign), rule(expr_stmt_assign_list))
+DEF_RULE_NC(expr_stmt_augassign, and_ident(2), rule(augassign), rule(expr_stmt_6))
+DEF_RULE_NC(expr_stmt_assign_list, one_or_more, rule(expr_stmt_assign))
+DEF_RULE_NC(expr_stmt_assign, and_ident(2), tok(DEL_EQUAL), rule(expr_stmt_6))
+DEF_RULE_NC(expr_stmt_6, or(2), rule(yield_expr), rule(testlist_star_expr))
 DEF_RULE(testlist_star_expr, c(generic_tuple), list_with_end, rule(testlist_star_expr_2), tok(DEL_COMMA))
-DEF_RULE(testlist_star_expr_2, nc, or(2), rule(star_expr), rule(test))
-DEF_RULE(augassign, nc, or(12), tok(DEL_PLUS_EQUAL), tok(DEL_MINUS_EQUAL), tok(DEL_STAR_EQUAL), tok(DEL_SLASH_EQUAL), tok(DEL_PERCENT_EQUAL), tok(DEL_AMPERSAND_EQUAL), tok(DEL_PIPE_EQUAL), tok(DEL_CARET_EQUAL), tok(DEL_DBL_LESS_EQUAL), tok(DEL_DBL_MORE_EQUAL), tok(DEL_DBL_STAR_EQUAL), tok(DEL_DBL_SLASH_EQUAL))
+DEF_RULE_NC(testlist_star_expr_2, or(2), rule(star_expr), rule(test))
+DEF_RULE_NC(augassign, or(12), tok(DEL_PLUS_EQUAL), tok(DEL_MINUS_EQUAL), tok(DEL_STAR_EQUAL), tok(DEL_SLASH_EQUAL), tok(DEL_PERCENT_EQUAL), tok(DEL_AMPERSAND_EQUAL), tok(DEL_PIPE_EQUAL), tok(DEL_CARET_EQUAL), tok(DEL_DBL_LESS_EQUAL), tok(DEL_DBL_MORE_EQUAL), tok(DEL_DBL_STAR_EQUAL), tok(DEL_DBL_SLASH_EQUAL))
 
 // del_stmt: 'del' exprlist
 // pass_stmt: 'pass'
@@ -121,14 +121,14 @@ DEF_RULE(augassign, nc, or(12), tok(DEL_PLUS_EQUAL), tok(DEL_MINUS_EQUAL), tok(D
 
 DEF_RULE(del_stmt, c(del_stmt), and(2), tok(KW_DEL), rule(exprlist))
 DEF_RULE(pass_stmt, c(generic_all_nodes), and(1), tok(KW_PASS))
-DEF_RULE(flow_stmt, nc, or(5), rule(break_stmt), rule(continue_stmt), rule(return_stmt), rule(raise_stmt), rule(yield_stmt))
+DEF_RULE_NC(flow_stmt, or(5), rule(break_stmt), rule(continue_stmt), rule(return_stmt), rule(raise_stmt), rule(yield_stmt))
 DEF_RULE(break_stmt, c(break_stmt), and(1), tok(KW_BREAK))
 DEF_RULE(continue_stmt, c(continue_stmt), and(1), tok(KW_CONTINUE))
 DEF_RULE(return_stmt, c(return_stmt), and(2), tok(KW_RETURN), opt_rule(testlist))
 DEF_RULE(yield_stmt, c(yield_stmt), and(1), rule(yield_expr))
 DEF_RULE(raise_stmt, c(raise_stmt), and(2), tok(KW_RAISE), opt_rule(raise_stmt_arg))
-DEF_RULE(raise_stmt_arg, nc, and_ident(2), rule(test), opt_rule(raise_stmt_from))
-DEF_RULE(raise_stmt_from, nc, and_ident(2), tok(KW_FROM), rule(test))
+DEF_RULE_NC(raise_stmt_arg, and_ident(2), rule(test), opt_rule(raise_stmt_from))
+DEF_RULE_NC(raise_stmt_from, and_ident(2), tok(KW_FROM), rule(test))
 
 // import_stmt: import_name | import_from
 // import_name: 'import' dotted_as_names
@@ -142,26 +142,26 @@ DEF_RULE(raise_stmt_from, nc, and_ident(2), tok(KW_FROM), rule(test))
 // nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
 // assert_stmt: 'assert' test [',' test]
 
-DEF_RULE(import_stmt, nc, or(2), rule(import_name), rule(import_from))
+DEF_RULE_NC(import_stmt, or(2), rule(import_name), rule(import_from))
 DEF_RULE(import_name, c(import_name), and(2), tok(KW_IMPORT), rule(dotted_as_names))
 DEF_RULE(import_from, c(import_from), and(4), tok(KW_FROM), rule(import_from_2), tok(KW_IMPORT), rule(import_from_3))
-DEF_RULE(import_from_2, nc, or(2), rule(dotted_name), rule(import_from_2b))
-DEF_RULE(import_from_2b, nc, and_ident(2), rule(one_or_more_period_or_ellipsis), opt_rule(dotted_name))
-DEF_RULE(import_from_3, nc, or(3), tok(OP_STAR), rule(import_as_names_paren), rule(import_as_names))
-DEF_RULE(import_as_names_paren, nc, and_ident(3), tok(DEL_PAREN_OPEN), rule(import_as_names), tok(DEL_PAREN_CLOSE))
-DEF_RULE(one_or_more_period_or_ellipsis, nc, one_or_more, rule(period_or_ellipsis))
-DEF_RULE(period_or_ellipsis, nc, or(2), tok(DEL_PERIOD), tok(ELLIPSIS))
-DEF_RULE(import_as_name, nc, and(2), tok(NAME), opt_rule(as_name))
-DEF_RULE(dotted_as_name, nc, and_ident(2), rule(dotted_name), opt_rule(as_name))
-DEF_RULE(as_name, nc, and_ident(2), tok(KW_AS), tok(NAME))
-DEF_RULE(import_as_names, nc, list_with_end, rule(import_as_name), tok(DEL_COMMA))
-DEF_RULE(dotted_as_names, nc, list, rule(dotted_as_name), tok(DEL_COMMA))
-DEF_RULE(dotted_name, nc, list, tok(NAME), tok(DEL_PERIOD))
+DEF_RULE_NC(import_from_2, or(2), rule(dotted_name), rule(import_from_2b))
+DEF_RULE_NC(import_from_2b, and_ident(2), rule(one_or_more_period_or_ellipsis), opt_rule(dotted_name))
+DEF_RULE_NC(import_from_3, or(3), tok(OP_STAR), rule(import_as_names_paren), rule(import_as_names))
+DEF_RULE_NC(import_as_names_paren, and_ident(3), tok(DEL_PAREN_OPEN), rule(import_as_names), tok(DEL_PAREN_CLOSE))
+DEF_RULE_NC(one_or_more_period_or_ellipsis, one_or_more, rule(period_or_ellipsis))
+DEF_RULE_NC(period_or_ellipsis, or(2), tok(DEL_PERIOD), tok(ELLIPSIS))
+DEF_RULE_NC(import_as_name, and(2), tok(NAME), opt_rule(as_name))
+DEF_RULE_NC(dotted_as_name, and_ident(2), rule(dotted_name), opt_rule(as_name))
+DEF_RULE_NC(as_name, and_ident(2), tok(KW_AS), tok(NAME))
+DEF_RULE_NC(import_as_names, list_with_end, rule(import_as_name), tok(DEL_COMMA))
+DEF_RULE_NC(dotted_as_names, list, rule(dotted_as_name), tok(DEL_COMMA))
+DEF_RULE_NC(dotted_name, list, tok(NAME), tok(DEL_PERIOD))
 DEF_RULE(global_stmt, c(global_stmt), and(2), tok(KW_GLOBAL), rule(name_list))
 DEF_RULE(nonlocal_stmt, c(nonlocal_stmt), and(2), tok(KW_NONLOCAL), rule(name_list))
-DEF_RULE(name_list, nc, list, tok(NAME), tok(DEL_COMMA))
+DEF_RULE_NC(name_list, list, tok(NAME), tok(DEL_COMMA))
 DEF_RULE(assert_stmt, c(assert_stmt), and(3), tok(KW_ASSERT), rule(test), opt_rule(assert_stmt_extra))
-DEF_RULE(assert_stmt_extra, nc, and_ident(2), tok(DEL_COMMA), rule(test))
+DEF_RULE_NC(assert_stmt_extra, and_ident(2), tok(DEL_COMMA), rule(test))
 
 // compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
 // if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
@@ -176,31 +176,31 @@ DEF_RULE(assert_stmt_extra, nc, and_ident(2), tok(DEL_COMMA), rule(test))
 // async_stmt: 'async' (funcdef | with_stmt | for_stmt)
 
 #if MICROPY_PY_ASYNC_AWAIT
-DEF_RULE(compound_stmt, nc, or(9), rule(if_stmt), rule(while_stmt), rule(for_stmt), rule(try_stmt), rule(with_stmt), rule(funcdef), rule(classdef), rule(decorated), rule(async_stmt))
+DEF_RULE_NC(compound_stmt, or(9), rule(if_stmt), rule(while_stmt), rule(for_stmt), rule(try_stmt), rule(with_stmt), rule(funcdef), rule(classdef), rule(decorated), rule(async_stmt))
 DEF_RULE(async_stmt, c(async_stmt), and(2), tok(KW_ASYNC), rule(async_stmt_2))
-DEF_RULE(async_stmt_2, nc, or(3), rule(funcdef), rule(with_stmt), rule(for_stmt))
+DEF_RULE_NC(async_stmt_2, or(3), rule(funcdef), rule(with_stmt), rule(for_stmt))
 #else
-DEF_RULE(compound_stmt, nc, or(8), rule(if_stmt), rule(while_stmt), rule(for_stmt), rule(try_stmt), rule(with_stmt), rule(funcdef), rule(classdef), rule(decorated))
+DEF_RULE_NC(compound_stmt, or(8), rule(if_stmt), rule(while_stmt), rule(for_stmt), rule(try_stmt), rule(with_stmt), rule(funcdef), rule(classdef), rule(decorated))
 #endif
 DEF_RULE(if_stmt, c(if_stmt), and(6), tok(KW_IF), rule(test), tok(DEL_COLON), rule(suite), opt_rule(if_stmt_elif_list), opt_rule(else_stmt))
-DEF_RULE(if_stmt_elif_list, nc, one_or_more, rule(if_stmt_elif))
-DEF_RULE(if_stmt_elif, nc, and(4), tok(KW_ELIF), rule(test), tok(DEL_COLON), rule(suite))
+DEF_RULE_NC(if_stmt_elif_list, one_or_more, rule(if_stmt_elif))
+DEF_RULE_NC(if_stmt_elif, and(4), tok(KW_ELIF), rule(test), tok(DEL_COLON), rule(suite))
 DEF_RULE(while_stmt, c(while_stmt), and(5), tok(KW_WHILE), rule(test), tok(DEL_COLON), rule(suite), opt_rule(else_stmt))
 DEF_RULE(for_stmt, c(for_stmt), and(7), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(testlist), tok(DEL_COLON), rule(suite), opt_rule(else_stmt))
 DEF_RULE(try_stmt, c(try_stmt), and(4), tok(KW_TRY), tok(DEL_COLON), rule(suite), rule(try_stmt_2))
-DEF_RULE(try_stmt_2, nc, or(2), rule(try_stmt_except_and_more), rule(try_stmt_finally))
-DEF_RULE(try_stmt_except_and_more, nc, and_ident(3), rule(try_stmt_except_list), opt_rule(else_stmt), opt_rule(try_stmt_finally))
-DEF_RULE(try_stmt_except, nc, and(4), tok(KW_EXCEPT), opt_rule(try_stmt_as_name), tok(DEL_COLON), rule(suite))
-DEF_RULE(try_stmt_as_name, nc, and_ident(2), rule(test), opt_rule(as_name))
-DEF_RULE(try_stmt_except_list, nc, one_or_more, rule(try_stmt_except))
-DEF_RULE(try_stmt_finally, nc, and(3), tok(KW_FINALLY), tok(DEL_COLON), rule(suite))
-DEF_RULE(else_stmt, nc, and_ident(3), tok(KW_ELSE), tok(DEL_COLON), rule(suite))
+DEF_RULE_NC(try_stmt_2, or(2), rule(try_stmt_except_and_more), rule(try_stmt_finally))
+DEF_RULE_NC(try_stmt_except_and_more, and_ident(3), rule(try_stmt_except_list), opt_rule(else_stmt), opt_rule(try_stmt_finally))
+DEF_RULE_NC(try_stmt_except, and(4), tok(KW_EXCEPT), opt_rule(try_stmt_as_name), tok(DEL_COLON), rule(suite))
+DEF_RULE_NC(try_stmt_as_name, and_ident(2), rule(test), opt_rule(as_name))
+DEF_RULE_NC(try_stmt_except_list, one_or_more, rule(try_stmt_except))
+DEF_RULE_NC(try_stmt_finally, and(3), tok(KW_FINALLY), tok(DEL_COLON), rule(suite))
+DEF_RULE_NC(else_stmt, and_ident(3), tok(KW_ELSE), tok(DEL_COLON), rule(suite))
 DEF_RULE(with_stmt, c(with_stmt), and(4), tok(KW_WITH), rule(with_stmt_list), tok(DEL_COLON), rule(suite))
-DEF_RULE(with_stmt_list, nc, list, rule(with_item), tok(DEL_COMMA))
-DEF_RULE(with_item, nc, and_ident(2), rule(test), opt_rule(with_item_as))
-DEF_RULE(with_item_as, nc, and_ident(2), tok(KW_AS), rule(expr))
-DEF_RULE(suite, nc, or(2), rule(suite_block), rule(simple_stmt))
-DEF_RULE(suite_block, nc, and_ident(4), tok(NEWLINE), tok(INDENT), rule(suite_block_stmts), tok(DEDENT))
+DEF_RULE_NC(with_stmt_list, list, rule(with_item), tok(DEL_COMMA))
+DEF_RULE_NC(with_item, and_ident(2), rule(test), opt_rule(with_item_as))
+DEF_RULE_NC(with_item_as, and_ident(2), tok(KW_AS), rule(expr))
+DEF_RULE_NC(suite, or(2), rule(suite_block), rule(simple_stmt))
+DEF_RULE_NC(suite_block, and_ident(4), tok(NEWLINE), tok(INDENT), rule(suite_block_stmts), tok(DEDENT))
 DEF_RULE(suite_block_stmts, c(generic_all_nodes), one_or_more, rule(stmt))
 
 // test: or_test ['if' or_test 'else' test] | lambdef
@@ -208,10 +208,10 @@ DEF_RULE(suite_block_stmts, c(generic_all_nodes), one_or_more, rule(stmt))
 // lambdef: 'lambda' [varargslist] ':' test
 // lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
 
-DEF_RULE(test, nc, or(2), rule(lambdef), rule(test_if_expr))
+DEF_RULE_NC(test, or(2), rule(lambdef), rule(test_if_expr))
 DEF_RULE(test_if_expr, c(test_if_expr), and_ident(2), rule(or_test), opt_rule(test_if_else))
-DEF_RULE(test_if_else, nc, and(4), tok(KW_IF), rule(or_test), tok(KW_ELSE), rule(test))
-DEF_RULE(test_nocond, nc, or(2), rule(lambdef_nocond), rule(or_test))
+DEF_RULE_NC(test_if_else, and(4), tok(KW_IF), rule(or_test), tok(KW_ELSE), rule(test))
+DEF_RULE_NC(test_nocond, or(2), rule(lambdef_nocond), rule(or_test))
 DEF_RULE(lambdef, c(lambdef), and_blank(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test))
 DEF_RULE(lambdef_nocond, c(lambdef), and_blank(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test_nocond))
 
@@ -233,53 +233,52 @@ DEF_RULE(lambdef_nocond, c(lambdef), and_blank(4), tok(KW_LAMBDA), opt_rule(vara
 
 DEF_RULE(or_test, c(or_test), list, rule(and_test), tok(KW_OR))
 DEF_RULE(and_test, c(and_test), list, rule(not_test), tok(KW_AND))
-DEF_RULE(not_test, nc, or(2), rule(not_test_2), rule(comparison))
+DEF_RULE_NC(not_test, or(2), rule(not_test_2), rule(comparison))
 DEF_RULE(not_test_2, c(not_test_2), and(2), tok(KW_NOT), rule(not_test))
 DEF_RULE(comparison, c(comparison), list, rule(expr), rule(comp_op))
-DEF_RULE(comp_op, nc, or(9), tok(OP_LESS), tok(OP_MORE), tok(OP_DBL_EQUAL), tok(OP_LESS_EQUAL), tok(OP_MORE_EQUAL), tok(OP_NOT_EQUAL), tok(KW_IN), rule(comp_op_not_in), rule(comp_op_is))
-DEF_RULE(comp_op_not_in, nc, and(2), tok(KW_NOT), tok(KW_IN))
-DEF_RULE(comp_op_is, nc, and(2), tok(KW_IS), opt_rule(comp_op_is_not))
-DEF_RULE(comp_op_is_not, nc, and(1), tok(KW_NOT))
+DEF_RULE_NC(comp_op, or(9), tok(OP_LESS), tok(OP_MORE), tok(OP_DBL_EQUAL), tok(OP_LESS_EQUAL), tok(OP_MORE_EQUAL), tok(OP_NOT_EQUAL), tok(KW_IN), rule(comp_op_not_in), rule(comp_op_is))
+DEF_RULE_NC(comp_op_not_in, and(2), tok(KW_NOT), tok(KW_IN))
+DEF_RULE_NC(comp_op_is, and(2), tok(KW_IS), opt_rule(comp_op_is_not))
+DEF_RULE_NC(comp_op_is_not, and(1), tok(KW_NOT))
 DEF_RULE(star_expr, c(star_expr), and(2), tok(OP_STAR), rule(expr))
 DEF_RULE(expr, c(expr), list, rule(xor_expr), tok(OP_PIPE))
 DEF_RULE(xor_expr, c(xor_expr), list, rule(and_expr), tok(OP_CARET))
 DEF_RULE(and_expr, c(and_expr), list, rule(shift_expr), tok(OP_AMPERSAND))
 DEF_RULE(shift_expr, c(shift_expr), list, rule(arith_expr), rule(shift_op))
-DEF_RULE(shift_op, nc, or(2), tok(OP_DBL_LESS), tok(OP_DBL_MORE))
+DEF_RULE_NC(shift_op, or(2), tok(OP_DBL_LESS), tok(OP_DBL_MORE))
 DEF_RULE(arith_expr, c(arith_expr), list, rule(term), rule(arith_op))
-DEF_RULE(arith_op, nc, or(2), tok(OP_PLUS), tok(OP_MINUS))
+DEF_RULE_NC(arith_op, or(2), tok(OP_PLUS), tok(OP_MINUS))
 DEF_RULE(term, c(term), list, rule(factor), rule(term_op))
-DEF_RULE(term_op, nc, or(4), tok(OP_STAR), tok(OP_SLASH), tok(OP_PERCENT), tok(OP_DBL_SLASH))
-DEF_RULE(factor, nc, or(2), rule(factor_2), rule(power))
+DEF_RULE_NC(term_op, or(4), tok(OP_STAR), tok(OP_SLASH), tok(OP_PERCENT), tok(OP_DBL_SLASH))
+DEF_RULE_NC(factor, or(2), rule(factor_2), rule(power))
 DEF_RULE(factor_2, c(factor_2), and_ident(2), rule(factor_op), rule(factor))
-DEF_RULE(factor_op, nc, or(3), tok(OP_PLUS), tok(OP_MINUS), tok(OP_TILDE))
+DEF_RULE_NC(factor_op, or(3), tok(OP_PLUS), tok(OP_MINUS), tok(OP_TILDE))
 DEF_RULE(power, c(power), and_ident(2), rule(atom_expr), opt_rule(power_dbl_star))
 #if MICROPY_PY_ASYNC_AWAIT
-DEF_RULE(atom_expr, nc, or(2), rule(atom_expr_await), rule(atom_expr_normal))
+DEF_RULE_NC(atom_expr, or(2), rule(atom_expr_await), rule(atom_expr_normal))
 DEF_RULE(atom_expr_await, c(atom_expr_await), and(3), tok(KW_AWAIT), rule(atom), opt_rule(atom_expr_trailers))
 #else
-DEF_RULE(atom_expr, nc, or(1), rule(atom_expr_normal))
+DEF_RULE_NC(atom_expr, or(1), rule(atom_expr_normal))
 #endif
 DEF_RULE(atom_expr_normal, c(atom_expr_normal), and_ident(2), rule(atom), opt_rule(atom_expr_trailers))
-DEF_RULE(atom_expr_trailers, c(atom_expr_trailers), one_or_more, rule(trailer))
-DEF_RULE(power_dbl_star, nc, and_ident(2), tok(OP_DBL_STAR), rule(factor))
+DEF_RULE_NC(atom_expr_trailers, one_or_more, rule(trailer))
+DEF_RULE_NC(power_dbl_star, and_ident(2), tok(OP_DBL_STAR), rule(factor))
 
 // atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False'
 // testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 // trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 
-DEF_RULE(atom, nc, or(12), tok(NAME), tok(INTEGER), tok(FLOAT_OR_IMAG), tok(STRING), tok(BYTES), tok(ELLIPSIS), tok(KW_NONE), tok(KW_TRUE), tok(KW_FALSE), rule(atom_paren), rule(atom_bracket), rule(atom_brace))
-DEF_RULE(string_or_bytes, nc, or(2), tok(STRING), tok(BYTES))
+DEF_RULE_NC(atom, or(12), tok(NAME), tok(INTEGER), tok(FLOAT_OR_IMAG), tok(STRING), tok(BYTES), tok(ELLIPSIS), tok(KW_NONE), tok(KW_TRUE), tok(KW_FALSE), rule(atom_paren), rule(atom_bracket), rule(atom_brace))
 DEF_RULE(atom_paren, c(atom_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(atom_2b), tok(DEL_PAREN_CLOSE))
-DEF_RULE(atom_2b, nc, or(2), rule(yield_expr), rule(testlist_comp))
+DEF_RULE_NC(atom_2b, or(2), rule(yield_expr), rule(testlist_comp))
 DEF_RULE(atom_bracket, c(atom_bracket), and(3), tok(DEL_BRACKET_OPEN), opt_rule(testlist_comp), tok(DEL_BRACKET_CLOSE))
 DEF_RULE(atom_brace, c(atom_brace), and(3), tok(DEL_BRACE_OPEN), opt_rule(dictorsetmaker), tok(DEL_BRACE_CLOSE))
-DEF_RULE(testlist_comp, nc, and_ident(2), rule(testlist_comp_2), opt_rule(testlist_comp_3))
-DEF_RULE(testlist_comp_2, nc, or(2), rule(star_expr), rule(test))
-DEF_RULE(testlist_comp_3, nc, or(2), rule(comp_for), rule(testlist_comp_3b))
-DEF_RULE(testlist_comp_3b, nc, and_ident(2), tok(DEL_COMMA), opt_rule(testlist_comp_3c))
-DEF_RULE(testlist_comp_3c, nc, list_with_end, rule(testlist_comp_2), tok(DEL_COMMA))
-DEF_RULE(trailer, nc, or(3), rule(trailer_paren), rule(trailer_bracket), rule(trailer_period))
+DEF_RULE_NC(testlist_comp, and_ident(2), rule(testlist_comp_2), opt_rule(testlist_comp_3))
+DEF_RULE_NC(testlist_comp_2, or(2), rule(star_expr), rule(test))
+DEF_RULE_NC(testlist_comp_3, or(2), rule(comp_for), rule(testlist_comp_3b))
+DEF_RULE_NC(testlist_comp_3b, and_ident(2), tok(DEL_COMMA), opt_rule(testlist_comp_3c))
+DEF_RULE_NC(testlist_comp_3c, list_with_end, rule(testlist_comp_2), tok(DEL_COMMA))
+DEF_RULE_NC(trailer, or(3), rule(trailer_paren), rule(trailer_bracket), rule(trailer_period))
 DEF_RULE(trailer_paren, c(trailer_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE))
 DEF_RULE(trailer_bracket, c(trailer_bracket), and(3), tok(DEL_BRACKET_OPEN), rule(subscriptlist), tok(DEL_BRACKET_CLOSE))
 DEF_RULE(trailer_period, c(trailer_period), and(2), tok(DEL_PERIOD), tok(NAME))
@@ -290,13 +289,13 @@ DEF_RULE(trailer_period, c(trailer_period), and(2), tok(DEL_PERIOD), tok(NAME))
 
 #if MICROPY_PY_BUILTINS_SLICE
 DEF_RULE(subscriptlist, c(generic_tuple), list_with_end, rule(subscript), tok(DEL_COMMA))
-DEF_RULE(subscript, nc, or(2), rule(subscript_3), rule(subscript_2))
+DEF_RULE_NC(subscript, or(2), rule(subscript_3), rule(subscript_2))
 DEF_RULE(subscript_2, c(subscript_2), and_ident(2), rule(test), opt_rule(subscript_3))
 DEF_RULE(subscript_3, c(subscript_3), and(2), tok(DEL_COLON), opt_rule(subscript_3b))
-DEF_RULE(subscript_3b, nc, or(2), rule(subscript_3c), rule(subscript_3d))
-DEF_RULE(subscript_3c, nc, and(2), tok(DEL_COLON), opt_rule(test))
-DEF_RULE(subscript_3d, nc, and_ident(2), rule(test), opt_rule(sliceop))
-DEF_RULE(sliceop, nc, and(2), tok(DEL_COLON), opt_rule(test))
+DEF_RULE_NC(subscript_3b, or(2), rule(subscript_3c), rule(subscript_3d))
+DEF_RULE_NC(subscript_3c, and(2), tok(DEL_COLON), opt_rule(test))
+DEF_RULE_NC(subscript_3d, and_ident(2), rule(test), opt_rule(sliceop))
+DEF_RULE_NC(sliceop, and(2), tok(DEL_COLON), opt_rule(test))
 #else
 DEF_RULE(subscriptlist, c(generic_tuple), list_with_end, rule(test), tok(DEL_COMMA))
 #endif
@@ -305,33 +304,33 @@ DEF_RULE(subscriptlist, c(generic_tuple), list_with_end, rule(test), tok(DEL_COM
 // testlist: test (',' test)* [',']
 // dictorsetmaker: (test ':' test (comp_for | (',' test ':' test)* [','])) | (test (comp_for | (',' test)* [',']))
 
-DEF_RULE(exprlist, nc, list_with_end, rule(exprlist_2), tok(DEL_COMMA))
-DEF_RULE(exprlist_2, nc, or(2), rule(star_expr), rule(expr))
+DEF_RULE_NC(exprlist, list_with_end, rule(exprlist_2), tok(DEL_COMMA))
+DEF_RULE_NC(exprlist_2, or(2), rule(star_expr), rule(expr))
 DEF_RULE(testlist, c(generic_tuple), list_with_end, rule(test), tok(DEL_COMMA))
 // TODO dictorsetmaker lets through more than is allowed
-DEF_RULE(dictorsetmaker, nc, and_ident(2), rule(dictorsetmaker_item), opt_rule(dictorsetmaker_tail))
+DEF_RULE_NC(dictorsetmaker, and_ident(2), rule(dictorsetmaker_item), opt_rule(dictorsetmaker_tail))
 #if MICROPY_PY_BUILTINS_SET
 DEF_RULE(dictorsetmaker_item, c(dictorsetmaker_item), and_ident(2), rule(test), opt_rule(dictorsetmaker_colon))
-DEF_RULE(dictorsetmaker_colon, nc, and_ident(2), tok(DEL_COLON), rule(test))
+DEF_RULE_NC(dictorsetmaker_colon, and_ident(2), tok(DEL_COLON), rule(test))
 #else
 DEF_RULE(dictorsetmaker_item, c(dictorsetmaker_item), and(3), rule(test), tok(DEL_COLON), rule(test))
 #endif
-DEF_RULE(dictorsetmaker_tail, nc, or(2), rule(comp_for), rule(dictorsetmaker_list))
-DEF_RULE(dictorsetmaker_list, nc, and(2), tok(DEL_COMMA), opt_rule(dictorsetmaker_list2))
-DEF_RULE(dictorsetmaker_list2, nc, list_with_end, rule(dictorsetmaker_item), tok(DEL_COMMA))
+DEF_RULE_NC(dictorsetmaker_tail, or(2), rule(comp_for), rule(dictorsetmaker_list))
+DEF_RULE_NC(dictorsetmaker_list, and(2), tok(DEL_COMMA), opt_rule(dictorsetmaker_list2))
+DEF_RULE_NC(dictorsetmaker_list2, list_with_end, rule(dictorsetmaker_item), tok(DEL_COMMA))
 
 // classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
 
 DEF_RULE(classdef, c(classdef), and_blank(5), tok(KW_CLASS), tok(NAME), opt_rule(classdef_2), tok(DEL_COLON), rule(suite))
-DEF_RULE(classdef_2, nc, and_ident(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE))
+DEF_RULE_NC(classdef_2, and_ident(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE))
 
 // arglist: (argument ',')* (argument [','] | '*' test (',' argument)* [',' '**' test] | '**' test)
 
 // TODO arglist lets through more than is allowed, compiler needs to do further verification
-DEF_RULE(arglist, nc, list_with_end, rule(arglist_2), tok(DEL_COMMA))
-DEF_RULE(arglist_2, nc, or(3), rule(arglist_star), rule(arglist_dbl_star), rule(argument))
-DEF_RULE(arglist_star, nc, and(2), tok(OP_STAR), rule(test))
-DEF_RULE(arglist_dbl_star, nc, and(2), tok(OP_DBL_STAR), rule(test))
+DEF_RULE_NC(arglist, list_with_end, rule(arglist_2), tok(DEL_COMMA))
+DEF_RULE_NC(arglist_2, or(3), rule(arglist_star), rule(arglist_dbl_star), rule(argument))
+DEF_RULE_NC(arglist_star, and(2), tok(OP_STAR), rule(test))
+DEF_RULE_NC(arglist_dbl_star, and(2), tok(OP_DBL_STAR), rule(test))
 
 // # The reason that keywords are test nodes instead of NAME is that using NAME
 // # results in an ambiguity. ast.c makes sure it's a NAME.
@@ -340,12 +339,12 @@ DEF_RULE(arglist_dbl_star, nc, and(2), tok(OP_DBL_STAR), rule(test))
 // comp_for: 'for' exprlist 'in' or_test [comp_iter]
 // comp_if: 'if' test_nocond [comp_iter]
 
-DEF_RULE(argument, nc, and_ident(2), rule(test), opt_rule(argument_2))
-DEF_RULE(argument_2, nc, or(2), rule(comp_for), rule(argument_3))
-DEF_RULE(argument_3, nc, and_ident(2), tok(DEL_EQUAL), rule(test))
-DEF_RULE(comp_iter, nc, or(2), rule(comp_for), rule(comp_if))
-DEF_RULE(comp_for, nc, and_blank(5), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(or_test), opt_rule(comp_iter))
-DEF_RULE(comp_if, nc, and(3), tok(KW_IF), rule(test_nocond), opt_rule(comp_iter))
+DEF_RULE_NC(argument, and_ident(2), rule(test), opt_rule(argument_2))
+DEF_RULE_NC(argument_2, or(2), rule(comp_for), rule(argument_3))
+DEF_RULE_NC(argument_3, and_ident(2), tok(DEL_EQUAL), rule(test))
+DEF_RULE_NC(comp_iter, or(2), rule(comp_for), rule(comp_if))
+DEF_RULE_NC(comp_for, and_blank(5), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(or_test), opt_rule(comp_iter))
+DEF_RULE_NC(comp_if, and(3), tok(KW_IF), rule(test_nocond), opt_rule(comp_iter))
 
 // # not used in grammar, but may appear in "node" passed from Parser to Compiler
 // encoding_decl: NAME
@@ -354,5 +353,5 @@ DEF_RULE(comp_if, nc, and(3), tok(KW_IF), rule(test_nocond), opt_rule(comp_iter)
 // yield_arg: 'from' test | testlist
 
 DEF_RULE(yield_expr, c(yield_expr), and(2), tok(KW_YIELD), opt_rule(yield_arg))
-DEF_RULE(yield_arg, nc, or(2), rule(yield_arg_from), rule(testlist))
-DEF_RULE(yield_arg_from, nc, and(2), tok(KW_FROM), rule(test))
+DEF_RULE_NC(yield_arg, or(2), rule(yield_arg_from), rule(testlist))
+DEF_RULE_NC(yield_arg_from, and(2), tok(KW_FROM), rule(test))
diff --git a/py/lexer.c b/py/lexer.c
index 458fba0900..abc1f3ebbb 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -25,6 +25,7 @@
  */
 
 #include <stdio.h>
+#include <string.h>
 #include <assert.h>
 
 #include "py/mpstate.h"
@@ -39,19 +40,6 @@
 // TODO seems that CPython allows NULL byte in the input stream
 // don't know if that's intentional or not, but we don't allow it
 
-// TODO replace with a call to a standard function
-STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
-    mp_uint_t i = 0;
-
-    while (i < len && *str == *strn) {
-        ++i;
-        ++str;
-        ++strn;
-    }
-
-    return i == len && *str == 0;
-}
-
 #define MP_LEXER_EOF ((unichar)MP_READER_EOF)
 #define CUR_CHAR(lex) ((lex)->chr0)
 
@@ -75,11 +63,9 @@ STATIC bool is_char_or3(mp_lexer_t *lex, byte c1, byte c2, byte c3) {
     return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3;
 }
 
-/*
 STATIC bool is_char_following(mp_lexer_t *lex, byte c) {
     return lex->chr1 == c;
 }
-*/
 
 STATIC bool is_char_following_or(mp_lexer_t *lex, byte c1, byte c2) {
     return lex->chr1 == c1 || lex->chr1 == c2;
@@ -118,6 +104,13 @@ STATIC bool is_following_odigit(mp_lexer_t *lex) {
     return lex->chr1 >= '0' && lex->chr1 <= '7';
 }
 
+STATIC bool is_string_or_bytes(mp_lexer_t *lex) {
+    return is_char_or(lex, '\'', '\"')
+        || (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"'))
+        || ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r'))
+            && is_char_following_following_or(lex, '\'', '\"'));
+}
+
 // to easily parse utf-8 identifiers we allow any raw byte with high bit set
 STATIC bool is_head_of_identifier(mp_lexer_t *lex) {
     return is_letter(lex) || lex->chr0 == '_' || lex->chr0 >= 0x80;
@@ -144,36 +137,30 @@ STATIC void next_char(mp_lexer_t *lex) {
     lex->chr1 = lex->chr2;
     lex->chr2 = lex->reader.readbyte(lex->reader.data);
 
-    if (lex->chr0 == '\r') {
+    if (lex->chr1 == '\r') {
         // CR is a new line, converted to LF
-        lex->chr0 = '\n';
-        if (lex->chr1 == '\n') {
-            // CR LF is a single new line
-            lex->chr1 = lex->chr2;
+        lex->chr1 = '\n';
+        if (lex->chr2 == '\n') {
+            // CR LF is a single new line, throw out the extra LF
             lex->chr2 = lex->reader.readbyte(lex->reader.data);
         }
     }
 
-    if (lex->chr2 == MP_LEXER_EOF) {
-        // EOF, check if we need to insert a newline at end of file
-        if (lex->chr1 != MP_LEXER_EOF && lex->chr1 != '\n') {
-            // if lex->chr1 == '\r' then this makes a CR LF which will be converted to LF above
-            // otherwise it just inserts a LF
-            lex->chr2 = '\n';
-        }
+    // check if we need to insert a newline at end of file
+    if (lex->chr2 == MP_LEXER_EOF && lex->chr1 != MP_LEXER_EOF && lex->chr1 != '\n') {
+        lex->chr2 = '\n';
     }
 }
 
-STATIC void indent_push(mp_lexer_t *lex, mp_uint_t indent) {
+STATIC void indent_push(mp_lexer_t *lex, size_t indent) {
     if (lex->num_indent_level >= lex->alloc_indent_level) {
-        // TODO use m_renew_maybe and somehow indicate an error if it fails... probably by using MP_TOKEN_MEMORY_ERROR
         lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level, lex->alloc_indent_level + MICROPY_ALLOC_LEXEL_INDENT_INC);
         lex->alloc_indent_level += MICROPY_ALLOC_LEXEL_INDENT_INC;
     }
     lex->indent_level[lex->num_indent_level++] = indent;
 }
 
-STATIC mp_uint_t indent_top(mp_lexer_t *lex) {
+STATIC size_t indent_top(mp_lexer_t *lex) {
     return lex->indent_level[lex->num_indent_level - 1];
 }
 
@@ -184,7 +171,6 @@ STATIC void indent_pop(mp_lexer_t *lex) {
 // some tricky operator encoding:
 //     <op>  = begin with <op>, if this opchar matches then begin here
 //     e<op> = end with <op>, if this opchar matches then end
-//     E<op> = mandatory end with <op>, this opchar must match, then end
 //     c<op> = continue with <op>, if this opchar matches then continue matching
 // this means if the start of two ops are the same then they are equal til the last char
 
@@ -201,7 +187,7 @@ STATIC const char *const tok_enc =
     "%e="         // % %=
     "^e="         // ^ ^=
     "=e="         // = ==
-    "!E=";        // !=
+    "!.";         // start of special cases: != . ...
 
 // TODO static assert that number of tokens is less than 256 so we can safely make this table with byte sized entries
 STATIC const uint8_t tok_enc_kind[] = {
@@ -221,14 +207,15 @@ STATIC const uint8_t tok_enc_kind[] = {
     MP_TOKEN_OP_PERCENT, MP_TOKEN_DEL_PERCENT_EQUAL,
     MP_TOKEN_OP_CARET, MP_TOKEN_DEL_CARET_EQUAL,
     MP_TOKEN_DEL_EQUAL, MP_TOKEN_OP_DBL_EQUAL,
-    MP_TOKEN_OP_NOT_EQUAL,
 };
 
 // must have the same order as enum in lexer.h
+// must be sorted according to strcmp
 STATIC const char *const tok_kw[] = {
     "False",
     "None",
     "True",
+    "__debug__",
     "and",
     "as",
     "assert",
@@ -263,13 +250,12 @@ STATIC const char *const tok_kw[] = {
     "while",
     "with",
     "yield",
-    "__debug__",
 };
 
 // This is called with CUR_CHAR() before first hex digit, and should return with
 // it pointing to last hex digit
 // num_digits must be greater than zero
-STATIC bool get_hex(mp_lexer_t *lex, mp_uint_t num_digits, mp_uint_t *result) {
+STATIC bool get_hex(mp_lexer_t *lex, size_t num_digits, mp_uint_t *result) {
     mp_uint_t num = 0;
     while (num_digits-- != 0) {
         next_char(lex);
@@ -283,14 +269,144 @@ STATIC bool get_hex(mp_lexer_t *lex, mp_uint_t num_digits, mp_uint_t *result) {
     return true;
 }
 
-STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
-    // start new token text
-    vstr_reset(&lex->vstr);
+STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw) {
+    // get first quoting character
+    char quote_char = '\'';
+    if (is_char(lex, '\"')) {
+        quote_char = '\"';
+    }
+    next_char(lex);
 
-    // skip white space and comments
+    // work out if it's a single or triple quoted literal
+    size_t num_quotes;
+    if (is_char_and(lex, quote_char, quote_char)) {
+        // triple quotes
+        next_char(lex);
+        next_char(lex);
+        num_quotes = 3;
+    } else {
+        // single quotes
+        num_quotes = 1;
+    }
+
+    size_t n_closing = 0;
+    while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
+        if (is_char(lex, quote_char)) {
+            n_closing += 1;
+            vstr_add_char(&lex->vstr, CUR_CHAR(lex));
+        } else {
+            n_closing = 0;
+            if (is_char(lex, '\\')) {
+                next_char(lex);
+                unichar c = CUR_CHAR(lex);
+                if (is_raw) {
+                    // raw strings allow escaping of quotes, but the backslash is also emitted
+                    vstr_add_char(&lex->vstr, '\\');
+                } else {
+                    switch (c) {
+                        // note: "c" can never be MP_LEXER_EOF because next_char
+                        // always inserts a newline at the end of the input stream
+                        case '\n': c = MP_LEXER_EOF; break; // backslash escape the newline, just ignore it
+                        case '\\': break;
+                        case '\'': break;
+                        case '"': break;
+                        case 'a': c = 0x07; break;
+                        case 'b': c = 0x08; break;
+                        case 't': c = 0x09; break;
+                        case 'n': c = 0x0a; break;
+                        case 'v': c = 0x0b; break;
+                        case 'f': c = 0x0c; break;
+                        case 'r': c = 0x0d; break;
+                        case 'u':
+                        case 'U':
+                            if (lex->tok_kind == MP_TOKEN_BYTES) {
+                                // b'\u1234' == b'\\u1234'
+                                vstr_add_char(&lex->vstr, '\\');
+                                break;
+                            }
+                            // Otherwise fall through.
+                        case 'x':
+                        {
+                            mp_uint_t num = 0;
+                            if (!get_hex(lex, (c == 'x' ? 2 : c == 'u' ? 4 : 8), &num)) {
+                                // not enough hex chars for escape sequence
+                                lex->tok_kind = MP_TOKEN_INVALID;
+                            }
+                            c = num;
+                            break;
+                        }
+                        case 'N':
+                            // Supporting '\N{LATIN SMALL LETTER A}' == 'a' would require keeping the
+                            // entire Unicode name table in the core. As of Unicode 6.3.0, that's nearly
+                            // 3MB of text; even gzip-compressed and with minimal structure, it'll take
+                            // roughly half a meg of storage. This form of Unicode escape may be added
+                            // later on, but it's definitely not a priority right now. -- CJA 20140607
+                            mp_not_implemented("unicode name escapes");
+                            break;
+                        default:
+                            if (c >= '0' && c <= '7') {
+                                // Octal sequence, 1-3 chars
+                                size_t digits = 3;
+                                mp_uint_t num = c - '0';
+                                while (is_following_odigit(lex) && --digits != 0) {
+                                    next_char(lex);
+                                    num = num * 8 + (CUR_CHAR(lex) - '0');
+                                }
+                                c = num;
+                            } else {
+                                // unrecognised escape character; CPython lets this through verbatim as '\' and then the character
+                                vstr_add_char(&lex->vstr, '\\');
+                            }
+                            break;
+                    }
+                }
+                if (c != MP_LEXER_EOF) {
+                    if (MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) {
+                        if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) {
+                            vstr_add_char(&lex->vstr, c);
+                        } else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) {
+                            vstr_add_byte(&lex->vstr, c);
+                        } else {
+                            // unicode character out of range
+                            // this raises a generic SyntaxError; could provide more info
+                            lex->tok_kind = MP_TOKEN_INVALID;
+                        }
+                    } else {
+                        // without unicode everything is just added as an 8-bit byte
+                        if (c < 0x100) {
+                            vstr_add_byte(&lex->vstr, c);
+                        } else {
+                            // 8-bit character out of range
+                            // this raises a generic SyntaxError; could provide more info
+                            lex->tok_kind = MP_TOKEN_INVALID;
+                        }
+                    }
+                }
+            } else {
+                // Add the "character" as a byte so that we remain 8-bit clean.
+                // This way, strings are parsed correctly whether or not they contain utf-8 chars.
+                vstr_add_byte(&lex->vstr, CUR_CHAR(lex));
+            }
+        }
+        next_char(lex);
+    }
+
+    // check we got the required end quotes
+    if (n_closing < num_quotes) {
+        lex->tok_kind = MP_TOKEN_LONELY_STRING_OPEN;
+    }
+
+    // cut off the end quotes from the token text
+    vstr_cut_tail_bytes(&lex->vstr, n_closing);
+}
+
+STATIC bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) {
     bool had_physical_newline = false;
     while (!is_end(lex)) {
         if (is_physical_newline(lex)) {
+            if (stop_at_newline && lex->nested_bracket_level == 0) {
+                break;
+            }
             had_physical_newline = true;
             next_char(lex);
         } else if (is_whitespace(lex)) {
@@ -301,35 +417,29 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
                 next_char(lex);
             }
             // had_physical_newline will be set on next loop
-        } else if (is_char(lex, '\\')) {
-            // backslash (outside string literals) must appear just before a physical newline
+        } else if (is_char_and(lex, '\\', '\n')) {
+            // line-continuation, so don't set had_physical_newline
+            next_char(lex);
             next_char(lex);
-            if (!is_physical_newline(lex)) {
-                // SyntaxError: unexpected character after line continuation character
-                lex->tok_line = lex->line;
-                lex->tok_column = lex->column;
-                lex->tok_kind = MP_TOKEN_BAD_LINE_CONTINUATION;
-                return;
-            } else {
-                next_char(lex);
-            }
         } else {
             break;
         }
     }
+    return had_physical_newline;
+}
+
+void mp_lexer_to_next(mp_lexer_t *lex) {
+    // start new token text
+    vstr_reset(&lex->vstr);
+
+    // skip white space and comments
+    bool had_physical_newline = skip_whitespace(lex, false);
 
     // set token source information
     lex->tok_line = lex->line;
     lex->tok_column = lex->column;
 
-    if (first_token && lex->line == 1 && lex->column != 1) {
-        // check that the first token is in the first column
-        // if first token is not on first line, we get a physical newline and
-        // this check is done as part of normal indent/dedent checking below
-        // (done to get equivalence with CPython)
-        lex->tok_kind = MP_TOKEN_INDENT;
-
-    } else if (lex->emit_dent < 0) {
+    if (lex->emit_dent < 0) {
         lex->tok_kind = MP_TOKEN_DEDENT;
         lex->emit_dent += 1;
 
@@ -340,7 +450,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
     } else if (had_physical_newline && lex->nested_bracket_level == 0) {
         lex->tok_kind = MP_TOKEN_NEWLINE;
 
-        mp_uint_t num_spaces = lex->column - 1;
+        size_t num_spaces = lex->column - 1;
         if (num_spaces == indent_top(lex)) {
         } else if (num_spaces > indent_top(lex)) {
             indent_push(lex, num_spaces);
@@ -358,168 +468,65 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
     } else if (is_end(lex)) {
         lex->tok_kind = MP_TOKEN_END;
 
-    } else if (is_char_or(lex, '\'', '\"')
-               || (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"'))
-               || ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r')) && is_char_following_following_or(lex, '\'', '\"'))) {
+    } else if (is_string_or_bytes(lex)) {
         // a string or bytes literal
 
-        // parse type codes
-        bool is_raw = false;
-        bool is_bytes = false;
-        if (is_char(lex, 'u')) {
-            next_char(lex);
-        } else if (is_char(lex, 'b')) {
-            is_bytes = true;
-            next_char(lex);
-            if (is_char(lex, 'r')) {
-                is_raw = true;
-                next_char(lex);
-            }
-        } else if (is_char(lex, 'r')) {
-            is_raw = true;
-            next_char(lex);
-            if (is_char(lex, 'b')) {
-                is_bytes = true;
-                next_char(lex);
-            }
-        }
+        // Python requires adjacent string/bytes literals to be automatically
+        // concatenated.  We do it here in the tokeniser to make efficient use of RAM,
+        // because then the lexer's vstr can be used to accumulate the string literal,
+        // in contrast to creating a parse tree of strings and then joining them later
+        // in the compiler.  It's also more compact in code size to do it here.
 
-        // set token kind
-        if (is_bytes) {
-            lex->tok_kind = MP_TOKEN_BYTES;
-        } else {
-            lex->tok_kind = MP_TOKEN_STRING;
-        }
+        // MP_TOKEN_END is used to indicate that this is the first string token
+        lex->tok_kind = MP_TOKEN_END;
 
-        // get first quoting character
-        char quote_char = '\'';
-        if (is_char(lex, '\"')) {
-            quote_char = '\"';
-        }
-        next_char(lex);
+        // Loop to accumulate string/bytes literals
+        do {
+            // parse type codes
+            bool is_raw = false;
+            mp_token_kind_t kind = MP_TOKEN_STRING;
+            int n_char = 0;
+            if (is_char(lex, 'u')) {
+                n_char = 1;
+            } else if (is_char(lex, 'b')) {
+                kind = MP_TOKEN_BYTES;
+                n_char = 1;
+                if (is_char_following(lex, 'r')) {
+                    is_raw = true;
+                    n_char = 2;
+                }
+            } else if (is_char(lex, 'r')) {
+                is_raw = true;
+                n_char = 1;
+                if (is_char_following(lex, 'b')) {
+                    kind = MP_TOKEN_BYTES;
+                    n_char = 2;
+                }
+            }
 
-        // work out if it's a single or triple quoted literal
-        mp_uint_t num_quotes;
-        if (is_char_and(lex, quote_char, quote_char)) {
-            // triple quotes
-            next_char(lex);
-            next_char(lex);
-            num_quotes = 3;
-        } else {
-            // single quotes
-            num_quotes = 1;
-        }
+            // Set or check token kind
+            if (lex->tok_kind == MP_TOKEN_END) {
+                lex->tok_kind = kind;
+            } else if (lex->tok_kind != kind) {
+                // Can't concatenate string with bytes
+                break;
+            }
 
-        // parse the literal
-        mp_uint_t n_closing = 0;
-        while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
-            if (is_char(lex, quote_char)) {
-                n_closing += 1;
-                vstr_add_char(&lex->vstr, CUR_CHAR(lex));
-            } else {
-                n_closing = 0;
-                if (is_char(lex, '\\')) {
+            // Skip any type code characters
+            if (n_char != 0) {
+                next_char(lex);
+                if (n_char == 2) {
                     next_char(lex);
-                    unichar c = CUR_CHAR(lex);
-                    if (is_raw) {
-                        // raw strings allow escaping of quotes, but the backslash is also emitted
-                        vstr_add_char(&lex->vstr, '\\');
-                    } else {
-                        switch (c) {
-                            // note: "c" can never be MP_LEXER_EOF because next_char
-                            // always inserts a newline at the end of the input stream
-                            case '\n': c = MP_LEXER_EOF; break; // backslash escape the newline, just ignore it
-                            case '\\': break;
-                            case '\'': break;
-                            case '"': break;
-                            case 'a': c = 0x07; break;
-                            case 'b': c = 0x08; break;
-                            case 't': c = 0x09; break;
-                            case 'n': c = 0x0a; break;
-                            case 'v': c = 0x0b; break;
-                            case 'f': c = 0x0c; break;
-                            case 'r': c = 0x0d; break;
-                            case 'u':
-                            case 'U':
-                                if (is_bytes) {
-                                    // b'\u1234' == b'\\u1234'
-                                    vstr_add_char(&lex->vstr, '\\');
-                                    break;
-                                }
-                                // Otherwise fall through.
-                            case 'x':
-                            {
-                                mp_uint_t num = 0;
-                                if (!get_hex(lex, (c == 'x' ? 2 : c == 'u' ? 4 : 8), &num)) {
-                                    // not enough hex chars for escape sequence
-                                    lex->tok_kind = MP_TOKEN_INVALID;
-                                }
-                                c = num;
-                                break;
-                            }
-                            case 'N':
-                                // Supporting '\N{LATIN SMALL LETTER A}' == 'a' would require keeping the
-                                // entire Unicode name table in the core. As of Unicode 6.3.0, that's nearly
-                                // 3MB of text; even gzip-compressed and with minimal structure, it'll take
-                                // roughly half a meg of storage. This form of Unicode escape may be added
-                                // later on, but it's definitely not a priority right now. -- CJA 20140607
-                                mp_not_implemented("unicode name escapes");
-                                break;
-                            default:
-                                if (c >= '0' && c <= '7') {
-                                    // Octal sequence, 1-3 chars
-                                    mp_uint_t digits = 3;
-                                    mp_uint_t num = c - '0';
-                                    while (is_following_odigit(lex) && --digits != 0) {
-                                        next_char(lex);
-                                        num = num * 8 + (CUR_CHAR(lex) - '0');
-                                    }
-                                    c = num;
-                                } else {
-                                    // unrecognised escape character; CPython lets this through verbatim as '\' and then the character
-                                    vstr_add_char(&lex->vstr, '\\');
-                                }
-                                break;
-                        }
-                    }
-                    if (c != MP_LEXER_EOF) {
-                        if (MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) {
-                            if (c < 0x110000 && !is_bytes) {
-                                vstr_add_char(&lex->vstr, c);
-                            } else if (c < 0x100 && is_bytes) {
-                                vstr_add_byte(&lex->vstr, c);
-                            } else {
-                                // unicode character out of range
-                                // this raises a generic SyntaxError; could provide more info
-                                lex->tok_kind = MP_TOKEN_INVALID;
-                            }
-                        } else {
-                            // without unicode everything is just added as an 8-bit byte
-                            if (c < 0x100) {
-                                vstr_add_byte(&lex->vstr, c);
-                            } else {
-                                // 8-bit character out of range
-                                // this raises a generic SyntaxError; could provide more info
-                                lex->tok_kind = MP_TOKEN_INVALID;
-                            }
-                        }
-                    }
-                } else {
-                    // Add the "character" as a byte so that we remain 8-bit clean.
-                    // This way, strings are parsed correctly whether or not they contain utf-8 chars.
-                    vstr_add_byte(&lex->vstr, CUR_CHAR(lex));
                 }
             }
-            next_char(lex);
-        }
 
-        // check we got the required end quotes
-        if (n_closing < num_quotes) {
-            lex->tok_kind = MP_TOKEN_LONELY_STRING_OPEN;
-        }
+            // Parse the literal
+            parse_string_literal(lex, is_raw);
+
+            // Skip whitespace so we can check if there's another string following
+            skip_whitespace(lex, true);
 
-        // cut off the end quotes from the token text
-        vstr_cut_tail_bytes(&lex->vstr, n_closing);
+        } while (is_string_or_bytes(lex));
 
     } else if (is_head_of_identifier(lex)) {
         lex->tok_kind = MP_TOKEN_NAME;
@@ -534,6 +541,25 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
             next_char(lex);
         }
 
+        // Check if the name is a keyword.
+        // We also check for __debug__ here and convert it to its value.  This is
+        // so the parser gives a syntax error on, eg, x.__debug__.  Otherwise, we
+        // need to check for this special token in many places in the compiler.
+        const char *s = vstr_null_terminated_str(&lex->vstr);
+        for (size_t i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) {
+            int cmp = strcmp(s, tok_kw[i]);
+            if (cmp == 0) {
+                lex->tok_kind = MP_TOKEN_KW_FALSE + i;
+                if (lex->tok_kind == MP_TOKEN_KW___DEBUG__) {
+                    lex->tok_kind = (MP_STATE_VM(mp_optimise_value) == 0 ? MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE);
+                }
+                break;
+            } else if (cmp < 0) {
+                // Table is sorted and comparison was less-than, so stop searching
+                break;
+            }
+        }
+
     } else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
         bool forced_integer = false;
         if (is_char(lex, '.')) {
@@ -570,34 +596,14 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
             }
         }
 
-    } else if (is_char(lex, '.')) {
-        // special handling for . and ... operators, because .. is not a valid operator
-
-        // get first char
-        vstr_add_char(&lex->vstr, '.');
-        next_char(lex);
-
-        if (is_char_and(lex, '.', '.')) {
-            vstr_add_char(&lex->vstr, '.');
-            vstr_add_char(&lex->vstr, '.');
-            next_char(lex);
-            next_char(lex);
-            lex->tok_kind = MP_TOKEN_ELLIPSIS;
-        } else {
-            lex->tok_kind = MP_TOKEN_DEL_PERIOD;
-        }
-
     } else {
         // search for encoded delimiter or operator
 
         const char *t = tok_enc;
-        mp_uint_t tok_enc_index = 0;
+        size_t tok_enc_index = 0;
         for (; *t != 0 && !is_char(lex, *t); t += 1) {
             if (*t == 'e' || *t == 'c') {
                 t += 1;
-            } else if (*t == 'E') {
-                tok_enc_index -= 1;
-                t += 1;
             }
             tok_enc_index += 1;
         }
@@ -608,55 +614,48 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
             // didn't match any delimiter or operator characters
             lex->tok_kind = MP_TOKEN_INVALID;
 
+        } else if (*t == '!') {
+            // "!=" is a special case because "!" is not a valid operator
+            if (is_char(lex, '=')) {
+                next_char(lex);
+                lex->tok_kind = MP_TOKEN_OP_NOT_EQUAL;
+            } else {
+                lex->tok_kind = MP_TOKEN_INVALID;
+            }
+
+        } else if (*t == '.') {
+            // "." and "..." are special cases because ".." is not a valid operator
+            if (is_char_and(lex, '.', '.')) {
+                next_char(lex);
+                next_char(lex);
+                lex->tok_kind = MP_TOKEN_ELLIPSIS;
+            } else {
+                lex->tok_kind = MP_TOKEN_DEL_PERIOD;
+            }
+
         } else {
             // matched a delimiter or operator character
 
             // get the maximum characters for a valid token
             t += 1;
-            mp_uint_t t_index = tok_enc_index;
-            for (;;) {
-                for (; *t == 'e'; t += 1) {
-                    t += 1;
-                    t_index += 1;
-                    if (is_char(lex, *t)) {
-                        next_char(lex);
-                        tok_enc_index = t_index;
-                        break;
-                    }
-                }
-
-                if (*t == 'E') {
-                    t += 1;
-                    if (is_char(lex, *t)) {
-                        next_char(lex);
-                        tok_enc_index = t_index;
-                    } else {
-                        lex->tok_kind = MP_TOKEN_INVALID;
-                        goto tok_enc_no_match;
-                    }
-                    break;
-                }
-
-                if (*t == 'c') {
-                    t += 1;
-                    t_index += 1;
-                    if (is_char(lex, *t)) {
-                        next_char(lex);
-                        tok_enc_index = t_index;
-                        t += 1;
-                    } else {
+            size_t t_index = tok_enc_index;
+            while (*t == 'c' || *t == 'e') {
+                t_index += 1;
+                if (is_char(lex, t[1])) {
+                    next_char(lex);
+                    tok_enc_index = t_index;
+                    if (*t == 'e') {
                         break;
                     }
-                } else {
+                } else if (*t == 'c') {
                     break;
                 }
+                t += 2;
             }
 
             // set token kind
             lex->tok_kind = tok_enc_kind[tok_enc_index];
 
-            tok_enc_no_match:
-
             // compute bracket level for implicit line joining
             if (lex->tok_kind == MP_TOKEN_DEL_PAREN_OPEN || lex->tok_kind == MP_TOKEN_DEL_BRACKET_OPEN || lex->tok_kind == MP_TOKEN_DEL_BRACE_OPEN) {
                 lex->nested_bracket_level += 1;
@@ -665,102 +664,55 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
             }
         }
     }
-
-    // check for keywords
-    if (lex->tok_kind == MP_TOKEN_NAME) {
-        // We check for __debug__ here and convert it to its value.  This is so
-        // the parser gives a syntax error on, eg, x.__debug__.  Otherwise, we
-        // need to check for this special token in many places in the compiler.
-        // TODO improve speed of these string comparisons
-        //for (mp_int_t i = 0; tok_kw[i] != NULL; i++) {
-        for (size_t i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) {
-            if (str_strn_equal(tok_kw[i], lex->vstr.buf, lex->vstr.len)) {
-                if (i == MP_ARRAY_SIZE(tok_kw) - 1) {
-                    // tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__"
-                    lex->tok_kind = (MP_STATE_VM(mp_optimise_value) == 0 ? MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE);
-                } else {
-                    lex->tok_kind = MP_TOKEN_KW_FALSE + i;
-                }
-                break;
-            }
-        }
-    }
 }
 
 mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
-    mp_lexer_t *lex = m_new_obj_maybe(mp_lexer_t);
-
-    // check for memory allocation error
-    if (lex == NULL) {
-        reader.close(reader.data);
-        return NULL;
-    }
+    mp_lexer_t *lex = m_new_obj(mp_lexer_t);
 
     lex->source_name = src_name;
     lex->reader = reader;
     lex->line = 1;
-    lex->column = 1;
+    lex->column = -2;   // account for 3 dummy bytes
     lex->emit_dent = 0;
     lex->nested_bracket_level = 0;
     lex->alloc_indent_level = MICROPY_ALLOC_LEXER_INDENT_INIT;
     lex->num_indent_level = 1;
-    lex->indent_level = m_new_maybe(uint16_t, lex->alloc_indent_level);
+    lex->indent_level = m_new(uint16_t, lex->alloc_indent_level);
     vstr_init(&lex->vstr, 32);
 
-    // check for memory allocation error
-    // note: vstr_init above may fail on malloc, but so may mp_lexer_next_token_into below
-    if (lex->indent_level == NULL) {
-        mp_lexer_free(lex);
-        return NULL;
-    }
-
     // store sentinel for first indentation level
     lex->indent_level[0] = 0;
 
-    // preload characters
-    lex->chr0 = reader.readbyte(reader.data);
-    lex->chr1 = reader.readbyte(reader.data);
-    lex->chr2 = reader.readbyte(reader.data);
-
-    // if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
-    if (lex->chr0 == MP_LEXER_EOF) {
-        lex->chr0 = '\n';
-    } else if (lex->chr1 == MP_LEXER_EOF) {
-        if (lex->chr0 == '\r') {
-            lex->chr0 = '\n';
-        } else if (lex->chr0 != '\n') {
-            lex->chr1 = '\n';
-        }
-    } else if (lex->chr2 == MP_LEXER_EOF) {
-        if (lex->chr1 == '\r') {
-            lex->chr1 = '\n';
-        } else if (lex->chr1 != '\n') {
-            lex->chr2 = '\n';
-        }
-    }
+    // load lexer with start of file, advancing lex->column to 1
+    // start with dummy bytes and use next_char() for proper EOL/EOF handling
+    lex->chr0 = lex->chr1 = lex->chr2 = 0;
+    next_char(lex);
+    next_char(lex);
+    next_char(lex);
 
     // preload first token
-    mp_lexer_next_token_into(lex, true);
+    mp_lexer_to_next(lex);
+
+    // Check that the first token is in the first column.  If it's not then we
+    // convert the token kind to INDENT so that the parser gives a syntax error.
+    if (lex->tok_column != 1) {
+        lex->tok_kind = MP_TOKEN_INDENT;
+    }
 
     return lex;
 }
 
-mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len) {
+mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, size_t len, size_t free_len) {
     mp_reader_t reader;
-    if (!mp_reader_new_mem(&reader, (const byte*)str, len, free_len)) {
-        return NULL;
-    }
+    mp_reader_new_mem(&reader, (const byte*)str, len, free_len);
     return mp_lexer_new(src_name, reader);
 }
 
-#if MICROPY_READER_POSIX || MICROPY_READER_FATFS
+#if MICROPY_READER_POSIX || MICROPY_READER_VFS
 
 mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
     mp_reader_t reader;
-    int ret = mp_reader_new_file(&reader, filename);
-    if (ret != 0) {
-        return NULL;
-    }
+    mp_reader_new_file(&reader, filename);
     return mp_lexer_new(qstr_from_str(filename), reader);
 }
 
@@ -768,10 +720,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
 
 mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) {
     mp_reader_t reader;
-    int ret = mp_reader_new_file_from_fd(&reader, fd, close_fd);
-    if (ret != 0) {
-        return NULL;
-    }
+    mp_reader_new_file_from_fd(&reader, fd, close_fd);
     return mp_lexer_new(filename, reader);
 }
 
@@ -788,10 +737,6 @@ void mp_lexer_free(mp_lexer_t *lex) {
     }
 }
 
-void mp_lexer_to_next(mp_lexer_t *lex) {
-    mp_lexer_next_token_into(lex, false);
-}
-
 #if 0
 // This function is used to print the current token and should only be
 // needed to debug the lexer, so it's not available via a config option.
diff --git a/py/lexer.h b/py/lexer.h
index 32aef96266..5d998b3521 100644
--- a/py/lexer.h
+++ b/py/lexer.h
@@ -39,18 +39,17 @@
  */
 
 typedef enum _mp_token_kind_t {
-    MP_TOKEN_END,                   // 0
+    MP_TOKEN_END,
 
     MP_TOKEN_INVALID,
     MP_TOKEN_DEDENT_MISMATCH,
     MP_TOKEN_LONELY_STRING_OPEN,
-    MP_TOKEN_BAD_LINE_CONTINUATION,
 
-    MP_TOKEN_NEWLINE,               // 5
-    MP_TOKEN_INDENT,                // 6
-    MP_TOKEN_DEDENT,                // 7
+    MP_TOKEN_NEWLINE,
+    MP_TOKEN_INDENT,
+    MP_TOKEN_DEDENT,
 
-    MP_TOKEN_NAME,                  // 8
+    MP_TOKEN_NAME,
     MP_TOKEN_INTEGER,
     MP_TOKEN_FLOAT_OR_IMAG,
     MP_TOKEN_STRING,
@@ -58,9 +57,10 @@ typedef enum _mp_token_kind_t {
 
     MP_TOKEN_ELLIPSIS,
 
-    MP_TOKEN_KW_FALSE,              // 14
+    MP_TOKEN_KW_FALSE,
     MP_TOKEN_KW_NONE,
     MP_TOKEN_KW_TRUE,
+    MP_TOKEN_KW___DEBUG__,
     MP_TOKEN_KW_AND,
     MP_TOKEN_KW_AS,
     MP_TOKEN_KW_ASSERT,
@@ -71,7 +71,7 @@ typedef enum _mp_token_kind_t {
     MP_TOKEN_KW_BREAK,
     MP_TOKEN_KW_CLASS,
     MP_TOKEN_KW_CONTINUE,
-    MP_TOKEN_KW_DEF,                // 23
+    MP_TOKEN_KW_DEF,
     MP_TOKEN_KW_DEL,
     MP_TOKEN_KW_ELIF,
     MP_TOKEN_KW_ELSE,
@@ -81,7 +81,7 @@ typedef enum _mp_token_kind_t {
     MP_TOKEN_KW_FROM,
     MP_TOKEN_KW_GLOBAL,
     MP_TOKEN_KW_IF,
-    MP_TOKEN_KW_IMPORT,             // 33
+    MP_TOKEN_KW_IMPORT,
     MP_TOKEN_KW_IN,
     MP_TOKEN_KW_IS,
     MP_TOKEN_KW_LAMBDA,
@@ -91,12 +91,12 @@ typedef enum _mp_token_kind_t {
     MP_TOKEN_KW_PASS,
     MP_TOKEN_KW_RAISE,
     MP_TOKEN_KW_RETURN,
-    MP_TOKEN_KW_TRY,                // 43
+    MP_TOKEN_KW_TRY,
     MP_TOKEN_KW_WHILE,
     MP_TOKEN_KW_WITH,
     MP_TOKEN_KW_YIELD,
 
-    MP_TOKEN_OP_PLUS,               // 47
+    MP_TOKEN_OP_PLUS,
     MP_TOKEN_OP_MINUS,
     MP_TOKEN_OP_STAR,
     MP_TOKEN_OP_DBL_STAR,
@@ -106,7 +106,7 @@ typedef enum _mp_token_kind_t {
     MP_TOKEN_OP_LESS,
     MP_TOKEN_OP_DBL_LESS,
     MP_TOKEN_OP_MORE,
-    MP_TOKEN_OP_DBL_MORE,           // 57
+    MP_TOKEN_OP_DBL_MORE,
     MP_TOKEN_OP_AMPERSAND,
     MP_TOKEN_OP_PIPE,
     MP_TOKEN_OP_CARET,
@@ -116,7 +116,7 @@ typedef enum _mp_token_kind_t {
     MP_TOKEN_OP_DBL_EQUAL,
     MP_TOKEN_OP_NOT_EQUAL,
 
-    MP_TOKEN_DEL_PAREN_OPEN,        // 66
+    MP_TOKEN_DEL_PAREN_OPEN,
     MP_TOKEN_DEL_PAREN_CLOSE,
     MP_TOKEN_DEL_BRACKET_OPEN,
     MP_TOKEN_DEL_BRACKET_CLOSE,
@@ -126,7 +126,7 @@ typedef enum _mp_token_kind_t {
     MP_TOKEN_DEL_COLON,
     MP_TOKEN_DEL_PERIOD,
     MP_TOKEN_DEL_SEMICOLON,
-    MP_TOKEN_DEL_AT,                // 76
+    MP_TOKEN_DEL_AT,
     MP_TOKEN_DEL_EQUAL,
     MP_TOKEN_DEL_PLUS_EQUAL,
     MP_TOKEN_DEL_MINUS_EQUAL,
@@ -136,7 +136,7 @@ typedef enum _mp_token_kind_t {
     MP_TOKEN_DEL_PERCENT_EQUAL,
     MP_TOKEN_DEL_AMPERSAND_EQUAL,
     MP_TOKEN_DEL_PIPE_EQUAL,
-    MP_TOKEN_DEL_CARET_EQUAL,       // 86
+    MP_TOKEN_DEL_CARET_EQUAL,
     MP_TOKEN_DEL_DBL_MORE_EQUAL,
     MP_TOKEN_DEL_DBL_LESS_EQUAL,
     MP_TOKEN_DEL_DBL_STAR_EQUAL,
@@ -151,24 +151,24 @@ typedef struct _mp_lexer_t {
 
     unichar chr0, chr1, chr2;   // current cached characters from source
 
-    mp_uint_t line;             // current source line
-    mp_uint_t column;           // current source column
+    size_t line;                // current source line
+    size_t column;              // current source column
 
     mp_int_t emit_dent;             // non-zero when there are INDENT/DEDENT tokens to emit
     mp_int_t nested_bracket_level;  // >0 when there are nested brackets over multiple lines
 
-    mp_uint_t alloc_indent_level;
-    mp_uint_t num_indent_level;
+    size_t alloc_indent_level;
+    size_t num_indent_level;
     uint16_t *indent_level;
 
-    mp_uint_t tok_line;         // token source line
-    mp_uint_t tok_column;       // token source column
+    size_t tok_line;            // token source line
+    size_t tok_column;          // token source column
     mp_token_kind_t tok_kind;   // token kind
     vstr_t vstr;                // token data
 } mp_lexer_t;
 
 mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
-mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
+mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, size_t len, size_t free_len);
 
 void mp_lexer_free(mp_lexer_t *lex);
 void mp_lexer_to_next(mp_lexer_t *lex);
diff --git a/py/map.c b/py/map.c
index 0916ec522d..50d74f38f5 100644
--- a/py/map.c
+++ b/py/map.c
@@ -56,7 +56,7 @@ STATIC const uint16_t hash_allocation_sizes[] = {
     3229, 4831, 7243, 10861, 16273, 24407, 36607, 54907, // *1.5
 };
 
-STATIC mp_uint_t get_hash_alloc_greater_or_equal_to(mp_uint_t x) {
+STATIC size_t get_hash_alloc_greater_or_equal_to(size_t x) {
     for (size_t i = 0; i < MP_ARRAY_SIZE(hash_allocation_sizes); i++) {
         if (hash_allocation_sizes[i] >= x) {
             return hash_allocation_sizes[i];
@@ -70,7 +70,7 @@ STATIC mp_uint_t get_hash_alloc_greater_or_equal_to(mp_uint_t x) {
 /******************************************************************************/
 /* map                                                                        */
 
-void mp_map_init(mp_map_t *map, mp_uint_t n) {
+void mp_map_init(mp_map_t *map, size_t n) {
     if (n == 0) {
         map->alloc = 0;
         map->table = NULL;
@@ -84,7 +84,7 @@ void mp_map_init(mp_map_t *map, mp_uint_t n) {
     map->is_ordered = 0;
 }
 
-void mp_map_init_fixed_table(mp_map_t *map, mp_uint_t n, const mp_obj_t *table) {
+void mp_map_init_fixed_table(mp_map_t *map, size_t n, const mp_obj_t *table) {
     map->alloc = n;
     map->used = n;
     map->all_keys_are_qstrs = 1;
@@ -93,7 +93,7 @@ void mp_map_init_fixed_table(mp_map_t *map, mp_uint_t n, const mp_obj_t *table)
     map->table = (mp_map_elem_t*)table;
 }
 
-mp_map_t *mp_map_new(mp_uint_t n) {
+mp_map_t *mp_map_new(size_t n) {
     mp_map_t *map = m_new(mp_map_t, 1);
     mp_map_init(map, n);
     return map;
@@ -124,8 +124,8 @@ void mp_map_clear(mp_map_t *map) {
 }
 
 STATIC void mp_map_rehash(mp_map_t *map) {
-    mp_uint_t old_alloc = map->alloc;
-    mp_uint_t new_alloc = get_hash_alloc_greater_or_equal_to(map->alloc + 1);
+    size_t old_alloc = map->alloc;
+    size_t new_alloc = get_hash_alloc_greater_or_equal_to(map->alloc + 1);
     mp_map_elem_t *old_table = map->table;
     mp_map_elem_t *new_table = m_new0(mp_map_elem_t, new_alloc);
     // If we reach this point, table resizing succeeded, now we can edit the old map.
@@ -133,7 +133,7 @@ STATIC void mp_map_rehash(mp_map_t *map) {
     map->used = 0;
     map->all_keys_are_qstrs = 1;
     map->table = new_table;
-    for (mp_uint_t i = 0; i < old_alloc; i++) {
+    for (size_t i = 0; i < old_alloc; i++) {
         if (old_table[i].key != MP_OBJ_NULL && old_table[i].key != MP_OBJ_SENTINEL) {
             mp_map_lookup(map, old_table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = old_table[i].value;
         }
@@ -178,8 +178,14 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
         for (mp_map_elem_t *elem = &map->table[0], *top = &map->table[map->used]; elem < top; elem++) {
             if (elem->key == index || (!compare_only_ptrs && mp_obj_equal(elem->key, index))) {
                 if (MP_UNLIKELY(lookup_kind == MP_MAP_LOOKUP_REMOVE_IF_FOUND)) {
-                    elem->key = MP_OBJ_SENTINEL;
-                    // keep elem->value so that caller can access it if needed
+                    // remove the found element by moving the rest of the array down
+                    mp_obj_t value = elem->value;
+                    --map->used;
+                    memmove(elem, elem + 1, (top - elem - 1) * sizeof(*elem));
+                    // put the found element after the end so the caller can access it if needed
+                    elem = &map->table[map->used];
+                    elem->key = MP_OBJ_NULL;
+                    elem->value = value;
                 }
                 return elem;
             }
@@ -187,7 +193,6 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
         if (MP_LIKELY(lookup_kind != MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)) {
             return NULL;
         }
-        // TODO shrink array down over any previously-freed slots
         if (map->used == map->alloc) {
             // TODO: Alloc policy
             map->alloc += 4;
@@ -220,8 +225,8 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
         hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
     }
 
-    mp_uint_t pos = hash % map->alloc;
-    mp_uint_t start_pos = pos;
+    size_t pos = hash % map->alloc;
+    size_t start_pos = pos;
     mp_map_elem_t *avail_slot = NULL;
     for (;;) {
         mp_map_elem_t *slot = &map->table[pos];
@@ -296,19 +301,19 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
 
 #if MICROPY_PY_BUILTINS_SET
 
-void mp_set_init(mp_set_t *set, mp_uint_t n) {
+void mp_set_init(mp_set_t *set, size_t n) {
     set->alloc = n;
     set->used = 0;
     set->table = m_new0(mp_obj_t, set->alloc);
 }
 
 STATIC void mp_set_rehash(mp_set_t *set) {
-    mp_uint_t old_alloc = set->alloc;
+    size_t old_alloc = set->alloc;
     mp_obj_t *old_table = set->table;
     set->alloc = get_hash_alloc_greater_or_equal_to(set->alloc + 1);
     set->used = 0;
     set->table = m_new0(mp_obj_t, set->alloc);
-    for (mp_uint_t i = 0; i < old_alloc; i++) {
+    for (size_t i = 0; i < old_alloc; i++) {
         if (old_table[i] != MP_OBJ_NULL && old_table[i] != MP_OBJ_SENTINEL) {
             mp_set_lookup(set, old_table[i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
         }
@@ -328,8 +333,8 @@ mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t looku
         }
     }
     mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
-    mp_uint_t pos = hash % set->alloc;
-    mp_uint_t start_pos = pos;
+    size_t pos = hash % set->alloc;
+    size_t start_pos = pos;
     mp_obj_t *avail_slot = NULL;
     for (;;) {
         mp_obj_t elem = set->table[pos];
@@ -390,7 +395,7 @@ mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t looku
 }
 
 mp_obj_t mp_set_remove_first(mp_set_t *set) {
-    for (mp_uint_t pos = 0; pos < set->alloc; pos++) {
+    for (size_t pos = 0; pos < set->alloc; pos++) {
         if (MP_SET_SLOT_IS_FILLED(set, pos)) {
             mp_obj_t elem = set->table[pos];
             // delete element
@@ -418,7 +423,7 @@ void mp_set_clear(mp_set_t *set) {
 
 #if defined(DEBUG_PRINT) && DEBUG_PRINT
 void mp_map_dump(mp_map_t *map) {
-    for (mp_uint_t i = 0; i < map->alloc; i++) {
+    for (size_t i = 0; i < map->alloc; i++) {
         if (map->table[i].key != NULL) {
             mp_obj_print(map->table[i].key, PRINT_REPR);
         } else {
diff --git a/py/mkenv.mk b/py/mkenv.mk
index 14e23e074c..eb1e44fef5 100644
--- a/py/mkenv.mk
+++ b/py/mkenv.mk
@@ -58,9 +58,9 @@ CXX += -m32
 LD += -m32
 endif
 
-MAKE_FROZEN = ../tools/make-frozen.py
-MPY_CROSS = ../mpy-cross/mpy-cross
-MPY_TOOL = ../tools/mpy-tool.py
+MAKE_FROZEN = $(TOP)/tools/make-frozen.py
+MPY_CROSS = $(TOP)/mpy-cross/mpy-cross
+MPY_TOOL = $(TOP)/tools/mpy-tool.py
 
 all:
 .PHONY: all
diff --git a/py/mkrules.mk b/py/mkrules.mk
index b71450a21d..00ed279176 100644
--- a/py/mkrules.mk
+++ b/py/mkrules.mk
@@ -71,12 +71,7 @@ $(OBJ): | $(HEADER_BUILD)/qstrdefs.generated.h $(HEADER_BUILD)/mpversion.h
 
 $(HEADER_BUILD)/qstr.i.last: $(SRC_QSTR) | $(HEADER_BUILD)/mpversion.h
 	$(ECHO) "GEN $@"
-	$(Q)if [ "$?" = "" ]; then \
-	    echo "QSTR Looks like -B used, trying to emulate"; \
-	    $(CPP) $(QSTR_GEN_EXTRA_CFLAGS) $(CFLAGS) $^ >$(HEADER_BUILD)/qstr.i.last; \
-	else \
-	    $(CPP) $(QSTR_GEN_EXTRA_CFLAGS) $(CFLAGS) $? >$(HEADER_BUILD)/qstr.i.last; \
-	fi
+	$(Q)$(CPP) $(QSTR_GEN_EXTRA_CFLAGS) $(CFLAGS) $(if $?,$?,$^) >$(HEADER_BUILD)/qstr.i.last;
 
 $(HEADER_BUILD)/qstr.split: $(HEADER_BUILD)/qstr.i.last
 	$(ECHO) "GEN $@"
@@ -107,15 +102,19 @@ $(BUILD)/frozen.c: $(wildcard $(FROZEN_DIR)/*) $(HEADER_BUILD) $(FROZEN_EXTRA_DE
 endif
 
 ifneq ($(FROZEN_MPY_DIR),)
+# to build the MicroPython cross compiler
+$(TOP)/mpy-cross/mpy-cross: $(TOP)/py/*.[ch] $(TOP)/mpy-cross/*.[ch] $(TOP)/windows/fmode.c
+	$(Q)$(MAKE) -C $(TOP)/mpy-cross
+
 # make a list of all the .py files that need compiling and freezing
 FROZEN_MPY_PY_FILES := $(shell find -L $(FROZEN_MPY_DIR) -type f -name '*.py' | $(SED) -e 's=^$(FROZEN_MPY_DIR)/==')
 FROZEN_MPY_MPY_FILES := $(addprefix $(BUILD)/frozen_mpy/,$(FROZEN_MPY_PY_FILES:.py=.mpy))
 
 # to build .mpy files from .py files
-$(BUILD)/frozen_mpy/%.mpy: $(FROZEN_MPY_DIR)/%.py
+$(BUILD)/frozen_mpy/%.mpy: $(FROZEN_MPY_DIR)/%.py $(TOP)/mpy-cross/mpy-cross
 	@$(ECHO) "MPY $<"
 	$(Q)$(MKDIR) -p $(dir $@)
-	$(Q)$(MPY_CROSS) -o $@ -s $(^:$(FROZEN_MPY_DIR)/%=%) $(MPY_CROSS_FLAGS) $^
+	$(Q)$(MPY_CROSS) -o $@ -s $(<:$(FROZEN_MPY_DIR)/%=%) $(MPY_CROSS_FLAGS) $<
 
 # to build frozen_mpy.c from all .mpy files
 $(BUILD)/frozen_mpy.c: $(FROZEN_MPY_MPY_FILES) $(BUILD)/genhdr/qstrdefs.generated.h
@@ -147,8 +146,15 @@ clean-prog:
 endif
 
 LIBMICROPYTHON = libmicropython.a
+
+# We can execute extra commands after library creation using
+# LIBMICROPYTHON_EXTRA_CMD. This may be needed e.g. to integrate
+# with 3rd-party projects which don't have proper dependency
+# tracking. Then LIBMICROPYTHON_EXTRA_CMD can e.g. touch some
+# other file to cause needed effect, e.g. relinking with new lib.
 lib $(LIBMICROPYTHON): $(OBJ)
 	$(AR) rcs $(LIBMICROPYTHON) $^
+	$(LIBMICROPYTHON_EXTRA_CMD)
 
 clean:
 	$(RM) -rf $(BUILD) $(CLEAN_EXTRA)
diff --git a/py/modbuiltins.c b/py/modbuiltins.c
index b7c8ff2601..17bd30c521 100644
--- a/py/modbuiltins.c
+++ b/py/modbuiltins.c
@@ -117,7 +117,8 @@ STATIC mp_obj_t mp_builtin_abs(mp_obj_t o_in) {
 MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_abs_obj, mp_builtin_abs);
 
 STATIC mp_obj_t mp_builtin_all(mp_obj_t o_in) {
-    mp_obj_t iterable = mp_getiter(o_in);
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t iterable = mp_getiter(o_in, &iter_buf);
     mp_obj_t item;
     while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
         if (!mp_obj_is_true(item)) {
@@ -129,7 +130,8 @@ STATIC mp_obj_t mp_builtin_all(mp_obj_t o_in) {
 MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_all_obj, mp_builtin_all);
 
 STATIC mp_obj_t mp_builtin_any(mp_obj_t o_in) {
-    mp_obj_t iterable = mp_getiter(o_in);
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t iterable = mp_getiter(o_in, &iter_buf);
     mp_obj_t item;
     while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
         if (mp_obj_is_true(item)) {
@@ -178,7 +180,7 @@ STATIC mp_obj_t mp_builtin_chr(mp_obj_t o_in) {
         str[3] = (c & 0x3F) | 0x80;
         len = 4;
     } else {
-        mp_raise_msg(&mp_type_ValueError, "chr() arg not in range(0x110000)");
+        mp_raise_ValueError("chr() arg not in range(0x110000)");
     }
     return mp_obj_new_str(str, len, true);
     #else
@@ -187,7 +189,7 @@ STATIC mp_obj_t mp_builtin_chr(mp_obj_t o_in) {
         char str[1] = {ord};
         return mp_obj_new_str(str, 1, true);
     } else {
-        mp_raise_msg(&mp_type_ValueError, "chr() arg not in range(256)");
+        mp_raise_ValueError("chr() arg not in range(256)");
     }
     #endif
 }
@@ -258,7 +260,7 @@ STATIC mp_obj_t mp_builtin_hex(mp_obj_t o_in) {
 MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_hex_obj, mp_builtin_hex);
 
 STATIC mp_obj_t mp_builtin_iter(mp_obj_t o_in) {
-    return mp_getiter(o_in);
+    return mp_getiter(o_in, NULL);
 }
 MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_iter_obj, mp_builtin_iter);
 
@@ -270,7 +272,8 @@ STATIC mp_obj_t mp_builtin_min_max(size_t n_args, const mp_obj_t *args, mp_map_t
     mp_obj_t key_fn = key_elem == NULL ? MP_OBJ_NULL : key_elem->value;
     if (n_args == 1) {
         // given an iterable
-        mp_obj_t iterable = mp_getiter(args[0]);
+        mp_obj_iter_buf_t iter_buf;
+        mp_obj_t iterable = mp_getiter(args[0], &iter_buf);
         mp_obj_t best_key = MP_OBJ_NULL;
         mp_obj_t best_obj = MP_OBJ_NULL;
         mp_obj_t item;
@@ -286,7 +289,7 @@ STATIC mp_obj_t mp_builtin_min_max(size_t n_args, const mp_obj_t *args, mp_map_t
             if (default_elem != NULL) {
                 best_obj = default_elem->value;
             } else {
-                mp_raise_msg(&mp_type_ValueError, "arg is an empty sequence");
+                mp_raise_ValueError("arg is an empty sequence");
             }
         }
         return best_obj;
@@ -333,7 +336,7 @@ STATIC mp_obj_t mp_builtin_oct(mp_obj_t o_in) {
 MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_oct_obj, mp_builtin_oct);
 
 STATIC mp_obj_t mp_builtin_ord(mp_obj_t o_in) {
-    mp_uint_t len;
+    size_t len;
     const char *str = mp_obj_str_get_data(o_in, &len);
     #if MICROPY_PY_BUILTINS_STR_UNICODE
     if (MP_OBJ_IS_STR(o_in)) {
@@ -378,7 +381,14 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_ord_obj, mp_builtin_ord);
 STATIC mp_obj_t mp_builtin_pow(size_t n_args, const mp_obj_t *args) {
     switch (n_args) {
         case 2: return mp_binary_op(MP_BINARY_OP_POWER, args[0], args[1]);
-        default: return mp_binary_op(MP_BINARY_OP_MODULO, mp_binary_op(MP_BINARY_OP_POWER, args[0], args[1]), args[2]); // TODO optimise...
+        default:
+#if !MICROPY_PY_BUILTINS_POW3
+            mp_raise_msg(&mp_type_NotImplementedError, "3-arg pow() not supported");
+#elif MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_MPZ
+            return mp_binary_op(MP_BINARY_OP_MODULO, mp_binary_op(MP_BINARY_OP_POWER, args[0], args[1]), args[2]);
+#else
+            return mp_obj_int_pow3(args[0], args[1], args[2]);
+#endif
     }
 }
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_pow_obj, 2, 3, mp_builtin_pow);
@@ -387,16 +397,16 @@ STATIC mp_obj_t mp_builtin_print(size_t n_args, const mp_obj_t *args, mp_map_t *
     mp_map_elem_t *sep_elem = mp_map_lookup(kwargs, MP_OBJ_NEW_QSTR(MP_QSTR_sep), MP_MAP_LOOKUP);
     mp_map_elem_t *end_elem = mp_map_lookup(kwargs, MP_OBJ_NEW_QSTR(MP_QSTR_end), MP_MAP_LOOKUP);
     const char *sep_data = " ";
-    mp_uint_t sep_len = 1;
+    size_t sep_len = 1;
     const char *end_data = "\n";
-    mp_uint_t end_len = 1;
+    size_t end_len = 1;
     if (sep_elem != NULL && sep_elem->value != mp_const_none) {
         sep_data = mp_obj_str_get_data(sep_elem->value, &sep_len);
     }
     if (end_elem != NULL && end_elem->value != mp_const_none) {
         end_data = mp_obj_str_get_data(end_elem->value, &end_len);
     }
-    #if MICROPY_PY_IO
+    #if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES
     void *stream_obj = &mp_sys_stdout_obj;
     mp_map_elem_t *file_elem = mp_map_lookup(kwargs, MP_OBJ_NEW_QSTR(MP_QSTR_file), MP_MAP_LOOKUP);
     if (file_elem != NULL && file_elem->value != mp_const_none) {
@@ -407,19 +417,19 @@ STATIC mp_obj_t mp_builtin_print(size_t n_args, const mp_obj_t *args, mp_map_t *
     #endif
     for (mp_uint_t i = 0; i < n_args; i++) {
         if (i > 0) {
-            #if MICROPY_PY_IO
+            #if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES
             mp_stream_write_adaptor(stream_obj, sep_data, sep_len);
             #else
             mp_print_strn(&mp_plat_print, sep_data, sep_len, 0, 0, 0);
             #endif
         }
-        #if MICROPY_PY_IO
+        #if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES
         mp_obj_print_helper(&print, args[i], PRINT_STR);
         #else
         mp_obj_print_helper(&mp_plat_print, args[i], PRINT_STR);
         #endif
     }
-    #if MICROPY_PY_IO
+    #if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES
     mp_stream_write_adaptor(stream_obj, end_data, end_len);
     #else
     mp_print_strn(&mp_plat_print, end_data, end_len, 0, 0, 0);
@@ -463,22 +473,16 @@ STATIC mp_obj_t mp_builtin_round(size_t n_args, const mp_obj_t *args) {
         mp_float_t val = mp_obj_get_float(o_in);
         mp_float_t mult = MICROPY_FLOAT_C_FUN(pow)(10, num_dig);
         // TODO may lead to overflow
-        mp_float_t rounded = MICROPY_FLOAT_C_FUN(round)(val * mult) / mult;
+        mp_float_t rounded = MICROPY_FLOAT_C_FUN(nearbyint)(val * mult) / mult;
         return mp_obj_new_float(rounded);
     }
     mp_float_t val = mp_obj_get_float(o_in);
-    mp_float_t rounded = MICROPY_FLOAT_C_FUN(round)(val);
-    mp_int_t r = rounded;
-    // make rounded value even if it was halfway between ints
-    if (val - rounded == 0.5) {
-        r = (r + 1) & (~1);
-    } else if (val - rounded == -0.5) {
-        r &= ~1;
-    }
+    mp_float_t rounded = MICROPY_FLOAT_C_FUN(nearbyint)(val);
+    return mp_obj_new_int_from_float(rounded);
 #else
     mp_int_t r = mp_obj_get_int(o_in);
-#endif
     return mp_obj_new_int(r);
+#endif
 }
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_round_obj, 1, 2, mp_builtin_round);
 
@@ -488,7 +492,8 @@ STATIC mp_obj_t mp_builtin_sum(size_t n_args, const mp_obj_t *args) {
         case 1: value = MP_OBJ_NEW_SMALL_INT(0); break;
         default: value = args[1]; break;
     }
-    mp_obj_t iterable = mp_getiter(args[0]);
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t iterable = mp_getiter(args[0], &iter_buf);
     mp_obj_t item;
     while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
         value = mp_binary_op(MP_BINARY_OP_ADD, value, item);
@@ -499,7 +504,7 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_sum_obj, 1, 2, mp_builtin_sum);
 
 STATIC mp_obj_t mp_builtin_sorted(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs) {
     if (n_args > 1) {
-        mp_raise_msg(&mp_type_TypeError, "must use keyword argument for key function");
+        mp_raise_TypeError("must use keyword argument for key function");
     }
     mp_obj_t self = mp_type_list.make_new(&mp_type_list, 1, 0, args);
     mp_obj_list_sort(1, &self, kwargs);
@@ -666,6 +671,9 @@ STATIC const mp_rom_map_elem_t mp_module_builtins_globals_table[] = {
     { MP_ROM_QSTR(MP_QSTR_globals), MP_ROM_PTR(&mp_builtin_globals_obj) },
     { MP_ROM_QSTR(MP_QSTR_hasattr), MP_ROM_PTR(&mp_builtin_hasattr_obj) },
     { MP_ROM_QSTR(MP_QSTR_hash), MP_ROM_PTR(&mp_builtin_hash_obj) },
+    #if MICROPY_PY_BUILTINS_HELP
+    { MP_ROM_QSTR(MP_QSTR_help), MP_ROM_PTR(&mp_builtin_help_obj) },
+    #endif
     { MP_ROM_QSTR(MP_QSTR_hex), MP_ROM_PTR(&mp_builtin_hex_obj) },
     { MP_ROM_QSTR(MP_QSTR_id), MP_ROM_PTR(&mp_builtin_id_obj) },
     { MP_ROM_QSTR(MP_QSTR_isinstance), MP_ROM_PTR(&mp_builtin_isinstance_obj) },
diff --git a/py/modio.c b/py/modio.c
index d5da0b1db7..2d317d022a 100644
--- a/py/modio.c
+++ b/py/modio.c
@@ -30,6 +30,8 @@
 #include "py/runtime.h"
 #include "py/builtin.h"
 #include "py/stream.h"
+#include "py/objstringio.h"
+#include "py/frozenmod.h"
 
 #if MICROPY_PY_IO
 
@@ -129,11 +131,65 @@ STATIC const mp_obj_type_t bufwriter_type = {
 };
 #endif // MICROPY_PY_IO_BUFFEREDWRITER
 
+#if MICROPY_MODULE_FROZEN_STR
+STATIC mp_obj_t resource_stream(mp_obj_t package_in, mp_obj_t path_in) {
+    VSTR_FIXED(path_buf, MICROPY_ALLOC_PATH_MAX);
+    size_t len;
+
+    // As an extension to pkg_resources.resource_stream(), we support
+    // package parameter being None, the path_in is interpreted as a
+    // raw path.
+    if (package_in != mp_const_none) {
+        mp_obj_t args[5];
+        args[0] = package_in;
+        args[1] = mp_const_none; // TODO should be globals
+        args[2] = mp_const_none; // TODO should be locals
+        args[3] = mp_const_true; // Pass sentinel "non empty" value to force returning of leaf module
+        args[4] = MP_OBJ_NEW_SMALL_INT(0);
+
+        // TODO lookup __import__ and call that instead of going straight to builtin implementation
+        mp_obj_t pkg = mp_builtin___import__(5, args);
+
+        mp_obj_t dest[2];
+        mp_load_method_maybe(pkg, MP_QSTR___path__, dest);
+        if (dest[0] == MP_OBJ_NULL) {
+            mp_raise_TypeError(NULL);
+        }
+
+        const char *path = mp_obj_str_get_data(dest[0], &len);
+        vstr_add_strn(&path_buf, path, len);
+        vstr_add_byte(&path_buf, '/');
+    }
+
+    const char *path = mp_obj_str_get_data(path_in, &len);
+    vstr_add_strn(&path_buf, path, len);
+
+    len = path_buf.len;
+    const char *data = mp_find_frozen_str(path_buf.buf, &len);
+    if (data != NULL) {
+        mp_obj_stringio_t *o = m_new_obj(mp_obj_stringio_t);
+        o->base.type = &mp_type_bytesio;
+        o->vstr = m_new_obj(vstr_t);
+        vstr_init_fixed_buf(o->vstr, len + 1, (char*)data);
+        o->vstr->len = len;
+        o->pos = 0;
+        return MP_OBJ_FROM_PTR(o);
+    }
+
+    mp_obj_t path_out = mp_obj_new_str(path_buf.buf, path_buf.len, false);
+    return mp_builtin_open(1, &path_out, (mp_map_t*)&mp_const_empty_map);
+}
+MP_DEFINE_CONST_FUN_OBJ_2(resource_stream_obj, resource_stream);
+#endif
+
 STATIC const mp_rom_map_elem_t mp_module_io_globals_table[] = {
     { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_uio) },
     // Note: mp_builtin_open_obj should be defined by port, it's not
     // part of the core.
     { MP_ROM_QSTR(MP_QSTR_open), MP_ROM_PTR(&mp_builtin_open_obj) },
+    #if MICROPY_PY_IO_RESOURCE_STREAM
+    { MP_ROM_QSTR(MP_QSTR_resource_stream), MP_ROM_PTR(&resource_stream_obj) },
+    #endif
     #if MICROPY_PY_IO_FILEIO
     { MP_ROM_QSTR(MP_QSTR_FileIO), MP_ROM_PTR(&mp_type_fileio) },
     #if MICROPY_CPYTHON_COMPAT
diff --git a/py/modmath.c b/py/modmath.c
index 7c51eab03a..ddab337d05 100644
--- a/py/modmath.c
+++ b/py/modmath.c
@@ -57,7 +57,7 @@ STATIC NORETURN void math_error(void) {
     STATIC MP_DEFINE_CONST_FUN_OBJ_1(mp_math_## py_name ## _obj, mp_math_ ## py_name);
 
 #define MATH_FUN_1_TO_INT(py_name, c_name) \
-    STATIC mp_obj_t mp_math_ ## py_name(mp_obj_t x_obj) { mp_int_t x = MICROPY_FLOAT_C_FUN(c_name)(mp_obj_get_float(x_obj)); return mp_obj_new_int(x); } \
+    STATIC mp_obj_t mp_math_ ## py_name(mp_obj_t x_obj) { return mp_obj_new_int_from_float(MICROPY_FLOAT_C_FUN(c_name)(mp_obj_get_float(x_obj))); } \
     STATIC MP_DEFINE_CONST_FUN_OBJ_1(mp_math_## py_name ## _obj, mp_math_ ## py_name);
 
 #define MATH_FUN_1_ERRCOND(py_name, c_name, error_condition) \
diff --git a/py/modmicropython.c b/py/modmicropython.c
index 675d169cc4..d767062301 100644
--- a/py/modmicropython.c
+++ b/py/modmicropython.c
@@ -29,7 +29,9 @@
 #include "py/mpstate.h"
 #include "py/builtin.h"
 #include "py/stackctrl.h"
+#include "py/runtime.h"
 #include "py/gc.h"
+#include "py/mphal.h"
 
 // Various builtins specific to MicroPython runtime,
 // living in micropython module
@@ -128,6 +130,24 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_0(mp_micropython_heap_unlock_obj, mp_micropython_
 STATIC MP_DEFINE_CONST_FUN_OBJ_1(mp_alloc_emergency_exception_buf_obj, mp_alloc_emergency_exception_buf);
 #endif
 
+#if MICROPY_KBD_EXCEPTION
+STATIC mp_obj_t mp_micropython_kbd_intr(mp_obj_t int_chr_in) {
+    mp_hal_set_interrupt_char(mp_obj_get_int(int_chr_in));
+    return mp_const_none;
+}
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(mp_micropython_kbd_intr_obj, mp_micropython_kbd_intr);
+#endif
+
+#if MICROPY_ENABLE_SCHEDULER
+STATIC mp_obj_t mp_micropython_schedule(mp_obj_t function, mp_obj_t arg) {
+    if (!mp_sched_schedule(function, arg)) {
+        mp_raise_msg(&mp_type_RuntimeError, "schedule stack full");
+    }
+    return mp_const_none;
+}
+STATIC MP_DEFINE_CONST_FUN_OBJ_2(mp_micropython_schedule_obj, mp_micropython_schedule);
+#endif
+
 STATIC const mp_rom_map_elem_t mp_module_micropython_globals_table[] = {
     { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_micropython) },
     { MP_ROM_QSTR(MP_QSTR_const), MP_ROM_PTR(&mp_identity_obj) },
@@ -151,6 +171,12 @@ STATIC const mp_rom_map_elem_t mp_module_micropython_globals_table[] = {
     { MP_ROM_QSTR(MP_QSTR_heap_lock), MP_ROM_PTR(&mp_micropython_heap_lock_obj) },
     { MP_ROM_QSTR(MP_QSTR_heap_unlock), MP_ROM_PTR(&mp_micropython_heap_unlock_obj) },
     #endif
+    #if MICROPY_KBD_EXCEPTION
+    { MP_ROM_QSTR(MP_QSTR_kbd_intr), MP_ROM_PTR(&mp_micropython_kbd_intr_obj) },
+    #endif
+    #if MICROPY_ENABLE_SCHEDULER
+    { MP_ROM_QSTR(MP_QSTR_schedule), MP_ROM_PTR(&mp_micropython_schedule_obj) },
+    #endif
 };
 
 STATIC MP_DEFINE_CONST_DICT(mp_module_micropython_globals, mp_module_micropython_globals_table);
diff --git a/py/modstruct.c b/py/modstruct.c
index 88411ff0fc..3c99ef1d8d 100644
--- a/py/modstruct.c
+++ b/py/modstruct.c
@@ -113,9 +113,6 @@ STATIC mp_obj_t struct_calcsize(mp_obj_t fmt_in) {
         } else {
             mp_uint_t align;
             size_t sz = mp_binary_get_size(fmt_type, *fmt, &align);
-            if (sz == 0) {
-                mp_raise_ValueError("unsupported format");
-            }
             while (cnt--) {
                 // Apply alignment
                 size = (size + align - 1) & ~(align - 1);
diff --git a/py/modsys.c b/py/modsys.c
index 8c368ac35b..5fbcb944c4 100644
--- a/py/modsys.c
+++ b/py/modsys.c
@@ -32,6 +32,7 @@
 #include "py/objstr.h"
 #include "py/objint.h"
 #include "py/stream.h"
+#include "py/smallint.h"
 
 #if MICROPY_PY_SYS
 
@@ -44,7 +45,7 @@ extern struct _mp_dummy_t mp_sys_stdin_obj;
 extern struct _mp_dummy_t mp_sys_stdout_obj;
 extern struct _mp_dummy_t mp_sys_stderr_obj;
 
-#if MICROPY_PY_IO
+#if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES
 const mp_print_t mp_sys_stdout_print = {&mp_sys_stdout_obj, mp_stream_write_adaptor};
 #endif
 
@@ -105,7 +106,7 @@ STATIC mp_obj_t mp_sys_exit(size_t n_args, const mp_obj_t *args) {
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_sys_exit_obj, 0, 1, mp_sys_exit);
 
 STATIC mp_obj_t mp_sys_print_exception(size_t n_args, const mp_obj_t *args) {
-    #if MICROPY_PY_IO
+    #if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES
     void *stream_obj = &mp_sys_stdout_obj;
     if (n_args > 1) {
         stream_obj = MP_OBJ_TO_PTR(args[1]); // XXX may fail
@@ -162,12 +163,12 @@ STATIC const mp_rom_map_elem_t mp_module_sys_globals_table[] = {
 
     #if MICROPY_PY_SYS_MAXSIZE
     #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
-    // INT_MAX is not representable as small int, as we know that small int
-    // takes one bit for tag. So, we have little choice but to provide this
-    // value. Apps also should be careful to not try to compare sys.maxsize
-    // with some number (which may not fit in available int size), but instead
-    // count number of significant bits in sys.maxsize.
-    { MP_ROM_QSTR(MP_QSTR_maxsize), MP_OBJ_NEW_SMALL_INT(INT_MAX >> 1) },
+    // Maximum mp_int_t value is not representable as small int, so we have
+    // little choice but to use MP_SMALL_INT_MAX. Apps also should be careful
+    // to not try to compare sys.maxsize to some literal number (as this
+    // number might not fit in available int size), but instead count number
+    // of "one" bits in sys.maxsize.
+    { MP_ROM_QSTR(MP_QSTR_maxsize), MP_OBJ_NEW_SMALL_INT(MP_SMALL_INT_MAX) },
     #else
     { MP_ROM_QSTR(MP_QSTR_maxsize), MP_ROM_PTR(&mp_maxsize_obj) },
     #endif
diff --git a/py/modthread.c b/py/modthread.c
index 6f55281adc..d0e71dad3b 100644
--- a/py/modthread.c
+++ b/py/modthread.c
@@ -44,24 +44,19 @@
 
 /****************************************************************/
 // Lock object
-// Note: with the GIL enabled we can easily synthesise a lock object
 
 STATIC const mp_obj_type_t mp_type_thread_lock;
 
 typedef struct _mp_obj_thread_lock_t {
     mp_obj_base_t base;
-    #if !MICROPY_PY_THREAD_GIL
     mp_thread_mutex_t mutex;
-    #endif
     volatile bool locked;
 } mp_obj_thread_lock_t;
 
 STATIC mp_obj_thread_lock_t *mp_obj_new_thread_lock(void) {
     mp_obj_thread_lock_t *self = m_new_obj(mp_obj_thread_lock_t);
     self->base.type = &mp_type_thread_lock;
-    #if !MICROPY_PY_THREAD_GIL
     mp_thread_mutex_init(&self->mutex);
-    #endif
     self->locked = false;
     return self;
 }
@@ -73,20 +68,9 @@ STATIC mp_obj_t thread_lock_acquire(size_t n_args, const mp_obj_t *args) {
         wait = mp_obj_get_int(args[1]);
         // TODO support timeout arg
     }
-    #if MICROPY_PY_THREAD_GIL
-    if (self->locked) {
-        if (!wait) {
-            return mp_const_false;
-        }
-        do {
-            MP_THREAD_GIL_EXIT();
-            MP_THREAD_GIL_ENTER();
-        } while (self->locked);
-    }
-    self->locked = true;
-    return mp_const_true;
-    #else
+    MP_THREAD_GIL_EXIT();
     int ret = mp_thread_mutex_lock(&self->mutex, wait);
+    MP_THREAD_GIL_ENTER();
     if (ret == 0) {
         return mp_const_false;
     } else if (ret == 1) {
@@ -95,7 +79,6 @@ STATIC mp_obj_t thread_lock_acquire(size_t n_args, const mp_obj_t *args) {
     } else {
         mp_raise_OSError(-ret);
     }
-    #endif
 }
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(thread_lock_acquire_obj, 1, 3, thread_lock_acquire);
 
@@ -103,9 +86,9 @@ STATIC mp_obj_t thread_lock_release(mp_obj_t self_in) {
     mp_obj_thread_lock_t *self = MP_OBJ_TO_PTR(self_in);
     // TODO check if already unlocked
     self->locked = false;
-    #if !MICROPY_PY_THREAD_GIL
+    MP_THREAD_GIL_EXIT();
     mp_thread_mutex_unlock(&self->mutex);
-    #endif
+    MP_THREAD_GIL_ENTER();
     return mp_const_none;
 }
 STATIC MP_DEFINE_CONST_FUN_OBJ_1(thread_lock_release_obj, thread_lock_release);
@@ -160,6 +143,8 @@ STATIC mp_obj_t mod_thread_stack_size(size_t n_args, const mp_obj_t *args) {
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_thread_stack_size_obj, 0, 1, mod_thread_stack_size);
 
 typedef struct _thread_entry_args_t {
+    mp_obj_dict_t *dict_locals;
+    mp_obj_dict_t *dict_globals;
     size_t stack_size;
     mp_obj_t fun;
     size_t n_args;
@@ -178,6 +163,10 @@ STATIC void *thread_entry(void *args_in) {
     mp_stack_set_top(&ts + 1); // need to include ts in root-pointer scan
     mp_stack_set_limit(args->stack_size);
 
+    // set locals and globals from the calling context
+    mp_locals_set(args->dict_locals);
+    mp_globals_set(args->dict_globals);
+
     MP_THREAD_GIL_ENTER();
 
     // signal that we are set up and running
@@ -186,7 +175,6 @@ STATIC void *thread_entry(void *args_in) {
     // TODO set more thread-specific state here:
     //  mp_pending_exception? (root pointer)
     //  cur_exception (root pointer)
-    //  dict_locals? (root pointer) uPy doesn't make a new locals dict for functions, just for classes, so it's different to CPy
 
     DEBUG_printf("[thread] start ts=%p args=%p stack=%p\n", &ts, &args, MP_STATE_THREAD(stack_top));
 
@@ -227,7 +215,7 @@ STATIC mp_obj_t mod_thread_start_new_thread(size_t n_args, const mp_obj_t *args)
     thread_entry_args_t *th_args;
 
     // get positional arguments
-    mp_uint_t pos_args_len;
+    size_t pos_args_len;
     mp_obj_t *pos_args_items;
     mp_obj_get_array(args[1], &pos_args_len, &pos_args_items);
 
@@ -239,7 +227,7 @@ STATIC mp_obj_t mod_thread_start_new_thread(size_t n_args, const mp_obj_t *args)
     } else {
         // positional and keyword arguments
         if (mp_obj_get_type(args[2]) != &mp_type_dict) {
-            mp_raise_msg(&mp_type_TypeError, "expecting a dict for keyword args");
+            mp_raise_TypeError("expecting a dict for keyword args");
         }
         mp_map_t *map = &((mp_obj_dict_t*)MP_OBJ_TO_PTR(args[2]))->map;
         th_args = m_new_obj_var(thread_entry_args_t, mp_obj_t, pos_args_len + 2 * map->used);
@@ -257,6 +245,10 @@ STATIC mp_obj_t mod_thread_start_new_thread(size_t n_args, const mp_obj_t *args)
     th_args->n_args = pos_args_len;
     memcpy(th_args->args, pos_args_items, pos_args_len * sizeof(mp_obj_t));
 
+    // pass our locals and globals into the new thread
+    th_args->dict_locals = mp_locals_get();
+    th_args->dict_globals = mp_globals_get();
+
     // set the stack size to use
     th_args->stack_size = thread_stack_size;
 
diff --git a/py/moduerrno.c b/py/moduerrno.c
index 4a5e87419f..de66c941b0 100644
--- a/py/moduerrno.c
+++ b/py/moduerrno.c
@@ -32,9 +32,10 @@
 
 #if MICROPY_PY_UERRNO
 
-// This list could be defined per port in mpconfigport.h to tailor it to a
-// specific port's needs.  But for now we have a common list.
-#define ERRNO_LIST \
+// This list can be defined per port in mpconfigport.h to tailor it to a
+// specific port's needs.  If it's not defined then we provide a default.
+#ifndef MICROPY_PY_UERRNO_LIST
+#define MICROPY_PY_UERRNO_LIST \
     X(EPERM) \
     X(ENOENT) \
     X(EIO) \
@@ -58,9 +59,12 @@
     X(EALREADY) \
     X(EINPROGRESS) \
 
+#endif
+
+#if MICROPY_PY_UERRNO_ERRORCODE
 STATIC const mp_rom_map_elem_t errorcode_table[] = {
     #define X(e) { MP_ROM_INT(MP_ ## e), MP_ROM_QSTR(MP_QSTR_## e) },
-    ERRNO_LIST
+    MICROPY_PY_UERRNO_LIST
     #undef X
 };
 
@@ -75,13 +79,16 @@ STATIC const mp_obj_dict_t errorcode_dict = {
         .table = (mp_map_elem_t*)(mp_rom_map_elem_t*)errorcode_table,
     },
 };
+#endif
 
 STATIC const mp_rom_map_elem_t mp_module_uerrno_globals_table[] = {
     { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_uerrno) },
+    #if MICROPY_PY_UERRNO_ERRORCODE
     { MP_ROM_QSTR(MP_QSTR_errorcode), MP_ROM_PTR(&errorcode_dict) },
+    #endif
 
     #define X(e) { MP_ROM_QSTR(MP_QSTR_## e), MP_ROM_INT(MP_ ## e) },
-    ERRNO_LIST
+    MICROPY_PY_UERRNO_LIST
     #undef X
 };
 
@@ -93,12 +100,23 @@ const mp_obj_module_t mp_module_uerrno = {
 };
 
 qstr mp_errno_to_str(mp_obj_t errno_val) {
+    #if MICROPY_PY_UERRNO_ERRORCODE
+    // We have the errorcode dict so can do a lookup using the hash map
     mp_map_elem_t *elem = mp_map_lookup((mp_map_t*)&errorcode_dict.map, errno_val, MP_MAP_LOOKUP);
     if (elem == NULL) {
         return MP_QSTR_NULL;
     } else {
         return MP_OBJ_QSTR_VALUE(elem->value);
     }
+    #else
+    // We don't have the errorcode dict so do a simple search in the modules dict
+    for (size_t i = 0; i < MP_ARRAY_SIZE(mp_module_uerrno_globals_table); ++i) {
+        if (errno_val == mp_module_uerrno_globals_table[i].value) {
+            return MP_OBJ_QSTR_VALUE(mp_module_uerrno_globals_table[i].key);
+        }
+    }
+    return MP_QSTR_NULL;
+    #endif
 }
 
 #endif //MICROPY_PY_UERRNO
diff --git a/py/mpconfig.h b/py/mpconfig.h
index 6d18937177..a61d431e5a 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -353,6 +353,12 @@
 #define MICROPY_COMP_TRIPLE_TUPLE_ASSIGN (0)
 #endif
 
+// Whether to enable optimisation of: return a if b else c
+// Costs about 80 bytes (Thumb2) and saves 2 bytes of bytecode for each use
+#ifndef MICROPY_COMP_RETURN_IF_EXPR
+#define MICROPY_COMP_RETURN_IF_EXPR (0)
+#endif
+
 /*****************************************************************************/
 /* Internal debugging stuff                                                  */
 
@@ -398,9 +404,9 @@
 #define MICROPY_READER_POSIX (0)
 #endif
 
-// Whether to use the FatFS reader for importing files
-#ifndef MICROPY_READER_FATFS
-#define MICROPY_READER_FATFS (0)
+// Whether to use the VFS reader for importing files
+#ifndef MICROPY_READER_VFS
+#define MICROPY_READER_VFS (0)
 #endif
 
 // Hook for the VM at the start of the opcode loop (can contain variable
@@ -445,7 +451,7 @@
 #   endif
 #endif
 
-// Whether to provide the mp_kbd_exception object
+// Whether to provide the mp_kbd_exception object, and micropython.kbd_intr function
 #ifndef MICROPY_KBD_EXCEPTION
 #define MICROPY_KBD_EXCEPTION (0)
 #endif
@@ -548,6 +554,12 @@ typedef double mp_float_t;
 #define MICROPY_PY_BUILTINS_COMPLEX (MICROPY_PY_BUILTINS_FLOAT)
 #endif
 
+// Whether to provide a high-quality hash for float and complex numbers.
+// Otherwise the default is a very simple but correct hashing function.
+#ifndef MICROPY_FLOAT_HIGH_QUALITY_HASH
+#define MICROPY_FLOAT_HIGH_QUALITY_HASH (0)
+#endif
+
 // Enable features which improve CPython compatibility
 // but may lead to more code size/memory usage.
 // TODO: Originally intended as generic category to not
@@ -556,6 +568,15 @@ typedef double mp_float_t;
 #define MICROPY_CPYTHON_COMPAT (1)
 #endif
 
+// Perform full checks as done by CPython. Disabling this
+// may produce incorrect results, if incorrect data is fed,
+// but should not lead to MicroPython crashes or similar
+// grave issues (in other words, only user app should be,
+// affected, not system).
+#ifndef MICROPY_FULL_CHECKS
+#define MICROPY_FULL_CHECKS (1)
+#endif
+
 // Whether POSIX-semantics non-blocking streams are supported
 #ifndef MICROPY_STREAMS_NON_BLOCK
 #define MICROPY_STREAMS_NON_BLOCK (0)
@@ -616,9 +637,19 @@ typedef double mp_float_t;
 #define MICROPY_USE_INTERNAL_PRINTF (1)
 #endif
 
-// Support for user-space VFS mount (selected ports)
-#ifndef MICROPY_FSUSERMOUNT
-#define MICROPY_FSUSERMOUNT (0)
+// Support for internal scheduler
+#ifndef MICROPY_ENABLE_SCHEDULER
+#define MICROPY_ENABLE_SCHEDULER (0)
+#endif
+
+// Maximum number of entries in the scheduler
+#ifndef MICROPY_SCHEDULER_DEPTH
+#define MICROPY_SCHEDULER_DEPTH (4)
+#endif
+
+// Support for generic VFS sub-system
+#ifndef MICROPY_VFS
+#define MICROPY_VFS (0)
 #endif
 
 /*****************************************************************************/
@@ -635,6 +666,12 @@ typedef double mp_float_t;
 #define MICROPY_PY_DESCRIPTORS (0)
 #endif
 
+// Whether to support class __delattr__ and __setattr__ methods
+// This costs some code size and makes all del attrs and store attrs slow
+#ifndef MICROPY_PY_DELATTR_SETATTR
+#define MICROPY_PY_DELATTR_SETATTR (0)
+#endif
+
 // Support for async/await/async for/async with
 #ifndef MICROPY_PY_ASYNC_AWAIT
 #define MICROPY_PY_ASYNC_AWAIT (1)
@@ -759,6 +796,27 @@ typedef double mp_float_t;
 #define MICROPY_PY_BUILTINS_MIN_MAX (1)
 #endif
 
+// Support for calls to pow() with 3 integer arguments
+#ifndef MICROPY_PY_BUILTINS_POW3
+#define MICROPY_PY_BUILTINS_POW3 (0)
+#endif
+
+// Whether to provide the help function
+#ifndef MICROPY_PY_BUILTINS_HELP
+#define MICROPY_PY_BUILTINS_HELP (0)
+#endif
+
+// Use this to configure the help text shown for help().  It should be a
+// variable with the type "const char*".  A sensible default is provided.
+#ifndef MICROPY_PY_BUILTINS_HELP_TEXT
+#define MICROPY_PY_BUILTINS_HELP_TEXT mp_help_default_text
+#endif
+
+// Add the ability to list the available modules when executing help('modules')
+#ifndef MICROPY_PY_BUILTINS_HELP_MODULES
+#define MICROPY_PY_BUILTINS_HELP_MODULES (0)
+#endif
+
 // Whether to set __file__ for imported modules
 #ifndef MICROPY_PY___FILE__
 #define MICROPY_PY___FILE__ (1)
@@ -828,6 +886,13 @@ typedef double mp_float_t;
 #define MICROPY_PY_IO (1)
 #endif
 
+// Whether to provide "uio.resource_stream()" function with
+// the semantics of CPython's pkg_resources.resource_stream()
+// (allows to access resources in frozen packages).
+#ifndef MICROPY_PY_IO_RESOURCE_STREAM
+#define MICROPY_PY_IO_RESOURCE_STREAM (0)
+#endif
+
 // Whether to provide "io.FileIO" class
 #ifndef MICROPY_PY_IO_FILEIO
 #define MICROPY_PY_IO_FILEIO (0)
@@ -890,6 +955,11 @@ typedef double mp_float_t;
 #define MICROPY_PY_UERRNO (0)
 #endif
 
+// Whether to provide the uerrno.errorcode dict
+#ifndef MICROPY_PY_UERRNO_ERRORCODE
+#define MICROPY_PY_UERRNO_ERRORCODE (1)
+#endif
+
 // Whether to provide "uselect" module (baremetal implementation)
 #ifndef MICROPY_PY_USELECT
 #define MICROPY_PY_USELECT (0)
@@ -922,6 +992,12 @@ typedef double mp_float_t;
 #define MICROPY_PY_THREAD_GIL (MICROPY_PY_THREAD)
 #endif
 
+// Number of VM jump-loops to do before releasing the GIL.
+// Set this to 0 to disable the divisor.
+#ifndef MICROPY_PY_THREAD_GIL_VM_DIVISOR
+#define MICROPY_PY_THREAD_GIL_VM_DIVISOR (32)
+#endif
+
 // Extended modules
 
 #ifndef MICROPY_PY_UCTYPES
@@ -1057,6 +1133,11 @@ typedef double mp_float_t;
 #define STATIC static
 #endif
 
+// Number of bytes in a word
+#ifndef BYTES_PER_WORD
+#define BYTES_PER_WORD (sizeof(mp_uint_t))
+#endif
+
 #define BITS_PER_BYTE (8)
 #define BITS_PER_WORD (BITS_PER_BYTE * BYTES_PER_WORD)
 // mp_int_t value with most significant bit set
diff --git a/py/mpprint.c b/py/mpprint.c
index 72d1c55ca0..4bc45fef4d 100644
--- a/py/mpprint.c
+++ b/py/mpprint.c
@@ -222,7 +222,7 @@ int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, int base, int base_char
     char prefix_buf[4];
     char *prefix = prefix_buf;
 
-    if (mp_obj_int_sign(x) > 0) {
+    if (mp_obj_int_sign(x) >= 0) {
         if (flags & PF_FLAG_SHOW_SIGN) {
             *prefix++ = '+';
         } else if (flags & PF_FLAG_SPACE_SIGN) {
diff --git a/py/mpprint.h b/py/mpprint.h
index f9204e322d..4fc904a20a 100644
--- a/py/mpprint.h
+++ b/py/mpprint.h
@@ -39,7 +39,7 @@
 #define PF_FLAG_ADD_PERCENT       (0x100)
 #define PF_FLAG_SHOW_OCTAL_LETTER (0x200)
 
-#if MICROPY_PY_IO
+#if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES
 #    define MP_PYTHON_PRINTER &mp_sys_stdout_print
 #else
 #    define MP_PYTHON_PRINTER &mp_plat_print
@@ -55,7 +55,7 @@ typedef struct _mp_print_t {
 // All (non-debug) prints go through one of the two interfaces below.
 // 1) Wrapper for platform print function, which wraps MP_PLAT_PRINT_STRN.
 extern const mp_print_t mp_plat_print;
-#if MICROPY_PY_IO
+#if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES
 // 2) Wrapper for printing to sys.stdout.
 extern const mp_print_t mp_sys_stdout_print;
 #endif
diff --git a/py/mpstate.h b/py/mpstate.h
index 91fb68b3ad..2b8f29a6ae 100644
--- a/py/mpstate.h
+++ b/py/mpstate.h
@@ -50,6 +50,16 @@ typedef struct mp_dynamic_compiler_t {
 extern mp_dynamic_compiler_t mp_dynamic_compiler;
 #endif
 
+// These are the values for sched_state
+#define MP_SCHED_IDLE (1)
+#define MP_SCHED_LOCKED (-1)
+#define MP_SCHED_PENDING (0) // 0 so it's a quick check in the VM
+
+typedef struct _mp_sched_item_t {
+    mp_obj_t func;
+    mp_obj_t arg;
+} mp_sched_item_t;
+
 // This structure hold information about the memory allocation system.
 typedef struct _mp_state_mem_t {
     #if MICROPY_MEM_STATS
@@ -110,8 +120,8 @@ typedef struct _mp_state_vm_t {
     // memory for exception arguments if we can't allocate RAM
     #if MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF
     #if MICROPY_EMERGENCY_EXCEPTION_BUF_SIZE > 0
-    // statically allocated buf
-    byte mp_emergency_exception_buf[MICROPY_EMERGENCY_EXCEPTION_BUF_SIZE];
+    // statically allocated buf (needs to be aligned to mp_obj_t)
+    mp_obj_t mp_emergency_exception_buf[MICROPY_EMERGENCY_EXCEPTION_BUF_SIZE / sizeof(mp_obj_t)];
     #else
     // dynamically allocated buf
     byte *mp_emergency_exception_buf;
@@ -129,6 +139,12 @@ typedef struct _mp_state_vm_t {
     // pending exception object (MP_OBJ_NULL if not pending)
     volatile mp_obj_t mp_pending_exception;
 
+    #if MICROPY_ENABLE_SCHEDULER
+    volatile int16_t sched_state;
+    uint16_t sched_sp;
+    mp_sched_item_t sched_stack[MICROPY_SCHEDULER_DEPTH];
+    #endif
+
     // current exception being handled, for sys.exc_info()
     #if MICROPY_PY_SYS_EXC_INFO
     mp_obj_base_t *cur_exception;
@@ -160,9 +176,9 @@ typedef struct _mp_state_vm_t {
     mp_obj_t lwip_slip_stream;
     #endif
 
-    #if MICROPY_FSUSERMOUNT
-    // for user-mountable block device (max fixed at compile time)
-    struct _fs_user_mount_t *fs_user_mount[MICROPY_FATFS_VOLUMES];
+    #if MICROPY_VFS
+    struct _mp_vfs_mount_t *vfs_cur;
+    struct _mp_vfs_mount_t *vfs_mount_table;
     #endif
 
     //
@@ -196,11 +212,13 @@ typedef struct _mp_state_vm_t {
 // This structure holds state that is specific to a given thread.
 // Everything in this structure is scanned for root pointers.
 typedef struct _mp_state_thread_t {
+    mp_obj_dict_t *dict_locals;
+    mp_obj_dict_t *dict_globals;
+
     // Note: nlr asm code has the offset of this hard-coded
     nlr_buf_t *nlr_top; // ROOT POINTER
 
     // Stack top at the start of program
-    // Note: this entry is used to locate the end of the root pointer section.
     char *stack_top;
 
     #if MICROPY_STACK_CHECK
@@ -208,15 +226,11 @@ typedef struct _mp_state_thread_t {
     #endif
 } mp_state_thread_t;
 
-// This structure combines the above 3 structures, and adds the local
-// and global dicts.
+// This structure combines the above 3 structures.
+// The order of the entries are important for root pointer scanning in the GC to work.
 // Note: if this structure changes then revisit all nlr asm code since they
 // have the offset of nlr_top hard-coded.
 typedef struct _mp_state_ctx_t {
-    // these must come first for root pointer scanning in GC to work
-    mp_obj_dict_t *dict_locals;
-    mp_obj_dict_t *dict_globals;
-    // these must come next in this order for root pointer scanning in GC to work
     mp_state_thread_t thread;
     mp_state_vm_t vm;
     mp_state_mem_t mem;
@@ -224,7 +238,6 @@ typedef struct _mp_state_ctx_t {
 
 extern mp_state_ctx_t mp_state_ctx;
 
-#define MP_STATE_CTX(x) (mp_state_ctx.x)
 #define MP_STATE_VM(x) (mp_state_ctx.vm.x)
 #define MP_STATE_MEM(x) (mp_state_ctx.mem.x)
 
diff --git a/py/mpz.c b/py/mpz.c
index e503927d09..f5675a2917 100644
--- a/py/mpz.c
+++ b/py/mpz.c
@@ -49,11 +49,17 @@
  Definition of normalise: ?
 */
 
+STATIC size_t mpn_remove_trailing_zeros(mpz_dig_t *oidig, mpz_dig_t *idig) {
+    for (--idig; idig >= oidig && *idig == 0; --idig) {
+    }
+    return idig + 1 - oidig;
+}
+
 /* compares i with j
    returns sign(i - j)
    assumes i, j are normalised
 */
-STATIC int mpn_cmp(const mpz_dig_t *idig, mp_uint_t ilen, const mpz_dig_t *jdig, mp_uint_t jlen) {
+STATIC int mpn_cmp(const mpz_dig_t *idig, size_t ilen, const mpz_dig_t *jdig, size_t jlen) {
     if (ilen < jlen) { return -1; }
     if (ilen > jlen) { return 1; }
 
@@ -71,7 +77,7 @@ STATIC int mpn_cmp(const mpz_dig_t *idig, mp_uint_t ilen, const mpz_dig_t *jdig,
    assumes enough memory in i; assumes normalised j; assumes n > 0
    can have i, j pointing to same memory
 */
-STATIC mp_uint_t mpn_shl(mpz_dig_t *idig, mpz_dig_t *jdig, mp_uint_t jlen, mp_uint_t n) {
+STATIC size_t mpn_shl(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mp_uint_t n) {
     mp_uint_t n_whole = (n + DIG_SIZE - 1) / DIG_SIZE;
     mp_uint_t n_part = n % DIG_SIZE;
     if (n_part == 0) {
@@ -84,7 +90,7 @@ STATIC mp_uint_t mpn_shl(mpz_dig_t *idig, mpz_dig_t *jdig, mp_uint_t jlen, mp_ui
 
     // shift the digits
     mpz_dbl_dig_t d = 0;
-    for (mp_uint_t i = jlen; i > 0; i--, idig--, jdig--) {
+    for (size_t i = jlen; i > 0; i--, idig--, jdig--) {
         d |= *jdig;
         *idig = (d >> (DIG_SIZE - n_part)) & DIG_MASK;
         d <<= DIG_SIZE;
@@ -110,7 +116,7 @@ STATIC mp_uint_t mpn_shl(mpz_dig_t *idig, mpz_dig_t *jdig, mp_uint_t jlen, mp_ui
    assumes enough memory in i; assumes normalised j; assumes n > 0
    can have i, j pointing to same memory
 */
-STATIC mp_uint_t mpn_shr(mpz_dig_t *idig, mpz_dig_t *jdig, mp_uint_t jlen, mp_uint_t n) {
+STATIC size_t mpn_shr(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mp_uint_t n) {
     mp_uint_t n_whole = n / DIG_SIZE;
     mp_uint_t n_part = n % DIG_SIZE;
 
@@ -121,7 +127,7 @@ STATIC mp_uint_t mpn_shr(mpz_dig_t *idig, mpz_dig_t *jdig, mp_uint_t jlen, mp_ui
     jdig += n_whole;
     jlen -= n_whole;
 
-    for (mp_uint_t i = jlen; i > 0; i--, idig++, jdig++) {
+    for (size_t i = jlen; i > 0; i--, idig++, jdig++) {
         mpz_dbl_dig_t d = *jdig;
         if (i > 1) {
             d |= (mpz_dbl_dig_t)jdig[1] << DIG_SIZE;
@@ -142,7 +148,7 @@ STATIC mp_uint_t mpn_shr(mpz_dig_t *idig, mpz_dig_t *jdig, mp_uint_t jlen, mp_ui
    assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen
    can have i, j, k pointing to same memory
 */
-STATIC mp_uint_t mpn_add(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen, const mpz_dig_t *kdig, mp_uint_t klen) {
+STATIC size_t mpn_add(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) {
     mpz_dig_t *oidig = idig;
     mpz_dbl_dig_t carry = 0;
 
@@ -172,7 +178,7 @@ STATIC mp_uint_t mpn_add(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen,
    assumes enough memory in i; assumes normalised j, k; assumes j >= k
    can have i, j, k pointing to same memory
 */
-STATIC mp_uint_t mpn_sub(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen, const mpz_dig_t *kdig, mp_uint_t klen) {
+STATIC size_t mpn_sub(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) {
     mpz_dig_t *oidig = idig;
     mpz_dbl_dig_signed_t borrow = 0;
 
@@ -190,16 +196,7 @@ STATIC mp_uint_t mpn_sub(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen,
         borrow >>= DIG_SIZE;
     }
 
-    for (--idig; idig >= oidig && *idig == 0; --idig) {
-    }
-
-    return idig + 1 - oidig;
-}
-
-STATIC mp_uint_t mpn_remove_trailing_zeros(mpz_dig_t *oidig, mpz_dig_t *idig) {
-    for (--idig; idig >= oidig && *idig == 0; --idig) {
-    }
-    return idig + 1 - oidig;
+    return mpn_remove_trailing_zeros(oidig, idig);
 }
 
 #if MICROPY_OPT_MPZ_BITWISE
@@ -209,7 +206,7 @@ STATIC mp_uint_t mpn_remove_trailing_zeros(mpz_dig_t *oidig, mpz_dig_t *idig) {
    assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen (jlen argument not needed)
    can have i, j, k pointing to same memory
 */
-STATIC mp_uint_t mpn_and(mpz_dig_t *idig, const mpz_dig_t *jdig, const mpz_dig_t *kdig, mp_uint_t klen) {
+STATIC size_t mpn_and(mpz_dig_t *idig, const mpz_dig_t *jdig, const mpz_dig_t *kdig, size_t klen) {
     mpz_dig_t *oidig = idig;
 
     for (; klen > 0; --klen, ++idig, ++jdig, ++kdig) {
@@ -230,7 +227,7 @@ STATIC mp_uint_t mpn_and(mpz_dig_t *idig, const mpz_dig_t *jdig, const mpz_dig_t
    assumes enough memory in i; assumes normalised j, k; assumes length j >= length k
    can have i, j, k pointing to same memory
 */
-STATIC mp_uint_t mpn_and_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen, const mpz_dig_t *kdig, mp_uint_t klen,
+STATIC size_t mpn_and_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen,
                             mpz_dbl_dig_t carryi, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) {
     mpz_dig_t *oidig = idig;
     mpz_dig_t imask = (0 == carryi) ? 0 : DIG_MASK;
@@ -261,7 +258,7 @@ STATIC mp_uint_t mpn_and_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t j
    assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen
    can have i, j, k pointing to same memory
 */
-STATIC mp_uint_t mpn_or(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen, const mpz_dig_t *kdig, mp_uint_t klen) {
+STATIC size_t mpn_or(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) {
     mpz_dig_t *oidig = idig;
 
     jlen -= klen;
@@ -291,7 +288,7 @@ STATIC mp_uint_t mpn_or(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen,
 
 #if MICROPY_OPT_MPZ_BITWISE
 
-STATIC mp_uint_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen, const mpz_dig_t *kdig, mp_uint_t klen,
+STATIC size_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen,
                             mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) {
     mpz_dig_t *oidig = idig;
     mpz_dbl_dig_t carryi = 1;
@@ -321,7 +318,7 @@ STATIC mp_uint_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jl
 
 #else
 
-STATIC mp_uint_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen, const mpz_dig_t *kdig, mp_uint_t klen,
+STATIC size_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen,
                             mpz_dbl_dig_t carryi, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) {
     mpz_dig_t *oidig = idig;
     mpz_dig_t imask = (0 == carryi) ? 0 : DIG_MASK;
@@ -353,7 +350,7 @@ STATIC mp_uint_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jl
    assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen
    can have i, j, k pointing to same memory
 */
-STATIC mp_uint_t mpn_xor(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen, const mpz_dig_t *kdig, mp_uint_t klen) {
+STATIC size_t mpn_xor(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) {
     mpz_dig_t *oidig = idig;
 
     jlen -= klen;
@@ -380,7 +377,7 @@ STATIC mp_uint_t mpn_xor(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen,
    assumes enough memory in i; assumes normalised j, k; assumes length j >= length k
    can have i, j, k pointing to same memory
 */
-STATIC mp_uint_t mpn_xor_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t jlen, const mpz_dig_t *kdig, mp_uint_t klen,
+STATIC size_t mpn_xor_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen,
                               mpz_dbl_dig_t carryi, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) {
     mpz_dig_t *oidig = idig;
 
@@ -405,7 +402,7 @@ STATIC mp_uint_t mpn_xor_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, mp_uint_t j
    returns number of digits in i
    assumes enough memory in i; assumes normalised i; assumes dmul != 0
 */
-STATIC mp_uint_t mpn_mul_dig_add_dig(mpz_dig_t *idig, mp_uint_t ilen, mpz_dig_t dmul, mpz_dig_t dadd) {
+STATIC size_t mpn_mul_dig_add_dig(mpz_dig_t *idig, size_t ilen, mpz_dig_t dmul, mpz_dig_t dadd) {
     mpz_dig_t *oidig = idig;
     mpz_dbl_dig_t carry = dadd;
 
@@ -427,15 +424,15 @@ STATIC mp_uint_t mpn_mul_dig_add_dig(mpz_dig_t *idig, mp_uint_t ilen, mpz_dig_t
    assumes enough memory in i; assumes i is zeroed; assumes normalised j, k
    can have j, k point to same memory
 */
-STATIC mp_uint_t mpn_mul(mpz_dig_t *idig, mpz_dig_t *jdig, mp_uint_t jlen, mpz_dig_t *kdig, mp_uint_t klen) {
+STATIC size_t mpn_mul(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mpz_dig_t *kdig, size_t klen) {
     mpz_dig_t *oidig = idig;
-    mp_uint_t ilen = 0;
+    size_t ilen = 0;
 
     for (; klen > 0; --klen, ++idig, ++kdig) {
         mpz_dig_t *id = idig;
         mpz_dbl_dig_t carry = 0;
 
-        mp_uint_t jl = jlen;
+        size_t jl = jlen;
         for (mpz_dig_t *jd = jdig; jl > 0; --jl, ++jd, ++id) {
             carry += (mpz_dbl_dig_t)*id + (mpz_dbl_dig_t)*jd * (mpz_dbl_dig_t)*kdig; // will never overflow so long as DIG_SIZE <= 8*sizeof(mpz_dbl_dig_t)/2
             *id = carry & DIG_MASK;
@@ -458,7 +455,7 @@ STATIC mp_uint_t mpn_mul(mpz_dig_t *idig, mpz_dig_t *jdig, mp_uint_t jlen, mpz_d
    assumes quo_dig has enough memory (as many digits as num)
    assumes quo_dig is filled with zeros
 */
-STATIC void mpn_div(mpz_dig_t *num_dig, mp_uint_t *num_len, const mpz_dig_t *den_dig, mp_uint_t den_len, mpz_dig_t *quo_dig, mp_uint_t *quo_len) {
+STATIC void mpn_div(mpz_dig_t *num_dig, size_t *num_len, const mpz_dig_t *den_dig, size_t den_len, mpz_dig_t *quo_dig, size_t *quo_len) {
     mpz_dig_t *orig_num_dig = num_dig;
     mpz_dig_t *orig_quo_dig = quo_dig;
     mpz_dig_t norm_shift = 0;
@@ -661,7 +658,7 @@ void mpz_init_from_int(mpz_t *z, mp_int_t val) {
     mpz_set_from_int(z, val);
 }
 
-void mpz_init_fixed_from_int(mpz_t *z, mpz_dig_t *dig, mp_uint_t alloc, mp_int_t val) {
+void mpz_init_fixed_from_int(mpz_t *z, mpz_dig_t *dig, size_t alloc, mp_int_t val) {
     z->neg = 0;
     z->fixed_dig = 1;
     z->alloc = alloc;
@@ -705,7 +702,7 @@ mpz_t *mpz_from_float(mp_float_t val) {
 }
 #endif
 
-mpz_t *mpz_from_str(const char *str, mp_uint_t len, bool neg, mp_uint_t base) {
+mpz_t *mpz_from_str(const char *str, size_t len, bool neg, unsigned int base) {
     mpz_t *z = mpz_zero();
     mpz_set_from_str(z, str, len, neg, base);
     return z;
@@ -719,7 +716,7 @@ STATIC void mpz_free(mpz_t *z) {
     }
 }
 
-STATIC void mpz_need_dig(mpz_t *z, mp_uint_t need) {
+STATIC void mpz_need_dig(mpz_t *z, size_t need) {
     if (need < MIN_ALLOC) {
         need = MIN_ALLOC;
     }
@@ -873,7 +870,7 @@ typedef uint32_t mp_float_int_t;
 #endif
 
 // returns number of bytes from str that were processed
-mp_uint_t mpz_set_from_str(mpz_t *z, const char *str, mp_uint_t len, bool neg, mp_uint_t base) {
+size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigned int base) {
     assert(base <= 36);
 
     const char *cur = str;
@@ -909,6 +906,39 @@ mp_uint_t mpz_set_from_str(mpz_t *z, const char *str, mp_uint_t len, bool neg, m
     return cur - str;
 }
 
+void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) {
+    int delta = 1;
+    if (big_endian) {
+        buf += len - 1;
+        delta = -1;
+    }
+
+    mpz_need_dig(z, (len * 8 + DIG_SIZE - 1) / DIG_SIZE);
+
+    mpz_dig_t d = 0;
+    int num_bits = 0;
+    z->neg = 0;
+    z->len = 0;
+    while (len) {
+        while (len && num_bits < DIG_SIZE) {
+            d |= *buf << num_bits;
+            num_bits += 8;
+            buf += delta;
+            len--;
+        }
+        z->dig[z->len++] = d & DIG_MASK;
+        // Need this #if because it's C undefined behavior to do: uint32_t >> 32
+        #if DIG_SIZE != 8 && DIG_SIZE != 16 && DIG_SIZE != 32
+        d >>= DIG_SIZE;
+        #else
+        d = 0;
+        #endif
+        num_bits -= DIG_SIZE;
+    }
+
+    z->len = mpn_remove_trailing_zeros(z->dig, z->dig + z->len);
+}
+
 bool mpz_is_zero(const mpz_t *z) {
     return z->len == 0;
 }
@@ -1120,7 +1150,7 @@ void mpz_shr_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) {
             mp_uint_t n_whole = rhs / DIG_SIZE;
             mp_uint_t n_part = rhs % DIG_SIZE;
             mpz_dig_t round_up = 0;
-            for (mp_uint_t i = 0; i < lhs->len && i < n_whole; i++) {
+            for (size_t i = 0; i < lhs->len && i < n_whole; i++) {
                 if (lhs->dig[i] != 0) {
                     round_up = 1;
                     break;
@@ -1364,9 +1394,6 @@ void mpz_pow_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) {
     mpz_free(n);
 }
 
-#if 0
-these functions are unused
-
 /* computes dest = (lhs ** rhs) % mod
    can have dest, lhs, rhs the same; mod can't be the same as dest
 */
@@ -1405,6 +1432,9 @@ void mpz_pow3_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs, const mpz_t
     mpz_free(n);
 }
 
+#if 0
+these functions are unused
+
 /* computes gcd(z1, z2)
    based on Knuth's modified gcd algorithm (I think?)
    gcd(z1, z2) >= 0
@@ -1593,7 +1623,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) {
 }
 
 // writes at most len bytes to buf (so buf should be zeroed before calling)
-void mpz_as_bytes(const mpz_t *z, bool big_endian, mp_uint_t len, byte *buf) {
+void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
     byte *b = buf;
     if (big_endian) {
         b += len;
@@ -1602,7 +1632,7 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, mp_uint_t len, byte *buf) {
     int bits = 0;
     mpz_dbl_dig_t d = 0;
     mpz_dbl_dig_t carry = 1;
-    for (mp_uint_t zlen = z->len; zlen > 0; --zlen) {
+    for (size_t zlen = z->len; zlen > 0; --zlen) {
         bits += DIG_SIZE;
         d = (d << DIG_SIZE) | *zdig++;
         for (; bits >= 8; bits -= 8, d >>= 8) {
@@ -1645,8 +1675,8 @@ mp_float_t mpz_as_float(const mpz_t *i) {
 
 #if 0
 this function is unused
-char *mpz_as_str(const mpz_t *i, mp_uint_t base) {
-    char *s = m_new(char, mpz_as_str_size(i, base, NULL, '\0'));
+char *mpz_as_str(const mpz_t *i, unsigned int base) {
+    char *s = m_new(char, mp_int_format_size(mpz_max_num_bits(i), base, NULL, '\0'));
     mpz_as_str_inpl(i, base, NULL, 'a', '\0', s);
     return s;
 }
@@ -1654,7 +1684,7 @@ char *mpz_as_str(const mpz_t *i, mp_uint_t base) {
 
 // assumes enough space as calculated by mp_int_format_size
 // returns length of string, not including null byte
-mp_uint_t mpz_as_str_inpl(const mpz_t *i, mp_uint_t base, const char *prefix, char base_char, char comma, char *str) {
+size_t mpz_as_str_inpl(const mpz_t *i, unsigned int base, const char *prefix, char base_char, char comma, char *str) {
     if (str == NULL) {
         return 0;
     }
@@ -1663,7 +1693,7 @@ mp_uint_t mpz_as_str_inpl(const mpz_t *i, mp_uint_t base, const char *prefix, ch
         return 0;
     }
 
-    mp_uint_t ilen = i->len;
+    size_t ilen = i->len;
 
     char *s = str;
     if (ilen == 0) {
diff --git a/py/mpz.h b/py/mpz.h
index 55ef3e15ff..5c88227223 100644
--- a/py/mpz.h
+++ b/py/mpz.h
@@ -87,10 +87,10 @@ typedef int8_t mpz_dbl_dig_signed_t;
 #define MPZ_NUM_DIG_FOR_LL ((sizeof(long long) * 8 + MPZ_DIG_SIZE - 1) / MPZ_DIG_SIZE)
 
 typedef struct _mpz_t {
-    mp_uint_t neg : 1;
-    mp_uint_t fixed_dig : 1;
-    mp_uint_t alloc : BITS_PER_WORD - 2;
-    mp_uint_t len;
+    size_t neg : 1;
+    size_t fixed_dig : 1;
+    size_t alloc : 8 * sizeof(size_t) - 2;
+    size_t len;
     mpz_dig_t *dig;
 } mpz_t;
 
@@ -99,7 +99,7 @@ typedef struct _mpz_t {
 
 void mpz_init_zero(mpz_t *z);
 void mpz_init_from_int(mpz_t *z, mp_int_t val);
-void mpz_init_fixed_from_int(mpz_t *z, mpz_dig_t *dig, mp_uint_t dig_alloc, mp_int_t val);
+void mpz_init_fixed_from_int(mpz_t *z, mpz_dig_t *dig, size_t dig_alloc, mp_int_t val);
 void mpz_deinit(mpz_t *z);
 
 void mpz_set(mpz_t *dest, const mpz_t *src);
@@ -108,7 +108,8 @@ void mpz_set_from_ll(mpz_t *z, long long i, bool is_signed);
 #if MICROPY_PY_BUILTINS_FLOAT
 void mpz_set_from_float(mpz_t *z, mp_float_t src);
 #endif
-mp_uint_t mpz_set_from_str(mpz_t *z, const char *str, mp_uint_t len, bool neg, mp_uint_t base);
+size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigned int base);
+void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf);
 
 bool mpz_is_zero(const mpz_t *z);
 int mpz_cmp(const mpz_t *lhs, const mpz_t *rhs);
@@ -122,6 +123,7 @@ void mpz_add_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
 void mpz_sub_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
 void mpz_mul_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
 void mpz_pow_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
+void mpz_pow3_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs, const mpz_t *mod);
 void mpz_and_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
 void mpz_or_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
 void mpz_xor_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
@@ -131,11 +133,10 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) { return z->len * MPZ_DIG_
 mp_int_t mpz_hash(const mpz_t *z);
 bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value);
 bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value);
-void mpz_as_bytes(const mpz_t *z, bool big_endian, mp_uint_t len, byte *buf);
+void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf);
 #if MICROPY_PY_BUILTINS_FLOAT
 mp_float_t mpz_as_float(const mpz_t *z);
 #endif
-mp_uint_t mpz_as_str_size(const mpz_t *i, mp_uint_t base, const char *prefix, char comma);
-mp_uint_t mpz_as_str_inpl(const mpz_t *z, mp_uint_t base, const char *prefix, char base_char, char comma, char *str);
+size_t mpz_as_str_inpl(const mpz_t *z, unsigned int base, const char *prefix, char base_char, char comma, char *str);
 
 #endif // __MICROPY_INCLUDED_PY_MPZ_H__
diff --git a/py/nativeglue.c b/py/nativeglue.c
index 5f2164ee0d..c75e5ec047 100644
--- a/py/nativeglue.c
+++ b/py/nativeglue.c
@@ -86,7 +86,7 @@ mp_obj_t mp_convert_native_to_obj(mp_uint_t val, mp_uint_t type) {
 
 // wrapper that accepts n_args and n_kw in one argument
 // (native emitter can only pass at most 3 arguments to a function)
-mp_obj_t mp_native_call_function_n_kw(mp_obj_t fun_in, mp_uint_t n_args_kw, const mp_obj_t *args) {
+mp_obj_t mp_native_call_function_n_kw(mp_obj_t fun_in, size_t n_args_kw, const mp_obj_t *args) {
     return mp_call_function_n_kw(fun_in, n_args_kw & 0xff, (n_args_kw >> 8) & 0xff, args);
 }
 
@@ -98,6 +98,32 @@ void mp_native_raise(mp_obj_t o) {
     }
 }
 
+// wrapper that handles iterator buffer
+STATIC mp_obj_t mp_native_getiter(mp_obj_t obj, mp_obj_iter_buf_t *iter) {
+    if (iter == NULL) {
+        return mp_getiter(obj, NULL);
+    } else {
+        obj = mp_getiter(obj, iter);
+        if (obj != MP_OBJ_FROM_PTR(iter)) {
+            // Iterator didn't use the stack so indicate that with MP_OBJ_NULL.
+            iter->base.type = MP_OBJ_NULL;
+            iter->buf[0] = obj;
+        }
+        return NULL;
+    }
+}
+
+// wrapper that handles iterator buffer
+STATIC mp_obj_t mp_native_iternext(mp_obj_iter_buf_t *iter) {
+    mp_obj_t obj;
+    if (iter->base.type == MP_OBJ_NULL) {
+        obj = iter->buf[0];
+    } else {
+        obj = MP_OBJ_FROM_PTR(iter);
+    }
+    return mp_iternext(obj);
+}
+
 // these must correspond to the respective enum in runtime0.h
 void *const mp_fun_table[MP_F_NUMBER_OF] = {
     mp_convert_obj_to_native,
@@ -107,6 +133,7 @@ void *const mp_fun_table[MP_F_NUMBER_OF] = {
     mp_load_build_class,
     mp_load_attr,
     mp_load_method,
+    mp_load_super_method,
     mp_store_name,
     mp_store_global,
     mp_store_attr,
@@ -127,8 +154,8 @@ void *const mp_fun_table[MP_F_NUMBER_OF] = {
     mp_native_call_function_n_kw,
     mp_call_method_n_kw,
     mp_call_method_n_kw_var,
-    mp_getiter,
-    mp_iternext,
+    mp_native_getiter,
+    mp_native_iternext,
     nlr_push,
     nlr_pop,
     mp_native_raise,
diff --git a/py/nlr.h b/py/nlr.h
index 6c86fc26c3..7a71ef34bd 100644
--- a/py/nlr.h
+++ b/py/nlr.h
@@ -82,10 +82,10 @@ NORETURN void nlr_jump(void *val);
 // This must be implemented by a port.  It's called by nlr_jump
 // if no nlr buf has been pushed.  It must not return, but rather
 // should bail out with a fatal error.
-void nlr_jump_fail(void *val);
+NORETURN void nlr_jump_fail(void *val);
 
 // use nlr_raise instead of nlr_jump so that debugging is easier
-#ifndef DEBUG
+#ifndef MICROPY_DEBUG_NLR
 #define nlr_raise(val) nlr_jump(MP_OBJ_TO_PTR(val))
 #else
 #include "mpstate.h"
diff --git a/py/nlrsetjmp.c b/py/nlrsetjmp.c
index 43a13156f2..c3873e0b6d 100644
--- a/py/nlrsetjmp.c
+++ b/py/nlrsetjmp.c
@@ -29,10 +29,14 @@
 #if MICROPY_NLR_SETJMP
 
 void nlr_setjmp_jump(void *val) {
-    nlr_buf_t *buf = MP_STATE_THREAD(nlr_top);
-    MP_STATE_THREAD(nlr_top) = buf->prev;
-    buf->ret_val = val;
-    longjmp(buf->jmpbuf, 1);
+    nlr_buf_t **top_ptr = &MP_STATE_THREAD(nlr_top);
+    nlr_buf_t *top = *top_ptr;
+    if (top == NULL) {
+        nlr_jump_fail(val);
+    }
+    top->ret_val = val;
+    *top_ptr = top->prev;
+    longjmp(top->jmpbuf, 1);
 }
 
 #endif
diff --git a/py/nlrx64.S b/py/nlrx64.S
deleted file mode 100644
index caea35de2b..0000000000
--- a/py/nlrx64.S
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * This file is part of the Micro Python project, http://micropython.org/
- *
- * The MIT License (MIT)
- *
- * Copyright (c) 2013, 2014 Damien P. George
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#if defined(__x86_64__) && !MICROPY_NLR_SETJMP
-
-// We only need the functions here if we are on x86-64, and we are not
-// using setjmp/longjmp.
-//
-// For reference, x86-64 callee save regs are:
-//      rbx, rbp, rsp, r12, r13, r14, r15
-
-// the offset of nlr_top within mp_state_ctx_t
-#define NLR_TOP_OFFSET (2 * 8)
-
-#if defined(__APPLE__) && defined(__MACH__)
-#define NLR_TOP (_mp_state_ctx + NLR_TOP_OFFSET)
-#define MP_THREAD_GET_STATE _mp_thread_get_state
-#else
-#define NLR_TOP (mp_state_ctx + NLR_TOP_OFFSET)
-#define MP_THREAD_GET_STATE mp_thread_get_state
-#endif
-
-// offset of nlr_top within mp_state_thread_t structure
-#define NLR_TOP_TH_OFF (0)
-
-#if defined(_WIN32) || defined(__CYGWIN__)
-#define NLR_OS_WINDOWS
-#endif
-
-    .file   "nlr.s"
-    .text
-
-#if !defined(NLR_OS_WINDOWS)
-
-/******************************************************************************/
-//
-// Functions for *nix and OSX.
-// OSX needs _ prefix for binding to C, and doesn't support some directives.
-//
-/******************************************************************************/
-
-/**************************************/
-// mp_uint_t nlr_push(rdi=nlr_buf_t *nlr)
-
-#if !(defined(__APPLE__) && defined(__MACH__))
-    .globl  nlr_push
-    .type   nlr_push, @function
-nlr_push:
-#else
-    .globl  _nlr_push
-_nlr_push:
-#endif
-    movq    (%rsp), %rax            # load return %rip
-    movq    %rax, 16(%rdi)          # store %rip into nlr_buf
-    movq    %rbp, 24(%rdi)          # store %rbp into nlr_buf
-    movq    %rsp, 32(%rdi)          # store %rsp into nlr_buf
-    movq    %rbx, 40(%rdi)          # store %rbx into nlr_buf
-    movq    %r12, 48(%rdi)          # store %r12 into nlr_buf
-    movq    %r13, 56(%rdi)          # store %r13 into nlr_buf
-    movq    %r14, 64(%rdi)          # store %r14 into nlr_buf
-    movq    %r15, 72(%rdi)          # store %r15 into nlr_buf
-
-#if !MICROPY_PY_THREAD
-    movq    NLR_TOP(%rip), %rax     # get last nlr_buf
-    movq    %rax, (%rdi)            # store it
-    movq    %rdi, NLR_TOP(%rip)     # stor new nlr_buf (to make linked list)
-#else
-    movq    %rdi, %rbp              # since we make a call, must save rdi in rbp
-    callq   MP_THREAD_GET_STATE     # get mp_state_thread ptr into rax
-    movq    NLR_TOP_TH_OFF(%rax), %rsi # get thread.nlr_top (last nlr_buf)
-    movq    %rsi, (%rbp)            # store it
-    movq    %rbp, NLR_TOP_TH_OFF(%rax) # store new nlr_buf (to make linked list)
-    movq    24(%rbp), %rbp          # restore rbp
-#endif
-
-    xorq    %rax, %rax              # return 0, normal return
-    ret                             # return
-#if !(defined(__APPLE__) && defined(__MACH__))
-    .size   nlr_push, .-nlr_push
-#endif
-
-/**************************************/
-// void nlr_pop()
-
-#if !(defined(__APPLE__) && defined(__MACH__))
-    .globl  nlr_pop
-    .type   nlr_pop, @function
-nlr_pop:
-#else
-    .globl  _nlr_pop
-_nlr_pop:
-#endif
-
-#if !MICROPY_PY_THREAD
-    movq    NLR_TOP(%rip), %rax     # get nlr_top into %rax
-    movq    (%rax), %rax            # load prev nlr_buf
-    movq    %rax, NLR_TOP(%rip)     # store prev nlr_buf (to unlink list)
-#else
-    callq   MP_THREAD_GET_STATE     # get mp_state_thread ptr into rax
-    movq    NLR_TOP_TH_OFF(%rax), %rdi # get thread.nlr_top (last nlr_buf)
-    movq    (%rdi), %rdi            # load prev nlr_buf
-    movq    %rdi, NLR_TOP_TH_OFF(%rax) # store prev nlr_buf (to unlink list)
-#endif
-
-    ret                             # return
-#if !(defined(__APPLE__) && defined(__MACH__))
-    .size   nlr_pop, .-nlr_pop
-#endif
-
-/**************************************/
-// void nlr_jump(rdi=mp_uint_t val)
-
-#if !(defined(__APPLE__) && defined(__MACH__))
-    .globl  nlr_jump
-    .type   nlr_jump, @function
-nlr_jump:
-#else
-    .globl  _nlr_jump
-    _nlr_jump:
-#endif
-
-#if !MICROPY_PY_THREAD
-    movq    %rdi, %rax              # put return value in %rax
-    movq    NLR_TOP(%rip), %rdi     # get nlr_top into %rdi
-    test    %rdi, %rdi              # check for nlr_top being NULL
-    je      .fail                   # fail if nlr_top is NULL
-    movq    %rax, 8(%rdi)           # store return value
-    movq    (%rdi), %rax            # load prev nlr_buf
-    movq    %rax, NLR_TOP(%rip)     # store prev nlr_buf (to unlink list)
-#else
-    movq    %rdi, %rbp              # put return value in rbp
-    callq   MP_THREAD_GET_STATE     # get thread ptr in rax
-    movq    %rax, %rsi              # put thread ptr in rsi
-    movq    %rbp, %rax              # put return value to rax (for je .fail)
-    movq    NLR_TOP_TH_OFF(%rsi), %rdi # get thread.nlr_top in rdi
-    test    %rdi, %rdi              # check for nlr_top being NULL
-    je      .fail                   # fail if nlr_top is NULL
-    movq    %rax, 8(%rdi)           # store return value
-    movq    (%rdi), %rax            # load prev nlr_buf
-    movq    %rax, NLR_TOP_TH_OFF(%rsi) # store prev nlr_buf (to unlink list)
-#endif
-
-    movq    72(%rdi), %r15          # load saved %r15
-    movq    64(%rdi), %r14          # load saved %r14
-    movq    56(%rdi), %r13          # load saved %r13
-    movq    48(%rdi), %r12          # load saved %r12
-    movq    40(%rdi), %rbx          # load saved %rbx
-    movq    32(%rdi), %rsp          # load saved %rsp
-    movq    24(%rdi), %rbp          # load saved %rbp
-    movq    16(%rdi), %rax          # load saved %rip
-    movq    %rax, (%rsp)            # store saved %rip to stack
-    xorq    %rax, %rax              # clear return register
-    inc     %al                     # increase to make 1, non-local return
-    ret                             # return
-.fail:
-    movq    %rax, %rdi              # put argument back in first-arg register
-#if !(defined(__APPLE__) && defined(__MACH__))
-    je      nlr_jump_fail           # transfer control to nlr_jump_fail
-    .size   nlr_jump, .-nlr_jump
-#else
-    je      _nlr_jump_fail          # transfer control to nlr_jump_fail
-#endif
-
-#else // !defined(NLR_OS_WINDOWS)
-
-/******************************************************************************/
-//
-// Functions for Windows
-//
-/******************************************************************************/
-
-/**************************************/
-// mp_uint_t nlr_push(rcx=nlr_buf_t *nlr)
-
-    .globl  nlr_push
-nlr_push:
-    movq    (%rsp), %rax            # load return %rip
-    movq    %rax, 16(%rcx)          # store %rip into nlr_buf
-    movq    %rbp, 24(%rcx)          # store %rbp into nlr_buf
-    movq    %rsp, 32(%rcx)          # store %rsp into nlr_buf
-    movq    %rbx, 40(%rcx)          # store %rbx into nlr_buf
-    movq    %r12, 48(%rcx)          # store %r12 into nlr_buf
-    movq    %r13, 56(%rcx)          # store %r13 into nlr_buf
-    movq    %r14, 64(%rcx)          # store %r14 into nlr_buf
-    movq    %r15, 72(%rcx)          # store %r15 into
-    movq    %rdi, 80(%rcx)          # store %rdr into
-    movq    %rsi, 88(%rcx)          # store %rsi into
-    movq    NLR_TOP(%rip), %rax     # get last nlr_buf
-    movq    %rax, (%rcx)            # store it
-    movq    %rcx, NLR_TOP(%rip)     # stor new nlr_buf (to make linked list)
-    xorq    %rax, %rax              # return 0, normal return
-    ret                             # return
-
-/**************************************/
-// void nlr_pop()
-
-    .globl  nlr_pop
-nlr_pop:
-    movq    NLR_TOP(%rip), %rax     # get nlr_top into %rax
-    movq    (%rax), %rax            # load prev nlr_buf
-    movq    %rax, NLR_TOP(%rip)     # store prev nlr_buf (to unlink list)
-    ret                             # return
-
-/**************************************/
-// void nlr_jump(rcx=mp_uint_t val)
-
-    .globl  nlr_jump
-nlr_jump:
-    movq    %rcx, %rax              # put return value in %rax
-    movq    NLR_TOP(%rip), %rcx     # get nlr_top into %rcx
-    test    %rcx, %rcx              # check for nlr_top being NULL
-    je      .fail                   # fail if nlr_top is NULL
-    movq    %rax, 8(%rcx)           # store return value
-    movq    (%rcx), %rax            # load prev nlr_buf
-    movq    %rax, NLR_TOP(%rip)     # store prev nlr_buf (to unlink list)
-    movq    72(%rcx), %r15          # load saved %r15
-    movq    64(%rcx), %r14          # load saved %r14
-    movq    56(%rcx), %r13          # load saved %r13
-    movq    48(%rcx), %r12          # load saved %r12
-    movq    40(%rcx), %rbx          # load saved %rbx
-    movq    32(%rcx), %rsp          # load saved %rsp
-    movq    24(%rcx), %rbp          # load saved %rbp
-    movq    16(%rcx), %rax          # load saved %rip
-    movq    80(%rcx), %rdi          # store %rdr into
-    movq    88(%rcx), %rsi          # store %rsi into
-    movq    %rax, (%rsp)            # store saved %rip to stack
-    xorq    %rax, %rax              # clear return register
-    inc     %al                     # increase to make 1, non-local return
-    ret                             # return
-.fail:
-    movq    %rax, %rcx              # put argument back in first-arg register
-    je      nlr_jump_fail           # transfer control to nlr_jump_fail
-
-#endif // !defined(NLR_OS_WINDOWS)
-
-#endif // defined(__x86_64__) && !MICROPY_NLR_SETJMP
diff --git a/py/nlrx64.c b/py/nlrx64.c
new file mode 100644
index 0000000000..c23fd8fc6a
--- /dev/null
+++ b/py/nlrx64.c
@@ -0,0 +1,136 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2013-2017 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "py/mpstate.h"
+#include "py/nlr.h"
+
+#if !MICROPY_NLR_SETJMP && defined(__x86_64__)
+
+#undef nlr_push
+
+// x86-64 callee-save registers are:
+//  rbx, rbp, rsp, r12, r13, r14, r15
+
+#define NLR_OS_WINDOWS (defined(_WIN32) || defined(__CYGWIN__))
+
+__attribute__((used)) unsigned int nlr_push_tail(nlr_buf_t *nlr);
+
+unsigned int nlr_push(nlr_buf_t *nlr) {
+    (void)nlr;
+
+    #if NLR_OS_WINDOWS
+
+    __asm volatile (
+    "movq   (%rsp), %rax        \n" // load return %rip
+    "movq   %rax, 16(%rcx)      \n" // store %rip into nlr_buf
+    "movq   %rbp, 24(%rcx)      \n" // store %rbp into nlr_buf
+    "movq   %rsp, 32(%rcx)      \n" // store %rsp into nlr_buf
+    "movq   %rbx, 40(%rcx)      \n" // store %rbx into nlr_buf
+    "movq   %r12, 48(%rcx)      \n" // store %r12 into nlr_buf
+    "movq   %r13, 56(%rcx)      \n" // store %r13 into nlr_buf
+    "movq   %r14, 64(%rcx)      \n" // store %r14 into nlr_buf
+    "movq   %r15, 72(%rcx)      \n" // store %r15 into nlr_buf
+    "movq   %rdi, 80(%rcx)      \n" // store %rdr into nlr_buf
+    "movq   %rsi, 88(%rcx)      \n" // store %rsi into nlr_buf
+    "jmp    nlr_push_tail       \n" // do the rest in C
+    );
+
+    #else
+
+    __asm volatile (
+    #if defined(__APPLE__) || defined(__MACH__)
+    "pop    %rbp                \n" // undo function's prelude
+    #endif
+    "movq   (%rsp), %rax        \n" // load return %rip
+    "movq   %rax, 16(%rdi)      \n" // store %rip into nlr_buf
+    "movq   %rbp, 24(%rdi)      \n" // store %rbp into nlr_buf
+    "movq   %rsp, 32(%rdi)      \n" // store %rsp into nlr_buf
+    "movq   %rbx, 40(%rdi)      \n" // store %rbx into nlr_buf
+    "movq   %r12, 48(%rdi)      \n" // store %r12 into nlr_buf
+    "movq   %r13, 56(%rdi)      \n" // store %r13 into nlr_buf
+    "movq   %r14, 64(%rdi)      \n" // store %r14 into nlr_buf
+    "movq   %r15, 72(%rdi)      \n" // store %r15 into nlr_buf
+    #if defined(__APPLE__) || defined(__MACH__)
+    "jmp    _nlr_push_tail      \n" // do the rest in C
+    #else
+    "jmp    nlr_push_tail       \n" // do the rest in C
+    #endif
+    );
+
+    #endif
+
+    return 0; // needed to silence compiler warning
+}
+
+__attribute__((used)) unsigned int nlr_push_tail(nlr_buf_t *nlr) {
+    nlr_buf_t **top = &MP_STATE_THREAD(nlr_top);
+    nlr->prev = *top;
+    *top = nlr;
+    return 0; // normal return
+}
+
+void nlr_pop(void) {
+    nlr_buf_t **top = &MP_STATE_THREAD(nlr_top);
+    *top = (*top)->prev;
+}
+
+NORETURN void nlr_jump(void *val) {
+    nlr_buf_t **top_ptr = &MP_STATE_THREAD(nlr_top);
+    nlr_buf_t *top = *top_ptr;
+    if (top == NULL) {
+        nlr_jump_fail(val);
+    }
+
+    top->ret_val = val;
+    *top_ptr = top->prev;
+
+    __asm volatile (
+    "movq   %0, %%rcx           \n" // %rcx points to nlr_buf
+    #if NLR_OS_WINDOWS
+    "movq   88(%%rcx), %%rsi    \n" // load saved %rsi
+    "movq   80(%%rcx), %%rdi    \n" // load saved %rdr
+    #endif
+    "movq   72(%%rcx), %%r15    \n" // load saved %r15
+    "movq   64(%%rcx), %%r14    \n" // load saved %r14
+    "movq   56(%%rcx), %%r13    \n" // load saved %r13
+    "movq   48(%%rcx), %%r12    \n" // load saved %r12
+    "movq   40(%%rcx), %%rbx    \n" // load saved %rbx
+    "movq   32(%%rcx), %%rsp    \n" // load saved %rsp
+    "movq   24(%%rcx), %%rbp    \n" // load saved %rbp
+    "movq   16(%%rcx), %%rax    \n" // load saved %rip
+    "movq   %%rax, (%%rsp)      \n" // store saved %rip to stack
+    "xorq   %%rax, %%rax        \n" // clear return register
+    "inc    %%al                \n" // increase to make 1, non-local return
+    "ret                        \n" // return
+    :                               // output operands
+    : "r"(top)                      // input operands
+    :                               // clobbered registers
+    );
+
+    for (;;); // needed to silence compiler warning
+}
+
+#endif // !MICROPY_NLR_SETJMP && defined(__x86_64__)
diff --git a/py/nlrx86.S b/py/nlrx86.S
deleted file mode 100644
index 8c538ba176..0000000000
--- a/py/nlrx86.S
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * This file is part of the Micro Python project, http://micropython.org/
- *
- * The MIT License (MIT)
- *
- * Copyright (c) 2013, 2014 Damien P. George
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#if defined(__i386__) && !MICROPY_NLR_SETJMP
-
-// We only need the functions here if we are on x86, and we are not
-// using setjmp/longjmp.
-//
-// For reference, x86 callee save regs are:
-//      ebx, esi, edi, ebp, esp, eip
-
-// the offset of nlr_top within mp_state_ctx_t
-#define NLR_TOP_OFFSET (2 * 4)
-
-#if defined(_WIN32) || defined(__CYGWIN__)
-#define NLR_OS_WINDOWS
-#endif
-
-#if defined(__APPLE__) && defined(__MACH__)
-#define NLR_OS_MAC
-#endif
-
-#if defined(NLR_OS_WINDOWS) || defined(NLR_OS_MAC)
-#define NLR_TOP (_mp_state_ctx + NLR_TOP_OFFSET)
-#define MP_THREAD_GET_STATE _mp_thread_get_state
-#else
-#define NLR_TOP (mp_state_ctx + NLR_TOP_OFFSET)
-#define MP_THREAD_GET_STATE mp_thread_get_state
-#endif
-
-// offset of nlr_top within mp_state_thread_t structure
-#define NLR_TOP_TH_OFF (0)
-
-    .file   "nlr.s"
-    .text
-
-/**************************************/
-// mp_uint_t nlr_push(4(%esp)=nlr_buf_t *nlr)
-
-#if defined(NLR_OS_WINDOWS)
-    .globl  _nlr_push
-    .def    _nlr_push; .scl 2; .type 32; .endef
-_nlr_push:
-#elif defined(NLR_OS_MAC)
-    .globl  _nlr_push
-_nlr_push:
-#else
-    .globl  nlr_push
-    .type   nlr_push, @function
-nlr_push:
-#endif
-    mov     4(%esp), %edx           # load nlr_buf
-    mov     (%esp), %eax            # load return %ip
-    mov     %eax, 8(%edx)           # store %ip into nlr_buf+8
-    mov     %ebp, 12(%edx)          # store %bp into nlr_buf+12
-    mov     %esp, 16(%edx)          # store %sp into nlr_buf+16
-    mov     %ebx, 20(%edx)          # store %bx into nlr_buf+20
-    mov     %edi, 24(%edx)          # store %di into nlr_buf
-    mov     %esi, 28(%edx)          # store %si into nlr_buf
-
-#if !MICROPY_PY_THREAD
-    mov     NLR_TOP, %eax           # load nlr_top
-    mov     %eax, (%edx)            # store it
-    mov     %edx, NLR_TOP           # stor new nlr_buf (to make linked list)
-#else
-    // to check: stack is aligned to 16-byte boundary before this call
-    call    MP_THREAD_GET_STATE     # get mp_state_thread ptr into eax
-    mov     4(%esp), %edx           # load nlr_buf argument into edx (edx clobbered by call)
-    mov     NLR_TOP_TH_OFF(%eax), %ecx # get thread.nlr_top (last nlr_buf)
-    mov     %ecx, (%edx)            # store it
-    mov     %edx, NLR_TOP_TH_OFF(%eax) # store new nlr_buf (to make linked list)
-#endif
-
-    xor     %eax, %eax              # return 0, normal return
-    ret                             # return
-#if !defined(NLR_OS_WINDOWS) && !defined(NLR_OS_MAC)
-    .size   nlr_push, .-nlr_push
-#endif
-
-/**************************************/
-// void nlr_pop()
-
-#if defined(NLR_OS_WINDOWS)
-    .globl  _nlr_pop
-    .def    _nlr_pop; .scl 2; .type 32; .endef
-_nlr_pop:
-#elif defined(NLR_OS_MAC)
-    .globl  _nlr_pop
-_nlr_pop:
-#else
-    .globl  nlr_pop
-    .type   nlr_pop, @function
-nlr_pop:
-#endif
-
-#if !MICROPY_PY_THREAD
-    mov     NLR_TOP, %eax           # load nlr_top
-    mov     (%eax), %eax            # load prev nlr_buf
-    mov     %eax, NLR_TOP           # store nlr_top (to unlink list)
-#else
-    call    MP_THREAD_GET_STATE     # get mp_state_thread ptr into eax
-    mov     NLR_TOP_TH_OFF(%eax), %ecx # get thread.nlr_top (last nlr_buf)
-    mov     (%ecx), %ecx            # load prev nlr_buf
-    mov     %ecx, NLR_TOP_TH_OFF(%eax) # store prev nlr_buf (to unlink list)
-#endif
-
-    ret                             # return
-#if !defined(NLR_OS_WINDOWS) && !defined(NLR_OS_MAC)
-    .size   nlr_pop, .-nlr_pop
-#endif
-
-/**************************************/
-// void nlr_jump(4(%esp)=mp_uint_t val)
-
-#if defined(NLR_OS_WINDOWS)
-    .globl  _nlr_jump
-    .def    _nlr_jump; .scl 2; .type 32; .endef
-_nlr_jump:
-#elif defined(NLR_OS_MAC)
-    .globl  _nlr_jump
-_nlr_jump:
-#else
-    .globl  nlr_jump
-    .type   nlr_jump, @function
-nlr_jump:
-#endif
-
-#if !MICROPY_PY_THREAD
-    mov     NLR_TOP, %edx           # load nlr_top
-    test    %edx, %edx              # check for nlr_top being NULL
-#if defined(NLR_OS_WINDOWS) || defined(NLR_OS_MAC)
-    je      _nlr_jump_fail           # fail if nlr_top is NULL
-#else
-    je      nlr_jump_fail           # fail if nlr_top is NULL
-#endif
-    mov     4(%esp), %eax           # load return value
-    mov     %eax, 4(%edx)           # store return value
-    mov     (%edx), %eax            # load prev nlr_top
-    mov     %eax, NLR_TOP           # store nlr_top (to unlink list)
-#else
-    call    MP_THREAD_GET_STATE     # get mp_state_thread ptr into eax
-    mov     NLR_TOP_TH_OFF(%eax), %edx # get thread.nlr_top (last nlr_buf)
-    test    %edx, %edx              # check for nlr_top being NULL
-#if defined(NLR_OS_WINDOWS) || defined(NLR_OS_MAC)
-    je      _nlr_jump_fail          # fail if nlr_top is NULL
-#else
-    je      nlr_jump_fail           # fail if nlr_top is NULL
-#endif
-    mov     4(%esp), %ecx           # load return value
-    mov     %ecx, 4(%edx)           # store return value
-    mov     (%edx), %ecx            # load prev nlr_top
-    mov     %ecx, NLR_TOP_TH_OFF(%eax) # store nlr_top (to unlink list)
-#endif
-
-    mov     28(%edx), %esi          # load saved %si
-    mov     24(%edx), %edi          # load saved %di
-    mov     20(%edx), %ebx          # load saved %bx
-    mov     16(%edx), %esp          # load saved %sp
-    mov     12(%edx), %ebp          # load saved %bp
-    mov     8(%edx), %eax           # load saved %ip
-    mov     %eax, (%esp)            # store saved %ip to stack
-    xor     %eax, %eax              # clear return register
-    inc     %al                     # increase to make 1, non-local return
-    ret                             # return
-#if !defined(NLR_OS_WINDOWS) && !defined(NLR_OS_MAC)
-    .size   nlr_jump, .-nlr_jump
-#endif
-
-#endif // defined(__i386__) && !MICROPY_NLR_SETJMP
diff --git a/py/nlrx86.c b/py/nlrx86.c
new file mode 100644
index 0000000000..58aaa1a571
--- /dev/null
+++ b/py/nlrx86.c
@@ -0,0 +1,113 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2013-2017 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "py/mpconfig.h"
+#include "py/mpstate.h"
+#include "py/nlr.h"
+
+#if !MICROPY_NLR_SETJMP && defined(__i386__)
+
+#undef nlr_push
+
+// For reference, x86 callee save regs are:
+//  ebx, esi, edi, ebp, esp, eip
+
+#define NLR_OS_WINDOWS (defined(_WIN32) || defined(__CYGWIN__))
+
+#if NLR_OS_WINDOWS
+unsigned int nlr_push_tail(nlr_buf_t *nlr) asm("nlr_push_tail");
+#else
+__attribute__((used)) unsigned int nlr_push_tail(nlr_buf_t *nlr);
+#endif
+
+unsigned int nlr_push(nlr_buf_t *nlr) {
+    (void)nlr;
+
+    __asm volatile (
+    // Check for Zephyr, which uses a different calling convention
+    // by default.
+    // TODE: Better support for various x86 calling conventions
+    // (unfortunately, __attribute__((naked)) is not supported on x86).
+    #ifndef __ZEPHYR__
+    "pop    %ebp                \n" // undo function's prelude
+    #endif
+    "mov    4(%esp), %edx       \n" // load nlr_buf
+    "mov    (%esp), %eax        \n" // load return %eip
+    "mov    %eax, 8(%edx)       \n" // store %eip into nlr_buf
+    "mov    %ebp, 12(%edx)      \n" // store %ebp into nlr_buf
+    "mov    %esp, 16(%edx)      \n" // store %esp into nlr_buf
+    "mov    %ebx, 20(%edx)      \n" // store %ebx into nlr_buf
+    "mov    %edi, 24(%edx)      \n" // store %edi into nlr_buf
+    "mov    %esi, 28(%edx)      \n" // store %esi into nlr_buf
+    "jmp    nlr_push_tail       \n" // do the rest in C
+    );
+
+    return 0; // needed to silence compiler warning
+}
+
+__attribute__((used)) unsigned int nlr_push_tail(nlr_buf_t *nlr) {
+    nlr_buf_t **top = &MP_STATE_THREAD(nlr_top);
+    nlr->prev = *top;
+    *top = nlr;
+    return 0; // normal return
+}
+
+void nlr_pop(void) {
+    nlr_buf_t **top = &MP_STATE_THREAD(nlr_top);
+    *top = (*top)->prev;
+}
+
+NORETURN void nlr_jump(void *val) {
+    nlr_buf_t **top_ptr = &MP_STATE_THREAD(nlr_top);
+    nlr_buf_t *top = *top_ptr;
+    if (top == NULL) {
+        nlr_jump_fail(val);
+    }
+
+    top->ret_val = val;
+    *top_ptr = top->prev;
+
+    __asm volatile (
+    "mov    %0, %%edx           \n" // %edx points to nlr_buf
+    "mov    28(%%edx), %%esi    \n" // load saved %esi
+    "mov    24(%%edx), %%edi    \n" // load saved %edi
+    "mov    20(%%edx), %%ebx    \n" // load saved %ebx
+    "mov    16(%%edx), %%esp    \n" // load saved %esp
+    "mov    12(%%edx), %%ebp    \n" // load saved %ebp
+    "mov    8(%%edx), %%eax     \n" // load saved %eip
+    "mov    %%eax, (%%esp)      \n" // store saved %eip to stack
+    "xor    %%eax, %%eax        \n" // clear return register
+    "inc    %%al                \n" // increase to make 1, non-local return
+    "ret                        \n" // return
+    :                               // output operands
+    : "r"(top)                      // input operands
+    :                               // clobbered registers
+    );
+
+    for (;;); // needed to silence compiler warning
+}
+
+#endif // !MICROPY_NLR_SETJMP && defined(__i386__)
diff --git a/py/nlrxtensa.S b/py/nlrxtensa.S
deleted file mode 100644
index 289996ccee..0000000000
--- a/py/nlrxtensa.S
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * This file is part of the Micro Python project, http://micropython.org/
- *
- * The MIT License (MIT)
- *
- * Copyright (c) 2014 Damien P. George
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#if defined(__xtensa__)
-
-/*
-    calling conventions:
-    a0 = return address
-    a1 = stack pointer
-    a2 = first arg, return value
-    a3-a7 = rest of args
-*/
-
-// the offset of nlr_top within mp_state_ctx_t
-#define NLR_TOP_OFFSET (2 * 4)
-
-#define NLR_TOP (mp_state_ctx + NLR_TOP_OFFSET)
-
-    .file   "nlr.s"
-    .text
-
-    .literal_position
-    .literal .LC0, NLR_TOP
-    .align  4
-    .global nlr_push
-    .type   nlr_push, @function
-nlr_push:
-    // save regs
-    s32i.n  a0, a2, 8
-    s32i.n  a1, a2, 12
-    s32i.n  a8, a2, 16
-    s32i.n  a9, a2, 20
-    s32i.n  a10, a2, 24
-    s32i.n  a11, a2, 28
-    s32i.n  a12, a2, 32
-    s32i.n  a13, a2, 36
-    s32i.n  a14, a2, 40
-    s32i.n  a15, a2, 44
-
-    l32r    a3, .LC0
-    l32i.n  a4, a3, 0
-    s32i.n  a2, a3, 0
-    s32i.n  a4, a2, 0
-    movi.n  a2, 0
-    ret.n
-    .size   nlr_push, .-nlr_push
-
-    .literal_position
-    .literal .LC1, NLR_TOP
-    .align  4
-    .global nlr_pop
-    .type   nlr_pop, @function
-nlr_pop:
-    l32r    a2, .LC1
-    l32i.n  a3, a2, 0
-    l32i.n  a3, a3, 0
-    s32i.n  a3, a2, 0
-    ret.n
-    .size   nlr_pop, .-nlr_pop
-
-    .literal_position
-    .literal .LC2, NLR_TOP
-    .align    4
-    .global    nlr_jump
-    .type    nlr_jump, @function
-nlr_jump:
-    l32r    a3, .LC2
-    l32i.n  a3, a3, 0   // a3 = nlr_top
-    bnez.n  a3, .L4
-    call0   nlr_jump_fail
-.L4:
-    s32i.n  a2, a3, 4   // nlr_top->ret_val = val
-
-    // restore regs
-    l32i.n  a0, a3, 8
-    l32i.n  a1, a3, 12
-    l32i.n  a8, a3, 16
-    l32i.n  a9, a3, 20
-    l32i.n  a10, a3, 24
-    l32i.n  a11, a3, 28
-    l32i.n  a12, a3, 32
-    l32i.n  a13, a3, 36
-    l32i.n  a14, a3, 40
-    l32i.n  a15, a3, 44
-
-    l32i.n  a3, a3, 0   // a3 = nlr_top->prev
-    l32r    a2, .LC2
-    s32i.n  a3, a2, 0   // nlr_top = a3
-    movi.n  a2, 1       // return 1
-    ret.n
-    .size   nlr_jump, .-nlr_jump
-
-#endif // defined(__xtensa__)
diff --git a/py/nlrxtensa.c b/py/nlrxtensa.c
new file mode 100644
index 0000000000..ccac3597b1
--- /dev/null
+++ b/py/nlrxtensa.c
@@ -0,0 +1,103 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2014-2017 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "py/mpstate.h"
+#include "py/nlr.h"
+
+#if !MICROPY_NLR_SETJMP && defined(__xtensa__)
+
+#undef nlr_push
+
+// Xtensa calling conventions:
+//  a0 = return address
+//  a1 = stack pointer
+//  a2 = first arg, return value
+//  a3-a7 = rest of args
+
+unsigned int nlr_push(nlr_buf_t *nlr) {
+
+    __asm volatile (
+    "s32i.n  a0, a2, 8          \n" // save regs...
+    "s32i.n  a1, a2, 12         \n"
+    "s32i.n  a8, a2, 16         \n"
+    "s32i.n  a9, a2, 20         \n"
+    "s32i.n  a10, a2, 24        \n"
+    "s32i.n  a11, a2, 28        \n"
+    "s32i.n  a12, a2, 32        \n"
+    "s32i.n  a13, a2, 36        \n"
+    "s32i.n  a14, a2, 40        \n"
+    "s32i.n  a15, a2, 44        \n"
+    "j      nlr_push_tail       \n" // do the rest in C
+    );
+
+    return 0; // needed to silence compiler warning
+}
+
+__attribute__((used)) unsigned int nlr_push_tail(nlr_buf_t *nlr) {
+    nlr_buf_t **top = &MP_STATE_THREAD(nlr_top);
+    nlr->prev = *top;
+    *top = nlr;
+    return 0; // normal return
+}
+
+void nlr_pop(void) {
+    nlr_buf_t **top = &MP_STATE_THREAD(nlr_top);
+    *top = (*top)->prev;
+}
+
+NORETURN void nlr_jump(void *val) {
+    nlr_buf_t **top_ptr = &MP_STATE_THREAD(nlr_top);
+    nlr_buf_t *top = *top_ptr;
+    if (top == NULL) {
+        nlr_jump_fail(val);
+    }
+
+    top->ret_val = val;
+    *top_ptr = top->prev;
+
+    __asm volatile (
+    "mov.n   a2, %0             \n" // a2 points to nlr_buf
+    "l32i.n  a0, a2, 8          \n" // restore regs...
+    "l32i.n  a1, a2, 12         \n"
+    "l32i.n  a8, a2, 16         \n"
+    "l32i.n  a9, a2, 20         \n"
+    "l32i.n  a10, a2, 24        \n"
+    "l32i.n  a11, a2, 28        \n"
+    "l32i.n  a12, a2, 32        \n"
+    "l32i.n  a13, a2, 36        \n"
+    "l32i.n  a14, a2, 40        \n"
+    "l32i.n  a15, a2, 44        \n"
+    "movi.n a2, 1               \n" // return 1, non-local return
+    "ret.n                      \n" // return
+    :                               // output operands
+    : "r"(top)                      // input operands
+    :                               // clobbered registers
+    );
+
+    for (;;); // needed to silence compiler warning
+}
+
+#endif // !MICROPY_NLR_SETJMP && defined(__xtensa__)
diff --git a/py/obj.c b/py/obj.c
index 1d0c80ab90..98ffa930b5 100644
--- a/py/obj.c
+++ b/py/obj.c
@@ -229,7 +229,7 @@ mp_int_t mp_obj_get_int(mp_const_obj_t arg) {
         return mp_obj_int_get_checked(arg);
     } else {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "can't convert to int");
+            mp_raise_TypeError("can't convert to int");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "can't convert %s to int", mp_obj_get_type_str(arg)));
@@ -279,7 +279,7 @@ mp_float_t mp_obj_get_float(mp_obj_t arg) {
         return mp_obj_float_get(arg);
     } else {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "can't convert to float");
+            mp_raise_TypeError("can't convert to float");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "can't convert %s to float", mp_obj_get_type_str(arg)));
@@ -310,7 +310,7 @@ void mp_obj_get_complex(mp_obj_t arg, mp_float_t *real, mp_float_t *imag) {
         mp_obj_complex_get(arg, real, imag);
     } else {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "can't convert to complex");
+            mp_raise_TypeError("can't convert to complex");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "can't convert %s to complex", mp_obj_get_type_str(arg)));
@@ -321,14 +321,14 @@ void mp_obj_get_complex(mp_obj_t arg, mp_float_t *real, mp_float_t *imag) {
 #endif
 
 // note: returned value in *items may point to the interior of a GC block
-void mp_obj_get_array(mp_obj_t o, mp_uint_t *len, mp_obj_t **items) {
+void mp_obj_get_array(mp_obj_t o, size_t *len, mp_obj_t **items) {
     if (MP_OBJ_IS_TYPE(o, &mp_type_tuple)) {
         mp_obj_tuple_get(o, len, items);
     } else if (MP_OBJ_IS_TYPE(o, &mp_type_list)) {
         mp_obj_list_get(o, len, items);
     } else {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "expected tuple/list");
+            mp_raise_TypeError("expected tuple/list");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "object '%s' is not a tuple or list", mp_obj_get_type_str(o)));
@@ -337,12 +337,12 @@ void mp_obj_get_array(mp_obj_t o, mp_uint_t *len, mp_obj_t **items) {
 }
 
 // note: returned value in *items may point to the interior of a GC block
-void mp_obj_get_array_fixed_n(mp_obj_t o, mp_uint_t len, mp_obj_t **items) {
-    mp_uint_t seq_len;
+void mp_obj_get_array_fixed_n(mp_obj_t o, size_t len, mp_obj_t **items) {
+    size_t seq_len;
     mp_obj_get_array(o, &seq_len, items);
     if (seq_len != len) {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_ValueError, "tuple/list has wrong length");
+            mp_raise_ValueError("tuple/list has wrong length");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
                 "requested length %d but object has length %d", (int)len, (int)seq_len));
@@ -351,13 +351,13 @@ void mp_obj_get_array_fixed_n(mp_obj_t o, mp_uint_t len, mp_obj_t **items) {
 }
 
 // is_slice determines whether the index is a slice index
-mp_uint_t mp_get_index(const mp_obj_type_t *type, mp_uint_t len, mp_obj_t index, bool is_slice) {
+size_t mp_get_index(const mp_obj_type_t *type, size_t len, mp_obj_t index, bool is_slice) {
     mp_int_t i;
     if (MP_OBJ_IS_SMALL_INT(index)) {
         i = MP_OBJ_SMALL_INT_VALUE(index);
     } else if (!mp_obj_get_int_maybe(index, &i)) {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "indices must be integers");
+            mp_raise_TypeError("indices must be integers");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "%q indices must be integers, not %s",
@@ -384,7 +384,9 @@ mp_uint_t mp_get_index(const mp_obj_type_t *type, mp_uint_t len, mp_obj_t index,
             }
         }
     }
-    return i;
+
+    // By this point 0 <= i <= len and so fits in a size_t
+    return (size_t)i;
 }
 
 mp_obj_t mp_obj_id(mp_obj_t o_in) {
@@ -410,7 +412,7 @@ mp_obj_t mp_obj_len(mp_obj_t o_in) {
     mp_obj_t len = mp_obj_len_maybe(o_in);
     if (len == MP_OBJ_NULL) {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "object has no len");
+            mp_raise_TypeError("object has no len");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "object of type '%s' has no len()", mp_obj_get_type_str(o_in)));
@@ -451,7 +453,7 @@ mp_obj_t mp_obj_subscr(mp_obj_t base, mp_obj_t index, mp_obj_t value) {
     }
     if (value == MP_OBJ_NULL) {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "object does not support item deletion");
+            mp_raise_TypeError("object does not support item deletion");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "'%s' object does not support item deletion", mp_obj_get_type_str(base)));
@@ -466,7 +468,7 @@ mp_obj_t mp_obj_subscr(mp_obj_t base, mp_obj_t index, mp_obj_t value) {
         }
     } else {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "object does not support item assignment");
+            mp_raise_TypeError("object does not support item assignment");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "'%s' object does not support item assignment", mp_obj_get_type_str(base)));
@@ -481,6 +483,11 @@ mp_obj_t mp_identity(mp_obj_t self) {
 }
 MP_DEFINE_CONST_FUN_OBJ_1(mp_identity_obj, mp_identity);
 
+mp_obj_t mp_identity_getiter(mp_obj_t self, mp_obj_iter_buf_t *iter_buf) {
+    (void)iter_buf;
+    return self;
+}
+
 bool mp_get_buffer(mp_obj_t obj, mp_buffer_info_t *bufinfo, mp_uint_t flags) {
     mp_obj_type_t *type = mp_obj_get_type(obj);
     if (type->buffer_p.get_buffer == NULL) {
@@ -495,7 +502,7 @@ bool mp_get_buffer(mp_obj_t obj, mp_buffer_info_t *bufinfo, mp_uint_t flags) {
 
 void mp_get_buffer_raise(mp_obj_t obj, mp_buffer_info_t *bufinfo, mp_uint_t flags) {
     if (!mp_get_buffer(obj, bufinfo, flags)) {
-        mp_raise_msg(&mp_type_TypeError, "object with buffer protocol required");
+        mp_raise_TypeError("object with buffer protocol required");
     }
 }
 
diff --git a/py/obj.h b/py/obj.h
index 6106bbe19a..a3c06a261a 100644
--- a/py/obj.h
+++ b/py/obj.h
@@ -353,11 +353,11 @@ typedef struct _mp_rom_map_elem_t {
 // would also need a trucated dict structure
 
 typedef struct _mp_map_t {
-    mp_uint_t all_keys_are_qstrs : 1;
-    mp_uint_t is_fixed : 1;     // a fixed array that can't be modified; must also be ordered
-    mp_uint_t is_ordered : 1;   // an ordered array
-    mp_uint_t used : (8 * sizeof(mp_uint_t) - 3);
-    mp_uint_t alloc;
+    size_t all_keys_are_qstrs : 1;
+    size_t is_fixed : 1;    // a fixed array that can't be modified; must also be ordered
+    size_t is_ordered : 1;  // an ordered array
+    size_t used : (8 * sizeof(size_t) - 3);
+    size_t alloc;
     mp_map_elem_t *table;
 } mp_map_t;
 
@@ -371,11 +371,11 @@ typedef enum _mp_map_lookup_kind_t {
 
 extern const mp_map_t mp_const_empty_map;
 
-static inline bool MP_MAP_SLOT_IS_FILLED(const mp_map_t *map, mp_uint_t pos) { return ((map)->table[pos].key != MP_OBJ_NULL && (map)->table[pos].key != MP_OBJ_SENTINEL); }
+static inline bool MP_MAP_SLOT_IS_FILLED(const mp_map_t *map, size_t pos) { return ((map)->table[pos].key != MP_OBJ_NULL && (map)->table[pos].key != MP_OBJ_SENTINEL); }
 
-void mp_map_init(mp_map_t *map, mp_uint_t n);
-void mp_map_init_fixed_table(mp_map_t *map, mp_uint_t n, const mp_obj_t *table);
-mp_map_t *mp_map_new(mp_uint_t n);
+void mp_map_init(mp_map_t *map, size_t n);
+void mp_map_init_fixed_table(mp_map_t *map, size_t n, const mp_obj_t *table);
+mp_map_t *mp_map_new(size_t n);
 void mp_map_deinit(mp_map_t *map);
 void mp_map_free(mp_map_t *map);
 mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t lookup_kind);
@@ -385,14 +385,14 @@ void mp_map_dump(mp_map_t *map);
 // Underlying set implementation (not set object)
 
 typedef struct _mp_set_t {
-    mp_uint_t alloc;
-    mp_uint_t used;
+    size_t alloc;
+    size_t used;
     mp_obj_t *table;
 } mp_set_t;
 
-static inline bool MP_SET_SLOT_IS_FILLED(const mp_set_t *set, mp_uint_t pos) { return ((set)->table[pos] != MP_OBJ_NULL && (set)->table[pos] != MP_OBJ_SENTINEL); }
+static inline bool MP_SET_SLOT_IS_FILLED(const mp_set_t *set, size_t pos) { return ((set)->table[pos] != MP_OBJ_NULL && (set)->table[pos] != MP_OBJ_SENTINEL); }
 
-void mp_set_init(mp_set_t *set, mp_uint_t n);
+void mp_set_init(mp_set_t *set, size_t n);
 mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind);
 mp_obj_t mp_set_remove_first(mp_set_t *set);
 void mp_set_clear(mp_set_t *set);
@@ -417,6 +417,15 @@ typedef enum {
     PRINT_EXC_SUBCLASS = 0x80, // Internal flag for printing exception subclasses
 } mp_print_kind_t;
 
+typedef struct _mp_obj_iter_buf_t {
+    mp_obj_base_t base;
+    mp_obj_t buf[3];
+} mp_obj_iter_buf_t;
+
+// The number of slots that an mp_obj_iter_buf_t needs on the Python value stack.
+// It's rounded up in case mp_obj_base_t is smaller than mp_obj_t (eg for OBJ_REPR_D).
+#define MP_OBJ_ITER_BUF_NSLOTS ((sizeof(mp_obj_iter_buf_t) + sizeof(mp_obj_t) - 1) / sizeof(mp_obj_t))
+
 typedef void (*mp_print_fun_t)(const mp_print_t *print, mp_obj_t o, mp_print_kind_t kind);
 typedef mp_obj_t (*mp_make_new_fun_t)(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args);
 typedef mp_obj_t (*mp_call_fun_t)(mp_obj_t fun, size_t n_args, size_t n_kw, const mp_obj_t *args);
@@ -424,6 +433,7 @@ typedef mp_obj_t (*mp_unary_op_fun_t)(mp_uint_t op, mp_obj_t);
 typedef mp_obj_t (*mp_binary_op_fun_t)(mp_uint_t op, mp_obj_t, mp_obj_t);
 typedef void (*mp_attr_fun_t)(mp_obj_t self_in, qstr attr, mp_obj_t *dest);
 typedef mp_obj_t (*mp_subscr_fun_t)(mp_obj_t self_in, mp_obj_t index, mp_obj_t value);
+typedef mp_obj_t (*mp_getiter_fun_t)(mp_obj_t self_in, mp_obj_iter_buf_t *iter_buf);
 
 // Buffer protocol
 typedef struct _mp_buffer_info_t {
@@ -460,16 +470,27 @@ typedef struct _mp_stream_p_t {
 } mp_stream_p_t;
 
 struct _mp_obj_type_t {
+    // A type is an object so must start with this entry, which points to mp_type_type.
     mp_obj_base_t base;
+
+    // The name of this type.
     qstr name;
+
+    // Corresponds to __repr__ and __str__ special methods.
     mp_print_fun_t print;
-    mp_make_new_fun_t make_new;     // to make an instance of the type
 
+    // Corresponds to __new__ and __init__ special methods, to make an instance of the type.
+    mp_make_new_fun_t make_new;
+
+    // Corresponds to __call__ special method, ie T(...).
     mp_call_fun_t call;
-    mp_unary_op_fun_t unary_op;     // can return MP_OBJ_NULL if op not supported
-    mp_binary_op_fun_t binary_op;   // can return MP_OBJ_NULL if op not supported
 
-    // implements load, store and delete attribute
+    // Implements unary and binary operations.
+    // Can return MP_OBJ_NULL if the operation is not supported.
+    mp_unary_op_fun_t unary_op;
+    mp_binary_op_fun_t binary_op;
+
+    // Implements load, store and delete attribute.
     //
     // dest[0] = MP_OBJ_NULL means load
     //  return: for fail, do nothing
@@ -482,31 +503,36 @@ struct _mp_obj_type_t {
     //          for success set dest[0] = MP_OBJ_NULL
     mp_attr_fun_t attr;
 
-    mp_subscr_fun_t subscr;         // implements load, store, delete subscripting
-                                    // value=MP_OBJ_NULL means delete, value=MP_OBJ_SENTINEL means load, else store
-                                    // can return MP_OBJ_NULL if op not supported
+    // Implements load, store and delete subscripting:
+    //  - value = MP_OBJ_SENTINEL means load
+    //  - value = MP_OBJ_NULL means delete
+    //  - all other values mean store the value
+    // Can return MP_OBJ_NULL if operation not supported.
+    mp_subscr_fun_t subscr;
+
+    // Corresponds to __iter__ special method.
+    // Can use the given mp_obj_iter_buf_t to store iterator object,
+    // otherwise can return a pointer to an object on the heap.
+    mp_getiter_fun_t getiter;
 
-    mp_fun_1_t getiter;             // corresponds to __iter__ special method
-    mp_fun_1_t iternext; // may return MP_OBJ_STOP_ITERATION as an optimisation instead of raising StopIteration() (with no args)
+    // Corresponds to __next__ special method.  May return MP_OBJ_STOP_ITERATION
+    // as an optimisation instead of raising StopIteration() with no args.
+    mp_fun_1_t iternext;
 
+    // Implements the buffer protocol if supported by this type.
     mp_buffer_p_t buffer_p;
+
     // One of disjoint protocols (interfaces), like mp_stream_p_t, etc.
     const void *protocol;
 
-    // these are for dynamically created types (classes)
-    struct _mp_obj_tuple_t *bases_tuple;
-    struct _mp_obj_dict_t *locals_dict;
-
-    /*
-    What we might need to add here:
-
-    len             str tuple list map
-    abs             float complex
-    hash            bool int none str
-    equal           int str
+    // A pointer to the parents of this type:
+    //  - 0 parents: pointer is NULL (object is implicitly the single parent)
+    //  - 1 parent: a pointer to the type of that parent
+    //  - 2 or more parents: pointer to a tuple object containing the parent types
+    const void *parent;
 
-    unpack seq      list tuple
-    */
+    // A dict mapping qstrs to objects local methods/constants/etc.
+    struct _mp_obj_dict_t *locals_dict;
 };
 
 // Constant types, globally accessible
@@ -607,39 +633,38 @@ static inline mp_obj_t mp_obj_new_bool(mp_int_t x) { return x ? mp_const_true :
 mp_obj_t mp_obj_new_cell(mp_obj_t obj);
 mp_obj_t mp_obj_new_int(mp_int_t value);
 mp_obj_t mp_obj_new_int_from_uint(mp_uint_t value);
-mp_obj_t mp_obj_new_int_from_str_len(const char **str, mp_uint_t len, bool neg, mp_uint_t base);
+mp_obj_t mp_obj_new_int_from_str_len(const char **str, size_t len, bool neg, unsigned int base);
 mp_obj_t mp_obj_new_int_from_ll(long long val); // this must return a multi-precision integer object (or raise an overflow exception)
 mp_obj_t mp_obj_new_int_from_ull(unsigned long long val); // this must return a multi-precision integer object (or raise an overflow exception)
-mp_obj_t mp_obj_new_str(const char* data, mp_uint_t len, bool make_qstr_if_not_already);
+mp_obj_t mp_obj_new_str(const char* data, size_t len, bool make_qstr_if_not_already);
 mp_obj_t mp_obj_new_str_from_vstr(const mp_obj_type_t *type, vstr_t *vstr);
-mp_obj_t mp_obj_new_bytes(const byte* data, mp_uint_t len);
-mp_obj_t mp_obj_new_bytearray(mp_uint_t n, void *items);
-mp_obj_t mp_obj_new_bytearray_by_ref(mp_uint_t n, void *items);
+mp_obj_t mp_obj_new_bytes(const byte* data, size_t len);
+mp_obj_t mp_obj_new_bytearray(size_t n, void *items);
+mp_obj_t mp_obj_new_bytearray_by_ref(size_t n, void *items);
 #if MICROPY_PY_BUILTINS_FLOAT
 mp_obj_t mp_obj_new_int_from_float(mp_float_t val);
 mp_obj_t mp_obj_new_complex(mp_float_t real, mp_float_t imag);
 #endif
 mp_obj_t mp_obj_new_exception(const mp_obj_type_t *exc_type);
 mp_obj_t mp_obj_new_exception_arg1(const mp_obj_type_t *exc_type, mp_obj_t arg);
-mp_obj_t mp_obj_new_exception_args(const mp_obj_type_t *exc_type, mp_uint_t n_args, const mp_obj_t *args);
+mp_obj_t mp_obj_new_exception_args(const mp_obj_type_t *exc_type, size_t n_args, const mp_obj_t *args);
 mp_obj_t mp_obj_new_exception_msg(const mp_obj_type_t *exc_type, const char *msg);
 mp_obj_t mp_obj_new_exception_msg_varg(const mp_obj_type_t *exc_type, const char *fmt, ...); // counts args by number of % symbols in fmt, excluding %%; can only handle void* sizes (ie no float/double!)
 mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args, mp_obj_t def_kw_args, const byte *code, const mp_uint_t *const_table);
 mp_obj_t mp_obj_new_fun_native(mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data, const mp_uint_t *const_table);
-mp_obj_t mp_obj_new_fun_viper(mp_uint_t n_args, void *fun_data, mp_uint_t type_sig);
-mp_obj_t mp_obj_new_fun_asm(mp_uint_t n_args, void *fun_data, mp_uint_t type_sig);
+mp_obj_t mp_obj_new_fun_viper(size_t n_args, void *fun_data, mp_uint_t type_sig);
+mp_obj_t mp_obj_new_fun_asm(size_t n_args, void *fun_data, mp_uint_t type_sig);
 mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun);
-mp_obj_t mp_obj_new_closure(mp_obj_t fun, mp_uint_t n_closed, const mp_obj_t *closed);
-mp_obj_t mp_obj_new_tuple(mp_uint_t n, const mp_obj_t *items);
-mp_obj_t mp_obj_new_list(mp_uint_t n, mp_obj_t *items);
-mp_obj_t mp_obj_new_dict(mp_uint_t n_args);
-mp_obj_t mp_obj_new_set(mp_uint_t n_args, mp_obj_t *items);
+mp_obj_t mp_obj_new_closure(mp_obj_t fun, size_t n_closed, const mp_obj_t *closed);
+mp_obj_t mp_obj_new_tuple(size_t n, const mp_obj_t *items);
+mp_obj_t mp_obj_new_list(size_t n, mp_obj_t *items);
+mp_obj_t mp_obj_new_dict(size_t n_args);
+mp_obj_t mp_obj_new_set(size_t n_args, mp_obj_t *items);
 mp_obj_t mp_obj_new_slice(mp_obj_t start, mp_obj_t stop, mp_obj_t step);
-mp_obj_t mp_obj_new_super(mp_obj_t type, mp_obj_t obj);
 mp_obj_t mp_obj_new_bound_meth(mp_obj_t meth, mp_obj_t self);
-mp_obj_t mp_obj_new_getitem_iter(mp_obj_t *args);
+mp_obj_t mp_obj_new_getitem_iter(mp_obj_t *args, mp_obj_iter_buf_t *iter_buf);
 mp_obj_t mp_obj_new_module(qstr module_name);
-mp_obj_t mp_obj_new_memoryview(byte typecode, mp_uint_t nitems, void *items);
+mp_obj_t mp_obj_new_memoryview(byte typecode, size_t nitems, void *items);
 
 mp_obj_type_t *mp_obj_get_type(mp_const_obj_t o_in);
 const char *mp_obj_get_type_str(mp_const_obj_t o_in);
@@ -662,9 +687,9 @@ mp_float_t mp_obj_get_float(mp_obj_t self_in);
 void mp_obj_get_complex(mp_obj_t self_in, mp_float_t *real, mp_float_t *imag);
 #endif
 //qstr mp_obj_get_qstr(mp_obj_t arg);
-void mp_obj_get_array(mp_obj_t o, mp_uint_t *len, mp_obj_t **items); // *items may point inside a GC block
-void mp_obj_get_array_fixed_n(mp_obj_t o, mp_uint_t len, mp_obj_t **items); // *items may point inside a GC block
-mp_uint_t mp_get_index(const mp_obj_type_t *type, mp_uint_t len, mp_obj_t index, bool is_slice);
+void mp_obj_get_array(mp_obj_t o, size_t *len, mp_obj_t **items); // *items may point inside a GC block
+void mp_obj_get_array_fixed_n(mp_obj_t o, size_t len, mp_obj_t **items); // *items may point inside a GC block
+size_t mp_get_index(const mp_obj_type_t *type, size_t len, mp_obj_t index, bool is_slice);
 mp_obj_t mp_obj_id(mp_obj_t o_in);
 mp_obj_t mp_obj_len(mp_obj_t o_in);
 mp_obj_t mp_obj_len_maybe(mp_obj_t o_in); // may return MP_OBJ_NULL
@@ -698,12 +723,17 @@ void mp_init_emergency_exception_buf(void);
 bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2);
 qstr mp_obj_str_get_qstr(mp_obj_t self_in); // use this if you will anyway convert the string to a qstr
 const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated
-const char *mp_obj_str_get_data(mp_obj_t self_in, mp_uint_t *len);
+const char *mp_obj_str_get_data(mp_obj_t self_in, size_t *len);
 mp_obj_t mp_obj_str_intern(mp_obj_t str);
-void mp_str_print_quoted(const mp_print_t *print, const byte *str_data, mp_uint_t str_len, bool is_bytes);
+void mp_str_print_quoted(const mp_print_t *print, const byte *str_data, size_t str_len, bool is_bytes);
 
 #if MICROPY_PY_BUILTINS_FLOAT
 // float
+#if MICROPY_FLOAT_HIGH_QUALITY_HASH
+mp_int_t mp_float_hash(mp_float_t val);
+#else
+static inline mp_int_t mp_float_hash(mp_float_t val) { return (mp_int_t)val; }
+#endif
 mp_obj_t mp_obj_float_binary_op(mp_uint_t op, mp_float_t lhs_val, mp_obj_t rhs); // can return MP_OBJ_NULL if op not supported
 
 // complex
@@ -714,17 +744,17 @@ mp_obj_t mp_obj_complex_binary_op(mp_uint_t op, mp_float_t lhs_real, mp_float_t
 #endif
 
 // tuple
-void mp_obj_tuple_get(mp_obj_t self_in, mp_uint_t *len, mp_obj_t **items);
+void mp_obj_tuple_get(mp_obj_t self_in, size_t *len, mp_obj_t **items);
 void mp_obj_tuple_del(mp_obj_t self_in);
 mp_int_t mp_obj_tuple_hash(mp_obj_t self_in);
 
 // list
 struct _mp_obj_list_t;
-void mp_obj_list_init(struct _mp_obj_list_t *o, mp_uint_t n);
+void mp_obj_list_init(struct _mp_obj_list_t *o, size_t n);
 mp_obj_t mp_obj_list_append(mp_obj_t self_in, mp_obj_t arg);
 mp_obj_t mp_obj_list_remove(mp_obj_t self_in, mp_obj_t value);
-void mp_obj_list_get(mp_obj_t self_in, mp_uint_t *len, mp_obj_t **items);
-void mp_obj_list_set_len(mp_obj_t self_in, mp_uint_t len);
+void mp_obj_list_get(mp_obj_t self_in, size_t *len, mp_obj_t **items);
+void mp_obj_list_set_len(mp_obj_t self_in, size_t len);
 void mp_obj_list_store(mp_obj_t self_in, mp_obj_t index, mp_obj_t value);
 mp_obj_t mp_obj_list_sort(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs);
 
@@ -733,8 +763,8 @@ typedef struct _mp_obj_dict_t {
     mp_obj_base_t base;
     mp_map_t map;
 } mp_obj_dict_t;
-void mp_obj_dict_init(mp_obj_dict_t *dict, mp_uint_t n_args);
-mp_uint_t mp_obj_dict_len(mp_obj_t self_in);
+void mp_obj_dict_init(mp_obj_dict_t *dict, size_t n_args);
+size_t mp_obj_dict_len(mp_obj_t self_in);
 mp_obj_t mp_obj_dict_get(mp_obj_t self_in, mp_obj_t index);
 mp_obj_t mp_obj_dict_store(mp_obj_t self_in, mp_obj_t key, mp_obj_t value);
 mp_obj_t mp_obj_dict_delete(mp_obj_t self_in, mp_obj_t key);
@@ -775,6 +805,7 @@ qstr mp_obj_code_get_name(const byte *code_info);
 
 mp_obj_t mp_identity(mp_obj_t self);
 MP_DECLARE_CONST_FUN_OBJ_1(mp_identity_obj);
+mp_obj_t mp_identity_getiter(mp_obj_t self, mp_obj_iter_buf_t *iter_buf);
 
 // module
 typedef struct _mp_obj_module_t {
@@ -808,17 +839,17 @@ typedef struct {
     mp_int_t step;
 } mp_bound_slice_t;
 
-void mp_seq_multiply(const void *items, mp_uint_t item_sz, mp_uint_t len, mp_uint_t times, void *dest);
+void mp_seq_multiply(const void *items, size_t item_sz, size_t len, size_t times, void *dest);
 #if MICROPY_PY_BUILTINS_SLICE
 bool mp_seq_get_fast_slice_indexes(mp_uint_t len, mp_obj_t slice, mp_bound_slice_t *indexes);
 #endif
 #define mp_seq_copy(dest, src, len, item_t) memcpy(dest, src, len * sizeof(item_t))
 #define mp_seq_cat(dest, src1, len1, src2, len2, item_t) { memcpy(dest, src1, (len1) * sizeof(item_t)); memcpy(dest + (len1), src2, (len2) * sizeof(item_t)); }
-bool mp_seq_cmp_bytes(mp_uint_t op, const byte *data1, mp_uint_t len1, const byte *data2, mp_uint_t len2);
-bool mp_seq_cmp_objs(mp_uint_t op, const mp_obj_t *items1, mp_uint_t len1, const mp_obj_t *items2, mp_uint_t len2);
-mp_obj_t mp_seq_index_obj(const mp_obj_t *items, mp_uint_t len, mp_uint_t n_args, const mp_obj_t *args);
-mp_obj_t mp_seq_count_obj(const mp_obj_t *items, mp_uint_t len, mp_obj_t value);
-mp_obj_t mp_seq_extract_slice(mp_uint_t len, const mp_obj_t *seq, mp_bound_slice_t *indexes);
+bool mp_seq_cmp_bytes(mp_uint_t op, const byte *data1, size_t len1, const byte *data2, size_t len2);
+bool mp_seq_cmp_objs(mp_uint_t op, const mp_obj_t *items1, size_t len1, const mp_obj_t *items2, size_t len2);
+mp_obj_t mp_seq_index_obj(const mp_obj_t *items, size_t len, size_t n_args, const mp_obj_t *args);
+mp_obj_t mp_seq_count_obj(const mp_obj_t *items, size_t len, mp_obj_t value);
+mp_obj_t mp_seq_extract_slice(size_t len, const mp_obj_t *seq, mp_bound_slice_t *indexes);
 // Helper to clear stale pointers from allocated, but unused memory, to preclude GC problems
 #define mp_seq_clear(start, len, alloc_len, item_sz) memset((byte*)(start) + (len) * (item_sz), 0, ((alloc_len) - (len)) * (item_sz))
 #define mp_seq_replace_slice_no_grow(dest, dest_len, beg, end, slice, slice_len, item_sz) \
@@ -827,9 +858,10 @@ mp_obj_t mp_seq_extract_slice(mp_uint_t len, const mp_obj_t *seq, mp_bound_slice
     /*printf("memmove(%p, %p, %d)\n", dest + (beg + slice_len), dest + end, (dest_len - end) * (item_sz));*/ \
     memmove(((char*)dest) + (beg + slice_len) * (item_sz), ((char*)dest) + (end) * (item_sz), (dest_len - end) * (item_sz));
 
+// Note: dest and slice regions may overlap
 #define mp_seq_replace_slice_grow_inplace(dest, dest_len, beg, end, slice, slice_len, len_adj, item_sz) \
     /*printf("memmove(%p, %p, %d)\n", dest + beg + len_adj, dest + beg, (dest_len - beg) * (item_sz));*/ \
-    memmove(((char*)dest) + (beg + len_adj) * (item_sz), ((char*)dest) + (beg) * (item_sz), (dest_len - beg) * (item_sz)); \
-    memcpy(((char*)dest) + (beg) * (item_sz), slice, slice_len * (item_sz));
+    memmove(((char*)dest) + (beg + slice_len) * (item_sz), ((char*)dest) + (end) * (item_sz), ((dest_len) + (len_adj) - ((beg) + (slice_len))) * (item_sz)); \
+    memmove(((char*)dest) + (beg) * (item_sz), slice, slice_len * (item_sz));
 
 #endif // __MICROPY_INCLUDED_PY_OBJ_H__
diff --git a/py/objarray.c b/py/objarray.c
index 8e1d32f0f4..21479a800f 100644
--- a/py/objarray.c
+++ b/py/objarray.c
@@ -56,10 +56,10 @@
 #if MICROPY_PY_BUILTINS_MEMORYVIEW
 #define TYPECODE_MASK (0x7f)
 #else
-#define TYPECODE_MASK (~(mp_uint_t)0)
+#define TYPECODE_MASK (~(size_t)0)
 #endif
 
-STATIC mp_obj_t array_iterator_new(mp_obj_t array_in);
+STATIC mp_obj_t array_iterator_new(mp_obj_t array_in, mp_obj_iter_buf_t *iter_buf);
 STATIC mp_obj_t array_append(mp_obj_t self_in, mp_obj_t arg);
 STATIC mp_obj_t array_extend(mp_obj_t self_in, mp_obj_t arg_in);
 STATIC mp_int_t array_get_buffer(mp_obj_t o_in, mp_buffer_info_t *bufinfo, mp_uint_t flags);
@@ -78,7 +78,7 @@ STATIC void array_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t
         mp_printf(print, "array('%c'", o->typecode);
         if (o->len > 0) {
             mp_print_str(print, ", [");
-            for (mp_uint_t i = 0; i < o->len; i++) {
+            for (size_t i = 0; i < o->len; i++) {
                 if (i > 0) {
                     mp_print_str(print, ", ");
                 }
@@ -92,11 +92,8 @@ STATIC void array_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t
 #endif
 
 #if MICROPY_PY_BUILTINS_BYTEARRAY || MICROPY_PY_ARRAY
-STATIC mp_obj_array_t *array_new(char typecode, mp_uint_t n) {
+STATIC mp_obj_array_t *array_new(char typecode, size_t n) {
     int typecode_size = mp_binary_get_size('@', typecode, NULL);
-    if (typecode_size == 0) {
-        mp_raise_msg(&mp_type_ValueError, "bad typecode");
-    }
     mp_obj_array_t *o = m_new_obj(mp_obj_array_t);
     #if MICROPY_PY_BUILTINS_BYTEARRAY && MICROPY_PY_ARRAY
     o->base.type = (typecode == BYTEARRAY_TYPECODE) ? &mp_type_bytearray : &mp_type_array;
@@ -127,13 +124,13 @@ STATIC mp_obj_t array_construct(char typecode, mp_obj_t initializer) {
         // construct array from raw bytes
         // we round-down the len to make it a multiple of sz (CPython raises error)
         size_t sz = mp_binary_get_size('@', typecode, NULL);
-        mp_uint_t len = bufinfo.len / sz;
+        size_t len = bufinfo.len / sz;
         mp_obj_array_t *o = array_new(typecode, len);
         memcpy(o->items, bufinfo.buf, len * sz);
         return MP_OBJ_FROM_PTR(o);
     }
 
-    mp_uint_t len;
+    size_t len;
     // Try to create array of exact len if initializer len is known
     mp_obj_t len_in = mp_obj_len_maybe(initializer);
     if (len_in == MP_OBJ_NULL) {
@@ -144,9 +141,9 @@ STATIC mp_obj_t array_construct(char typecode, mp_obj_t initializer) {
 
     mp_obj_array_t *array = array_new(typecode, len);
 
-    mp_obj_t iterable = mp_getiter(initializer);
+    mp_obj_t iterable = mp_getiter(initializer, NULL);
     mp_obj_t item;
-    mp_uint_t i = 0;
+    size_t i = 0;
     while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
         if (len == 0) {
             array_append(MP_OBJ_FROM_PTR(array), item);
@@ -165,8 +162,7 @@ STATIC mp_obj_t array_make_new(const mp_obj_type_t *type_in, size_t n_args, size
     mp_arg_check_num(n_args, n_kw, 1, 2, false);
 
     // get typecode
-    mp_uint_t l;
-    const char *typecode = mp_obj_str_get_data(args[0], &l);
+    const char *typecode = mp_obj_str_get_str(args[0]);
 
     if (n_args == 1) {
         // 1 arg: make an empty array
@@ -201,7 +197,7 @@ STATIC mp_obj_t bytearray_make_new(const mp_obj_type_t *type_in, size_t n_args,
 
 #if MICROPY_PY_BUILTINS_MEMORYVIEW
 
-mp_obj_t mp_obj_new_memoryview(byte typecode, mp_uint_t nitems, void *items) {
+mp_obj_t mp_obj_new_memoryview(byte typecode, size_t nitems, void *items) {
     mp_obj_array_t *self = m_new_obj(mp_obj_array_t);
     self->base.type = &mp_type_memoryview;
     self->typecode = typecode;
@@ -257,7 +253,7 @@ STATIC mp_obj_t array_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in)
             size_t sz = mp_binary_get_size('@', lhs_bufinfo.typecode, NULL);
 
             // convert byte count to element count (in case rhs is not multiple of sz)
-            mp_uint_t rhs_len = rhs_bufinfo.len / sz;
+            size_t rhs_len = rhs_bufinfo.len / sz;
 
             // note: lhs->len is element count of lhs, lhs_bufinfo.len is byte count
             mp_obj_array_t *res = array_new(lhs_bufinfo.typecode, lhs->len + rhs_len);
@@ -348,7 +344,7 @@ STATIC mp_obj_t array_extend(mp_obj_t self_in, mp_obj_t arg_in) {
     size_t sz = mp_binary_get_size('@', self->typecode, NULL);
 
     // convert byte count to element count
-    mp_uint_t len = arg_bufinfo.len / sz;
+    size_t len = arg_bufinfo.len / sz;
 
     // make sure we have enough room to extend
     // TODO: alloc policy; at the moment we go conservative
@@ -387,7 +383,7 @@ STATIC mp_obj_t array_subscr(mp_obj_t self_in, mp_obj_t index_in, mp_obj_t value
             if (value != MP_OBJ_SENTINEL) {
                 #if MICROPY_PY_ARRAY_SLICE_ASSIGN
                 // Assign
-                mp_uint_t src_len;
+                size_t src_len;
                 void *src_items;
                 size_t item_sz = mp_binary_get_size('@', o->typecode & TYPECODE_MASK, NULL);
                 if (MP_OBJ_IS_OBJ(value) && ((mp_obj_base_t*)MP_OBJ_TO_PTR(value))->type->subscr == array_subscr) {
@@ -395,7 +391,7 @@ STATIC mp_obj_t array_subscr(mp_obj_t self_in, mp_obj_t index_in, mp_obj_t value
                     mp_obj_array_t *src_slice = MP_OBJ_TO_PTR(value);
                     if (item_sz != mp_binary_get_size('@', src_slice->typecode & TYPECODE_MASK, NULL)) {
                     compat_error:
-                        mp_raise_msg(&mp_type_ValueError, "lhs and rhs should be compatible");
+                        mp_raise_ValueError("lhs and rhs should be compatible");
                     }
                     src_len = src_slice->len;
                     src_items = src_slice->items;
@@ -421,6 +417,10 @@ STATIC mp_obj_t array_subscr(mp_obj_t self_in, mp_obj_t index_in, mp_obj_t value
                 uint8_t* dest_items = o->items;
                 #if MICROPY_PY_BUILTINS_MEMORYVIEW
                 if (o->base.type == &mp_type_memoryview) {
+                    if ((o->typecode & 0x80) == 0) {
+                        // store to read-only memoryview not allowed
+                        return MP_OBJ_NULL;
+                    }
                     if (len_adj != 0) {
                         goto compat_error;
                     }
@@ -470,7 +470,7 @@ STATIC mp_obj_t array_subscr(mp_obj_t self_in, mp_obj_t index_in, mp_obj_t value
             return MP_OBJ_FROM_PTR(res);
 #endif
         } else {
-            mp_uint_t index = mp_get_index(o->base.type, o->len, index_in, false);
+            size_t index = mp_get_index(o->base.type, o->len, index_in, false);
             #if MICROPY_PY_BUILTINS_MEMORYVIEW
             if (o->base.type == &mp_type_memoryview) {
                 index += o->free;
@@ -504,7 +504,7 @@ STATIC mp_int_t array_get_buffer(mp_obj_t o_in, mp_buffer_info_t *bufinfo, mp_ui
             // read-only memoryview
             return 1;
         }
-        bufinfo->buf = (uint8_t*)bufinfo->buf + (mp_uint_t)o->free * sz;
+        bufinfo->buf = (uint8_t*)bufinfo->buf + (size_t)o->free * sz;
     }
     #else
     (void)flags;
@@ -565,20 +565,20 @@ const mp_obj_type_t mp_type_memoryview = {
 #endif
 
 /* unused
-mp_uint_t mp_obj_array_len(mp_obj_t self_in) {
+size_t mp_obj_array_len(mp_obj_t self_in) {
     return ((mp_obj_array_t *)self_in)->len;
 }
 */
 
 #if MICROPY_PY_BUILTINS_BYTEARRAY
-mp_obj_t mp_obj_new_bytearray(mp_uint_t n, void *items) {
+mp_obj_t mp_obj_new_bytearray(size_t n, void *items) {
     mp_obj_array_t *o = array_new(BYTEARRAY_TYPECODE, n);
     memcpy(o->items, items, n);
     return MP_OBJ_FROM_PTR(o);
 }
 
 // Create bytearray which references specified memory area
-mp_obj_t mp_obj_new_bytearray_by_ref(mp_uint_t n, void *items) {
+mp_obj_t mp_obj_new_bytearray_by_ref(size_t n, void *items) {
     mp_obj_array_t *o = m_new_obj(mp_obj_array_t);
     o->base.type = &mp_type_bytearray;
     o->typecode = BYTEARRAY_TYPECODE;
@@ -595,8 +595,8 @@ mp_obj_t mp_obj_new_bytearray_by_ref(mp_uint_t n, void *items) {
 typedef struct _mp_obj_array_it_t {
     mp_obj_base_t base;
     mp_obj_array_t *array;
-    mp_uint_t offset;
-    mp_uint_t cur;
+    size_t offset;
+    size_t cur;
 } mp_obj_array_it_t;
 
 STATIC mp_obj_t array_it_iternext(mp_obj_t self_in) {
@@ -611,15 +611,18 @@ STATIC mp_obj_t array_it_iternext(mp_obj_t self_in) {
 STATIC const mp_obj_type_t array_it_type = {
     { &mp_type_type },
     .name = MP_QSTR_iterator,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = array_it_iternext,
 };
 
-STATIC mp_obj_t array_iterator_new(mp_obj_t array_in) {
+STATIC mp_obj_t array_iterator_new(mp_obj_t array_in, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_array_t) <= sizeof(mp_obj_iter_buf_t));
     mp_obj_array_t *array = MP_OBJ_TO_PTR(array_in);
-    mp_obj_array_it_t *o = m_new0(mp_obj_array_it_t, 1);
+    mp_obj_array_it_t *o = (mp_obj_array_it_t*)iter_buf;
     o->base.type = &array_it_type;
     o->array = array;
+    o->offset = 0;
+    o->cur = 0;
     #if MICROPY_PY_BUILTINS_MEMORYVIEW
     if (array->base.type == &mp_type_memoryview) {
         o->offset = array->free;
diff --git a/py/objarray.h b/py/objarray.h
index 013ac5be9b..06a2a07efb 100644
--- a/py/objarray.h
+++ b/py/objarray.h
@@ -32,11 +32,11 @@
 
 typedef struct _mp_obj_array_t {
     mp_obj_base_t base;
-    mp_uint_t typecode : 8;
+    size_t typecode : 8;
     // free is number of unused elements after len used elements
     // alloc size = len + free
-    mp_uint_t free : (8 * sizeof(mp_uint_t) - 8);
-    mp_uint_t len; // in elements
+    size_t free : (8 * sizeof(size_t) - 8);
+    size_t len; // in elements
     void *items;
 } mp_obj_array_t;
 
diff --git a/py/objattrtuple.c b/py/objattrtuple.c
index c6dd3aeacf..8c5e795757 100644
--- a/py/objattrtuple.c
+++ b/py/objattrtuple.c
@@ -34,7 +34,7 @@ STATIC
 #endif
 void mp_obj_attrtuple_print_helper(const mp_print_t *print, const qstr *fields, mp_obj_tuple_t *o) {
     mp_print_str(print, "(");
-    for (mp_uint_t i = 0; i < o->len; i++) {
+    for (size_t i = 0; i < o->len; i++) {
         if (i > 0) {
             mp_print_str(print, ", ");
         }
@@ -59,9 +59,9 @@ STATIC void mp_obj_attrtuple_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
     if (dest[0] == MP_OBJ_NULL) {
         // load attribute
         mp_obj_tuple_t *self = MP_OBJ_TO_PTR(self_in);
-        mp_uint_t len = self->len;
+        size_t len = self->len;
         const qstr *fields = (const qstr*)MP_OBJ_TO_PTR(self->items[len]);
-        for (mp_uint_t i = 0; i < len; i++) {
+        for (size_t i = 0; i < len; i++) {
             if (fields[i] == attr) {
                 dest[0] = self->items[i];
                 return;
@@ -70,11 +70,11 @@ STATIC void mp_obj_attrtuple_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
     }
 }
 
-mp_obj_t mp_obj_new_attrtuple(const qstr *fields, mp_uint_t n, const mp_obj_t *items) {
+mp_obj_t mp_obj_new_attrtuple(const qstr *fields, size_t n, const mp_obj_t *items) {
     mp_obj_tuple_t *o = m_new_obj_var(mp_obj_tuple_t, mp_obj_t, n + 1);
     o->base.type = &mp_type_attrtuple;
     o->len = n;
-    for (mp_uint_t i = 0; i < n; i++) {
+    for (size_t i = 0; i < n; i++) {
         o->items[i] = items[i];
     }
     o->items[n] = MP_OBJ_FROM_PTR(fields);
diff --git a/py/objclosure.c b/py/objclosure.c
index 4b37d2dd12..3e12358bbd 100644
--- a/py/objclosure.c
+++ b/py/objclosure.c
@@ -32,7 +32,7 @@
 typedef struct _mp_obj_closure_t {
     mp_obj_base_t base;
     mp_obj_t fun;
-    mp_uint_t n_closed;
+    size_t n_closed;
     mp_obj_t closed[];
 } mp_obj_closure_t;
 
@@ -41,7 +41,7 @@ STATIC mp_obj_t closure_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const
 
     // need to concatenate closed-over-vars and args
 
-    mp_uint_t n_total = self->n_closed + n_args + 2 * n_kw;
+    size_t n_total = self->n_closed + n_args + 2 * n_kw;
     if (n_total <= 5) {
         // use stack to allocate temporary args array
         mp_obj_t args2[5];
@@ -66,7 +66,7 @@ STATIC void closure_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_
     mp_print_str(print, "<closure ");
     mp_obj_print_helper(print, o->fun, PRINT_REPR);
     mp_printf(print, " at %p, n_closed=%u ", o, (int)o->n_closed);
-    for (mp_uint_t i = 0; i < o->n_closed; i++) {
+    for (size_t i = 0; i < o->n_closed; i++) {
         if (o->closed[i] == MP_OBJ_NULL) {
             mp_print_str(print, "(nil)");
         } else {
@@ -87,7 +87,7 @@ const mp_obj_type_t closure_type = {
     .call = closure_call,
 };
 
-mp_obj_t mp_obj_new_closure(mp_obj_t fun, mp_uint_t n_closed_over, const mp_obj_t *closed) {
+mp_obj_t mp_obj_new_closure(mp_obj_t fun, size_t n_closed_over, const mp_obj_t *closed) {
     mp_obj_closure_t *o = m_new_obj_var(mp_obj_closure_t, mp_obj_t, n_closed_over);
     o->base.type = &closure_type;
     o->fun = fun;
diff --git a/py/objcomplex.c b/py/objcomplex.c
index 96be25255c..5f9183f0e7 100644
--- a/py/objcomplex.c
+++ b/py/objcomplex.c
@@ -50,7 +50,11 @@ STATIC void complex_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_
     mp_obj_complex_t *o = MP_OBJ_TO_PTR(o_in);
 #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
     char buf[16];
+    #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C
+    const int precision = 6;
+    #else
     const int precision = 7;
+    #endif
 #else
     char buf[32];
     const int precision = 16;
@@ -80,7 +84,7 @@ STATIC mp_obj_t complex_make_new(const mp_obj_type_t *type_in, size_t n_args, si
         case 1:
             if (MP_OBJ_IS_STR(args[0])) {
                 // a string, parse it
-                mp_uint_t l;
+                size_t l;
                 const char *s = mp_obj_str_get_data(args[0], &l);
                 return mp_parse_num_decimal(s, l, true, true, NULL);
             } else if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) {
@@ -117,6 +121,7 @@ STATIC mp_obj_t complex_unary_op(mp_uint_t op, mp_obj_t o_in) {
     mp_obj_complex_t *o = MP_OBJ_TO_PTR(o_in);
     switch (op) {
         case MP_UNARY_OP_BOOL: return mp_obj_new_bool(o->real != 0 || o->imag != 0);
+        case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT(mp_float_hash(o->real) ^ mp_float_hash(o->imag));
         case MP_UNARY_OP_POSITIVE: return o_in;
         case MP_UNARY_OP_NEGATIVE: return mp_obj_new_complex(-o->real, -o->imag);
         default: return MP_OBJ_NULL; // op not supported
@@ -222,8 +227,8 @@ mp_obj_t mp_obj_complex_binary_op(mp_uint_t op, mp_float_t lhs_real, mp_float_t
             //        = exp(x3)*(cos(y3) + i*sin(y3))
             mp_float_t abs1 = MICROPY_FLOAT_C_FUN(sqrt)(lhs_real*lhs_real + lhs_imag*lhs_imag);
             if (abs1 == 0) {
-                if (rhs_imag == 0) {
-                    lhs_real = 1;
+                if (rhs_imag == 0 && rhs_real >= 0) {
+                    lhs_real = (rhs_real == 0);
                     rhs_real = 0;
                 } else {
                     mp_raise_msg(&mp_type_ZeroDivisionError, "0.0 to a complex power");
diff --git a/py/objdict.c b/py/objdict.c
index 4942d37791..12ba61b2e9 100644
--- a/py/objdict.c
+++ b/py/objdict.c
@@ -41,11 +41,11 @@ STATIC mp_obj_t dict_update(size_t n_args, const mp_obj_t *args, mp_map_t *kwarg
 // This is a helper function to iterate through a dictionary.  The state of
 // the iteration is held in *cur and should be initialised with zero for the
 // first call.  Will return NULL when no more elements are available.
-STATIC mp_map_elem_t *dict_iter_next(mp_obj_dict_t *dict, mp_uint_t *cur) {
-    mp_uint_t max = dict->map.alloc;
+STATIC mp_map_elem_t *dict_iter_next(mp_obj_dict_t *dict, size_t *cur) {
+    size_t max = dict->map.alloc;
     mp_map_t *map = &dict->map;
 
-    for (mp_uint_t i = *cur; i < max; i++) {
+    for (size_t i = *cur; i < max; i++) {
         if (MP_MAP_SLOT_IS_FILLED(map, i)) {
             *cur = i + 1;
             return &(map->table[i]);
@@ -65,7 +65,7 @@ STATIC void dict_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_
         mp_printf(print, "%q(", self->base.type->name);
     }
     mp_print_str(print, "{");
-    mp_uint_t cur = 0;
+    size_t cur = 0;
     mp_map_elem_t *next = NULL;
     while ((next = dict_iter_next(self, &cur)) != NULL) {
         if (!first) {
@@ -121,7 +121,7 @@ STATIC mp_obj_t dict_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
             if (MP_UNLIKELY(MP_OBJ_IS_TYPE(lhs_in, &mp_type_ordereddict) && MP_OBJ_IS_TYPE(rhs_in, &mp_type_ordereddict))) {
                 // Iterate through both dictionaries simultaneously and compare keys and values.
                 mp_obj_dict_t *rhs = MP_OBJ_TO_PTR(rhs_in);
-                mp_uint_t c1 = 0, c2 = 0;
+                size_t c1 = 0, c2 = 0;
                 mp_map_elem_t *e1 = dict_iter_next(o, &c1), *e2 = dict_iter_next(rhs, &c2);
                 for (; e1 != NULL && e2 != NULL; e1 = dict_iter_next(o, &c1), e2 = dict_iter_next(rhs, &c2)) {
                     if (!mp_obj_equal(e1->key, e2->key) || !mp_obj_equal(e1->value, e2->value)) {
@@ -137,7 +137,7 @@ STATIC mp_obj_t dict_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
                     return mp_const_false;
                 }
 
-                mp_uint_t cur = 0;
+                size_t cur = 0;
                 mp_map_elem_t *next = NULL;
                 while ((next = dict_iter_next(o, &cur)) != NULL) {
                     mp_map_elem_t *elem = mp_map_lookup(&rhs->map, next->key, MP_MAP_LOOKUP);
@@ -196,7 +196,7 @@ typedef struct _mp_obj_dict_it_t {
     mp_obj_base_t base;
     mp_fun_1_t iternext;
     mp_obj_t dict;
-    mp_uint_t cur;
+    size_t cur;
 } mp_obj_dict_it_t;
 
 STATIC mp_obj_t dict_it_iternext(mp_obj_t self_in) {
@@ -210,8 +210,9 @@ STATIC mp_obj_t dict_it_iternext(mp_obj_t self_in) {
     }
 }
 
-STATIC mp_obj_t dict_getiter(mp_obj_t self_in) {
-    mp_obj_dict_it_t *o = m_new_obj(mp_obj_dict_it_t);
+STATIC mp_obj_t dict_getiter(mp_obj_t self_in, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_dict_it_t) <= sizeof(mp_obj_iter_buf_t));
+    mp_obj_dict_it_t *o = (mp_obj_dict_it_t*)iter_buf;
     o->base.type = &mp_type_polymorph_iter;
     o->iternext = dict_it_iternext;
     o->dict = self_in;
@@ -249,7 +250,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_1(dict_copy_obj, dict_copy);
 
 // this is a classmethod
 STATIC mp_obj_t dict_fromkeys(size_t n_args, const mp_obj_t *args) {
-    mp_obj_t iter = mp_getiter(args[1]);
+    mp_obj_t iter = mp_getiter(args[1], NULL);
     mp_obj_t value = mp_const_none;
     mp_obj_t next = MP_OBJ_NULL;
 
@@ -340,7 +341,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(dict_setdefault_obj, 2, 3, dict_setde
 STATIC mp_obj_t dict_popitem(mp_obj_t self_in) {
     mp_check_self(MP_OBJ_IS_DICT_TYPE(self_in));
     mp_obj_dict_t *self = MP_OBJ_TO_PTR(self_in);
-    mp_uint_t cur = 0;
+    size_t cur = 0;
     mp_map_elem_t *next = dict_iter_next(self, &cur);
     if (next == NULL) {
         mp_raise_msg(&mp_type_KeyError, "popitem(): dictionary is empty");
@@ -367,7 +368,7 @@ STATIC mp_obj_t dict_update(size_t n_args, const mp_obj_t *args, mp_map_t *kwarg
         if (MP_OBJ_IS_DICT_TYPE(args[1])) {
             // update from other dictionary (make sure other is not self)
             if (args[1] != args[0]) {
-                mp_uint_t cur = 0;
+                size_t cur = 0;
                 mp_map_elem_t *elem = NULL;
                 while ((elem = dict_iter_next((mp_obj_dict_t*)MP_OBJ_TO_PTR(args[1]), &cur)) != NULL) {
                     mp_map_lookup(&self->map, elem->key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = elem->value;
@@ -375,17 +376,17 @@ STATIC mp_obj_t dict_update(size_t n_args, const mp_obj_t *args, mp_map_t *kwarg
             }
         } else {
             // update from a generic iterable of pairs
-            mp_obj_t iter = mp_getiter(args[1]);
+            mp_obj_t iter = mp_getiter(args[1], NULL);
             mp_obj_t next = MP_OBJ_NULL;
             while ((next = mp_iternext(iter)) != MP_OBJ_STOP_ITERATION) {
-                mp_obj_t inneriter = mp_getiter(next);
+                mp_obj_t inneriter = mp_getiter(next, NULL);
                 mp_obj_t key = mp_iternext(inneriter);
                 mp_obj_t value = mp_iternext(inneriter);
                 mp_obj_t stop = mp_iternext(inneriter);
                 if (key == MP_OBJ_STOP_ITERATION
                     || value == MP_OBJ_STOP_ITERATION
                     || stop != MP_OBJ_STOP_ITERATION) {
-                    mp_raise_msg(&mp_type_ValueError, "dictionary update sequence has the wrong length");
+                    mp_raise_ValueError("dict update sequence has wrong length");
                 } else {
                     mp_map_lookup(&self->map, key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = value;
                 }
@@ -394,7 +395,7 @@ STATIC mp_obj_t dict_update(size_t n_args, const mp_obj_t *args, mp_map_t *kwarg
     }
 
     // update the dict with any keyword args
-    for (mp_uint_t i = 0; i < kwargs->alloc; i++) {
+    for (size_t i = 0; i < kwargs->alloc; i++) {
         if (MP_MAP_SLOT_IS_FILLED(kwargs, i)) {
             mp_map_lookup(&self->map, kwargs->table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = kwargs->table[i].value;
         }
@@ -423,7 +424,7 @@ typedef struct _mp_obj_dict_view_it_t {
     mp_obj_base_t base;
     mp_dict_view_kind_t kind;
     mp_obj_t dict;
-    mp_uint_t cur;
+    size_t cur;
 } mp_obj_dict_view_it_t;
 
 typedef struct _mp_obj_dict_view_t {
@@ -457,14 +458,15 @@ STATIC mp_obj_t dict_view_it_iternext(mp_obj_t self_in) {
 STATIC const mp_obj_type_t dict_view_it_type = {
     { &mp_type_type },
     .name = MP_QSTR_iterator,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = dict_view_it_iternext,
 };
 
-STATIC mp_obj_t dict_view_getiter(mp_obj_t view_in) {
+STATIC mp_obj_t dict_view_getiter(mp_obj_t view_in, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_dict_view_it_t) <= sizeof(mp_obj_iter_buf_t));
     mp_check_self(MP_OBJ_IS_TYPE(view_in, &dict_view_type));
     mp_obj_dict_view_t *view = MP_OBJ_TO_PTR(view_in);
-    mp_obj_dict_view_it_t *o = m_new_obj(mp_obj_dict_view_it_t);
+    mp_obj_dict_view_it_t *o = (mp_obj_dict_view_it_t*)iter_buf;
     o->base.type = &dict_view_it_type;
     o->kind = view->kind;
     o->dict = view->dict;
@@ -479,7 +481,8 @@ STATIC void dict_view_print(const mp_print_t *print, mp_obj_t self_in, mp_print_
     bool first = true;
     mp_print_str(print, mp_dict_view_names[self->kind]);
     mp_print_str(print, "([");
-    mp_obj_t self_iter = dict_view_getiter(self_in);
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t self_iter = dict_view_getiter(self_in, &iter_buf);
     mp_obj_t next = MP_OBJ_NULL;
     while ((next = dict_view_it_iternext(self_iter)) != MP_OBJ_STOP_ITERATION) {
         if (!first) {
@@ -574,8 +577,6 @@ const mp_obj_type_t mp_type_dict = {
 };
 
 #if MICROPY_PY_COLLECTIONS_ORDEREDDICT
-STATIC const mp_rom_obj_tuple_t ordereddict_base_tuple = {{&mp_type_tuple}, 1, {MP_ROM_PTR(&mp_type_dict)}};
-
 const mp_obj_type_t mp_type_ordereddict = {
     { &mp_type_type },
     .name = MP_QSTR_OrderedDict,
@@ -585,23 +586,23 @@ const mp_obj_type_t mp_type_ordereddict = {
     .binary_op = dict_binary_op,
     .subscr = dict_subscr,
     .getiter = dict_getiter,
-    .bases_tuple = (mp_obj_tuple_t*)(mp_rom_obj_tuple_t*)&ordereddict_base_tuple,
+    .parent = &mp_type_dict,
     .locals_dict = (mp_obj_dict_t*)&dict_locals_dict,
 };
 #endif
 
-void mp_obj_dict_init(mp_obj_dict_t *dict, mp_uint_t n_args) {
+void mp_obj_dict_init(mp_obj_dict_t *dict, size_t n_args) {
     dict->base.type = &mp_type_dict;
     mp_map_init(&dict->map, n_args);
 }
 
-mp_obj_t mp_obj_new_dict(mp_uint_t n_args) {
+mp_obj_t mp_obj_new_dict(size_t n_args) {
     mp_obj_dict_t *o = m_new_obj(mp_obj_dict_t);
     mp_obj_dict_init(o, n_args);
     return MP_OBJ_FROM_PTR(o);
 }
 
-mp_uint_t mp_obj_dict_len(mp_obj_t self_in) {
+size_t mp_obj_dict_len(mp_obj_t self_in) {
     mp_obj_dict_t *self = MP_OBJ_TO_PTR(self_in);
     return self->map.used;
 }
diff --git a/py/objenumerate.c b/py/objenumerate.c
index 2b646ca45d..faae6516c0 100644
--- a/py/objenumerate.c
+++ b/py/objenumerate.c
@@ -56,13 +56,13 @@ STATIC mp_obj_t enumerate_make_new(const mp_obj_type_t *type, size_t n_args, siz
     // create enumerate object
     mp_obj_enumerate_t *o = m_new_obj(mp_obj_enumerate_t);
     o->base.type = type;
-    o->iter = mp_getiter(arg_vals.iterable.u_obj);
+    o->iter = mp_getiter(arg_vals.iterable.u_obj, NULL);
     o->cur = arg_vals.start.u_int;
 #else
     (void)n_kw;
     mp_obj_enumerate_t *o = m_new_obj(mp_obj_enumerate_t);
     o->base.type = type;
-    o->iter = mp_getiter(args[0]);
+    o->iter = mp_getiter(args[0], NULL);
     o->cur = n_args > 1 ? mp_obj_get_int(args[1]) : 0;
 #endif
 
@@ -74,7 +74,7 @@ const mp_obj_type_t mp_type_enumerate = {
     .name = MP_QSTR_enumerate,
     .make_new = enumerate_make_new,
     .iternext = enumerate_iternext,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
 };
 
 STATIC mp_obj_t enumerate_iternext(mp_obj_t self_in) {
diff --git a/py/objexcept.c b/py/objexcept.c
index c1b992d276..4722aca914 100644
--- a/py/objexcept.c
+++ b/py/objexcept.c
@@ -197,9 +197,6 @@ const mp_obj_type_t mp_type_BaseException = {
     .locals_dict = (mp_obj_dict_t*)&exc_locals_dict,
 };
 
-#define MP_DEFINE_EXCEPTION_BASE(base_name) \
-STATIC const mp_rom_obj_tuple_t mp_type_ ## base_name ## _base_tuple = {{&mp_type_tuple}, 1, {MP_ROM_PTR(&mp_type_ ## base_name)}};\
-
 #define MP_DEFINE_EXCEPTION(exc_name, base_name) \
 const mp_obj_type_t mp_type_ ## exc_name = { \
     { &mp_type_type }, \
@@ -207,23 +204,20 @@ const mp_obj_type_t mp_type_ ## exc_name = { \
     .print = mp_obj_exception_print, \
     .make_new = mp_obj_exception_make_new, \
     .attr = exception_attr, \
-    .bases_tuple = (mp_obj_tuple_t*)(mp_rom_obj_tuple_t*)&mp_type_ ## base_name ## _base_tuple, \
+    .parent = &mp_type_ ## base_name, \
 };
 
 // List of all exceptions, arranged as in the table at:
 // http://docs.python.org/3/library/exceptions.html
-MP_DEFINE_EXCEPTION_BASE(BaseException)
 MP_DEFINE_EXCEPTION(SystemExit, BaseException)
 MP_DEFINE_EXCEPTION(KeyboardInterrupt, BaseException)
 MP_DEFINE_EXCEPTION(GeneratorExit, BaseException)
 MP_DEFINE_EXCEPTION(Exception, BaseException)
-  MP_DEFINE_EXCEPTION_BASE(Exception)
   #if MICROPY_PY_ASYNC_AWAIT
   MP_DEFINE_EXCEPTION(StopAsyncIteration, Exception)
   #endif
   MP_DEFINE_EXCEPTION(StopIteration, Exception)
   MP_DEFINE_EXCEPTION(ArithmeticError, Exception)
-    MP_DEFINE_EXCEPTION_BASE(ArithmeticError)
     //MP_DEFINE_EXCEPTION(FloatingPointError, ArithmeticError)
     MP_DEFINE_EXCEPTION(OverflowError, ArithmeticError)
     MP_DEFINE_EXCEPTION(ZeroDivisionError, ArithmeticError)
@@ -235,18 +229,15 @@ MP_DEFINE_EXCEPTION(Exception, BaseException)
   MP_DEFINE_EXCEPTION(ImportError, Exception)
   //MP_DEFINE_EXCEPTION(IOError, Exception) use OSError instead
   MP_DEFINE_EXCEPTION(LookupError, Exception)
-    MP_DEFINE_EXCEPTION_BASE(LookupError)
     MP_DEFINE_EXCEPTION(IndexError, LookupError)
     MP_DEFINE_EXCEPTION(KeyError, LookupError)
   MP_DEFINE_EXCEPTION(MemoryError, Exception)
   MP_DEFINE_EXCEPTION(NameError, Exception)
     /*
-    MP_DEFINE_EXCEPTION_BASE(NameError)
     MP_DEFINE_EXCEPTION(UnboundLocalError, NameError)
     */
   MP_DEFINE_EXCEPTION(OSError, Exception)
 #if MICROPY_PY_BUILTINS_TIMEOUTERROR
-    MP_DEFINE_EXCEPTION_BASE(OSError)
     MP_DEFINE_EXCEPTION(TimeoutError, OSError)
 #endif
     /*
@@ -267,30 +258,24 @@ MP_DEFINE_EXCEPTION(Exception, BaseException)
     MP_DEFINE_EXCEPTION(ReferenceError, Exception)
     */
   MP_DEFINE_EXCEPTION(RuntimeError, Exception)
-    MP_DEFINE_EXCEPTION_BASE(RuntimeError)
     MP_DEFINE_EXCEPTION(NotImplementedError, RuntimeError)
   MP_DEFINE_EXCEPTION(SyntaxError, Exception)
-    MP_DEFINE_EXCEPTION_BASE(SyntaxError)
     MP_DEFINE_EXCEPTION(IndentationError, SyntaxError)
     /*
-      MP_DEFINE_EXCEPTION_BASE(IndentationError)
       MP_DEFINE_EXCEPTION(TabError, IndentationError)
       */
   //MP_DEFINE_EXCEPTION(SystemError, Exception)
   MP_DEFINE_EXCEPTION(TypeError, Exception)
 #if MICROPY_EMIT_NATIVE
-    MP_DEFINE_EXCEPTION_BASE(TypeError)
     MP_DEFINE_EXCEPTION(ViperTypeError, TypeError)
 #endif
   MP_DEFINE_EXCEPTION(ValueError, Exception)
 #if MICROPY_PY_BUILTINS_STR_UNICODE
-    MP_DEFINE_EXCEPTION_BASE(ValueError)
     MP_DEFINE_EXCEPTION(UnicodeError, ValueError)
     //TODO: Implement more UnicodeError subclasses which take arguments
 #endif
   /*
   MP_DEFINE_EXCEPTION(Warning, Exception)
-    MP_DEFINE_EXCEPTION_BASE(Warning)
     MP_DEFINE_EXCEPTION(DeprecationWarning, Warning)
     MP_DEFINE_EXCEPTION(PendingDeprecationWarning, Warning)
     MP_DEFINE_EXCEPTION(RuntimeWarning, Warning)
@@ -312,7 +297,7 @@ mp_obj_t mp_obj_new_exception_arg1(const mp_obj_type_t *exc_type, mp_obj_t arg)
     return mp_obj_new_exception_args(exc_type, 1, &arg);
 }
 
-mp_obj_t mp_obj_new_exception_args(const mp_obj_type_t *exc_type, mp_uint_t n_args, const mp_obj_t *args) {
+mp_obj_t mp_obj_new_exception_args(const mp_obj_type_t *exc_type, size_t n_args, const mp_obj_t *args) {
     assert(exc_type->make_new == mp_obj_exception_make_new);
     return exc_type->make_new(exc_type, n_args, 0, args);
 }
@@ -348,7 +333,7 @@ mp_obj_t mp_obj_new_exception_msg_varg(const mp_obj_type_t *exc_type, const char
             tuple->items[0] = MP_OBJ_FROM_PTR(str);
 
             byte *str_data = (byte *)&str[1];
-            uint max_len = MP_STATE_VM(mp_emergency_exception_buf) + mp_emergency_exception_buf_size
+            size_t max_len = (byte*)MP_STATE_VM(mp_emergency_exception_buf) + mp_emergency_exception_buf_size
                          - str_data;
 
             vstr_t vstr;
@@ -366,14 +351,14 @@ mp_obj_t mp_obj_new_exception_msg_varg(const mp_obj_type_t *exc_type, const char
 
             o->args = tuple;
 
-            uint offset = &str_data[str->len] - MP_STATE_VM(mp_emergency_exception_buf);
+            size_t offset = &str_data[str->len] - (byte*)MP_STATE_VM(mp_emergency_exception_buf);
             offset += sizeof(void *) - 1;
             offset &= ~(sizeof(void *) - 1);
 
             if ((mp_emergency_exception_buf_size - offset) > (sizeof(o->traceback_data[0]) * 3)) {
                 // We have room to store some traceback.
                 o->traceback_data = (size_t*)((byte *)MP_STATE_VM(mp_emergency_exception_buf) + offset);
-                o->traceback_alloc = (MP_STATE_VM(mp_emergency_exception_buf) + mp_emergency_exception_buf_size - (byte *)o->traceback_data) / sizeof(o->traceback_data[0]);
+                o->traceback_alloc = ((byte*)MP_STATE_VM(mp_emergency_exception_buf) + mp_emergency_exception_buf_size - (byte *)o->traceback_data) / sizeof(o->traceback_data[0]);
                 o->traceback_len = 0;
             }
         }
@@ -383,10 +368,8 @@ mp_obj_t mp_obj_new_exception_msg_varg(const mp_obj_type_t *exc_type, const char
         o->traceback_data = NULL;
         o->args = MP_OBJ_TO_PTR(mp_obj_new_tuple(1, NULL));
 
-        if (fmt == NULL) {
-            // no message
-            assert(0);
-        } else {
+        assert(fmt != NULL);
+        {
             if (strchr(fmt, '%') == NULL) {
                 // no formatting substitutions, avoid allocating vstr.
                 o->args->items[0] = mp_obj_new_str(fmt, strlen(fmt), false);
diff --git a/py/objexcept.h b/py/objexcept.h
index 88bce2b370..3128fded79 100644
--- a/py/objexcept.h
+++ b/py/objexcept.h
@@ -31,8 +31,8 @@
 
 typedef struct _mp_obj_exception_t {
     mp_obj_base_t base;
-    mp_uint_t traceback_alloc : (BITS_PER_WORD / 2);
-    mp_uint_t traceback_len : (BITS_PER_WORD / 2);
+    size_t traceback_alloc : (8 * sizeof(size_t) / 2);
+    size_t traceback_len : (8 * sizeof(size_t) / 2);
     size_t *traceback_data;
     mp_obj_tuple_t *args;
 } mp_obj_exception_t;
diff --git a/py/objfilter.c b/py/objfilter.c
index a5c85b2cef..a655b8a785 100644
--- a/py/objfilter.c
+++ b/py/objfilter.c
@@ -39,7 +39,7 @@ STATIC mp_obj_t filter_make_new(const mp_obj_type_t *type, size_t n_args, size_t
     mp_obj_filter_t *o = m_new_obj(mp_obj_filter_t);
     o->base.type = type;
     o->fun = args[0];
-    o->iter = mp_getiter(args[1]);
+    o->iter = mp_getiter(args[1], NULL);
     return MP_OBJ_FROM_PTR(o);
 }
 
@@ -65,7 +65,7 @@ const mp_obj_type_t mp_type_filter = {
     { &mp_type_type },
     .name = MP_QSTR_filter,
     .make_new = filter_make_new,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = filter_iternext,
 };
 
diff --git a/py/objfloat.c b/py/objfloat.c
index 73d07feac8..d0e6166121 100644
--- a/py/objfloat.c
+++ b/py/objfloat.c
@@ -59,12 +59,65 @@ const mp_obj_float_t mp_const_float_pi_obj = {{&mp_type_float}, M_PI};
 
 #endif
 
+#if MICROPY_FLOAT_HIGH_QUALITY_HASH
+// must return actual integer value if it fits in mp_int_t
+mp_int_t mp_float_hash(mp_float_t src) {
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+typedef uint64_t mp_float_uint_t;
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+typedef uint32_t mp_float_uint_t;
+#endif
+    union {
+        mp_float_t f;
+        #if MP_ENDIANNESS_LITTLE
+        struct { mp_float_uint_t frc:MP_FLOAT_FRAC_BITS, exp:MP_FLOAT_EXP_BITS, sgn:1; } p;
+        #else
+        struct { mp_float_uint_t sgn:1, exp:MP_FLOAT_EXP_BITS, frc:MP_FLOAT_FRAC_BITS; } p;
+        #endif
+        mp_float_uint_t i;
+    } u = {.f = src};
+
+    mp_int_t val;
+    const int adj_exp = (int)u.p.exp - MP_FLOAT_EXP_BIAS;
+    if (adj_exp < 0) {
+        // value < 1; must be sure to handle 0.0 correctly (ie return 0)
+        val = u.i;
+    } else {
+        // if adj_exp is max then: u.p.frc==0 indicates inf, else NaN
+        // else: 1 <= value
+        mp_float_uint_t frc = u.p.frc | ((mp_float_uint_t)1 << MP_FLOAT_FRAC_BITS);
+
+        if (adj_exp <= MP_FLOAT_FRAC_BITS) {
+            // number may have a fraction; xor the integer part with the fractional part
+            val = (frc >> (MP_FLOAT_FRAC_BITS - adj_exp))
+                ^ (frc & ((1 << (MP_FLOAT_FRAC_BITS - adj_exp)) - 1));
+        } else if ((unsigned int)adj_exp < BITS_PER_BYTE * sizeof(mp_int_t) - 1) {
+            // the number is a (big) whole integer and will fit in val's signed-width
+            val = (mp_int_t)frc << (adj_exp - MP_FLOAT_FRAC_BITS);
+        } else {
+            // integer part will overflow val's width so just use what bits we can
+            val = frc;
+        }
+    }
+
+    if (u.p.sgn) {
+        val = -val;
+    }
+
+    return val;
+}
+#endif
+
 STATIC void float_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t kind) {
     (void)kind;
     mp_float_t o_val = mp_obj_float_get(o_in);
 #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
     char buf[16];
+    #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C
+    const int precision = 6;
+    #else
     const int precision = 7;
+    #endif
 #else
     char buf[32];
     const int precision = 16;
@@ -89,7 +142,7 @@ STATIC mp_obj_t float_make_new(const mp_obj_type_t *type_in, size_t n_args, size
         default:
             if (MP_OBJ_IS_STR(args[0])) {
                 // a string, parse it
-                mp_uint_t l;
+                size_t l;
                 const char *s = mp_obj_str_get_data(args[0], &l);
                 return mp_parse_num_decimal(s, l, false, false, NULL);
             } else if (mp_obj_is_float(args[0])) {
@@ -106,6 +159,7 @@ STATIC mp_obj_t float_unary_op(mp_uint_t op, mp_obj_t o_in) {
     mp_float_t val = mp_obj_float_get(o_in);
     switch (op) {
         case MP_UNARY_OP_BOOL: return mp_obj_new_bool(val != 0);
+        case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT(mp_float_hash(val));
         case MP_UNARY_OP_POSITIVE: return o_in;
         case MP_UNARY_OP_NEGATIVE: return mp_obj_new_float(-val);
         default: return MP_OBJ_NULL; // op not supported
@@ -228,7 +282,12 @@ mp_obj_t mp_obj_float_binary_op(mp_uint_t op, mp_float_t lhs_val, mp_obj_t rhs_i
             }
             break;
         case MP_BINARY_OP_POWER:
-        case MP_BINARY_OP_INPLACE_POWER: lhs_val = MICROPY_FLOAT_C_FUN(pow)(lhs_val, rhs_val); break;
+        case MP_BINARY_OP_INPLACE_POWER:
+            if (lhs_val == 0 && rhs_val < 0) {
+                goto zero_division_error;
+            }
+            lhs_val = MICROPY_FLOAT_C_FUN(pow)(lhs_val, rhs_val);
+            break;
         case MP_BINARY_OP_DIVMOD: {
             if (rhs_val == 0) {
                 goto zero_division_error;
diff --git a/py/objfun.c b/py/objfun.c
index 207e68a771..08d031c8d8 100644
--- a/py/objfun.c
+++ b/py/objfun.c
@@ -181,9 +181,9 @@ STATIC void fun_bc_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t
 #endif
 
 #if DEBUG_PRINT
-STATIC void dump_args(const mp_obj_t *a, mp_uint_t sz) {
+STATIC void dump_args(const mp_obj_t *a, size_t sz) {
     DEBUG_printf("%p: ", a);
-    for (mp_uint_t i = 0; i < sz; i++) {
+    for (size_t i = 0; i < sz; i++) {
         DEBUG_printf("%p ", a[i]);
     }
     DEBUG_printf("\n");
@@ -220,9 +220,9 @@ mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, size_t n_args
         return NULL;
     }
 
-    code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
-    code_state->n_state = n_state;
-    mp_setup_code_state(code_state, self, n_args, n_kw, args);
+    code_state->fun_bc = self;
+    code_state->ip = 0;
+    mp_setup_code_state(code_state, n_args, n_kw, args);
 
     // execute the byte code with the correct globals context
     code_state->old_globals = mp_globals_get();
@@ -247,15 +247,15 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const
     const byte *ip = self->bytecode;
 
     // bytecode prelude: state size and exception stack size
-    mp_uint_t n_state = mp_decode_uint(&ip);
-    mp_uint_t n_exc_stack = mp_decode_uint(&ip);
+    size_t n_state = mp_decode_uint(&ip);
+    size_t n_exc_stack = mp_decode_uint(&ip);
 
 #if VM_DETECT_STACK_OVERFLOW
     n_state += 1;
 #endif
 
     // allocate state for locals and stack
-    mp_uint_t state_size = n_state * sizeof(mp_obj_t) + n_exc_stack * sizeof(mp_exc_stack_t);
+    size_t state_size = n_state * sizeof(mp_obj_t) + n_exc_stack * sizeof(mp_exc_stack_t);
     mp_code_state_t *code_state = NULL;
     if (state_size > VM_MAX_STATE_ON_STACK) {
         code_state = m_new_obj_var_maybe(mp_code_state_t, byte, state_size);
@@ -265,9 +265,9 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const
         state_size = 0; // indicate that we allocated using alloca
     }
 
-    code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
-    code_state->n_state = n_state;
-    mp_setup_code_state(code_state, self, n_args, n_kw, args);
+    code_state->fun_bc = self;
+    code_state->ip = 0;
+    mp_setup_code_state(code_state, n_args, n_kw, args);
 
     // execute the byte code with the correct globals context
     code_state->old_globals = mp_globals_get();
@@ -288,7 +288,7 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const
     if (!(vm_return_kind == MP_VM_RETURN_EXCEPTION && self->n_pos_args + self->n_kwonly_args == 0)) {
         // Just check to see that we have at least 1 null object left in the state.
         bool overflow = true;
-        for (mp_uint_t i = 0; i < n_state - self->n_pos_args - self->n_kwonly_args; i++) {
+        for (size_t i = 0; i < n_state - self->n_pos_args - self->n_kwonly_args; i++) {
             if (code_state->state[i] == MP_OBJ_NULL) {
                 overflow = false;
                 break;
@@ -350,8 +350,8 @@ const mp_obj_type_t mp_type_fun_bc = {
 };
 
 mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args_in, mp_obj_t def_kw_args, const byte *code, const mp_uint_t *const_table) {
-    mp_uint_t n_def_args = 0;
-    mp_uint_t n_extra_args = 0;
+    size_t n_def_args = 0;
+    size_t n_extra_args = 0;
     mp_obj_tuple_t *def_args = MP_OBJ_TO_PTR(def_args_in);
     if (def_args_in != MP_OBJ_NULL) {
         assert(MP_OBJ_IS_TYPE(def_args_in, &mp_type_tuple));
@@ -409,7 +409,7 @@ mp_obj_t mp_obj_new_fun_native(mp_obj_t def_args_in, mp_obj_t def_kw_args, const
 
 typedef struct _mp_obj_fun_viper_t {
     mp_obj_base_t base;
-    mp_uint_t n_args;
+    size_t n_args;
     void *fun_data; // GC must be able to trace this pointer
     mp_uint_t type_sig;
 } mp_obj_fun_viper_t;
@@ -457,7 +457,7 @@ STATIC const mp_obj_type_t mp_type_fun_viper = {
     .unary_op = mp_generic_unary_op,
 };
 
-mp_obj_t mp_obj_new_fun_viper(mp_uint_t n_args, void *fun_data, mp_uint_t type_sig) {
+mp_obj_t mp_obj_new_fun_viper(size_t n_args, void *fun_data, mp_uint_t type_sig) {
     mp_obj_fun_viper_t *o = m_new_obj(mp_obj_fun_viper_t);
     o->base.type = &mp_type_fun_viper;
     o->n_args = n_args;
@@ -475,7 +475,7 @@ mp_obj_t mp_obj_new_fun_viper(mp_uint_t n_args, void *fun_data, mp_uint_t type_s
 
 typedef struct _mp_obj_fun_asm_t {
     mp_obj_base_t base;
-    mp_uint_t n_args;
+    size_t n_args;
     void *fun_data; // GC must be able to trace this pointer
     mp_uint_t type_sig;
 } mp_obj_fun_asm_t;
@@ -501,7 +501,7 @@ STATIC mp_uint_t convert_obj_for_inline_asm(mp_obj_t obj) {
         return mp_obj_int_get_truncated(obj);
     } else if (MP_OBJ_IS_STR(obj)) {
         // pointer to the string (it's probably constant though!)
-        mp_uint_t l;
+        size_t l;
         return (mp_uint_t)mp_obj_str_get_data(obj, &l);
     } else {
         mp_obj_type_t *type = mp_obj_get_type(obj);
@@ -511,17 +511,11 @@ STATIC mp_uint_t convert_obj_for_inline_asm(mp_obj_t obj) {
             // convert float to int (could also pass in float registers)
             return (mp_int_t)mp_obj_float_get(obj);
 #endif
-        } else if (type == &mp_type_tuple) {
+        } else if (type == &mp_type_tuple || type == &mp_type_list) {
             // pointer to start of tuple (could pass length, but then could use len(x) for that)
-            mp_uint_t len;
-            mp_obj_t *items;
-            mp_obj_tuple_get(obj, &len, &items);
-            return (mp_uint_t)items;
-        } else if (type == &mp_type_list) {
-            // pointer to start of list (could pass length, but then could use len(x) for that)
-            mp_uint_t len;
+            size_t len;
             mp_obj_t *items;
-            mp_obj_list_get(obj, &len, &items);
+            mp_obj_get_array(obj, &len, &items);
             return (mp_uint_t)items;
         } else {
             mp_buffer_info_t bufinfo;
@@ -573,7 +567,7 @@ STATIC const mp_obj_type_t mp_type_fun_asm = {
     .unary_op = mp_generic_unary_op,
 };
 
-mp_obj_t mp_obj_new_fun_asm(mp_uint_t n_args, void *fun_data, mp_uint_t type_sig) {
+mp_obj_t mp_obj_new_fun_asm(size_t n_args, void *fun_data, mp_uint_t type_sig) {
     mp_obj_fun_asm_t *o = m_new_obj(mp_obj_fun_asm_t);
     o->base.type = &mp_type_fun_asm;
     o->n_args = n_args;
diff --git a/py/objgenerator.c b/py/objgenerator.c
index cbef9fea3d..2e57fdf4b6 100644
--- a/py/objgenerator.c
+++ b/py/objgenerator.c
@@ -67,9 +67,9 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons
     o->base.type = &mp_type_gen_instance;
 
     o->globals = self_fun->globals;
-    o->code_state.n_state = n_state;
-    o->code_state.ip = (byte*)(ip - self_fun->bytecode); // offset to prelude
-    mp_setup_code_state(&o->code_state, self_fun, n_args, n_kw, args);
+    o->code_state.fun_bc = self_fun;
+    o->code_state.ip = 0;
+    mp_setup_code_state(&o->code_state, n_args, n_kw, args);
     return MP_OBJ_FROM_PTR(o);
 }
 
@@ -92,7 +92,7 @@ mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun) {
 STATIC void gen_instance_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
     (void)kind;
     mp_obj_gen_instance_t *self = MP_OBJ_TO_PTR(self_in);
-    mp_printf(print, "<generator object '%q' at %p>", mp_obj_code_get_name(self->code_state.code_info), self);
+    mp_printf(print, "<generator object '%q' at %p>", mp_obj_fun_get_name(MP_OBJ_FROM_PTR(self->code_state.fun_bc)), self);
 }
 
 mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t throw_value, mp_obj_t *ret_val) {
@@ -105,7 +105,7 @@ mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_
     }
     if (self->code_state.sp == self->code_state.state - 1) {
         if (send_value != mp_const_none) {
-            mp_raise_msg(&mp_type_TypeError, "can't send non-None value to a just-started generator");
+            mp_raise_TypeError("can't send non-None value to a just-started generator");
         }
     } else {
         *self->code_state.sp = send_value;
@@ -134,10 +134,12 @@ mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_
             }
             break;
 
-        case MP_VM_RETURN_EXCEPTION:
+        case MP_VM_RETURN_EXCEPTION: {
+            size_t n_state = mp_decode_uint_value(self->code_state.fun_bc->bytecode);
             self->code_state.ip = 0;
-            *ret_val = self->code_state.state[self->code_state.n_state - 1];
+            *ret_val = self->code_state.state[n_state - 1];
             break;
+        }
     }
 
     return ret_kind;
@@ -156,9 +158,6 @@ STATIC mp_obj_t gen_resume_and_raise(mp_obj_t self_in, mp_obj_t send_value, mp_o
             }
 
         case MP_VM_RETURN_YIELD:
-            if (throw_value != MP_OBJ_NULL && mp_obj_is_subclass_fast(MP_OBJ_FROM_PTR(mp_obj_get_type(throw_value)), MP_OBJ_FROM_PTR(&mp_type_GeneratorExit))) {
-                mp_raise_msg(&mp_type_RuntimeError, "generator ignored GeneratorExit");
-            }
             return ret;
 
         case MP_VM_RETURN_EXCEPTION:
@@ -193,7 +192,6 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(gen_instance_send_obj, gen_instance_send);
 STATIC mp_obj_t gen_instance_close(mp_obj_t self_in);
 STATIC mp_obj_t gen_instance_throw(size_t n_args, const mp_obj_t *args) {
     mp_obj_t exc = (n_args == 2) ? args[1] : args[2];
-    exc = mp_make_raise_obj(exc);
 
     mp_obj_t ret = gen_resume_and_raise(args[0], mp_const_none, exc);
     if (ret == MP_OBJ_STOP_ITERATION) {
@@ -240,7 +238,7 @@ const mp_obj_type_t mp_type_gen_instance = {
     { &mp_type_type },
     .name = MP_QSTR_generator,
     .print = gen_instance_print,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = gen_instance_iternext,
     .locals_dict = (mp_obj_dict_t*)&gen_instance_locals_dict,
 };
diff --git a/py/objgetitemiter.c b/py/objgetitemiter.c
index 526180fdbc..a3c754448f 100644
--- a/py/objgetitemiter.c
+++ b/py/objgetitemiter.c
@@ -61,13 +61,14 @@ STATIC mp_obj_t it_iternext(mp_obj_t self_in) {
 STATIC const mp_obj_type_t it_type = {
     { &mp_type_type },
     .name = MP_QSTR_iterator,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = it_iternext,
 };
 
 // args are those returned from mp_load_method_maybe (ie either an attribute or a method)
-mp_obj_t mp_obj_new_getitem_iter(mp_obj_t *args) {
-    mp_obj_getitem_iter_t *o = m_new_obj(mp_obj_getitem_iter_t);
+mp_obj_t mp_obj_new_getitem_iter(mp_obj_t *args, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_getitem_iter_t) <= sizeof(mp_obj_iter_buf_t));
+    mp_obj_getitem_iter_t *o = (mp_obj_getitem_iter_t*)iter_buf;
     o->base.type = &it_type;
     o->args[0] = args[0];
     o->args[1] = args[1];
diff --git a/py/objint.c b/py/objint.c
index 5842a00a4d..bda9c46cf0 100644
--- a/py/objint.c
+++ b/py/objint.c
@@ -56,7 +56,7 @@ STATIC mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args,
                 return args[0];
             } else if (MP_OBJ_IS_STR_OR_BYTES(args[0])) {
                 // a string, parse it
-                mp_uint_t l;
+                size_t l;
                 const char *s = mp_obj_str_get_data(args[0], &l);
                 return mp_parse_num_integer(s, l, 0, NULL);
 #if MICROPY_PY_BUILTINS_FLOAT
@@ -72,7 +72,7 @@ STATIC mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args,
         default: {
             // should be a string, parse it
             // TODO proper error checking of argument types
-            mp_uint_t l;
+            size_t l;
             const char *s = mp_obj_str_get_data(args[0], &l);
             return mp_parse_num_integer(s, l, mp_obj_get_int(args[1]), NULL);
         }
@@ -80,7 +80,14 @@ STATIC mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args,
 }
 
 #if MICROPY_PY_BUILTINS_FLOAT
-mp_fp_as_int_class_t mp_classify_fp_as_int(mp_float_t val) {
+
+typedef enum {
+    MP_FP_CLASS_FIT_SMALLINT,
+    MP_FP_CLASS_FIT_LONGINT,
+    MP_FP_CLASS_OVERFLOW
+} mp_fp_as_int_class_t;
+
+STATIC mp_fp_as_int_class_t mp_classify_fp_as_int(mp_float_t val) {
     union {
         mp_float_t f;
 #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
@@ -103,7 +110,12 @@ mp_fp_as_int_class_t mp_classify_fp_as_int(mp_float_t val) {
 #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
         e |= u.i[MP_ENDIANNESS_BIG] != 0;
 #endif
-        e += ((1 << MP_FLOAT_EXP_BITS) - 1) << MP_FLOAT_EXP_SHIFT_I32;
+        if ((e & ~(1 << MP_FLOAT_SIGN_SHIFT_I32)) == 0) {
+            // handle case of -0 (when sign is set but rest of bits are zero)
+            e = 0;
+        } else {
+            e += ((1 << MP_FLOAT_EXP_BITS) - 1) << MP_FLOAT_EXP_SHIFT_I32;
+        }
     } else {
         e &= ~((1 << MP_FLOAT_EXP_SHIFT_I32) - 1);
     }
@@ -125,13 +137,50 @@ mp_fp_as_int_class_t mp_classify_fp_as_int(mp_float_t val) {
 }
 #undef MP_FLOAT_SIGN_SHIFT_I32
 #undef MP_FLOAT_EXP_SHIFT_I32
+
+mp_obj_t mp_obj_new_int_from_float(mp_float_t val) {
+    int cl = fpclassify(val);
+    if (cl == FP_INFINITE) {
+        nlr_raise(mp_obj_new_exception_msg(&mp_type_OverflowError, "can't convert inf to int"));
+    } else if (cl == FP_NAN) {
+        mp_raise_ValueError("can't convert NaN to int");
+    } else {
+        mp_fp_as_int_class_t icl = mp_classify_fp_as_int(val);
+        if (icl == MP_FP_CLASS_FIT_SMALLINT) {
+            return MP_OBJ_NEW_SMALL_INT((mp_int_t)val);
+        #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ
+        } else {
+            mp_obj_int_t *o = mp_obj_int_new_mpz();
+            mpz_set_from_float(&o->mpz, val);
+            return MP_OBJ_FROM_PTR(o);
+        }
+        #else
+        #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
+        } else if (icl == MP_FP_CLASS_FIT_LONGINT) {
+            return mp_obj_new_int_from_ll((long long)val);
+        #endif
+        } else {
+            mp_raise_ValueError("float too big");
+        }
+        #endif
+    }
+}
+
+#endif
+
+#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
+typedef mp_longint_impl_t fmt_int_t;
+typedef unsigned long long fmt_uint_t;
+#else
+typedef mp_int_t fmt_int_t;
+typedef mp_uint_t fmt_uint_t;
 #endif
 
 void mp_obj_int_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
     (void)kind;
     // The size of this buffer is rather arbitrary. If it's not large
     // enough, a dynamic one will be allocated.
-    char stack_buf[sizeof(mp_int_t) * 4];
+    char stack_buf[sizeof(fmt_int_t) * 4];
     char *buf = stack_buf;
     size_t buf_size = sizeof(stack_buf);
     size_t fmt_size;
@@ -144,12 +193,6 @@ void mp_obj_int_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t
     }
 }
 
-#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
-typedef mp_longint_impl_t fmt_int_t;
-#else
-typedef mp_int_t fmt_int_t;
-#endif
-
 STATIC const uint8_t log_base2_floor[] = {
     0, 1, 1, 2,
     2, 2, 2, 3,
@@ -183,7 +226,7 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
     fmt_int_t num;
     if (MP_OBJ_IS_SMALL_INT(self_in)) {
         // A small int; get the integer value to format.
-        num = mp_obj_get_int(self_in);
+        num = MP_OBJ_SMALL_INT_VALUE(self_in);
 #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
     } else if (MP_OBJ_IS_TYPE(self_in, &mp_type_int)) {
         // Not a small int.
@@ -224,8 +267,9 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
         *(--b) = '0';
     } else {
         do {
-            int c = num % base;
-            num /= base;
+            // The cast to fmt_uint_t is because num is positive and we want unsigned arithmetic
+            int c = (fmt_uint_t)num % base;
+            num = (fmt_uint_t)num / base;
             if (c >= 10) {
                 c += base_char - 10;
             } else {
@@ -291,7 +335,7 @@ mp_obj_t mp_obj_int_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
 }
 
 // This is called only with strings whose value doesn't fit in SMALL_INT
-mp_obj_t mp_obj_new_int_from_str_len(const char **str, mp_uint_t len, bool neg, mp_uint_t base) {
+mp_obj_t mp_obj_new_int_from_str_len(const char **str, size_t len, bool neg, unsigned int base) {
     mp_raise_msg(&mp_type_OverflowError, "long int not supported in this build");
     return mp_const_none;
 }
@@ -318,24 +362,6 @@ mp_obj_t mp_obj_new_int_from_uint(mp_uint_t value) {
     return mp_const_none;
 }
 
-#if MICROPY_PY_BUILTINS_FLOAT
-mp_obj_t mp_obj_new_int_from_float(mp_float_t val) {
-    int cl = fpclassify(val);
-    if (cl == FP_INFINITE) {
-        nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_OverflowError, "can't convert inf to int"));
-    } else if (cl == FP_NAN) {
-        nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "can't convert NaN to int"));
-    } else {
-        mp_fp_as_int_class_t icl = mp_classify_fp_as_int(val);
-        if (icl == MP_FP_CLASS_FIT_SMALLINT) {
-            return MP_OBJ_NEW_SMALL_INT((mp_int_t)val);
-        } else {
-            nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "float too big"));
-        }
-    }
-}
-#endif
-
 mp_obj_t mp_obj_new_int(mp_int_t value) {
     if (MP_SMALL_INT_FITS(value)) {
         return MP_OBJ_NEW_SMALL_INT(value);
@@ -374,25 +400,31 @@ mp_obj_t mp_obj_int_binary_op_extra_cases(mp_uint_t op, mp_obj_t lhs_in, mp_obj_
 
 // this is a classmethod
 STATIC mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
-    // TODO: Support long ints
-    // TODO: Support byteorder param
     // TODO: Support signed param (assumes signed=False at the moment)
     (void)n_args;
 
-    if (args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little)) {
-        mp_not_implemented("");
-    }
-
     // get the buffer info
     mp_buffer_info_t bufinfo;
     mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
 
-    // convert the bytes to an integer
+    const byte* buf = (const byte*)bufinfo.buf;
+    int delta = 1;
+    if (args[2] == MP_OBJ_NEW_QSTR(MP_QSTR_little)) {
+        buf += bufinfo.len - 1;
+        delta = -1;
+    }
+
     mp_uint_t value = 0;
-    for (const byte* buf = (const byte*)bufinfo.buf + bufinfo.len - 1; buf >= (byte*)bufinfo.buf; buf--) {
+    size_t len = bufinfo.len;
+    for (; len--; buf += delta) {
+        #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
+        if (value > (MP_SMALL_INT_MAX >> 8)) {
+            // Result will overflow a small-int so construct a big-int
+            return mp_obj_int_from_bytes_impl(args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little), bufinfo.len, bufinfo.buf);
+        }
+        #endif
         value = (value << 8) | *buf;
     }
-
     return mp_obj_new_int_from_uint(value);
 }
 
diff --git a/py/objint.h b/py/objint.h
index f418c329e9..da56c18624 100644
--- a/py/objint.h
+++ b/py/objint.h
@@ -41,29 +41,26 @@ typedef struct _mp_obj_int_t {
 extern const mp_obj_int_t mp_maxsize_obj;
 
 #if MICROPY_PY_BUILTINS_FLOAT
-typedef enum {
-    MP_FP_CLASS_FIT_SMALLINT,
-    MP_FP_CLASS_FIT_LONGINT,
-    MP_FP_CLASS_OVERFLOW
-} mp_fp_as_int_class_t;
-
-mp_fp_as_int_class_t mp_classify_fp_as_int(mp_float_t val);
 mp_float_t mp_obj_int_as_float_impl(mp_obj_t self_in);
-#endif // MICROPY_PY_BUILTINS_FLOAT
+#endif
 
 size_t mp_int_format_size(size_t num_bits, int base, const char *prefix, char comma);
 
+mp_obj_int_t *mp_obj_int_new_mpz(void);
+
 void mp_obj_int_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind);
 char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in,
                            int base, const char *prefix, char base_char, char comma);
 char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in,
                                 int base, const char *prefix, char base_char, char comma);
 mp_int_t mp_obj_int_hash(mp_obj_t self_in);
+mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
 void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
 int mp_obj_int_sign(mp_obj_t self_in);
 mp_obj_t mp_obj_int_abs(mp_obj_t self_in);
 mp_obj_t mp_obj_int_unary_op(mp_uint_t op, mp_obj_t o_in);
 mp_obj_t mp_obj_int_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
 mp_obj_t mp_obj_int_binary_op_extra_cases(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
+mp_obj_t mp_obj_int_pow3(mp_obj_t base, mp_obj_t exponent,  mp_obj_t modulus);
 
 #endif // __MICROPY_INCLUDED_PY_OBJINT_H__
diff --git a/py/objint_longlong.c b/py/objint_longlong.c
index f5b5d9c939..f638a53202 100644
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@@ -40,19 +40,25 @@
 
 #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
 
-// Python3 no longer has "l" suffix for long ints. We allow to use it
-// for debugging purpose though.
-#ifdef DEBUG
-#define SUFFIX "l"
-#else
-#define SUFFIX ""
-#endif
-
 #if MICROPY_PY_SYS_MAXSIZE
 // Export value for sys.maxsize
 const mp_obj_int_t mp_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
 #endif
 
+mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
+    int delta = 1;
+    if (!big_endian) {
+        buf += len - 1;
+        delta = -1;
+    }
+
+    mp_longint_impl_t value = 0;
+    for (; len--; buf += delta) {
+        value = (value << 8) | *buf;
+    }
+    return mp_obj_new_int_from_ll(value);
+}
+
 void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
     assert(MP_OBJ_IS_TYPE(self_in, &mp_type_int));
     mp_obj_int_t *self = self_in;
@@ -249,27 +255,7 @@ mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) {
     return o;
 }
 
-#if MICROPY_PY_BUILTINS_FLOAT
-mp_obj_t mp_obj_new_int_from_float(mp_float_t val) {
-    int cl = fpclassify(val);
-    if (cl == FP_INFINITE) {
-        nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_OverflowError, "can't convert inf to int"));
-    } else if (cl == FP_NAN) {
-        nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "can't convert NaN to int"));
-    } else {
-        mp_fp_as_int_class_t icl = mp_classify_fp_as_int(val);
-        if (icl == MP_FP_CLASS_FIT_SMALLINT) {
-            return MP_OBJ_NEW_SMALL_INT((mp_int_t)val);
-        } else if (icl == MP_FP_CLASS_FIT_LONGINT) {
-            return mp_obj_new_int_from_ll((long long)val);
-        } else {
-            nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "float too big"));
-        }
-    }
-}
-#endif
-
-mp_obj_t mp_obj_new_int_from_str_len(const char **str, mp_uint_t len, bool neg, mp_uint_t base) {
+mp_obj_t mp_obj_new_int_from_str_len(const char **str, size_t len, bool neg, unsigned int base) {
     // TODO this does not honor the given length of the string, but it all cases it should anyway be null terminated
     // TODO check overflow
     mp_obj_int_t *o = m_new_obj(mp_obj_int_t);
diff --git a/py/objint_mpz.c b/py/objint_mpz.c
index eadf64fce7..d818b6f407 100644
--- a/py/objint_mpz.c
+++ b/py/objint_mpz.c
@@ -74,7 +74,7 @@ const mp_obj_int_t mp_maxsize_obj = {
 #undef NUM_DIG
 #endif
 
-STATIC mp_obj_int_t *mp_obj_int_new_mpz(void) {
+mp_obj_int_t *mp_obj_int_new_mpz(void) {
     mp_obj_int_t *o = m_new_obj(mp_obj_int_t);
     o->base.type = &mp_type_int;
     mpz_init_zero(&o->mpz);
@@ -107,9 +107,16 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
     return str;
 }
 
+mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
+    mp_obj_int_t *o = mp_obj_int_new_mpz();
+    mpz_set_from_bytes(&o->mpz, big_endian, len, buf);
+    return MP_OBJ_FROM_PTR(o);
+}
+
 void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
     assert(MP_OBJ_IS_TYPE(self_in, &mp_type_int));
     mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
+    memset(buf, 0, len);
     mpz_as_bytes(&self->mpz, big_endian, len, buf);
 }
 
@@ -271,7 +278,7 @@ mp_obj_t mp_obj_int_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
             case MP_BINARY_OP_INPLACE_RSHIFT: {
                 mp_int_t irhs = mp_obj_int_get_checked(rhs_in);
                 if (irhs < 0) {
-                    mp_raise_msg(&mp_type_ValueError, "negative shift count");
+                    mp_raise_ValueError("negative shift count");
                 }
                 if (op == MP_BINARY_OP_LSHIFT || op == MP_BINARY_OP_INPLACE_LSHIFT) {
                     mpz_shl_inpl(&res->mpz, zlhs, irhs);
@@ -286,7 +293,8 @@ mp_obj_t mp_obj_int_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
                 mpz_pow_inpl(&res->mpz, zlhs, zrhs);
                 break;
 
-            case MP_BINARY_OP_DIVMOD: {
+            default: {
+                assert(op == MP_BINARY_OP_DIVMOD);
                 if (mpz_is_zero(zrhs)) {
                     goto zero_division_error;
                 }
@@ -295,9 +303,6 @@ mp_obj_t mp_obj_int_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
                 mp_obj_t tuple[2] = {MP_OBJ_FROM_PTR(quo), MP_OBJ_FROM_PTR(res)};
                 return mp_obj_new_tuple(2, tuple);
             }
-
-            default:
-                return MP_OBJ_NULL; // op not supported
         }
 
         return MP_OBJ_FROM_PTR(res);
@@ -322,6 +327,39 @@ mp_obj_t mp_obj_int_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
     }
 }
 
+#if MICROPY_PY_BUILTINS_POW3
+STATIC mpz_t *mp_mpz_for_int(mp_obj_t arg, mpz_t *temp) {
+    if (MP_OBJ_IS_SMALL_INT(arg)) {
+        mpz_init_from_int(temp, MP_OBJ_SMALL_INT_VALUE(arg));
+        return temp;
+    } else {
+        mp_obj_int_t *arp_p = MP_OBJ_TO_PTR(arg);
+        return &(arp_p->mpz);
+    }
+}
+
+mp_obj_t mp_obj_int_pow3(mp_obj_t base, mp_obj_t exponent,  mp_obj_t modulus) {
+    if (!MP_OBJ_IS_INT(base) || !MP_OBJ_IS_INT(exponent) || !MP_OBJ_IS_INT(modulus)) {
+        mp_raise_TypeError("pow() with 3 arguments requires integers");
+    } else {
+        mp_obj_t result = mp_obj_new_int_from_ull(0); // Use the _from_ull version as this forces an mpz int
+        mp_obj_int_t *res_p = (mp_obj_int_t *) MP_OBJ_TO_PTR(result);
+
+        mpz_t l_temp, r_temp, m_temp;
+        mpz_t *lhs = mp_mpz_for_int(base,     &l_temp);
+        mpz_t *rhs = mp_mpz_for_int(exponent, &r_temp);
+        mpz_t *mod = mp_mpz_for_int(modulus,  &m_temp);
+
+        mpz_pow3_inpl(&(res_p->mpz), lhs, rhs, mod);
+
+        if (lhs == &l_temp) { mpz_deinit(lhs); }
+        if (rhs == &r_temp) { mpz_deinit(rhs); }
+        if (mod == &m_temp) { mpz_deinit(mod); }
+        return result;
+    }
+}
+#endif
+
 mp_obj_t mp_obj_new_int(mp_int_t value) {
     if (MP_SMALL_INT_FITS(value)) {
         return MP_OBJ_NEW_SMALL_INT(value);
@@ -350,29 +388,9 @@ mp_obj_t mp_obj_new_int_from_uint(mp_uint_t value) {
     return mp_obj_new_int_from_ull(value);
 }
 
-#if MICROPY_PY_BUILTINS_FLOAT
-mp_obj_t mp_obj_new_int_from_float(mp_float_t val) {
-    int cl = fpclassify(val);
-    if (cl == FP_INFINITE) {
-        nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_OverflowError, "can't convert inf to int"));
-    } else if (cl == FP_NAN) {
-        nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "can't convert NaN to int"));
-    } else {
-        mp_fp_as_int_class_t icl = mp_classify_fp_as_int(val);
-        if (icl == MP_FP_CLASS_FIT_SMALLINT) {
-            return MP_OBJ_NEW_SMALL_INT((mp_int_t)val);
-        } else {
-            mp_obj_int_t *o = mp_obj_int_new_mpz();
-            mpz_set_from_float(&o->mpz, val);
-            return MP_OBJ_FROM_PTR(o);
-        }
-    }
-}
-#endif
-
-mp_obj_t mp_obj_new_int_from_str_len(const char **str, mp_uint_t len, bool neg, mp_uint_t base) {
+mp_obj_t mp_obj_new_int_from_str_len(const char **str, size_t len, bool neg, unsigned int base) {
     mp_obj_int_t *o = mp_obj_int_new_mpz();
-    mp_uint_t n = mpz_set_from_str(&o->mpz, *str, len, neg, base);
+    size_t n = mpz_set_from_str(&o->mpz, *str, len, neg, base);
     *str += n;
     return MP_OBJ_FROM_PTR(o);
 }
diff --git a/py/objlist.c b/py/objlist.c
index 6d4a20a507..45e69c8bcf 100644
--- a/py/objlist.c
+++ b/py/objlist.c
@@ -33,8 +33,8 @@
 #include "py/runtime.h"
 #include "py/stackctrl.h"
 
-STATIC mp_obj_t mp_obj_new_list_iterator(mp_obj_t list, mp_uint_t cur);
-STATIC mp_obj_list_t *list_new(mp_uint_t n);
+STATIC mp_obj_t mp_obj_new_list_iterator(mp_obj_t list, size_t cur, mp_obj_iter_buf_t *iter_buf);
+STATIC mp_obj_list_t *list_new(size_t n);
 STATIC mp_obj_t list_extend(mp_obj_t self_in, mp_obj_t arg_in);
 STATIC mp_obj_t list_pop(size_t n_args, const mp_obj_t *args);
 
@@ -50,7 +50,7 @@ STATIC void list_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t k
         kind = PRINT_REPR;
     }
     mp_print_str(print, "[");
-    for (mp_uint_t i = 0; i < o->len; i++) {
+    for (size_t i = 0; i < o->len; i++) {
         if (i > 0) {
             mp_print_str(print, ", ");
         }
@@ -60,7 +60,7 @@ STATIC void list_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t k
 }
 
 STATIC mp_obj_t list_extend_from_iter(mp_obj_t list, mp_obj_t iterable) {
-    mp_obj_t iter = mp_getiter(iterable);
+    mp_obj_t iter = mp_getiter(iterable, NULL);
     mp_obj_t item;
     while ((item = mp_iternext(iter)) != MP_OBJ_STOP_ITERATION) {
         mp_obj_list_append(list, item);
@@ -186,19 +186,19 @@ STATIC mp_obj_t list_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
             return MP_OBJ_FROM_PTR(res);
         }
 #endif
-        mp_uint_t index_val = mp_get_index(self->base.type, self->len, index, false);
+        size_t index_val = mp_get_index(self->base.type, self->len, index, false);
         return self->items[index_val];
     } else {
 #if MICROPY_PY_BUILTINS_SLICE
         if (MP_OBJ_IS_TYPE(index, &mp_type_slice)) {
             mp_obj_list_t *self = MP_OBJ_TO_PTR(self_in);
-            mp_check_self(MP_OBJ_IS_TYPE(value, &mp_type_list));
-            mp_obj_list_t *slice = MP_OBJ_TO_PTR(value);
+            size_t value_len; mp_obj_t *value_items;
+            mp_obj_get_array(value, &value_len, &value_items);
             mp_bound_slice_t slice_out;
             if (!mp_seq_get_fast_slice_indexes(self->len, index, &slice_out)) {
                 mp_not_implemented("");
             }
-            mp_int_t len_adj = slice->len - (slice_out.stop - slice_out.start);
+            mp_int_t len_adj = value_len - (slice_out.stop - slice_out.start);
             //printf("Len adj: %d\n", len_adj);
             if (len_adj > 0) {
                 if (self->len + len_adj > self->alloc) {
@@ -208,10 +208,10 @@ STATIC mp_obj_t list_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
                     self->alloc = self->len + len_adj;
                 }
                 mp_seq_replace_slice_grow_inplace(self->items, self->len,
-                    slice_out.start, slice_out.stop, slice->items, slice->len, len_adj, sizeof(*self->items));
+                    slice_out.start, slice_out.stop, value_items, value_len, len_adj, sizeof(*self->items));
             } else {
                 mp_seq_replace_slice_no_grow(self->items, self->len,
-                    slice_out.start, slice_out.stop, slice->items, slice->len, sizeof(*self->items));
+                    slice_out.start, slice_out.stop, value_items, value_len, sizeof(*self->items));
                 // Clear "freed" elements at the end of list
                 mp_seq_clear(self->items, self->len + len_adj, self->len, sizeof(*self->items));
                 // TODO: apply allocation policy re: alloc_size
@@ -225,8 +225,8 @@ STATIC mp_obj_t list_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
     }
 }
 
-STATIC mp_obj_t list_getiter(mp_obj_t o_in) {
-    return mp_obj_new_list_iterator(o_in, 0);
+STATIC mp_obj_t list_getiter(mp_obj_t o_in, mp_obj_iter_buf_t *iter_buf) {
+    return mp_obj_new_list_iterator(o_in, 0, iter_buf);
 }
 
 mp_obj_t mp_obj_list_append(mp_obj_t self_in, mp_obj_t arg) {
@@ -268,7 +268,7 @@ STATIC mp_obj_t list_pop(size_t n_args, const mp_obj_t *args) {
     if (self->len == 0) {
         mp_raise_msg(&mp_type_IndexError, "pop from empty list");
     }
-    mp_uint_t index = mp_get_index(self->base.type, self->len, n_args == 1 ? MP_OBJ_NEW_SMALL_INT(-1) : args[1], false);
+    size_t index = mp_get_index(self->base.type, self->len, n_args == 1 ? MP_OBJ_NEW_SMALL_INT(-1) : args[1], false);
     mp_obj_t ret = self->items[index];
     self->len -= 1;
     memmove(self->items + index, self->items + index + 1, (self->len - index) * sizeof(mp_obj_t));
@@ -374,7 +374,7 @@ STATIC mp_obj_t list_insert(mp_obj_t self_in, mp_obj_t idx, mp_obj_t obj) {
     if (index < 0) {
          index = 0;
     }
-    if ((mp_uint_t)index > self->len) {
+    if ((size_t)index > self->len) {
          index = self->len;
     }
 
@@ -451,7 +451,7 @@ const mp_obj_type_t mp_type_list = {
     .locals_dict = (mp_obj_dict_t*)&list_locals_dict,
 };
 
-void mp_obj_list_init(mp_obj_list_t *o, mp_uint_t n) {
+void mp_obj_list_init(mp_obj_list_t *o, size_t n) {
     o->base.type = &mp_type_list;
     o->alloc = n < LIST_MIN_ALLOC ? LIST_MIN_ALLOC : n;
     o->len = n;
@@ -459,29 +459,29 @@ void mp_obj_list_init(mp_obj_list_t *o, mp_uint_t n) {
     mp_seq_clear(o->items, n, o->alloc, sizeof(*o->items));
 }
 
-STATIC mp_obj_list_t *list_new(mp_uint_t n) {
+STATIC mp_obj_list_t *list_new(size_t n) {
     mp_obj_list_t *o = m_new_obj(mp_obj_list_t);
     mp_obj_list_init(o, n);
     return o;
 }
 
-mp_obj_t mp_obj_new_list(mp_uint_t n, mp_obj_t *items) {
+mp_obj_t mp_obj_new_list(size_t n, mp_obj_t *items) {
     mp_obj_list_t *o = list_new(n);
     if (items != NULL) {
-        for (mp_uint_t i = 0; i < n; i++) {
+        for (size_t i = 0; i < n; i++) {
             o->items[i] = items[i];
         }
     }
     return MP_OBJ_FROM_PTR(o);
 }
 
-void mp_obj_list_get(mp_obj_t self_in, mp_uint_t *len, mp_obj_t **items) {
+void mp_obj_list_get(mp_obj_t self_in, size_t *len, mp_obj_t **items) {
     mp_obj_list_t *self = MP_OBJ_TO_PTR(self_in);
     *len = self->len;
     *items = self->items;
 }
 
-void mp_obj_list_set_len(mp_obj_t self_in, mp_uint_t len) {
+void mp_obj_list_set_len(mp_obj_t self_in, size_t len) {
     // trust that the caller knows what it's doing
     // TODO realloc if len got much smaller than alloc
     mp_obj_list_t *self = MP_OBJ_TO_PTR(self_in);
@@ -490,7 +490,7 @@ void mp_obj_list_set_len(mp_obj_t self_in, mp_uint_t len) {
 
 void mp_obj_list_store(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
     mp_obj_list_t *self = MP_OBJ_TO_PTR(self_in);
-    mp_uint_t i = mp_get_index(self->base.type, self->len, index, false);
+    size_t i = mp_get_index(self->base.type, self->len, index, false);
     self->items[i] = value;
 }
 
@@ -501,7 +501,7 @@ typedef struct _mp_obj_list_it_t {
     mp_obj_base_t base;
     mp_fun_1_t iternext;
     mp_obj_t list;
-    mp_uint_t cur;
+    size_t cur;
 } mp_obj_list_it_t;
 
 STATIC mp_obj_t list_it_iternext(mp_obj_t self_in) {
@@ -516,8 +516,9 @@ STATIC mp_obj_t list_it_iternext(mp_obj_t self_in) {
     }
 }
 
-mp_obj_t mp_obj_new_list_iterator(mp_obj_t list, mp_uint_t cur) {
-    mp_obj_list_it_t *o = m_new_obj(mp_obj_list_it_t);
+mp_obj_t mp_obj_new_list_iterator(mp_obj_t list, size_t cur, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_list_it_t) <= sizeof(mp_obj_iter_buf_t));
+    mp_obj_list_it_t *o = (mp_obj_list_it_t*)iter_buf;
     o->base.type = &mp_type_polymorph_iter;
     o->iternext = list_it_iternext;
     o->list = list;
diff --git a/py/objlist.h b/py/objlist.h
index 443ede5743..5b2d216fc2 100644
--- a/py/objlist.h
+++ b/py/objlist.h
@@ -30,8 +30,8 @@
 
 typedef struct _mp_obj_list_t {
     mp_obj_base_t base;
-    mp_uint_t alloc;
-    mp_uint_t len;
+    size_t alloc;
+    size_t len;
     mp_obj_t *items;
 } mp_obj_list_t;
 
diff --git a/py/objmap.c b/py/objmap.c
index ed0291435d..111c964fdd 100644
--- a/py/objmap.c
+++ b/py/objmap.c
@@ -31,7 +31,7 @@
 
 typedef struct _mp_obj_map_t {
     mp_obj_base_t base;
-    mp_uint_t n_iters;
+    size_t n_iters;
     mp_obj_t fun;
     mp_obj_t iters[];
 } mp_obj_map_t;
@@ -42,8 +42,8 @@ STATIC mp_obj_t map_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_
     o->base.type = type;
     o->n_iters = n_args - 1;
     o->fun = args[0];
-    for (mp_uint_t i = 0; i < n_args - 1; i++) {
-        o->iters[i] = mp_getiter(args[i + 1]);
+    for (size_t i = 0; i < n_args - 1; i++) {
+        o->iters[i] = mp_getiter(args[i + 1], NULL);
     }
     return MP_OBJ_FROM_PTR(o);
 }
@@ -53,7 +53,7 @@ STATIC mp_obj_t map_iternext(mp_obj_t self_in) {
     mp_obj_map_t *self = MP_OBJ_TO_PTR(self_in);
     mp_obj_t *nextses = m_new(mp_obj_t, self->n_iters);
 
-    for (mp_uint_t i = 0; i < self->n_iters; i++) {
+    for (size_t i = 0; i < self->n_iters; i++) {
         mp_obj_t next = mp_iternext(self->iters[i]);
         if (next == MP_OBJ_STOP_ITERATION) {
             m_del(mp_obj_t, nextses, self->n_iters);
@@ -68,6 +68,6 @@ const mp_obj_type_t mp_type_map = {
     { &mp_type_type },
     .name = MP_QSTR_map,
     .make_new = map_make_new,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = map_iternext,
 };
diff --git a/py/objmodule.c b/py/objmodule.c
index 1c79e1a18d..43bb36b98c 100644
--- a/py/objmodule.c
+++ b/py/objmodule.c
@@ -227,15 +227,15 @@ STATIC const mp_rom_map_elem_t mp_builtin_module_table[] = {
     MICROPY_PORT_BUILTIN_MODULES
 };
 
-STATIC MP_DEFINE_CONST_MAP(mp_builtin_module_map, mp_builtin_module_table);
+MP_DEFINE_CONST_MAP(mp_builtin_module_map, mp_builtin_module_table);
 
-void mp_module_init(void) {
-    mp_obj_dict_init(&MP_STATE_VM(mp_loaded_modules_dict), 3);
-}
+#if MICROPY_MODULE_WEAK_LINKS
+STATIC const mp_rom_map_elem_t mp_builtin_module_weak_links_table[] = {
+    MICROPY_PORT_BUILTIN_MODULE_WEAK_LINKS
+};
 
-void mp_module_deinit(void) {
-    //mp_map_deinit(&MP_STATE_VM(mp_loaded_modules_map));
-}
+MP_DEFINE_CONST_MAP(mp_builtin_module_weak_links_map, mp_builtin_module_weak_links_table);
+#endif
 
 // returns MP_OBJ_NULL if not found
 mp_obj_t mp_module_get(qstr module_name) {
diff --git a/py/objmodule.h b/py/objmodule.h
index 138f942c21..4e6612adc7 100644
--- a/py/objmodule.h
+++ b/py/objmodule.h
@@ -28,8 +28,9 @@
 
 #include "py/obj.h"
 
-void mp_module_init(void);
-void mp_module_deinit(void);
+extern const mp_map_t mp_builtin_module_map;
+extern const mp_map_t mp_builtin_module_weak_links_map;
+
 mp_obj_t mp_module_get(qstr module_name);
 void mp_module_register(qstr qstr, mp_obj_t module);
 
diff --git a/py/objnamedtuple.c b/py/objnamedtuple.c
index 18931a16c2..cbd845dce8 100644
--- a/py/objnamedtuple.c
+++ b/py/objnamedtuple.c
@@ -36,7 +36,7 @@
 
 typedef struct _mp_obj_namedtuple_type_t {
     mp_obj_type_t base;
-    mp_uint_t n_fields;
+    size_t n_fields;
     qstr fields[];
 } mp_obj_namedtuple_type_t;
 
@@ -44,13 +44,13 @@ typedef struct _mp_obj_namedtuple_t {
     mp_obj_tuple_t tuple;
 } mp_obj_namedtuple_t;
 
-STATIC mp_uint_t namedtuple_find_field(const mp_obj_namedtuple_type_t *type, qstr name) {
-    for (mp_uint_t i = 0; i < type->n_fields; i++) {
+STATIC size_t namedtuple_find_field(const mp_obj_namedtuple_type_t *type, qstr name) {
+    for (size_t i = 0; i < type->n_fields; i++) {
         if (type->fields[i] == name) {
             return i;
         }
     }
-    return -1;
+    return (size_t)-1;
 }
 
 STATIC void namedtuple_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t kind) {
@@ -65,8 +65,8 @@ STATIC void namedtuple_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
     if (dest[0] == MP_OBJ_NULL) {
         // load attribute
         mp_obj_namedtuple_t *self = MP_OBJ_TO_PTR(self_in);
-        int id = namedtuple_find_field((mp_obj_namedtuple_type_t*)self->tuple.base.type, attr);
-        if (id == -1) {
+        size_t id = namedtuple_find_field((mp_obj_namedtuple_type_t*)self->tuple.base.type, attr);
+        if (id == (size_t)-1) {
             return;
         }
         dest[0] = self->tuple.items[id];
@@ -102,14 +102,14 @@ STATIC mp_obj_t namedtuple_make_new(const mp_obj_type_t *type_in, size_t n_args,
         arg_objects = alloca(alloc_size);
         memset(arg_objects, 0, alloc_size);
 
-        for (mp_uint_t i = 0; i < n_args; i++) {
+        for (size_t i = 0; i < n_args; i++) {
             arg_objects[i] = args[i];
         }
 
-        for (mp_uint_t i = n_args; i < n_args + 2 * n_kw; i += 2) {
+        for (size_t i = n_args; i < n_args + 2 * n_kw; i += 2) {
             qstr kw = mp_obj_str_get_qstr(args[i]);
-            int id = namedtuple_find_field(type, kw);
-            if (id == -1) {
+            size_t id = namedtuple_find_field(type, kw);
+            if (id == (size_t)-1) {
                 if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
                     mp_arg_error_terse_mismatch();
                 } else {
@@ -134,9 +134,7 @@ STATIC mp_obj_t namedtuple_make_new(const mp_obj_type_t *type_in, size_t n_args,
     return MP_OBJ_FROM_PTR(tuple);
 }
 
-STATIC const mp_rom_obj_tuple_t namedtuple_base_tuple = {{&mp_type_tuple}, 1, {MP_ROM_PTR(&mp_type_tuple)}};
-
-STATIC mp_obj_t mp_obj_new_namedtuple_type(qstr name, mp_uint_t n_fields, mp_obj_t *fields) {
+STATIC mp_obj_t mp_obj_new_namedtuple_type(qstr name, size_t n_fields, mp_obj_t *fields) {
     mp_obj_namedtuple_type_t *o = m_new_obj_var(mp_obj_namedtuple_type_t, qstr, n_fields);
     memset(&o->base, 0, sizeof(o->base));
     o->base.base.type = &mp_type_type;
@@ -148,9 +146,9 @@ STATIC mp_obj_t mp_obj_new_namedtuple_type(qstr name, mp_uint_t n_fields, mp_obj
     o->base.attr = namedtuple_attr;
     o->base.subscr = mp_obj_tuple_subscr;
     o->base.getiter = mp_obj_tuple_getiter;
-    o->base.bases_tuple = (mp_obj_tuple_t*)(mp_rom_obj_tuple_t*)&namedtuple_base_tuple;
+    o->base.parent = &mp_type_tuple;
     o->n_fields = n_fields;
-    for (mp_uint_t i = 0; i < n_fields; i++) {
+    for (size_t i = 0; i < n_fields; i++) {
         o->fields[i] = mp_obj_str_get_qstr(fields[i]);
     }
     return MP_OBJ_FROM_PTR(o);
@@ -158,7 +156,7 @@ STATIC mp_obj_t mp_obj_new_namedtuple_type(qstr name, mp_uint_t n_fields, mp_obj
 
 STATIC mp_obj_t new_namedtuple_type(mp_obj_t name_in, mp_obj_t fields_in) {
     qstr name = mp_obj_str_get_qstr(name_in);
-    mp_uint_t n_fields;
+    size_t n_fields;
     mp_obj_t *fields;
     #if MICROPY_CPYTHON_COMPAT
     if (MP_OBJ_IS_STR(fields_in)) {
diff --git a/py/objobject.c b/py/objobject.c
index b33dc491c4..f9a7d17c34 100644
--- a/py/objobject.c
+++ b/py/objobject.c
@@ -50,7 +50,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_1(object___init___obj, object___init__);
 
 STATIC mp_obj_t object___new__(mp_obj_t cls) {
     if (!MP_OBJ_IS_TYPE(cls, &mp_type_type) || !mp_obj_is_instance_type((mp_obj_type_t*)MP_OBJ_TO_PTR(cls))) {
-        mp_raise_msg(&mp_type_TypeError, "__new__ arg must be a user-type");
+        mp_raise_TypeError("__new__ arg must be a user-type");
     }
     mp_obj_t o = MP_OBJ_SENTINEL;
     mp_obj_t res = mp_obj_instance_make_new(MP_OBJ_TO_PTR(cls), 1, 0, &o);
diff --git a/py/objpolyiter.c b/py/objpolyiter.c
index 9bba538c5f..61bd1e0ac2 100644
--- a/py/objpolyiter.c
+++ b/py/objpolyiter.c
@@ -49,6 +49,6 @@ STATIC mp_obj_t polymorph_it_iternext(mp_obj_t self_in) {
 const mp_obj_type_t mp_type_polymorph_iter = {
     { &mp_type_type },
     .name = MP_QSTR_iterator,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = polymorph_it_iternext,
 };
diff --git a/py/objrange.c b/py/objrange.c
index 79459316b1..8c4e14f49c 100644
--- a/py/objrange.c
+++ b/py/objrange.c
@@ -55,12 +55,13 @@ STATIC mp_obj_t range_it_iternext(mp_obj_t o_in) {
 STATIC const mp_obj_type_t range_it_type = {
     { &mp_type_type },
     .name = MP_QSTR_iterator,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = range_it_iternext,
 };
 
-STATIC mp_obj_t mp_obj_new_range_iterator(mp_int_t cur, mp_int_t stop, mp_int_t step) {
-    mp_obj_range_it_t *o = m_new_obj(mp_obj_range_it_t);
+STATIC mp_obj_t mp_obj_new_range_iterator(mp_int_t cur, mp_int_t stop, mp_int_t step, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_range_it_t) <= sizeof(mp_obj_iter_buf_t));
+    mp_obj_range_it_t *o = (mp_obj_range_it_t*)iter_buf;
     o->base.type = &range_it_type;
     o->cur = cur;
     o->stop = stop;
@@ -104,8 +105,10 @@ STATIC mp_obj_t range_make_new(const mp_obj_type_t *type, size_t n_args, size_t
         o->start = mp_obj_get_int(args[0]);
         o->stop = mp_obj_get_int(args[1]);
         if (n_args == 3) {
-            // TODO check step is non-zero
             o->step = mp_obj_get_int(args[2]);
+            if (o->step == 0) {
+                mp_raise_ValueError("zero step");
+            }
         }
     }
 
@@ -151,19 +154,23 @@ STATIC mp_obj_t range_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
             o->start = self->start + slice.start * self->step;
             o->stop = self->start + slice.stop * self->step;
             o->step = slice.step * self->step;
+            if (slice.step < 0) {
+                // Negative slice steps have inclusive stop, so adjust for exclusive
+                o->stop -= self->step;
+            }
             return MP_OBJ_FROM_PTR(o);
         }
 #endif
-        uint index_val = mp_get_index(self->base.type, len, index, false);
+        size_t index_val = mp_get_index(self->base.type, len, index, false);
         return MP_OBJ_NEW_SMALL_INT(self->start + index_val * self->step);
     } else {
         return MP_OBJ_NULL; // op not supported
     }
 }
 
-STATIC mp_obj_t range_getiter(mp_obj_t o_in) {
+STATIC mp_obj_t range_getiter(mp_obj_t o_in, mp_obj_iter_buf_t *iter_buf) {
     mp_obj_range_t *o = MP_OBJ_TO_PTR(o_in);
-    return mp_obj_new_range_iterator(o->start, o->stop, o->step);
+    return mp_obj_new_range_iterator(o->start, o->stop, o->step, iter_buf);
 }
 
 
diff --git a/py/objreversed.c b/py/objreversed.c
index 4343c19780..fc85e72bfb 100644
--- a/py/objreversed.c
+++ b/py/objreversed.c
@@ -74,7 +74,7 @@ const mp_obj_type_t mp_type_reversed = {
     { &mp_type_type },
     .name = MP_QSTR_reversed,
     .make_new = reversed_make_new,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = reversed_iternext,
 };
 
diff --git a/py/objset.c b/py/objset.c
index fc124fcd8c..f74bc74a07 100644
--- a/py/objset.c
+++ b/py/objset.c
@@ -44,7 +44,7 @@ typedef struct _mp_obj_set_it_t {
     mp_obj_base_t base;
     mp_fun_1_t iternext;
     mp_obj_set_t *set;
-    mp_uint_t cur;
+    size_t cur;
 } mp_obj_set_it_t;
 
 STATIC mp_obj_t set_it_iternext(mp_obj_t self_in);
@@ -96,7 +96,7 @@ STATIC void set_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t
     }
     #endif
     mp_print_str(print, "{");
-    for (mp_uint_t i = 0; i < self->set.alloc; i++) {
+    for (size_t i = 0; i < self->set.alloc; i++) {
         if (MP_SET_SLOT_IS_FILLED(&self->set, i)) {
             if (!first) {
                 mp_print_str(print, ", ");
@@ -129,7 +129,7 @@ STATIC mp_obj_t set_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_
         default: { // can only be 0 or 1 arg
             // 1 argument, an iterable from which we make a new set
             mp_obj_t set = mp_obj_new_set(0, NULL);
-            mp_obj_t iterable = mp_getiter(args[0]);
+            mp_obj_t iterable = mp_getiter(args[0], NULL);
             mp_obj_t item;
             while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
                 mp_obj_set_store(set, item);
@@ -143,10 +143,10 @@ STATIC mp_obj_t set_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_
 
 STATIC mp_obj_t set_it_iternext(mp_obj_t self_in) {
     mp_obj_set_it_t *self = MP_OBJ_TO_PTR(self_in);
-    mp_uint_t max = self->set->set.alloc;
+    size_t max = self->set->set.alloc;
     mp_set_t *set = &self->set->set;
 
-    for (mp_uint_t i = self->cur; i < max; i++) {
+    for (size_t i = self->cur; i < max; i++) {
         if (MP_SET_SLOT_IS_FILLED(set, i)) {
             self->cur = i + 1;
             return set->table[i];
@@ -156,8 +156,9 @@ STATIC mp_obj_t set_it_iternext(mp_obj_t self_in) {
     return MP_OBJ_STOP_ITERATION;
 }
 
-STATIC mp_obj_t set_getiter(mp_obj_t set_in) {
-    mp_obj_set_it_t *o = m_new_obj(mp_obj_set_it_t);
+STATIC mp_obj_t set_getiter(mp_obj_t set_in, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_set_it_t) <= sizeof(mp_obj_iter_buf_t));
+    mp_obj_set_it_t *o = (mp_obj_set_it_t*)iter_buf;
     o->base.type = &mp_type_polymorph_iter;
     o->iternext = set_it_iternext;
     o->set = (mp_obj_set_t *)MP_OBJ_TO_PTR(set_in);
@@ -228,12 +229,12 @@ STATIC mp_obj_t set_diff_int(size_t n_args, const mp_obj_t *args, bool update) {
     }
 
 
-    for (mp_uint_t i = 1; i < n_args; i++) {
+    for (size_t i = 1; i < n_args; i++) {
         mp_obj_t other = args[i];
         if (self == other) {
             set_clear(self);
         } else {
-            mp_obj_t iter = mp_getiter(other);
+            mp_obj_t iter = mp_getiter(other, NULL);
             mp_obj_t next;
             while ((next = mp_iternext(iter)) != MP_OBJ_STOP_ITERATION) {
                 set_discard(self, next);
@@ -270,7 +271,7 @@ STATIC mp_obj_t set_intersect_int(mp_obj_t self_in, mp_obj_t other, bool update)
     mp_obj_set_t *self = MP_OBJ_TO_PTR(self_in);
     mp_obj_set_t *out = MP_OBJ_TO_PTR(mp_obj_new_set(0, NULL));
 
-    mp_obj_t iter = mp_getiter(other);
+    mp_obj_t iter = mp_getiter(other, NULL);
     mp_obj_t next;
     while ((next = mp_iternext(iter)) != MP_OBJ_STOP_ITERATION) {
         if (mp_set_lookup(&self->set, next, MP_MAP_LOOKUP)) {
@@ -302,7 +303,8 @@ STATIC mp_obj_t set_isdisjoint(mp_obj_t self_in, mp_obj_t other) {
     check_set_or_frozenset(self_in);
     mp_obj_set_t *self = MP_OBJ_TO_PTR(self_in);
 
-    mp_obj_t iter = mp_getiter(other);
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t iter = mp_getiter(other, &iter_buf);
     mp_obj_t next;
     while ((next = mp_iternext(iter)) != MP_OBJ_STOP_ITERATION) {
         if (mp_set_lookup(&self->set, next, MP_MAP_LOOKUP)) {
@@ -335,7 +337,8 @@ STATIC mp_obj_t set_issubset_internal(mp_obj_t self_in, mp_obj_t other_in, bool
     if (proper && self->set.used == other->set.used) {
         out = false;
     } else {
-        mp_obj_t iter = set_getiter(MP_OBJ_FROM_PTR(self));
+        mp_obj_iter_buf_t iter_buf;
+        mp_obj_t iter = set_getiter(MP_OBJ_FROM_PTR(self), &iter_buf);
         mp_obj_t next;
         while ((next = set_it_iternext(iter)) != MP_OBJ_STOP_ITERATION) {
             if (!mp_set_lookup(&other->set, next, MP_MAP_LOOKUP)) {
@@ -408,7 +411,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(set_remove_obj, set_remove);
 STATIC mp_obj_t set_symmetric_difference_update(mp_obj_t self_in, mp_obj_t other_in) {
     check_set(self_in);
     mp_obj_set_t *self = MP_OBJ_TO_PTR(self_in);
-    mp_obj_t iter = mp_getiter(other_in);
+    mp_obj_t iter = mp_getiter(other_in, NULL);
     mp_obj_t next;
     while ((next = mp_iternext(iter)) != MP_OBJ_STOP_ITERATION) {
         mp_set_lookup(&self->set, next, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND_OR_REMOVE_IF_FOUND);
@@ -427,7 +430,7 @@ STATIC mp_obj_t set_symmetric_difference(mp_obj_t self_in, mp_obj_t other_in) {
 STATIC MP_DEFINE_CONST_FUN_OBJ_2(set_symmetric_difference_obj, set_symmetric_difference);
 
 STATIC void set_update_int(mp_obj_set_t *self, mp_obj_t other_in) {
-    mp_obj_t iter = mp_getiter(other_in);
+    mp_obj_t iter = mp_getiter(other_in, NULL);
     mp_obj_t next;
     while ((next = mp_iternext(iter)) != MP_OBJ_STOP_ITERATION) {
         mp_set_lookup(&self->set, next, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
@@ -436,7 +439,7 @@ STATIC void set_update_int(mp_obj_set_t *self, mp_obj_t other_in) {
 
 STATIC mp_obj_t set_update(size_t n_args, const mp_obj_t *args) {
     check_set(args[0]);
-    for (mp_uint_t i = 1; i < n_args; i++) {
+    for (size_t i = 1; i < n_args; i++) {
         set_update_int(MP_OBJ_TO_PTR(args[0]), args[i]);
     }
 
@@ -462,10 +465,10 @@ STATIC mp_obj_t set_unary_op(mp_uint_t op, mp_obj_t self_in) {
             if (MP_OBJ_IS_TYPE(self_in, &mp_type_frozenset)) {
                 // start hash with unique value
                 mp_int_t hash = (mp_int_t)(uintptr_t)&mp_type_frozenset;
-                mp_uint_t max = self->set.alloc;
+                size_t max = self->set.alloc;
                 mp_set_t *set = &self->set;
 
-                for (mp_uint_t i = 0; i < max; i++) {
+                for (size_t i = 0; i < max; i++) {
                     if (MP_SET_SLOT_IS_FILLED(set, i)) {
                         hash += MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, set->table[i]));
                     }
@@ -479,6 +482,11 @@ STATIC mp_obj_t set_unary_op(mp_uint_t op, mp_obj_t self_in) {
 
 STATIC mp_obj_t set_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs) {
     mp_obj_t args[] = {lhs, rhs};
+    #if MICROPY_PY_BUILTINS_FROZENSET
+    bool update = MP_OBJ_IS_TYPE(lhs, &mp_type_set);
+    #else
+    bool update = true;
+    #endif
     switch (op) {
         case MP_BINARY_OP_OR:
             return set_union(lhs, rhs);
@@ -489,13 +497,28 @@ STATIC mp_obj_t set_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs) {
         case MP_BINARY_OP_SUBTRACT:
             return set_diff(2, args);
         case MP_BINARY_OP_INPLACE_OR:
-            return set_union(lhs, rhs);
+            if (update) {
+                set_update(2, args);
+                return lhs;
+            } else {
+                return set_union(lhs, rhs);
+            }
         case MP_BINARY_OP_INPLACE_XOR:
-            return set_symmetric_difference(lhs, rhs);
+            if (update) {
+                set_symmetric_difference_update(lhs, rhs);
+                return lhs;
+            } else {
+                return set_symmetric_difference(lhs, rhs);
+            }
         case MP_BINARY_OP_INPLACE_AND:
-            return set_intersect(lhs, rhs);
+            rhs = set_intersect_int(lhs, rhs, update);
+            if (update) {
+                return lhs;
+            } else {
+                return rhs;
+            }
         case MP_BINARY_OP_INPLACE_SUBTRACT:
-            return set_diff(2, args);
+            return set_diff_int(2, args, update);
         case MP_BINARY_OP_LESS:
             return set_issubset_proper(lhs, rhs);
         case MP_BINARY_OP_MORE:
@@ -567,11 +590,11 @@ const mp_obj_type_t mp_type_frozenset = {
 };
 #endif
 
-mp_obj_t mp_obj_new_set(mp_uint_t n_args, mp_obj_t *items) {
+mp_obj_t mp_obj_new_set(size_t n_args, mp_obj_t *items) {
     mp_obj_set_t *o = m_new_obj(mp_obj_set_t);
     o->base.type = &mp_type_set;
     mp_set_init(&o->set, n_args);
-    for (mp_uint_t i = 0; i < n_args; i++) {
+    for (size_t i = 0; i < n_args; i++) {
         mp_set_lookup(&o->set, items[i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
     }
     return MP_OBJ_FROM_PTR(o);
diff --git a/py/objstr.c b/py/objstr.c
index f082e95591..70de0a693a 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -36,15 +36,15 @@
 #include "py/runtime.h"
 #include "py/stackctrl.h"
 
-STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_obj_t *args, mp_obj_t dict);
+STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, size_t n_args, const mp_obj_t *args, mp_obj_t dict);
 
-STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
+STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf);
 STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in);
 
 /******************************************************************************/
 /* str                                                                        */
 
-void mp_str_print_quoted(const mp_print_t *print, const byte *str_data, mp_uint_t str_len, bool is_bytes) {
+void mp_str_print_quoted(const mp_print_t *print, const byte *str_data, size_t str_len, bool is_bytes) {
     // this escapes characters, but it will be very slow to print (calling print many times)
     bool has_single_quote = false;
     bool has_double_quote = false;
@@ -231,11 +231,12 @@ STATIC mp_obj_t bytes_make_new(const mp_obj_type_t *type_in, size_t n_args, size
         vstr_init(&vstr, len);
     }
 
-    mp_obj_t iterable = mp_getiter(args[0]);
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t iterable = mp_getiter(args[0], &iter_buf);
     mp_obj_t item;
     while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
         mp_int_t val = mp_obj_get_int(item);
-        #if MICROPY_CPYTHON_COMPAT
+        #if MICROPY_FULL_CHECKS
         if (val < 0 || val > 255) {
             mp_raise_ValueError("bytes value out of range");
         }
@@ -251,9 +252,9 @@ wrong_args:
 
 // like strstr but with specified length and allows \0 bytes
 // TODO replace with something more efficient/standard
-const byte *find_subbytes(const byte *haystack, mp_uint_t hlen, const byte *needle, mp_uint_t nlen, mp_int_t direction) {
+const byte *find_subbytes(const byte *haystack, size_t hlen, const byte *needle, size_t nlen, int direction) {
     if (hlen >= nlen) {
-        mp_uint_t str_index, str_index_end;
+        size_t str_index, str_index_end;
         if (direction > 0) {
             str_index = 0;
             str_index_end = hlen - nlen;
@@ -282,19 +283,14 @@ const byte *find_subbytes(const byte *haystack, mp_uint_t hlen, const byte *need
 mp_obj_t mp_obj_str_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
     // check for modulo
     if (op == MP_BINARY_OP_MODULO) {
-        mp_obj_t *args;
-        mp_uint_t n_args;
+        mp_obj_t *args = &rhs_in;
+        size_t n_args = 1;
         mp_obj_t dict = MP_OBJ_NULL;
         if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_tuple)) {
             // TODO: Support tuple subclasses?
             mp_obj_tuple_get(rhs_in, &n_args, &args);
         } else if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_dict)) {
-            args = NULL;
-            n_args = 0;
             dict = rhs_in;
-        } else {
-            args = &rhs_in;
-            n_args = 1;
         }
         return str_modulo_format(lhs_in, n_args, args, dict);
     }
@@ -338,7 +334,7 @@ mp_obj_t mp_obj_str_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
     // size and execution time so we don't.
 
     const byte *rhs_data;
-    mp_uint_t rhs_len;
+    size_t rhs_len;
     if (lhs_type == mp_obj_get_type(rhs_in)) {
         GET_STR_DATA_LEN(rhs_in, rhs_data_, rhs_len_);
         rhs_data = rhs_data_;
@@ -358,6 +354,13 @@ mp_obj_t mp_obj_str_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
     switch (op) {
         case MP_BINARY_OP_ADD:
         case MP_BINARY_OP_INPLACE_ADD: {
+            if (lhs_len == 0 && mp_obj_get_type(rhs_in) == lhs_type) {
+                return rhs_in;
+            }
+            if (rhs_len == 0) {
+                return lhs_in;
+            }
+
             vstr_t vstr;
             vstr_init_len(&vstr, lhs_len + rhs_len);
             memcpy(vstr.buf, lhs_data, lhs_len);
@@ -385,7 +388,7 @@ mp_obj_t mp_obj_str_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
 // objstrunicode defines own version
 const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
                              mp_obj_t index, bool is_slice) {
-    mp_uint_t index_val = mp_get_index(type, self_len, index, is_slice);
+    size_t index_val = mp_get_index(type, self_len, index, is_slice);
     return self_data + index_val;
 }
 #endif
@@ -405,7 +408,7 @@ STATIC mp_obj_t bytes_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
             return mp_obj_new_str_of_type(type, self_data + slice.start, slice.stop - slice.start);
         }
 #endif
-        mp_uint_t index_val = mp_get_index(type, self_len, index, false);
+        size_t index_val = mp_get_index(type, self_len, index, false);
         // If we have unicode enabled the type will always be bytes, so take the short cut.
         if (MICROPY_PY_BUILTINS_STR_UNICODE || type == &mp_type_bytes) {
             return MP_OBJ_NEW_SMALL_INT(self_data[index_val]);
@@ -425,22 +428,19 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
     GET_STR_DATA_LEN(self_in, sep_str, sep_len);
 
     // process args
-    mp_uint_t seq_len;
+    size_t seq_len;
     mp_obj_t *seq_items;
-    if (MP_OBJ_IS_TYPE(arg, &mp_type_tuple)) {
-        mp_obj_tuple_get(arg, &seq_len, &seq_items);
-    } else {
-        if (!MP_OBJ_IS_TYPE(arg, &mp_type_list)) {
-            // arg is not a list, try to convert it to one
-            // TODO: Try to optimize?
-            arg = mp_type_list.make_new(&mp_type_list, 1, 0, &arg);
-        }
-        mp_obj_list_get(arg, &seq_len, &seq_items);
+
+    if (!MP_OBJ_IS_TYPE(arg, &mp_type_list) && !MP_OBJ_IS_TYPE(arg, &mp_type_tuple)) {
+        // arg is not a list nor a tuple, try to convert it to a list
+        // TODO: Try to optimize?
+        arg = mp_type_list.make_new(&mp_type_list, 1, 0, &arg);
     }
+    mp_obj_get_array(arg, &seq_len, &seq_items);
 
     // count required length
-    mp_uint_t required_len = 0;
-    for (mp_uint_t i = 0; i < seq_len; i++) {
+    size_t required_len = 0;
+    for (size_t i = 0; i < seq_len; i++) {
         if (mp_obj_get_type(seq_items[i]) != self_type) {
             mp_raise_TypeError(
                 "join expects a list of str/bytes objects consistent with self object");
@@ -456,7 +456,7 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
     vstr_t vstr;
     vstr_init_len(&vstr, required_len);
     byte *data = (byte*)vstr.buf;
-    for (mp_uint_t i = 0; i < seq_len; i++) {
+    for (size_t i = 0; i < seq_len; i++) {
         if (i > 0) {
             memcpy(data, sep_str, sep_len);
             data += sep_len;
@@ -513,7 +513,7 @@ mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) {
             bad_implicit_conversion(sep);
         }
 
-        mp_uint_t sep_len;
+        size_t sep_len;
         const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
 
         if (sep_len == 0) {
@@ -611,7 +611,7 @@ STATIC mp_obj_t str_rsplit(size_t n_args, const mp_obj_t *args) {
     if (sep == mp_const_none) {
         mp_not_implemented("rsplit(None,n)");
     } else {
-        mp_uint_t sep_len;
+        size_t sep_len;
         const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
 
         if (sep_len == 0) {
@@ -642,7 +642,7 @@ STATIC mp_obj_t str_rsplit(size_t n_args, const mp_obj_t *args) {
         }
         if (idx != 0) {
             // We split less parts than split limit, now go cleanup surplus
-            mp_int_t used = org_splits + 1 - idx;
+            size_t used = org_splits + 1 - idx;
             memmove(res->items, &res->items[idx], used * sizeof(mp_obj_t));
             mp_seq_clear(res->items, used, res->alloc, sizeof(*res->items));
             res->len = used;
@@ -652,7 +652,7 @@ STATIC mp_obj_t str_rsplit(size_t n_args, const mp_obj_t *args) {
     return MP_OBJ_FROM_PTR(res);
 }
 
-STATIC mp_obj_t str_finder(mp_uint_t n_args, const mp_obj_t *args, mp_int_t direction, bool is_index) {
+STATIC mp_obj_t str_finder(size_t n_args, const mp_obj_t *args, int direction, bool is_index) {
     const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
     mp_check_self(MP_OBJ_IS_STR_OR_BYTES(args[0]));
 
@@ -738,7 +738,7 @@ STATIC mp_obj_t str_endswith(size_t n_args, const mp_obj_t *args) {
 
 enum { LSTRIP, RSTRIP, STRIP };
 
-STATIC mp_obj_t str_uni_strip(int type, mp_uint_t n_args, const mp_obj_t *args) {
+STATIC mp_obj_t str_uni_strip(int type, size_t n_args, const mp_obj_t *args) {
     mp_check_self(MP_OBJ_IS_STR_OR_BYTES(args[0]));
     const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
 
@@ -760,16 +760,16 @@ STATIC mp_obj_t str_uni_strip(int type, mp_uint_t n_args, const mp_obj_t *args)
 
     GET_STR_DATA_LEN(args[0], orig_str, orig_str_len);
 
-    mp_uint_t first_good_char_pos = 0;
+    size_t first_good_char_pos = 0;
     bool first_good_char_pos_set = false;
-    mp_uint_t last_good_char_pos = 0;
-    mp_uint_t i = 0;
-    mp_int_t delta = 1;
+    size_t last_good_char_pos = 0;
+    size_t i = 0;
+    int delta = 1;
     if (type == RSTRIP) {
         i = orig_str_len - 1;
         delta = -1;
     }
-    for (mp_uint_t len = orig_str_len; len > 0; len--) {
+    for (size_t len = orig_str_len; len > 0; len--) {
         if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
             if (!first_good_char_pos_set) {
                 first_good_char_pos_set = true;
@@ -799,7 +799,7 @@ STATIC mp_obj_t str_uni_strip(int type, mp_uint_t n_args, const mp_obj_t *args)
 
     assert(last_good_char_pos >= first_good_char_pos);
     //+1 to accomodate the last character
-    mp_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
+    size_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
     if (stripped_len == orig_str_len) {
         // If nothing was stripped, don't bother to dup original string
         // TODO: watch out for this case when we'll get to bytearray.strip()
@@ -890,7 +890,7 @@ STATIC NORETURN void terse_str_format_value_error(void) {
 #define terse_str_format_value_error()
 #endif
 
-STATIC vstr_t mp_obj_str_format_helper(const char *str, const char *top, int *arg_i, mp_uint_t n_args, const mp_obj_t *args, mp_map_t *kwargs) {
+STATIC vstr_t mp_obj_str_format_helper(const char *str, const char *top, int *arg_i, size_t n_args, const mp_obj_t *args, mp_map_t *kwargs) {
     vstr_t vstr;
     mp_print_t print;
     vstr_init_print(&vstr, 16, &print);
@@ -1306,12 +1306,12 @@ STATIC vstr_t mp_obj_str_format_helper(const char *str, const char *top, int *ar
             switch (type) {
                 case '\0': // no explicit format type implies 's'
                 case 's': {
-                    mp_uint_t slen;
+                    size_t slen;
                     const char *s = mp_obj_str_get_data(arg, &slen);
                     if (precision < 0) {
                         precision = slen;
                     }
-                    if (slen > (mp_uint_t)precision) {
+                    if (slen > (size_t)precision) {
                         slen = precision;
                     }
                     mp_print_strn(&print, s, slen, flags, fill, width);
@@ -1342,13 +1342,13 @@ mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs
     return mp_obj_new_str_from_vstr(&mp_type_str, &vstr);
 }
 
-STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_obj_t *args, mp_obj_t dict) {
+STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, size_t n_args, const mp_obj_t *args, mp_obj_t dict) {
     mp_check_self(MP_OBJ_IS_STR_OR_BYTES(pattern));
 
     GET_STR_DATA_LEN(pattern, str, len);
     const byte *start_str = str;
     bool is_bytes = MP_OBJ_IS_TYPE(pattern, &mp_type_bytes);
-    int arg_i = 0;
+    size_t arg_i = 0;
     vstr_t vstr;
     mp_print_t print;
     vstr_init_print(&vstr, 16, &print);
@@ -1369,6 +1369,10 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_o
 
         // Dictionary value lookup
         if (*str == '(') {
+            if (dict == MP_OBJ_NULL) {
+                mp_raise_TypeError("format requires a dict");
+            }
+            arg_i = 1; // we used up the single dict argument
             const byte *key = ++str;
             while (*str != ')') {
                 if (str >= top) {
@@ -1403,7 +1407,7 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_o
         int width = 0;
         if (str < top) {
             if (*str == '*') {
-                if ((uint)arg_i >= n_args) {
+                if (arg_i >= n_args) {
                     goto not_enough_args;
                 }
                 width = mp_obj_get_int(args[arg_i++]);
@@ -1416,7 +1420,7 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_o
         if (str < top && *str == '.') {
             if (++str < top) {
                 if (*str == '*') {
-                    if ((uint)arg_i >= n_args) {
+                    if (arg_i >= n_args) {
                         goto not_enough_args;
                     }
                     prec = mp_obj_get_int(args[arg_i++]);
@@ -1439,7 +1443,7 @@ incomplete_format:
 
         // Tuple value lookup
         if (arg == MP_OBJ_NULL) {
-            if ((uint)arg_i >= n_args) {
+            if (arg_i >= n_args) {
 not_enough_args:
                 mp_raise_TypeError("not enough arguments for format string");
             }
@@ -1448,7 +1452,7 @@ not_enough_args:
         switch (*str) {
             case 'c':
                 if (MP_OBJ_IS_STR(arg)) {
-                    mp_uint_t slen;
+                    size_t slen;
                     const char *s = mp_obj_str_get_data(arg, &slen);
                     if (slen != 1) {
                         mp_raise_TypeError("%%c requires int or char");
@@ -1527,7 +1531,7 @@ not_enough_args:
         }
     }
 
-    if ((uint)arg_i != n_args) {
+    if (arg_i != n_args) {
         mp_raise_TypeError("not all arguments converted during string formatting");
     }
 
@@ -1582,11 +1586,11 @@ STATIC mp_obj_t str_replace(size_t n_args, const mp_obj_t *args) {
     //   first pass computes the required length of the replaced string
     //   second pass does the replacements
     for (;;) {
-        mp_uint_t replaced_str_index = 0;
-        mp_uint_t num_replacements_done = 0;
+        size_t replaced_str_index = 0;
+        size_t num_replacements_done = 0;
         const byte *old_occurrence;
         const byte *offset_ptr = str;
-        mp_uint_t str_len_remain = str_len;
+        size_t str_len_remain = str_len;
         if (old_len == 0) {
             // if old_str is empty, copy new_str to start of replaced string
             // copy the replacement string
@@ -1596,7 +1600,7 @@ STATIC mp_obj_t str_replace(size_t n_args, const mp_obj_t *args) {
             replaced_str_index += new_len;
             num_replacements_done++;
         }
-        while (num_replacements_done != (mp_uint_t)max_rep && str_len_remain > 0 && (old_occurrence = find_subbytes(offset_ptr, str_len_remain, old, old_len, 1)) != NULL) {
+        while (num_replacements_done != (size_t)max_rep && str_len_remain > 0 && (old_occurrence = find_subbytes(offset_ptr, str_len_remain, old, old_len, 1)) != NULL) {
             if (old_len == 0) {
                 old_occurrence += 1;
             }
@@ -1682,7 +1686,7 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
 }
 
 #if MICROPY_PY_BUILTINS_STR_PARTITION
-STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, mp_int_t direction) {
+STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, int direction) {
     mp_check_self(MP_OBJ_IS_STR_OR_BYTES(self_in));
     mp_obj_type_t *self_type = mp_obj_get_type(self_in);
     if (self_type != mp_obj_get_type(arg)) {
@@ -1715,7 +1719,7 @@ STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, mp_int_t directi
 
     const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction);
     if (position_ptr != NULL) {
-        mp_uint_t position = position_ptr - str;
+        size_t position = position_ptr - str;
         result[0] = mp_obj_new_str_of_type(self_type, str, position);
         result[1] = arg;
         result[2] = mp_obj_new_str_of_type(self_type, str + position + sep_len, str_len - position - sep_len);
@@ -1739,7 +1743,7 @@ STATIC mp_obj_t str_caseconv(unichar (*op)(unichar), mp_obj_t self_in) {
     vstr_t vstr;
     vstr_init_len(&vstr, self_len);
     byte *data = (byte*)vstr.buf;
-    for (mp_uint_t i = 0; i < self_len; i++) {
+    for (size_t i = 0; i < self_len; i++) {
         *data++ = op(*self_data++);
     }
     return mp_obj_new_str_from_vstr(mp_obj_get_type(self_in), &vstr);
@@ -1761,7 +1765,7 @@ STATIC mp_obj_t str_uni_istype(bool (*f)(unichar), mp_obj_t self_in) {
     }
 
     if (f != unichar_isupper && f != unichar_islower) {
-        for (mp_uint_t i = 0; i < self_len; i++) {
+        for (size_t i = 0; i < self_len; i++) {
             if (!f(*self_data++)) {
                 return mp_const_false;
             }
@@ -1769,7 +1773,7 @@ STATIC mp_obj_t str_uni_istype(bool (*f)(unichar), mp_obj_t self_in) {
     } else {
         bool contains_alpha = false;
 
-        for (mp_uint_t i = 0; i < self_len; i++) { // only check alphanumeric characters
+        for (size_t i = 0; i < self_len; i++) { // only check alphanumeric characters
             if (unichar_isalpha(*self_data++)) {
                 contains_alpha = true;
                 if (!f(*(self_data - 1))) { // -1 because we already incremented above
@@ -1936,7 +1940,7 @@ STATIC const mp_rom_map_elem_t str8_locals_dict_table[] = {
 STATIC MP_DEFINE_CONST_DICT(str8_locals_dict, str8_locals_dict_table);
 
 #if !MICROPY_PY_BUILTINS_STR_UNICODE
-STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
+STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf);
 
 const mp_obj_type_t mp_type_str = {
     { &mp_type_type },
@@ -2013,7 +2017,7 @@ mp_obj_t mp_obj_new_str_from_vstr(const mp_obj_type_t *type, vstr_t *vstr) {
     return MP_OBJ_FROM_PTR(o);
 }
 
-mp_obj_t mp_obj_new_str(const char* data, mp_uint_t len, bool make_qstr_if_not_already) {
+mp_obj_t mp_obj_new_str(const char* data, size_t len, bool make_qstr_if_not_already) {
     if (make_qstr_if_not_already) {
         // use existing, or make a new qstr
         return MP_OBJ_NEW_QSTR(qstr_from_strn(data, len));
@@ -2034,7 +2038,7 @@ mp_obj_t mp_obj_str_intern(mp_obj_t str) {
     return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len));
 }
 
-mp_obj_t mp_obj_new_bytes(const byte* data, mp_uint_t len) {
+mp_obj_t mp_obj_new_bytes(const byte* data, size_t len) {
     return mp_obj_new_str_of_type(&mp_type_bytes, data, len);
 }
 
@@ -2061,9 +2065,10 @@ STATIC void bad_implicit_conversion(mp_obj_t self_in) {
     if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
         mp_raise_TypeError("can't convert to str implicitly");
     } else {
+        const qstr src_name = mp_obj_get_type(self_in)->name;
         nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
-            "can't convert '%s' object to str implicitly",
-            mp_obj_get_type_str(self_in)));
+            "can't convert '%q' object to %q implicitly",
+            src_name, src_name == MP_QSTR_str ? MP_QSTR_bytes : MP_QSTR_str));
     }
 }
 
@@ -2092,7 +2097,7 @@ const char *mp_obj_str_get_str(mp_obj_t self_in) {
     }
 }
 
-const char *mp_obj_str_get_data(mp_obj_t self_in, mp_uint_t *len) {
+const char *mp_obj_str_get_data(mp_obj_t self_in, size_t *len) {
     if (MP_OBJ_IS_STR_OR_BYTES(self_in)) {
         GET_STR_DATA_LEN(self_in, s, l);
         *len = l;
@@ -2120,7 +2125,7 @@ typedef struct _mp_obj_str8_it_t {
     mp_obj_base_t base;
     mp_fun_1_t iternext;
     mp_obj_t str;
-    mp_uint_t cur;
+    size_t cur;
 } mp_obj_str8_it_t;
 
 #if !MICROPY_PY_BUILTINS_STR_UNICODE
@@ -2136,8 +2141,9 @@ STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) {
     }
 }
 
-STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) {
-    mp_obj_str8_it_t *o = m_new_obj(mp_obj_str8_it_t);
+STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_str8_it_t) <= sizeof(mp_obj_iter_buf_t));
+    mp_obj_str8_it_t *o = (mp_obj_str8_it_t*)iter_buf;
     o->base.type = &mp_type_polymorph_iter;
     o->iternext = str_it_iternext;
     o->str = str;
@@ -2158,8 +2164,9 @@ STATIC mp_obj_t bytes_it_iternext(mp_obj_t self_in) {
     }
 }
 
-mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str) {
-    mp_obj_str8_it_t *o = m_new_obj(mp_obj_str8_it_t);
+mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_str8_it_t) <= sizeof(mp_obj_iter_buf_t));
+    mp_obj_str8_it_t *o = (mp_obj_str8_it_t*)iter_buf;
     o->base.type = &mp_type_polymorph_iter;
     o->iternext = bytes_it_iternext;
     o->str = str;
diff --git a/py/objstr.h b/py/objstr.h
index ad2777afbc..e92832d106 100644
--- a/py/objstr.h
+++ b/py/objstr.h
@@ -32,7 +32,7 @@ typedef struct _mp_obj_str_t {
     mp_obj_base_t base;
     mp_uint_t hash;
     // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
-    mp_uint_t len;
+    size_t len;
     const byte *data;
 } mp_obj_str_t;
 
@@ -72,7 +72,7 @@ mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_u
 
 const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
                              mp_obj_t index, bool is_slice);
-const byte *find_subbytes(const byte *haystack, mp_uint_t hlen, const byte *needle, mp_uint_t nlen, mp_int_t direction);
+const byte *find_subbytes(const byte *haystack, size_t hlen, const byte *needle, size_t nlen, int direction);
 
 MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(str_encode_obj);
 MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj);
diff --git a/py/objstringio.c b/py/objstringio.c
index a430fca3b7..9f4adeebbf 100644
--- a/py/objstringio.c
+++ b/py/objstringio.c
@@ -39,7 +39,7 @@
 #if MICROPY_CPYTHON_COMPAT
 STATIC void check_stringio_is_open(const mp_obj_stringio_t *o) {
     if (o->vstr == NULL) {
-        mp_raise_msg(&mp_type_ValueError, "I/O operation on closed file");
+        mp_raise_ValueError("I/O operation on closed file");
     }
 }
 #else
@@ -56,6 +56,9 @@ STATIC mp_uint_t stringio_read(mp_obj_t o_in, void *buf, mp_uint_t size, int *er
     (void)errcode;
     mp_obj_stringio_t *o = MP_OBJ_TO_PTR(o_in);
     check_stringio_is_open(o);
+    if (o->vstr->len <= o->pos) {  // read to EOF, or seeked to EOF or beyond
+        return 0;
+    }
     mp_uint_t remaining = o->vstr->len - o->pos;
     if (size > remaining) {
         size = remaining;
@@ -69,22 +72,27 @@ STATIC mp_uint_t stringio_write(mp_obj_t o_in, const void *buf, mp_uint_t size,
     (void)errcode;
     mp_obj_stringio_t *o = MP_OBJ_TO_PTR(o_in);
     check_stringio_is_open(o);
-    mp_int_t remaining = o->vstr->alloc - o->pos;
+    mp_uint_t new_pos = o->pos + size;
+    if (new_pos < size) {
+        // Writing <size> bytes will overflow o->pos beyond limit of mp_uint_t.
+        *errcode = MP_EFBIG;
+        return MP_STREAM_ERROR;
+    }
     mp_uint_t org_len = o->vstr->len;
-    if ((mp_int_t)size > remaining) {
+    if (new_pos > o->vstr->alloc) {
         // Take all what's already allocated...
         o->vstr->len = o->vstr->alloc;
         // ... and add more
-        vstr_add_len(o->vstr, size - remaining);
+        vstr_add_len(o->vstr, new_pos - o->vstr->alloc);
     }
     // If there was a seek past EOF, clear the hole
     if (o->pos > org_len) {
         memset(o->vstr->buf + org_len, 0, o->pos - org_len);
     }
     memcpy(o->vstr->buf + o->pos, buf, size);
-    o->pos += size;
-    if (o->pos > o->vstr->len) {
-        o->vstr->len = o->pos;
+    o->pos = new_pos;
+    if (new_pos > o->vstr->len) {
+        o->vstr->len = new_pos;
     }
     return size;
 }
@@ -147,21 +155,34 @@ STATIC mp_obj_t stringio___exit__(size_t n_args, const mp_obj_t *args) {
 }
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(stringio___exit___obj, 4, 4, stringio___exit__);
 
-STATIC mp_obj_stringio_t *stringio_new(const mp_obj_type_t *type) {
+STATIC mp_obj_stringio_t *stringio_new(const mp_obj_type_t *type, mp_uint_t alloc) {
     mp_obj_stringio_t *o = m_new_obj(mp_obj_stringio_t);
     o->base.type = type;
-    o->vstr = vstr_new(16);
+    o->vstr = vstr_new(alloc);
     o->pos = 0;
     return o;
 }
 
 STATIC mp_obj_t stringio_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
     (void)n_kw; // TODO check n_kw==0
-    mp_obj_stringio_t *o = stringio_new(type_in);
+
+    mp_uint_t sz = 16;
+    bool initdata = false;
+    mp_buffer_info_t bufinfo;
 
     if (n_args > 0) {
-        mp_buffer_info_t bufinfo;
-        mp_get_buffer_raise(args[0], &bufinfo, MP_BUFFER_READ);
+        if (MP_OBJ_IS_INT(args[0])) {
+            sz = mp_obj_get_int(args[0]);
+        } else {
+            mp_get_buffer_raise(args[0], &bufinfo, MP_BUFFER_READ);
+            sz = bufinfo.len;
+            initdata = true;
+        }
+    }
+
+    mp_obj_stringio_t *o = stringio_new(type_in, sz);
+
+    if (initdata) {
         stringio_write(MP_OBJ_FROM_PTR(o), bufinfo.buf, bufinfo.len, NULL);
         // Cur ptr is always at the beginning of buffer at the construction
         o->pos = 0;
@@ -202,7 +223,7 @@ const mp_obj_type_t mp_type_stringio = {
     .name = MP_QSTR_StringIO,
     .print = stringio_print,
     .make_new = stringio_make_new,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = mp_stream_unbuffered_iter,
     .protocol = &stringio_stream_p,
     .locals_dict = (mp_obj_dict_t*)&stringio_locals_dict,
@@ -214,7 +235,7 @@ const mp_obj_type_t mp_type_bytesio = {
     .name = MP_QSTR_BytesIO,
     .print = stringio_print,
     .make_new = stringio_make_new,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = mp_stream_unbuffered_iter,
     .protocol = &bytesio_stream_p,
     .locals_dict = (mp_obj_dict_t*)&stringio_locals_dict,
diff --git a/py/objstrunicode.c b/py/objstrunicode.c
index 8444a26892..9a6ce9b9a2 100644
--- a/py/objstrunicode.c
+++ b/py/objstrunicode.c
@@ -36,7 +36,7 @@
 
 #if MICROPY_PY_BUILTINS_STR_UNICODE
 
-STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
+STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf);
 
 /******************************************************************************/
 /* str                                                                        */
@@ -120,7 +120,7 @@ const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, s
     // so it must handle bytes.
     if (type == &mp_type_bytes) {
         // Taken from objstr.c:str_index_to_ptr()
-        mp_uint_t index_val = mp_get_index(type, self_len, index, is_slice);
+        size_t index_val = mp_get_index(type, self_len, index, is_slice);
         return self_data + index_val;
     }
 
@@ -284,7 +284,7 @@ typedef struct _mp_obj_str_it_t {
     mp_obj_base_t base;
     mp_fun_1_t iternext;
     mp_obj_t str;
-    mp_uint_t cur;
+    size_t cur;
 } mp_obj_str_it_t;
 
 STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) {
@@ -301,8 +301,9 @@ STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) {
     }
 }
 
-STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) {
-    mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
+STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_str_it_t) <= sizeof(mp_obj_iter_buf_t));
+    mp_obj_str_it_t *o = (mp_obj_str_it_t*)iter_buf;
     o->base.type = &mp_type_polymorph_iter;
     o->iternext = str_it_iternext;
     o->str = str;
diff --git a/py/objtuple.c b/py/objtuple.c
index c547da9406..eaf0e37f47 100644
--- a/py/objtuple.c
+++ b/py/objtuple.c
@@ -32,8 +32,6 @@
 #include "py/runtime0.h"
 #include "py/runtime.h"
 
-STATIC mp_obj_t mp_obj_new_tuple_iterator(mp_obj_tuple_t *tuple, mp_uint_t cur);
-
 /******************************************************************************/
 /* tuple                                                                      */
 
@@ -45,7 +43,7 @@ void mp_obj_tuple_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t
         mp_print_str(print, "(");
         kind = PRINT_REPR;
     }
-    for (mp_uint_t i = 0; i < o->len; i++) {
+    for (size_t i = 0; i < o->len; i++) {
         if (i > 0) {
             mp_print_str(print, ", ");
         }
@@ -80,11 +78,11 @@ STATIC mp_obj_t mp_obj_tuple_make_new(const mp_obj_type_t *type_in, size_t n_arg
 
             // TODO optimise for cases where we know the length of the iterator
 
-            mp_uint_t alloc = 4;
-            mp_uint_t len = 0;
+            size_t alloc = 4;
+            size_t len = 0;
             mp_obj_t *items = m_new(mp_obj_t, alloc);
 
-            mp_obj_t iterable = mp_getiter(args[0]);
+            mp_obj_t iterable = mp_getiter(args[0], NULL);
             mp_obj_t item;
             while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
                 if (len >= alloc) {
@@ -127,7 +125,7 @@ mp_obj_t mp_obj_tuple_unary_op(mp_uint_t op, mp_obj_t self_in) {
         case MP_UNARY_OP_HASH: {
             // start hash with pointer to empty tuple, to make it fairly unique
             mp_int_t hash = (mp_int_t)mp_const_empty_tuple;
-            for (mp_uint_t i = 0; i < self->len; i++) {
+            for (size_t i = 0; i < self->len; i++) {
                 hash += MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, self->items[i]));
             }
             return MP_OBJ_NEW_SMALL_INT(hash);
@@ -140,7 +138,8 @@ mp_obj_t mp_obj_tuple_unary_op(mp_uint_t op, mp_obj_t self_in) {
 mp_obj_t mp_obj_tuple_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs) {
     mp_obj_tuple_t *o = MP_OBJ_TO_PTR(lhs);
     switch (op) {
-        case MP_BINARY_OP_ADD: {
+        case MP_BINARY_OP_ADD:
+        case MP_BINARY_OP_INPLACE_ADD: {
             if (!mp_obj_is_subclass_fast(MP_OBJ_FROM_PTR(mp_obj_get_type(rhs)), MP_OBJ_FROM_PTR(&mp_type_tuple))) {
                 return MP_OBJ_NULL; // op not supported
             }
@@ -188,17 +187,13 @@ mp_obj_t mp_obj_tuple_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
             return MP_OBJ_FROM_PTR(res);
         }
 #endif
-        mp_uint_t index_value = mp_get_index(self->base.type, self->len, index, false);
+        size_t index_value = mp_get_index(self->base.type, self->len, index, false);
         return self->items[index_value];
     } else {
         return MP_OBJ_NULL; // op not supported
     }
 }
 
-mp_obj_t mp_obj_tuple_getiter(mp_obj_t o_in) {
-    return mp_obj_new_tuple_iterator(MP_OBJ_TO_PTR(o_in), 0);
-}
-
 STATIC mp_obj_t tuple_count(mp_obj_t self_in, mp_obj_t value) {
     mp_check_self(MP_OBJ_IS_TYPE(self_in, &mp_type_tuple));
     mp_obj_tuple_t *self = MP_OBJ_TO_PTR(self_in);
@@ -235,7 +230,7 @@ const mp_obj_type_t mp_type_tuple = {
 // the zero-length tuple
 const mp_obj_tuple_t mp_const_empty_tuple_obj = {{&mp_type_tuple}, 0};
 
-mp_obj_t mp_obj_new_tuple(mp_uint_t n, const mp_obj_t *items) {
+mp_obj_t mp_obj_new_tuple(size_t n, const mp_obj_t *items) {
     if (n == 0) {
         return mp_const_empty_tuple;
     }
@@ -243,14 +238,14 @@ mp_obj_t mp_obj_new_tuple(mp_uint_t n, const mp_obj_t *items) {
     o->base.type = &mp_type_tuple;
     o->len = n;
     if (items) {
-        for (mp_uint_t i = 0; i < n; i++) {
+        for (size_t i = 0; i < n; i++) {
             o->items[i] = items[i];
         }
     }
     return MP_OBJ_FROM_PTR(o);
 }
 
-void mp_obj_tuple_get(mp_obj_t self_in, mp_uint_t *len, mp_obj_t **items) {
+void mp_obj_tuple_get(mp_obj_t self_in, size_t *len, mp_obj_t **items) {
     assert(MP_OBJ_IS_TYPE(self_in, &mp_type_tuple));
     mp_obj_tuple_t *self = MP_OBJ_TO_PTR(self_in);
     *len = self->len;
@@ -270,7 +265,7 @@ typedef struct _mp_obj_tuple_it_t {
     mp_obj_base_t base;
     mp_fun_1_t iternext;
     mp_obj_tuple_t *tuple;
-    mp_uint_t cur;
+    size_t cur;
 } mp_obj_tuple_it_t;
 
 STATIC mp_obj_t tuple_it_iternext(mp_obj_t self_in) {
@@ -284,11 +279,12 @@ STATIC mp_obj_t tuple_it_iternext(mp_obj_t self_in) {
     }
 }
 
-STATIC mp_obj_t mp_obj_new_tuple_iterator(mp_obj_tuple_t *tuple, mp_uint_t cur) {
-    mp_obj_tuple_it_t *o = m_new_obj(mp_obj_tuple_it_t);
+mp_obj_t mp_obj_tuple_getiter(mp_obj_t o_in, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(mp_obj_tuple_it_t) <= sizeof(mp_obj_iter_buf_t));
+    mp_obj_tuple_it_t *o = (mp_obj_tuple_it_t*)iter_buf;
     o->base.type = &mp_type_polymorph_iter;
     o->iternext = tuple_it_iternext;
-    o->tuple = tuple;
-    o->cur = cur;
+    o->tuple = MP_OBJ_TO_PTR(o_in);
+    o->cur = 0;
     return MP_OBJ_FROM_PTR(o);
 }
diff --git a/py/objtuple.h b/py/objtuple.h
index ebfc5c4066..555c3b3c2c 100644
--- a/py/objtuple.h
+++ b/py/objtuple.h
@@ -30,13 +30,13 @@
 
 typedef struct _mp_obj_tuple_t {
     mp_obj_base_t base;
-    mp_uint_t len;
+    size_t len;
     mp_obj_t items[];
 } mp_obj_tuple_t;
 
 typedef struct _mp_rom_obj_tuple_t {
     mp_obj_base_t base;
-    mp_uint_t len;
+    size_t len;
     mp_rom_obj_t items[];
 } mp_rom_obj_tuple_t;
 
@@ -44,7 +44,7 @@ void mp_obj_tuple_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t
 mp_obj_t mp_obj_tuple_unary_op(mp_uint_t op, mp_obj_t self_in);
 mp_obj_t mp_obj_tuple_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs);
 mp_obj_t mp_obj_tuple_subscr(mp_obj_t base, mp_obj_t index, mp_obj_t value);
-mp_obj_t mp_obj_tuple_getiter(mp_obj_t o_in);
+mp_obj_t mp_obj_tuple_getiter(mp_obj_t o_in, mp_obj_iter_buf_t *iter_buf);
 
 extern const mp_obj_type_t mp_type_attrtuple;
 
@@ -59,6 +59,6 @@ extern const mp_obj_type_t mp_type_attrtuple;
 void mp_obj_attrtuple_print_helper(const mp_print_t *print, const qstr *fields, mp_obj_tuple_t *o);
 #endif
 
-mp_obj_t mp_obj_new_attrtuple(const qstr *fields, mp_uint_t n, const mp_obj_t *items);
+mp_obj_t mp_obj_new_attrtuple(const qstr *fields, size_t n, const mp_obj_t *items);
 
 #endif // __MICROPY_INCLUDED_PY_OBJTUPLE_H__
diff --git a/py/objtype.c b/py/objtype.c
index c20b0693e5..2a119e40fb 100644
--- a/py/objtype.c
+++ b/py/objtype.c
@@ -48,7 +48,7 @@ STATIC mp_obj_t static_class_method_make_new(const mp_obj_type_t *self_in, size_
 /******************************************************************************/
 // instance object
 
-STATIC mp_obj_t mp_obj_new_instance(const mp_obj_type_t *class, uint subobjs) {
+STATIC mp_obj_t mp_obj_new_instance(const mp_obj_type_t *class, size_t subobjs) {
     mp_obj_instance_t *o = m_new_obj_var(mp_obj_instance_t, mp_obj_t, subobjs);
     o->base.type = class;
     mp_map_init(&o->members, 0);
@@ -57,26 +57,34 @@ STATIC mp_obj_t mp_obj_new_instance(const mp_obj_type_t *class, uint subobjs) {
 }
 
 STATIC int instance_count_native_bases(const mp_obj_type_t *type, const mp_obj_type_t **last_native_base) {
-    mp_uint_t len = type->bases_tuple->len;
-    mp_obj_t *items = type->bases_tuple->items;
-
     int count = 0;
-    for (uint i = 0; i < len; i++) {
-        assert(MP_OBJ_IS_TYPE(items[i], &mp_type_type));
-        const mp_obj_type_t *bt = (const mp_obj_type_t *)MP_OBJ_TO_PTR(items[i]);
-        if (bt == &mp_type_object) {
-            // Not a "real" type
-            continue;
-        }
-        if (mp_obj_is_native_type(bt)) {
-            *last_native_base = bt;
-            count++;
+    for (;;) {
+        if (type == &mp_type_object) {
+            // Not a "real" type, end search here.
+            return count;
+        } else if (mp_obj_is_native_type(type)) {
+            // Native types don't have parents (at least not from our perspective) so end.
+            *last_native_base = type;
+            return count + 1;
+        } else if (type->parent == NULL) {
+            // No parents so end search here.
+            return count;
+        } else if (((mp_obj_base_t*)type->parent)->type == &mp_type_tuple) {
+            // Multiple parents, search through them all recursively.
+            const mp_obj_tuple_t *parent_tuple = type->parent;
+            const mp_obj_t *item = parent_tuple->items;
+            const mp_obj_t *top = item + parent_tuple->len;
+            for (; item < top; ++item) {
+                assert(MP_OBJ_IS_TYPE(*item, &mp_type_type));
+                const mp_obj_type_t *bt = (const mp_obj_type_t *)MP_OBJ_TO_PTR(*item);
+                count += instance_count_native_bases(bt, last_native_base);
+            }
+            return count;
         } else {
-            count += instance_count_native_bases(bt, last_native_base);
+            // A single parent, use iteration to continue the search.
+            type = type->parent;
         }
     }
-
-    return count;
 }
 
 // TODO
@@ -96,7 +104,7 @@ STATIC int instance_count_native_bases(const mp_obj_type_t *type, const mp_obj_t
 struct class_lookup_data {
     mp_obj_instance_t *obj;
     qstr attr;
-    mp_uint_t meth_offset;
+    size_t meth_offset;
     mp_obj_t *dest;
     bool is_type;
 };
@@ -160,32 +168,31 @@ STATIC void mp_obj_class_lookup(struct class_lookup_data  *lookup, const mp_obj_
 
         // attribute not found, keep searching base classes
 
-        // for a const struct, this entry might be NULL
-        if (type->bases_tuple == NULL) {
+        if (type->parent == NULL) {
             return;
-        }
-
-        mp_uint_t len = type->bases_tuple->len;
-        mp_obj_t *items = type->bases_tuple->items;
-        if (len == 0) {
-            return;
-        }
-        for (uint i = 0; i < len - 1; i++) {
-            assert(MP_OBJ_IS_TYPE(items[i], &mp_type_type));
-            mp_obj_type_t *bt = (mp_obj_type_t*)MP_OBJ_TO_PTR(items[i]);
-            if (bt == &mp_type_object) {
-                // Not a "real" type
-                continue;
-            }
-            mp_obj_class_lookup(lookup, bt);
-            if (lookup->dest[0] != MP_OBJ_NULL) {
-                return;
+        } else if (((mp_obj_base_t*)type->parent)->type == &mp_type_tuple) {
+            const mp_obj_tuple_t *parent_tuple = type->parent;
+            const mp_obj_t *item = parent_tuple->items;
+            const mp_obj_t *top = item + parent_tuple->len - 1;
+            for (; item < top; ++item) {
+                assert(MP_OBJ_IS_TYPE(*item, &mp_type_type));
+                mp_obj_type_t *bt = (mp_obj_type_t*)MP_OBJ_TO_PTR(*item);
+                if (bt == &mp_type_object) {
+                    // Not a "real" type
+                    continue;
+                }
+                mp_obj_class_lookup(lookup, bt);
+                if (lookup->dest[0] != MP_OBJ_NULL) {
+                    return;
+                }
             }
-        }
 
-        // search last base (simple tail recursion elimination)
-        assert(MP_OBJ_IS_TYPE(items[len - 1], &mp_type_type));
-        type = (mp_obj_type_t*)MP_OBJ_TO_PTR(items[len - 1]);
+            // search last base (simple tail recursion elimination)
+            assert(MP_OBJ_IS_TYPE(*item, &mp_type_type));
+            type = (mp_obj_type_t*)MP_OBJ_TO_PTR(*item);
+        } else {
+            type = type->parent;
+        }
         if (type == &mp_type_object) {
             // Not a "real" type
             return;
@@ -239,7 +246,7 @@ mp_obj_t mp_obj_instance_make_new(const mp_obj_type_t *self, size_t n_args, size
     assert(mp_obj_is_instance_type(self));
 
     const mp_obj_type_t *native_base;
-    uint num_native_bases = instance_count_native_bases(self, &native_base);
+    size_t num_native_bases = instance_count_native_bases(self, &native_base);
     assert(num_native_bases < 2);
 
     mp_obj_instance_t *o = MP_OBJ_TO_PTR(mp_obj_new_instance(self, num_native_bases));
@@ -284,7 +291,7 @@ mp_obj_t mp_obj_instance_make_new(const mp_obj_type_t *self, size_t n_args, size
     }
 
     // https://docs.python.org/3.4/reference/datamodel.html#object.__new__
-    // "If __new__() does not return an instance of cls, then the new instance’s __init__() method will not be invoked."
+    // "If __new__() does not return an instance of cls, then the new instance's __init__() method will not be invoked."
     if (mp_obj_get_type(new_ret) != self) {
         return new_ret;
     }
@@ -311,7 +318,7 @@ mp_obj_t mp_obj_instance_make_new(const mp_obj_type_t *self, size_t n_args, size
         }
         if (init_ret != mp_const_none) {
             if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-                mp_raise_msg(&mp_type_TypeError, "__init__() should return None");
+                mp_raise_TypeError("__init__() should return None");
             } else {
                 nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                     "__init__() should return None, not '%s'", mp_obj_get_type_str(init_ret)));
@@ -477,7 +484,7 @@ STATIC void mp_obj_instance_load_attr(mp_obj_t self_in, qstr attr, mp_obj_t *des
         // it will not result in modifications to the actual instance members.
         mp_map_t *map = &self->members;
         mp_obj_t attr_dict = mp_obj_new_dict(map->used);
-        for (mp_uint_t i = 0; i < map->alloc; ++i) {
+        for (size_t i = 0; i < map->alloc; ++i) {
             if (MP_MAP_SLOT_IS_FILLED(map, i)) {
                 mp_obj_dict_store(attr_dict, map->table[i].key, map->table[i].value);
             }
@@ -533,6 +540,15 @@ STATIC void mp_obj_instance_load_attr(mp_obj_t self_in, qstr attr, mp_obj_t *des
 
     // try __getattr__
     if (attr != MP_QSTR___getattr__) {
+        #if MICROPY_PY_DELATTR_SETATTR
+        // If the requested attr is __setattr__/__delattr__ then don't delegate the lookup
+        // to __getattr__.  If we followed CPython's behaviour then __setattr__/__delattr__
+        // would have already been found in the "object" base class.
+        if (attr == MP_QSTR___setattr__ || attr == MP_QSTR___delattr__) {
+            return;
+        }
+        #endif
+
         mp_obj_t dest2[3];
         mp_load_method_maybe(self_in, MP_QSTR___getattr__, dest2);
         if (dest2[0] != MP_OBJ_NULL) {
@@ -626,10 +642,35 @@ STATIC bool mp_obj_instance_store_attr(mp_obj_t self_in, qstr attr, mp_obj_t val
 
     if (value == MP_OBJ_NULL) {
         // delete attribute
+        #if MICROPY_PY_DELATTR_SETATTR
+        // try __delattr__ first
+        mp_obj_t attr_delattr_method[3];
+        mp_load_method_maybe(self_in, MP_QSTR___delattr__, attr_delattr_method);
+        if (attr_delattr_method[0] != MP_OBJ_NULL) {
+            // __delattr__ exists, so call it
+            attr_delattr_method[2] = MP_OBJ_NEW_QSTR(attr);
+            mp_call_method_n_kw(1, 0, attr_delattr_method);
+            return true;
+        }
+        #endif
+
         mp_map_elem_t *elem = mp_map_lookup(&self->members, MP_OBJ_NEW_QSTR(attr), MP_MAP_LOOKUP_REMOVE_IF_FOUND);
         return elem != NULL;
     } else {
         // store attribute
+        #if MICROPY_PY_DELATTR_SETATTR
+        // try __setattr__ first
+        mp_obj_t attr_setattr_method[4];
+        mp_load_method_maybe(self_in, MP_QSTR___setattr__, attr_setattr_method);
+        if (attr_setattr_method[0] != MP_OBJ_NULL) {
+            // __setattr__ exists, so call it
+            attr_setattr_method[2] = MP_OBJ_NEW_QSTR(attr);
+            attr_setattr_method[3] = value;
+            mp_call_method_n_kw(2, 0, attr_setattr_method);
+            return true;
+        }
+        #endif
+
         mp_map_lookup(&self->members, MP_OBJ_NEW_QSTR(attr), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = value;
         return true;
     }
@@ -654,7 +695,7 @@ STATIC mp_obj_t instance_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value
         .dest = member,
         .is_type = false,
     };
-    uint meth_args;
+    size_t meth_args;
     if (value == MP_OBJ_NULL) {
         // delete item
         lookup.attr = MP_QSTR___delitem__;
@@ -710,7 +751,7 @@ mp_obj_t mp_obj_instance_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons
     mp_obj_t call = mp_obj_instance_get_call(self_in, member);
     if (call == MP_OBJ_NULL) {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "object not callable");
+            mp_raise_TypeError("object not callable");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "'%s' object is not callable", mp_obj_get_type_str(self_in)));
@@ -724,7 +765,7 @@ mp_obj_t mp_obj_instance_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons
     return mp_call_method_self_n_kw(member[0], member[1], n_args, n_kw, args);
 }
 
-STATIC mp_obj_t instance_getiter(mp_obj_t self_in) {
+STATIC mp_obj_t instance_getiter(mp_obj_t self_in, mp_obj_iter_buf_t *iter_buf) {
     mp_obj_instance_t *self = MP_OBJ_TO_PTR(self_in);
     mp_obj_t member[2] = {MP_OBJ_NULL};
     struct class_lookup_data lookup = {
@@ -739,7 +780,7 @@ STATIC mp_obj_t instance_getiter(mp_obj_t self_in) {
         return MP_OBJ_NULL;
     } else if (member[0] == MP_OBJ_SENTINEL) {
         mp_obj_type_t *type = mp_obj_get_type(self->subobj[0]);
-        return type->getiter(self->subobj[0]);
+        return type->getiter(self->subobj[0], iter_buf);
     } else {
         return mp_call_method_n_kw(0, 0, member);
     }
@@ -792,7 +833,7 @@ STATIC mp_obj_t type_make_new(const mp_obj_type_t *type_in, size_t n_args, size_
             return mp_obj_new_type(mp_obj_str_get_qstr(args[0]), args[1], args[2]);
 
         default:
-            mp_raise_msg(&mp_type_TypeError, "type takes 1 or 3 arguments");
+            mp_raise_TypeError("type takes 1 or 3 arguments");
     }
 }
 
@@ -803,7 +844,7 @@ STATIC mp_obj_t type_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp
 
     if (self->make_new == NULL) {
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "cannot create instance");
+            mp_raise_TypeError("cannot create instance");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "cannot create '%q' instances", self->name));
@@ -882,16 +923,16 @@ mp_obj_t mp_obj_new_type(qstr name, mp_obj_t bases_tuple, mp_obj_t locals_dict)
     // TODO might need to make a copy of locals_dict; at least that's how CPython does it
 
     // Basic validation of base classes
-    mp_uint_t len;
+    size_t len;
     mp_obj_t *items;
     mp_obj_tuple_get(bases_tuple, &len, &items);
-    for (uint i = 0; i < len; i++) {
+    for (size_t i = 0; i < len; i++) {
         assert(MP_OBJ_IS_TYPE(items[i], &mp_type_type));
         mp_obj_type_t *t = MP_OBJ_TO_PTR(items[i]);
         // TODO: Verify with CPy, tested on function type
         if (t->make_new == NULL) {
             if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-                mp_raise_msg(&mp_type_TypeError, "type is not an acceptable base type");
+                mp_raise_TypeError("type is not an acceptable base type");
             } else {
                 nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                     "type '%q' is not an acceptable base type", t->name));
@@ -912,20 +953,27 @@ mp_obj_t mp_obj_new_type(qstr name, mp_obj_t bases_tuple, mp_obj_t locals_dict)
     o->getiter = instance_getiter;
     //o->iternext = ; not implemented
     o->buffer_p.get_buffer = instance_get_buffer;
-    // Inherit protocol from a base class. This allows to define an
-    // abstract base class which would translate C-level protocol to
-    // Python method calls, and any subclass inheriting from it will
-    // support this feature.
+
     if (len > 0) {
+        // Inherit protocol from a base class. This allows to define an
+        // abstract base class which would translate C-level protocol to
+        // Python method calls, and any subclass inheriting from it will
+        // support this feature.
         o->protocol = ((mp_obj_type_t*)MP_OBJ_TO_PTR(items[0]))->protocol;
+
+        if (len >= 2) {
+            o->parent = MP_OBJ_TO_PTR(bases_tuple);
+        } else {
+            o->parent = MP_OBJ_TO_PTR(items[0]);
+        }
     }
-    o->bases_tuple = MP_OBJ_TO_PTR(bases_tuple);
+
     o->locals_dict = MP_OBJ_TO_PTR(locals_dict);
 
     const mp_obj_type_t *native_base;
-    uint num_native_bases = instance_count_native_bases(o, &native_base);
+    size_t num_native_bases = instance_count_native_bases(o, &native_base);
     if (num_native_bases > 1) {
-        mp_raise_msg(&mp_type_TypeError, "multiple bases have instance lay-out conflict");
+        mp_raise_TypeError("multiple bases have instance lay-out conflict");
     }
 
     mp_map_t *locals_map = &o->locals_dict->map;
@@ -965,7 +1013,9 @@ STATIC mp_obj_t super_make_new(const mp_obj_type_t *type_in, size_t n_args, size
     // 0 arguments are turned into 2 in the compiler
     // 1 argument is not yet implemented
     mp_arg_check_num(n_args, n_kw, 2, 2, false);
-    return mp_obj_new_super(args[0], args[1]);
+    mp_obj_super_t *o = m_new_obj(mp_obj_super_t);
+    *o = (mp_obj_super_t){{type_in}, args[0], args[1]};
+    return MP_OBJ_FROM_PTR(o);
 }
 
 STATIC void super_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
@@ -981,13 +1031,6 @@ STATIC void super_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
 
     mp_obj_type_t *type = MP_OBJ_TO_PTR(self->type);
 
-    // for a const struct, this entry might be NULL
-    if (type->bases_tuple == NULL) {
-        return;
-    }
-
-    mp_uint_t len = type->bases_tuple->len;
-    mp_obj_t *items = type->bases_tuple->items;
     struct class_lookup_data lookup = {
         .obj = MP_OBJ_TO_PTR(self->obj),
         .attr = attr,
@@ -995,13 +1038,27 @@ STATIC void super_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
         .dest = dest,
         .is_type = false,
     };
-    for (uint i = 0; i < len; i++) {
-        assert(MP_OBJ_IS_TYPE(items[i], &mp_type_type));
-        mp_obj_class_lookup(&lookup, (mp_obj_type_t*)MP_OBJ_TO_PTR(items[i]));
+
+    if (type->parent == NULL) {
+        // no parents, do nothing
+    } else if (((mp_obj_base_t*)type->parent)->type == &mp_type_tuple) {
+        const mp_obj_tuple_t *parent_tuple = type->parent;
+        size_t len = parent_tuple->len;
+        const mp_obj_t *items = parent_tuple->items;
+        for (size_t i = 0; i < len; i++) {
+            assert(MP_OBJ_IS_TYPE(items[i], &mp_type_type));
+            mp_obj_class_lookup(&lookup, (mp_obj_type_t*)MP_OBJ_TO_PTR(items[i]));
+            if (dest[0] != MP_OBJ_NULL) {
+                return;
+            }
+        }
+    } else {
+        mp_obj_class_lookup(&lookup, type->parent);
         if (dest[0] != MP_OBJ_NULL) {
             return;
         }
     }
+
     mp_obj_class_lookup(&lookup, &mp_type_object);
 }
 
@@ -1013,10 +1070,9 @@ const mp_obj_type_t mp_type_super = {
     .attr = super_attr,
 };
 
-mp_obj_t mp_obj_new_super(mp_obj_t type, mp_obj_t obj) {
-    mp_obj_super_t *o = m_new_obj(mp_obj_super_t);
-    *o = (mp_obj_super_t){{&mp_type_super}, type, obj};
-    return MP_OBJ_FROM_PTR(o);
+void mp_load_super_method(qstr attr, mp_obj_t *dest) {
+    mp_obj_super_t super = {{&mp_type_super}, dest[1], dest[2]};
+    mp_load_method(MP_OBJ_FROM_PTR(&super), attr, dest);
 }
 
 /******************************************************************************/
@@ -1039,32 +1095,33 @@ bool mp_obj_is_subclass_fast(mp_const_obj_t object, mp_const_obj_t classinfo) {
 
         const mp_obj_type_t *self = MP_OBJ_TO_PTR(object);
 
-        // for a const struct, this entry might be NULL
-        if (self->bases_tuple == NULL) {
-            return false;
-        }
-
-        // get the base objects (they should be type objects)
-        mp_uint_t len = self->bases_tuple->len;
-        mp_obj_t *items = self->bases_tuple->items;
-        if (len == 0) {
+        if (self->parent == NULL) {
+            // type has no parents
             return false;
-        }
-
-        // iterate through the base objects
-        for (uint i = 0; i < len - 1; i++) {
-            if (mp_obj_is_subclass_fast(items[i], classinfo)) {
-                return true;
+        } else if (((mp_obj_base_t*)self->parent)->type == &mp_type_tuple) {
+            // get the base objects (they should be type objects)
+            const mp_obj_tuple_t *parent_tuple = self->parent;
+            const mp_obj_t *item = parent_tuple->items;
+            const mp_obj_t *top = item + parent_tuple->len - 1;
+
+            // iterate through the base objects
+            for (; item < top; ++item) {
+                if (mp_obj_is_subclass_fast(*item, classinfo)) {
+                    return true;
+                }
             }
-        }
 
-        // search last base (simple tail recursion elimination)
-        object = items[len - 1];
+            // search last base (simple tail recursion elimination)
+            object = *item;
+        } else {
+            // type has 1 parent
+            object = MP_OBJ_FROM_PTR(self->parent);
+        }
     }
 }
 
 STATIC mp_obj_t mp_obj_is_subclass(mp_obj_t object, mp_obj_t classinfo) {
-    mp_uint_t len;
+    size_t len;
     mp_obj_t *items;
     if (MP_OBJ_IS_TYPE(classinfo, &mp_type_type)) {
         len = 1;
@@ -1072,10 +1129,10 @@ STATIC mp_obj_t mp_obj_is_subclass(mp_obj_t object, mp_obj_t classinfo) {
     } else if (MP_OBJ_IS_TYPE(classinfo, &mp_type_tuple)) {
         mp_obj_tuple_get(classinfo, &len, &items);
     } else {
-        mp_raise_msg(&mp_type_TypeError, "issubclass() arg 2 must be a class or a tuple of classes");
+        mp_raise_TypeError("issubclass() arg 2 must be a class or a tuple of classes");
     }
 
-    for (uint i = 0; i < len; i++) {
+    for (size_t i = 0; i < len; i++) {
         // We explicitly check for 'object' here since no-one explicitly derives from it
         if (items[i] == MP_OBJ_FROM_PTR(&mp_type_object) || mp_obj_is_subclass_fast(object, items[i])) {
             return mp_const_true;
@@ -1086,7 +1143,7 @@ STATIC mp_obj_t mp_obj_is_subclass(mp_obj_t object, mp_obj_t classinfo) {
 
 STATIC mp_obj_t mp_builtin_issubclass(mp_obj_t object, mp_obj_t classinfo) {
     if (!MP_OBJ_IS_TYPE(object, &mp_type_type)) {
-        mp_raise_msg(&mp_type_TypeError, "issubclass() arg 1 must be a class");
+        mp_raise_TypeError("issubclass() arg 1 must be a class");
     }
     return mp_obj_is_subclass(object, classinfo);
 }
diff --git a/py/objzip.c b/py/objzip.c
index 6edefc3611..6f72d15954 100644
--- a/py/objzip.c
+++ b/py/objzip.c
@@ -32,7 +32,7 @@
 
 typedef struct _mp_obj_zip_t {
     mp_obj_base_t base;
-    mp_uint_t n_iters;
+    size_t n_iters;
     mp_obj_t iters[];
 } mp_obj_zip_t;
 
@@ -42,8 +42,8 @@ STATIC mp_obj_t zip_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_
     mp_obj_zip_t *o = m_new_obj_var(mp_obj_zip_t, mp_obj_t, n_args);
     o->base.type = type;
     o->n_iters = n_args;
-    for (mp_uint_t i = 0; i < n_args; i++) {
-        o->iters[i] = mp_getiter(args[i]);
+    for (size_t i = 0; i < n_args; i++) {
+        o->iters[i] = mp_getiter(args[i], NULL);
     }
     return MP_OBJ_FROM_PTR(o);
 }
@@ -56,7 +56,7 @@ STATIC mp_obj_t zip_iternext(mp_obj_t self_in) {
     }
     mp_obj_tuple_t *tuple = MP_OBJ_TO_PTR(mp_obj_new_tuple(self->n_iters, NULL));
 
-    for (mp_uint_t i = 0; i < self->n_iters; i++) {
+    for (size_t i = 0; i < self->n_iters; i++) {
         mp_obj_t next = mp_iternext(self->iters[i]);
         if (next == MP_OBJ_STOP_ITERATION) {
             mp_obj_tuple_del(MP_OBJ_FROM_PTR(tuple));
@@ -71,6 +71,6 @@ const mp_obj_type_t mp_type_zip = {
     { &mp_type_type },
     .name = MP_QSTR_zip,
     .make_new = zip_make_new,
-    .getiter = mp_identity,
+    .getiter = mp_identity_getiter,
     .iternext = zip_iternext,
 };
diff --git a/py/parse.c b/py/parse.c
index dc360e40ce..2f16748a6c 100644
--- a/py/parse.c
+++ b/py/parse.c
@@ -38,6 +38,7 @@
 #include "py/runtime0.h"
 #include "py/runtime.h"
 #include "py/objint.h"
+#include "py/objstr.h"
 #include "py/builtin.h"
 
 #if MICROPY_ENABLE_COMPILER
@@ -69,13 +70,20 @@ typedef struct _rule_t {
 } rule_t;
 
 enum {
+// define rules with a compile function
 #define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
+#define DEF_RULE_NC(rule, kind, ...)
 #include "py/grammar.h"
 #undef DEF_RULE
-    RULE_maximum_number_of,
-    RULE_string, // special node for non-interned string
-    RULE_bytes, // special node for non-interned bytes
+#undef DEF_RULE_NC
     RULE_const_object, // special node for a constant, generic Python object
+
+// define rules without a compile function
+#define DEF_RULE(rule, comp, kind, ...)
+#define DEF_RULE_NC(rule, kind, ...) RULE_##rule,
+#include "py/grammar.h"
+#undef DEF_RULE
+#undef DEF_RULE_NC
 };
 
 #define or(n)                   (RULE_ACT_OR | n)
@@ -90,8 +98,10 @@ enum {
 #define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)
 #ifdef USE_RULE_NAME
 #define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
+#define DEF_RULE_NC(rule, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
 #else
 #define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
+#define DEF_RULE_NC(rule, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
 #endif
 #include "py/grammar.h"
 #undef or
@@ -103,11 +113,23 @@ enum {
 #undef opt_rule
 #undef one_or_more
 #undef DEF_RULE
+#undef DEF_RULE_NC
 
 STATIC const rule_t *const rules[] = {
+// define rules with a compile function
 #define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
+#define DEF_RULE_NC(rule, kind, ...)
+#include "py/grammar.h"
+#undef DEF_RULE
+#undef DEF_RULE_NC
+    NULL, // RULE_const_object
+
+// define rules without a compile function
+#define DEF_RULE(rule, comp, kind, ...)
+#define DEF_RULE_NC(rule, kind, ...) &rule_##rule,
 #include "py/grammar.h"
 #undef DEF_RULE
+#undef DEF_RULE_NC
 };
 
 typedef struct _rule_stack_t {
@@ -125,15 +147,7 @@ typedef struct _mp_parse_chunk_t {
     byte data[];
 } mp_parse_chunk_t;
 
-typedef enum {
-    PARSE_ERROR_NONE = 0,
-    PARSE_ERROR_MEMORY,
-    PARSE_ERROR_CONST,
-} parse_error_t;
-
 typedef struct _parser_t {
-    parse_error_t parse_error;
-
     size_t rule_stack_alloc;
     size_t rule_stack_top;
     rule_stack_t *rule_stack;
@@ -194,15 +208,8 @@ STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
 }
 
 STATIC void push_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t arg_i) {
-    if (parser->parse_error) {
-        return;
-    }
     if (parser->rule_stack_top >= parser->rule_stack_alloc) {
-        rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC, true);
-        if (rs == NULL) {
-            parser->parse_error = PARSE_ERROR_MEMORY;
-            return;
-        }
+        rule_stack_t *rs = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC);
         parser->rule_stack = rs;
         parser->rule_stack_alloc += MICROPY_ALLOC_PARSE_RULE_INC;
     }
@@ -215,12 +222,10 @@ STATIC void push_rule(parser_t *parser, size_t src_line, const rule_t *rule, siz
 STATIC void push_rule_from_arg(parser_t *parser, size_t arg) {
     assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
     size_t rule_id = arg & RULE_ARG_ARG_MASK;
-    assert(rule_id < RULE_maximum_number_of);
     push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0);
 }
 
 STATIC void pop_rule(parser_t *parser, const rule_t **rule, size_t *arg_i, size_t *src_line) {
-    assert(!parser->parse_error);
     parser->rule_stack_top -= 1;
     *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
     *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
@@ -295,17 +300,14 @@ void mp_parse_node_print(mp_parse_node_t pn, size_t indent) {
             case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
             case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
             case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
-            case MP_PARSE_NODE_TOKEN: printf("tok(%u)\n", (uint)arg); break;
-            default: assert(0);
+            default:
+                assert(MP_PARSE_NODE_LEAF_KIND(pn) == MP_PARSE_NODE_TOKEN);
+                printf("tok(%u)\n", (uint)arg); break;
         }
     } else {
         // node must be a mp_parse_node_struct_t
         mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
-        if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
-            printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
-        } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) {
-            printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
-        } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
+        if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
             #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
             printf("literal const(%016llx)\n", (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
             #else
@@ -336,58 +338,26 @@ STATIC void result_stack_show(parser_t *parser) {
 */
 
 STATIC mp_parse_node_t pop_result(parser_t *parser) {
-    if (parser->parse_error) {
-        return MP_PARSE_NODE_NULL;
-    }
     assert(parser->result_stack_top > 0);
     return parser->result_stack[--parser->result_stack_top];
 }
 
 STATIC mp_parse_node_t peek_result(parser_t *parser, size_t pos) {
-    if (parser->parse_error) {
-        return MP_PARSE_NODE_NULL;
-    }
     assert(parser->result_stack_top > pos);
     return parser->result_stack[parser->result_stack_top - 1 - pos];
 }
 
 STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
-    if (parser->parse_error) {
-        return;
-    }
     if (parser->result_stack_top >= parser->result_stack_alloc) {
-        mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC, true);
-        if (stack == NULL) {
-            parser->parse_error = PARSE_ERROR_MEMORY;
-            return;
-        }
+        mp_parse_node_t *stack = m_renew(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC);
         parser->result_stack = stack;
         parser->result_stack_alloc += MICROPY_ALLOC_PARSE_RESULT_INC;
     }
     parser->result_stack[parser->result_stack_top++] = pn;
 }
 
-STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, size_t src_line, size_t rule_kind, const char *str, size_t len) {
-    mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2);
-    if (pn == NULL) {
-        parser->parse_error = PARSE_ERROR_MEMORY;
-        return MP_PARSE_NODE_NULL;
-    }
-    pn->source_line = src_line;
-    pn->kind_num_nodes = rule_kind | (2 << 8);
-    char *p = m_new(char, len);
-    memcpy(p, str, len);
-    pn->nodes[0] = (uintptr_t)p;
-    pn->nodes[1] = len;
-    return (mp_parse_node_t)pn;
-}
-
 STATIC mp_parse_node_t make_node_const_object(parser_t *parser, size_t src_line, mp_obj_t obj) {
     mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_obj_t));
-    if (pn == NULL) {
-        parser->parse_error = PARSE_ERROR_MEMORY;
-        return MP_PARSE_NODE_NULL;
-    }
     pn->source_line = src_line;
     #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
     // nodes are 32-bit pointers, but need to store 64-bit object
@@ -411,7 +381,11 @@ STATIC void push_result_token(parser_t *parser, const rule_t *rule) {
         mp_map_elem_t *elem;
         if (rule->rule_id == RULE_atom
             && (elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP)) != NULL) {
-            pn = mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(elem->value));
+            if (MP_OBJ_IS_SMALL_INT(elem->value)) {
+                pn = mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(elem->value));
+            } else {
+                pn = make_node_const_object(parser, lex->tok_line, elem->value);
+            }
         } else {
             pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, id);
         }
@@ -444,8 +418,11 @@ STATIC void push_result_token(parser_t *parser, const rule_t *rule) {
             // qstr exists, make a leaf node
             pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
         } else {
-            // not interned, make a node holding a pointer to the string/bytes data
-            pn = make_node_string_bytes(parser, lex->tok_line, lex->tok_kind == MP_TOKEN_STRING ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len);
+            // not interned, make a node holding a pointer to the string/bytes object
+            mp_obj_t o = mp_obj_new_str_of_type(
+                lex->tok_kind == MP_TOKEN_STRING ? &mp_type_str : &mp_type_bytes,
+                (const byte*)lex->vstr.buf, lex->vstr.len);
+            pn = make_node_const_object(parser, lex->tok_line, o);
         }
     } else {
         pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
@@ -641,16 +618,19 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args
 
                 // get the value
                 mp_parse_node_t pn_value = ((mp_parse_node_struct_t*)((mp_parse_node_struct_t*)pn1)->nodes[1])->nodes[0];
-                if (!MP_PARSE_NODE_IS_SMALL_INT(pn_value)) {
-                    parser->parse_error = PARSE_ERROR_CONST;
-                    return false;
+                mp_obj_t value;
+                if (!mp_parse_node_get_int_maybe(pn_value, &value)) {
+                    mp_obj_t exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
+                        "constant must be an integer");
+                    mp_obj_exception_add_traceback(exc, parser->lexer->source_name,
+                        ((mp_parse_node_struct_t*)pn1)->source_line, MP_QSTR_NULL);
+                    nlr_raise(exc);
                 }
-                mp_int_t value = MP_PARSE_NODE_LEAF_SMALL_INT(pn_value);
 
                 // store the value in the table of dynamic constants
                 mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
                 assert(elem->value == MP_OBJ_NULL);
-                elem->value = MP_OBJ_NEW_SMALL_INT(value);
+                elem->value = value;
 
                 // If the constant starts with an underscore then treat it as a private
                 // variable and don't emit any code to store the value to the id.
@@ -745,10 +725,6 @@ STATIC void push_result_rule(parser_t *parser, size_t src_line, const rule_t *ru
     #endif
 
     mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * num_args);
-    if (pn == NULL) {
-        parser->parse_error = PARSE_ERROR_MEMORY;
-        return;
-    }
     pn->source_line = src_line;
     pn->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);
     for (size_t i = num_args; i > 0; i--) {
@@ -763,15 +739,13 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
 
     parser_t parser;
 
-    parser.parse_error = PARSE_ERROR_NONE;
-
     parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
     parser.rule_stack_top = 0;
-    parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc);
+    parser.rule_stack = m_new(rule_stack_t, parser.rule_stack_alloc);
 
     parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
     parser.result_stack_top = 0;
-    parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc);
+    parser.result_stack = m_new(mp_parse_node_t, parser.result_stack_alloc);
 
     parser.lexer = lex;
 
@@ -782,11 +756,6 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
     mp_map_init(&parser.consts, 0);
     #endif
 
-    // check if we could allocate the stacks
-    if (parser.rule_stack == NULL || parser.result_stack == NULL) {
-        goto memory_error;
-    }
-
     // work out the top-level rule to use, and push it on the stack
     size_t top_level_rule;
     switch (input_kind) {
@@ -805,7 +774,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
 
     for (;;) {
         next_rule:
-        if (parser.rule_stack_top == 0 || parser.parse_error) {
+        if (parser.rule_stack_top == 0) {
             break;
         }
 
@@ -870,38 +839,30 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
 
                 // progress through the rule
                 for (; i < n; ++i) {
-                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
-                        case RULE_ARG_TOK: {
-                            // need to match a token
-                            mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
-                            if (lex->tok_kind == tok_kind) {
-                                // matched token
-                                if (tok_kind == MP_TOKEN_NAME) {
-                                    push_result_token(&parser, rule);
-                                }
-                                mp_lexer_to_next(lex);
+                    if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
+                        // need to match a token
+                        mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
+                        if (lex->tok_kind == tok_kind) {
+                            // matched token
+                            if (tok_kind == MP_TOKEN_NAME) {
+                                push_result_token(&parser, rule);
+                            }
+                            mp_lexer_to_next(lex);
+                        } else {
+                            // failed to match token
+                            if (i > 0) {
+                                // already eaten tokens so can't backtrack
+                                goto syntax_error;
                             } else {
-                                // failed to match token
-                                if (i > 0) {
-                                    // already eaten tokens so can't backtrack
-                                    goto syntax_error;
-                                } else {
-                                    // this rule failed, so backtrack
-                                    backtrack = true;
-                                    goto next_rule;
-                                }
+                                // this rule failed, so backtrack
+                                backtrack = true;
+                                goto next_rule;
                             }
-                            break;
                         }
-                        case RULE_ARG_RULE:
-                        case RULE_ARG_OPT_RULE:
-                        rule_and_no_other_choice:
-                            push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
-                            push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
-                            goto next_rule;
-                        default:
-                            assert(0);
-                            goto rule_and_no_other_choice; // to help flow control analysis
+                    } else {
+                        push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
+                        push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
+                        goto next_rule;
                     }
                 }
 
@@ -913,15 +874,13 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                 // this code discards lonely statements, such as doc strings
                 if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
                     mp_parse_node_t p = peek_result(&parser, 1);
-                    if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
+                    if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p))
+                        || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_const_object)) {
                         pop_result(&parser); // MP_PARSE_NODE_NULL
-                        mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string
-                        if (MP_PARSE_NODE_IS_STRUCT(pn)) {
-                            mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
-                            if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
-                                m_del(char, (char*)pns->nodes[0], (size_t)pns->nodes[1]);
-                            }
-                        }
+                        pop_result(&parser); // const expression (leaf or RULE_const_object)
+                        // Pushing the "pass" rule here will overwrite any RULE_const_object
+                        // entry that was on the result stack, allowing the GC to reclaim
+                        // the memory from the const object when needed.
                         push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
                         break;
                     }
@@ -973,7 +932,9 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                 break;
             }
 
-            case RULE_ACT_LIST: {
+            default: {
+                assert((rule->act & RULE_ACT_KIND_MASK) == RULE_ACT_LIST);
+
                 // n=2 is: item item*
                 // n=1 is: item (sep item)*
                 // n=3 is: item (sep item)* [sep]
@@ -1011,32 +972,27 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                 } else {
                     for (;;) {
                         size_t arg = rule->arg[i & 1 & n];
-                        switch (arg & RULE_ARG_KIND_MASK) {
-                            case RULE_ARG_TOK:
-                                if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
-                                    if (i & 1 & n) {
-                                        // separators which are tokens are not pushed to result stack
-                                    } else {
-                                        push_result_token(&parser, rule);
-                                    }
-                                    mp_lexer_to_next(lex);
-                                    // got element of list, so continue parsing list
-                                    i += 1;
+                        if ((arg & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
+                            if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
+                                if (i & 1 & n) {
+                                    // separators which are tokens are not pushed to result stack
                                 } else {
-                                    // couldn't get element of list
-                                    i += 1;
-                                    backtrack = true;
-                                    goto list_backtrack;
+                                    push_result_token(&parser, rule);
                                 }
-                                break;
-                            case RULE_ARG_RULE:
-                            rule_list_no_other_choice:
-                                push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
-                                push_rule_from_arg(&parser, arg); // push child of list-rule
-                                goto next_rule;
-                            default:
-                                assert(0);
-                                goto rule_list_no_other_choice; // to help flow control analysis
+                                mp_lexer_to_next(lex);
+                                // got element of list, so continue parsing list
+                                i += 1;
+                            } else {
+                                // couldn't get element of list
+                                i += 1;
+                                backtrack = true;
+                                goto list_backtrack;
+                            }
+                        } else {
+                            assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE);
+                            push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
+                            push_rule_from_arg(&parser, arg); // push child of list-rule
+                            goto next_rule;
                         }
                     }
                 }
@@ -1062,9 +1018,6 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                 }
                 break;
             }
-
-            default:
-                assert(0);
         }
     }
 
@@ -1083,27 +1036,12 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
         parser.tree.chunk = parser.cur_chunk;
     }
 
-    mp_obj_t exc;
-
-    if (parser.parse_error) {
-        #if MICROPY_COMP_CONST
-        if (parser.parse_error == PARSE_ERROR_CONST) {
-            exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
-                "constant must be an integer");
-        } else
-        #endif
-        {
-            assert(parser.parse_error == PARSE_ERROR_MEMORY);
-        memory_error:
-            exc = mp_obj_new_exception_msg(&mp_type_MemoryError,
-                "parser could not allocate enough memory");
-        }
-        parser.tree.root = MP_PARSE_NODE_NULL;
-    } else if (
+    if (
         lex->tok_kind != MP_TOKEN_END // check we are at the end of the token stream
         || parser.result_stack_top == 0 // check that we got a node (can fail on empty input)
         ) {
-    syntax_error:
+    syntax_error:;
+        mp_obj_t exc;
         if (lex->tok_kind == MP_TOKEN_INDENT) {
             exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
                 "unexpected indent");
@@ -1114,37 +1052,24 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
             exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
                 "invalid syntax");
         }
-        parser.tree.root = MP_PARSE_NODE_NULL;
-    } else {
-        // no errors
-
-        //result_stack_show(parser);
-        //printf("rule stack alloc: %d\n", parser.rule_stack_alloc);
-        //printf("result stack alloc: %d\n", parser.result_stack_alloc);
-        //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
-
-        // get the root parse node that we created
-        assert(parser.result_stack_top == 1);
-        exc = MP_OBJ_NULL;
-        parser.tree.root = parser.result_stack[0];
+        // add traceback to give info about file name and location
+        // we don't have a 'block' name, so just pass the NULL qstr to indicate this
+        mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL);
+        nlr_raise(exc);
     }
 
+    // get the root parse node that we created
+    assert(parser.result_stack_top == 1);
+    parser.tree.root = parser.result_stack[0];
+
     // free the memory that we don't need anymore
     m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
     m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
-    // we also free the lexer on behalf of the caller (see below)
 
-    if (exc != MP_OBJ_NULL) {
-        // had an error so raise the exception
-        // add traceback to give info about file name and location
-        // we don't have a 'block' name, so just pass the NULL qstr to indicate this
-        mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL);
-        mp_lexer_free(lex);
-        nlr_raise(exc);
-    } else {
-        mp_lexer_free(lex);
-        return parser.tree;
-    }
+    // we also free the lexer on behalf of the caller
+    mp_lexer_free(lex);
+
+    return parser.tree;
 }
 
 void mp_parse_tree_clear(mp_parse_tree_t *tree) {
diff --git a/py/parsenum.c b/py/parsenum.c
index 2e41801ee9..1771188434 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -55,7 +55,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
     // check radix base
     if ((base != 0 && base < 2) || base > 36) {
         // this won't be reached if lex!=NULL
-        mp_raise_msg(&mp_type_ValueError, "int() arg 2 must be >= 2 and <= 36");
+        mp_raise_ValueError("int() arg 2 must be >= 2 and <= 36");
     }
 
     // skip leading space
diff --git a/py/persistentcode.c b/py/persistentcode.c
index 99b01f8e27..a71045a290 100644
--- a/py/persistentcode.c
+++ b/py/persistentcode.c
@@ -38,6 +38,9 @@
 
 #include "py/smallint.h"
 
+// The current version of .mpy files
+#define MPY_VERSION (2)
+
 // The feature flags byte encodes the compile-time config options that
 // affect the generate bytecode.
 #define MPY_FEATURE_FLAGS ( \
@@ -108,8 +111,8 @@ STATIC void read_bytes(mp_reader_t *reader, byte *buf, size_t len) {
     }
 }
 
-STATIC mp_uint_t read_uint(mp_reader_t *reader) {
-    mp_uint_t unum = 0;
+STATIC size_t read_uint(mp_reader_t *reader) {
+    size_t unum = 0;
     for (;;) {
         byte b = reader->readbyte(reader->data);
         unum = (unum << 7) | (b & 0x7f);
@@ -121,7 +124,7 @@ STATIC mp_uint_t read_uint(mp_reader_t *reader) {
 }
 
 STATIC qstr load_qstr(mp_reader_t *reader) {
-    mp_uint_t len = read_uint(reader);
+    size_t len = read_uint(reader);
     char *str = m_new(char, len);
     read_bytes(reader, (byte*)str, len);
     qstr qst = qstr_from_strn(str, len);
@@ -164,7 +167,7 @@ STATIC void load_bytecode_qstrs(mp_reader_t *reader, byte *ip, byte *ip_top) {
 
 STATIC mp_raw_code_t *load_raw_code(mp_reader_t *reader) {
     // load bytecode
-    mp_uint_t bc_len = read_uint(reader);
+    size_t bc_len = read_uint(reader);
     byte *bytecode = m_new(byte, bc_len);
     read_bytes(reader, bytecode, bc_len);
 
@@ -182,17 +185,17 @@ STATIC mp_raw_code_t *load_raw_code(mp_reader_t *reader) {
     load_bytecode_qstrs(reader, (byte*)ip, bytecode + bc_len);
 
     // load constant table
-    mp_uint_t n_obj = read_uint(reader);
-    mp_uint_t n_raw_code = read_uint(reader);
+    size_t n_obj = read_uint(reader);
+    size_t n_raw_code = read_uint(reader);
     mp_uint_t *const_table = m_new(mp_uint_t, prelude.n_pos_args + prelude.n_kwonly_args + n_obj + n_raw_code);
     mp_uint_t *ct = const_table;
-    for (mp_uint_t i = 0; i < prelude.n_pos_args + prelude.n_kwonly_args; ++i) {
+    for (size_t i = 0; i < prelude.n_pos_args + prelude.n_kwonly_args; ++i) {
         *ct++ = (mp_uint_t)MP_OBJ_NEW_QSTR(load_qstr(reader));
     }
-    for (mp_uint_t i = 0; i < n_obj; ++i) {
+    for (size_t i = 0; i < n_obj; ++i) {
         *ct++ = (mp_uint_t)load_obj(reader);
     }
-    for (mp_uint_t i = 0; i < n_raw_code; ++i) {
+    for (size_t i = 0; i < n_raw_code; ++i) {
         *ct++ = (mp_uint_t)(uintptr_t)load_raw_code(reader);
     }
 
@@ -209,10 +212,10 @@ STATIC mp_raw_code_t *load_raw_code(mp_reader_t *reader) {
 mp_raw_code_t *mp_raw_code_load(mp_reader_t *reader) {
     byte header[4];
     read_bytes(reader, header, sizeof(header));
-    if (strncmp((char*)header, "M\x00", 2) != 0) {
-        mp_raise_ValueError("invalid .mpy file");
-    }
-    if (header[2] != MPY_FEATURE_FLAGS || header[3] > mp_small_int_bits()) {
+    if (header[0] != 'M'
+        || header[1] != MPY_VERSION
+        || header[2] != MPY_FEATURE_FLAGS
+        || header[3] > mp_small_int_bits()) {
         mp_raise_ValueError("incompatible .mpy file");
     }
     mp_raw_code_t *rc = load_raw_code(reader);
@@ -222,18 +225,13 @@ mp_raw_code_t *mp_raw_code_load(mp_reader_t *reader) {
 
 mp_raw_code_t *mp_raw_code_load_mem(const byte *buf, size_t len) {
     mp_reader_t reader;
-    if (!mp_reader_new_mem(&reader, buf, len, 0)) {
-        m_malloc_fail(BYTES_PER_WORD); // we need to raise a MemoryError
-    }
+    mp_reader_new_mem(&reader, buf, len, 0);
     return mp_raw_code_load(&reader);
 }
 
 mp_raw_code_t *mp_raw_code_load_file(const char *filename) {
     mp_reader_t reader;
-    int ret = mp_reader_new_file(&reader, filename);
-    if (ret != 0) {
-        mp_raise_OSError(ret);
-    }
+    mp_reader_new_file(&reader, filename);
     return mp_raw_code_load(&reader);
 }
 
@@ -248,7 +246,7 @@ STATIC void mp_print_bytes(mp_print_t *print, const byte *data, size_t len) {
 }
 
 #define BYTES_FOR_INT ((BYTES_PER_WORD * 8 + 6) / 7)
-STATIC void mp_print_uint(mp_print_t *print, mp_uint_t n) {
+STATIC void mp_print_uint(mp_print_t *print, size_t n) {
     byte buf[BYTES_FOR_INT];
     byte *p = buf + sizeof(buf);
     *--p = n & 0x7f;
@@ -359,7 +357,7 @@ void mp_raw_code_save(mp_raw_code_t *rc, mp_print_t *print) {
     //  byte  version
     //  byte  feature flags
     //  byte  number of bits in a small int
-    byte header[4] = {'M', 0, MPY_FEATURE_FLAGS_DYNAMIC,
+    byte header[4] = {'M', MPY_VERSION, MPY_FEATURE_FLAGS_DYNAMIC,
         #if MICROPY_DYNAMIC_COMPILER
         mp_dynamic_compiler.small_int_bits,
         #else
diff --git a/py/py.mk b/py/py.mk
index 11c55f122e..30109ac06f 100644
--- a/py/py.mk
+++ b/py/py.mk
@@ -16,8 +16,8 @@ endif
 # some code is performance bottleneck and compiled with other optimization options
 CSUPEROPT = -O3
 
-INC += -I../lib
-INC += -I../lib/netutils
+# this sets the config file for FatFs
+CFLAGS_MOD += -DFFCONF_H=\"lib/oofatfs/ffconf.h\"
 
 ifeq ($(MICROPY_PY_USSL),1)
 CFLAGS_MOD += -DMICROPY_PY_USSL=1
@@ -140,6 +140,7 @@ PY_O_BASENAME = \
 	persistentcode.o \
 	runtime.o \
 	runtime_utils.o \
+	scheduler.o \
 	nativeglue.o \
 	stackctrl.o \
 	argcheck.o \
@@ -189,6 +190,7 @@ PY_O_BASENAME = \
 	binary.o \
 	builtinimport.o \
 	builtinevex.o \
+	builtinhelp.o \
 	modarray.o \
 	modbuiltins.o \
 	modcollections.o \
@@ -218,6 +220,7 @@ PY_O_BASENAME = \
 	../extmod/virtpin.o \
 	../extmod/machine_mem.o \
 	../extmod/machine_pinbase.o \
+	../extmod/machine_signal.o \
 	../extmod/machine_pulse.o \
 	../extmod/machine_i2c.o \
 	../extmod/machine_spi.o \
@@ -228,12 +231,11 @@ PY_O_BASENAME = \
 	../extmod/modwebsocket.o \
 	../extmod/modwebrepl.o \
 	../extmod/modframebuf.o \
-	../extmod/fsusermount.o \
+	../extmod/vfs.o \
+	../extmod/vfs_reader.o \
 	../extmod/vfs_fat.o \
-	../extmod/vfs_fat_ffconf.o \
 	../extmod/vfs_fat_diskio.o \
 	../extmod/vfs_fat_file.o \
-	../extmod/vfs_fat_reader.o \
 	../extmod/vfs_fat_misc.o \
 	../extmod/utime_mphal.o \
 	../extmod/uos_dupterm.o \
@@ -278,6 +280,10 @@ $(HEADER_BUILD)/qstrdefs.generated.h: $(PY_QSTR_DEFS) $(QSTR_DEFS) $(QSTR_DEFS_C
 	$(Q)cat $(PY_QSTR_DEFS) $(QSTR_DEFS) $(QSTR_DEFS_COLLECTED) | $(SED) 's/^Q(.*)/"&"/' | $(CPP) $(CFLAGS) - | $(SED) 's/^"\(Q(.*)\)"/\1/' > $(HEADER_BUILD)/qstrdefs.preprocessed.h
 	$(Q)$(PYTHON) $(PY_SRC)/makeqstrdata.py $(HEADER_BUILD)/qstrdefs.preprocessed.h > $@
 
+# Force nlr code to always be compiled with space-saving optimisation so
+# that the function preludes are of a minimal and predictable form.
+$(PY_BUILD)/nlr%.o: CFLAGS += -Os
+
 # emitters
 
 $(PY_BUILD)/emitnx64.o: CFLAGS += -DN_X64
diff --git a/py/qstrdefs.h b/py/qstrdefs.h
index c98a253a69..4581e5e1b1 100644
--- a/py/qstrdefs.h
+++ b/py/qstrdefs.h
@@ -36,6 +36,7 @@ QCFG(BYTES_IN_HASH, MICROPY_QSTR_BYTES_IN_HASH)
 Q()
 Q(*)
 Q(_)
+Q(/)
 Q(%#o)
 Q(%#x)
 Q({:#b})
diff --git a/py/reader.c b/py/reader.c
index d7de7aa6c4..5df45c4957 100644
--- a/py/reader.c
+++ b/py/reader.c
@@ -27,6 +27,7 @@
 #include <stdio.h>
 #include <assert.h>
 
+#include "py/runtime.h"
 #include "py/mperrno.h"
 #include "py/reader.h"
 
@@ -54,11 +55,8 @@ STATIC void mp_reader_mem_close(void *data) {
     m_del_obj(mp_reader_mem_t, reader);
 }
 
-bool mp_reader_new_mem(mp_reader_t *reader, const byte *buf, size_t len, size_t free_len) {
-    mp_reader_mem_t *rm = m_new_obj_maybe(mp_reader_mem_t);
-    if (rm == NULL) {
-        return false;
-    }
+void mp_reader_new_mem(mp_reader_t *reader, const byte *buf, size_t len, size_t free_len) {
+    mp_reader_mem_t *rm = m_new_obj(mp_reader_mem_t);
     rm->free_len = free_len;
     rm->beg = buf;
     rm->cur = buf;
@@ -66,7 +64,6 @@ bool mp_reader_new_mem(mp_reader_t *reader, const byte *buf, size_t len, size_t
     reader->data = rm;
     reader->readbyte = mp_reader_mem_readbyte;
     reader->close = mp_reader_mem_close;
-    return true;
 }
 
 #if MICROPY_READER_POSIX
@@ -110,14 +107,8 @@ STATIC void mp_reader_posix_close(void *data) {
     m_del_obj(mp_reader_posix_t, reader);
 }
 
-int mp_reader_new_file_from_fd(mp_reader_t *reader, int fd, bool close_fd) {
-    mp_reader_posix_t *rp = m_new_obj_maybe(mp_reader_posix_t);
-    if (rp == NULL) {
-        if (close_fd) {
-            close(fd);
-        }
-        return MP_ENOMEM;
-    }
+void mp_reader_new_file_from_fd(mp_reader_t *reader, int fd, bool close_fd) {
+    mp_reader_posix_t *rp = m_new_obj(mp_reader_posix_t);
     rp->close_fd = close_fd;
     rp->fd = fd;
     int n = read(rp->fd, rp->buf, sizeof(rp->buf));
@@ -125,22 +116,21 @@ int mp_reader_new_file_from_fd(mp_reader_t *reader, int fd, bool close_fd) {
         if (close_fd) {
             close(fd);
         }
-        return errno;
+        mp_raise_OSError(errno);
     }
     rp->len = n;
     rp->pos = 0;
     reader->data = rp;
     reader->readbyte = mp_reader_posix_readbyte;
     reader->close = mp_reader_posix_close;
-    return 0; // success
 }
 
-int mp_reader_new_file(mp_reader_t *reader, const char *filename) {
+void mp_reader_new_file(mp_reader_t *reader, const char *filename) {
     int fd = open(filename, O_RDONLY, 0644);
     if (fd < 0) {
-        return errno;
+        mp_raise_OSError(errno);
     }
-    return mp_reader_new_file_from_fd(reader, fd, true);
+    mp_reader_new_file_from_fd(reader, fd, true);
 }
 
 #endif
diff --git a/py/reader.h b/py/reader.h
index b02d96149b..8511c72ce5 100644
--- a/py/reader.h
+++ b/py/reader.h
@@ -39,8 +39,8 @@ typedef struct _mp_reader_t {
     void (*close)(void *data);
 } mp_reader_t;
 
-bool mp_reader_new_mem(mp_reader_t *reader, const byte *buf, size_t len, size_t free_len);
-int mp_reader_new_file(mp_reader_t *reader, const char *filename);
-int mp_reader_new_file_from_fd(mp_reader_t *reader, int fd, bool close_fd);
+void mp_reader_new_mem(mp_reader_t *reader, const byte *buf, size_t len, size_t free_len);
+void mp_reader_new_file(mp_reader_t *reader, const char *filename);
+void mp_reader_new_file_from_fd(mp_reader_t *reader, int fd, bool close_fd);
 
 #endif // MICROPY_INCLUDED_PY_READER_H
diff --git a/py/repl.c b/py/repl.c
index 997d800054..6d8f7cca46 100644
--- a/py/repl.c
+++ b/py/repl.c
@@ -153,7 +153,7 @@ mp_uint_t mp_repl_autocomplete(const char *str, mp_uint_t len, const mp_print_t
             mp_obj_t obj = MP_OBJ_NULL;
             for (mp_uint_t i = 0; i < dict->map.alloc; i++) {
                 if (MP_MAP_SLOT_IS_FILLED(&dict->map, i)) {
-                    mp_uint_t d_len;
+                    size_t d_len;
                     const char *d_str = mp_obj_str_get_data(dict->map.table[i].key, &d_len);
                     if (s_len == d_len && strncmp(s_start, d_str, d_len) == 0) {
                         obj = dict->map.table[i].value;
@@ -197,7 +197,7 @@ mp_uint_t mp_repl_autocomplete(const char *str, mp_uint_t len, const mp_print_t
             mp_uint_t match_len = 0;
             for (mp_uint_t i = 0; i < dict->map.alloc; i++) {
                 if (MP_MAP_SLOT_IS_FILLED(&dict->map, i)) {
-                    mp_uint_t d_len;
+                    size_t d_len;
                     const char *d_str = mp_obj_str_get_data(dict->map.table[i].key, &d_len);
                     if (s_len <= d_len && strncmp(s_start, d_str, s_len) == 0) {
                         if (match_str == NULL) {
@@ -247,7 +247,7 @@ mp_uint_t mp_repl_autocomplete(const char *str, mp_uint_t len, const mp_print_t
             int line_len = MAX_LINE_LEN; // force a newline for first word
             for (mp_uint_t i = 0; i < dict->map.alloc; i++) {
                 if (MP_MAP_SLOT_IS_FILLED(&dict->map, i)) {
-                    mp_uint_t d_len;
+                    size_t d_len;
                     const char *d_str = mp_obj_str_get_data(dict->map.table[i].key, &d_len);
                     if (s_len <= d_len && strncmp(s_start, d_str, s_len) == 0) {
                         int gap = (line_len + WORD_SLOT_LEN - 1) / WORD_SLOT_LEN * WORD_SLOT_LEN - line_len;
diff --git a/py/runtime.c b/py/runtime.c
index 8b4420926c..bf2bfb8eae 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -63,6 +63,10 @@ void mp_init(void) {
 
     // no pending exceptions to start with
     MP_STATE_VM(mp_pending_exception) = MP_OBJ_NULL;
+    #if MICROPY_ENABLE_SCHEDULER
+    MP_STATE_VM(sched_state) = MP_SCHED_IDLE;
+    MP_STATE_VM(sched_sp) = 0;
+    #endif
 
 #if MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF
     mp_init_emergency_exception_buf();
@@ -74,7 +78,7 @@ void mp_init(void) {
     MP_STATE_VM(mp_kbd_exception).traceback_alloc = 0;
     MP_STATE_VM(mp_kbd_exception).traceback_len = 0;
     MP_STATE_VM(mp_kbd_exception).traceback_data = NULL;
-    MP_STATE_VM(mp_kbd_exception).args = mp_const_empty_tuple;
+    MP_STATE_VM(mp_kbd_exception).args = (mp_obj_tuple_t*)&mp_const_empty_tuple_obj;
     #endif
 
     // call port specific initialization if any
@@ -85,15 +89,16 @@ void mp_init(void) {
     // optimization disabled by default
     MP_STATE_VM(mp_optimise_value) = 0;
 
-    // init global module stuff
-    mp_module_init();
+    // init global module dict
+    mp_obj_dict_init(&MP_STATE_VM(mp_loaded_modules_dict), 3);
 
     // initialise the __main__ module
     mp_obj_dict_init(&MP_STATE_VM(dict_main), 1);
     mp_obj_dict_store(MP_OBJ_FROM_PTR(&MP_STATE_VM(dict_main)), MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR___main__));
 
     // locals = globals for outer module (see Objects/frameobject.c/PyFrame_New())
-    MP_STATE_CTX(dict_locals) = MP_STATE_CTX(dict_globals) = &MP_STATE_VM(dict_main);
+    mp_locals_set(&MP_STATE_VM(dict_main));
+    mp_globals_set(&MP_STATE_VM(dict_main));
 
     #if MICROPY_CAN_OVERRIDE_BUILTINS
     // start with no extensions to builtins
@@ -105,6 +110,12 @@ void mp_init(void) {
     memset(MP_STATE_VM(fs_user_mount), 0, sizeof(MP_STATE_VM(fs_user_mount)));
     #endif
 
+    #if MICROPY_VFS
+    // initialise the VFS sub-system
+    MP_STATE_VM(vfs_cur) = NULL;
+    MP_STATE_VM(vfs_mount_table) = NULL;
+    #endif
+
     #if MICROPY_PY_THREAD_GIL
     mp_thread_mutex_init(&MP_STATE_VM(gil_mutex));
     #endif
@@ -114,7 +125,7 @@ void mp_init(void) {
 
 void mp_deinit(void) {
     //mp_obj_dict_free(&dict_main);
-    mp_module_deinit();
+    //mp_map_deinit(&MP_STATE_VM(mp_loaded_modules_map));
 
     // call port specific deinitialization if any 
 #ifdef MICROPY_PORT_INIT_FUNC
@@ -126,8 +137,8 @@ mp_obj_t mp_load_name(qstr qst) {
     // logic: search locals, globals, builtins
     DEBUG_OP_printf("load name %s\n", qstr_str(qst));
     // If we're at the outer scope (locals == globals), dispatch to load_global right away
-    if (MP_STATE_CTX(dict_locals) != MP_STATE_CTX(dict_globals)) {
-        mp_map_elem_t *elem = mp_map_lookup(&MP_STATE_CTX(dict_locals)->map, MP_OBJ_NEW_QSTR(qst), MP_MAP_LOOKUP);
+    if (mp_locals_get() != mp_globals_get()) {
+        mp_map_elem_t *elem = mp_map_lookup(&mp_locals_get()->map, MP_OBJ_NEW_QSTR(qst), MP_MAP_LOOKUP);
         if (elem != NULL) {
             return elem->value;
         }
@@ -138,7 +149,7 @@ mp_obj_t mp_load_name(qstr qst) {
 mp_obj_t mp_load_global(qstr qst) {
     // logic: search globals, builtins
     DEBUG_OP_printf("load global %s\n", qstr_str(qst));
-    mp_map_elem_t *elem = mp_map_lookup(&MP_STATE_CTX(dict_globals)->map, MP_OBJ_NEW_QSTR(qst), MP_MAP_LOOKUP);
+    mp_map_elem_t *elem = mp_map_lookup(&mp_globals_get()->map, MP_OBJ_NEW_QSTR(qst), MP_MAP_LOOKUP);
     if (elem == NULL) {
         #if MICROPY_CAN_OVERRIDE_BUILTINS
         if (MP_STATE_VM(mp_module_builtins_override_dict) != NULL) {
@@ -178,24 +189,24 @@ mp_obj_t mp_load_build_class(void) {
 
 void mp_store_name(qstr qst, mp_obj_t obj) {
     DEBUG_OP_printf("store name %s <- %p\n", qstr_str(qst), obj);
-    mp_obj_dict_store(MP_OBJ_FROM_PTR(MP_STATE_CTX(dict_locals)), MP_OBJ_NEW_QSTR(qst), obj);
+    mp_obj_dict_store(MP_OBJ_FROM_PTR(mp_locals_get()), MP_OBJ_NEW_QSTR(qst), obj);
 }
 
 void mp_delete_name(qstr qst) {
     DEBUG_OP_printf("delete name %s\n", qstr_str(qst));
     // TODO convert KeyError to NameError if qst not found
-    mp_obj_dict_delete(MP_OBJ_FROM_PTR(MP_STATE_CTX(dict_locals)), MP_OBJ_NEW_QSTR(qst));
+    mp_obj_dict_delete(MP_OBJ_FROM_PTR(mp_locals_get()), MP_OBJ_NEW_QSTR(qst));
 }
 
 void mp_store_global(qstr qst, mp_obj_t obj) {
     DEBUG_OP_printf("store global %s <- %p\n", qstr_str(qst), obj);
-    mp_obj_dict_store(MP_OBJ_FROM_PTR(MP_STATE_CTX(dict_globals)), MP_OBJ_NEW_QSTR(qst), obj);
+    mp_obj_dict_store(MP_OBJ_FROM_PTR(mp_globals_get()), MP_OBJ_NEW_QSTR(qst), obj);
 }
 
 void mp_delete_global(qstr qst) {
     DEBUG_OP_printf("delete global %s\n", qstr_str(qst));
     // TODO convert KeyError to NameError if qst not found
-    mp_obj_dict_delete(MP_OBJ_FROM_PTR(MP_STATE_CTX(dict_globals)), MP_OBJ_NEW_QSTR(qst));
+    mp_obj_dict_delete(MP_OBJ_FROM_PTR(mp_globals_get()), MP_OBJ_NEW_QSTR(qst));
 }
 
 mp_obj_t mp_unary_op(mp_uint_t op, mp_obj_t arg) {
@@ -220,11 +231,9 @@ mp_obj_t mp_unary_op(mp_uint_t op, mp_obj_t arg) {
                 } else {
                     return MP_OBJ_NEW_SMALL_INT(-val);
                 }
-            case MP_UNARY_OP_INVERT:
-                return MP_OBJ_NEW_SMALL_INT(~val);
             default:
-                assert(0);
-                return arg;
+                assert(op == MP_UNARY_OP_INVERT);
+                return MP_OBJ_NEW_SMALL_INT(~val);
         }
     } else if (op == MP_UNARY_OP_HASH && MP_OBJ_IS_STR_OR_BYTES(arg)) {
         // fast path for hashing str/bytes
@@ -243,7 +252,7 @@ mp_obj_t mp_unary_op(mp_uint_t op, mp_obj_t arg) {
             }
         }
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "unsupported type for operator");
+            mp_raise_TypeError("unsupported type for operator");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "unsupported type for %q: '%s'",
@@ -297,7 +306,7 @@ mp_obj_t mp_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs) {
             }
         } else if (MP_OBJ_IS_TYPE(rhs, &mp_type_tuple)) {
             mp_obj_tuple_t *tuple = MP_OBJ_TO_PTR(rhs);
-            for (mp_uint_t i = 0; i < tuple->len; i++) {
+            for (size_t i = 0; i < tuple->len; i++) {
                 rhs = tuple->items[i];
                 if (!mp_obj_is_exception_type(rhs)) {
                     goto unsupported_op;
@@ -335,7 +344,7 @@ mp_obj_t mp_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs) {
                 case MP_BINARY_OP_INPLACE_LSHIFT: {
                     if (rhs_val < 0) {
                         // negative shift not allowed
-                        mp_raise_msg(&mp_type_ValueError, "negative shift count");
+                        mp_raise_ValueError("negative shift count");
                     } else if (rhs_val >= (mp_int_t)BITS_PER_WORD || lhs_val > (MP_SMALL_INT_MAX >> rhs_val) || lhs_val < (MP_SMALL_INT_MIN >> rhs_val)) {
                         // left-shift will overflow, so use higher precision integer
                         lhs = mp_obj_new_int_from_ll(lhs_val);
@@ -350,7 +359,7 @@ mp_obj_t mp_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs) {
                 case MP_BINARY_OP_INPLACE_RSHIFT:
                     if (rhs_val < 0) {
                         // negative shift not allowed
-                        mp_raise_msg(&mp_type_ValueError, "negative shift count");
+                        mp_raise_ValueError("negative shift count");
                     } else {
                         // standard precision is enough for right-shift
                         if (rhs_val >= (mp_int_t)BITS_PER_WORD) {
@@ -426,7 +435,7 @@ mp_obj_t mp_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs) {
                         lhs = mp_obj_new_float(lhs_val);
                         goto generic_binary_op;
                         #else
-                        mp_raise_msg(&mp_type_ValueError, "negative power with no float support");
+                        mp_raise_ValueError("negative power with no float support");
                         #endif
                     } else {
                         mp_int_t ans = 1;
@@ -516,7 +525,8 @@ mp_obj_t mp_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs) {
         }
         if (type->getiter != NULL) {
             /* second attempt, walk the iterator */
-            mp_obj_t iter = mp_getiter(rhs);
+            mp_obj_iter_buf_t iter_buf;
+            mp_obj_t iter = mp_getiter(rhs, &iter_buf);
             mp_obj_t next;
             while ((next = mp_iternext(iter)) != MP_OBJ_STOP_ITERATION) {
                 if (mp_obj_equal(next, lhs)) {
@@ -527,7 +537,7 @@ mp_obj_t mp_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs) {
         }
 
         if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-            mp_raise_msg(&mp_type_TypeError, "object not iterable");
+            mp_raise_TypeError("object not iterable");
         } else {
             nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                 "'%s' object is not iterable", mp_obj_get_type_str(rhs)));
@@ -549,7 +559,7 @@ generic_binary_op:
 
 unsupported_op:
     if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-        mp_raise_msg(&mp_type_TypeError, "unsupported type for operator");
+        mp_raise_TypeError("unsupported type for operator");
     } else {
         nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
             "unsupported types for %q: '%s', '%s'",
@@ -576,7 +586,7 @@ mp_obj_t mp_call_function_2(mp_obj_t fun, mp_obj_t arg1, mp_obj_t arg2) {
 }
 
 // args contains, eg: arg0  arg1  key0  value0  key1  value1
-mp_obj_t mp_call_function_n_kw(mp_obj_t fun_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) {
+mp_obj_t mp_call_function_n_kw(mp_obj_t fun_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
     // TODO improve this: fun object can specify its type and we parse here the arguments,
     // passing to the function arrays of fixed and keyword arguments
 
@@ -591,7 +601,7 @@ mp_obj_t mp_call_function_n_kw(mp_obj_t fun_in, mp_uint_t n_args, mp_uint_t n_kw
     }
 
     if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-        mp_raise_msg(&mp_type_TypeError, "object not callable");
+        mp_raise_TypeError("object not callable");
     } else {
         nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
             "'%s' object is not callable", mp_obj_get_type_str(fun_in)));
@@ -600,7 +610,7 @@ mp_obj_t mp_call_function_n_kw(mp_obj_t fun_in, mp_uint_t n_args, mp_uint_t n_kw
 
 // args contains: fun  self/NULL  arg(0)  ...  arg(n_args-2)  arg(n_args-1)  kw_key(0)  kw_val(0)  ... kw_key(n_kw-1)  kw_val(n_kw-1)
 // if n_args==0 and n_kw==0 then there are only fun and self/NULL
-mp_obj_t mp_call_method_n_kw(mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) {
+mp_obj_t mp_call_method_n_kw(size_t n_args, size_t n_kw, const mp_obj_t *args) {
     DEBUG_OP_printf("call method (fun=%p, self=%p, n_args=" UINT_FMT ", n_kw=" UINT_FMT ", args=%p)\n", args[0], args[1], n_args, n_kw, args);
     int adjust = (args[1] == MP_OBJ_NULL) ? 0 : 1;
     return mp_call_function_n_kw(args[0], n_args + adjust, n_kw, args + 2 - adjust);
@@ -610,7 +620,7 @@ mp_obj_t mp_call_method_n_kw(mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *a
 #if !MICROPY_STACKLESS
 STATIC
 #endif
-void mp_call_prepare_args_n_kw_var(bool have_self, mp_uint_t n_args_n_kw, const mp_obj_t *args, mp_call_args_t *out_args) {
+void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_obj_t *args, mp_call_args_t *out_args) {
     mp_obj_t fun = *args++;
     mp_obj_t self = MP_OBJ_NULL;
     if (have_self) {
@@ -660,7 +670,7 @@ void mp_call_prepare_args_n_kw_var(bool have_self, mp_uint_t n_args_n_kw, const
         // optimise the case of a tuple and list
 
         // get the items
-        mp_uint_t len;
+        size_t len;
         mp_obj_t *items;
         mp_obj_get_array(pos_seq, &len, &items);
 
@@ -694,7 +704,8 @@ void mp_call_prepare_args_n_kw_var(bool have_self, mp_uint_t n_args_n_kw, const
         args2_len += n_args;
 
         // extract the variable position args from the iterator
-        mp_obj_t iterable = mp_getiter(pos_seq);
+        mp_obj_iter_buf_t iter_buf;
+        mp_obj_t iterable = mp_getiter(pos_seq, &iter_buf);
         mp_obj_t item;
         while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
             if (args2_len >= args2_alloc) {
@@ -720,7 +731,7 @@ void mp_call_prepare_args_n_kw_var(bool have_self, mp_uint_t n_args_n_kw, const
         // dictionary
         mp_map_t *map = mp_obj_dict_get_map(kw_dict);
         assert(args2_len + 2 * map->used <= args2_alloc); // should have enough, since kw_dict_len is in this case hinted correctly above
-        for (mp_uint_t i = 0; i < map->alloc; i++) {
+        for (size_t i = 0; i < map->alloc; i++) {
             if (MP_MAP_SLOT_IS_FILLED(map, i)) {
                 // the key must be a qstr, so intern it if it's a string
                 mp_obj_t key = map->table[i].key;
@@ -739,7 +750,7 @@ void mp_call_prepare_args_n_kw_var(bool have_self, mp_uint_t n_args_n_kw, const
         // get the keys iterable
         mp_obj_t dest[3];
         mp_load_method(kw_dict, MP_QSTR_keys, dest);
-        mp_obj_t iterable = mp_getiter(mp_call_method_n_kw(0, 0, dest));
+        mp_obj_t iterable = mp_getiter(mp_call_method_n_kw(0, 0, dest), NULL);
 
         mp_obj_t key;
         while ((key = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
@@ -776,7 +787,7 @@ void mp_call_prepare_args_n_kw_var(bool have_self, mp_uint_t n_args_n_kw, const
     out_args->n_alloc = args2_alloc;
 }
 
-mp_obj_t mp_call_method_n_kw_var(bool have_self, mp_uint_t n_args_n_kw, const mp_obj_t *args) {
+mp_obj_t mp_call_method_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_obj_t *args) {
     mp_call_args_t out_args;
     mp_call_prepare_args_n_kw_var(have_self, n_args_n_kw, args, &out_args);
 
@@ -787,25 +798,22 @@ mp_obj_t mp_call_method_n_kw_var(bool have_self, mp_uint_t n_args_n_kw, const mp
 }
 
 // unpacked items are stored in reverse order into the array pointed to by items
-void mp_unpack_sequence(mp_obj_t seq_in, mp_uint_t num, mp_obj_t *items) {
-    mp_uint_t seq_len;
+void mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
+    size_t seq_len;
     if (MP_OBJ_IS_TYPE(seq_in, &mp_type_tuple) || MP_OBJ_IS_TYPE(seq_in, &mp_type_list)) {
         mp_obj_t *seq_items;
-        if (MP_OBJ_IS_TYPE(seq_in, &mp_type_tuple)) {
-            mp_obj_tuple_get(seq_in, &seq_len, &seq_items);
-        } else {
-            mp_obj_list_get(seq_in, &seq_len, &seq_items);
-        }
+        mp_obj_get_array(seq_in, &seq_len, &seq_items);
         if (seq_len < num) {
             goto too_short;
         } else if (seq_len > num) {
             goto too_long;
         }
-        for (mp_uint_t i = 0; i < num; i++) {
+        for (size_t i = 0; i < num; i++) {
             items[i] = seq_items[num - 1 - i];
         }
     } else {
-        mp_obj_t iterable = mp_getiter(seq_in);
+        mp_obj_iter_buf_t iter_buf;
+        mp_obj_t iterable = mp_getiter(seq_in, &iter_buf);
 
         for (seq_len = 0; seq_len < num; seq_len++) {
             mp_obj_t el = mp_iternext(iterable);
@@ -822,14 +830,14 @@ void mp_unpack_sequence(mp_obj_t seq_in, mp_uint_t num, mp_obj_t *items) {
 
 too_short:
     if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-        mp_raise_msg(&mp_type_ValueError, "wrong number of values to unpack");
+        mp_raise_ValueError("wrong number of values to unpack");
     } else {
         nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
             "need more than %d values to unpack", (int)seq_len));
     }
 too_long:
     if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-        mp_raise_msg(&mp_type_ValueError, "wrong number of values to unpack");
+        mp_raise_ValueError("wrong number of values to unpack");
     } else {
         nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
             "too many values to unpack (expected %d)", (int)num));
@@ -837,31 +845,22 @@ too_long:
 }
 
 // unpacked items are stored in reverse order into the array pointed to by items
-void mp_unpack_ex(mp_obj_t seq_in, mp_uint_t num_in, mp_obj_t *items) {
-    mp_uint_t num_left = num_in & 0xff;
-    mp_uint_t num_right = (num_in >> 8) & 0xff;
+void mp_unpack_ex(mp_obj_t seq_in, size_t num_in, mp_obj_t *items) {
+    size_t num_left = num_in & 0xff;
+    size_t num_right = (num_in >> 8) & 0xff;
     DEBUG_OP_printf("unpack ex " UINT_FMT " " UINT_FMT "\n", num_left, num_right);
-    mp_uint_t seq_len;
+    size_t seq_len;
     if (MP_OBJ_IS_TYPE(seq_in, &mp_type_tuple) || MP_OBJ_IS_TYPE(seq_in, &mp_type_list)) {
         mp_obj_t *seq_items;
-        if (MP_OBJ_IS_TYPE(seq_in, &mp_type_tuple)) {
-            mp_obj_tuple_get(seq_in, &seq_len, &seq_items);
-        } else {
-            if (num_left == 0 && num_right == 0) {
-                // *a, = b # sets a to b if b is a list
-                items[0] = seq_in;
-                return;
-            }
-            mp_obj_list_get(seq_in, &seq_len, &seq_items);
-        }
+        mp_obj_get_array(seq_in, &seq_len, &seq_items);
         if (seq_len < num_left + num_right) {
             goto too_short;
         }
-        for (mp_uint_t i = 0; i < num_right; i++) {
+        for (size_t i = 0; i < num_right; i++) {
             items[i] = seq_items[seq_len - 1 - i];
         }
         items[num_right] = mp_obj_new_list(seq_len - num_left - num_right, seq_items + num_left);
-        for (mp_uint_t i = 0; i < num_left; i++) {
+        for (size_t i = 0; i < num_left; i++) {
             items[num_right + 1 + i] = seq_items[num_left - 1 - i];
         }
     } else {
@@ -869,7 +868,7 @@ void mp_unpack_ex(mp_obj_t seq_in, mp_uint_t num_in, mp_obj_t *items) {
         // items destination array, then the rest to a dynamically created list.  Once the
         // iterable is exhausted, we take from this list for the right part of the items.
         // TODO Improve to waste less memory in the dynamically created list.
-        mp_obj_t iterable = mp_getiter(seq_in);
+        mp_obj_t iterable = mp_getiter(seq_in, NULL);
         mp_obj_t item;
         for (seq_len = 0; seq_len < num_left; seq_len++) {
             item = mp_iternext(iterable);
@@ -886,7 +885,7 @@ void mp_unpack_ex(mp_obj_t seq_in, mp_uint_t num_in, mp_obj_t *items) {
             goto too_short;
         }
         items[num_right] = MP_OBJ_FROM_PTR(rest);
-        for (mp_uint_t i = 0; i < num_right; i++) {
+        for (size_t i = 0; i < num_right; i++) {
             items[num_right - 1 - i] = rest->items[rest->len - num_right + i];
         }
         mp_obj_list_set_len(MP_OBJ_FROM_PTR(rest), rest->len - num_right);
@@ -895,7 +894,7 @@ void mp_unpack_ex(mp_obj_t seq_in, mp_uint_t num_in, mp_obj_t *items) {
 
 too_short:
     if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-        mp_raise_msg(&mp_type_ValueError, "wrong number of values to unpack");
+        mp_raise_ValueError("wrong number of values to unpack");
     } else {
         nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
             "need more than %d values to unpack", (int)seq_len));
@@ -934,7 +933,7 @@ STATIC mp_obj_t checked_fun_call(mp_obj_t self_in, size_t n_args, size_t n_kw, c
         const mp_obj_type_t *arg0_type = mp_obj_get_type(args[0]);
         if (arg0_type != self->type) {
             if (MICROPY_ERROR_REPORTING != MICROPY_ERROR_REPORTING_DETAILED) {
-                mp_raise_msg(&mp_type_TypeError, "argument has wrong type");
+                mp_raise_TypeError("argument has wrong type");
             } else {
                 nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                     "argument should be a '%q' not a '%q'", self->type->name, arg0_type->name));
@@ -1092,13 +1091,24 @@ void mp_store_attr(mp_obj_t base, qstr attr, mp_obj_t value) {
     }
 }
 
-mp_obj_t mp_getiter(mp_obj_t o_in) {
+mp_obj_t mp_getiter(mp_obj_t o_in, mp_obj_iter_buf_t *iter_buf) {
     assert(o_in);
+    mp_obj_type_t *type = mp_obj_get_type(o_in);
+
+    // Check for native getiter which is the identity.  We handle this case explicitly
+    // so we don't unnecessarily allocate any RAM for the iter_buf, which won't be used.
+    if (type->getiter == mp_identity_getiter) {
+        return o_in;
+    }
+
+    // if caller did not provide a buffer then allocate one on the heap
+    if (iter_buf == NULL) {
+        iter_buf = m_new_obj(mp_obj_iter_buf_t);
+    }
 
     // check for native getiter (corresponds to __iter__)
-    mp_obj_type_t *type = mp_obj_get_type(o_in);
     if (type->getiter != NULL) {
-        mp_obj_t iter = type->getiter(o_in);
+        mp_obj_t iter = type->getiter(o_in, iter_buf);
         if (iter != MP_OBJ_NULL) {
             return iter;
         }
@@ -1109,12 +1119,12 @@ mp_obj_t mp_getiter(mp_obj_t o_in) {
     mp_load_method_maybe(o_in, MP_QSTR___getitem__, dest);
     if (dest[0] != MP_OBJ_NULL) {
         // __getitem__ exists, create and return an iterator
-        return mp_obj_new_getitem_iter(dest);
+        return mp_obj_new_getitem_iter(dest, iter_buf);
     }
 
     // object not iterable
     if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-        mp_raise_msg(&mp_type_TypeError, "object not iterable");
+        mp_raise_TypeError("object not iterable");
     } else {
         nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
             "'%s' object is not iterable", mp_obj_get_type_str(o_in)));
@@ -1136,7 +1146,7 @@ mp_obj_t mp_iternext_allow_raise(mp_obj_t o_in) {
             return mp_call_method_n_kw(0, 0, dest);
         } else {
             if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-                mp_raise_msg(&mp_type_TypeError, "object not an iterator");
+                mp_raise_TypeError("object not an iterator");
             } else {
                 nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                     "'%s' object is not an iterator", mp_obj_get_type_str(o_in)));
@@ -1172,7 +1182,7 @@ mp_obj_t mp_iternext(mp_obj_t o_in) {
             }
         } else {
             if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
-                mp_raise_msg(&mp_type_TypeError, "object not an iterator");
+                mp_raise_TypeError("object not an iterator");
             } else {
                 nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                     "'%s' object is not an iterator", mp_obj_get_type_str(o_in)));
@@ -1234,7 +1244,8 @@ mp_vm_return_kind_t mp_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t th
         return MP_VM_RETURN_YIELD;
     }
 
-    if (throw_value != MP_OBJ_NULL) {
+    assert(throw_value != MP_OBJ_NULL);
+    {
         if (mp_obj_is_subclass_fast(MP_OBJ_FROM_PTR(mp_obj_get_type(throw_value)), MP_OBJ_FROM_PTR(&mp_type_GeneratorExit))) {
             mp_load_method_maybe(self_in, MP_QSTR_close, dest);
             if (dest[0] != MP_OBJ_NULL) {
@@ -1244,13 +1255,15 @@ mp_vm_return_kind_t mp_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t th
                 // We assume one can't "yield" from close()
                 return MP_VM_RETURN_NORMAL;
             }
-        }
-        mp_load_method_maybe(self_in, MP_QSTR_throw, dest);
-        if (dest[0] != MP_OBJ_NULL) {
-            *ret_val = mp_call_method_n_kw(1, 0, &throw_value);
-            // If .throw() method returned, we assume it's value to yield
-            // - any exception would be thrown with nlr_raise().
-            return MP_VM_RETURN_YIELD;
+        } else {
+            mp_load_method_maybe(self_in, MP_QSTR_throw, dest);
+            if (dest[0] != MP_OBJ_NULL) {
+                dest[2] = throw_value;
+                *ret_val = mp_call_method_n_kw(1, 0, dest);
+                // If .throw() method returned, we assume it's value to yield
+                // - any exception would be thrown with nlr_raise().
+                return MP_VM_RETURN_YIELD;
+            }
         }
         // If there's nowhere to throw exception into, then we assume that object
         // is just incapable to handle it, so any exception thrown into it
@@ -1260,9 +1273,6 @@ mp_vm_return_kind_t mp_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t th
         *ret_val = throw_value;
         return MP_VM_RETURN_EXCEPTION;
     }
-
-    assert(0);
-    return MP_VM_RETURN_NORMAL; // Should be unreachable
 }
 
 mp_obj_t mp_make_raise_obj(mp_obj_t o) {
@@ -1320,7 +1330,7 @@ import_error:
     }
 
     mp_load_method_maybe(module, MP_QSTR___name__, dest);
-    mp_uint_t pkg_name_len;
+    size_t pkg_name_len;
     const char *pkg_name = mp_obj_str_get_data(dest[0], &pkg_name_len);
 
     const uint dot_name_len = pkg_name_len + 1 + qstr_len(name);
@@ -1346,7 +1356,7 @@ void mp_import_all(mp_obj_t module) {
 
     // TODO: Support __all__
     mp_map_t *map = mp_obj_dict_get_map(MP_OBJ_FROM_PTR(mp_obj_module_get_globals(module)));
-    for (mp_uint_t i = 0; i < map->alloc; i++) {
+    for (size_t i = 0; i < map->alloc; i++) {
         if (MP_MAP_SLOT_IS_FILLED(map, i)) {
             qstr name = MP_OBJ_QSTR_VALUE(map->table[i].key);
             if (*qstr_str(name) != '_') {
@@ -1413,7 +1423,11 @@ void *m_malloc_fail(size_t num_bytes) {
 }
 
 NORETURN void mp_raise_msg(const mp_obj_type_t *exc_type, const char *msg) {
-    nlr_raise(mp_obj_new_exception_msg(exc_type, msg));
+    if (msg == NULL) {
+        nlr_raise(mp_obj_new_exception(exc_type));
+    } else {
+        nlr_raise(mp_obj_new_exception_msg(exc_type, msg));
+    }
 }
 
 NORETURN void mp_raise_ValueError(const char *msg) {
diff --git a/py/runtime.h b/py/runtime.h
index 3532b838de..d75d23ff18 100644
--- a/py/runtime.h
+++ b/py/runtime.h
@@ -64,6 +64,16 @@ extern const qstr mp_binary_op_method_name[];
 void mp_init(void);
 void mp_deinit(void);
 
+void mp_handle_pending(void);
+void mp_handle_pending_tail(mp_uint_t atomic_state);
+
+#if MICROPY_ENABLE_SCHEDULER
+void mp_sched_lock(void);
+void mp_sched_unlock(void);
+static inline unsigned int mp_sched_num_pending(void) { return MP_STATE_VM(sched_sp); }
+bool mp_sched_schedule(mp_obj_t function, mp_obj_t arg);
+#endif
+
 // extra printing method specifically for mp_obj_t's which are integral type
 int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, int base, int base_char, int flags, char fill, int width, int prec);
 
@@ -73,10 +83,10 @@ void mp_arg_parse_all_kw_array(size_t n_pos, size_t n_kw, const mp_obj_t *args,
 NORETURN void mp_arg_error_terse_mismatch(void);
 NORETURN void mp_arg_error_unimpl_kw(void);
 
-static inline mp_obj_dict_t *mp_locals_get(void) { return MP_STATE_CTX(dict_locals); }
-static inline void mp_locals_set(mp_obj_dict_t *d) { MP_STATE_CTX(dict_locals) = d; }
-static inline mp_obj_dict_t *mp_globals_get(void) { return MP_STATE_CTX(dict_globals); }
-static inline void mp_globals_set(mp_obj_dict_t *d) { MP_STATE_CTX(dict_globals) = d; }
+static inline mp_obj_dict_t *mp_locals_get(void) { return MP_STATE_THREAD(dict_locals); }
+static inline void mp_locals_set(mp_obj_dict_t *d) { MP_STATE_THREAD(dict_locals) = d; }
+static inline mp_obj_dict_t *mp_globals_get(void) { return MP_STATE_THREAD(dict_globals); }
+static inline void mp_globals_set(mp_obj_dict_t *d) { MP_STATE_THREAD(dict_globals) = d; }
 
 mp_obj_t mp_load_name(qstr qst);
 mp_obj_t mp_load_global(qstr qst);
@@ -92,9 +102,9 @@ mp_obj_t mp_binary_op(mp_uint_t op, mp_obj_t lhs, mp_obj_t rhs);
 mp_obj_t mp_call_function_0(mp_obj_t fun);
 mp_obj_t mp_call_function_1(mp_obj_t fun, mp_obj_t arg);
 mp_obj_t mp_call_function_2(mp_obj_t fun, mp_obj_t arg1, mp_obj_t arg2);
-mp_obj_t mp_call_function_n_kw(mp_obj_t fun, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args);
-mp_obj_t mp_call_method_n_kw(mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args);
-mp_obj_t mp_call_method_n_kw_var(bool have_self, mp_uint_t n_args_n_kw, const mp_obj_t *args);
+mp_obj_t mp_call_function_n_kw(mp_obj_t fun, size_t n_args, size_t n_kw, const mp_obj_t *args);
+mp_obj_t mp_call_method_n_kw(size_t n_args, size_t n_kw, const mp_obj_t *args);
+mp_obj_t mp_call_method_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_obj_t *args);
 mp_obj_t mp_call_method_self_n_kw(mp_obj_t meth, mp_obj_t self, size_t n_args, size_t n_kw, const mp_obj_t *args);
 // Call function and catch/dump exception - for Python callbacks from C code
 void mp_call_function_1_protected(mp_obj_t fun, mp_obj_t arg);
@@ -102,7 +112,7 @@ void mp_call_function_2_protected(mp_obj_t fun, mp_obj_t arg1, mp_obj_t arg2);
 
 typedef struct _mp_call_args_t {
     mp_obj_t fun;
-    mp_uint_t n_args, n_kw, n_alloc;
+    size_t n_args, n_kw, n_alloc;
     mp_obj_t *args;
 } mp_call_args_t;
 
@@ -111,19 +121,20 @@ typedef struct _mp_call_args_t {
 // prepares argument array suitable for passing to ->call() method of a
 // function object (and mp_call_function_n_kw()).
 // (Only needed in stackless mode.)
-void mp_call_prepare_args_n_kw_var(bool have_self, mp_uint_t n_args_n_kw, const mp_obj_t *args, mp_call_args_t *out_args);
+void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_obj_t *args, mp_call_args_t *out_args);
 #endif
 
-void mp_unpack_sequence(mp_obj_t seq, mp_uint_t num, mp_obj_t *items);
-void mp_unpack_ex(mp_obj_t seq, mp_uint_t num, mp_obj_t *items);
+void mp_unpack_sequence(mp_obj_t seq, size_t num, mp_obj_t *items);
+void mp_unpack_ex(mp_obj_t seq, size_t num, mp_obj_t *items);
 mp_obj_t mp_store_map(mp_obj_t map, mp_obj_t key, mp_obj_t value);
 mp_obj_t mp_load_attr(mp_obj_t base, qstr attr);
 void mp_convert_member_lookup(mp_obj_t obj, const mp_obj_type_t *type, mp_obj_t member, mp_obj_t *dest);
 void mp_load_method(mp_obj_t base, qstr attr, mp_obj_t *dest);
 void mp_load_method_maybe(mp_obj_t base, qstr attr, mp_obj_t *dest);
+void mp_load_super_method(qstr attr, mp_obj_t *dest);
 void mp_store_attr(mp_obj_t base, qstr attr, mp_obj_t val);
 
-mp_obj_t mp_getiter(mp_obj_t o);
+mp_obj_t mp_getiter(mp_obj_t o, mp_obj_iter_buf_t *iter_buf);
 mp_obj_t mp_iternext_allow_raise(mp_obj_t o); // may return MP_OBJ_STOP_ITERATION instead of raising StopIteration()
 mp_obj_t mp_iternext(mp_obj_t o); // will always return MP_OBJ_STOP_ITERATION instead of raising StopIteration(...)
 mp_vm_return_kind_t mp_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t throw_value, mp_obj_t *ret_val);
@@ -155,7 +166,7 @@ NORETURN void mp_exc_recursion_depth(void);
 // helper functions for native/viper code
 mp_uint_t mp_convert_obj_to_native(mp_obj_t obj, mp_uint_t type);
 mp_obj_t mp_convert_native_to_obj(mp_uint_t val, mp_uint_t type);
-mp_obj_t mp_native_call_function_n_kw(mp_obj_t fun_in, mp_uint_t n_args_kw, const mp_obj_t *args);
+mp_obj_t mp_native_call_function_n_kw(mp_obj_t fun_in, size_t n_args_kw, const mp_obj_t *args);
 void mp_native_raise(mp_obj_t o);
 
 #define mp_sys_path (MP_OBJ_FROM_PTR(&MP_STATE_VM(mp_sys_path_obj)))
diff --git a/py/runtime0.h b/py/runtime0.h
index 8d62403a7c..720fe6a23b 100644
--- a/py/runtime0.h
+++ b/py/runtime0.h
@@ -107,6 +107,7 @@ typedef enum {
     MP_F_LOAD_BUILD_CLASS,
     MP_F_LOAD_ATTR,
     MP_F_LOAD_METHOD,
+    MP_F_LOAD_SUPER_METHOD,
     MP_F_STORE_NAME,
     MP_F_STORE_GLOBAL,
     MP_F_STORE_ATTR,
@@ -127,8 +128,8 @@ typedef enum {
     MP_F_NATIVE_CALL_FUNCTION_N_KW,
     MP_F_CALL_METHOD_N_KW,
     MP_F_CALL_METHOD_N_KW_VAR,
-    MP_F_GETITER,
-    MP_F_ITERNEXT,
+    MP_F_NATIVE_GETITER,
+    MP_F_NATIVE_ITERNEXT,
     MP_F_NLR_PUSH,
     MP_F_NLR_POP,
     MP_F_NATIVE_RAISE,
diff --git a/py/scheduler.c b/py/scheduler.c
new file mode 100644
index 0000000000..30851a4d2b
--- /dev/null
+++ b/py/scheduler.c
@@ -0,0 +1,117 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+
+#include "py/runtime.h"
+
+#if MICROPY_ENABLE_SCHEDULER
+
+// A variant of this is inlined in the VM at the pending exception check
+void mp_handle_pending(void) {
+    if (MP_STATE_VM(sched_state) == MP_SCHED_PENDING) {
+        mp_uint_t atomic_state = MICROPY_BEGIN_ATOMIC_SECTION();
+        mp_obj_t obj = MP_STATE_VM(mp_pending_exception);
+        if (obj != MP_OBJ_NULL) {
+            MP_STATE_VM(mp_pending_exception) = MP_OBJ_NULL;
+            if (!mp_sched_num_pending()) {
+                MP_STATE_VM(sched_state) = MP_SCHED_IDLE;
+            }
+            MICROPY_END_ATOMIC_SECTION(atomic_state);
+            nlr_raise(obj);
+        }
+        mp_handle_pending_tail(atomic_state);
+    }
+}
+
+// This function should only be called be mp_sched_handle_pending,
+// or by the VM's inlined version of that function.
+void mp_handle_pending_tail(mp_uint_t atomic_state) {
+    MP_STATE_VM(sched_state) = MP_SCHED_LOCKED;
+    if (MP_STATE_VM(sched_sp) > 0) {
+        mp_sched_item_t item = MP_STATE_VM(sched_stack)[--MP_STATE_VM(sched_sp)];
+        MICROPY_END_ATOMIC_SECTION(atomic_state);
+        mp_call_function_1_protected(item.func, item.arg);
+    } else {
+        MICROPY_END_ATOMIC_SECTION(atomic_state);
+    }
+    mp_sched_unlock();
+}
+
+void mp_sched_lock(void) {
+    mp_uint_t atomic_state = MICROPY_BEGIN_ATOMIC_SECTION();
+    if (MP_STATE_VM(sched_state) < 0) {
+        --MP_STATE_VM(sched_state);
+    } else {
+        MP_STATE_VM(sched_state) = MP_SCHED_LOCKED;
+    }
+    MICROPY_END_ATOMIC_SECTION(atomic_state);
+}
+
+void mp_sched_unlock(void) {
+    mp_uint_t atomic_state = MICROPY_BEGIN_ATOMIC_SECTION();
+    if (++MP_STATE_VM(sched_state) == 0) {
+        // vm became unlocked
+        if (MP_STATE_VM(mp_pending_exception) != MP_OBJ_NULL || mp_sched_num_pending()) {
+            MP_STATE_VM(sched_state) = MP_SCHED_PENDING;
+        } else {
+            MP_STATE_VM(sched_state) = MP_SCHED_IDLE;
+        }
+    }
+    MICROPY_END_ATOMIC_SECTION(atomic_state);
+}
+
+bool mp_sched_schedule(mp_obj_t function, mp_obj_t arg) {
+    mp_uint_t atomic_state = MICROPY_BEGIN_ATOMIC_SECTION();
+    bool ret;
+    if (MP_STATE_VM(sched_sp) < MICROPY_SCHEDULER_DEPTH) {
+        if (MP_STATE_VM(sched_state) == MP_SCHED_IDLE) {
+            MP_STATE_VM(sched_state) = MP_SCHED_PENDING;
+        }
+        MP_STATE_VM(sched_stack)[MP_STATE_VM(sched_sp)].func = function;
+        MP_STATE_VM(sched_stack)[MP_STATE_VM(sched_sp)].arg = arg;
+        ++MP_STATE_VM(sched_sp);
+        ret = true;
+    } else {
+        // schedule stack is full
+        ret = false;
+    }
+    MICROPY_END_ATOMIC_SECTION(atomic_state);
+    return ret;
+}
+
+#else // MICROPY_ENABLE_SCHEDULER
+
+// A variant of this is inlined in the VM at the pending exception check
+void mp_handle_pending(void) {
+    if (MP_STATE_VM(mp_pending_exception) != MP_OBJ_NULL) {
+        mp_obj_t obj = MP_STATE_VM(mp_pending_exception);
+        MP_STATE_VM(mp_pending_exception) = MP_OBJ_NULL;
+        nlr_raise(obj);
+    }
+}
+
+#endif // MICROPY_ENABLE_SCHEDULER
diff --git a/py/sequence.c b/py/sequence.c
index bc2cfc077c..32db640dc1 100644
--- a/py/sequence.c
+++ b/py/sequence.c
@@ -38,9 +38,9 @@
 
 // Implements backend of sequence * integer operation. Assumes elements are
 // memory-adjacent in sequence.
-void mp_seq_multiply(const void *items, mp_uint_t item_sz, mp_uint_t len, mp_uint_t times, void *dest) {
-    for (mp_uint_t i = 0; i < times; i++) {
-        uint copy_sz = item_sz * len;
+void mp_seq_multiply(const void *items, size_t item_sz, size_t len, size_t times, void *dest) {
+    for (size_t i = 0; i < times; i++) {
+        size_t copy_sz = item_sz * len;
         memcpy(dest, items, copy_sz);
         dest = (char*)dest + copy_sz;
     }
@@ -88,15 +88,22 @@ bool mp_seq_get_fast_slice_indexes(mp_uint_t len, mp_obj_t slice, mp_bound_slice
     if (start < 0) {
         start = len + start;
         if (start < 0) {
-            start = 0;
+            if (indexes->step < 0) {
+                start = -1;
+            } else {
+                start = 0;
+            }
         }
     } else if (indexes->step > 0 && (mp_uint_t)start > len) {
         start = len;
-    } else if (indexes->step < 0 && (mp_uint_t)start > len - 1) {
+    } else if (indexes->step < 0 && (mp_uint_t)start >= len) {
         start = len - 1;
     }
     if (stop < 0) {
         stop = len + stop;
+        if (stop < 0) {
+            stop = -1;
+        }
         if (indexes->step < 0) {
             stop += 1;
         }
@@ -119,7 +126,7 @@ bool mp_seq_get_fast_slice_indexes(mp_uint_t len, mp_obj_t slice, mp_bound_slice
 
 #endif
 
-mp_obj_t mp_seq_extract_slice(mp_uint_t len, const mp_obj_t *seq, mp_bound_slice_t *indexes) {
+mp_obj_t mp_seq_extract_slice(size_t len, const mp_obj_t *seq, mp_bound_slice_t *indexes) {
     (void)len; // TODO can we remove len from the arg list?
 
     mp_int_t start = indexes->start, stop = indexes->stop;
@@ -143,7 +150,7 @@ mp_obj_t mp_seq_extract_slice(mp_uint_t len, const mp_obj_t *seq, mp_bound_slice
 
 // Special-case comparison function for sequences of bytes
 // Don't pass MP_BINARY_OP_NOT_EQUAL here
-bool mp_seq_cmp_bytes(mp_uint_t op, const byte *data1, mp_uint_t len1, const byte *data2, mp_uint_t len2) {
+bool mp_seq_cmp_bytes(mp_uint_t op, const byte *data1, size_t len1, const byte *data2, size_t len2) {
     if (op == MP_BINARY_OP_EQUAL && len1 != len2) {
         return false;
     }
@@ -151,14 +158,14 @@ bool mp_seq_cmp_bytes(mp_uint_t op, const byte *data1, mp_uint_t len1, const byt
     // Let's deal only with > & >=
     if (op == MP_BINARY_OP_LESS || op == MP_BINARY_OP_LESS_EQUAL) {
         SWAP(const byte*, data1, data2);
-        SWAP(uint, len1, len2);
+        SWAP(size_t, len1, len2);
         if (op == MP_BINARY_OP_LESS) {
             op = MP_BINARY_OP_MORE;
         } else {
             op = MP_BINARY_OP_MORE_EQUAL;
         }
     }
-    uint min_len = len1 < len2 ? len1 : len2;
+    size_t min_len = len1 < len2 ? len1 : len2;
     int res = memcmp(data1, data2, min_len);
     if (op == MP_BINARY_OP_EQUAL) {
         // If we are checking for equality, here're the answer
@@ -187,7 +194,7 @@ bool mp_seq_cmp_bytes(mp_uint_t op, const byte *data1, mp_uint_t len1, const byt
 
 // Special-case comparison function for sequences of mp_obj_t
 // Don't pass MP_BINARY_OP_NOT_EQUAL here
-bool mp_seq_cmp_objs(mp_uint_t op, const mp_obj_t *items1, mp_uint_t len1, const mp_obj_t *items2, mp_uint_t len2) {
+bool mp_seq_cmp_objs(mp_uint_t op, const mp_obj_t *items1, size_t len1, const mp_obj_t *items2, size_t len2) {
     if (op == MP_BINARY_OP_EQUAL && len1 != len2) {
         return false;
     }
@@ -195,7 +202,7 @@ bool mp_seq_cmp_objs(mp_uint_t op, const mp_obj_t *items1, mp_uint_t len1, const
     // Let's deal only with > & >=
     if (op == MP_BINARY_OP_LESS || op == MP_BINARY_OP_LESS_EQUAL) {
         SWAP(const mp_obj_t *, items1, items2);
-        SWAP(uint, len1, len2);
+        SWAP(size_t, len1, len2);
         if (op == MP_BINARY_OP_LESS) {
             op = MP_BINARY_OP_MORE;
         } else {
@@ -203,8 +210,8 @@ bool mp_seq_cmp_objs(mp_uint_t op, const mp_obj_t *items1, mp_uint_t len1, const
         }
     }
 
-    mp_uint_t len = len1 < len2 ? len1 : len2;
-    for (mp_uint_t i = 0; i < len; i++) {
+    size_t len = len1 < len2 ? len1 : len2;
+    for (size_t i = 0; i < len; i++) {
         // If current elements equal, can't decide anything - go on
         if (mp_obj_equal(items1[i], items2[i])) {
             continue;
@@ -236,11 +243,11 @@ bool mp_seq_cmp_objs(mp_uint_t op, const mp_obj_t *items1, mp_uint_t len1, const
 }
 
 // Special-case of index() which searches for mp_obj_t
-mp_obj_t mp_seq_index_obj(const mp_obj_t *items, mp_uint_t len, mp_uint_t n_args, const mp_obj_t *args) {
+mp_obj_t mp_seq_index_obj(const mp_obj_t *items, size_t len, size_t n_args, const mp_obj_t *args) {
     mp_obj_type_t *type = mp_obj_get_type(args[0]);
     mp_obj_t value = args[1];
-    uint start = 0;
-    uint stop = len;
+    size_t start = 0;
+    size_t stop = len;
 
     if (n_args >= 3) {
         start = mp_get_index(type, len, args[2], true);
@@ -249,19 +256,19 @@ mp_obj_t mp_seq_index_obj(const mp_obj_t *items, mp_uint_t len, mp_uint_t n_args
         }
     }
 
-    for (mp_uint_t i = start; i < stop; i++) {
+    for (size_t i = start; i < stop; i++) {
         if (mp_obj_equal(items[i], value)) {
             // Common sense says this cannot overflow small int
             return MP_OBJ_NEW_SMALL_INT(i);
         }
     }
 
-    mp_raise_msg(&mp_type_ValueError, "object not in sequence");
+    mp_raise_ValueError("object not in sequence");
 }
 
-mp_obj_t mp_seq_count_obj(const mp_obj_t *items, mp_uint_t len, mp_obj_t value) {
-    mp_uint_t count = 0;
-    for (uint i = 0; i < len; i++) {
+mp_obj_t mp_seq_count_obj(const mp_obj_t *items, size_t len, mp_obj_t value) {
+    size_t count = 0;
+    for (size_t i = 0; i < len; i++) {
          if (mp_obj_equal(items[i], value)) {
               count++;
          }
diff --git a/py/showbc.c b/py/showbc.c
index 684d9af0c0..0bccf8427f 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -82,7 +82,6 @@ const mp_uint_t *mp_showbc_const_table;
 
 void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const mp_uint_t *const_table) {
     mp_showbc_code_start = ip;
-    mp_showbc_const_table = const_table;
 
     // get bytecode parameters
     mp_uint_t n_state = mp_decode_uint(&ip);
@@ -159,7 +158,7 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m
             printf("  bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
         }
     }
-    mp_bytecode_print2(ip, len - 0);
+    mp_bytecode_print2(ip, len - 0, const_table);
 }
 
 const byte *mp_bytecode_print_str(const byte *ip) {
@@ -246,6 +245,11 @@ const byte *mp_bytecode_print_str(const byte *ip) {
             printf("LOAD_METHOD %s", qstr_str(qst));
             break;
 
+        case MP_BC_LOAD_SUPER_METHOD:
+            DECODE_QSTR;
+            printf("LOAD_SUPER_METHOD %s", qstr_str(qst));
+            break;
+
         case MP_BC_LOAD_BUILD_CLASS:
             printf("LOAD_BUILD_CLASS");
             break;
@@ -388,6 +392,10 @@ const byte *mp_bytecode_print_str(const byte *ip) {
             printf("GET_ITER");
             break;
 
+        case MP_BC_GET_ITER_STACK:
+            printf("GET_ITER_STACK");
+            break;
+
         case MP_BC_FOR_ITER:
             DECODE_ULABEL; // the jump offset if iteration finishes; for labels are always forward
             printf("FOR_ITER " UINT_FMT, (mp_uint_t)(ip + unum - mp_showbc_code_start));
@@ -547,8 +555,9 @@ const byte *mp_bytecode_print_str(const byte *ip) {
     return ip;
 }
 
-void mp_bytecode_print2(const byte *ip, mp_uint_t len) {
+void mp_bytecode_print2(const byte *ip, size_t len, const mp_uint_t *const_table) {
     mp_showbc_code_start = ip;
+    mp_showbc_const_table = const_table;
     while (ip < len + mp_showbc_code_start) {
         printf("%02u ", (uint)(ip - mp_showbc_code_start));
         ip = mp_bytecode_print_str(ip);
diff --git a/py/vm.c b/py/vm.c
index 363824e5f1..5094e3e450 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -38,8 +38,7 @@
 #include "py/bc.h"
 
 #if 0
-//#define TRACE(ip) printf("sp=" INT_FMT " ", sp - code_state->sp); mp_bytecode_print2(ip, 1);
-#define TRACE(ip) printf("sp=%d ", sp - code_state->sp); mp_bytecode_print2(ip, 1);
+#define TRACE(ip) printf("sp=%d ", (int)(sp - &code_state->state[0] + 1)); mp_bytecode_print2(ip, 1, code_state->fun_bc->const_table);
 #else
 #define TRACE(ip)
 #endif
@@ -64,8 +63,8 @@ typedef enum {
     do { \
         unum = (unum << 7) + (*ip & 0x7f); \
     } while ((*ip++ & 0x80) != 0)
-#define DECODE_ULABEL mp_uint_t ulab = (ip[0] | (ip[1] << 8)); ip += 2
-#define DECODE_SLABEL mp_uint_t slab = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2
+#define DECODE_ULABEL size_t ulab = (ip[0] | (ip[1] << 8)); ip += 2
+#define DECODE_SLABEL size_t slab = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2
 
 #if MICROPY_PERSISTENT_CODE
 
@@ -74,10 +73,10 @@ typedef enum {
     ip += 2;
 #define DECODE_PTR \
     DECODE_UINT; \
-    void *ptr = (void*)(uintptr_t)code_state->const_table[unum]
+    void *ptr = (void*)(uintptr_t)code_state->fun_bc->const_table[unum]
 #define DECODE_OBJ \
     DECODE_UINT; \
-    mp_obj_t obj = (mp_obj_t)code_state->const_table[unum]
+    mp_obj_t obj = (mp_obj_t)code_state->fun_bc->const_table[unum]
 
 #else
 
@@ -163,13 +162,20 @@ mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp
 run_code_state: ;
 #endif
     // Pointers which are constant for particular invocation of mp_execute_bytecode()
-    mp_obj_t * /*const*/ fastn = &code_state->state[code_state->n_state - 1];
-    mp_exc_stack_t * /*const*/ exc_stack = (mp_exc_stack_t*)(code_state->state + code_state->n_state);
+    size_t n_state = mp_decode_uint_value(code_state->fun_bc->bytecode);
+    mp_obj_t * /*const*/ fastn = &code_state->state[n_state - 1];
+    mp_exc_stack_t * /*const*/ exc_stack = (mp_exc_stack_t*)(code_state->state + n_state);
 
     // variables that are visible to the exception handler (declared volatile)
     volatile bool currently_in_except_block = MP_TAGPTR_TAG0(code_state->exc_sp); // 0 or 1, to detect nested exceptions
     mp_exc_stack_t *volatile exc_sp = MP_TAGPTR_PTR(code_state->exc_sp); // stack grows up, exc_sp points to top of stack
 
+    #if MICROPY_PY_THREAD_GIL && MICROPY_PY_THREAD_GIL_VM_DIVISOR
+    // This needs to be volatile and outside the VM loop so it persists across handling
+    // of any exceptions.  Otherwise it's possible that the VM never gives up the GIL.
+    volatile int gil_divisor = MICROPY_PY_THREAD_GIL_VM_DIVISOR;
+    #endif
+
     // outer exception handling loop
     for (;;) {
         nlr_buf_t nlr;
@@ -279,12 +285,12 @@ dispatch_loop:
                     DECODE_QSTR;
                     mp_obj_t key = MP_OBJ_NEW_QSTR(qst);
                     mp_uint_t x = *ip;
-                    if (x < MP_STATE_CTX(dict_locals)->map.alloc && MP_STATE_CTX(dict_locals)->map.table[x].key == key) {
-                        PUSH(MP_STATE_CTX(dict_locals)->map.table[x].value);
+                    if (x < mp_locals_get()->map.alloc && mp_locals_get()->map.table[x].key == key) {
+                        PUSH(mp_locals_get()->map.table[x].value);
                     } else {
-                        mp_map_elem_t *elem = mp_map_lookup(&MP_STATE_CTX(dict_locals)->map, MP_OBJ_NEW_QSTR(qst), MP_MAP_LOOKUP);
+                        mp_map_elem_t *elem = mp_map_lookup(&mp_locals_get()->map, MP_OBJ_NEW_QSTR(qst), MP_MAP_LOOKUP);
                         if (elem != NULL) {
-                            *(byte*)ip = (elem - &MP_STATE_CTX(dict_locals)->map.table[0]) & 0xff;
+                            *(byte*)ip = (elem - &mp_locals_get()->map.table[0]) & 0xff;
                             PUSH(elem->value);
                         } else {
                             PUSH(mp_load_name(MP_OBJ_QSTR_VALUE(key)));
@@ -308,12 +314,12 @@ dispatch_loop:
                     DECODE_QSTR;
                     mp_obj_t key = MP_OBJ_NEW_QSTR(qst);
                     mp_uint_t x = *ip;
-                    if (x < MP_STATE_CTX(dict_globals)->map.alloc && MP_STATE_CTX(dict_globals)->map.table[x].key == key) {
-                        PUSH(MP_STATE_CTX(dict_globals)->map.table[x].value);
+                    if (x < mp_globals_get()->map.alloc && mp_globals_get()->map.table[x].key == key) {
+                        PUSH(mp_globals_get()->map.table[x].value);
                     } else {
-                        mp_map_elem_t *elem = mp_map_lookup(&MP_STATE_CTX(dict_globals)->map, MP_OBJ_NEW_QSTR(qst), MP_MAP_LOOKUP);
+                        mp_map_elem_t *elem = mp_map_lookup(&mp_globals_get()->map, MP_OBJ_NEW_QSTR(qst), MP_MAP_LOOKUP);
                         if (elem != NULL) {
-                            *(byte*)ip = (elem - &MP_STATE_CTX(dict_globals)->map.table[0]) & 0xff;
+                            *(byte*)ip = (elem - &mp_globals_get()->map.table[0]) & 0xff;
                             PUSH(elem->value);
                         } else {
                             PUSH(mp_load_global(MP_OBJ_QSTR_VALUE(key)));
@@ -370,6 +376,14 @@ dispatch_loop:
                     DISPATCH();
                 }
 
+                ENTRY(MP_BC_LOAD_SUPER_METHOD): {
+                    MARK_EXC_IP_SELECTIVE();
+                    DECODE_QSTR;
+                    sp -= 1;
+                    mp_load_super_method(qst, sp - 1);
+                    DISPATCH();
+                }
+
                 ENTRY(MP_BC_LOAD_BUILD_CLASS):
                     MARK_EXC_IP_SELECTIVE();
                     PUSH(mp_load_build_class());
@@ -676,7 +690,8 @@ unwind_jump:;
                     }
                     ip = (const byte*)MP_OBJ_TO_PTR(POP()); // pop destination ip for jump
                     if (unum != 0) {
-                        sp--;
+                        // pop the exhausted iterator
+                        sp -= MP_OBJ_ITER_BUF_NSLOTS;
                     }
                     DISPATCH_WITH_PEND_EXC_CHECK();
                 }
@@ -718,17 +733,40 @@ unwind_jump:;
 
                 ENTRY(MP_BC_GET_ITER):
                     MARK_EXC_IP_SELECTIVE();
-                    SET_TOP(mp_getiter(TOP()));
+                    SET_TOP(mp_getiter(TOP(), NULL));
                     DISPATCH();
 
+                // An iterator for a for-loop takes MP_OBJ_ITER_BUF_NSLOTS slots on
+                // the Python value stack.  These slots are either used to store the
+                // iterator object itself, or the first slot is MP_OBJ_NULL and
+                // the second slot holds a reference to the iterator object.
+                ENTRY(MP_BC_GET_ITER_STACK): {
+                    MARK_EXC_IP_SELECTIVE();
+                    mp_obj_t obj = TOP();
+                    mp_obj_iter_buf_t *iter_buf = (mp_obj_iter_buf_t*)sp;
+                    sp += MP_OBJ_ITER_BUF_NSLOTS - 1;
+                    obj = mp_getiter(obj, iter_buf);
+                    if (obj != MP_OBJ_FROM_PTR(iter_buf)) {
+                        // Iterator didn't use the stack so indicate that with MP_OBJ_NULL.
+                        sp[-MP_OBJ_ITER_BUF_NSLOTS + 1] = MP_OBJ_NULL;
+                        sp[-MP_OBJ_ITER_BUF_NSLOTS + 2] = obj;
+                    }
+                    DISPATCH();
+                }
+
                 ENTRY(MP_BC_FOR_ITER): {
                     MARK_EXC_IP_SELECTIVE();
                     DECODE_ULABEL; // the jump offset if iteration finishes; for labels are always forward
                     code_state->sp = sp;
-                    assert(TOP());
-                    mp_obj_t value = mp_iternext_allow_raise(TOP());
+                    mp_obj_t obj;
+                    if (sp[-MP_OBJ_ITER_BUF_NSLOTS + 1] == MP_OBJ_NULL) {
+                        obj = sp[-MP_OBJ_ITER_BUF_NSLOTS + 2];
+                    } else {
+                        obj = MP_OBJ_FROM_PTR(&sp[-MP_OBJ_ITER_BUF_NSLOTS + 1]);
+                    }
+                    mp_obj_t value = mp_iternext_allow_raise(obj);
                     if (value == MP_OBJ_STOP_ITERATION) {
-                        --sp; // pop the exhausted iterator
+                        sp -= MP_OBJ_ITER_BUF_NSLOTS; // pop the exhausted iterator
                         ip += ulab; // jump to after for-block
                     } else {
                         PUSH(value); // push the next iteration value
@@ -858,7 +896,7 @@ unwind_jump:;
 
                 ENTRY(MP_BC_MAKE_CLOSURE): {
                     DECODE_PTR;
-                    mp_uint_t n_closed_over = *ip++;
+                    size_t n_closed_over = *ip++;
                     // Stack layout: closed_overs <- TOS
                     sp -= n_closed_over - 1;
                     SET_TOP(mp_make_closure_from_raw_code(ptr, n_closed_over, sp));
@@ -867,7 +905,7 @@ unwind_jump:;
 
                 ENTRY(MP_BC_MAKE_CLOSURE_DEFARGS): {
                     DECODE_PTR;
-                    mp_uint_t n_closed_over = *ip++;
+                    size_t n_closed_over = *ip++;
                     // Stack layout: def_tuple def_dict closed_overs <- TOS
                     sp -= 2 + n_closed_over - 1;
                     SET_TOP(mp_make_closure_from_raw_code(ptr, 0x100 | n_closed_over, sp));
@@ -953,8 +991,8 @@ unwind_jump:;
                         code_state->sp = sp;
                         code_state->exc_sp = MP_TAGPTR_MAKE(exc_sp, currently_in_except_block);
 
-                        mp_uint_t n_args = unum & 0xff;
-                        mp_uint_t n_kw = (unum >> 8) & 0xff;
+                        size_t n_args = unum & 0xff;
+                        size_t n_kw = (unum >> 8) & 0xff;
                         int adjust = (sp[1] == MP_OBJ_NULL) ? 0 : 1;
 
                         mp_code_state_t *new_state = mp_obj_fun_bc_prepare_codestate(*sp, n_args + adjust, n_kw, sp + 2 - adjust);
@@ -1237,16 +1275,50 @@ yield:
 
 pending_exception_check:
                 MICROPY_VM_HOOK_LOOP
+
+                #if MICROPY_ENABLE_SCHEDULER
+                // This is an inlined variant of mp_handle_pending
+                if (MP_STATE_VM(sched_state) == MP_SCHED_PENDING) {
+                    MARK_EXC_IP_SELECTIVE();
+                    mp_uint_t atomic_state = MICROPY_BEGIN_ATOMIC_SECTION();
+                    mp_obj_t obj = MP_STATE_VM(mp_pending_exception);
+                    if (obj != MP_OBJ_NULL) {
+                        MP_STATE_VM(mp_pending_exception) = MP_OBJ_NULL;
+                        if (!mp_sched_num_pending()) {
+                            MP_STATE_VM(sched_state) = MP_SCHED_IDLE;
+                        }
+                        MICROPY_END_ATOMIC_SECTION(atomic_state);
+                        RAISE(obj);
+                    }
+                    mp_handle_pending_tail(atomic_state);
+                }
+                #else
+                // This is an inlined variant of mp_handle_pending
                 if (MP_STATE_VM(mp_pending_exception) != MP_OBJ_NULL) {
                     MARK_EXC_IP_SELECTIVE();
                     mp_obj_t obj = MP_STATE_VM(mp_pending_exception);
                     MP_STATE_VM(mp_pending_exception) = MP_OBJ_NULL;
                     RAISE(obj);
                 }
+                #endif
 
-                // TODO make GIL release more efficient
-                MP_THREAD_GIL_EXIT();
-                MP_THREAD_GIL_ENTER();
+                #if MICROPY_PY_THREAD_GIL
+                #if MICROPY_PY_THREAD_GIL_VM_DIVISOR
+                if (--gil_divisor == 0) {
+                    gil_divisor = MICROPY_PY_THREAD_GIL_VM_DIVISOR;
+                #else
+                {
+                #endif
+                    #if MICROPY_ENABLE_SCHEDULER
+                    // can only switch threads if the scheduler is unlocked
+                    if (MP_STATE_VM(sched_state) == MP_SCHED_IDLE)
+                    #endif
+                    {
+                    MP_THREAD_GIL_EXIT();
+                    MP_THREAD_GIL_ENTER();
+                    }
+                }
+                #endif
 
             } // for loop
 
@@ -1270,7 +1342,7 @@ exception_handler:
                         const byte *ip = code_state->ip + 1;
                         DECODE_ULABEL; // the jump offset if iteration finishes; for labels are always forward
                         code_state->ip = ip + ulab; // jump to after for-block
-                        code_state->sp -= 1; // pop the exhausted iterator
+                        code_state->sp -= MP_OBJ_ITER_BUF_NSLOTS; // pop the exhausted iterator
                         goto outer_dispatch_loop; // continue with dispatch loop
                     } else if (*code_state->ip == MP_BC_YIELD_FROM) {
                         // StopIteration inside yield from call means return a value of
@@ -1290,8 +1362,16 @@ unwind_loop:
             // But consider how to handle nested exceptions.
             // TODO need a better way of not adding traceback to constant objects (right now, just GeneratorExit_obj and MemoryError_obj)
             if (nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) {
-                const byte *ip = code_state->code_info;
-                mp_uint_t code_info_size = mp_decode_uint(&ip);
+                const byte *ip = code_state->fun_bc->bytecode;
+                mp_decode_uint(&ip); // skip n_state
+                mp_decode_uint(&ip); // skip n_exc_stack
+                ip++; // skip scope_params
+                ip++; // skip n_pos_args
+                ip++; // skip n_kwonly_args
+                ip++; // skip n_def_pos_args
+                size_t bc = code_state->ip - ip;
+                size_t code_info_size = mp_decode_uint(&ip);
+                bc -= code_info_size;
                 #if MICROPY_PERSISTENT_CODE
                 qstr block_name = ip[0] | (ip[1] << 8);
                 qstr source_file = ip[2] | (ip[3] << 8);
@@ -1300,11 +1380,10 @@ unwind_loop:
                 qstr block_name = mp_decode_uint(&ip);
                 qstr source_file = mp_decode_uint(&ip);
                 #endif
-                size_t bc = code_state->ip - code_state->code_info - code_info_size;
                 size_t source_line = 1;
                 size_t c;
                 while ((c = *ip)) {
-                    mp_uint_t b, l;
+                    size_t b, l;
                     if ((c & 0x80) == 0) {
                         // 0b0LLBBBBB encoding
                         b = c & 0x1f;
@@ -1356,8 +1435,9 @@ unwind_loop:
             } else if (code_state->prev != NULL) {
                 mp_globals_set(code_state->old_globals);
                 code_state = code_state->prev;
-                fastn = &code_state->state[code_state->n_state - 1];
-                exc_stack = (mp_exc_stack_t*)(code_state->state + code_state->n_state);
+                size_t n_state = mp_decode_uint_value(code_state->fun_bc->bytecode);
+                fastn = &code_state->state[n_state - 1];
+                exc_stack = (mp_exc_stack_t*)(code_state->state + n_state);
                 // variables that are visible to the exception handler (declared volatile)
                 currently_in_except_block = MP_TAGPTR_TAG0(code_state->exc_sp); // 0 or 1, to detect nested exceptions
                 exc_sp = MP_TAGPTR_PTR(code_state->exc_sp); // stack grows up, exc_sp points to top of stack
diff --git a/py/vmentrytable.h b/py/vmentrytable.h
index dd30dd7a54..dd9789e348 100644
--- a/py/vmentrytable.h
+++ b/py/vmentrytable.h
@@ -44,6 +44,7 @@ static const void *const entry_table[256] = {
     [MP_BC_LOAD_GLOBAL] = &&entry_MP_BC_LOAD_GLOBAL,
     [MP_BC_LOAD_ATTR] = &&entry_MP_BC_LOAD_ATTR,
     [MP_BC_LOAD_METHOD] = &&entry_MP_BC_LOAD_METHOD,
+    [MP_BC_LOAD_SUPER_METHOD] = &&entry_MP_BC_LOAD_SUPER_METHOD,
     [MP_BC_LOAD_BUILD_CLASS] = &&entry_MP_BC_LOAD_BUILD_CLASS,
     [MP_BC_LOAD_SUBSCR] = &&entry_MP_BC_LOAD_SUBSCR,
     [MP_BC_STORE_FAST_N] = &&entry_MP_BC_STORE_FAST_N,
@@ -73,6 +74,7 @@ static const void *const entry_table[256] = {
     [MP_BC_SETUP_FINALLY] = &&entry_MP_BC_SETUP_FINALLY,
     [MP_BC_END_FINALLY] = &&entry_MP_BC_END_FINALLY,
     [MP_BC_GET_ITER] = &&entry_MP_BC_GET_ITER,
+    [MP_BC_GET_ITER_STACK] = &&entry_MP_BC_GET_ITER_STACK,
     [MP_BC_FOR_ITER] = &&entry_MP_BC_FOR_ITER,
     [MP_BC_POP_BLOCK] = &&entry_MP_BC_POP_BLOCK,
     [MP_BC_POP_EXCEPT] = &&entry_MP_BC_POP_EXCEPT,