summary | refs | log | tree | commit | diff | stats | homepage
path: root/py
diff options
context:
space:
mode:
Diffstat (limited to 'py')
-rw-r--r-- py/builtin.c 19
-rw-r--r-- py/builtineval.c 6
-rw-r--r-- py/builtinimport.c 1
-rw-r--r-- py/builtinmp.c 8
-rw-r--r-- py/compile.c 45
-rw-r--r-- py/emitbc.c 1
-rw-r--r-- py/emitcommon.c 1
-rw-r--r-- py/emitcpy.c 1
-rw-r--r-- py/emitinlinethumb.c 1
-rw-r--r-- py/emitnative.c 1
-rw-r--r-- py/emitpass1.c 2
-rw-r--r-- py/lexer.c 4
-rw-r--r-- py/lexerstr.c 2
-rw-r--r-- py/lexerunix.c 1
-rw-r--r-- py/makeqstrdata.py 62
-rw-r--r-- py/map.c 1
-rw-r--r-- py/misc.h 10
-rw-r--r-- py/mpqstr.h 13
-rw-r--r-- py/obj.c 4
-rw-r--r-- py/objarray.c 2
-rw-r--r-- py/objbool.c 2
-rw-r--r-- py/objboundmeth.c 1
-rw-r--r-- py/objcell.c 1
-rw-r--r-- py/objclosure.c 1
-rw-r--r-- py/objcomplex.c 2
-rw-r--r-- py/objdict.c 2
-rw-r--r-- py/objenumerate.c 1
-rw-r--r-- py/objexcept.c 4
-rw-r--r-- py/objfilter.c 2
-rw-r--r-- py/objfloat.c 2
-rw-r--r-- py/objfun.c 2
-rw-r--r-- py/objgenerator.c 2
-rw-r--r-- py/objint.c 2
-rw-r--r-- py/objint_longlong.c 2
-rw-r--r-- py/objlist.c 6
-rw-r--r-- py/objmap.c 2
-rw-r--r-- py/objmodule.c 2
-rw-r--r-- py/objnone.c 1
-rw-r--r-- py/objrange.c 1
-rw-r--r-- py/objset.c 2
-rw-r--r-- py/objslice.c 1
-rw-r--r-- py/objstr.c 94
-rw-r--r-- py/objtuple.c 2
-rw-r--r-- py/objtype.c 6
-rw-r--r-- py/objzip.c 1
-rw-r--r-- py/parse.c 12
-rw-r--r-- py/py.mk 31
-rw-r--r-- py/qstr.c 156
-rw-r--r-- py/qstr.h 35
-rw-r--r-- py/qstrdefs.h (renamed from py/mpqstrraw.h) 0
-rw-r--r-- py/runtime.c 4
-rw-r--r-- py/scope.c 13
-rw-r--r-- py/showbc.c 1
-rw-r--r-- py/stream.c 27
-rw-r--r-- py/strtonum.c 2
-rw-r--r-- py/vm.c 1
56 files changed, 404 insertions, 207 deletions
diff --git a/py/builtin.c b/py/builtin.c
index f102aa5885..8340ad3045 100644
--- a/py/builtin.c
+++ b/py/builtin.c
@@ -8,7 +8,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
@@ -139,8 +139,8 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_callable_obj, mp_builtin_callable);
static mp_obj_t mp_builtin_chr(mp_obj_t o_in) {
int ord = mp_obj_get_int(o_in);
if (0 <= ord && ord <= 0x10ffff) {
- char str[2] = {ord, '\0'};
- return mp_obj_new_str(qstr_from_strn_copy(str, 1));
+ char str[1] = {ord};
+ return mp_obj_new_str(qstr_from_strn(str, 1));
} else {
nlr_jump(mp_obj_new_exception_msg(MP_QSTR_ValueError, "chr() arg not in range(0x110000)"));
}
@@ -257,11 +257,12 @@ static mp_obj_t mp_builtin_next(mp_obj_t o) {
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_next_obj, mp_builtin_next);
static mp_obj_t mp_builtin_ord(mp_obj_t o_in) {
- const char *str = qstr_str(mp_obj_get_qstr(o_in));
- if (strlen(str) == 1) {
+ uint len;
+ const byte *str = qstr_data(mp_obj_get_qstr(o_in), &len);
+ if (len == 1) {
return mp_obj_new_int(str[0]);
} else {
- nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "ord() expected a character, but string of length %d found", strlen(str)));
+ nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "ord() expected a character, but string of length %d found", len));
}
}
@@ -304,7 +305,8 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_range_obj, 1, 3, mp_builtin_range
static mp_obj_t mp_builtin_repr(mp_obj_t o_in) {
vstr_t *vstr = vstr_new();
mp_obj_print_helper((void (*)(void *env, const char *fmt, ...))vstr_printf, vstr, o_in, PRINT_REPR);
- return mp_obj_new_str(qstr_from_str_take(vstr->buf, vstr->alloc));
+ // TODO don't intern this string
+ return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len));
}
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_repr_obj, mp_builtin_repr);
@@ -343,7 +345,8 @@ MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted);
static mp_obj_t mp_builtin_str(mp_obj_t o_in) {
vstr_t *vstr = vstr_new();
mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, o_in, PRINT_STR);
- return mp_obj_new_str(qstr_from_str_take(vstr->buf, vstr->alloc));
+ // TODO don't intern this string
+ return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len));
}
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_str_obj, mp_builtin_str);
diff --git a/py/builtineval.c b/py/builtineval.c
index c7bd6b6298..67072a0fa7 100644
--- a/py/builtineval.c
+++ b/py/builtineval.c
@@ -8,6 +8,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "lexer.h"
#include "lexerunix.h"
#include "parse.h"
@@ -19,10 +20,11 @@
#include "builtin.h"
static mp_obj_t mp_builtin_eval(mp_obj_t o_in) {
- const char *str = qstr_str(mp_obj_get_qstr(o_in));
+ uint str_len;
+ const byte *str = qstr_data(mp_obj_get_qstr(o_in), &str_len);
// create the lexer
- mp_lexer_t *lex = mp_lexer_new_from_str_len("<string>", str, strlen(str), 0);
+ mp_lexer_t *lex = mp_lexer_new_from_str_len("<string>", (const char*)str, str_len, 0);
// parse the string
qstr parse_exc_id;
diff --git a/py/builtinimport.c b/py/builtinimport.c
index 92d5d5ac9f..4cdad4e249 100644
--- a/py/builtinimport.c
+++ b/py/builtinimport.c
@@ -8,6 +8,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "lexer.h"
#include "lexerunix.h"
#include "parse.h"
diff --git a/py/builtinmp.c b/py/builtinmp.c
index f72a80f1a8..dfbea0906e 100644
--- a/py/builtinmp.c
+++ b/py/builtinmp.c
@@ -7,7 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
#include "builtin.h"
@@ -38,8 +38,8 @@ void mp_module_micropython_init(void) {
rt_store_name(MP_QSTR_micropython, m_mp);
#if MICROPY_MEM_STATS
- rt_store_attr(m_mp, qstr_from_str_static("mem_total"), (mp_obj_t)&mp_builtin_mem_total_obj);
- rt_store_attr(m_mp, qstr_from_str_static("mem_current"), (mp_obj_t)&mp_builtin_mem_current_obj);
- rt_store_attr(m_mp, qstr_from_str_static("mem_peak"), (mp_obj_t)&mp_builtin_mem_peak_obj);
+ rt_store_attr(m_mp, QSTR_FROM_STR_STATIC("mem_total"), (mp_obj_t)&mp_builtin_mem_total_obj);
+ rt_store_attr(m_mp, QSTR_FROM_STR_STATIC("mem_current"), (mp_obj_t)&mp_builtin_mem_current_obj);
+ rt_store_attr(m_mp, QSTR_FROM_STR_STATIC("mem_peak"), (mp_obj_t)&mp_builtin_mem_peak_obj);
#endif
}
diff --git a/py/compile.c b/py/compile.c
index 27b2439639..f61c4580c7 100644
--- a/py/compile.c
+++ b/py/compile.c
@@ -7,7 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
@@ -273,8 +273,8 @@ static bool cpython_c_tuple_is_const(mp_parse_node_t pn) {
}
static void cpython_c_print_quoted_str(vstr_t *vstr, qstr qstr, bool bytes) {
- const char *str = qstr_str(qstr);
- int len = strlen(str);
+ uint len;
+ const byte *str = qstr_data(qstr, &len);
bool has_single_quote = false;
bool has_double_quote = false;
for (int i = 0; i < len; i++) {
@@ -1169,22 +1169,20 @@ void do_import_name(compiler_t *comp, mp_parse_node_t pn, qstr *q1, qstr *q2) {
int n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
int len = n - 1;
for (int i = 0; i < n; i++) {
- len += strlen(qstr_str(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i])));
+ len += qstr_len(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
}
- char *str = m_new(char, len + 1);
- char *str_dest = str;
- str[0] = 0;
+ byte *q_ptr;
+ byte *str_dest = qstr_build_start(len, &q_ptr);
for (int i = 0; i < n; i++) {
if (i > 0) {
*str_dest++ = '.';
}
- const char *str_src = qstr_str(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
- size_t str_src_len = strlen(str_src);
+ uint str_src_len;
+ const byte *str_src = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &str_src_len);
memcpy(str_dest, str_src, str_src_len);
str_dest += str_src_len;
}
- *str_dest = '\0';
- *q2 = qstr_from_str_take(str, len + 1);
+ *q2 = qstr_build_end(q_ptr);
EMIT(import_name, *q2);
if (is_as) {
for (int i = 1; i < n; i++) {
@@ -1221,7 +1219,7 @@ void compile_import_from(compiler_t *comp, mp_parse_node_struct_t *pns) {
#if MICROPY_EMIT_CPYTHON
EMIT(load_const_verbatim_str, "('*',)");
#else
- EMIT(load_const_str, qstr_from_str_static("*"), false);
+ EMIT(load_const_str, QSTR_FROM_STR_STATIC("*"), false);
EMIT(build_tuple, 1);
#endif
@@ -1248,7 +1246,9 @@ void compile_import_from(compiler_t *comp, mp_parse_node_struct_t *pns) {
vstr_printf(vstr, ", ");
}
vstr_printf(vstr, "'");
- vstr_printf(vstr, qstr_str(id2));
+ uint len;
+ const byte *str = qstr_data(id2, &len);
+ vstr_add_strn(vstr, (const char*)str, len);
vstr_printf(vstr, "'");
}
if (n == 1) {
@@ -2128,24 +2128,21 @@ void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
printf("SyntaxError: cannot mix bytes and nonbytes literals\n");
return;
}
- const char *str = qstr_str(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
- n_bytes += strlen(str);
+ n_bytes += qstr_len(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
}
- // allocate memory for concatenated string/bytes
- char *cat_str = m_new(char, n_bytes + 1);
-
// concatenate string/bytes
- char *s_dest = cat_str;
+ byte *q_ptr;
+ byte *s_dest = qstr_build_start(n_bytes, &q_ptr);
for (int i = 0; i < n; i++) {
- const char *s = qstr_str(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
- size_t s_len = strlen(s);
+ uint s_len;
+ const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len);
memcpy(s_dest, s, s_len);
s_dest += s_len;
}
- *s_dest = '\0';
+ qstr q = qstr_build_end(q_ptr);
- EMIT(load_const_str, qstr_from_str_take(cat_str, n_bytes + 1), string_kind == MP_PARSE_NODE_BYTES);
+ EMIT(load_const_str, q, string_kind == MP_PARSE_NODE_BYTES);
}
// pns needs to have 2 nodes, first is lhs of comprehension, second is PN_comp_for node
@@ -2767,7 +2764,7 @@ void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) {
assert(MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_comp_for));
mp_parse_node_struct_t *pns_comp_for = (mp_parse_node_struct_t*)pns->nodes[1];
- qstr qstr_arg = qstr_from_str_static(".0");
+ qstr qstr_arg = QSTR_FROM_STR_STATIC(".0");
if (comp->pass == PASS_1) {
bool added;
id_info_t *id_info = scope_find_or_add_id(comp->scope_cur, qstr_arg, &added);
diff --git a/py/emitbc.c b/py/emitbc.c
index 1f034e9df1..c3385e0b66 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -7,6 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
diff --git a/py/emitcommon.c b/py/emitcommon.c
index e30cad7496..bfcdad7973 100644
--- a/py/emitcommon.c
+++ b/py/emitcommon.c
@@ -6,6 +6,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
diff --git a/py/emitcpy.c b/py/emitcpy.c
index 42eef91d11..de2a5784db 100644
--- a/py/emitcpy.c
+++ b/py/emitcpy.c
@@ -7,6 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
diff --git a/py/emitinlinethumb.c b/py/emitinlinethumb.c
index 073dfa0604..9d5a4206a0 100644
--- a/py/emitinlinethumb.c
+++ b/py/emitinlinethumb.c
@@ -7,6 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
diff --git a/py/emitnative.c b/py/emitnative.c
index a80cd2cf10..6fc1742489 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -25,6 +25,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
diff --git a/py/emitpass1.c b/py/emitpass1.c
index c73522e474..38115a51c1 100644
--- a/py/emitpass1.c
+++ b/py/emitpass1.c
@@ -7,7 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
diff --git a/py/lexer.c b/py/lexer.c
index e8c6bc3082..daaeebf511 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -7,6 +7,8 @@
#include <assert.h>
#include "misc.h"
+#include "mpconfig.h"
+#include "qstr.h"
#include "lexer.h"
#define TAB_SIZE (8)
@@ -593,7 +595,7 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close) {
mp_lexer_t *lex = m_new(mp_lexer_t, 1);
- lex->source_name = qstr_from_strn_copy(src_name, strlen(src_name));
+ lex->source_name = qstr_from_str(src_name);
lex->stream_data = stream_data;
lex->stream_next_char = stream_next_char;
lex->stream_close = stream_close;
diff --git a/py/lexerstr.c b/py/lexerstr.c
index b8594f4205..8cbf36d831 100644
--- a/py/lexerstr.c
+++ b/py/lexerstr.c
@@ -2,6 +2,8 @@
#include <stdio.h>
#include "misc.h"
+#include "mpconfig.h"
+#include "qstr.h"
#include "lexer.h"
typedef struct _mp_lexer_str_buf_t {
diff --git a/py/lexerunix.c b/py/lexerunix.c
index 225ed20a7f..7846120a4a 100644
--- a/py/lexerunix.c
+++ b/py/lexerunix.c
@@ -5,6 +5,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "lexer.h"
#include "lexerunix.h"
diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py
new file mode 100644
index 0000000000..4e74ea8419
--- /dev/null
+++ b/py/makeqstrdata.py
@@ -0,0 +1,62 @@
+import argparse
+import re
+
+# this must match the equivalent function in qstr.c
+def compute_hash(qstr):
+ hash = 0
+ for char in qstr:
+ hash += ord(char)
+ return hash & 0xffff
+
+def do_work(infiles):
+ # read the qstrs in from the input files
+ qstrs = []
+ for infile in infiles:
+ with open(infile, 'rt') as f:
+ line_number = 0
+ for line in f:
+ line_number += 1
+ line = line.strip()
+
+ # ignore blank lines and comments
+ if len(line) == 0 or line.startswith('//'):
+ continue
+
+ # verify line is of the correct form
+ match = re.match(r'Q\(([0-9A-Za-z_]+)\)$', line)
+ if not match:
+ print('({}:{}) bad qstr format, got {}'.format(infile, line_number, line))
+ return False
+
+ # get the qstr value
+ qstr = match.group(1)
+
+ # don't add duplicates
+ if qstr in qstrs:
+ continue
+
+ # add the qstr to the list
+ qstrs.append(qstr)
+
+ # process the qstrs, printing out the generated C header file
+ print('// This file was automatically generated by makeqstrdata.py')
+ print()
+ for qstr in qstrs:
+ qhash = compute_hash(qstr)
+ qlen = len(qstr)
+ print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}" "{}")'.format(qstr, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qstr))
+
+ return True
+
+def main():
+ arg_parser = argparse.ArgumentParser(description='Process raw qstr file and output qstr data with length, hash and data bytes')
+ arg_parser.add_argument('files', nargs='+', help='input file(s)')
+ args = arg_parser.parse_args()
+
+ result = do_work(args.files)
+ if not result:
+ print('exiting with error code')
+ exit(1)
+
+if __name__ == "__main__":
+ main()
diff --git a/py/map.c b/py/map.c
index 1ce763ab0e..9f919e06ae 100644
--- a/py/map.c
+++ b/py/map.c
@@ -4,6 +4,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "map.h"
diff --git a/py/misc.h b/py/misc.h
index 5b012d03dc..8756c25a07 100644
--- a/py/misc.h
+++ b/py/misc.h
@@ -88,14 +88,4 @@ void vstr_printf(vstr_t *vstr, const char *fmt, ...);
void vstr_vprintf(vstr_t *vstr, const char *fmt, va_list ap);
#endif
-/** unique string ***********************************************/
-
-typedef unsigned int qstr;
-
-void qstr_init(void);
-qstr qstr_from_str_static(const char *str);
-qstr qstr_from_str_take(char *str, int alloc_len);
-qstr qstr_from_strn_copy(const char *str, int len);
-const char* qstr_str(qstr qstr);
-
#endif // _INCLUDED_MINILIB_H
diff --git a/py/mpqstr.h b/py/mpqstr.h
deleted file mode 100644
index 1440fb3b80..0000000000
--- a/py/mpqstr.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// See mpqstrraw.h for a list of qstr's that are available as constants.
-// Reference them as MP_QSTR_xxxx.
-//
-// Note: it would be possible to define MP_QSTR_xxx as qstr_from_str_static("xxx")
-// for qstrs that are referenced this way, but you don't want to have them in ROM.
-
-enum {
- MP_QSTR_nil = 0,
-#define Q(id) MP_QSTR_##id,
-#include "mpqstrraw.h"
-#undef Q
- MP_QSTR_number_of,
-} category_t;
diff --git a/py/obj.c b/py/obj.c
index 42f86cf175..5a6c08332a 100644
--- a/py/obj.c
+++ b/py/obj.c
@@ -8,7 +8,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
@@ -268,7 +268,7 @@ uint mp_get_index(const mp_obj_type_t *type, machine_uint_t len, mp_obj_t index)
mp_obj_t mp_obj_len_maybe(mp_obj_t o_in) {
mp_small_int_t len = 0;
if (MP_OBJ_IS_TYPE(o_in, &str_type)) {
- len = strlen(qstr_str(mp_obj_str_get(o_in)));
+ len = qstr_len(mp_obj_str_get(o_in));
} else if (MP_OBJ_IS_TYPE(o_in, &tuple_type)) {
uint seq_len;
mp_obj_t *seq_items;
diff --git a/py/objarray.c b/py/objarray.c
index a321e2a8e5..a054c8f980 100644
--- a/py/objarray.c
+++ b/py/objarray.c
@@ -7,7 +7,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "map.h"
#include "runtime0.h"
diff --git a/py/objbool.c b/py/objbool.c
index fac26f31e7..729ffb4e6d 100644
--- a/py/objbool.c
+++ b/py/objbool.c
@@ -4,7 +4,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
diff --git a/py/objboundmeth.c b/py/objboundmeth.c
index 2b132004e2..500e61bd53 100644
--- a/py/objboundmeth.c
+++ b/py/objboundmeth.c
@@ -6,6 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
diff --git a/py/objcell.c b/py/objcell.c
index 264125bf3a..04c7f36851 100644
--- a/py/objcell.c
+++ b/py/objcell.c
@@ -5,6 +5,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
diff --git a/py/objclosure.c b/py/objclosure.c
index 7f6bcf4acc..0f4816e5fd 100644
--- a/py/objclosure.c
+++ b/py/objclosure.c
@@ -6,6 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
diff --git a/py/objcomplex.c b/py/objcomplex.c
index b56f75c4cd..af148a2786 100644
--- a/py/objcomplex.c
+++ b/py/objcomplex.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "map.h"
diff --git a/py/objdict.c b/py/objdict.c
index 9493bc89b1..55a612913d 100644
--- a/py/objdict.c
+++ b/py/objdict.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
diff --git a/py/objenumerate.c b/py/objenumerate.c
index 3e25124c49..564cb1c474 100644
--- a/py/objenumerate.c
+++ b/py/objenumerate.c
@@ -3,6 +3,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
diff --git a/py/objexcept.c b/py/objexcept.c
index 2e145ee358..c91b71dd9e 100644
--- a/py/objexcept.c
+++ b/py/objexcept.c
@@ -7,7 +7,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "objtuple.h"
@@ -100,7 +100,7 @@ mp_obj_t mp_obj_new_exception_msg_varg(qstr id, const char *fmt, ...) {
va_start(ap, fmt);
vstr_vprintf(vstr, fmt, ap);
va_end(ap);
- o->msg = qstr_from_str_take(vstr->buf, vstr->alloc);
+ o->msg = qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len);
}
return o;
diff --git a/py/objfilter.c b/py/objfilter.c
index 6ef3ef62d4..bfed2420f0 100644
--- a/py/objfilter.c
+++ b/py/objfilter.c
@@ -4,7 +4,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
diff --git a/py/objfloat.c b/py/objfloat.c
index d397daab27..9f1f478cab 100644
--- a/py/objfloat.c
+++ b/py/objfloat.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
diff --git a/py/objfun.c b/py/objfun.c
index 0bac142dab..b749860c25 100644
--- a/py/objfun.c
+++ b/py/objfun.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "map.h"
#include "runtime.h"
diff --git a/py/objgenerator.c b/py/objgenerator.c
index 2e8bd3d328..192e5c6328 100644
--- a/py/objgenerator.c
+++ b/py/objgenerator.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
#include "bc.h"
diff --git a/py/objint.c b/py/objint.c
index 49341d38a2..02628b7ef9 100644
--- a/py/objint.c
+++ b/py/objint.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "objint.h"
diff --git a/py/objint_longlong.c b/py/objint_longlong.c
index 24d693761e..fd13a038b6 100644
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "objint.h"
#include "runtime0.h"
diff --git a/py/objlist.c b/py/objlist.c
index 0ad7b68799..bc363d38fd 100644
--- a/py/objlist.c
+++ b/py/objlist.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "map.h"
#include "runtime0.h"
@@ -259,8 +259,8 @@ mp_obj_t mp_obj_list_sort(uint n_args, const mp_obj_t *args, mp_map_t *kwargs) {
}
mp_obj_list_t *self = args[0];
if (self->len > 1) {
- mp_map_elem_t *keyfun = mp_map_lookup(kwargs, MP_OBJ_NEW_QSTR(qstr_from_str_static("key")), MP_MAP_LOOKUP);
- mp_map_elem_t *reverse = mp_map_lookup(kwargs, MP_OBJ_NEW_QSTR(qstr_from_str_static("reverse")), MP_MAP_LOOKUP);
+ mp_map_elem_t *keyfun = mp_map_lookup(kwargs, MP_OBJ_NEW_QSTR(QSTR_FROM_STR_STATIC("key")), MP_MAP_LOOKUP);
+ mp_map_elem_t *reverse = mp_map_lookup(kwargs, MP_OBJ_NEW_QSTR(QSTR_FROM_STR_STATIC("reverse")), MP_MAP_LOOKUP);
mp_quicksort(self->items, self->items + self->len - 1,
keyfun ? keyfun->value : NULL,
reverse && reverse->value ? rt_is_true(reverse->value) : false);
diff --git a/py/objmap.c b/py/objmap.c
index 0c25cfdca3..39778e11ee 100644
--- a/py/objmap.c
+++ b/py/objmap.c
@@ -4,7 +4,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
diff --git a/py/objmodule.c b/py/objmodule.c
index fb7842e5af..73f146131e 100644
--- a/py/objmodule.c
+++ b/py/objmodule.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
#include "map.h"
diff --git a/py/objnone.c b/py/objnone.c
index 84d0ba164c..ecc7c4b4e7 100644
--- a/py/objnone.c
+++ b/py/objnone.c
@@ -4,6 +4,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "obj.h"
typedef struct _mp_obj_none_t {
diff --git a/py/objrange.c b/py/objrange.c
index a2a0e67b00..1fff327ab7 100644
--- a/py/objrange.c
+++ b/py/objrange.c
@@ -4,6 +4,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "obj.h"
/******************************************************************************/
diff --git a/py/objset.c b/py/objset.c
index a9ba2ad885..cf4545c257 100644
--- a/py/objset.c
+++ b/py/objset.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
#include "runtime0.h"
diff --git a/py/objslice.c b/py/objslice.c
index 8abcea08d0..d5c31f4461 100644
--- a/py/objslice.c
+++ b/py/objslice.c
@@ -6,6 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
diff --git a/py/objstr.c b/py/objstr.c
index 5e87097a82..3552058430 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -7,7 +7,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
@@ -36,9 +36,30 @@ void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj
mp_obj_str_print_qstr(print, env, self->qstr, kind);
}
+// like strstr but with specified length and allows \0 bytes
+// TODO replace with something more efficient/standard
+static const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) {
+ if (hlen >= nlen) {
+ for (uint i = 0; i <= hlen - nlen; i++) {
+ bool found = true;
+ for (uint j = 0; j < nlen; j++) {
+ if (haystack[i + j] != needle[j]) {
+ found = false;
+ break;
+ }
+ }
+ if (found) {
+ return haystack + i;
+ }
+ }
+ }
+ return NULL;
+}
+
mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
mp_obj_str_t *lhs = lhs_in;
- const char *lhs_str = qstr_str(lhs->qstr);
+ uint lhs_len;
+ const byte *lhs_data = qstr_data(lhs->qstr, &lhs_len);
switch (op) {
case RT_BINARY_OP_SUBSCR:
// TODO: need predicate to check for int-like type (bools are such for example)
@@ -46,31 +67,30 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
// TODO: This implements byte string access for single index so far
// TODO: Handle negative indexes.
- return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]);
+ return mp_obj_new_int(lhs_data[mp_obj_get_int(rhs_in)]);
#if MICROPY_ENABLE_SLICE
} else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) {
machine_int_t start, stop, step;
mp_obj_slice_get(rhs_in, &start, &stop, &step);
assert(step == 1);
- int len = strlen(lhs_str);
if (start < 0) {
- start = len + start;
+ start = lhs_len + start;
if (start < 0) {
start = 0;
}
- } else if (start > len) {
- start = len;
+ } else if (start > lhs_len) {
+ start = lhs_len;
}
if (stop <= 0) {
- stop = len + stop;
+ stop = lhs_len + stop;
// CPython returns empty string in such case
if (stop < 0) {
stop = start;
}
- } else if (stop > len) {
- stop = len;
+ } else if (stop > lhs_len) {
+ stop = lhs_len;
}
- return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start));
+ return mp_obj_new_str(qstr_from_strn((const char*)lhs_data + start, stop - start));
#endif
} else {
// Message doesn't match CPython, but we don't have so much bytes as they
@@ -82,24 +102,24 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
case RT_BINARY_OP_INPLACE_ADD:
if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
// add 2 strings
- const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr);
- size_t lhs_len = strlen(lhs_str);
- size_t rhs_len = strlen(rhs_str);
- int alloc_len = lhs_len + rhs_len + 1;
- char *val = m_new(char, alloc_len);
- memcpy(val, lhs_str, lhs_len);
- memcpy(val + lhs_len, rhs_str, rhs_len);
- val[lhs_len + rhs_len] = '\0';
- return mp_obj_new_str(qstr_from_str_take(val, alloc_len));
+ uint rhs_len;
+ const byte *rhs_data = qstr_data(((mp_obj_str_t*)rhs_in)->qstr, &rhs_len);
+ int alloc_len = lhs_len + rhs_len;
+ byte *q_ptr;
+ byte *val = qstr_build_start(alloc_len, &q_ptr);
+ memcpy(val, lhs_data, lhs_len);
+ memcpy(val + lhs_len, rhs_data, rhs_len);
+ return mp_obj_new_str(qstr_build_end(q_ptr));
}
break;
case RT_COMPARE_OP_IN:
case RT_COMPARE_OP_NOT_IN:
/* NOTE `a in b` is `b.__contains__(a)` */
if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
- const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr);
- /* FIXME \0 in strs */
- return MP_BOOL((op == RT_COMPARE_OP_IN) ^ (strstr(lhs_str, rhs_str) == NULL));
+ uint rhs_len;
+ const byte *rhs_data = qstr_data(((mp_obj_str_t*)rhs_in)->qstr, &rhs_len);
+ return MP_BOOL((op == RT_COMPARE_OP_IN) ^ (find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len) == NULL));
+ return mp_const_false;
}
break;
}
@@ -143,22 +163,22 @@ mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
}
// make joined string
- char *joined_str = m_new(char, required_len + 1);
- char *s_dest = joined_str;
+ byte *q_ptr;
+ byte *s_dest = qstr_build_start(required_len, &q_ptr);
for (int i = 0; i < seq_len; i++) {
if (i > 0) {
memcpy(s_dest, sep_str, sep_len);
s_dest += sep_len;
}
- const char *s2 = qstr_str(mp_obj_str_get(seq_items[i]));
- size_t s2_len = strlen(s2);
+ uint s2_len;
+ const byte *s2 = qstr_data(mp_obj_str_get(seq_items[i]), &s2_len);
memcpy(s_dest, s2, s2_len);
s_dest += s2_len;
}
- *s_dest = '\0';
+ qstr q = qstr_build_end(q_ptr);
// return joined string
- return mp_obj_new_str(qstr_from_str_take(joined_str, required_len + 1));
+ return mp_obj_new_str(q);
bad_arg:
nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's"));
@@ -246,20 +266,14 @@ mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
}
if (first_good_char_pos == 0 && last_good_char_pos == 0) {
- //string is all whitespace, return '\0'
- char *empty = m_new(char, 1);
- empty[0] = '\0';
- return mp_obj_new_str(qstr_from_str_take(empty, 1));
+ //string is all whitespace, return ''
+ return mp_obj_new_str(MP_QSTR_);
}
assert(last_good_char_pos >= first_good_char_pos);
//+1 to accomodate the last character
size_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
- //+1 to accomodate '\0'
- char *stripped_str = m_new(char, stripped_len + 1);
- memcpy(stripped_str, orig_str + first_good_char_pos, stripped_len);
- stripped_str[stripped_len] = '\0';
- return mp_obj_new_str(qstr_from_str_take(stripped_str, stripped_len + 1));
+ return mp_obj_new_str(qstr_from_strn(orig_str + first_good_char_pos, stripped_len));
}
mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
@@ -288,7 +302,7 @@ mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
}
}
- return mp_obj_new_str(qstr_from_str_take(vstr->buf, vstr->alloc));
+ return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len));
}
static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
@@ -339,7 +353,7 @@ mp_obj_t str_it_iternext(mp_obj_t self_in) {
mp_obj_str_it_t *self = self_in;
const char *str = qstr_str(self->str->qstr);
if (self->cur < strlen(str)) {
- mp_obj_t o_out = mp_obj_new_str(qstr_from_strn_copy(str + self->cur, 1));
+ mp_obj_t o_out = mp_obj_new_str(qstr_from_strn(str + self->cur, 1));
self->cur += 1;
return o_out;
} else {
diff --git a/py/objtuple.c b/py/objtuple.c
index fd6d415e28..ec35ef8550 100644
--- a/py/objtuple.c
+++ b/py/objtuple.c
@@ -5,7 +5,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
diff --git a/py/objtype.c b/py/objtype.c
index d448bb03ed..5dea6938d7 100644
--- a/py/objtype.c
+++ b/py/objtype.c
@@ -6,7 +6,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "map.h"
#include "runtime0.h"
@@ -166,7 +166,7 @@ static mp_obj_t class_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
if (op_name == NULL) {
return MP_OBJ_NULL;
}
- mp_obj_t member = mp_obj_class_lookup(lhs->base.type, qstr_from_str_static(op_name));
+ mp_obj_t member = mp_obj_class_lookup(lhs->base.type, QSTR_FROM_STR_STATIC(op_name));
if (member != MP_OBJ_NULL) {
return rt_call_function_2(member, lhs_in, rhs_in);
} else {
@@ -219,7 +219,7 @@ static bool class_store_attr(mp_obj_t self_in, qstr attr, mp_obj_t value) {
bool class_store_item(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
mp_obj_class_t *self = self_in;
- mp_obj_t member = mp_obj_class_lookup(self->base.type, qstr_from_str_static("__setitem__"));
+ mp_obj_t member = mp_obj_class_lookup(self->base.type, QSTR_FROM_STR_STATIC("__setitem__"));
if (member != MP_OBJ_NULL) {
mp_obj_t args[3] = {self_in, index, value};
rt_call_function_n_kw(member, 3, 0, args);
diff --git a/py/objzip.c b/py/objzip.c
index 72db06ac20..6560a08bc7 100644
--- a/py/objzip.c
+++ b/py/objzip.c
@@ -3,6 +3,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
diff --git a/py/parse.c b/py/parse.c
index 93d75424ac..4288c74ccf 100644
--- a/py/parse.c
+++ b/py/parse.c
@@ -8,7 +8,7 @@
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "lexer.h"
#include "parse.h"
@@ -205,7 +205,7 @@ static void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
const mp_token_t *tok = mp_lexer_cur(lex);
mp_parse_node_t pn;
if (tok->kind == MP_TOKEN_NAME) {
- pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn_copy(tok->str, tok->len));
+ pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len));
} else if (tok->kind == MP_TOKEN_NUMBER) {
bool dec = false;
bool small_int = true;
@@ -254,16 +254,16 @@ static void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
}
}
if (dec) {
- pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn_copy(str, len));
+ pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len));
} else if (small_int && !overflow && MP_FIT_SMALL_INT(int_val)) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val);
} else {
- pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn_copy(str, len));
+ pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
}
} else if (tok->kind == MP_TOKEN_STRING) {
- pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qstr_from_strn_copy(tok->str, tok->len));
+ pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qstr_from_strn(tok->str, tok->len));
} else if (tok->kind == MP_TOKEN_BYTES) {
- pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn_copy(tok->str, tok->len));
+ pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
} else {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind);
}
diff --git a/py/py.mk b/py/py.mk
index ce5169f777..72013ef999 100644
--- a/py/py.mk
+++ b/py/py.mk
@@ -25,13 +25,8 @@ endif
# default settings; can be overridden in main Makefile
-ifndef PY_SRC
-PY_SRC = ../py
-endif
-
-ifndef BUILD
-BUILD = build
-endif
+PY_SRC ?= ../py
+BUILD ?= build
# to create the build directory
@@ -42,6 +37,10 @@ $(BUILD):
PY_BUILD = $(BUILD)/py.
+# file containing qstr defs for the core Python bit
+
+PY_QSTR_DEFS = $(PY_SRC)/qstrdefs.h
+
# py object files
PY_O_BASENAME = \
@@ -97,6 +96,7 @@ PY_O_BASENAME = \
objstr.o \
objtuple.o \
objtype.o \
+ objzip.o \
stream.o \
builtin.o \
builtinimport.o \
@@ -105,12 +105,21 @@ PY_O_BASENAME = \
vm.o \
showbc.o \
repl.o \
- objzip.o \
# prepend the build destination prefix to the py object files
PY_O = $(addprefix $(PY_BUILD), $(PY_O_BASENAME))
+# qstr data
+
+$(PY_BUILD)qstr.o: $(PY_BUILD)qstrdefs.generated.h
+
+$(PY_BUILD)qstrdefs.generated.h: $(PY_QSTR_DEFS) $(QSTR_DEFS) $(PY_SRC)/makeqstrdata.py
+ $(ECHO) "makeqstrdata $(PY_QSTR_DEFS) $(QSTR_DEFS)"
+ $(Q)python $(PY_SRC)/makeqstrdata.py $(PY_QSTR_DEFS) $(QSTR_DEFS) > $@
+
+# emitters
+
$(PY_BUILD)emitnx64.o: $(PY_SRC)/emitnative.c $(PY_SRC)/emit.h mpconfigport.h
$(ECHO) "CC $<"
$(Q)$(CC) $(CFLAGS) -DN_X64 -c -o $@ $<
@@ -119,11 +128,13 @@ $(PY_BUILD)emitnthumb.o: $(PY_SRC)/emitnative.c $(PY_SRC)/emit.h mpconfigport.h
$(ECHO) "CC $<"
$(Q)$(CC) $(CFLAGS) -DN_THUMB -c -o $@ $<
+# general source files
+
$(PY_BUILD)%.o: $(PY_SRC)/%.S
$(ECHO) "CC $<"
$(Q)$(CC) $(CFLAGS) -c -o $@ $<
-$(PY_BUILD)%.o: $(PY_SRC)/%.c mpconfigport.h
+$(PY_BUILD)%.o: $(PY_SRC)/%.c mpconfigport.h $(PY_SRC)/qstr.h $(PY_QSTR_DEFS) $(QSTR_DEFS)
$(ECHO) "CC $<"
$(Q)$(CC) $(CFLAGS) -c -o $@ $<
@@ -141,5 +152,5 @@ $(PY_BUILD)vm.o: $(PY_SRC)/vm.c
$(PY_BUILD)parse.o: $(PY_SRC)/grammar.h
$(PY_BUILD)compile.o: $(PY_SRC)/grammar.h
-$(PY_BUILD)/emitcpy.o: $(PY_SRC)/emit.h
+$(PY_BUILD)emitcpy.o: $(PY_SRC)/emit.h
$(PY_BUILD)emitbc.o: $(PY_SRC)/emit.h
diff --git a/py/qstr.c b/py/qstr.c
index 93ae3ab665..2830341a2d 100644
--- a/py/qstr.c
+++ b/py/qstr.c
@@ -2,7 +2,8 @@
#include <string.h>
#include "misc.h"
-#include "mpqstr.h"
+#include "mpconfig.h"
+#include "qstr.h"
// NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings)
// ultimately we will replace this with a static hash table of some kind
@@ -15,12 +16,33 @@
#define DEBUG_printf(args...) (void)0
#endif
+// A qstr is an index into the qstr pool.
+// The data for a qstr contains (hash, length, data).
+// For now we use very simple encoding, just to get the framework correct:
+// - hash is 2 bytes (simply the sum of data bytes)
+// - length is 2 bytes
+// - data follows
+// - \0 terminated (for now, so they can be printed using printf)
+
+#define Q_GET_HASH(q) ((q)[0] | ((q)[1] << 8))
+#define Q_GET_ALLOC(q) (4 + Q_GET_LENGTH(q) + 1)
+#define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8))
+#define Q_GET_DATA(q) ((q) + 4)
+
+static machine_uint_t compute_hash(const byte *data, uint len) {
+ machine_uint_t hash = 0;
+ for (const byte *top = data + len; data < top; data++) {
+ hash += *data;
+ }
+ return hash & 0xffff;
+}
+
typedef struct _qstr_pool_t {
struct _qstr_pool_t *prev;
uint total_prev_len;
uint alloc;
uint len;
- const char *qstrs[];
+ const byte *qstrs[];
} qstr_pool_t;
const static qstr_pool_t const_pool = {
@@ -29,9 +51,11 @@ const static qstr_pool_t const_pool = {
10, // set so that the first dynamically allocated pool is twice this size; must be <= the len (just below)
MP_QSTR_number_of, // corresponds to number of strings in array just below
{
- "nil", // must be first, since 0 qstr is nil
-#define Q(id) #id,
-#include "mpqstrraw.h"
+ (const byte*) "\0\0\0\0", // invalid/no qstr has empty data
+ (const byte*) "\0\0\0\0", // empty qstr
+#define Q(id, str) str,
+// TODO having 'build/py.' here is a bit of a hack, should take config variable from Makefile
+#include "build/py.qstrdefs.generated.h"
#undef Q
},
};
@@ -42,8 +66,20 @@ void qstr_init(void) {
last_pool = (qstr_pool_t*)&const_pool; // we won't modify the const_pool since it has no allocated room left
}
-static qstr qstr_add(const char *str) {
- DEBUG_printf("QSTR: add %s\n", str);
+static const byte *find_qstr(qstr q) {
+ // search pool for this qstr
+ for (qstr_pool_t *pool = last_pool; pool != NULL; pool = pool->prev) {
+ if (q >= pool->total_prev_len) {
+ return pool->qstrs[q - pool->total_prev_len];
+ }
+ }
+
+ // not found
+ return 0;
+}
+
+static qstr qstr_add(const byte *q_ptr) {
+ DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr));
// make sure we have room in the pool for a new qstr
if (last_pool->len >= last_pool->alloc) {
@@ -57,55 +93,95 @@ static qstr qstr_add(const char *str) {
}
// add the new qstr
- last_pool->qstrs[last_pool->len++] = str;
+ last_pool->qstrs[last_pool->len++] = q_ptr;
// return id for the newly-added qstr
return last_pool->total_prev_len + last_pool->len - 1;
}
-qstr qstr_from_str_static(const char *str) {
+static qstr qstr_find_strn(const byte *str, uint str_len) {
+ // work out hash of str
+ machine_uint_t str_hash = compute_hash((const byte*)str, str_len);
+
+ // search pools for the data
for (qstr_pool_t *pool = last_pool; pool != NULL; pool = pool->prev) {
- for (const char **qstr = pool->qstrs, **qstr_top = pool->qstrs + pool->len; qstr < qstr_top; qstr++) {
- if (strcmp(*qstr, str) == 0) {
- return pool->total_prev_len + (qstr - pool->qstrs);
+ for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) {
+ if (Q_GET_HASH(*q) == str_hash && Q_GET_LENGTH(*q) == str_len && strncmp((const char*)Q_GET_DATA(*q), (const char*)str, str_len) == 0) {
+ return pool->total_prev_len + (q - pool->qstrs);
}
}
}
- return qstr_add(str);
+
+ // not found; return null qstr
+ return 0;
}
-qstr qstr_from_str_take(char *str, int alloc_len) {
- for (qstr_pool_t *pool = last_pool; pool != NULL; pool = pool->prev) {
- for (const char **qstr = pool->qstrs, **qstr_top = pool->qstrs + pool->len; qstr < qstr_top; qstr++) {
- if (strcmp(*qstr, str) == 0) {
- m_del(char, str, alloc_len);
- return pool->total_prev_len + (qstr - pool->qstrs);
- }
- }
- }
- return qstr_add(str);
+qstr qstr_from_str(const char *str) {
+ return qstr_from_strn(str, strlen(str));
}
-qstr qstr_from_strn_copy(const char *str, int len) {
- for (qstr_pool_t *pool = last_pool; pool != NULL; pool = pool->prev) {
- for (const char **qstr = pool->qstrs, **qstr_top = pool->qstrs + pool->len; qstr < qstr_top; qstr++) {
- if (strncmp(*qstr, str, len) == 0 && (*qstr)[len] == '\0') {
- return pool->total_prev_len + (qstr - pool->qstrs);
- }
- }
+qstr qstr_from_strn(const char *str, uint len) {
+ qstr q = qstr_find_strn((const byte*)str, len);
+ if (q == 0) {
+ machine_uint_t hash = compute_hash((const byte*)str, len);
+ byte *q_ptr = m_new(byte, 4 + len + 1);
+ q_ptr[0] = hash;
+ q_ptr[1] = hash >> 8;
+ q_ptr[2] = len;
+ q_ptr[3] = len >> 8;
+ memcpy(q_ptr + 4, str, len);
+ q_ptr[4 + len] = '\0';
+ q = qstr_add(q_ptr);
}
- return qstr_add(strndup(str, len));
+ return q;
}
-// convert qstr id to pointer to its string
-const char *qstr_str(qstr qstr) {
- // search
- for (qstr_pool_t *pool = last_pool; pool != NULL; pool = pool->prev) {
- if (qstr >= pool->total_prev_len) {
- return pool->qstrs[qstr - pool->total_prev_len];
- }
+qstr qstr_from_strn_take(char *str, uint alloc_len, uint len) {
+ qstr q = qstr_from_strn(str, len);
+ m_del(char, str, alloc_len);
+ return q;
+}
+
+byte *qstr_build_start(uint len, byte **q_ptr) {
+ assert(len <= 65535);
+ *q_ptr = m_new(byte, 4 + len + 1);
+ (*q_ptr)[2] = len;
+ (*q_ptr)[3] = len >> 8;
+ return Q_GET_DATA(*q_ptr);
+}
+
+qstr qstr_build_end(byte *q_ptr) {
+ qstr q = qstr_find_strn(Q_GET_DATA(q_ptr), Q_GET_LENGTH(q_ptr));
+ if (q == 0) {
+ machine_uint_t len = Q_GET_LENGTH(q_ptr);
+ machine_uint_t hash = compute_hash(Q_GET_DATA(q_ptr), len);
+ q_ptr[0] = hash;
+ q_ptr[1] = hash >> 8;
+ q_ptr[4 + len] = '\0';
+ q = qstr_add(q_ptr);
+ } else {
+ m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr));
}
+ return q;
+}
+
+machine_uint_t qstr_hash(qstr q) {
+ return Q_GET_HASH(find_qstr(q));
+}
+
+uint qstr_len(qstr q) {
+ const byte *qd = find_qstr(q);
+ return Q_GET_LENGTH(qd);
+}
+
+// XXX to remove!
+const char *qstr_str(qstr q) {
+ const byte *qd = find_qstr(q);
+ return (const char*)Q_GET_DATA(qd);
+}
- // not found, return nil
- return const_pool.qstrs[0];
+const byte *qstr_data(qstr q, uint *len) {
+ const byte *qd = find_qstr(q);
+ *len = Q_GET_LENGTH(qd);
+ return Q_GET_DATA(qd);
}
diff --git a/py/qstr.h b/py/qstr.h
new file mode 100644
index 0000000000..5c331c34af
--- /dev/null
+++ b/py/qstr.h
@@ -0,0 +1,35 @@
+// See qstrdefs.h for a list of qstr's that are available as constants.
+// Reference them as MP_QSTR_xxxx.
+//
+// Note: it would be possible to define MP_QSTR_xxx as QSTR_FROM_STR_STATIC("xxx")
+// for qstrs that are referenced this way but that you don't want to have in ROM.
+
+enum {
+ MP_QSTR_NULL = 0, // indicates invalid/no qstr
+ MP_QSTR_ = 1, // the empty qstr
+#define Q(id, str) MP_QSTR_##id,
+// TODO having 'build/py.' here is a bit of a hack, should take config variable from Makefile
+#include "build/py.qstrdefs.generated.h"
+#undef Q
+ MP_QSTR_number_of,
+} category_t;
+
+typedef machine_uint_t qstr;
+
+#define QSTR_FROM_STR_STATIC(s) (qstr_from_strn((s), strlen(s)))
+
+void qstr_init(void);
+
+qstr qstr_from_str(const char *str);
+qstr qstr_from_strn(const char *str, uint len);
+//qstr qstr_from_str_static(const char *str);
+qstr qstr_from_strn_take(char *str, uint alloc_len, uint len);
+//qstr qstr_from_strn_copy(const char *str, int len);
+
+byte* qstr_build_start(uint len, byte **q_ptr);
+qstr qstr_build_end(byte *q_ptr);
+
+machine_uint_t qstr_hash(qstr q);
+const char* qstr_str(qstr q);
+uint qstr_len(qstr q);
+const byte* qstr_data(qstr q, uint *len);
diff --git a/py/mpqstrraw.h b/py/qstrdefs.h
index 10b1fc0d39..10b1fc0d39 100644
--- a/py/mpqstrraw.h
+++ b/py/qstrdefs.h
diff --git a/py/runtime.c b/py/runtime.c
index d8fc3ff6e4..210047ac0a 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -11,7 +11,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
@@ -154,7 +154,7 @@ void rt_init(void) {
#if MICROPY_CPYTHON_COMPAT
// Precreate sys module, so that "import sys" doesn't throw exceptions.
- mp_obj_new_module(qstr_from_str_static("sys"));
+ mp_obj_new_module(QSTR_FROM_STR_STATIC("sys"));
#endif
mp_module_micropython_init();
diff --git a/py/scope.c b/py/scope.c
index 5cc0bda068..1d240bb63e 100644
--- a/py/scope.c
+++ b/py/scope.c
@@ -5,6 +5,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "parse.h"
#include "scope.h"
@@ -17,7 +18,7 @@ scope_t *scope_new(scope_kind_t kind, mp_parse_node_t pn, qstr source_file, uint
scope->source_file = source_file;
switch (kind) {
case SCOPE_MODULE:
- scope->simple_name = qstr_from_str_static("<module>");
+ scope->simple_name = QSTR_FROM_STR_STATIC("<module>");
break;
case SCOPE_FUNCTION:
case SCOPE_CLASS:
@@ -25,19 +26,19 @@ scope_t *scope_new(scope_kind_t kind, mp_parse_node_t pn, qstr source_file, uint
scope->simple_name = MP_PARSE_NODE_LEAF_ARG(((mp_parse_node_struct_t*)pn)->nodes[0]);
break;
case SCOPE_LAMBDA:
- scope->simple_name = qstr_from_str_static("<lambda>");
+ scope->simple_name = QSTR_FROM_STR_STATIC("<lambda>");
break;
case SCOPE_LIST_COMP:
- scope->simple_name = qstr_from_str_static("<listcomp>");
+ scope->simple_name = QSTR_FROM_STR_STATIC("<listcomp>");
break;
case SCOPE_DICT_COMP:
- scope->simple_name = qstr_from_str_static("<dictcomp>");
+ scope->simple_name = QSTR_FROM_STR_STATIC("<dictcomp>");
break;
case SCOPE_SET_COMP:
- scope->simple_name = qstr_from_str_static("<setcomp>");
+ scope->simple_name = QSTR_FROM_STR_STATIC("<setcomp>");
break;
case SCOPE_GEN_EXPR:
- scope->simple_name = qstr_from_str_static("<genexpr>");
+ scope->simple_name = QSTR_FROM_STR_STATIC("<genexpr>");
break;
default:
assert(0);
diff --git a/py/showbc.c b/py/showbc.c
index f34449ed10..f914223933 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -6,6 +6,7 @@
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "bc0.h"
#if MICROPY_DEBUG_PRINTERS
diff --git a/py/stream.c b/py/stream.c
index d3a11affbc..88ddc5e6c9 100644
--- a/py/stream.c
+++ b/py/stream.c
@@ -3,7 +3,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "obj.h"
#include "stream.h"
@@ -23,15 +23,14 @@ static mp_obj_t stream_read(uint n_args, const mp_obj_t *args) {
if (n_args == 1 || ((sz = mp_obj_get_int(args[1])) == -1)) {
return stream_readall(args[0]);
}
- // +1 because so far we mark end of string with \0
- char *buf = m_new(char, sz + 1);
+ char *buf = m_new(char, sz);
int error;
machine_int_t out_sz = o->type->stream_p.read(o, buf, sz, &error);
if (out_sz == -1) {
nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_OSError, "[Errno %d]", error));
} else {
- buf[out_sz] = 0;
- return mp_obj_new_str(qstr_from_str_take(buf, /*out_sz,*/ sz + 1));
+ // TODO don't intern this string
+ return mp_obj_new_str(qstr_from_strn_take(buf, sz, out_sz));
}
}
@@ -42,8 +41,8 @@ static mp_obj_t stream_write(mp_obj_t self_in, mp_obj_t arg) {
nlr_jump(mp_obj_new_exception_msg(MP_QSTR_OSError, "Operation not supported"));
}
- const char *buf = qstr_str(mp_obj_get_qstr(arg));
- machine_int_t sz = strlen(buf);
+ uint sz;
+ const byte *buf = qstr_data(mp_obj_get_qstr(arg), &sz);
int error;
machine_int_t out_sz = o->type->stream_p.write(self_in, buf, sz, &error);
if (out_sz == -1) {
@@ -92,10 +91,9 @@ static mp_obj_t stream_readall(mp_obj_t self_in) {
}
}
}
- vstr_set_size(vstr, total_size + 1); // TODO: for \0
- buf = vstr_str(vstr);
- buf[total_size] = 0;
- return mp_obj_new_str(qstr_from_str_take(buf, total_size + 1));
+ // TODO don't intern this string
+ vstr_set_size(vstr, total_size);
+ return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, total_size));
}
// Unbuffered, inefficient implementation of readline() for raw I/O files.
@@ -113,7 +111,7 @@ static mp_obj_t stream_unbuffered_readline(uint n_args, const mp_obj_t *args) {
vstr_t *vstr;
if (max_size != -1) {
- vstr = vstr_new_size(max_size + 1); // TODO: \0
+ vstr = vstr_new_size(max_size);
} else {
vstr = vstr_new();
}
@@ -134,10 +132,9 @@ static mp_obj_t stream_unbuffered_readline(uint n_args, const mp_obj_t *args) {
break;
}
}
- // TODO: \0
- vstr_add_byte(vstr, 0);
+ // TODO don't intern this string
vstr_shrink(vstr);
- return mp_obj_new_str(qstr_from_str_take(vstr_str(vstr), vstr_len(vstr)));
+ return mp_obj_new_str(qstr_from_strn_take(vstr_str(vstr), vstr->alloc, vstr_len(vstr)));
}
diff --git a/py/strtonum.c b/py/strtonum.c
index 48a746603f..d8bb05ac8d 100644
--- a/py/strtonum.c
+++ b/py/strtonum.c
@@ -6,7 +6,7 @@
#include "misc.h"
#include "mpconfig.h"
-#include "mpqstr.h"
+#include "qstr.h"
#include "nlr.h"
#include "obj.h"
diff --git a/py/vm.c b/py/vm.c
index 75c21d1b3c..c41146ac8f 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -7,6 +7,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
+#include "qstr.h"
#include "obj.h"
#include "runtime.h"
#include "bc0.h"