py/makeqstrdata.py: Compute the qstr hash from bytes, not characters.

author: Damien George <damien.p.george@gmail.com> 2016-09-02 14:32:47 +1000
committer: Damien George <damien.p.george@gmail.com> 2016-09-02 14:32:47 +1000
commit: f127bef3e41f25eea6da73a52aab2fdc53be2464 (patch)
tree: e71c749ee10e9bfd1f52f8e8b1c3cca4e2ee6fd6 /py
parent: f98bb2ddcbfed7033ae93ae8fe98b0540a9fb42f (diff)
download: micropython-f127bef3e41f25eea6da73a52aab2fdc53be2464.tar.gz
micropython-f127bef3e41f25eea6da73a52aab2fdc53be2464.zip
1 file changed, 10 insertions, 9 deletions
diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py
index 8a3136b1f1..7249769f47 100644
--- a/py/makeqstrdata.py
+++ b/py/makeqstrdata.py
@@ -14,11 +14,13 @@ import sys
 #   - codepoint2name lives in a different module
 import platform
 if platform.python_version_tuple()[0] == '2':
-    ord_bytes = ord
+    bytes_cons = lambda val, enc=None: bytearray(val)
     from htmlentitydefs import codepoint2name
 elif platform.python_version_tuple()[0] == '3':
-    ord_bytes = lambda x:x
+    bytes_cons = bytes
     from html.entities import codepoint2name
+# end compatibility code
+
 codepoint2name[ord('-')] = 'hyphen';
 
 # add some custom names to map characters that aren't in HTML
@@ -52,8 +54,8 @@ codepoint2name[ord('~')] = 'tilde'
 # this must match the equivalent function in qstr.c
 def compute_hash(qstr, bytes_hash):
     hash = 5381
-    for char in qstr:
-        hash = (hash * 33) ^ ord(char)
+    for b in qstr:
+        hash = (hash * 33) ^ b
     # Make sure that valid hash is never zero, zero means "hash not computed"
     return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
 
@@ -115,16 +117,15 @@ def parse_input_headers(infiles):
     return qcfgs, qstrs
 
 def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
-    qhash = compute_hash(qstr, cfg_bytes_hash)
+    qbytes = bytes_cons(qstr, 'utf8')
+    qlen = len(qbytes)
+    qhash = compute_hash(qbytes, cfg_bytes_hash)
     if all(32 <= ord(c) <= 126 and c != '\\' and c != '"' for c in qstr):
         # qstr is all printable ASCII so render it as-is (for easier debugging)
-        qlen = len(qstr)
         qdata = qstr
     else:
         # qstr contains non-printable codes so render entire thing as hex pairs
-        qbytes = qstr.encode('utf8')
-        qlen = len(qbytes)
-        qdata = ''.join(('\\x%02x' % ord_bytes(b)) for b in qbytes)
+        qdata = ''.join(('\\x%02x' % b) for b in qbytes)
     if qlen >= (1 << (8 * cfg_bytes_len)):
         print('qstr is too long:', qstr)
         assert False
author	Damien George <damien.p.george@gmail.com>	2016-09-02 14:32:47 +1000
committer	Damien George <damien.p.george@gmail.com>	2016-09-02 14:32:47 +1000
commit	f127bef3e41f25eea6da73a52aab2fdc53be2464 (patch)
tree	e71c749ee10e9bfd1f52f8e8b1c3cca4e2ee6fd6 /py
parent	f98bb2ddcbfed7033ae93ae8fe98b0540a9fb42f (diff)
download	micropython-f127bef3e41f25eea6da73a52aab2fdc53be2464.tar.gz micropython-f127bef3e41f25eea6da73a52aab2fdc53be2464.zip