summaryrefslogtreecommitdiffstatshomepage
path: root/tools/cc1
diff options
context:
space:
mode:
authorDamien George <damien.p.george@gmail.com>2015-12-18 23:26:29 +0000
committerDamien George <damien.p.george@gmail.com>2015-12-31 00:24:33 +0000
commit4bd95f8b4407ff85c68676285c02236ee72d72cb (patch)
tree62f0a266696ae9d5a8f148e0a3c1c1f12d37caf8 /tools/cc1
parent521759ee18f94a0a55f1fcc8a1242da9cf92af55 (diff)
downloadmicropython-4bd95f8b4407ff85c68676285c02236ee72d72cb.tar.gz
micropython-4bd95f8b4407ff85c68676285c02236ee72d72cb.zip
tools: Add C middle-processor to make builtin tables proper hash tables.
Diffstat (limited to 'tools/cc1')
-rwxr-xr-xtools/cc1262
1 files changed, 262 insertions, 0 deletions
diff --git a/tools/cc1 b/tools/cc1
new file mode 100755
index 0000000000..827d5886a0
--- /dev/null
+++ b/tools/cc1
@@ -0,0 +1,262 @@
+#!/usr/bin/env python3
+
+"""
+This is a middle-processor for MicroPython source files. It takes the output
+of the C preprocessor, has the option to change it, then feeds this into the
+C compiler.
+
+It currently has the ability to reorder static hash tables so they are actually
+hashed, resulting in faster lookup times at runtime.
+
+To use, configure the Python variables below, and add the following line to the
+Makefile:
+
+CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools
+"""
+
+import sys
+import os
+import re
+
+################################################################################
+# these are the configuration variables
+# TODO somehow make them externally configurable
+
+# this is the path to the true C compiler
+cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1'
+#cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1'
+
+# this must be the same as MICROPY_QSTR_BYTES_IN_HASH
+bytes_in_qstr_hash = 2
+
+# this must be 1 or more (can be a decimal)
+# larger uses more code size but yields faster lookups
+table_size_mult = 1
+
+# these control output during processing
+print_stats = True
+print_debug = False
+
+# end configuration variables
+################################################################################
+
+# precompile regexs
+re_preproc_line = re.compile(r'# [0-9]+ ')
+re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},')
+re_mp_obj_dict_t = re.compile(r'(?P<head>(static )?const mp_obj_dict_t (?P<id>[a-z0-9_]+) = \{ \.base = \{&mp_type_dict\}, \.map = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
+re_mp_map_t = re.compile(r'(?P<head>(static )?const mp_map_t (?P<id>[a-z0-9_]+) = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
+re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$')
+
+# this must match the equivalent function in qstr.c
+def compute_hash(qstr):
+ hash = 5381
+ for char in qstr:
+ hash = (hash * 33) ^ ord(char)
+ # Make sure that valid hash is never zero, zero means "hash not computed"
+ return (hash & ((1 << (8 * bytes_in_qstr_hash)) - 1)) or 1
+
+# this algo must match the equivalent in map.c
+def hash_insert(map, key, value):
+ hash = compute_hash(key)
+ pos = hash % len(map)
+ start_pos = pos
+ if print_debug:
+ print(' insert %s: start at %u/%u -- ' % (key, pos, len(map)), end='')
+ while True:
+ if map[pos] is None:
+ # found empty slot, so key is not in table
+ if print_debug:
+ print('put at %u' % pos)
+ map[pos] = (key, value)
+ return
+ else:
+ # not yet found, keep searching
+ if map[pos][0] == key:
+ raise AssertionError("duplicate key '%s'" % (key,))
+ pos = (pos + 1) % len(map)
+ assert pos != start_pos
+
+def hash_find(map, key):
+ hash = compute_hash(key)
+ pos = hash % len(map)
+ start_pos = pos
+ attempts = 0
+ while True:
+ attempts += 1
+ if map[pos] is None:
+ return attempts, None
+ elif map[pos][0] == key:
+ return attempts, map[pos][1]
+ else:
+ pos = (pos + 1) % len(map)
+ if pos == start_pos:
+ return attempts, None
+
+def process_map_table(file, line, output):
+ output.append(line)
+
+ # consume all lines that are entries of the table and concat them
+ # (we do it this way because there can be multiple entries on one line)
+ table_contents = []
+ while True:
+ line = file.readline()
+ if len(line) == 0:
+ print('unexpected end of input')
+ sys.exit(1)
+ line = line.strip()
+ if len(line) == 0:
+ # empty line
+ continue
+ if re_preproc_line.match(line):
+ # preprocessor line number comment
+ continue
+ if line == '};':
+ # end of table (we assume it appears on a single line)
+ break
+ table_contents.append(line)
+
+ # make combined string of entries
+ entries_str = ''.join(table_contents)
+
+ # split into individual entries
+ entries = []
+ while entries_str:
+ # look for single entry, by matching nested braces
+ match = None
+ if entries_str[0] == '{':
+ nested_braces = 0
+ for i in range(len(entries_str)):
+ if entries_str[i] == '{':
+ nested_braces += 1
+ elif entries_str[i] == '}':
+ nested_braces -= 1
+ if nested_braces == 0:
+ match = re_map_entry.match(entries_str[:i + 2])
+ break
+
+ if not match:
+ print('unknown line in table:', entries_str)
+ sys.exit(1)
+
+ # extract single entry
+ line = match.group(0)
+ qstr = match.group(1)
+ entries_str = entries_str[len(line):].lstrip()
+
+ # add the qstr and the whole line to list of all entries
+ entries.append((qstr, line))
+
+ # sort entries so hash table construction is deterministic
+ entries.sort()
+
+ # create hash table
+ map = [None] * int(len(entries) * table_size_mult)
+ for qstr, line in entries:
+ # We assume that qstr does not have any escape sequences in it.
+ # This is reasonably safe, since keys in a module or class dict
+ # should be standard identifiers.
+ # TODO verify this and raise an error if escape sequence found
+ hash_insert(map, qstr, line)
+
+ # compute statistics
+ total_attempts = 0
+ for qstr, _ in entries:
+ attempts, line = hash_find(map, qstr)
+ assert line is not None
+ if print_debug:
+ print(' %s lookup took %u attempts' % (qstr, attempts))
+ total_attempts += attempts
+ if len(entries):
+ stats = len(map), len(entries) / len(map), total_attempts / len(entries)
+ else:
+ stats = 0, 0, 0
+ if print_debug:
+ print(' table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats)
+
+ # output hash table
+ for row in map:
+ if row is None:
+ output.append('{ 0, 0 },\n')
+ else:
+ output.append(row[1] + '\n')
+ output.append('};\n')
+
+ # skip to next non-blank line
+ while True:
+ line = file.readline()
+ if len(line) == 0:
+ print('unexpected end of input')
+ sys.exit(1)
+ line = line.strip()
+ if len(line) == 0:
+ continue
+ break
+
+ # transform the is_ordered param from 1 to 0
+ match = re_mp_obj_dict_t.match(line)
+ if match is None:
+ match = re_mp_map_t.match(line)
+ if match is None:
+ print('expecting mp_obj_dict_t or mp_map_t definition')
+ print(output[0])
+ print(line)
+ sys.exit(1)
+ line = match.group('head') + '0' + match.group('tail') + '\n'
+ output.append(line)
+
+ return (match.group('id'),) + stats
+
+def process_file(filename):
+ output = []
+ file_changed = False
+ with open(filename, 'rt') as f:
+ while True:
+ line = f.readline()
+ if not line:
+ break
+ if re_mp_rom_map_elem_t.match(line):
+ file_changed = True
+ stats = process_map_table(f, line, output)
+ if print_stats:
+ print(' [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % stats)
+ else:
+ output.append(line)
+
+ if file_changed:
+ if print_debug:
+ print(' modifying static maps in', output[0].strip())
+ with open(filename, 'wt') as f:
+ for line in output:
+ f.write(line)
+
+def main():
+ # run actual C compiler
+ # need to quote args that have special characters in them
+ def quote(s):
+ if s.find('<') != -1 or s.find('>') != -1:
+ return "'" + s + "'"
+ else:
+ return s
+ ret = os.system(cc1_path + ' ' + ' '.join(quote(s) for s in sys.argv[1:]))
+ if ret != 0:
+ ret = (ret & 0x7f) or 127 # make it in range 0-127, but non-zero
+ sys.exit(ret)
+
+ if sys.argv[1] == '-E':
+ # CPP has been run, now do our processing stage
+ for i, arg in enumerate(sys.argv):
+ if arg == '-o':
+ return process_file(sys.argv[i + 1])
+
+ print('%s: could not find "-o" option' % (sys.argv[0],))
+ sys.exit(1)
+ elif sys.argv[1] == '-fpreprocessed':
+ # compiler has been run, nothing more to do
+ return
+ else:
+ # unknown processing stage
+ print('%s: unknown first option "%s"' % (sys.argv[0], sys.argv[1]))
+ sys.exit(1)
+
+if __name__ == '__main__':
+ main()