summaryrefslogtreecommitdiffstatshomepage
path: root/py/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'py/unicode.c')
-rw-r--r--py/unicode.c77
1 files changed, 77 insertions, 0 deletions
diff --git a/py/unicode.c b/py/unicode.c
new file mode 100644
index 0000000000..58c860a0e4
--- /dev/null
+++ b/py/unicode.c
@@ -0,0 +1,77 @@
+#include <stdint.h>
+
+#include "misc.h"
+
+// attribute flags
+#define FL_PRINT (0x01)
+#define FL_SPACE (0x02)
+#define FL_DIGIT (0x04)
+#define FL_ALPHA (0x08)
+#define FL_UPPER (0x10)
+#define FL_LOWER (0x20)
+
+// shorthand character attributes
+#define AT_PR (FL_PRINT)
+#define AT_SP (FL_SPACE | FL_PRINT)
+#define AT_DI (FL_DIGIT | FL_PRINT)
+#define AT_AL (FL_ALPHA | FL_PRINT)
+#define AT_UP (FL_UPPER | FL_ALPHA | FL_PRINT)
+#define AT_LO (FL_LOWER | FL_ALPHA | FL_PRINT)
+
+// table of attributes for ascii characters
+static const uint8_t attr[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, AT_SP, AT_SP, AT_SP, 0, AT_SP, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ AT_SP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
+ AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
+ AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI,
+ AT_DI, AT_DI, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
+ AT_PR, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP,
+ AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP,
+ AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP,
+ AT_UP, AT_UP, AT_UP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
+ AT_PR, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO,
+ AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO,
+ AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO,
+ AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0
+};
+
+unichar utf8_get_char(const char *s) {
+ return *s;
+}
+
+char *utf8_next_char(const char *s) {
+ return (char*)(s + 1);
+}
+
+bool unichar_isspace(unichar c) {
+ return c < 128 && (attr[c] & FL_SPACE) != 0;
+}
+
+bool unichar_isalpha(unichar c) {
+ return c < 128 && (attr[c] & FL_ALPHA) != 0;
+}
+
+bool unichar_isprint(unichar c) {
+ return c < 128 && (attr[c] & FL_PRINT) != 0;
+}
+
+bool unichar_isdigit(unichar c) {
+ return c < 128 && (attr[c] & FL_DIGIT) != 0;
+}
+
+/*
+bool char_is_alpha_or_digit(unichar c) {
+ return c < 128 && (attr[c] & (FL_ALPHA | FL_DIGIT)) != 0;
+}
+
+bool char_is_upper(unichar c) {
+ return c < 128 && (attr[c] & FL_UPPER) != 0;
+}
+
+bool char_is_lower(unichar c) {
+ return c < 128 && (attr[c] & FL_LOWER) != 0;
+}
+*/