diff options
author | Andrew Nacin <nacin@git.wordpress.org> | 2013-08-29 18:39:34 +0000 |
---|---|---|
committer | Andrew Nacin <nacin@git.wordpress.org> | 2013-08-29 18:39:34 +0000 |
commit | 8045afd81b7c80f6ef5b327c115a5bbb43e4b65c (patch) | |
tree | 15d457007610c451577debda89bd9e9cd3d74551 /tests/phpunit/data/formatting/utf-8 | |
parent | d34baebc1d8111c9c1014e11001957face778e52 (diff) | |
download | wordpress-8045afd81b7c80f6ef5b327c115a5bbb43e4b65c.tar.gz wordpress-8045afd81b7c80f6ef5b327c115a5bbb43e4b65c.zip |
Move PHPUnit tests into a tests/phpunit directory.
wp-tests-config.php can/should reside in the root of a develop checkout. `phpunit` should be run from the root.
see #25088.
git-svn-id: https://develop.svn.wordpress.org/trunk@25165 602fd350-edb4-49c9-b593-d223f7449a82
Diffstat (limited to 'tests/phpunit/data/formatting/utf-8')
-rw-r--r-- | tests/phpunit/data/formatting/utf-8/README | 15 | ||||
-rw-r--r-- | tests/phpunit/data/formatting/utf-8/entitize.py | 24 | ||||
-rw-r--r-- | tests/phpunit/data/formatting/utf-8/entitized.txt | 5 | ||||
-rw-r--r-- | tests/phpunit/data/formatting/utf-8/u-urlencode.py | 24 | ||||
-rw-r--r-- | tests/phpunit/data/formatting/utf-8/u-urlencoded.txt | 5 | ||||
-rw-r--r-- | tests/phpunit/data/formatting/utf-8/urlencode.py | 33 | ||||
-rw-r--r-- | tests/phpunit/data/formatting/utf-8/urlencoded.txt | 5 | ||||
-rw-r--r-- | tests/phpunit/data/formatting/utf-8/utf-8.txt | 5 |
8 files changed, 116 insertions, 0 deletions
diff --git a/tests/phpunit/data/formatting/utf-8/README b/tests/phpunit/data/formatting/utf-8/README new file mode 100644 index 0000000000..5bc6a317d3 --- /dev/null +++ b/tests/phpunit/data/formatting/utf-8/README @@ -0,0 +1,15 @@ +The Python scripts are for generating test data, because Python's Unicode
+support is much, much, much, much better than PHP's.
+
+ * `utf-8/urlencode.py`, `utf-8/u-urlencode.py` and `utf-8/entitize.py` process UTF-8
+ into a few different formats (%-encoding, %u-encoding, &#decimal;)
+ and are used like normal UNIXy pipes.
+
+ Try:
+
+ `python urlencode.py < utf-8.txt > urlencoded.txt`
+ `python u-urlencode.py < utf-8.txt > u-urlencoded.txt`
+ `python entitize.py < utf-8.txt > entitized.txt`
+
+ * `windows-1252.py` converts Windows-only smart-quotes and things
+ into their Unicode &#decimal; reference equivalents.
diff --git a/tests/phpunit/data/formatting/utf-8/entitize.py b/tests/phpunit/data/formatting/utf-8/entitize.py new file mode 100644 index 0000000000..efa7cb18d5 --- /dev/null +++ b/tests/phpunit/data/formatting/utf-8/entitize.py @@ -0,0 +1,24 @@ +# Generates entitized.txt from utf-8.txt
+#
+# entitized.txt is used by Tests_Formatting_UrlEncodedToEntities
+
+import codecs
+import sys
+
+def entitize(line):
+ """Convert text to &#[dec]; entities."""
+ line = line.strip();
+ line = ["&#%d;" % ord(s) for s in line]
+ return "".join(line)
+
+if __name__ == "__main__":
+ args = sys.argv[1:]
+ if args and args[0] in ("-h", "--help"):
+ print "Usage: python entitize.py < utf-8.txt > entitized.txt"
+ sys.exit(2)
+
+ sys.stdin = codecs.getreader("utf-8")(sys.stdin)
+ sys.stdout = codecs.getwriter("ascii")(sys.stdout)
+
+ lines = sys.stdin.readlines()
+ sys.stdout.write( "\n".join(map(entitize, lines)) )
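diff --git a/tests/phpunit/data/formatting/utf-8/entitized.txt b/tests/phpunit/data/formatting/utf-8/entitized.txt new file mode 100644 index 0000000000..a29c9f9216 --- /dev/null +++ b/tests/phpunit/data/formatting/utf-8/entitized.txt @@ -0,0 +1,5 @@ +&#31456;&#23376;&#24609; +&#70;&#114;&#97;&#110;&#231;&#111;&#105;&#115;&#32;&#84;&#114;&#117;&#102;&#102;&#97;&#117;&#116; +&#4321;&#4304;&#4325;&#4304;&#4320;&#4311;&#4309;&#4308;&#4314;&#4317; +&#66;&#106;&#246;&#114;&#107;&#32;&#71;&#117;&#240;&#109;&#117;&#110;&#100;&#115;&#100;&#243;&#116;&#116;&#105;&#114; +&#23470;&#23822;&#12288;&#39423;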
\ No newline at end of file diff --git a/tests/phpunit/data/formatting/utf-8/u-urlencode.py b/tests/phpunit/data/formatting/utf-8/u-urlencode.py new file mode 100644 index 0000000000..c20a14f1f8 --- /dev/null +++ b/tests/phpunit/data/formatting/utf-8/u-urlencode.py @@ -0,0 +1,24 @@ +# Generates u-urlencoded.txt from utf-8.txt
+#
+# u-urlencoded.txt is used by Tests_Formatting_UrlEncodedToEntities
+
+import codecs
+import sys
+
+def uurlencode(line):
+ """Use %u[hexvalue] percent encoding."""
+ line = line.strip()
+ line = ["%%u%04X" % ord(s) for s in line]
+ return "".join(line)
+
+if __name__ == "__main__":
+ args = sys.argv[1:]
+ if args and args[0] in ("-h", "--help"):
+ print "Usage: python u-urlencode.py < utf-8.txt > u-urlencoded.txt"
+ sys.exit(2)
+
+ sys.stdin = codecs.getreader("utf-8")(sys.stdin)
+ sys.stdout = codecs.getwriter("ascii")(sys.stdout)
+
+ lines = sys.stdin.readlines()
+ sys.stdout.write( "\n".join(map(uurlencode, lines)) )
diff --git a/tests/phpunit/data/formatting/utf-8/u-urlencoded.txt b/tests/phpunit/data/formatting/utf-8/u-urlencoded.txt new file mode 100644 index 0000000000..ad4e422c75 --- /dev/null +++ b/tests/phpunit/data/formatting/utf-8/u-urlencoded.txt @@ -0,0 +1,5 @@ +%u7AE0%u5B50%u6021
+%u0046%u0072%u0061%u006E%u00E7%u006F%u0069%u0073%u0020%u0054%u0072%u0075%u0066%u0066%u0061%u0075%u0074
+%u10E1%u10D0%u10E5%u10D0%u10E0%u10D7%u10D5%u10D4%u10DA%u10DD
+%u0042%u006A%u00F6%u0072%u006B%u0020%u0047%u0075%u00F0%u006D%u0075%u006E%u0064%u0073%u0064%u00F3%u0074%u0074%u0069%u0072
+%u5BAE%u5D0E%u3000%u99FF
diff --git a/tests/phpunit/data/formatting/utf-8/urlencode.py b/tests/phpunit/data/formatting/utf-8/urlencode.py new file mode 100644 index 0000000000..d29907f24b --- /dev/null +++ b/tests/phpunit/data/formatting/utf-8/urlencode.py @@ -0,0 +1,33 @@ +# Generates urlencoded.txt from utf-8.txt
+#
+# urlencoded.txt is used by Tests_Formatting_Utf8UriEncode
+
+import urllib, codecs, re
+import sys
+
+# uncapitalize pct-encoded values, leave the rest alone
+capfix = re.compile("%([0-9A-Z]{2})");
+def fix(match):
+ octet = match.group(1)
+ intval = int(octet, 16)
+ if intval < 128:
+ return chr(intval).lower()
+ return '%' + octet.lower()
+
+def urlencode(line):
+ """Percent-encode each byte of non-ASCII unicode characters."""
+ line = urllib.quote(line.strip().encode("utf-8"))
+ line = capfix.sub(fix, line)
+ return line
+
+if __name__ == "__main__":
+ args = sys.argv[1:]
+ if args and args[0] in ("-h", "--help"):
+ print "Usage: python urlencode.py < utf-8.txt > urlencoded.txt"
+ sys.exit(2)
+
+ sys.stdin = codecs.getreader("utf-8")(sys.stdin)
+ sys.stdout = codecs.getwriter("ascii")(sys.stdout)
+
+ lines = sys.stdin.readlines()
+ sys.stdout.write( "\n".join(map(urlencode, lines)) )
diff --git a/tests/phpunit/data/formatting/utf-8/urlencoded.txt b/tests/phpunit/data/formatting/utf-8/urlencoded.txt new file mode 100644 index 0000000000..930bf13ff6 --- /dev/null +++ b/tests/phpunit/data/formatting/utf-8/urlencoded.txt @@ -0,0 +1,5 @@ +%e7%ab%a0%e5%ad%90%e6%80%a1
+Fran%c3%a7ois Truffaut
+%e1%83%a1%e1%83%90%e1%83%a5%e1%83%90%e1%83%a0%e1%83%97%e1%83%95%e1%83%94%e1%83%9a%e1%83%9d
+Bj%c3%b6rk Gu%c3%b0mundsd%c3%b3ttir
+%e5%ae%ae%e5%b4%8e%e3%80%80%e9%a7%bf
diff --git a/tests/phpunit/data/formatting/utf-8/utf-8.txt b/tests/phpunit/data/formatting/utf-8/utf-8.txt new file mode 100644 index 0000000000..1596029d20 --- /dev/null +++ b/tests/phpunit/data/formatting/utf-8/utf-8.txt @@ -0,0 +1,5 @@ +章子怡
+François Truffaut
+საქართველო
+Björk Guðmundsdóttir
+宮崎 駿
|