summaryrefslogtreecommitdiffstatshomepage
path: root/tests/phpunit/includes/build-visual-html-tree.php
diff options
context:
space:
mode:
Diffstat (limited to 'tests/phpunit/includes/build-visual-html-tree.php')
-rw-r--r--tests/phpunit/includes/build-visual-html-tree.php304
1 files changed, 304 insertions, 0 deletions
diff --git a/tests/phpunit/includes/build-visual-html-tree.php b/tests/phpunit/includes/build-visual-html-tree.php
new file mode 100644
index 0000000000..e0f6c3ac3d
--- /dev/null
+++ b/tests/phpunit/includes/build-visual-html-tree.php
@@ -0,0 +1,304 @@
+<?php
+
+/* phpcs:disable WordPress.Security.EscapeOutput.ExceptionNotEscaped */
+
+/**
+ * Generates representation of the semantic HTML tree structure.
+ *
+ * This is inspired by the representation used by the HTML5lib tests. It's been extended here for
+ * blocks to render the semantic structure of blocks and their attributes.
+ * The order of attributes and class names is normalized both for HTML tags and blocks,
+ * as is the whitespace in HTML tags' style attribute.
+ *
+ * For example, consider the following block markup:
+ *
+ * <!-- wp:separator {"className":"is-style-default has-custom-classname","style":{"spacing":{"margin":{"top":"50px","bottom":"50px"}}},"backgroundColor":"accent-1"} -->
+ * <hr class="wp-block-separator is-style-default has-custom-classname" style="margin-top: 50px; margin-bottom: 50px" />
+ * <!-- /wp:separator -->
+ *
+ * This will be represented as:
+ *
+ * BLOCK["core/separator"]
+ * {
+ * "backgroundColor": "accent-1",
+ * "className": "has-custom-classname is-style-default",
+ * "style": {
+ * "spacing": {
+ * "margin": {
+ * "top": "50px",
+ * "bottom": "50px"
+ * }
+ * }
+ * }
+ * }
+ * <hr>
+ * class="has-custom-classname is-style-default wp-block-separator"
+ * style="margin-top:50px;margin-bottom:50px;"
+ *
+ *
+ * @see https://github.com/WordPress/wordpress-develop/blob/trunk/tests/phpunit/data/html5lib-tests/tree-construction/README.md
+ *
+ * @since 6.9.0
+ *
+ * @throws WP_HTML_Unsupported_Exception|Error If the markup could not be parsed.
+ *
+ * @param string $html Given test HTML.
+ * @param string|null $fragment_context Context element in which to parse HTML, such as BODY or SVG.
+ * @return string Tree structure of parsed HTML, if supported.
+ */
+function build_visual_html_tree( string $html, ?string $fragment_context ): string {
+ $processor = $fragment_context
+ ? WP_HTML_Processor::create_fragment( $html, $fragment_context )
+ : WP_HTML_Processor::create_full_parser( $html );
+ if ( null === $processor ) {
+ throw new Error( 'Could not create a parser.' );
+ }
+ $tree_indent = ' ';
+
+ $output = '';
+ $indent_level = 0;
+ $was_text = null;
+ $text_node = '';
+
+ $block_context = array();
+
+ while ( $processor->next_token() ) {
+ if ( null !== $processor->get_last_error() ) {
+ break;
+ }
+
+ $token_name = $processor->get_token_name();
+ $token_type = $processor->get_token_type();
+ $is_closer = $processor->is_tag_closer();
+
+ if ( $was_text && '#text' !== $token_name ) {
+ if ( '' !== $text_node ) {
+ $output .= "{$text_node}\"\n";
+ }
+ $was_text = false;
+ $text_node = '';
+ }
+
+ switch ( $token_type ) {
+ case '#doctype':
+ $doctype = $processor->get_doctype_info();
+ $output .= "<!DOCTYPE {$doctype->name}";
+ if ( null !== $doctype->public_identifier || null !== $doctype->system_identifier ) {
+ $output .= " \"{$doctype->public_identifier}\" \"{$doctype->system_identifier}\"";
+ }
+ $output .= ">\n";
+ break;
+
+ case '#tag':
+ $namespace = $processor->get_namespace();
+ $tag_name = 'html' === $namespace
+ ? strtolower( $processor->get_tag() )
+ : "{$namespace} {$processor->get_qualified_tag_name()}";
+
+ if ( $is_closer ) {
+ --$indent_level;
+
+ if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) {
+ --$indent_level;
+ }
+
+ break;
+ }
+
+ $tag_indent = $indent_level;
+
+ if ( $processor->expects_closer() ) {
+ ++$indent_level;
+ }
+
+ $output .= str_repeat( $tree_indent, $tag_indent ) . "<{$tag_name}>\n";
+
+ $attribute_names = $processor->get_attribute_names_with_prefix( '' );
+ if ( $attribute_names ) {
+ $sorted_attributes = array();
+ foreach ( $attribute_names as $attribute_name ) {
+ $sorted_attributes[ $attribute_name ] = $processor->get_qualified_attribute_name( $attribute_name );
+ }
+
+ /*
+ * Sorts attributes to match html5lib sort order.
+ *
+ * - First comes normal HTML attributes.
+ * - Then come adjusted foreign attributes; these have spaces in their names.
+ * - Finally come non-adjusted foreign attributes; these have a colon in their names.
+ *
+ * Example:
+ *
+ * From: <math xlink:author definitionurl xlink:title xlink:show>
+ * Sorted: 'definitionURL', 'xlink show', 'xlink title', 'xlink:author'
+ */
+ uasort(
+ $sorted_attributes,
+ static function ( $a, $b ) {
+ $a_has_ns = str_contains( $a, ':' );
+ $b_has_ns = str_contains( $b, ':' );
+
+ // Attributes with `:` should follow all other attributes.
+ if ( $a_has_ns !== $b_has_ns ) {
+ return $a_has_ns ? 1 : -1;
+ }
+
+ $a_has_sp = str_contains( $a, ' ' );
+ $b_has_sp = str_contains( $b, ' ' );
+
+ // Attributes with a namespace ' ' should come after those without.
+ if ( $a_has_sp !== $b_has_sp ) {
+ return $a_has_sp ? 1 : -1;
+ }
+
+ return $a <=> $b;
+ }
+ );
+
+ foreach ( $sorted_attributes as $attribute_name => $display_name ) {
+ $val = $processor->get_attribute( $attribute_name );
+ /*
+ * Attributes with no value are `true` with the HTML API,
+ * we use the empty string value in the tree structure.
+ */
+ if ( true === $val ) {
+ $val = '';
+ } elseif ( 'class' === $attribute_name ) {
+ $class_names = iterator_to_array( $processor->class_list() );
+ sort( $class_names, SORT_STRING );
+ $val = implode( ' ', $class_names );
+ } elseif ( 'style' === $attribute_name ) {
+ $normalized_style = '';
+ foreach ( explode( ';', $val ) as $style ) {
+ if ( empty( trim( $style ) ) ) {
+ continue;
+ }
+ list( $style_key, $style_val ) = explode( ':', $style );
+
+ $style_key = trim( $style_key );
+ $style_val = trim( $style_val );
+
+ $normalized_style .= "{$style_key}:{$style_val};";
+ }
+ $val = $normalized_style;
+ }
+ $output .= str_repeat( $tree_indent, $tag_indent + 1 ) . "{$display_name}=\"{$val}\"\n";
+ }
+ }
+
+ // Self-contained tags contain their inner contents as modifiable text.
+ $modifiable_text = $processor->get_modifiable_text();
+ if ( '' !== $modifiable_text ) {
+ $output .= str_repeat( $tree_indent, $tag_indent + 1 ) . "\"{$modifiable_text}\"\n";
+ }
+
+ if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) {
+ $output .= str_repeat( $tree_indent, $indent_level ) . "content\n";
+ ++$indent_level;
+ }
+
+ break;
+
+ case '#cdata-section':
+ case '#text':
+ $text_content = $processor->get_modifiable_text();
+ if ( '' === trim( $text_content, " \f\t\r\n" ) ) {
+ break;
+ }
+ $was_text = true;
+ if ( '' === $text_node ) {
+ $text_node .= str_repeat( $tree_indent, $indent_level ) . '"';
+ }
+ $text_node .= $text_content;
+ break;
+
+ case '#funky-comment':
+ // Comments must be "<" then "!-- " then the data then " -->".
+ $output .= str_repeat( $tree_indent, $indent_level ) . "<!-- {$processor->get_modifiable_text()} -->\n";
+ break;
+
+ case '#comment':
+ // Comments must be "<" then "!--" then the data then "-->".
+ $comment = "<!--{$processor->get_full_comment_text()}-->";
+
+ // Maybe the comment is a block delimiter.
+ $parser = new WP_Block_Parser();
+ $parser->document = $comment;
+ $parser->offset = 0;
+ list( $delimiter_type, $block_name, $block_attrs, $start_offset, $token_length ) = $parser->next_token();
+
+ switch ( $delimiter_type ) {
+ case 'block-opener':
+ case 'void-block':
+ $output .= str_repeat( $tree_indent, $indent_level ) . "BLOCK[\"{$block_name}\"]\n";
+
+ if ( 'block-opener' === $delimiter_type ) {
+ $block_context[] = $block_name;
+ ++$indent_level;
+ }
+
+ // If they're no attributes, we're done here.
+ if ( empty( $block_attrs ) ) {
+ break;
+ }
+
+ // Normalize attribute order.
+ ksort( $block_attrs, SORT_STRING );
+
+ if ( isset( $block_attrs['className'] ) ) {
+ // Normalize class name order (and de-duplicate), as we need to be tolerant of different orders.
+ // (Style attributes don't need this treatment, as they are parsed into a nested array.)
+ $block_class_processor = new WP_HTML_Tag_Processor( '<div>' );
+ $block_class_processor->next_token();
+ $block_class_processor->set_attribute( 'class', $block_attrs['className'] );
+ $class_names = iterator_to_array( $block_class_processor->class_list() );
+ sort( $class_names, SORT_STRING );
+ $block_attrs['className'] = implode( ' ', $class_names );
+ }
+
+ $block_attrs = json_encode( $block_attrs, JSON_PRETTY_PRINT );
+ // Fix indentation by "halving" it (2 spaces instead of 4).
+ // Additionally, we need to indent each line by the current indentation level.
+ $block_attrs = preg_replace( '/^( +)\1/m', str_repeat( $tree_indent, $indent_level ) . '$1', $block_attrs );
+ // Finally, indent the first line, and the last line (with the closing curly brace).
+ $output .= str_repeat( $tree_indent, $indent_level ) . substr( $block_attrs, 0, -1 ) . str_repeat( $tree_indent, $indent_level ) . "}\n";
+ break;
+ case 'block-closer':
+ // Is this a closer for the currently open block?
+ if ( ! empty( $block_context ) && end( $block_context ) === $block_name ) {
+ // If it's a closer, we don't add it to the output.
+ // Instead, we decrease indentation and remove the block from block context stack.
+ --$indent_level;
+ array_pop( $block_context );
+ }
+ break;
+ default: // Not a block delimiter.
+ $output .= str_repeat( $tree_indent, $indent_level ) . $comment . "\n";
+ break;
+ }
+ break;
+ default:
+ // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_var_export
+ $serialized_token_type = var_export( $processor->get_token_type(), true );
+ throw new Error( "Unhandled token type for tree construction: {$serialized_token_type}" );
+ }
+ }
+
+ if ( null !== $processor->get_unsupported_exception() ) {
+ throw $processor->get_unsupported_exception();
+ }
+
+ if ( null !== $processor->get_last_error() ) {
+ throw new Error( "Parser error: {$processor->get_last_error()}" );
+ }
+
+ if ( $processor->paused_at_incomplete_token() ) {
+ throw new Error( 'Paused at incomplete token.' );
+ }
+
+ if ( '' !== $text_node ) {
+ $output .= "{$text_node}\"\n";
+ }
+
+ return $output;
+}