*
* * * This will be represented as: * * BLOCK["core/separator"] * { * "backgroundColor": "accent-1", * "className": "has-custom-classname is-style-default", * "style": { * "spacing": { * "margin": { * "top": "50px", * "bottom": "50px" * } * } * } * } *
* class="has-custom-classname is-style-default wp-block-separator" * style="margin-top:50px;margin-bottom:50px;" * * * @see https://github.com/WordPress/wordpress-develop/blob/trunk/tests/phpunit/data/html5lib-tests/tree-construction/README.md * * @since 6.9.0 * * @throws WP_HTML_Unsupported_Exception|Error If the markup could not be parsed. * * @param string $html Given test HTML. * @param string|null $fragment_context Context element in which to parse HTML, such as BODY or SVG. * @return string Tree structure of parsed HTML, if supported. */ function build_visual_html_tree( string $html, ?string $fragment_context ): string { $processor = $fragment_context ? WP_HTML_Processor::create_fragment( $html, $fragment_context ) : WP_HTML_Processor::create_full_parser( $html ); if ( null === $processor ) { throw new Error( 'Could not create a parser.' ); } $tree_indent = ' '; $output = ''; $indent_level = 0; $was_text = null; $text_node = ''; $block_context = array(); while ( $processor->next_token() ) { if ( null !== $processor->get_last_error() ) { break; } $token_name = $processor->get_token_name(); $token_type = $processor->get_token_type(); $is_closer = $processor->is_tag_closer(); if ( $was_text && '#text' !== $token_name ) { if ( '' !== $text_node ) { $output .= "{$text_node}\"\n"; } $was_text = false; $text_node = ''; } switch ( $token_type ) { case '#doctype': $doctype = $processor->get_doctype_info(); $output .= "name}"; if ( null !== $doctype->public_identifier || null !== $doctype->system_identifier ) { $output .= " \"{$doctype->public_identifier}\" \"{$doctype->system_identifier}\""; } $output .= ">\n"; break; case '#tag': $namespace = $processor->get_namespace(); $tag_name = 'html' === $namespace ? strtolower( $processor->get_tag() ) : "{$namespace} {$processor->get_qualified_tag_name()}"; if ( $is_closer ) { --$indent_level; if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) { --$indent_level; } break; } $tag_indent = $indent_level; if ( $processor->expects_closer() ) { ++$indent_level; } $output .= str_repeat( $tree_indent, $tag_indent ) . "<{$tag_name}>\n"; $attribute_names = $processor->get_attribute_names_with_prefix( '' ); if ( $attribute_names ) { $sorted_attributes = array(); foreach ( $attribute_names as $attribute_name ) { $sorted_attributes[ $attribute_name ] = $processor->get_qualified_attribute_name( $attribute_name ); } /* * Sorts attributes to match html5lib sort order. * * - First comes normal HTML attributes. * - Then come adjusted foreign attributes; these have spaces in their names. * - Finally come non-adjusted foreign attributes; these have a colon in their names. * * Example: * * From: * Sorted: 'definitionURL', 'xlink show', 'xlink title', 'xlink:author' */ uasort( $sorted_attributes, static function ( $a, $b ) { $a_has_ns = str_contains( $a, ':' ); $b_has_ns = str_contains( $b, ':' ); // Attributes with `:` should follow all other attributes. if ( $a_has_ns !== $b_has_ns ) { return $a_has_ns ? 1 : -1; } $a_has_sp = str_contains( $a, ' ' ); $b_has_sp = str_contains( $b, ' ' ); // Attributes with a namespace ' ' should come after those without. if ( $a_has_sp !== $b_has_sp ) { return $a_has_sp ? 1 : -1; } return $a <=> $b; } ); foreach ( $sorted_attributes as $attribute_name => $display_name ) { $val = $processor->get_attribute( $attribute_name ); /* * Attributes with no value are `true` with the HTML API, * we use the empty string value in the tree structure. */ if ( true === $val ) { $val = ''; } elseif ( 'class' === $attribute_name ) { $class_names = iterator_to_array( $processor->class_list() ); sort( $class_names, SORT_STRING ); $val = implode( ' ', $class_names ); } elseif ( 'style' === $attribute_name ) { $normalized_style = ''; foreach ( explode( ';', $val ) as $style ) { if ( empty( trim( $style ) ) ) { continue; } list( $style_key, $style_val ) = explode( ':', $style ); $style_key = trim( $style_key ); $style_val = trim( $style_val ); $normalized_style .= "{$style_key}:{$style_val};"; } $val = $normalized_style; } $output .= str_repeat( $tree_indent, $tag_indent + 1 ) . "{$display_name}=\"{$val}\"\n"; } } // Self-contained tags contain their inner contents as modifiable text. $modifiable_text = $processor->get_modifiable_text(); if ( '' !== $modifiable_text ) { $output .= str_repeat( $tree_indent, $tag_indent + 1 ) . "\"{$modifiable_text}\"\n"; } if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) { $output .= str_repeat( $tree_indent, $indent_level ) . "content\n"; ++$indent_level; } break; case '#cdata-section': case '#text': $text_content = $processor->get_modifiable_text(); if ( '' === trim( $text_content, " \f\t\r\n" ) ) { break; } $was_text = true; if ( '' === $text_node ) { $text_node .= str_repeat( $tree_indent, $indent_level ) . '"'; } $text_node .= $text_content; break; case '#funky-comment': // Comments must be "<" then "!-- " then the data then " -->". $output .= str_repeat( $tree_indent, $indent_level ) . "\n"; break; case '#comment': // Comments must be "<" then "!--" then the data then "-->". $comment = ""; // Maybe the comment is a block delimiter. $parser = new WP_Block_Parser(); $parser->document = $comment; $parser->offset = 0; list( $delimiter_type, $block_name, $block_attrs, $start_offset, $token_length ) = $parser->next_token(); switch ( $delimiter_type ) { case 'block-opener': case 'void-block': $output .= str_repeat( $tree_indent, $indent_level ) . "BLOCK[\"{$block_name}\"]\n"; if ( 'block-opener' === $delimiter_type ) { $block_context[] = $block_name; ++$indent_level; } // If they're no attributes, we're done here. if ( empty( $block_attrs ) ) { break; } // Normalize attribute order. ksort( $block_attrs, SORT_STRING ); if ( isset( $block_attrs['className'] ) ) { // Normalize class name order (and de-duplicate), as we need to be tolerant of different orders. // (Style attributes don't need this treatment, as they are parsed into a nested array.) $block_class_processor = new WP_HTML_Tag_Processor( '
' ); $block_class_processor->next_token(); $block_class_processor->set_attribute( 'class', $block_attrs['className'] ); $class_names = iterator_to_array( $block_class_processor->class_list() ); sort( $class_names, SORT_STRING ); $block_attrs['className'] = implode( ' ', $class_names ); } $block_attrs = json_encode( $block_attrs, JSON_PRETTY_PRINT ); // Fix indentation by "halving" it (2 spaces instead of 4). // Additionally, we need to indent each line by the current indentation level. $block_attrs = preg_replace( '/^( +)\1/m', str_repeat( $tree_indent, $indent_level ) . '$1', $block_attrs ); // Finally, indent the first line, and the last line (with the closing curly brace). $output .= str_repeat( $tree_indent, $indent_level ) . substr( $block_attrs, 0, -1 ) . str_repeat( $tree_indent, $indent_level ) . "}\n"; break; case 'block-closer': // Is this a closer for the currently open block? if ( ! empty( $block_context ) && end( $block_context ) === $block_name ) { // If it's a closer, we don't add it to the output. // Instead, we decrease indentation and remove the block from block context stack. --$indent_level; array_pop( $block_context ); } break; default: // Not a block delimiter. $output .= str_repeat( $tree_indent, $indent_level ) . $comment . "\n"; break; } break; default: // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_var_export $serialized_token_type = var_export( $processor->get_token_type(), true ); throw new Error( "Unhandled token type for tree construction: {$serialized_token_type}" ); } } if ( null !== $processor->get_unsupported_exception() ) { throw $processor->get_unsupported_exception(); } if ( null !== $processor->get_last_error() ) { throw new Error( "Parser error: {$processor->get_last_error()}" ); } if ( $processor->paused_at_incomplete_token() ) { throw new Error( 'Paused at incomplete token.' ); } if ( '' !== $text_node ) { $output .= "{$text_node}\"\n"; } return $output; }