diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 233d47eb8da95..2b115dd156014 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -3385,6 +3385,58 @@ public function get_comment_type(): ?string {
 		return $this->comment_type;
 	}
 
+	/**
+	 * Returns the text of a matched comment or null if not on a comment type node.
+	 *
+	 * This method returns the entire text content of a comment node as it
+	 * would appear in the browser.
+	 *
+	 * This differs from {@see ::get_modifiable_text()} in that certain comment
+	 * types in the HTML API cannot allow their entire comment text content to
+	 * be modified. Namely, "bogus comments" of the form `<?not allowed in html>`
+	 * will create a comment whose text content starts with `?`. Note that if
+	 * that character were modified, it would be possible to change the node
+	 * type.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @return string|null The comment text as it would appear in the browser or null
+	 *                     if not on a comment type node.
+	 */
+	public function get_full_comment_text(): ?string {
+		if ( self::STATE_FUNKY_COMMENT === $this->parser_state ) {
+			return $this->get_modifiable_text();
+		}
+
+		if ( self::STATE_COMMENT !== $this->parser_state ) {
+			return null;
+		}
+
+		switch ( $this->get_comment_type() ) {
+			case self::COMMENT_AS_HTML_COMMENT:
+			case self::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT:
+				return $this->get_modifiable_text();
+
+			case self::COMMENT_AS_CDATA_LOOKALIKE:
+				return "[CDATA[{$this->get_modifiable_text()}]]";
+
+			case self::COMMENT_AS_PI_NODE_LOOKALIKE:
+				return "?{$this->get_tag()}{$this->get_modifiable_text()}?";
+
+			/*
+			 * This represents "bogus comments state" from HTML tokenization.
+			 * This can be entered by `<?` or `<!`, where `?` is included in
+			 * the comment text but `!` is not.
+			 */
+			case self::COMMENT_AS_INVALID_HTML:
+				$preceding_character = $this->html[ $this->text_starts_at - 1 ];
+				$comment_start       = '?' === $preceding_character ? '?' : '';
+				return "{$comment_start}{$this->get_modifiable_text()}";
+		}
+
+		return null;
+	}
+
 	/**
 	 * Subdivides a matched text node, splitting NULL byte sequences and decoded whitespace as
 	 * distinct nodes prefixes.
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
index 4862ba981e6f0..808fa39d17f26 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
@@ -27,20 +27,17 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
 	 * Skip specific tests that may not be supported or have known issues.
 	 */
 	const SKIP_TESTS = array(
-		'comments01/line0155'    => 'Unimplemented: Need to access raw comment text on non-normative comments.',
-		'comments01/line0169'    => 'Unimplemented: Need to access raw comment text on non-normative comments.',
-		'html5test-com/line0129' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
-		'noscript01/line0014'    => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'tests14/line0022'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'tests14/line0055'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'tests19/line0488'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'tests19/line0500'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'tests19/line1079'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'tests2/line0207'        => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'tests2/line0686'        => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'tests2/line0697'        => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'tests2/line0709'        => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'webkit01/line0231'      => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'noscript01/line0014' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests14/line0022'    => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests14/line0055'    => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests19/line0488'    => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests19/line0500'    => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests19/line1079'    => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests2/line0207'     => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests2/line0686'     => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests2/line0697'     => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests2/line0709'     => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'webkit01/line0231'   => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
 	);
 
 	/**
@@ -315,26 +312,8 @@ static function ( $a, $b ) {
 					break;
 
 				case '#comment':
-					switch ( $processor->get_comment_type() ) {
-						case WP_HTML_Processor::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT:
-						case WP_HTML_Processor::COMMENT_AS_HTML_COMMENT:
-						case WP_HTML_Processor::COMMENT_AS_INVALID_HTML:
-							$comment_text_content = $processor->get_modifiable_text();
-							break;
-
-						case WP_HTML_Processor::COMMENT_AS_CDATA_LOOKALIKE:
-							$comment_text_content = "[CDATA[{$processor->get_modifiable_text()}]]";
-							break;
-
-						case WP_HTML_Processor::COMMENT_AS_PI_NODE_LOOKALIKE:
-							$comment_text_content = "?{$processor->get_tag()}{$processor->get_modifiable_text()}?";
-							break;
-
-						default:
-							throw new Error( "Unhandled comment type for tree construction: {$processor->get_comment_type()}" );
-					}
 					// Comments must be "<" then "!-- " then the data then " -->".
-					$output .= str_repeat( self::TREE_INDENT, $indent_level ) . "<!-- {$comment_text_content} -->\n";
+					$output .= str_repeat( self::TREE_INDENT, $indent_level ) . "<!-- {$processor->get_full_comment_text()} -->\n";
 					break;
 
 				default: