diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index 9294ac2915b00..f3c4f2681f239 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -1173,7 +1173,7 @@ protected function serialize_token(): string {
 			return $html;
 		}
 
-		$tag_name = $this->get_tag();
+		$tag_name = str_replace( "\x00", "\u{FFFD}", $this->get_tag() );
 		$in_html = 'html' === $this->get_namespace();
 		$qualified_name = $in_html ? strtolower( $tag_name ) : $this->get_qualified_tag_name();
 
@@ -1196,6 +1196,8 @@ protected function serialize_token(): string {
 			if ( is_string( $value ) ) {
 				$html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 ) . '"';
 			}
+
+			$html = str_replace( "\x00", "\u{FFFD}", $html );
 		}
 
 		if ( ! $in_html && $this->has_self_closing_flag() ) {
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
index 4fa46d3cabc2f..e05ca28473e04 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
@@ -248,4 +248,40 @@ public function data_bogus_comments() {
 			'XML Processing Instruction look-alike' => array( '<', '?xml foo ', '>' ),
 		);
 	}
+
+	/**
+	 * Ensures that NULL bytes are properly handled.
+	 *
+	 * @ticket 62036
+	 *
+	 * @dataProvider data_tokens_with_null_bytes
+	 *
+	 * @param string $html_with_nulls HTML token containing NULL bytes in various places.
+	 * @param string $expected_output Expected parse of HTML after handling NULL bytes.
+	 */
+	public function test_replaces_null_bytes_appropriately( string $html_with_nulls, string $expected_output ) {
+		$this->assertSame(
+			$expected_output,
+			WP_HTML_Processor::normalize( $html_with_nulls ),
+			'Should have properly replaced or removed NULL bytes.'
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_tokens_with_null_bytes() {
+		return array(
+			'Tag name'             => array( "<img\x00id=5>", "<img\u{FFFD}id=5></img\u{FFFD}id=5>" ),
+			'Attribute name'       => array( "<img/\x00id=5>", "<img \u{FFFD}id=\"5\">" ),
+			'Attribute value'      => array( "<img id='5\x00'>", "<img id=\"5\u{FFFD}\">" ),
+			'Body text'            => array( "one\x00two", 'onetwo' ),
+			'Foreign content text' => array( "<svg>one\x00two</svg>", "<svg>one\u{FFFD}two</svg>" ),
+			'SCRIPT content'       => array( "<script>alert(\x00)</script>", "<script>alert(\u{FFFD})</script>" ),
+			'STYLE content'        => array( "<style>\x00 {}</style>", "<style>\u{FFFD} {}</style>" ),
+			'Comment text'         => array( "<!-- \x00 -->", "<!-- \u{FFFD} -->" ),
+		);
+	}
 }