diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php
index caa6de1d09..ba8be1e7d4 100644
--- a/wp-includes/html-api/class-wp-html-processor.php
+++ b/wp-includes/html-api/class-wp-html-processor.php
@@ -1029,6 +1029,216 @@ public function get_current_depth(): int {
return count( $this->breadcrumbs );
}
+ /**
+ * Normalizes an HTML fragment by serializing it.
+ *
+ * This method assumes that the given HTML snippet is found in BODY context.
+ * For normalizing full documents or fragments found in other contexts, create
+ * a new processor using {@see WP_HTML_Processor::create_fragment} or
+ * {@see WP_HTML_Processor::create_full_parser} and call {@see WP_HTML_Processor::serialize}
+ * on the created instances.
+ *
+ * Many aspects of an input HTML fragment may be changed during normalization.
+ *
+ * - Attribute values will be double-quoted.
+ * - Duplicate attributes will be removed.
+ * - Omitted tags will be added.
+ * - Tag and attribute name casing will be lower-cased,
+ * except for specific SVG and MathML tags or attributes.
+ * - Text will be re-encoded, null bytes handled,
+ * and invalid UTF-8 replaced with U+FFFD.
+ * - Any incomplete syntax trailing at the end will be omitted,
+ * for example, an unclosed comment opener will be removed.
+ *
+ * Example:
+ *
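+ *     echo WP_HTML_Processor::normalize( '<a href=#anchor v=5 href="/" enabled>One</a another v=5><!--' );
+ *     // <a href="#anchor" v="5" enabled>One</a>
+ *
+ *     echo WP_HTML_Processor::normalize( '<div></p>fun<table><td>cell</div>' );
+ *     // <div><p></p>fun<table><tbody><tr><td>cell</td></tr></tbody></table></div>
+ *
+ *     echo WP_HTML_Processor::normalize( '<![CDATA[invalid comment]]> syntax < <> "oddities"' );
+ *     // <!--[CDATA[invalid comment]]--> syntax &lt; &lt;&gt; &quot;oddities&quot;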
+ *
+ * @since 6.7.0
+ *
+ * @param string $html Input HTML to normalize.
+ *
+ * @return string|null Normalized output, or `null` if unable to normalize.
+ */
+ public static function normalize( string $html ): ?string {
+ 	$processor = static::create_fragment( $html );
+
+ 	/*
+ 	 * NOTE(review): create_fragment() is expected to return `null` when it cannot
+ 	 * build a processor (confirm against its definition). Report that as "unable
+ 	 * to normalize" instead of fatally calling a method on `null`.
+ 	 */
+ 	if ( null === $processor ) {
+ 		return null;
+ 	}
+
+ 	return $processor->serialize();
+ }
+
+ /**
+ * Returns normalized HTML for a fragment by serializing it.
+ *
+ * This differs from {@see WP_HTML_Processor::normalize} in that it starts with
+ * a specific HTML Processor, which _must_ not have already started scanning;
+ * it must be in the initial ready state and will be in the completed state once
+ * serialization is complete.
+ *
+ * Many aspects of an input HTML fragment may be changed during normalization.
+ *
+ * - Attribute values will be double-quoted.
+ * - Duplicate attributes will be removed.
+ * - Omitted tags will be added.
+ * - Tag and attribute name casing will be lower-cased,
+ * except for specific SVG and MathML tags or attributes.
+ * - Text will be re-encoded, null bytes handled,
+ * and invalid UTF-8 replaced with U+FFFD.
+ * - Any incomplete syntax trailing at the end will be omitted,
+ * for example, an unclosed comment opener will be removed.
+ *
+ * Example:
+ *
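+ *     $processor = WP_HTML_Processor::create_fragment( '<a href=#anchor v=5 href="/" enabled>One</a another v=5><!--' );
+ *     echo $processor->serialize();
+ *     // <a href="#anchor" v="5" enabled>One</a>
+ *
+ *     $processor = WP_HTML_Processor::create_fragment( '<div></p>fun<table><td>cell</div>' );
+ *     echo $processor->serialize();
+ *     // <div><p></p>fun<table><tbody><tr><td>cell</td></tr></tbody></table></div>
+ *
+ *     $processor = WP_HTML_Processor::create_fragment( '<![CDATA[invalid comment]]> syntax < <> "oddities"' );
+ *     echo $processor->serialize();
+ *     // <!--[CDATA[invalid comment]]--> syntax &lt; &lt;&gt; &quot;oddities&quot;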
+ *
+ * @since 6.7.0
+ *
+ * @return string|null Normalized HTML markup represented by processor,
+ * or `null` if unable to generate serialization.
+ */
+ public function serialize(): ?string {
+ 	// Serialization walks every token, so it can only start from the untouched, ready state.
+ 	if ( WP_HTML_Tag_Processor::STATE_READY !== $this->parser_state ) {
+ 		wp_trigger_error(
+ 			__METHOD__,
+ 			'An HTML Processor which has already started processing cannot serialize its contents. Serialize immediately after creating the instance.',
+ 			E_USER_WARNING
+ 		);
+ 		return null;
+ 	}
+
+ 	// Accumulate the normalized form of each token until the processor stops yielding tokens.
+ 	$html = '';
+ 	while ( $this->next_token() ) {
+ 		$html .= $this->serialize_token();
+ 	}
+
+ 	// The loop also ends when parsing fails; only hand back output if no error was recorded.
+ 	$last_error = $this->get_last_error();
+ 	if ( null !== $last_error ) {
+ 		wp_trigger_error(
+ 			__METHOD__,
+ 			"Cannot serialize HTML Processor with parsing error: {$last_error}.",
+ 			E_USER_WARNING
+ 		);
+ 		return null;
+ 	}
+
+ 	return $html;
+ }
+
+ /**
+ * Serializes the currently-matched token.
+ *
+ * This method produces a fully-normative HTML string for the currently-matched token,
+ * if able. If not matched at any token or if the token doesn't correspond to any HTML
+ * it will return an empty string (for example, presumptuous end tags are ignored).
+ *
+ * @see static::serialize()
+ *
+ * @since 6.7.0
+ *
+ * @return string Serialization of token, or empty string if no serialization exists.
+ */
+ protected function serialize_token(): string {
+ 	$html       = '';
+ 	$token_type = $this->get_token_type();
+
+ 	// Non-tag tokens are serialized here and returned right after the switch.
+ 	switch ( $token_type ) {
+ 		case '#text':
+ 			$html .= htmlspecialchars( $this->get_modifiable_text(), ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
+ 			break;
+
+ 		// Unlike the `<>` which is interpreted as plaintext, this is ignored entirely.
+ 		case '#presumptuous-tag':
+ 			break;
+
+ 		// Every comment flavor is normalized into standard comment syntax.
+ 		case '#funky-comment':
+ 		case '#comment':
+ 			$html .= "<!--{$this->get_full_comment_text()}-->";
+ 			break;
+
+ 		case '#cdata-section':
+ 			$html .= "<![CDATA[{$this->get_modifiable_text()}]]>";
+ 			break;
+
+ 		/*
+ 		 * Token types for non-tags are `#`-prefixed, so matching on the bare
+ 		 * string 'html' could never succeed and the doctype was silently dropped.
+ 		 *
+ 		 * NOTE(review): every doctype is normalized to the HTML5 doctype here;
+ 		 * confirm that is acceptable for legacy/quirks-mode doctypes.
+ 		 */
+ 		case '#doctype':
+ 			$html .= '<!DOCTYPE html>';
+ 			break;
+ 	}
+
+ 	if ( '#tag' !== $token_type ) {
+ 		return $html;
+ 	}
+
+ 	// NUL bytes are not allowed in output; swap them for the replacement character.
+ 	$tag_name       = str_replace( "\x00", "\u{FFFD}", $this->get_tag() );
+ 	$in_html        = 'html' === $this->get_namespace();
+ 	$qualified_name = $in_html ? strtolower( $tag_name ) : $this->get_qualified_tag_name();
+
+ 	if ( $this->is_tag_closer() ) {
+ 		$html .= "</{$qualified_name}>";
+ 		return $html;
+ 	}
+
+ 	$attribute_names = $this->get_attribute_names_with_prefix( '' );
+ 	if ( null === $attribute_names ) {
+ 		$html .= "<{$qualified_name}>";
+ 		return $html;
+ 	}
+
+ 	$html .= "<{$qualified_name}";
+ 	foreach ( $attribute_names as $attribute_name ) {
+ 		$html .= " {$this->get_qualified_attribute_name( $attribute_name )}";
+ 		$value = $this->get_attribute( $attribute_name );
+
+ 		// Only string values get a `="…"` part; anything else is written as a bare attribute name.
+ 		if ( is_string( $value ) ) {
+ 			$html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' ) . '"';
+ 		}
+ 	}
+
+ 	/*
+ 	 * Replace NUL bytes once for the whole opening tag (name, attribute names
+ 	 * and values) instead of rescanning the growing string per attribute.
+ 	 */
+ 	$html = str_replace( "\x00", "\u{FFFD}", $html );
+
+ 	// The self-closing flag is only written for elements outside the HTML namespace.
+ 	if ( ! $in_html && $this->has_self_closing_flag() ) {
+ 		$html .= ' /';
+ 	}
+
+ 	$html .= '>';
+
+ 	// Flush out self-contained elements.
+ 	if ( $in_html && in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) {
+ 		$text = $this->get_modifiable_text();
+
+ 		switch ( $tag_name ) {
+ 			// The contents of these elements are discarded.
+ 			case 'IFRAME':
+ 			case 'NOEMBED':
+ 			case 'NOFRAMES':
+ 				$text = '';
+ 				break;
+
+ 			// Script and style contents are emitted verbatim, without escaping.
+ 			case 'SCRIPT':
+ 			case 'STYLE':
+ 				break;
+
+ 			// Remaining elements (TEXTAREA, TITLE, XMP) get their text escaped.
+ 			default:
+ 				$text = htmlspecialchars( $text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
+ 		}
+
+ 		// The contents and the closing tag are written together with the opener.
+ 		$html .= "{$text}</{$qualified_name}>";
+ 	}
+
+ 	return $html;
+ }
+
/**
* Parses next element in the 'initial' insertion mode.
*
diff --git a/wp-includes/html-api/class-wp-html-tag-processor.php b/wp-includes/html-api/class-wp-html-tag-processor.php
index 2b115dd156..5e29fc7d15 100644
--- a/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1984,6 +1984,9 @@ private function parse_next_tag(): bool {
* [#x10000-#xEFFFF]
* > NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
*
+ * @todo Processing instruction nodes in SGML may contain any kind of markup. XML defines a
+ *       special case with `<?xml ... ?>` syntax, but the `?` is part of the bogus comment.
+ *
* @see https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PITarget
*/
if ( $this->token_length >= 5 && '?' === $html[ $closer_at - 1 ] ) {
diff --git a/wp-includes/version.php b/wp-includes/version.php
index fedbeb62b2..4552bd00e6 100644
--- a/wp-includes/version.php
+++ b/wp-includes/version.php
@@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
-$wp_version = '6.7-alpha-59075';
+$wp_version = '6.7-alpha-59076';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.