sirreal · sirreal · Dec 18, 2023 · Dec 19, 2023 · Dec 18, 2023 · Dec 18, 2023
diff --git a/.github/workflows/end-to-end-tests.yml b/.github/workflows/end-to-end-tests.yml
@@ -131,7 +131,7 @@ jobs:
         uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0
         if: always()
         with:
-            name: failures-artifacts
+            name: failures-artifacts${{ matrix.LOCAL_SCRIPT_DEBUG && '-SCRIPT_DEBUG' || '' }}-${{ github.run_id }}
             path: artifacts
             if-no-files-found: ignore
 

diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -1264,7 +1264,7 @@ private function run_adoption_agency_algorithm() {
 
 			// > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return.
 			if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) {
-				$this->state->active_formatting_elements->remove_node( $formatting_element->bookmark_name );
+				$this->state->active_formatting_elements->remove_node( $formatting_element );
 				return;
 			}
 

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1073,6 +1073,10 @@ private function skip_script_data() {
 					continue;
 				}
 
+				if ( $this->bytes_already_parsed >= $doc_length ) {
+					return false;
+				}
+
 				if ( '>' === $html[ $this->bytes_already_parsed ] ) {
 					$this->bytes_already_parsed = $closer_potentially_starts_at;
 					return true;
@@ -1113,7 +1117,7 @@ private function parse_next_tag() {
 
 			$this->token_starts_at = $at;
 
-			if ( '/' === $this->html[ $at + 1 ] ) {
+			if ( $doc_length > $at + 1 && '/' === $this->html[ $at + 1 ] ) {
 				$this->is_closing_tag = true;
 				++$at;
 			} else {

diff --git a/src/wp-includes/script-loader.php b/src/wp-includes/script-loader.php
@@ -1881,6 +1881,8 @@ function wp_prototype_before_jquery( $js_array ) {
  * These localizations require information that may not be loaded even by init.
  *
  * @since 2.5.0
+ *
+ * @global array $shortcode_tags
  */
 function wp_just_in_time_script_localization() {
 

diff --git a/tests/phpunit/data/html5lib-tests/AUTHORS.rst b/tests/phpunit/data/html5lib-tests/AUTHORS.rst
@@ -0,0 +1,34 @@
+Credits
+=======
+
+The ``html5lib`` test data is maintained by:
+
+- James Graham
+- Geoffrey Sneddon
+
+
+Contributors
+------------
+
+- Adam Barth
+- Andi Sidwell
+- Anne van Kesteren
+- David Flanagan
+- Edward Z. Yang
+- Geoffrey Sneddon
+- Henri Sivonen
+- Ian Hickson
+- Jacques Distler
+- James Graham
+- Lachlan Hunt
+- lantis63
+- Mark Pilgrim
+- Mats Palmgren
+- Ms2ger
+- Nolan Waite
+- Philip Taylor
+- Rafael Weinstein
+- Ryan King
+- Sam Ruby
+- Simon Pieters
+- Thomas Broyer
diff --git a/tests/phpunit/data/html5lib-tests/LICENSE b/tests/phpunit/data/html5lib-tests/LICENSE
@@ -0,0 +1,21 @@
+Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and
+other contributors
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/tests/phpunit/data/html5lib-tests/README.md b/tests/phpunit/data/html5lib-tests/README.md
@@ -0,0 +1,8 @@
+html5lib-tests
+==============
+
+This test data was taken from:
+
+https://github.com/html5lib/html5lib-tests
+
+The sha was `a9f44960a9fedf265093d22b2aa3c7ca123727b9`.
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/README.md b/tests/phpunit/data/html5lib-tests/tree-construction/README.md
@@ -0,0 +1,108 @@
+Tree Construction Tests
+=======================
+
+Each file containing tree construction tests consists of any number of
+tests separated by two newlines (LF) and a single newline before the end
+of the file. For instance:
+
+    [TEST]LF
+    LF
+    [TEST]LF
+    LF
+    [TEST]LF
+
+Where [TEST] is the following format:
+
+Each test must begin with a string "\#data" followed by a newline (LF).
+All subsequent lines until a line that says "\#errors" are the test data
+and must be passed to the system being tested unchanged, except with the
+final newline (on the last line) removed.
+
+Then there must be a line that says "\#errors". It must be followed by
+one line per parse error that a conformant checker would return. It
+doesn't matter what those lines are, although they can't be
+"\#new-errors", "\#document-fragment", "\#document", "\#script-off",
+"\#script-on", or empty, the only thing that matters is that there be
+the right number of parse errors.
+
+Then there \*may\* be a line that says "\#new-errors", which works like
+the "\#errors" section adding more errors to the expected number of
+errors.
+
+Then there \*may\* be a line that says "\#document-fragment", which must
+be followed by a newline (LF), followed by a string of characters that
+indicates the context element, followed by a newline (LF). If the string 
+of characters starts with "svg ", the context element is in the SVG
+namespace and the substring after "svg " is the local name. If the
+string of characters starts with "math ", the context element is in the
+MathML namespace and the substring after "math " is the local name.
+Otherwise, the context element is in the HTML namespace and the string
+is the local name. If this line is present the "\#data" must be parsed
+using the HTML fragment parsing algorithm with the context element as
+context.
+
+Then there \*may\* be a line that says "\#script-off" or
+"\#script-on". If a line that says "\#script-off" is present, the
+parser must set the scripting flag to disabled. If a line that says
+"\#script-on" is present, it must set it to enabled. Otherwise, the
+test should be run in both modes.
+
+Then there must be a line that says "\#document", which must be followed
+by a dump of the tree of the parsed DOM. Each node must be represented
+by a single line. Each line must start with "| ", followed by two spaces
+per parent node that the node has before the root document node.
+
+-   Element nodes must be represented by a "`<`" then the *tag name
+    string* "`>`", and all the attributes must be given, sorted
+    lexicographically by UTF-16 code unit according to their *attribute
+    name string*, on subsequent lines, as if they were children of the
+    element node.
+-   Attribute nodes must have the *attribute name string*, then an "="
+    sign, then the attribute value in double quotes (").
+-   Text nodes must be the string, in double quotes. Newlines aren't
+    escaped.
+-   Comments must be "`<`" then "`!-- `" then the data then "` -->`".
+-   DOCTYPEs must be "`<!DOCTYPE `" then the name then if either of the
+    system id or public id is non-empty a space, public id in
+    double-quotes, another space an the system id in double-quotes, and
+    then in any case "`>`".
+-   Processing instructions must be "`<?`", then the target, then a
+    space, then the data and then "`>`". (The HTML parser cannot emit
+    processing instructions, but scripts can, and the WebVTT to DOM
+    rules can emit them.)
+-   Template contents are represented by the string "content" with the
+    children below it.
+
+The *tag name string* is the local name prefixed by a namespace
+designator. For the HTML namespace, the namespace designator is the
+empty string, i.e. there's no prefix. For the SVG namespace, the
+namespace designator is "svg ". For the MathML namespace, the namespace
+designator is "math ".
+
+The *attribute name string* is the local name prefixed by a namespace
+designator. For no namespace, the namespace designator is the empty
+string, i.e. there's no prefix. For the XLink namespace, the namespace
+designator is "xlink ". For the XML namespace, the namespace designator
+is "xml ". For the XMLNS namespace, the namespace designator is "xmlns
+". Note the difference between "xlink:href" which is an attribute in no
+namespace with the local name "xlink:href" and "xlink href" which is an
+attribute in the xlink namespace with the local name "href".
+
+If there is also a "\#document-fragment" the bit following "\#document"
+must be a representation of the HTML fragment serialization for the
+context element given by "\#document-fragment".
+
+For example:
+
+    #data
+    <p>One<p>Two
+    #errors
+    3: Missing document type declaration
+    #document
+    | <html>
+    |   <head>
+    |   <body>
+    |     <p>
+    |       "One"
+    |     <p>
+    |       "Two"