Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Add get full comment text method #7342

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -3385,6 +3385,58 @@ public function get_comment_type(): ?string {
return $this->comment_type;
}

/**
 * Gets the complete text of the currently-matched comment, or null when the
 * matched token is not a comment.
 *
 * The returned value is the comment's whole text content as a browser would
 * expose it. {@see ::get_modifiable_text()} may return less than that: for
 * some comment types the HTML API withholds part of the text from
 * modification, because changing it could change the type of the node.
 * A "bogus comment" such as `<?not allowed in html>`, for example, produces
 * a comment whose text content begins with `?`.
 *
 * The full text is rebuilt according to the comment type:
 *
 *  - Funky comments, HTML comments, and abruptly-closed comments:
 *    the modifiable text, unchanged.
 *  - CDATA lookalikes: the modifiable text wrapped in `[CDATA[` and `]]`.
 *  - Processing-instruction lookalikes: `?`, the value of `get_tag()`,
 *    the modifiable text, and a trailing `?`.
 *  - Invalid HTML comments: the modifiable text, prefixed with `?` when
 *    the byte immediately before the text is `?`.
 *
 * @since 6.7.0
 *
 * @return string|null Comment text as a browser would expose it, or null
 *                     when the matched token is not a comment.
 */
public function get_full_comment_text(): ?string {
	$state = $this->parser_state;

	// Funky comments need no reconstruction.
	if ( self::STATE_FUNKY_COMMENT === $state ) {
		return $this->get_modifiable_text();
	}

	// Every other non-comment token has no comment text at all.
	if ( self::STATE_COMMENT !== $state ) {
		return null;
	}

	// Remains null when the comment type is not one of those handled below.
	$full_text = null;

	switch ( $this->get_comment_type() ) {
		case self::COMMENT_AS_HTML_COMMENT:
		case self::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT:
			$full_text = $this->get_modifiable_text();
			break;

		case self::COMMENT_AS_CDATA_LOOKALIKE:
			$full_text = '[CDATA[' . $this->get_modifiable_text() . ']]';
			break;

		case self::COMMENT_AS_PI_NODE_LOOKALIKE:
			$full_text = '?' . $this->get_tag() . $this->get_modifiable_text() . '?';
			break;

		/*
		 * The "bogus comment state" of HTML tokenization is reached through
		 * either `<?` or `<!`. The `?` belongs to the comment text while the
		 * `!` does not, so the byte just before the text decides whether a
		 * leading `?` has to be restored.
		 */
		case self::COMMENT_AS_INVALID_HTML:
			$opener    = $this->html[ $this->text_starts_at - 1 ];
			$prefix    = '?' === $opener ? '?' : '';
			$full_text = $prefix . $this->get_modifiable_text();
			break;
	}

	return $full_text;
}

/**
* Subdivides a matched text node, splitting NULL byte sequences and decoded whitespace as
* distinct node prefixes.
Expand Down
45 changes: 12 additions & 33 deletions tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,17 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
* Skip specific tests that may not be supported or have known issues.
*/
const SKIP_TESTS = array(
'comments01/line0155' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
'comments01/line0169' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
'html5test-com/line0129' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
'noscript01/line0014' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests14/line0022' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests14/line0055' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests19/line0488' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests19/line0500' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests19/line1079' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0207' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0686' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0697' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0709' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'webkit01/line0231' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'noscript01/line0014' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests14/line0022' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests14/line0055' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests19/line0488' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests19/line0500' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests19/line1079' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0207' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0686' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0697' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0709' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'webkit01/line0231' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
);

/**
Expand Down Expand Up @@ -315,26 +312,8 @@ static function ( $a, $b ) {
break;

case '#comment':
switch ( $processor->get_comment_type() ) {
case WP_HTML_Processor::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT:
case WP_HTML_Processor::COMMENT_AS_HTML_COMMENT:
case WP_HTML_Processor::COMMENT_AS_INVALID_HTML:
$comment_text_content = $processor->get_modifiable_text();
break;

case WP_HTML_Processor::COMMENT_AS_CDATA_LOOKALIKE:
$comment_text_content = "[CDATA[{$processor->get_modifiable_text()}]]";
break;

case WP_HTML_Processor::COMMENT_AS_PI_NODE_LOOKALIKE:
$comment_text_content = "?{$processor->get_tag()}{$processor->get_modifiable_text()}?";
break;

default:
throw new Error( "Unhandled comment type for tree construction: {$processor->get_comment_type()}" );
}
// Comments must be "<" then "!-- " then the data then " -->".
$output .= str_repeat( self::TREE_INDENT, $indent_level ) . "<!-- {$comment_text_content} -->\n";
$output .= str_repeat( self::TREE_INDENT, $indent_level ) . "<!-- {$processor->get_full_comment_text()} -->\n";
break;

default:
Expand Down
Loading