From 014c9b06797ff4cb6a281de0fd6351dea51d69f6 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 26 Dec 2020 16:31:56 -0500 Subject: [PATCH 1/3] fix(jruby): clearer exception when failing to resolve xpath func Specifically, on XPath custom function resolution failure. --- ext/java/nokogiri/XmlXpathContext.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ext/java/nokogiri/XmlXpathContext.java b/ext/java/nokogiri/XmlXpathContext.java index f0aa3213e1..c6dc3305e0 100644 --- a/ext/java/nokogiri/XmlXpathContext.java +++ b/ext/java/nokogiri/XmlXpathContext.java @@ -162,7 +162,9 @@ private IRubyObject node_set(ThreadContext context, String expr, IRubyObject han return tryGetNodeSet(context, expr, fnResolver); } catch (TransformerException ex) { - throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime, expr, ex).toThrowable(); // Nokogiri::XML::XPath::SyntaxError + throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime, + (expr + ": " + ex.toString()), + ex).toThrowable(); } } From b883b1af844b71f6c0b6d0c3a4e336a73ebe5b93 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 26 Dec 2020 16:25:07 -0500 Subject: [PATCH 2/3] fix(jruby): make less-naive inference of xpath custom function ns Closes #1890 Related to #2147 --- ext/java/nokogiri/XmlXpathContext.java | 59 +++++++++++++++++++------- test/files/staff.xml | 2 +- test/xml/test_document.rb | 2 +- test/xml/test_dtd.rb | 2 +- test/xml/test_xpath.rb | 40 +++++++++++++++++ 5 files changed, 87 insertions(+), 18 deletions(-) diff --git a/ext/java/nokogiri/XmlXpathContext.java b/ext/java/nokogiri/XmlXpathContext.java index c6dc3305e0..ad74dd44cb 100644 --- a/ext/java/nokogiri/XmlXpathContext.java +++ b/ext/java/nokogiri/XmlXpathContext.java @@ -33,6 +33,8 @@ package nokogiri; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.xml.transform.TransformerException; @@ -68,7 +70,6 @@ */ @JRubyClass(name="Nokogiri::XML::XPathContext") public class XmlXpathContext extends RubyObject { - static { final String DTMManager = "org.apache.xml.dtm.DTMManager"; if (SafePropertyAccessor.getProperty(DTMManager) == null) { @@ -109,25 +110,53 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, IRubyO } } + + // see https://en.wikipedia.org/wiki/QName + private static final String NameStartCharStr = "[_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]" ; + private static final String NameCharStr = "[-\\.0-9\u00B7\u0300-\u036F\u203F-\u2040]|" + NameStartCharStr ; + private static final String NCNameStr = "(?:" + NameStartCharStr + ")(?:" + NameCharStr + ")*"; + private static final String XPathFunctionCaptureStr = "(" + NCNameStr + "(?=\\())"; + private static final Pattern XPathFunctionCaptureRE = Pattern.compile(XPathFunctionCaptureStr); + @JRubyMethod - public IRubyObject evaluate(ThreadContext context, IRubyObject expr, IRubyObject handler) { - - String src = expr.convertToString().asJavaString(); - if (!handler.isNil()) { - if (!isContainsPrefix(src)) { - StringBuilder replacement = new StringBuilder(); - Set methodNames = handler.getMetaClass().getMethods().keySet(); - final String PREFIX = NokogiriNamespaceContext.NOKOGIRI_PREFIX; - for (String name : methodNames) { - replacement.setLength(0); - replacement.ensureCapacity(PREFIX.length() + 1 + name.length()); - replacement.append(PREFIX).append(':').append(name); - src = src.replace(name, replacement); // replace(name, NOKOGIRI_PREFIX + ':' + name) + public IRubyObject evaluate(ThreadContext context, IRubyObject rbQuery, IRubyObject handler) { + String query = rbQuery.convertToString().asJavaString(); + + if (!handler.isNil() && !isContainsPrefix(query)) { + // + // The user has passed in a handler, but isn't using the `nokogiri:` prefix as + // instructed in JRuby land, so let's try to be clever and rewrite the query, inserting + // the nokogiri namespace where appropriate. + // + StringBuilder namespacedQuery = new StringBuilder(); + int jchar = 0; + + // Find the methods on the handler object + Set methodNames = handler.getMetaClass().getMethods().keySet(); + + // Find the function calls in the xpath query + Matcher xpathFunctionCalls = XPathFunctionCaptureRE.matcher(query); + + while (xpathFunctionCalls.find()) { + namespacedQuery.append(query.subSequence(jchar, xpathFunctionCalls.start())); + jchar = xpathFunctionCalls.start(); + + if (methodNames.contains(xpathFunctionCalls.group())) { + namespacedQuery.append(NokogiriNamespaceContext.NOKOGIRI_PREFIX); + namespacedQuery.append(":"); } + + namespacedQuery.append(query.subSequence(xpathFunctionCalls.start(), xpathFunctionCalls.end())); + jchar = xpathFunctionCalls.end(); + } + + if (jchar < query.length()-1) { + namespacedQuery.append(query.subSequence(jchar, query.length())); } + query = namespacedQuery.toString(); } - return node_set(context, src, handler); + return node_set(context, query, handler); } @JRubyMethod diff --git a/test/files/staff.xml b/test/files/staff.xml index fd635110ad..acebb31d99 100644 --- a/test/files/staff.xml +++ b/test/files/staff.xml @@ -35,7 +35,7 @@ EMP0003 Roger Jones - Department Manager + Department Manager 100,000 &ent4;
PO Box 27 Irving, texas 98553
diff --git a/test/xml/test_document.rb b/test/xml/test_document.rb index 3bf417fb5e..5d61b12991 100644 --- a/test/xml/test_document.rb +++ b/test/xml/test_document.rb @@ -419,7 +419,7 @@ def test_move_root_with_existing_root_gets_gcd def test_validate if Nokogiri.uses_libxml? - assert_equal 44, @xml.validate.length + assert_equal 45, @xml.validate.length else xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) { |cfg| cfg.dtdvalid } assert_equal 40, xml.validate.length diff --git a/test/xml/test_dtd.rb b/test/xml/test_dtd.rb index 93e30d62ee..4668a1fb98 100644 --- a/test/xml/test_dtd.rb +++ b/test/xml/test_dtd.rb @@ -150,7 +150,7 @@ def test_line def test_validate if Nokogiri.uses_libxml? list = @xml.internal_subset.validate @xml - assert_equal 44, list.length + assert_equal 45, list.length else xml = Nokogiri::XML(File.open(XML_FILE)) {|cfg| cfg.dtdvalid} list = xml.internal_subset.validate xml diff --git a/test/xml/test_xpath.rb b/test/xml/test_xpath.rb index 80c65c81db..878eae0049 100644 --- a/test/xml/test_xpath.rb +++ b/test/xml/test_xpath.rb @@ -35,6 +35,11 @@ def thing thing thing end + def another_thing thing + @things << thing + thing + end + def returns_array node_set @things << node_set.to_a node_set.to_a @@ -528,6 +533,41 @@ def test_xpath_syntax_error end end end + + describe "jruby inferring XPath functions from the handler methods" do + it "should not get confused simply by a string similarity" do + # https://github.com/sparklemotion/nokogiri/pull/1890 + # this describes a bug where XmlXpathContext naively replaced query substrings using method names + handler = Class.new do + def collision(nodes) + nil + end + end.new + found_by_id = @xml.xpath("//*[@id='partial_collision_id']", handler) + assert_equal 1, found_by_id.length + end + + it "handles multiple handler function calls" do + # test that jruby handles this case identically to C + result = @xml.xpath('//employee[thing(.)]/employeeId[another_thing(.)]', @handler) + assert_equal(5, result.length) + assert_equal(10, @handler.things.length) + end + + it "doesn't get confused by an XPath function, flavor 1" do + # test that it doesn't get confused by an XPath function + result = @xml.xpath('//employee[thing(.)]/employeeId[last()]', @handler) + assert_equal(5, result.length) + assert_equal(5, @handler.things.length) + end + + it "doesn't get confused by an XPath function, flavor 2" do + # test that it doesn't get confused by an XPath function + result = @xml.xpath('//employee[last()]/employeeId[thing(.)]', @handler) + assert_equal(1, result.length) + assert_equal(1, @handler.things.length) + end + end end end end From e1461ce630c5771fb21ee700e3257279fdd765f3 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 26 Dec 2020 16:33:42 -0500 Subject: [PATCH 3/3] changelog: updated for JRuby xpath function improvements --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65d1570b6a..d862e2dc51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -299,6 +299,8 @@ This CVE's public notice is [#1915](https://github.com/sparklemotion/nokogiri/is * [MRI] Address a memory leak when unparenting a DTD. [[#1784](https://github.com/sparklemotion/nokogiri/issues/1784)] (Thanks, [@stevecheckoway](https://github.com/stevecheckoway)!) * [MRI] Use RbConfig::CONFIG instead of ::MAKEFILE_CONFIG to fix installations that use Makefile macros. [[#1820](https://github.com/sparklemotion/nokogiri/issues/1820)] (Thanks, [@nobu](https://github.com/nobu)!) * [JRuby] Decrease large memory usage when making nested XPath queries. [[#1749](https://github.com/sparklemotion/nokogiri/issues/1749)] +* [JRuby] Fix how custom XPath function namespaces are inferred to be less naive. [#1890, #2148] +* [JRuby] Clarify exception message when custom XPath functions can't be resolved. * [JRuby] Fix failing tests on JRuby 9.2.x * [JRuby] Fix default namespaces in nodes reparented into a different document [[#1774](https://github.com/sparklemotion/nokogiri/issues/1774)] * [JRuby] Fix support for Java 9. [[#1759](https://github.com/sparklemotion/nokogiri/issues/1759)] (Thanks, [@Taywee](https://github.com/Taywee)!)