From 5e45691593b1423dbc99fd54cae75ef531981981 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 26 Dec 2020 16:25:07 -0500 Subject: [PATCH] fix(jruby): make less-naive inference of xpath custom function ns Closes #1890 Related to #2147 --- ext/java/nokogiri/XmlXpathContext.java | 59 +++++++++++++++++++------- test/files/staff.xml | 2 +- test/xml/test_xpath.rb | 40 +++++++++++++++++ 3 files changed, 85 insertions(+), 16 deletions(-) diff --git a/ext/java/nokogiri/XmlXpathContext.java b/ext/java/nokogiri/XmlXpathContext.java index f0aa3213e1b..667281871c4 100644 --- a/ext/java/nokogiri/XmlXpathContext.java +++ b/ext/java/nokogiri/XmlXpathContext.java @@ -33,6 +33,8 @@ package nokogiri; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.xml.transform.TransformerException; @@ -68,7 +70,6 @@ */ @JRubyClass(name="Nokogiri::XML::XPathContext") public class XmlXpathContext extends RubyObject { - static { final String DTMManager = "org.apache.xml.dtm.DTMManager"; if (SafePropertyAccessor.getProperty(DTMManager) == null) { @@ -109,25 +110,53 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, IRubyO } } + + // see https://en.wikipedia.org/wiki/QName + private static final String NameStartCharStr = "[_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]" ; + private static final String NameCharStr = "[-\\.0-9\u00B7\u0300-\u036F\u203F-\u2040]|" + NameStartCharStr ; + private static final String NCNameStr = "(?:" + NameStartCharStr + ")(?:" + NameCharStr + ")*"; + private static final String XPathFunctionCaptureStr = "(" + NCNameStr + "(?=\\())"; + private static final Pattern XPathFunctionCaptureRE = Pattern.compile(XPathFunctionCaptureStr); + @JRubyMethod - public IRubyObject evaluate(ThreadContext context, IRubyObject expr, IRubyObject handler) { - - String src = expr.convertToString().asJavaString(); - if (!handler.isNil()) { - if (!isContainsPrefix(src)) { - StringBuilder replacement = new StringBuilder(); - Set methodNames = handler.getMetaClass().getMethods().keySet(); - final String PREFIX = NokogiriNamespaceContext.NOKOGIRI_PREFIX; - for (String name : methodNames) { - replacement.setLength(0); - replacement.ensureCapacity(PREFIX.length() + 1 + name.length()); - replacement.append(PREFIX).append(':').append(name); - src = src.replace(name, replacement); // replace(name, NOKOGIRI_PREFIX + ':' + name) + public IRubyObject evaluate(ThreadContext context, IRubyObject rbQuery, IRubyObject handler) { + String query = rbQuery.convertToString().asJavaString(); + + if (!handler.isNil() && !isContainsPrefix(query)) { + // + // The user has passed in a handler, but isn't using the `nokogiri:` prefix as + // instructed in JRuby land, so let's try to be clever and rewrite the query, inserting + // the nokogiri namespace where appropriate. + // + StringBuilder namespacedQuery = new StringBuilder(); + int jchar = 0; + + // Find the methods on the handler object + Set methodNames = handler.getMetaClass().getMethods().keySet(); + + // Find the function calls in the xpath query + Matcher xpathFunctionCalls = XPathFunctionCaptureRE.matcher(query); + + while (xpathFunctionCalls.find()) { + namespacedQuery.append(query.subSequence(jchar, xpathFunctionCalls.start())); + jchar = xpathFunctionCalls.start(); + + if (methodNames.contains(xpathFunctionCalls.group())) { + namespacedQuery.append(NokogiriNamespaceContext.NOKOGIRI_PREFIX); + namespacedQuery.append(":"); } + + namespacedQuery.append(query.subSequence(xpathFunctionCalls.start(), xpathFunctionCalls.end())); + jchar = xpathFunctionCalls.end(); + } + + if (jchar < query.length()-1) { + namespacedQuery.append(query.subSequence(jchar, query.length())); } + query = namespacedQuery.toString(); } - return node_set(context, src, handler); + return node_set(context, query, handler); } @JRubyMethod diff --git a/test/files/staff.xml b/test/files/staff.xml index fd635110add..acebb31d99a 100644 --- a/test/files/staff.xml +++ b/test/files/staff.xml @@ -35,7 +35,7 @@ EMP0003 Roger Jones - Department Manager + Department Manager 100,000 &ent4;
PO Box 27 Irving, texas 98553
diff --git a/test/xml/test_xpath.rb b/test/xml/test_xpath.rb index 80c65c81db4..878eae0049e 100644 --- a/test/xml/test_xpath.rb +++ b/test/xml/test_xpath.rb @@ -35,6 +35,11 @@ def thing thing thing end + def another_thing thing + @things << thing + thing + end + def returns_array node_set @things << node_set.to_a node_set.to_a @@ -528,6 +533,41 @@ def test_xpath_syntax_error end end end + + describe "jruby inferring XPath functions from the handler methods" do + it "should not get confused simply by a string similarity" do + # https://github.com/sparklemotion/nokogiri/pull/1890 + # this describes a bug where XmlXpathContext naively replaced query substrings using method names + handler = Class.new do + def collision(nodes) + nil + end + end.new + found_by_id = @xml.xpath("//*[@id='partial_collision_id']", handler) + assert_equal 1, found_by_id.length + end + + it "handles multiple handler function calls" do + # test that jruby handles this case identically to C + result = @xml.xpath('//employee[thing(.)]/employeeId[another_thing(.)]', @handler) + assert_equal(5, result.length) + assert_equal(10, @handler.things.length) + end + + it "doesn't get confused by an XPath function, flavor 1" do + # test that it doesn't get confused by an XPath function + result = @xml.xpath('//employee[thing(.)]/employeeId[last()]', @handler) + assert_equal(5, result.length) + assert_equal(5, @handler.things.length) + end + + it "doesn't get confused by an XPath function, flavor 2" do + # test that it doesn't get confused by an XPath function + result = @xml.xpath('//employee[last()]/employeeId[thing(.)]', @handler) + assert_equal(1, result.length) + assert_equal(1, @handler.things.length) + end + end end end end