Skip to content

Commit

Permalink
fix(jruby): make less-naive inference of xpath custom function ns
Browse files Browse the repository at this point in the history
Closes #1890
Related to #2147
  • Loading branch information
flavorjones committed Dec 26, 2020
1 parent a09b007 commit 5e45691
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 16 deletions.
59 changes: 44 additions & 15 deletions ext/java/nokogiri/XmlXpathContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
package nokogiri;

import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.transform.TransformerException;

Expand Down Expand Up @@ -68,7 +70,6 @@
*/
@JRubyClass(name="Nokogiri::XML::XPathContext")
public class XmlXpathContext extends RubyObject {

static {
final String DTMManager = "org.apache.xml.dtm.DTMManager";
if (SafePropertyAccessor.getProperty(DTMManager) == null) {
Expand Down Expand Up @@ -109,25 +110,53 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, IRubyO
}
}


// see https://en.wikipedia.org/wiki/QName
private static final String NameStartCharStr = "[_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]" ;
private static final String NameCharStr = "[-\\.0-9\u00B7\u0300-\u036F\u203F-\u2040]|" + NameStartCharStr ;
private static final String NCNameStr = "(?:" + NameStartCharStr + ")(?:" + NameCharStr + ")*";
private static final String XPathFunctionCaptureStr = "(" + NCNameStr + "(?=\\())";
private static final Pattern XPathFunctionCaptureRE = Pattern.compile(XPathFunctionCaptureStr);

@JRubyMethod
public IRubyObject evaluate(ThreadContext context, IRubyObject expr, IRubyObject handler) {

String src = expr.convertToString().asJavaString();
if (!handler.isNil()) {
if (!isContainsPrefix(src)) {
StringBuilder replacement = new StringBuilder();
Set<String> methodNames = handler.getMetaClass().getMethods().keySet();
final String PREFIX = NokogiriNamespaceContext.NOKOGIRI_PREFIX;
for (String name : methodNames) {
replacement.setLength(0);
replacement.ensureCapacity(PREFIX.length() + 1 + name.length());
replacement.append(PREFIX).append(':').append(name);
src = src.replace(name, replacement); // replace(name, NOKOGIRI_PREFIX + ':' + name)
public IRubyObject evaluate(ThreadContext context, IRubyObject rbQuery, IRubyObject handler) {
String query = rbQuery.convertToString().asJavaString();

if (!handler.isNil() && !isContainsPrefix(query)) {
//
// The user has passed in a handler, but isn't using the `nokogiri:` prefix as
// instructed in JRuby land, so let's try to be clever and rewrite the query, inserting
// the nokogiri namespace where appropriate.
//
StringBuilder namespacedQuery = new StringBuilder();
int jchar = 0;

// Find the methods on the handler object
Set<String> methodNames = handler.getMetaClass().getMethods().keySet();

// Find the function calls in the xpath query
Matcher xpathFunctionCalls = XPathFunctionCaptureRE.matcher(query);

while (xpathFunctionCalls.find()) {
namespacedQuery.append(query.subSequence(jchar, xpathFunctionCalls.start()));
jchar = xpathFunctionCalls.start();

if (methodNames.contains(xpathFunctionCalls.group())) {
namespacedQuery.append(NokogiriNamespaceContext.NOKOGIRI_PREFIX);
namespacedQuery.append(":");
}

namespacedQuery.append(query.subSequence(xpathFunctionCalls.start(), xpathFunctionCalls.end()));
jchar = xpathFunctionCalls.end();
}

if (jchar < query.length()-1) {
namespacedQuery.append(query.subSequence(jchar, query.length()));
}
query = namespacedQuery.toString();
}

return node_set(context, src, handler);
return node_set(context, query, handler);
}

@JRubyMethod
Expand Down
2 changes: 1 addition & 1 deletion test/files/staff.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
<employeeId>EMP0003</employeeId>
<name>Roger
Jones</name>
<position>Department Manager</position>
<position id='partial_collision_id'>Department Manager</position>
<salary>100,000</salary>
<gender>&ent4;</gender>
<address domestic="Yes" street="No">PO Box 27 Irving, texas 98553</address>
Expand Down
40 changes: 40 additions & 0 deletions test/xml/test_xpath.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ def thing thing
thing
end

def another_thing thing
@things << thing
thing
end

def returns_array node_set
@things << node_set.to_a
node_set.to_a
Expand Down Expand Up @@ -528,6 +533,41 @@ def test_xpath_syntax_error
end
end
end

describe "jruby inferring XPath functions from the handler methods" do
it "should not get confused simply by a string similarity" do
# https://github.com/sparklemotion/nokogiri/pull/1890
# this describes a bug where XmlXpathContext naively replaced query substrings using method names
handler = Class.new do
def collision(nodes)
nil
end
end.new
found_by_id = @xml.xpath("//*[@id='partial_collision_id']", handler)
assert_equal 1, found_by_id.length
end

it "handles multiple handler function calls" do
# test that jruby handles this case identically to C
result = @xml.xpath('//employee[thing(.)]/employeeId[another_thing(.)]', @handler)
assert_equal(5, result.length)
assert_equal(10, @handler.things.length)
end

it "doesn't get confused by an XPath function, flavor 1" do
# test that it doesn't get confused by an XPath function
result = @xml.xpath('//employee[thing(.)]/employeeId[last()]', @handler)
assert_equal(5, result.length)
assert_equal(5, @handler.things.length)
end

it "doesn't get confused by an XPath function, flavor 2" do
# test that it doesn't get confused by an XPath function
result = @xml.xpath('//employee[last()]/employeeId[thing(.)]', @handler)
assert_equal(1, result.length)
assert_equal(1, @handler.things.length)
end
end
end
end
end

0 comments on commit 5e45691

Please sign in to comment.