From 24fc53326c9782a3a66cca49bd555c4a9e053dae Mon Sep 17 00:00:00 2001 From: Lucas Hills <2potatocakes@gmail.com> Date: Thu, 27 Feb 2014 22:55:08 +1100 Subject: [PATCH 1/4] Fix attribute nodes namespace when using XML::Reader Fixes assertion failure. xml reader is choking while trying to access namespace information tied to the attribute nodes. The assertion was: Assertion failed: (doc->_private), function Nokogiri_wrap_xml_namespace, file xml_namespace.c, line 41 --- ext/nokogiri/xml_namespace.c | 16 ++++++++++------ test/test_reader.rb | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/ext/nokogiri/xml_namespace.c b/ext/nokogiri/xml_namespace.c index 44bb25657f..4aa5ec784d 100644 --- a/ext/nokogiri/xml_namespace.c +++ b/ext/nokogiri/xml_namespace.c @@ -37,20 +37,24 @@ static VALUE href(VALUE self) VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node) { VALUE ns, document, node_cache; + nokogiriTuplePtr node_has_a_document; - assert(doc->_private); + if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc; + node_has_a_document = DOC_RUBY_OBJECT_TEST(doc); - if(node->_private) + if(node->_private && node_has_a_document) return (VALUE)node->_private; ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node); - document = DOC_RUBY_OBJECT(doc); + if (doc->_private) { + document = DOC_RUBY_OBJECT(doc); - node_cache = rb_iv_get(document, "@node_cache"); - rb_ary_push(node_cache, ns); + node_cache = rb_iv_get(document, "@node_cache"); + rb_ary_push(node_cache, ns); - rb_iv_set(ns, "@document", DOC_RUBY_OBJECT(doc)); + rb_iv_set(ns, "@document", DOC_RUBY_OBJECT(doc)); + } node->_private = (void *)ns; diff --git a/test/test_reader.rb b/test/test_reader.rb index c1a96c3ae4..e43cbb1ebf 100644 --- a/test/test_reader.rb +++ b/test/test_reader.rb @@ -386,6 +386,25 @@ def test_ns_uri reader.map { |n| n.namespace_uri }) end + def test_namespaced_attributes + reader = Nokogiri::XML::Reader.from_memory(<<-eoxml) + + hello + + + eoxml + attr_ns = [] + while reader.read + if reader.node_type == Nokogiri::XML::Node::ELEMENT_NODE + reader.attribute_nodes.each {|attr| attr_ns << (attr.namespace.nil? ? nil : attr.namespace.prefix) } + end + end + assert_equal(['commons', + 'edi', + nil], + attr_ns) + end + def test_local_name reader = Nokogiri::XML::Reader.from_memory(<<-eoxml) From b5c179afa46d698f2568a2137632ca04dbac6701 Mon Sep 17 00:00:00 2001 From: Brian Palmer Date: Tue, 4 Aug 2015 11:10:18 -0600 Subject: [PATCH 2/4] return the namespace even if it's not in the cache This fixes Nokogiri::XML::Attr#namespace not working when used with XmlReader, because namespaces never get cached in that scenario. We can't add it to the cache at this point, because the node that defined the namespace is not available. --- ext/java/nokogiri/XmlNode.java | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index 5f620c767d..8b5673a3cc 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -1082,12 +1082,23 @@ public IRubyObject key_p(ThreadContext context, IRubyObject rbkey) { @JRubyMethod public IRubyObject namespace(ThreadContext context) { - if (doc instanceof HtmlDocument) return context.getRuntime().getNil(); + Ruby runtime = context.getRuntime(); + if (doc instanceof HtmlDocument) return runtime.getNil(); NokogiriNamespaceCache nsCache = NokogiriHelpers.getNamespaceCacheFormNode(node); String prefix = node.getPrefix(); - XmlNamespace namespace = nsCache.get(prefix == null ? "" : prefix, node.getNamespaceURI()); + String namespaceURI = node.getNamespaceURI(); + if (namespaceURI == null || namespaceURI == "") { + return runtime.getNil(); + } + + XmlNamespace namespace = nsCache.get(prefix == null ? "" : prefix, namespaceURI); if (namespace == null || namespace.isEmpty()) { - return context.getRuntime().getNil(); + // if it's not in the cache, create an unowned, uncached namespace and + // return that. XmlReader can't insert namespaces into the cache, so + // this is necessary for XmlReader to work correctly. + namespace = + (XmlNamespace) NokogiriService.XML_NAMESPACE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Namespace")); + namespace.init(null, RubyString.newString(runtime, prefix), RubyString.newString(runtime, namespaceURI), doc); } return namespace; From 743058a3cf79c004fb13d5ce130778cedb4bfaaa Mon Sep 17 00:00:00 2001 From: John Shahid Date: Tue, 8 Dec 2015 19:41:58 -0500 Subject: [PATCH 3/4] just return the ruby object of node if one exists the fix in the previous commit is overly complicated. the if conditional originaly tested whether the passed document has a ruby object in '_private' which is unnecessary since we return the node's ruby object if one exists. This commit moves the conditional to the beginning of the function and simplifies it.a new object will be created and attach to the document, otherwise. --- ext/nokogiri/xml_namespace.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ext/nokogiri/xml_namespace.c b/ext/nokogiri/xml_namespace.c index 4aa5ec784d..5a9b57bde3 100644 --- a/ext/nokogiri/xml_namespace.c +++ b/ext/nokogiri/xml_namespace.c @@ -37,13 +37,10 @@ static VALUE href(VALUE self) VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node) { VALUE ns, document, node_cache; - nokogiriTuplePtr node_has_a_document; - if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc; - node_has_a_document = DOC_RUBY_OBJECT_TEST(doc); + if (node->_private) return (VALUE)node->_private; - if(node->_private && node_has_a_document) - return (VALUE)node->_private; + if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc; ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node); From d81ce3b064a7048df14b645141bf1fa150b4c20f Mon Sep 17 00:00:00 2001 From: John Shahid Date: Wed, 9 Dec 2015 06:42:22 -0500 Subject: [PATCH 4/4] use `new' instead of the allocator some minor changes as well for readability --- ext/java/nokogiri/XmlNode.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index 8b5673a3cc..fa0fd2548a 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -1085,20 +1085,21 @@ public IRubyObject namespace(ThreadContext context) { Ruby runtime = context.getRuntime(); if (doc instanceof HtmlDocument) return runtime.getNil(); NokogiriNamespaceCache nsCache = NokogiriHelpers.getNamespaceCacheFormNode(node); - String prefix = node.getPrefix(); String namespaceURI = node.getNamespaceURI(); if (namespaceURI == null || namespaceURI == "") { return runtime.getNil(); } + String prefix = node.getPrefix(); XmlNamespace namespace = nsCache.get(prefix == null ? "" : prefix, namespaceURI); if (namespace == null || namespace.isEmpty()) { // if it's not in the cache, create an unowned, uncached namespace and // return that. XmlReader can't insert namespaces into the cache, so // this is necessary for XmlReader to work correctly. - namespace = - (XmlNamespace) NokogiriService.XML_NAMESPACE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Namespace")); - namespace.init(null, RubyString.newString(runtime, prefix), RubyString.newString(runtime, namespaceURI), doc); + namespace = new XmlNamespace(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Namespace")); + IRubyObject rubyPrefix = NokogiriHelpers.stringOrNil(runtime, prefix); + IRubyObject rubyUri = NokogiriHelpers.stringOrNil(runtime, namespaceURI); + namespace.init(null, rubyPrefix, rubyUri, doc); } return namespace;