Skip to content

Commit 8d141ec

Browse files
committed
Change namespace cache strategy
Namespace calculation for each node can't be cached on the document because document lookup is slow for deeply nested nodes. Change the namespace cache strategy: inject the cache hash as an argument when retrieving namespace/namespaces from the XPath match operation.
1 parent a6aa43c commit 8d141ec

5 files changed

Lines changed: 134 additions & 73 deletions

File tree

lib/rexml/document.rb

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -449,17 +449,24 @@ def document
449449

450450
private
451451

452-
attr_accessor :namespaces_cache
453-
454-
# New document level cache is created and available in this block.
455-
# This API is thread unsafe. Users can't change this document in this block.
456-
def enable_cache
457-
@namespaces_cache = {}
458-
begin
459-
yield
460-
ensure
461-
@namespaces_cache = nil
452+
# Returns namespaces defined in attribute list declarations for each element name.
453+
# { element_name => { prefix => uri, ... }, ... }
454+
def attrlist_per_element_namespaces # :nodoc:
455+
per_element_namespaces = {}
456+
if doctype
457+
doctype.each do |child|
458+
next unless child.kind_of? AttlistDecl
459+
element_name = child.element_name
460+
child.each do |name, value|
461+
attr = Attribute.new(name, value)
462+
if attr.prefix == 'xmlns' || attr.name == 'xmlns'
463+
namespaces = per_element_namespaces[element_name] ||= {}
464+
namespaces[attr.name] = attr.value
465+
end
466+
end
467+
end
462468
end
469+
per_element_namespaces
463470
end
464471

465472
def build( source )

lib/rexml/element.rb

Lines changed: 47 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -588,12 +588,7 @@ def prefixes
588588
# d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"}
589589
#
590590
def namespaces
591-
namespaces_cache = document&.__send__(:namespaces_cache)
592-
if namespaces_cache
593-
namespaces_cache[self] ||= calculate_namespaces
594-
else
595-
calculate_namespaces
596-
end
591+
calculate_namespaces
597592
end
598593

599594
# :call-seq:
@@ -618,13 +613,10 @@ def namespaces
618613
#
619614
def namespace(prefix=nil)
620615
if prefix.nil?
621-
prefix = prefix()
616+
namespace_internal
617+
else
618+
namespace_lookup_internal(prefix)
622619
end
623-
prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:")
624-
ns = namespaces[prefix]
625-
626-
ns = '' if ns.nil? and prefix == 'xmlns'
627-
ns
628620
end
629621

630622
# :call-seq:
@@ -1508,12 +1500,34 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
15081500
end
15091501

15101502
private
1511-
def calculate_namespaces
1512-
if parent
1513-
parent.namespaces.merge(attributes.namespaces)
1514-
else
1515-
attributes.namespaces
1516-
end
1503+
1504+
# Returns namespace of the element
1505+
def namespace_internal(namespaces = self.calculate_namespaces)
1506+
namespace_lookup_internal(prefix, namespaces)
1507+
end
1508+
1509+
# Lookup namespace for the given prefix in the context of the element
1510+
def namespace_lookup_internal(prefix, namespaces = self.calculate_namespaces)
1511+
prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:")
1512+
ns = namespaces[prefix]
1513+
ns = '' if ns.nil? and prefix == 'xmlns'
1514+
ns
1515+
end
1516+
1517+
def calculate_namespaces(cache_hash = nil, attrlist_element_namespaces = nil)
1518+
return cache_hash[self] if cache_hash && cache_hash.key?(self)
1519+
1520+
inherited_namespaces = parent ? parent.send(:calculate_namespaces, cache_hash, attrlist_element_namespaces) : {}
1521+
attrlist_element_namespaces ||= document&.send(:attrlist_per_element_namespaces)
1522+
attrlist_namespaces = attrlist_element_namespaces&.[](is_a?(Document) ? doctype&.name : expanded_name)
1523+
own_namespaces = attributes.send(:own_namespaces)
1524+
1525+
# Inherited namespaces can be overridden by attribute list declaration, and both can be overridden by its own attributes
1526+
namespaces = inherited_namespaces
1527+
namespaces = namespaces.merge(attrlist_namespaces) if attrlist_namespaces
1528+
namespaces = namespaces.merge(own_namespaces) if own_namespaces.any?
1529+
cache_hash[self] = namespaces if cache_hash
1530+
namespaces
15171531
end
15181532

15191533
def __to_xpath_helper node
@@ -2386,6 +2400,15 @@ def []=( name, value )
23862400
@element
23872401
end
23882402

2403+
# Returns namespaces directly declared in this attribute set
2404+
private def own_namespaces # :nodoc:
2405+
namespaces = {}
2406+
each_attribute do |attribute|
2407+
namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
2408+
end
2409+
namespaces
2410+
end
2411+
23892412
# :call-seq:
23902413
# prefixes -> array_of_prefix_strings
23912414
#
@@ -2424,19 +2447,12 @@ def prefixes
24242447
# d.root.attributes.namespaces # => {"xmlns"=>"foo", "x"=>"bar", "y"=>"twee"}
24252448
#
24262449
def namespaces
2427-
namespaces = {}
2428-
each_attribute do |attribute|
2429-
namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
2430-
end
2431-
doctype = @element.document&.doctype
2432-
if doctype
2433-
expn = @element.expanded_name
2434-
expn = doctype.name if expn.size == 0
2435-
doctype.attributes_of(expn).each {
2436-
|attribute|
2437-
namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
2438-
}
2439-
end
2450+
doc = @element.document
2451+
doctype = doc&.doctype
2452+
attrlist_element_namespaces = doc&.send(:attrlist_per_element_namespaces)
2453+
attrlist_namespaces = attrlist_element_namespaces&.[](@element.is_a?(Document) ? doctype&.name : @element.expanded_name)
2454+
namespaces = own_namespaces
2455+
namespaces = attrlist_namespaces.merge(namespaces) if attrlist_namespaces
24402456
namespaces
24412457
end
24422458

lib/rexml/xpath_parser.rb

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ def initialize(strict: false)
6363
@namespaces = nil
6464
@variables = {}
6565
@functions = FunctionsClass.new
66+
@attrlist_per_element_namespaces = nil
67+
@document = nil
68+
@element_namespaces_cache = {}
6669
@nest = 0
6770
@strict = strict
6871
end
@@ -85,14 +88,8 @@ def parse path, node
8588
node = node.first
8689
end
8790

88-
document = node.document
89-
if document
90-
document.__send__(:enable_cache) do
91-
match( path_stack, node )
92-
end
93-
else
94-
match( path_stack, node )
95-
end
91+
@document = node.document
92+
match( path_stack, node )
9693
end
9794

9895
def get_first path, node
@@ -150,7 +147,6 @@ def first( path_stack, node )
150147
end
151148
end
152149

153-
154150
def match(path_stack, node)
155151
nodeset = [node]
156152
result = expr(path_stack, nodeset)
@@ -167,20 +163,45 @@ def strict?
167163
@strict
168164
end
169165

170-
# Returns a String namespace for a node, given a prefix
166+
# Returns a String namespace for a prefix used in xpath.
171167
# The rules are:
172168
#
173169
# 1. Use the supplied namespace mapping first.
174-
# 2. If no mapping was supplied, use the context node to look up the namespace
175-
def get_namespace( node, prefix )
170+
# 2. If no mapping was supplied, use the context node to look up the namespace as a fallback.
171+
def get_xpath_namespace( node, prefix )
176172
if @namespaces
177173
@namespaces[prefix] || ''
174+
elsif node.node_type == :element
175+
element_namespace_lookup(node, prefix)
178176
else
179-
return node.namespace( prefix ) if node.node_type == :element
180177
''
181178
end
182179
end
183180

181+
# Returns attribute's namespace URI while caching the
182+
# intermediate result to speed up retrieval of namespaces
183+
def attribute_namespace(attribute)
184+
attribute.prefix == '' ? '' : element_namespace_lookup(attribute.element, attribute.prefix)
185+
end
186+
187+
# Return element's namespace URI while caching the
188+
# intermediate result to speed up retrieval of namespaces
189+
def element_namespace(element)
190+
element.send(:namespace_internal, element_namespaces(element))
191+
end
192+
193+
# Returns a hash of namespaces for the given element while caching the
194+
# intermediate result to speed up retrieval of namespaces
195+
def element_namespaces(element)
196+
@attrlist_per_element_namespaces ||= @document&.send(:attrlist_per_element_namespaces) || {}
197+
element.send(:calculate_namespaces, @element_namespaces_cache, @attrlist_per_element_namespaces)
198+
end
199+
200+
# Returns namespace of the prefix in the context of the element,
201+
# while caching the intermediate result to speed up retrieval of namespaces
202+
def element_namespace_lookup(element, prefix)
203+
element.send(:namespace_lookup_internal, prefix, element_namespaces(element))
204+
end
184205

185206
# Expr takes a stack of path elements and a set of nodes (either a Parent
186207
# or an Array and returns an Array of matching nodes
@@ -641,20 +662,20 @@ def node_test(path_stack, any_type: :element)
641662
node.name == name
642663
elsif prefix.empty?
643664
if strict?
644-
node.name == name and node.namespace == ""
665+
node.name == name and element_namespace(node) == ""
645666
else
646-
node.name == name and node.namespace == get_namespace(node, prefix)
667+
node.name == name and element_namespace(node) == get_xpath_namespace(node, prefix)
647668
end
648669
else
649-
node.name == name and node.namespace == get_namespace(node, prefix)
670+
node.name == name and element_namespace(node) == get_xpath_namespace(node, prefix)
650671
end
651672
when :attribute
652673
if prefix.nil?
653674
node.name == name
654675
elsif prefix.empty?
655-
node.name == name and node.namespace == ""
676+
node.name == name and attribute_namespace(node) == ""
656677
else
657-
node.name == name and node.namespace == get_namespace(node.element, prefix)
678+
node.name == name and attribute_namespace(node) == get_xpath_namespace(node.element, prefix)
658679
end
659680
else
660681
false
@@ -665,11 +686,11 @@ def node_test(path_stack, any_type: :element)
665686
->(node) do
666687
case node.node_type
667688
when :element
668-
namespaces = @namespaces || node.namespaces
669-
node.namespace == namespaces[prefix]
689+
namespaces = @namespaces || element_namespaces(node)
690+
element_namespace(node) == namespaces[prefix]
670691
when :attribute
671-
namespaces = @namespaces || node.element.namespaces
672-
node.namespace == namespaces[prefix]
692+
namespaces = @namespaces || element_namespaces(node.element)
693+
attribute_namespace(node) == namespaces[prefix]
673694
else
674695
false
675696
end

test/test_core.rb

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,33 @@ def test_attlist_decl
916916
assert_equal correct, prefixes
917917
end
918918

919+
def test_attrlist_namespace_priority
920+
doc = Document.new <<~XML
921+
<!DOCTYPE blah [
922+
<!ATTLIST a
923+
xmlns CDATA "bar">
924+
]>
925+
<root xmlns='foo'>
926+
<a><b/></a>
927+
<a xmlns='baz'><c/></a>
928+
<d/>
929+
</root>
930+
XML
931+
a1, a2, d = doc.root.children.grep(REXML::Element)
932+
b = a1.first
933+
c = a2.first
934+
assert_equal('foo', doc.root.namespace)
935+
assert_equal('bar', a1.namespace)
936+
assert_equal('bar', b.namespace)
937+
assert_equal('baz', a2.namespace)
938+
assert_equal('baz', c.namespace)
939+
assert_equal('foo', d.namespace)
940+
assert_equal({}, doc.attributes.namespaces)
941+
assert_equal({ 'xmlns' => 'foo' }, doc.root.attributes.namespaces)
942+
assert_equal({ 'xmlns' => 'bar' }, a1.attributes.namespaces)
943+
assert_equal({ 'xmlns' => 'baz' }, a2.attributes.namespaces)
944+
end
945+
919946
def test_attlist_write
920947
doc = File.open(fixture_path("foo.xml")) {|file| Document.new file }
921948
out = ''

test/xpath/test_base.rb

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,16 +1312,6 @@ def test_namespaces_0
13121312
assert_equal( 1, XPath.match( d, "//x:*" ).size )
13131313
end
13141314

1315-
def test_namespaces_cache
1316-
doc = Document.new("<a xmlns='1'><b/></a>")
1317-
assert_equal("<b/>", XPath.first(doc, "//b[namespace-uri()='1']").to_s)
1318-
assert_nil(XPath.first(doc, "//b[namespace-uri()='']"))
1319-
1320-
doc.root.delete_namespace
1321-
assert_nil(XPath.first(doc, "//b[namespace-uri()='1']"))
1322-
assert_equal("<b/>", XPath.first(doc, "//b[namespace-uri()='']").to_s)
1323-
end
1324-
13251315
def test_ticket_71
13261316
doc = Document.new(%Q{<root xmlns:ns1="xyz" xmlns:ns2="123"><element ns1:attrname="foo" ns2:attrname="bar"/></root>})
13271317
el = doc.root.elements[1]

0 commit comments

Comments
 (0)