Skip to content

Commit 5d35322

Browse files
committed
Change namespace cache strategy
Namespace calculation for each node can't be cached to document because document lookup is slow for deeply nested nodes. Change namespace cache strategy, inject cached hash as an argument to retrieve namespace/namespaces from XPath match operation and also for bare namespace call.
1 parent 42e3adf commit 5d35322

5 files changed

Lines changed: 125 additions & 80 deletions

File tree

lib/rexml/doctype.rb

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -113,27 +113,26 @@ def node_type
113113
end
114114

115115
def attributes_of element
116-
attribute_declarations_of(element).map do |key, value|
116+
raw_attributes = _attlist_mappings[element] || {}
117+
raw_attributes.map do |key, value|
117118
Attribute.new(key, value)
118119
end
119120
end
120121

121122
def attribute_of element, attribute
122-
attribute_declarations_of(element)[attribute]
123+
_attlist_mappings[element]&.[](attribute)
123124
end
124125

125-
def attribute_declarations_of(name)
126-
raw_attributes = {}
127-
decls = select do |child|
128-
child.kind_of?(AttlistDecl) && child.element_name == name
129-
end
130-
decls.each do |child|
126+
def _attlist_mappings
127+
mapping = {}
128+
grep(AttlistDecl).each do |child|
129+
raw_attributes = mapping[child.element_name] ||= {}
131130
child.each do |key, val|
132131
# First declaration wins
133132
raw_attributes[key] = val unless raw_attributes.key? key
134133
end
135134
end
136-
raw_attributes
135+
mapping
137136
end
138137

139138
def clone

lib/rexml/element.rb

Lines changed: 52 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -588,12 +588,7 @@ def prefixes
588588
# d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"}
589589
#
590590
def namespaces
591-
namespaces_cache = document&.__send__(:namespaces_cache)
592-
if namespaces_cache
593-
namespaces_cache[self] ||= calculate_namespaces
594-
else
595-
calculate_namespaces
596-
end
591+
_calculate_namespaces
597592
end
598593

599594
# :call-seq:
@@ -618,13 +613,10 @@ def namespaces
618613
#
619614
def namespace(prefix=nil)
620615
if prefix.nil?
621-
prefix = prefix()
616+
_namespace_internal
617+
else
618+
_namespace_lookup_internal(prefix)
622619
end
623-
prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:")
624-
ns = namespaces[prefix]
625-
626-
ns = '' if ns.nil? and prefix == 'xmlns'
627-
ns
628620
end
629621

630622
# :call-seq:
@@ -1507,15 +1499,32 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
15071499
formatter.write( self, output )
15081500
end
15091501

1510-
private
1511-
def calculate_namespaces
1512-
if parent
1513-
parent.namespaces.merge(attributes.namespaces)
1514-
else
1515-
attributes.namespaces
1516-
end
1502+
# Returns namespace of the element
1503+
def _namespace_internal(namespaces = _calculate_namespaces) # :nodoc:
1504+
_namespace_lookup_internal(prefix, namespaces)
1505+
end
1506+
1507+
# Lookup namespace for the given prefix in the context of the element
1508+
def _namespace_lookup_internal(prefix, namespaces = _calculate_namespaces) # :nodoc:
1509+
prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:")
1510+
ns = namespaces[prefix]
1511+
ns = '' if ns.nil? and prefix == 'xmlns'
1512+
ns
1513+
end
1514+
1515+
def _calculate_namespaces(cache_hash = nil, attlist_mappings = nil) # :nodoc:
1516+
return cache_hash[self] if cache_hash && cache_hash.key?(self)
1517+
1518+
attlist_mappings ||= document&.doctype&._attlist_mappings || {}
1519+
inherited_namespaces = parent ? parent._calculate_namespaces(cache_hash, attlist_mappings) : {}
1520+
attr_namespaces = attributes._calculate_namespaces(attlist_mappings)
1521+
namespaces = attr_namespaces.any? ? inherited_namespaces.merge(attr_namespaces) : inherited_namespaces
1522+
cache_hash[self] = namespaces if cache_hash
1523+
namespaces
15171524
end
15181525

1526+
private
1527+
15191528
def __to_xpath_helper node
15201529
rv = node.expanded_name.clone
15211530
if node.parent
@@ -2297,7 +2306,7 @@ def each
22972306
# attrs.get_attribute('nosuch') # => nil
22982307
#
22992308
def get_attribute( name )
2300-
fetch(name, nil) || attlist_attributes&.[](name)
2309+
fetch(name, nil) || attlist_attributes[name]
23012310
end
23022311

23032312
# :call-seq:
@@ -2368,11 +2377,7 @@ def prefixes
23682377
# d.root.attributes.namespaces # => {"xmlns"=>"foo", "x"=>"bar", "y"=>"twee"}
23692378
#
23702379
def namespaces
2371-
namespaces = {}
2372-
each_effective_attribute do |attribute|
2373-
namespaces[attribute.name] = attribute.value if attribute.namespace_declaration?
2374-
end
2375-
namespaces
2380+
_calculate_namespaces
23762381
end
23772382

23782383
# :call-seq:
@@ -2503,33 +2508,40 @@ def get_attribute_ns(namespace, name)
25032508
end
25042509
end
25052510

2511+
def _calculate_namespaces(attlist_mappings = nil) # :nodoc:
2512+
namespaces = {}
2513+
each_effective_attribute(attlist_mappings) do |attribute|
2514+
namespaces[attribute.name] = attribute.value if attribute.namespace_declaration?
2515+
end
2516+
namespaces
2517+
end
2518+
25062519
private
25072520

25082521
# Iterates over the attributes of the element and those in the DTD.
25092522
# If the element has an attribute with the same name as one in the DTD,
25102523
# the element's attribute takes precedence.
2511-
def each_effective_attribute
2512-
return to_enum(__method__) unless block_given?
2513-
attr = attlist_attributes
2514-
attr = attr ? attr.merge(self) : self
2515-
attr.each_value do |attribute|
2524+
def each_effective_attribute(attlist_mappings = nil)
2525+
return to_enum(__method__, attlist_mappings) unless block_given?
2526+
attributes = attlist_attributes(attlist_mappings).merge(self)
2527+
attributes.each_value do |attribute|
25162528
yield attribute
25172529
end
25182530
end
25192531

25202532
alias each_own_attribute each_value
25212533
private :each_own_attribute
25222534

2523-
def attlist_attributes
2524-
doctype = @element.document&.doctype
2525-
if doctype
2526-
expn = @element.is_a?(Document) ? doctype.name : @element.expanded_name
2527-
doctype.attribute_declarations_of(expn).map do |key, value|
2528-
attr = Attribute.new(key, value)
2529-
attr.element = @element
2530-
[key, attr]
2531-
end.to_h
2532-
end
2535+
def attlist_attributes(attlist_mappings = nil)
2536+
attlist_mappings ||= @element.document&.doctype&._attlist_mappings
2537+
return {} unless attlist_mappings
2538+
2539+
expn = @element.is_a?(Document) ? @element.doctype&.name : @element.expanded_name
2540+
attlist_mappings[expn]&.map do |key, value|
2541+
attr = Attribute.new(key, value)
2542+
attr.element = @element
2543+
[key, attr]
2544+
end.to_h
25332545
end
25342546
end
25352547
end

lib/rexml/xpath_parser.rb

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ def initialize(strict: false)
6363
@namespaces = nil
6464
@variables = {}
6565
@functions = FunctionsClass.new
66+
@attlist_mappings = nil
67+
@document = nil
68+
@element_namespaces_cache = {}
6669
@nest = 0
6770
@strict = strict
6871
end
@@ -85,14 +88,8 @@ def parse path, node
8588
node = node.first
8689
end
8790

88-
document = node.document
89-
if document
90-
document.__send__(:enable_cache) do
91-
match( path_stack, node )
92-
end
93-
else
94-
match( path_stack, node )
95-
end
91+
@document = node.document
92+
match(path_stack, node)
9693
end
9794

9895
def get_first path, node
@@ -167,20 +164,45 @@ def strict?
167164
@strict
168165
end
169166

170-
# Returns a String namespace for a node, given a prefix
167+
# Returns a String namespace for a prefix used in xpath
171168
# The rules are:
172169
#
173170
# 1. Use the supplied namespace mapping first.
174-
# 2. If no mapping was supplied, use the context node to look up the namespace
175-
def get_namespace( node, prefix )
171+
# 2. If no mapping was supplied, use the context node to look up the namespace as a fallback.
172+
def get_xpath_namespace(node, prefix)
176173
if @namespaces
177174
@namespaces[prefix] || ''
175+
elsif node.node_type == :element
176+
element_namespace_lookup(node, prefix)
178177
else
179-
return node.namespace( prefix ) if node.node_type == :element
180178
''
181179
end
182180
end
183181

182+
# Returns attribute's namespace URI while caching the
183+
# intermediate result to speed up retrieval of namespaces
184+
def attribute_namespace(attribute)
185+
attribute.prefix == '' ? '' : element_namespace_lookup(attribute.element, attribute.prefix)
186+
end
187+
188+
# Return element's namespace URI while caching the
189+
# intermediate result to speed up retrieval of namespaces
190+
def element_namespace(element)
191+
element._namespace_internal(element_namespaces(element))
192+
end
193+
194+
# Returns a hash of namespaces for the given element while caching the
195+
# intermediate result to speed up retrieval of namespaces
196+
def element_namespaces(element)
197+
@attlist_mappings ||= @document&.doctype&._attlist_mappings || {}
198+
element._calculate_namespaces(@element_namespaces_cache, @attlist_mappings)
199+
end
200+
201+
# Returns namespace of the prefix in the context of the element,
202+
# while caching the intermediate result to speed up retrieval of namespaces
203+
def element_namespace_lookup(element, prefix)
204+
element._namespace_lookup_internal(prefix, element_namespaces(element))
205+
end
184206

185207
# Expr takes a stack of path elements and a set of nodes (either a Parent
186208
# or an Array and returns an Array of matching nodes
@@ -641,20 +663,20 @@ def node_test(path_stack, any_type: :element)
641663
node.name == name
642664
elsif prefix.empty?
643665
if strict?
644-
node.name == name and node.namespace == ""
666+
node.name == name and element_namespace(node) == ""
645667
else
646-
node.name == name and node.namespace == get_namespace(node, prefix)
668+
node.name == name and element_namespace(node) == get_xpath_namespace(node, prefix)
647669
end
648670
else
649-
node.name == name and node.namespace == get_namespace(node, prefix)
671+
node.name == name and element_namespace(node) == get_xpath_namespace(node, prefix)
650672
end
651673
when :attribute
652674
if prefix.nil?
653675
node.name == name
654676
elsif prefix.empty?
655-
node.name == name and node.namespace == ""
677+
node.name == name and attribute_namespace(node) == ""
656678
else
657-
node.name == name and node.namespace == get_namespace(node.element, prefix)
679+
node.name == name and attribute_namespace(node) == get_xpath_namespace(node.element, prefix)
658680
end
659681
else
660682
false
@@ -665,11 +687,11 @@ def node_test(path_stack, any_type: :element)
665687
->(node) do
666688
case node.node_type
667689
when :element
668-
namespaces = @namespaces || node.namespaces
669-
node.namespace == namespaces[prefix]
690+
namespaces = @namespaces || element_namespaces(node)
691+
element_namespace(node) == namespaces[prefix]
670692
when :attribute
671-
namespaces = @namespaces || node.element.namespaces
672-
node.namespace == namespaces[prefix]
693+
namespaces = @namespaces || element_namespaces(node.element)
694+
attribute_namespace(node) == namespaces[prefix]
673695
else
674696
false
675697
end

test/test_namespace.rb

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
# frozen_string_literal: false
2+
require "core_assertions"
23

34
module REXMLTests
45
class TestNamespace < Test::Unit::TestCase
6+
include Test::Unit::CoreAssertions
57
include Helper::Fixture
68
include REXML
79

@@ -34,5 +36,25 @@ def test_xml_namespace
3436
assert_equal("http://www.w3.org/XML/1998/namespace",
3537
document.root.namespace("xml"))
3638
end
39+
40+
def test_deep_element_namespace_linear
41+
omit('Recursion too deep on JRuby') if RUBY_ENGINE == "jruby"
42+
43+
max_depth = 3000
44+
xml = <<~XML
45+
<root xmlns="one">#{'<a>' * max_depth + '</a>' * max_depth}</root>
46+
XML
47+
doc = Document.new(xml)
48+
prepare_element = ->(depth) do
49+
node = doc.root
50+
depth.times { node = node.first }
51+
node || raise
52+
end
53+
54+
assert_linear_performance([30, 100, 300, 1000, 3000], rehearsal: 10) do |depth|
55+
elem = prepare_element.call(depth)
56+
elem.namespace
57+
end
58+
end
3759
end
3860
end

test/xpath/test_base.rb

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,16 +1312,6 @@ def test_namespaces_0
13121312
assert_equal( 1, XPath.match( d, "//x:*" ).size )
13131313
end
13141314

1315-
def test_namespaces_cache
1316-
doc = Document.new("<a xmlns='1'><b/></a>")
1317-
assert_equal("<b/>", XPath.first(doc, "//b[namespace-uri()='1']").to_s)
1318-
assert_nil(XPath.first(doc, "//b[namespace-uri()='']"))
1319-
1320-
doc.root.delete_namespace
1321-
assert_nil(XPath.first(doc, "//b[namespace-uri()='1']"))
1322-
assert_equal("<b/>", XPath.first(doc, "//b[namespace-uri()='']").to_s)
1323-
end
1324-
13251315
def test_ticket_71
13261316
doc = Document.new(%Q{<root xmlns:ns1="xyz" xmlns:ns2="123"><element ns1:attrname="foo" ns2:attrname="bar"/></root>})
13271317
el = doc.root.elements[1]

0 commit comments

Comments
 (0)