diff --git a/src/XML.jl b/src/XML.jl index 9027ade..2bac234 100644 --- a/src/XML.jl +++ b/src/XML.jl @@ -357,7 +357,7 @@ write(x; kw...) = (io = IOBuffer(); write(io, x; kw...); String(take!(io))) write(filename::AbstractString, x; kw...) = open(io -> write(io, x; kw...), filename, "w") -function write(io::IO, x; indentsize::Int=2, depth::Int=1) +function write(io::IO, x, ctx::Vector{Bool}=[false]; indentsize::Int=2, depth::Int=1) indent = ' ' ^ indentsize nodetype = XML.nodetype(x) tag = XML.tag(x) @@ -365,48 +365,61 @@ function write(io::IO, x; indentsize::Int=2, depth::Int=1) children = XML.children(x) padding = indent ^ max(0, depth - 1) - print(io, padding) + !ctx[end] && print(io, padding) + if nodetype === Text print(io, value) + elseif nodetype === Element + push!(ctx, ctx[end]) + update_ctx!(ctx, x) print(io, '<', tag) _print_attrs(io, x) print(io, isempty(children) ? '/' : "", '>') if !isempty(children) if length(children) == 1 && XML.nodetype(only(children)) === Text - write(io, only(children); indentsize=0) + write(io, only(children), ctx; indentsize=0) print(io, "') else - println(io) + !ctx[end] && println(io) foreach(children) do child - write(io, child; indentsize, depth = depth + 1) - println(io) + write(io, child, ctx; indentsize, depth=depth + 1) + !ctx[end] && println(io) end - print(io, padding, "') + print(io, !ctx[end] ? padding : "", "') end end + pop!(ctx) + elseif nodetype === DTD print(io, "') + elseif nodetype === Declaration print(io, "") + elseif nodetype === ProcessingInstruction print(io, "") + elseif nodetype === Comment print(io, "") + elseif nodetype === CData print(io, "") + elseif nodetype === Document foreach(children) do child - write(io, child; indentsize) - println(io) + write(io, child, ctx; indentsize) + !ctx[end] && println(io) end + else error("Unreachable case reached during XML.write") end -end end + +end # module XML diff --git a/src/raw.jl b/src/raw.jl index 8b77bba..c6837de 100644 --- a/src/raw.jl +++ b/src/raw.jl @@ -329,7 +329,11 @@ function parent(o::Raw) end #-----------------------------------------------------------------------------# next Raw -isspace(x::UInt8) = Base.isspace(Char(x)) +# isspace(x::UInt8) = Base.isspace(Char(x)) + +# XML whitespace per XML 1.0/1.1 production S: +# S ::= (#x20 | #x9 | #xD | #xA)+ +@inline xml_isspace(b::UInt8)::Bool = (b == 0x20) | (b == 0x09) | (b == 0x0A) | (b == 0x0D) """ next(node) --> typeof(node) or Nothing @@ -353,7 +357,7 @@ function next_xml_space(o::Raw) has_xml_space = o.has_xml_space ctx = copy(o.ctx) last_type = type - k = findnext(!isspace, data, i) + k = findnext(!xml_isspace, data, i) if isnothing(k) return nothing end @@ -369,11 +373,11 @@ function next_xml_space(o::Raw) if c !== '<' || ctx[end] && c === '<' && b === ' ' && last_type === RawElementOpen && d === '/' type = RawText j = findnext(==(UInt8('<')), data, i) - 1 - j = ctx[end] ? j : findprev(!isspace, data, j) # preserving whitespace if needed + j = ctx[end] ? j : findprev(!xml_isspace, data, j) # preserving whitespace if needed if last_type === RawElementClose || last_type === RawElementSelfClosed|| last_type === RawDocument # Maybe drop pure-whitespace inter-element text nodes? # (e.g. whitespace between a closing and an opening tag which would otherwise make an orphan text node) - #if all(isspace, @view data[i:j]) && depth > 1 + #if all(xml_isspace, @view data[i:j]) && depth > 1 # return next(Raw(type, depth, j, 0, data, ctx, has_xml_space)) #end end @@ -429,7 +433,7 @@ function next_no_xml_space(o::Raw) # same as v0.3.5 type = o.type has_xml_space = o.has_xml_space ctx = [false] - i = findnext(!isspace, data, i) + i = findnext(!xml_isspace, data, i) if isnothing(i) return nothing end @@ -441,7 +445,7 @@ function next_no_xml_space(o::Raw) # same as v0.3.5 if c !== '<' type = RawText j = findnext(==(UInt8('<')), data, i) - 1 - j = findprev(!isspace, data, j) # "rstrip" + j = findprev(!xml_isspace, data, j) # "rstrip" elseif c === '<' c2 = Char(o.data[i+1]) if c2 === '!' @@ -514,7 +518,7 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5 ctx = has_xml_space ? copy(o.ctx) : [false] type === RawDocument && return nothing j = o.pos - 1 - j = findprev(!isspace, data, j) + j = findprev(!xml_isspace, data, j) if isnothing(j) return Raw(data, has_xml_space, ctx) # RawDocument end @@ -523,7 +527,7 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5 if c !== '>' # text type = RawText i = findprev(==(UInt8('>')), data, j) + 1 - i = findnext(!isspace, data, i) # "lstrip" + i = findnext(!xml_isspace, data, i) # "lstrip" elseif c === '>' c2 = Char(o.data[j-1]) if c2 === '-'