44//
55// Parsing model: encoding/xml streams tokens; we keep one open <component> in
66// docParser.cur. We read <pkgname>, <keywords>/<keyword>, and <categories>/<category>.
7- // AppStream puts xml:lang on parent blocks; only neutral or en/de blocks are indexed.
8- // flush runs at </component> (and before the next <component>) to emit pkgname + terms.
7+ // AppStream puts xml:lang on parent blocks; only neutral or en/de blocks are indexed —
8+ // rejected blocks are skipped wholesale via xml.Decoder.Skip. flush runs at </component>
9+ // (and before the next <component>) to emit pkgname + terms.
910package appstream
1011
1112import (
@@ -15,12 +16,6 @@ import (
1516 "strings"
1617)
1718
18- // XML element names referenced more than once in the decoder.
19- const (
20- elKeyword = "keyword"
21- elCategory = "category"
22- )
23-
2419// IndexTerms holds text extracted from one <component> for FTS (merged by pkgname in update.go).
2520type IndexTerms struct {
2621 Keywords []string
@@ -45,29 +40,24 @@ func ParseComponentsXML(r io.Reader, fn func(pkgname string, terms IndexTerms) e
4540 return err
4641 }
4742 case xml.EndElement :
48- if err := p .endElement (t ); err != nil {
49- return err
50- }
43+ p .endElement (t )
5144 case xml.CharData :
5245 p .charData (t )
5346 }
5447 }
5548}
5649
57- // docParser holds decoder state between tokens.
50+ // docParser holds decoder state between tokens. Rejected <keywords>/<categories> blocks and
51+ // rejected <keyword>/<category> elements are skipped via xml.Decoder.Skip, so their children and end tags never reach us — no skip flags needed.
5852type docParser struct {
59- fn func (string , IndexTerms ) error
60- dec * xml.Decoder
61- stack []string
62- inKeywords bool
63- keywordsBlockSkip bool
64- inKeyword bool
65- keywordSkip bool
66- inCategories bool
67- categoriesBlockSkip bool
68- inCategory bool
69- categorySkip bool
70- cur * component
53+ fn func (string , IndexTerms ) error // callback invoked by flush with the component's pkgname and terms
54+ dec * xml.Decoder // token source; startElement calls Skip on it for rejected elements
55+ cur * component // component being collected: set on <component>, cleared by flush, nil outside one
56+ inPkgname bool // true between <pkgname> and </pkgname> while a component is open
57+ inKeywords bool // true inside a <keywords> block whose attributes were accepted
58+ inKeyword bool // true inside an accepted <keyword> within such a block
59+ inCategories bool // true inside a <categories> block whose attributes were accepted
60+ inCategory bool // true inside an accepted <category> within such a block
7161}
7262
7363// flush emits cur via fn and clears it. EOF calls flush for the last component.
@@ -84,92 +74,86 @@ func (p *docParser) flush() error {
8474 return p .fn (name , terms )
8575}
8676
87- // startElement pushes stack; on <component> flushes the previous component then starts a new cur.
8877func (p * docParser ) startElement (t xml.StartElement ) error { // handles an opening tag by local name; returns any error from flush or Skip
89- local := t .Name .Local
90- p .stack = append (p .stack , local )
91-
92- switch local {
78+ switch t .Name .Local {
9379 case "component" :
9480 if err := p .flush (); err != nil { // emit the previous component, if any, before starting a new one
9581 return err
9682 }
9783 p .cur = & component {}
84+ case "pkgname" :
85+ if p .cur != nil { // only track <pkgname> while a component is open
86+ p .inPkgname = true
87+ }
9888 case "keywords" :
89+ if ! keywordLangAccepted (t .Attr ) {
90+ return p .dec .Skip () // consume the whole rejected block up to and including its end tag
91+ }
9992 p .inKeywords = true
100- p .keywordsBlockSkip = ! keywordLangAccepted (t .Attr )
101- case elKeyword :
102- if p .inKeywords {
103- p .inKeyword = true
104- p .keywordSkip = ! keywordLangAccepted (t .Attr )
93+ case "keyword" :
94+ if ! p .inKeywords { // not inside an accepted <keywords> block: leave inKeyword unset so its text is not collected as a keyword
95+ return nil
96+ }
97+ if ! keywordLangAccepted (t .Attr ) {
98+ return p .dec .Skip () // rejected single <keyword>: its text and end tag are never delivered
10599 }
100+ p .inKeyword = true
106101 case "categories" :
102+ if ! keywordLangAccepted (t .Attr ) {
103+ return p .dec .Skip () // same wholesale skip as for rejected <keywords>
104+ }
107105 p .inCategories = true
108- p .categoriesBlockSkip = ! keywordLangAccepted (t .Attr )
109- case elCategory :
110- if p .inCategories {
111- p .inCategory = true
112- p .categorySkip = ! keywordLangAccepted (t .Attr )
106+ case "category" :
107+ if ! p .inCategories { // not inside an accepted <categories> block: leave inCategory unset so its text is not collected as a category
108+ return nil
109+ }
110+ if ! keywordLangAccepted (t .Attr ) {
111+ return p .dec .Skip () // rejected single <category>: its text and end tag are never delivered
113112 }
113+ p .inCategory = true
114114 }
115115 return nil
116116}
117117
118- // endElement pops stack; on </component> flushes the finished component.
119- func (p * docParser ) endElement (t xml.EndElement ) error {
120- local := t .Name .Local
121- if len (p .stack ) == 0 {
122- return nil
123- }
124- p .stack = p .stack [:len (p .stack )- 1 ]
125-
126- switch local {
118+ func (p * docParser ) endElement (t xml.EndElement ) { // handles a closing tag: clears the matching in-element flag; on </component> flushes the finished component
119+ switch t .Name .Local {
127120 case "component" :
128- return p .flush ()
121+ _ = p .flush () // NOTE(review): flush returns fn's error and the pre-change code propagated it to the caller; discarding it here silently hides callback failures — confirm this is intended or restore the error return
122+ case "pkgname" :
123+ p .inPkgname = false
129124 case "keywords" :
130125 p .inKeywords = false
126+ case "keyword" :
131127 p .inKeyword = false
132- p .keywordsBlockSkip = false
133- case elKeyword :
134- p .inKeyword = false
135- p .keywordSkip = false
136128 case "categories" :
137129 p .inCategories = false
130+ case "category" :
138131 p .inCategory = false
139- p .categoriesBlockSkip = false
140- case elCategory :
141- p .inCategory = false
142- p .categorySkip = false
143132 }
144- return nil
145133}
146134
147- // charData collects pkgname, <keyword>, and <category> text.
148135func (p * docParser ) charData (t xml.CharData ) { // routes whitespace-trimmed text to the open component's pkgname, keywords, or categories
149136 if p .cur == nil { // text outside any <component> is dropped
150137 return
151138 }
152- text := strings .TrimSpace (string (t ))
153- if text == "" {
139+ var dst * []string
140+ switch {
141+ case p .inPkgname :
142+ text := strings .TrimSpace (string (t ))
143+ if text != "" {
144+ p .cur .pkgname += text // concatenated, so pkgname text delivered in several tokens is joined
145+ }
146+ return
147+ case p .inKeyword : // NOTE(review): flag-based, so text of elements nested inside <pkgname>/<keyword>/<category> is collected too; the stack-based version only took text whose immediate parent matched — confirm
148+ dst = & p .cur .keywords
149+ case p .inCategory :
150+ dst = & p .cur .categories
151+ default :
154152 return
155153 }
156-
157- parent := ""
158- if len (p .stack ) > 0 {
159- parent = p .stack [len (p .stack )- 1 ]
160- }
161-
162- switch parent {
163- case "pkgname" :
164- p .cur .pkgname += text
165- case elKeyword :
166- if p .inKeyword && ! p .keywordsBlockSkip && ! p .keywordSkip {
167- p .cur .keywords = append (p .cur .keywords , text )
168- }
169- case elCategory :
170- if p .inCategory && ! p .categoriesBlockSkip && ! p .categorySkip {
171- p .cur .categories = append (p .cur .categories , text )
172- }
154+ text := strings .TrimSpace (string (t ))
155+ if text != "" {
156+ * dst = append (* dst , text ) // each non-empty CharData token becomes its own entry
173157 }
174158}
175159
0 commit comments