@@ -5,10 +5,16 @@ package appstream
55
66import (
77 "encoding/xml"
8+ "errors"
89 "io"
910 "strings"
1011)
1112
// elKeyword is the local name of the <keyword> element. It is spelled out
// once here because the start-, end- and character-data handlers all match
// against it.
const elKeyword = "keyword"
17+
1218// ParseComponentsXML reads a Components-*.xml stream and calls fn for each
1319// <component>, as soon as the element is complete. Multiple components with the
1420// same <pkgname> produce multiple invocations; the caller merges by name. This
@@ -17,150 +23,158 @@ import (
1723func ParseComponentsXML (r io.Reader , fn func (pkgname string , parts []string ) error ) error {
1824 d := xml .NewDecoder (r )
1925 d .Strict = false
20-
21- var (
22- stack []string
23- muteLeaf []bool
24- inKeywords bool
25- inKeyword bool
26- inCats bool
27- inDesc int
28- )
29- var cur * component
30-
31- flush := func () error {
32- if cur == nil {
33- return nil
34- }
35- name := strings .TrimSpace (cur .pkgname )
36- parts := append ([]string (nil ), cur .parts ... )
37- cur = nil
38- if name == "" {
39- return nil
40- }
41- return fn (name , parts )
42- }
43-
26+ p := & docParser {fn : fn , dec : d }
4427 for {
45- tok , err := d .Token ()
46- if err == io .EOF {
47- if err := flush (); err != nil {
48- return err
49- }
50- break
28+ tok , err := p .dec .Token ()
29+ if errors .Is (err , io .EOF ) {
30+ return p .flush ()
5131 }
5232 if err != nil {
5333 return err
5434 }
55-
5635 switch t := tok .(type ) {
5736 case xml.StartElement :
58- local := t .Name .Local
59- stack = append (stack , local )
60- muted := false
61- if local == "name" || local == "summary" {
62- for _ , a := range t .Attr {
63- if a .Name .Local != "lang" || a .Value == "" {
64- continue
65- }
66- if a .Value != "en" && a .Value != "de" {
67- muted = true
68- break
69- }
70- }
71- }
72- muteLeaf = append (muteLeaf , muted )
73-
74- switch local {
75- case "component" :
76- if err := flush (); err != nil {
77- return err
78- }
79- cur = & component {}
80- case "keywords" :
81- inKeywords = true
82- case "keyword" :
83- if inKeywords {
84- inKeyword = true
85- }
86- case "categories" :
87- inCats = true
88- case "description" :
89- inDesc ++
90- case "p" :
91- // paragraph inside description
37+ if err := p .startElement (t ); err != nil {
38+ return err
9239 }
93-
9440 case xml.EndElement :
95- local := t .Name .Local
96- if len (stack ) == 0 {
97- continue
98- }
99- stack = stack [:len (stack )- 1 ]
100- if len (muteLeaf ) > 0 {
101- muteLeaf = muteLeaf [:len (muteLeaf )- 1 ]
41+ if err := p .endElement (t ); err != nil {
42+ return err
10243 }
44+ case xml.CharData :
45+ p .charData (t )
46+ }
47+ }
48+ }
10349
104- switch local {
105- case "component" :
106- if err := flush (); err != nil {
107- return err
108- }
109- case "keywords" :
110- inKeywords = false
111- inKeyword = false
112- case "keyword" :
113- inKeyword = false
114- case "categories" :
115- inCats = false
116- case "description" :
117- if inDesc > 0 {
118- inDesc --
119- }
120- }
50+ type docParser struct {
51+ fn func (string , []string ) error
52+ dec * xml.Decoder
53+ stack []string
54+ muteLeaf []bool
55+ inKeywords bool
56+ inKeyword bool
57+ inCats bool
58+ inDesc int
59+ cur * component
60+ }
12161
122- case xml.CharData :
123- if cur == nil {
124- continue
125- }
126- muted := len (muteLeaf ) > 0 && muteLeaf [len (muteLeaf )- 1 ]
127- if muted {
128- continue
129- }
130- text := strings .TrimSpace (string (t ))
131- if text == "" {
62+ func (p * docParser ) flush () error {
63+ if p .cur == nil {
64+ return nil
65+ }
66+ name := strings .TrimSpace (p .cur .pkgname )
67+ parts := append ([]string (nil ), p .cur .parts ... )
68+ p .cur = nil
69+ if name == "" {
70+ return nil
71+ }
72+ return p .fn (name , parts )
73+ }
74+
75+ func (p * docParser ) startElement (t xml.StartElement ) error {
76+ local := t .Name .Local
77+ p .stack = append (p .stack , local )
78+ muted := false
79+ if local == "name" || local == "summary" {
80+ for _ , a := range t .Attr {
81+ if a .Name .Local != "lang" || a .Value == "" {
13282 continue
13383 }
134-
135- parent := ""
136- if len (stack ) > 0 {
137- parent = stack [len (stack )- 1 ]
84+ if a .Value != "en" && a .Value != "de" {
85+ muted = true
86+ break
13887 }
88+ }
89+ }
90+ p .muteLeaf = append (p .muteLeaf , muted )
13991
140- switch parent {
141- case "pkgname" :
142- cur .pkgname += text
143- case "name" , "summary" :
144- cur .parts = append (cur .parts , text )
145- case "category" :
146- if inCats {
147- cur .parts = append (cur .parts , text )
148- }
149- case "keyword" :
150- if inKeyword {
151- cur .parts = append (cur .parts , text )
152- }
153- case "p" :
154- if inDesc > 0 {
155- cur .parts = append (cur .parts , text )
156- }
157- }
92+ switch local {
93+ case "component" :
94+ if err := p .flush (); err != nil {
95+ return err
96+ }
97+ p .cur = & component {}
98+ case "keywords" :
99+ p .inKeywords = true
100+ case elKeyword :
101+ if p .inKeywords {
102+ p .inKeyword = true
158103 }
104+ case "categories" :
105+ p .inCats = true
106+ case "description" :
107+ p .inDesc ++
108+ }
109+ return nil
110+ }
111+
112+ func (p * docParser ) endElement (t xml.EndElement ) error {
113+ local := t .Name .Local
114+ if len (p .stack ) == 0 {
115+ return nil
116+ }
117+ p .stack = p .stack [:len (p .stack )- 1 ]
118+ if len (p .muteLeaf ) > 0 {
119+ p .muteLeaf = p .muteLeaf [:len (p .muteLeaf )- 1 ]
159120 }
160121
122+ switch local {
123+ case "component" :
124+ return p .flush ()
125+ case "keywords" :
126+ p .inKeywords = false
127+ p .inKeyword = false
128+ case elKeyword :
129+ p .inKeyword = false
130+ case "categories" :
131+ p .inCats = false
132+ case "description" :
133+ if p .inDesc > 0 {
134+ p .inDesc --
135+ }
136+ }
161137 return nil
162138}
163139
140+ func (p * docParser ) charData (t xml.CharData ) {
141+ if p .cur == nil {
142+ return
143+ }
144+ muted := len (p .muteLeaf ) > 0 && p .muteLeaf [len (p .muteLeaf )- 1 ]
145+ if muted {
146+ return
147+ }
148+ text := strings .TrimSpace (string (t ))
149+ if text == "" {
150+ return
151+ }
152+
153+ parent := ""
154+ if len (p .stack ) > 0 {
155+ parent = p .stack [len (p .stack )- 1 ]
156+ }
157+
158+ switch parent {
159+ case "pkgname" :
160+ p .cur .pkgname += text
161+ case "name" , "summary" :
162+ p .cur .parts = append (p .cur .parts , text )
163+ case "category" :
164+ if p .inCats {
165+ p .cur .parts = append (p .cur .parts , text )
166+ }
167+ case elKeyword :
168+ if p .inKeyword {
169+ p .cur .parts = append (p .cur .parts , text )
170+ }
171+ case "p" :
172+ if p .inDesc > 0 {
173+ p .cur .parts = append (p .cur .parts , text )
174+ }
175+ }
176+ }
177+
164178type component struct {
165179 pkgname string
166180 parts []string
@@ -169,8 +183,8 @@ type component struct {
169183func dedupeWords (parts []string ) string {
170184 seen := make (map [string ]struct {})
171185 var b strings.Builder
172- for _ , p := range parts {
173- for _ , w := range strings .Fields (p ) {
186+ for _ , part := range parts {
187+ for _ , w := range strings .Fields (part ) {
174188 key := strings .ToLower (w )
175189 if _ , ok := seen [key ]; ok {
176190 continue
0 commit comments