Skip to content

Commit 5a37c84

Browse files
committed
fix errors
1 parent ecf8c27 commit 5a37c84

2 files changed

Lines changed: 147 additions & 127 deletions

File tree

internal/appstream/parse.go

Lines changed: 138 additions & 124 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,16 @@ package appstream
55

66
import (
77
"encoding/xml"
8+
"errors"
89
"io"
910
"strings"
1011
)
1112

13+
// elKeyword is the local name of the <keyword> element. It is a named
// constant because the start-element, end-element, and character-data
// handlers all match on it.
const elKeyword = "keyword"
17+
1218
// ParseComponentsXML reads a Components-*.xml stream and calls fn for each
1319
// <component>, as soon as the element is complete. Multiple components with the
1420
// same <pkgname> produce multiple invocations; the caller merges by name. This
@@ -17,150 +23,158 @@ import (
1723
func ParseComponentsXML(r io.Reader, fn func(pkgname string, parts []string) error) error {
1824
d := xml.NewDecoder(r)
1925
d.Strict = false
20-
21-
var (
22-
stack []string
23-
muteLeaf []bool
24-
inKeywords bool
25-
inKeyword bool
26-
inCats bool
27-
inDesc int
28-
)
29-
var cur *component
30-
31-
flush := func() error {
32-
if cur == nil {
33-
return nil
34-
}
35-
name := strings.TrimSpace(cur.pkgname)
36-
parts := append([]string(nil), cur.parts...)
37-
cur = nil
38-
if name == "" {
39-
return nil
40-
}
41-
return fn(name, parts)
42-
}
43-
26+
p := &docParser{fn: fn, dec: d}
4427
for {
45-
tok, err := d.Token()
46-
if err == io.EOF {
47-
if err := flush(); err != nil {
48-
return err
49-
}
50-
break
28+
tok, err := p.dec.Token()
29+
if errors.Is(err, io.EOF) {
30+
return p.flush()
5131
}
5232
if err != nil {
5333
return err
5434
}
55-
5635
switch t := tok.(type) {
5736
case xml.StartElement:
58-
local := t.Name.Local
59-
stack = append(stack, local)
60-
muted := false
61-
if local == "name" || local == "summary" {
62-
for _, a := range t.Attr {
63-
if a.Name.Local != "lang" || a.Value == "" {
64-
continue
65-
}
66-
if a.Value != "en" && a.Value != "de" {
67-
muted = true
68-
break
69-
}
70-
}
71-
}
72-
muteLeaf = append(muteLeaf, muted)
73-
74-
switch local {
75-
case "component":
76-
if err := flush(); err != nil {
77-
return err
78-
}
79-
cur = &component{}
80-
case "keywords":
81-
inKeywords = true
82-
case "keyword":
83-
if inKeywords {
84-
inKeyword = true
85-
}
86-
case "categories":
87-
inCats = true
88-
case "description":
89-
inDesc++
90-
case "p":
91-
// paragraph inside description
37+
if err := p.startElement(t); err != nil {
38+
return err
9239
}
93-
9440
case xml.EndElement:
95-
local := t.Name.Local
96-
if len(stack) == 0 {
97-
continue
98-
}
99-
stack = stack[:len(stack)-1]
100-
if len(muteLeaf) > 0 {
101-
muteLeaf = muteLeaf[:len(muteLeaf)-1]
41+
if err := p.endElement(t); err != nil {
42+
return err
10243
}
44+
case xml.CharData:
45+
p.charData(t)
46+
}
47+
}
48+
}
10349

104-
switch local {
105-
case "component":
106-
if err := flush(); err != nil {
107-
return err
108-
}
109-
case "keywords":
110-
inKeywords = false
111-
inKeyword = false
112-
case "keyword":
113-
inKeyword = false
114-
case "categories":
115-
inCats = false
116-
case "description":
117-
if inDesc > 0 {
118-
inDesc--
119-
}
120-
}
50+
type docParser struct {
51+
fn func(string, []string) error
52+
dec *xml.Decoder
53+
stack []string
54+
muteLeaf []bool
55+
inKeywords bool
56+
inKeyword bool
57+
inCats bool
58+
inDesc int
59+
cur *component
60+
}
12161

122-
case xml.CharData:
123-
if cur == nil {
124-
continue
125-
}
126-
muted := len(muteLeaf) > 0 && muteLeaf[len(muteLeaf)-1]
127-
if muted {
128-
continue
129-
}
130-
text := strings.TrimSpace(string(t))
131-
if text == "" {
62+
func (p *docParser) flush() error {
63+
if p.cur == nil {
64+
return nil
65+
}
66+
name := strings.TrimSpace(p.cur.pkgname)
67+
parts := append([]string(nil), p.cur.parts...)
68+
p.cur = nil
69+
if name == "" {
70+
return nil
71+
}
72+
return p.fn(name, parts)
73+
}
74+
75+
func (p *docParser) startElement(t xml.StartElement) error {
76+
local := t.Name.Local
77+
p.stack = append(p.stack, local)
78+
muted := false
79+
if local == "name" || local == "summary" {
80+
for _, a := range t.Attr {
81+
if a.Name.Local != "lang" || a.Value == "" {
13282
continue
13383
}
134-
135-
parent := ""
136-
if len(stack) > 0 {
137-
parent = stack[len(stack)-1]
84+
if a.Value != "en" && a.Value != "de" {
85+
muted = true
86+
break
13887
}
88+
}
89+
}
90+
p.muteLeaf = append(p.muteLeaf, muted)
13991

140-
switch parent {
141-
case "pkgname":
142-
cur.pkgname += text
143-
case "name", "summary":
144-
cur.parts = append(cur.parts, text)
145-
case "category":
146-
if inCats {
147-
cur.parts = append(cur.parts, text)
148-
}
149-
case "keyword":
150-
if inKeyword {
151-
cur.parts = append(cur.parts, text)
152-
}
153-
case "p":
154-
if inDesc > 0 {
155-
cur.parts = append(cur.parts, text)
156-
}
157-
}
92+
switch local {
93+
case "component":
94+
if err := p.flush(); err != nil {
95+
return err
96+
}
97+
p.cur = &component{}
98+
case "keywords":
99+
p.inKeywords = true
100+
case elKeyword:
101+
if p.inKeywords {
102+
p.inKeyword = true
158103
}
104+
case "categories":
105+
p.inCats = true
106+
case "description":
107+
p.inDesc++
108+
}
109+
return nil
110+
}
111+
112+
func (p *docParser) endElement(t xml.EndElement) error {
113+
local := t.Name.Local
114+
if len(p.stack) == 0 {
115+
return nil
116+
}
117+
p.stack = p.stack[:len(p.stack)-1]
118+
if len(p.muteLeaf) > 0 {
119+
p.muteLeaf = p.muteLeaf[:len(p.muteLeaf)-1]
159120
}
160121

122+
switch local {
123+
case "component":
124+
return p.flush()
125+
case "keywords":
126+
p.inKeywords = false
127+
p.inKeyword = false
128+
case elKeyword:
129+
p.inKeyword = false
130+
case "categories":
131+
p.inCats = false
132+
case "description":
133+
if p.inDesc > 0 {
134+
p.inDesc--
135+
}
136+
}
161137
return nil
162138
}
163139

140+
func (p *docParser) charData(t xml.CharData) {
141+
if p.cur == nil {
142+
return
143+
}
144+
muted := len(p.muteLeaf) > 0 && p.muteLeaf[len(p.muteLeaf)-1]
145+
if muted {
146+
return
147+
}
148+
text := strings.TrimSpace(string(t))
149+
if text == "" {
150+
return
151+
}
152+
153+
parent := ""
154+
if len(p.stack) > 0 {
155+
parent = p.stack[len(p.stack)-1]
156+
}
157+
158+
switch parent {
159+
case "pkgname":
160+
p.cur.pkgname += text
161+
case "name", "summary":
162+
p.cur.parts = append(p.cur.parts, text)
163+
case "category":
164+
if p.inCats {
165+
p.cur.parts = append(p.cur.parts, text)
166+
}
167+
case elKeyword:
168+
if p.inKeyword {
169+
p.cur.parts = append(p.cur.parts, text)
170+
}
171+
case "p":
172+
if p.inDesc > 0 {
173+
p.cur.parts = append(p.cur.parts, text)
174+
}
175+
}
176+
}
177+
164178
type component struct {
165179
pkgname string
166180
parts []string
@@ -169,8 +183,8 @@ type component struct {
169183
func dedupeWords(parts []string) string {
170184
seen := make(map[string]struct{})
171185
var b strings.Builder
172-
for _, p := range parts {
173-
for _, w := range strings.Fields(p) {
186+
for _, part := range parts {
187+
for _, w := range strings.Fields(part) {
174188
key := strings.ToLower(w)
175189
if _, ok := seen[key]; ok {
176190
continue

internal/appstream/update.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"context"
66
"database/sql"
77
"encoding/json"
8+
"errors"
89
"fmt"
910
"io"
1011
"log/slog"
@@ -19,6 +20,11 @@ const DefaultSourcesBase = "https://sources.archlinux.org/other/packages/archlin
1920

2021
const archlinuxPackageJSON = "https://archlinux.org/packages/extra/any/archlinux-appstream-data/json/"
2122

23+
// Timeouts for the fallback HTTP clients that LatestRelease and Update build
// when the caller passes a nil client.
const (
	// httpClientTimeoutRelease is the client timeout used by LatestRelease.
	httpClientTimeoutRelease = 2 * time.Minute
	// httpClientTimeoutUpdate is the client timeout used by Update.
	httpClientTimeoutUpdate = 15 * time.Minute
)
27+
2228
var componentRepos = []string{"core", "extra", "multilib"}
2329

2430
type pkgJSON struct {
@@ -29,7 +35,7 @@ type pkgJSON struct {
2935
// the current extra/any archlinux-appstream-data package in the official repos.
3036
func LatestRelease(ctx context.Context, client *http.Client) (string, error) {
3137
if client == nil {
32-
client = &http.Client{Timeout: 2 * time.Minute}
38+
client = &http.Client{Timeout: httpClientTimeoutRelease}
3339
}
3440
req, err := http.NewRequestWithContext(ctx, http.MethodGet, archlinuxPackageJSON, nil)
3541
if err != nil {
@@ -50,7 +56,7 @@ func LatestRelease(ctx context.Context, client *http.Client) (string, error) {
5056
return "", fmt.Errorf("decode package json: %w", err)
5157
}
5258
if p.Pkgver == "" {
53-
return "", fmt.Errorf("empty pkgver in package json")
59+
return "", errors.New("empty pkgver in package json")
5460
}
5561
return p.Pkgver, nil
5662
}
@@ -60,7 +66,7 @@ func LatestRelease(ctx context.Context, client *http.Client) (string, error) {
6066
// and rebuilds the FTS index.
6167
func Update(ctx context.Context, db *sql.DB, client *http.Client, sourcesBase string) error {
6268
if client == nil {
63-
client = &http.Client{Timeout: 15 * time.Minute}
69+
client = &http.Client{Timeout: httpClientTimeoutUpdate}
6470
}
6571
sourcesBase = strings.TrimSuffix(sourcesBase, "/") + "/"
6672

0 commit comments

Comments
 (0)