Skip to content

Commit 47fc1a1

Browse files
committed
engine: support more robust handling of links in tar stream
Signed-off-by: Caleb Xu <caxu@redhat.com> Assisted-by: Cursor
1 parent 8381364 commit 47fc1a1

5 files changed

Lines changed: 1228 additions & 183 deletions

File tree

internal/engine/engine.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ func generateBundleHash(ctx context.Context, bundlePath string) (string, error)
342342
slices.Sort(keys)
343343

344344
for _, k := range keys {
345-
hashBuffer.WriteString(fmt.Sprintf("%s %s\n", k, files[k]))
345+
fmt.Fprintf(&hashBuffer, "%s %s\n", k, files[k])
346346
}
347347

348348
artifactsWriter := artifacts.WriterFromContext(ctx)

internal/engine/graph.go

Lines changed: 356 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,356 @@
1+
package engine
2+
3+
import (
4+
"archive/tar"
5+
"path/filepath"
6+
"slices"
7+
"strings"
8+
9+
"github.com/go-logr/logr"
10+
11+
"github.com/redhat-openshift-ecosystem/openshift-preflight/internal/log"
12+
)
13+
14+
// linkType is a convenience type that makes the consuming functions clearer.
// Its two values reuse the tar header type flags for hard and symbolic links.
type linkType byte

const (
	hardlink linkType = tar.TypeLink
	symlink  linkType = tar.TypeSymlink
)

// String returns the human-readable name of the link type. Every value other
// than hardlink is reported as "symlink".
func (lt linkType) String() string {
	switch lt {
	case hardlink:
		return "hardlink"
	default:
		return "symlink"
	}
}
29+
30+
type linkNode struct {
31+
Name string
32+
Deps *linkNode
33+
OriginalLinkname string // For symlinks, the original target string before resolution
34+
Type linkType // Type of link (symlink or hardlink)
35+
VirtualLinkTarget string // For hardlinks pointing to symlinks, the symlink's target string
36+
}
37+
38+
func (n *linkNode) IsHardlink() bool {
39+
return n.Type == hardlink
40+
}
41+
42+
func (n *linkNode) IsSymlink() bool {
43+
return n.Type == symlink
44+
}
45+
46+
// ChainTypesToFollow returns the link types that should be followed in a chain
47+
// starting from this node. Hardlinks follow both hardlink and symlink chains,
48+
// while symlinks only follow symlink chains.
49+
func (n *linkNode) ChainTypesToFollow() []linkType {
50+
if n.IsHardlink() {
51+
return []linkType{hardlink, symlink}
52+
}
53+
return []linkType{symlink}
54+
}
55+
56+
// LinkGraph indexes link nodes by the path of the link they describe.
type LinkGraph map[string]*linkNode
57+
58+
type extractionContext struct {
59+
linkGraph LinkGraph
60+
neededFiles *[]string
61+
logger logr.Logger
62+
}
63+
64+
// ExpandFilePathAliases generates all possible paths to a file through directory symlinks.
65+
// For example, for a symlink /a/b/c -> /foo/bar (directory), and a file /foo/bar/baz, this
66+
// will return both /a/b/c/baz and /foo/bar/baz.
67+
func (lg LinkGraph) ExpandFilePathAliases(filePath string, symlinkAliases map[string][]string) []string {
68+
expanding := make(map[string]struct{})
69+
return lg.expandFilePathAliasesRec(filePath, symlinkAliases, expanding)
70+
}
71+
72+
func (lg LinkGraph) expandFilePathAliasesRec(filePath string, symlinkAliases map[string][]string, expanding map[string]struct{}) []string {
73+
results := []string{filePath}
74+
visited := make(map[string]struct{})
75+
visited[filePath] = struct{}{}
76+
77+
expanding[filePath] = struct{}{}
78+
defer delete(expanding, filePath)
79+
80+
// Check all parent directories to see if they have symlink aliases
81+
dir := filePath
82+
for {
83+
dir = filepath.Dir(dir)
84+
if dir == "." || dir == "/" {
85+
break
86+
}
87+
88+
// Check if this directory has any symlinks pointing to it
89+
if aliases, ok := symlinkAliases[dir]; ok {
90+
for _, symlinkPath := range aliases {
91+
// Replace the directory portion with the symlink path
92+
// For example: /usr/share/rpm/file.db with symlink /usr/lib/sysimage/rpm -> /usr/share/rpm
93+
// becomes /usr/lib/sysimage/rpm/file.db
94+
relativePath, err := filepath.Rel(dir, filePath)
95+
if err != nil {
96+
continue
97+
}
98+
aliasedPath := filepath.Join(symlinkPath, relativePath)
99+
if _, seen := visited[aliasedPath]; !seen {
100+
visited[aliasedPath] = struct{}{}
101+
results = append(results, aliasedPath)
102+
// Recursively find aliases of the aliased path. Skip recursion if aliasedPath
103+
// is already being expanded (directory symlink cycles).
104+
if _, onStack := expanding[aliasedPath]; !onStack {
105+
edgeKey := dir + "\x00" + symlinkPath
106+
if _, edgeSeen := expanding[edgeKey]; edgeSeen {
107+
continue
108+
}
109+
expanding[edgeKey] = struct{}{}
110+
nestedAliases := lg.expandFilePathAliasesRec(aliasedPath, symlinkAliases, expanding)
111+
delete(expanding, edgeKey)
112+
for _, nested := range nestedAliases {
113+
if _, seen := visited[nested]; !seen {
114+
visited[nested] = struct{}{}
115+
results = append(results, nested)
116+
}
117+
}
118+
}
119+
}
120+
}
121+
}
122+
}
123+
124+
return results
125+
}
126+
127+
// walkGraphChain walks a graph chain starting from a node, applying a visitor function
128+
// to each node in the chain. Returns when the chain ends, visitor returns false, or a cycle is detected.
129+
func walkGraphChain(start string, graph LinkGraph, visitor func(node string, deps *linkNode) bool) {
130+
current := start
131+
visited := make(map[string]struct{})
132+
for {
133+
if _, seen := visited[current]; seen {
134+
break
135+
}
136+
visited[current] = struct{}{}
137+
138+
node, ok := graph[current]
139+
if !ok || node.Deps == nil {
140+
break
141+
}
142+
next := node.Deps.Name
143+
if _, seen := visited[next]; seen {
144+
break
145+
}
146+
if !visitor(next, node.Deps) {
147+
break
148+
}
149+
current = next
150+
}
151+
}
152+
153+
// followLinkChain adds all links in the chain starting from the given link.
154+
// For example, if linkA -> linkB -> directory, this adds both linkA and linkB
155+
// to neededFiles. If filterType is provided (non-nil), only links of that type are followed.
156+
func (ec *extractionContext) followLinkChain(startLink string, filterType *linkType) {
157+
walkGraphChain(startLink, ec.linkGraph, func(target string, deps *linkNode) bool {
158+
// If the target is also a link in the graph
159+
if targetNode, isTargetLink := ec.linkGraph[target]; isTargetLink {
160+
// If filtering by type, check if target matches
161+
if filterType != nil && targetNode.Type != *filterType {
162+
return false
163+
}
164+
*ec.neededFiles = append(*ec.neededFiles, target)
165+
linkTypeStr := targetNode.Type.String()
166+
ec.logger.V(log.TRC).Info("adding transitive directory "+linkTypeStr, linkTypeStr, target, "via", startLink)
167+
return true
168+
}
169+
return false
170+
})
171+
}
172+
173+
// processLink processes a link (symlink or hardlink) by adding it to neededFiles,
174+
// following its chain, and recursively processing its target and parents.
175+
// The logContext parameter provides context for logging (e.g., "parent directory", "target").
176+
func (ec *extractionContext) processLink(path string, node *linkNode, logContext string, originalFile string, visited map[string]struct{}) {
177+
*ec.neededFiles = append(*ec.neededFiles, path)
178+
179+
ec.logger.V(log.TRC).Info("adding "+logContext+" "+node.Type.String(), node.Type.String(), path, "for_file", originalFile)
180+
181+
// Follow link chains based on node type
182+
for _, chainType := range node.ChainTypesToFollow() {
183+
ec.followLinkChain(path, &chainType)
184+
}
185+
186+
// Recursively process the target's parent directories AND the target itself
187+
// (the target might also be a link)
188+
if node.Deps != nil {
189+
depName := node.Deps.Name
190+
191+
// First check if the target itself is a link
192+
if _, seen := visited[depName]; !seen {
193+
visited[depName] = struct{}{}
194+
if depNode, isLink := ec.linkGraph[depName]; isLink {
195+
ec.processLink(depName, depNode, "target", originalFile, visited)
196+
}
197+
}
198+
199+
// Then check the target's parents
200+
ec.addParentLinks(depName, originalFile, visited)
201+
}
202+
}
203+
204+
func (ec *extractionContext) addParentLinks(path string, originalFile string, visited map[string]struct{}) {
205+
// Check all parent directories up to root
206+
dir := filepath.Dir(path)
207+
for dir != "." && dir != "/" {
208+
if _, seen := visited[dir]; seen {
209+
break
210+
}
211+
visited[dir] = struct{}{}
212+
213+
// Check if this directory is a link (symlink or hardlink)
214+
if node, isLink := ec.linkGraph[dir]; isLink {
215+
ec.processLink(dir, node, "parent directory", originalFile, visited)
216+
}
217+
218+
dir = filepath.Dir(dir)
219+
}
220+
}
221+
222+
// resolveRelativeLinkFrom resolves a symlink's target string the same way as planExtraction:
// POSIX-style absolute linknames (leading '/') are archive-root paths; relative linknames
// are resolved from linkPath's directory. The result is always cleaned.
//
// All leading slashes of an absolute linkname are stripped, not just the first,
// so a target such as "//usr/lib" yields the archive-root-relative "usr/lib"
// rather than the absolute "/usr/lib".
// NOTE(review): planExtraction is outside this view — confirm it treats
// repeated leading slashes the same way.
func resolveRelativeLinkFrom(linkPath, linkTarget string) string {
	if strings.HasPrefix(linkTarget, "/") {
		return filepath.Clean(strings.TrimLeft(linkTarget, "/"))
	}
	// filepath.Join already cleans its result, so no extra Clean is needed.
	return filepath.Join(filepath.Dir(linkPath), linkTarget)
}
231+
232+
// addAliasIfNew appends alias to aliases[target] unless it is already listed
// there. It reports whether the map was changed.
func addAliasIfNew(aliases map[string][]string, target, alias string) bool {
	existing := aliases[target]
	if slices.Contains(existing, alias) {
		return false
	}
	aliases[target] = append(existing, alias)
	return true
}
241+
242+
// getLinkTarget returns the link target (original linkname) for a path,
243+
// checking both real symlinks and virtual links
244+
func getLinkTarget(path string, linkGraph LinkGraph) (linkTarget string, found bool) {
245+
if linkNode, exists := linkGraph[path]; exists {
246+
if linkNode.IsSymlink() && linkNode.OriginalLinkname != "" {
247+
return linkNode.OriginalLinkname, true
248+
}
249+
if linkNode.VirtualLinkTarget != "" {
250+
return linkNode.VirtualLinkTarget, true
251+
}
252+
}
253+
return "", false
254+
}
255+
256+
// getEffectiveLinkTarget checks if a path is a symlink/virtual-symlink,
257+
// or if it's a hardlink pointing to a symlink/virtual-symlink.
258+
// Returns the ultimate symlink target string.
259+
func getEffectiveLinkTarget(path string, linkGraph LinkGraph) (linkTarget string, found bool) {
260+
// First check if path itself is a symlink/virtual-symlink
261+
if target, ok := getLinkTarget(path, linkGraph); ok {
262+
return target, true
263+
}
264+
265+
// If path is a hardlink, check if its target is a symlink/virtual-symlink
266+
if node, exists := linkGraph[path]; exists && node.IsHardlink() && node.Deps != nil {
267+
return getLinkTarget(node.Deps.Name, linkGraph)
268+
}
269+
270+
return "", false
271+
}
272+
273+
// BuildDirectoryAliasMap builds a backlink map for all links in the
274+
// LinkGraph. The resulting map associates each link in the graph
275+
// with its aliases.
276+
func (lg LinkGraph) BuildDirectoryAliasMap(logger logr.Logger) map[string][]string {
277+
// Start with basic symlink aliases
278+
aliases := make(map[string][]string)
279+
for linkPath, node := range lg {
280+
if node.IsSymlink() && node.Deps != nil {
281+
targetPath := node.Deps.Name
282+
aliases[targetPath] = append(aliases[targetPath], linkPath)
283+
}
284+
}
285+
286+
iteration := 0
287+
// Monotonic fixpoint: each successful addAliasIfNew strictly increases the
288+
// multiset of (target, alias) pairs; possible pairs are finite for a finite graph.
289+
for {
290+
changed := false
291+
iteration++
292+
293+
for linkPath, linkNode := range lg {
294+
if linkNode.Deps == nil {
295+
continue
296+
}
297+
targetPath := linkNode.Deps.Name
298+
299+
// Handle hardlinks pointing to symlinks (real or virtual)
300+
if linkNode.IsHardlink() {
301+
// Skip if we've already processed this hardlink as a virtual symlink
302+
if linkNode.VirtualLinkTarget != "" {
303+
continue
304+
}
305+
306+
// Skip if the target is not a symlink/virtual-symlink
307+
linkTarget, found := getLinkTarget(targetPath, lg)
308+
if !found {
309+
continue
310+
}
311+
312+
// Create a virtual symlink at the hardlink's location: interpret the
313+
// target symlink's linkname as relative to this hardlink's directory
314+
// (same inode semantics users see when opening the path via the hardlink).
315+
resolvedFromHardlink := resolveRelativeLinkFrom(linkPath, linkTarget)
316+
317+
if addAliasIfNew(aliases, resolvedFromHardlink, linkPath) {
318+
linkNode.VirtualLinkTarget = linkTarget
319+
logger.V(log.TRC).Info("adding hardlink alias",
320+
"from", linkPath,
321+
"to", resolvedFromHardlink,
322+
"via_target", targetPath,
323+
"iteration", iteration)
324+
changed = true
325+
}
326+
}
327+
328+
// Handle symlinks pointing to other links (directly or via hardlink)
329+
if linkNode.IsSymlink() && linkNode.OriginalLinkname != "" {
330+
linkTarget, found := getEffectiveLinkTarget(targetPath, lg)
331+
if !found {
332+
continue
333+
}
334+
335+
resolvedFromSymlink := resolveRelativeLinkFrom(targetPath, linkTarget)
336+
337+
if addAliasIfNew(aliases, resolvedFromSymlink, linkPath) {
338+
logger.V(log.TRC).Info("adding symlink alias",
339+
"from", linkPath,
340+
"to", resolvedFromSymlink,
341+
"via_target", targetPath,
342+
"iteration", iteration)
343+
changed = true
344+
}
345+
}
346+
}
347+
348+
if !changed {
349+
break
350+
}
351+
}
352+
353+
logger.V(log.DBG).Info("directory alias map built", "iterations", iteration, "total_aliases", len(aliases))
354+
355+
return aliases
356+
}

0 commit comments

Comments
 (0)