Skip to content

Commit 6e7a11d

Browse files
committed
engine: support more robust handling of links in tar stream
Signed-off-by: Caleb Xu <caxu@redhat.com> Assisted-by: Cursor
1 parent 8381364 commit 6e7a11d

5 files changed

Lines changed: 1162 additions & 183 deletions

File tree

internal/engine/engine.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ func generateBundleHash(ctx context.Context, bundlePath string) (string, error)
342342
slices.Sort(keys)
343343

344344
for _, k := range keys {
345-
hashBuffer.WriteString(fmt.Sprintf("%s %s\n", k, files[k]))
345+
fmt.Fprintf(&hashBuffer, "%s %s\n", k, files[k])
346346
}
347347

348348
artifactsWriter := artifacts.WriterFromContext(ctx)

internal/engine/graph.go

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
package engine
2+
3+
import (
4+
"archive/tar"
5+
"path/filepath"
6+
"slices"
7+
8+
"github.com/go-logr/logr"
9+
10+
"github.com/redhat-openshift-ecosystem/openshift-preflight/internal/log"
11+
)
12+
13+
// linkType is a convenience type that makes the functions consuming link
// kinds easier to read. Its values are the matching archive/tar type flags.
type linkType byte

const (
	hardlink linkType = tar.TypeLink
	symlink  linkType = tar.TypeSymlink
)

// String returns the string representation of the linkType: "hardlink" or
// "symlink" for the two declared constants, and "unknown" for anything else
// (for example the zero value of a linkNode that was never given a Type), so
// that such a value is not silently reported as a symlink.
func (lt linkType) String() string {
	// Compare against this package's own constants rather than the raw tar
	// flags so the method stays in step with the const block above.
	switch lt {
	case hardlink:
		return "hardlink"
	case symlink:
		return "symlink"
	default:
		return "unknown"
	}
}
28+
29+
type linkNode struct {
30+
Name string
31+
Deps *linkNode
32+
OriginalLinkname string // For symlinks, the original target string before resolution
33+
Type linkType // Type of link (symlink or hardlink)
34+
VirtualLinkTarget string // For hardlinks pointing to symlinks, the symlink's target string
35+
}
36+
37+
func (n *linkNode) IsHardlink() bool {
38+
return n.Type == hardlink
39+
}
40+
41+
func (n *linkNode) IsSymlink() bool {
42+
return n.Type == symlink
43+
}
44+
45+
// ChainTypesToFollow returns the link types that should be followed in a chain
46+
// starting from this node. Hardlinks follow both hardlink and symlink chains,
47+
// while symlinks only follow symlink chains.
48+
func (n *linkNode) ChainTypesToFollow() []linkType {
49+
if n.IsHardlink() {
50+
return []linkType{hardlink, symlink}
51+
}
52+
return []linkType{symlink}
53+
}
54+
55+
// LinkGraph maps a path to the linkNode recorded for it. The helpers in this
// file treat presence of a key as "this path is a link" and follow each
// node's Deps.Name to the next key.
type LinkGraph map[string]*linkNode
56+
57+
type extractionContext struct {
58+
linkGraph LinkGraph
59+
neededFiles *[]string
60+
logger logr.Logger
61+
}
62+
63+
// ExpandFilePathAliases generates all possible paths to a file through directory symlinks.
64+
// For example, for a symlink /a/b/c -> /foo/bar (directory), and a file /foo/bar/baz, this
65+
// will return both /a/b/c/baz and /foo/bar/baz.
66+
func (lg LinkGraph) ExpandFilePathAliases(filePath string, symlinkAliases map[string][]string) []string {
67+
expanding := make(map[string]struct{})
68+
return lg.expandFilePathAliasesRec(filePath, symlinkAliases, expanding)
69+
}
70+
71+
func (lg LinkGraph) expandFilePathAliasesRec(filePath string, symlinkAliases map[string][]string, expanding map[string]struct{}) []string {
72+
results := []string{filePath}
73+
visited := make(map[string]struct{})
74+
visited[filePath] = struct{}{}
75+
76+
expanding[filePath] = struct{}{}
77+
defer delete(expanding, filePath)
78+
79+
// Check all parent directories to see if they have symlink aliases
80+
dir := filePath
81+
for {
82+
dir = filepath.Dir(dir)
83+
if dir == "." || dir == "/" {
84+
break
85+
}
86+
87+
// Check if this directory has any symlinks pointing to it
88+
if aliases, ok := symlinkAliases[dir]; ok {
89+
for _, symlinkPath := range aliases {
90+
// Replace the directory portion with the symlink path
91+
// For example: /usr/share/rpm/file.db with symlink /usr/lib/sysimage/rpm -> /usr/share/rpm
92+
// becomes /usr/lib/sysimage/rpm/file.db
93+
relativePath, err := filepath.Rel(dir, filePath)
94+
if err != nil {
95+
continue
96+
}
97+
aliasedPath := filepath.Join(symlinkPath, relativePath)
98+
if _, seen := visited[aliasedPath]; !seen {
99+
visited[aliasedPath] = struct{}{}
100+
results = append(results, aliasedPath)
101+
// Recursively find aliases of the aliased path. Skip recursion if aliasedPath
102+
// is already being expanded (directory symlink cycles).
103+
if _, onStack := expanding[aliasedPath]; !onStack {
104+
nestedAliases := lg.expandFilePathAliasesRec(aliasedPath, symlinkAliases, expanding)
105+
for _, nested := range nestedAliases {
106+
if _, seen := visited[nested]; !seen {
107+
visited[nested] = struct{}{}
108+
results = append(results, nested)
109+
}
110+
}
111+
}
112+
}
113+
}
114+
}
115+
}
116+
117+
return results
118+
}
119+
120+
// walkGraphChain walks a graph chain starting from a node, applying a visitor function
121+
// to each node in the chain. Returns when the chain ends, visitor returns false, or a cycle is detected.
122+
func walkGraphChain(start string, graph LinkGraph, visitor func(node string, deps *linkNode) bool) {
123+
current := start
124+
visited := make(map[string]struct{})
125+
for {
126+
if _, seen := visited[current]; seen {
127+
break
128+
}
129+
visited[current] = struct{}{}
130+
131+
node, ok := graph[current]
132+
if !ok || node.Deps == nil {
133+
break
134+
}
135+
next := node.Deps.Name
136+
if _, seen := visited[next]; seen {
137+
break
138+
}
139+
if !visitor(next, node.Deps) {
140+
break
141+
}
142+
current = next
143+
}
144+
}
145+
146+
// followLinkChain adds all links in the chain starting from the given link.
147+
// For example, if linkA -> linkB -> directory, this adds both linkA and linkB
148+
// to neededFiles. If filterType is provided (non-nil), only links of that type are followed.
149+
func (ec *extractionContext) followLinkChain(startLink string, filterType *linkType) {
150+
walkGraphChain(startLink, ec.linkGraph, func(target string, deps *linkNode) bool {
151+
// If the target is also a link in the graph
152+
if targetNode, isTargetLink := ec.linkGraph[target]; isTargetLink {
153+
// If filtering by type, check if target matches
154+
if filterType != nil && targetNode.Type != *filterType {
155+
return false
156+
}
157+
*ec.neededFiles = append(*ec.neededFiles, target)
158+
linkTypeStr := targetNode.Type.String()
159+
ec.logger.V(log.TRC).Info("adding transitive directory "+linkTypeStr, linkTypeStr, target, "via", startLink)
160+
return true
161+
}
162+
return false
163+
})
164+
}
165+
166+
// processLink processes a link (symlink or hardlink) by adding it to neededFiles,
167+
// following its chain, and recursively processing its target and parents.
168+
// The logContext parameter provides context for logging (e.g., "parent directory", "target").
169+
func (ec *extractionContext) processLink(path string, node *linkNode, logContext string, originalFile string, visited map[string]struct{}) {
170+
*ec.neededFiles = append(*ec.neededFiles, path)
171+
172+
ec.logger.V(log.TRC).Info("adding "+logContext+" "+node.Type.String(), node.Type.String(), path, "for_file", originalFile)
173+
174+
// Follow link chains based on node type
175+
for _, chainType := range node.ChainTypesToFollow() {
176+
ec.followLinkChain(path, &chainType)
177+
}
178+
179+
// Recursively process the target's parent directories AND the target itself
180+
// (the target might also be a link)
181+
if node.Deps != nil {
182+
depName := node.Deps.Name
183+
184+
// First check if the target itself is a link
185+
if _, seen := visited[depName]; !seen {
186+
visited[depName] = struct{}{}
187+
if depNode, isLink := ec.linkGraph[depName]; isLink {
188+
ec.processLink(depName, depNode, "target", originalFile, visited)
189+
}
190+
}
191+
192+
// Then check the target's parents
193+
ec.addParentLinks(depName, originalFile, visited)
194+
}
195+
}
196+
197+
func (ec *extractionContext) addParentLinks(path string, originalFile string, visited map[string]struct{}) {
198+
// Check all parent directories up to root
199+
dir := filepath.Dir(path)
200+
for dir != "." && dir != "/" {
201+
if _, seen := visited[dir]; seen {
202+
break
203+
}
204+
visited[dir] = struct{}{}
205+
206+
// Check if this directory is a link (symlink or hardlink)
207+
if node, isLink := ec.linkGraph[dir]; isLink {
208+
ec.processLink(dir, node, "parent directory", originalFile, visited)
209+
}
210+
211+
dir = filepath.Dir(dir)
212+
}
213+
}
214+
215+
// resolveRelativeLinkFrom interprets linkTarget relative to the directory
// that contains linkPath and returns the cleaned result. linkTarget is always
// treated as relative: filepath.Join does not special-case a rooted element,
// so a target such as "/x" ends up underneath linkPath's directory.
func resolveRelativeLinkFrom(linkPath, linkTarget string) string {
	linkDir := filepath.Dir(linkPath)
	// Join already returns a cleaned path, and linkDir is never empty.
	return filepath.Join(linkDir, linkTarget)
}
218+
219+
// addAliasIfNew appends alias to aliases[target] unless it is already listed
// there. It reports whether the map was modified.
func addAliasIfNew(aliases map[string][]string, target, alias string) bool {
	known := aliases[target]
	if slices.Contains(known, alias) {
		return false
	}
	aliases[target] = append(known, alias)
	return true
}
228+
229+
// getLinkTarget returns the link target (original linkname) for a path,
230+
// checking both real symlinks and virtual links
231+
func getLinkTarget(path string, linkGraph LinkGraph) (linkTarget string, found bool) {
232+
if linkNode, exists := linkGraph[path]; exists {
233+
if linkNode.IsSymlink() && linkNode.OriginalLinkname != "" {
234+
return linkNode.OriginalLinkname, true
235+
}
236+
if linkNode.VirtualLinkTarget != "" {
237+
return linkNode.VirtualLinkTarget, true
238+
}
239+
}
240+
return "", false
241+
}
242+
243+
// getEffectiveLinkTarget checks if a path is a symlink/virtual-symlink,
244+
// or if it's a hardlink pointing to a symlink/virtual-symlink.
245+
// Returns the ultimate symlink target string.
246+
func getEffectiveLinkTarget(path string, linkGraph LinkGraph) (linkTarget string, found bool) {
247+
// First check if path itself is a symlink/virtual-symlink
248+
if target, ok := getLinkTarget(path, linkGraph); ok {
249+
return target, true
250+
}
251+
252+
// If path is a hardlink, check if its target is a symlink/virtual-symlink
253+
if node, exists := linkGraph[path]; exists && node.IsHardlink() && node.Deps != nil {
254+
return getLinkTarget(node.Deps.Name, linkGraph)
255+
}
256+
257+
return "", false
258+
}
259+
260+
// BuildDirectoryAliasMap builds a backlink map for all links in the
261+
// LinkGraph. The resulting map associates each link in the graph
262+
// with its aliases.
263+
func (lg LinkGraph) BuildDirectoryAliasMap(logger logr.Logger) map[string][]string {
264+
// Start with basic symlink aliases
265+
aliases := make(map[string][]string)
266+
for linkPath, node := range lg {
267+
if node.IsSymlink() && node.Deps != nil {
268+
targetPath := node.Deps.Name
269+
aliases[targetPath] = append(aliases[targetPath], linkPath)
270+
}
271+
}
272+
273+
iteration := 0
274+
// Monotonic fixpoint: each successful addAliasIfNew strictly increases the
275+
// multiset of (target, alias) pairs; possible pairs are finite for a finite graph.
276+
for {
277+
changed := false
278+
iteration++
279+
280+
for linkPath, linkNode := range lg {
281+
if linkNode.Deps == nil {
282+
continue
283+
}
284+
targetPath := linkNode.Deps.Name
285+
286+
// Handle hardlinks pointing to symlinks (real or virtual)
287+
if linkNode.IsHardlink() {
288+
// Skip if we've already processed this hardlink as a virtual symlink
289+
if linkNode.VirtualLinkTarget != "" {
290+
continue
291+
}
292+
293+
// Skip if the target is not a symlink/virtual-symlink
294+
linkTarget, found := getLinkTarget(targetPath, lg)
295+
if !found {
296+
continue
297+
}
298+
299+
// Create a virtual symlink at the hardlink's location: interpret the
300+
// target symlink's linkname as relative to this hardlink's directory
301+
// (same inode semantics users see when opening the path via the hardlink).
302+
resolvedFromHardlink := resolveRelativeLinkFrom(linkPath, linkTarget)
303+
304+
if addAliasIfNew(aliases, resolvedFromHardlink, linkPath) {
305+
linkNode.VirtualLinkTarget = linkTarget
306+
logger.V(log.TRC).Info("adding hardlink alias",
307+
"from", linkPath,
308+
"to", resolvedFromHardlink,
309+
"via_target", targetPath,
310+
"iteration", iteration)
311+
changed = true
312+
}
313+
}
314+
315+
// Handle symlinks pointing to other links (directly or via hardlink)
316+
if linkNode.IsSymlink() && linkNode.OriginalLinkname != "" {
317+
linkTarget, found := getEffectiveLinkTarget(targetPath, lg)
318+
if !found {
319+
continue
320+
}
321+
322+
resolvedFromSymlink := resolveRelativeLinkFrom(targetPath, linkTarget)
323+
324+
if addAliasIfNew(aliases, resolvedFromSymlink, linkPath) {
325+
logger.V(log.TRC).Info("adding symlink alias",
326+
"from", linkPath,
327+
"to", resolvedFromSymlink,
328+
"via_target", targetPath,
329+
"iteration", iteration)
330+
changed = true
331+
}
332+
}
333+
}
334+
335+
if !changed {
336+
break
337+
}
338+
}
339+
340+
logger.V(log.DBG).Info("directory alias map built", "iterations", iteration, "total_aliases", len(aliases))
341+
342+
return aliases
343+
}

0 commit comments

Comments
 (0)