Skip to content

Commit 7f3bb42

Browse files
committed
engine: support more robust handling of links in tar stream
Signed-off-by: Caleb Xu <caxu@redhat.com> Assisted-by: Cursor
1 parent 8381364 commit 7f3bb42

5 files changed

Lines changed: 970 additions & 184 deletions

File tree

internal/engine/engine.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ func generateBundleHash(ctx context.Context, bundlePath string) (string, error)
342342
slices.Sort(keys)
343343

344344
for _, k := range keys {
345-
hashBuffer.WriteString(fmt.Sprintf("%s %s\n", k, files[k]))
345+
fmt.Fprintf(&hashBuffer, "%s %s\n", k, files[k])
346346
}
347347

348348
artifactsWriter := artifacts.WriterFromContext(ctx)

internal/engine/graph.go

Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
package engine
2+
3+
import (
4+
"archive/tar"
5+
"path/filepath"
6+
"slices"
7+
8+
"github.com/go-logr/logr"
9+
10+
"github.com/redhat-openshift-ecosystem/openshift-preflight/internal/log"
11+
)
12+
13+
// linkType is a convenience type that makes the consuming functions clearer.
// Its two values reuse the archive/tar type flags for hard and symbolic links.
type linkType byte

const (
	hardlink linkType = tar.TypeLink
	symlink  linkType = tar.TypeSymlink
)

// String returns the string representation of the linkType. Every value other
// than hardlink is reported as "symlink".
func (lt linkType) String() string {
	switch lt {
	case hardlink:
		return "hardlink"
	default:
		return "symlink"
	}
}
28+
29+
type linkNode struct {
30+
Name string
31+
Deps *linkNode
32+
OriginalLinkname string // For symlinks, the original target string before resolution
33+
Type linkType // Type of link (symlink or hardlink)
34+
VirtualLinkTarget string // For hardlinks pointing to symlinks, the symlink's target string
35+
}
36+
37+
func (n *linkNode) IsHardlink() bool {
38+
return n.Type == hardlink
39+
}
40+
41+
func (n *linkNode) IsSymlink() bool {
42+
return n.Type == symlink
43+
}
44+
45+
// ChainTypesToFollow returns the link types that should be followed in a chain
46+
// starting from this node. Hardlinks follow both hardlink and symlink chains,
47+
// while symlinks only follow symlink chains.
48+
func (n *linkNode) ChainTypesToFollow() []linkType {
49+
if n.IsHardlink() {
50+
return []linkType{hardlink, symlink}
51+
}
52+
return []linkType{symlink}
53+
}
54+
55+
type LinkGraph map[string]*linkNode
56+
57+
type extractionContext struct {
58+
linkGraph LinkGraph
59+
neededFiles *[]string
60+
logger logr.Logger
61+
}
62+
63+
// ExpandFilePathAliases generates all possible paths to a file through directory symlinks.
64+
// For example, for a symlink /a/b/c -> /foo/bar (directory), and a file /foo/bar/baz, this
65+
// will return both /a/b/c/baz and /foo/bar/baz.
66+
func (lg LinkGraph) ExpandFilePathAliases(filePath string, symlinkAliases map[string][]string) []string {
67+
expanding := make(map[string]struct{})
68+
return lg.expandFilePathAliasesRec(filePath, symlinkAliases, expanding)
69+
}
70+
71+
func (lg LinkGraph) expandFilePathAliasesRec(filePath string, symlinkAliases map[string][]string, expanding map[string]struct{}) []string {
72+
results := []string{filePath}
73+
visited := make(map[string]struct{})
74+
visited[filePath] = struct{}{}
75+
76+
expanding[filePath] = struct{}{}
77+
defer delete(expanding, filePath)
78+
79+
// Check all parent directories to see if they have symlink aliases
80+
dir := filePath
81+
for {
82+
dir = filepath.Dir(dir)
83+
if dir == "." || dir == "/" {
84+
break
85+
}
86+
87+
// Check if this directory has any symlinks pointing to it
88+
if aliases, ok := symlinkAliases[dir]; ok {
89+
for _, symlinkPath := range aliases {
90+
// Replace the directory portion with the symlink path
91+
// For example: /usr/share/rpm/file.db with symlink /usr/lib/sysimage/rpm -> /usr/share/rpm
92+
// becomes /usr/lib/sysimage/rpm/file.db
93+
relativePath, err := filepath.Rel(dir, filePath)
94+
if err != nil {
95+
continue
96+
}
97+
aliasedPath := filepath.Join(symlinkPath, relativePath)
98+
if _, seen := visited[aliasedPath]; !seen {
99+
visited[aliasedPath] = struct{}{}
100+
results = append(results, aliasedPath)
101+
// Recursively find aliases of the aliased path. Skip recursion if aliasedPath
102+
// is already being expanded (directory symlink cycles).
103+
if _, onStack := expanding[aliasedPath]; !onStack {
104+
nestedAliases := lg.expandFilePathAliasesRec(aliasedPath, symlinkAliases, expanding)
105+
for _, nested := range nestedAliases {
106+
if _, seen := visited[nested]; !seen {
107+
visited[nested] = struct{}{}
108+
results = append(results, nested)
109+
}
110+
}
111+
}
112+
}
113+
}
114+
}
115+
}
116+
117+
return results
118+
}
119+
120+
// walkGraphChain walks a graph chain starting from a node, applying a visitor function
121+
// to each node in the chain. Returns when the chain ends or visitor returns false.
122+
func walkGraphChain(start string, graph LinkGraph, visitor func(node string, deps *linkNode) bool) {
123+
current := start
124+
for {
125+
node, ok := graph[current]
126+
if !ok || node.Deps == nil {
127+
break
128+
}
129+
if !visitor(node.Deps.Name, node.Deps) {
130+
break
131+
}
132+
current = node.Deps.Name
133+
}
134+
}
135+
136+
// followLinkChain adds all links in the chain starting from the given link.
137+
// For example, if linkA -> linkB -> directory, this adds both linkA and linkB
138+
// to neededFiles. If filterType is provided (non-nil), only links of that type are followed.
139+
func (ec *extractionContext) followLinkChain(startLink string, filterType *linkType) {
140+
walkGraphChain(startLink, ec.linkGraph, func(target string, deps *linkNode) bool {
141+
// If the target is also a link in the graph
142+
if targetNode, isTargetLink := ec.linkGraph[target]; isTargetLink {
143+
// If filtering by type, check if target matches
144+
if filterType != nil && targetNode.Type != *filterType {
145+
return false
146+
}
147+
*ec.neededFiles = append(*ec.neededFiles, target)
148+
linkTypeStr := targetNode.Type.String()
149+
ec.logger.V(log.TRC).Info("adding transitive directory "+linkTypeStr, linkTypeStr, target, "via", startLink)
150+
return true
151+
}
152+
return false
153+
})
154+
}
155+
156+
// processLink processes a link (symlink or hardlink) by adding it to neededFiles,
157+
// following its chain, and recursively processing its target and parents.
158+
// The logContext parameter provides context for logging (e.g., "parent directory", "target").
159+
func (ec *extractionContext) processLink(path string, node *linkNode, logContext string, originalFile string, visited map[string]struct{}) {
160+
*ec.neededFiles = append(*ec.neededFiles, path)
161+
162+
ec.logger.V(log.TRC).Info("adding "+logContext+" "+node.Type.String(), node.Type.String(), path, "for_file", originalFile)
163+
164+
// Follow link chains based on node type
165+
for _, chainType := range node.ChainTypesToFollow() {
166+
ec.followLinkChain(path, &chainType)
167+
}
168+
169+
// Recursively process the target's parent directories AND the target itself
170+
// (the target might also be a link)
171+
if node.Deps != nil {
172+
depName := node.Deps.Name
173+
174+
// First check if the target itself is a link
175+
if _, seen := visited[depName]; !seen {
176+
visited[depName] = struct{}{}
177+
if depNode, isLink := ec.linkGraph[depName]; isLink {
178+
ec.processLink(depName, depNode, "target", originalFile, visited)
179+
}
180+
}
181+
182+
// Then check the target's parents
183+
ec.addParentLinks(depName, originalFile, visited)
184+
}
185+
}
186+
187+
func (ec *extractionContext) addParentLinks(path string, originalFile string, visited map[string]struct{}) {
188+
// Check all parent directories up to root
189+
dir := filepath.Dir(path)
190+
for dir != "." && dir != "/" {
191+
if _, seen := visited[dir]; seen {
192+
break
193+
}
194+
visited[dir] = struct{}{}
195+
196+
// Check if this directory is a link (symlink or hardlink)
197+
if node, isLink := ec.linkGraph[dir]; isLink {
198+
ec.processLink(dir, node, "parent directory", originalFile, visited)
199+
}
200+
201+
dir = filepath.Dir(dir)
202+
}
203+
}
204+
205+
// resolveRelativeLinkFrom resolves linkTarget against the directory that
// contains linkPath and returns the cleaned result.
//
// NOTE: an absolute linkTarget is not treated specially; it is joined beneath
// the link's directory like any other element.
func resolveRelativeLinkFrom(linkPath, linkTarget string) string {
	base := filepath.Dir(linkPath)
	joined := filepath.Join(base, linkTarget)
	return filepath.Clean(joined)
}
208+
209+
// addAliasIfNew records alias under target unless it is already listed there.
// It reports whether the map was modified.
func addAliasIfNew(aliases map[string][]string, target, alias string) bool {
	if slices.Contains(aliases[target], alias) {
		return false
	}
	aliases[target] = append(aliases[target], alias)
	return true
}
218+
219+
// getLinkTarget returns the link target (original linkname) for a path,
220+
// checking both real symlinks and virtual links
221+
func getLinkTarget(path string, linkGraph LinkGraph) (linkTarget string, found bool) {
222+
if linkNode, exists := linkGraph[path]; exists {
223+
if linkNode.IsSymlink() && linkNode.OriginalLinkname != "" {
224+
return linkNode.OriginalLinkname, true
225+
}
226+
if linkNode.VirtualLinkTarget != "" {
227+
return linkNode.VirtualLinkTarget, true
228+
}
229+
}
230+
return "", false
231+
}
232+
233+
// getEffectiveLinkTarget checks if a path is a symlink/virtual-symlink,
234+
// or if it's a hardlink pointing to a symlink/virtual-symlink.
235+
// Returns the ultimate symlink target string.
236+
func getEffectiveLinkTarget(path string, linkGraph LinkGraph) (linkTarget string, found bool) {
237+
// First check if path itself is a symlink/virtual-symlink
238+
if target, ok := getLinkTarget(path, linkGraph); ok {
239+
return target, true
240+
}
241+
242+
// If path is a hardlink, check if its target is a symlink/virtual-symlink
243+
if node, exists := linkGraph[path]; exists && node.IsHardlink() && node.Deps != nil {
244+
return getLinkTarget(node.Deps.Name, linkGraph)
245+
}
246+
247+
return "", false
248+
}
249+
250+
// BuildDirectoryAliasMap builds a backlink map for all links in the
251+
// LinkGraph. The resulting map associates each link in the graph
252+
// with its aliases.
253+
func (lg LinkGraph) BuildDirectoryAliasMap(logger logr.Logger) map[string][]string {
254+
// Start with basic symlink aliases
255+
aliases := make(map[string][]string)
256+
for linkPath, node := range lg {
257+
if node.IsSymlink() && node.Deps != nil {
258+
targetPath := node.Deps.Name
259+
aliases[targetPath] = append(aliases[targetPath], linkPath)
260+
}
261+
}
262+
263+
changed := true
264+
iteration := 0
265+
maxIterations := 100 // Safety limit to prevent infinite loops
266+
267+
for changed && iteration < maxIterations {
268+
changed = false
269+
iteration++
270+
271+
for linkPath, linkNode := range lg {
272+
if linkNode.Deps == nil {
273+
continue
274+
}
275+
targetPath := linkNode.Deps.Name
276+
277+
// Handle hardlinks pointing to symlinks (real or virtual)
278+
if linkNode.IsHardlink() {
279+
// Skip if we've already processed this hardlink as a virtual symlink
280+
if linkNode.VirtualLinkTarget != "" {
281+
continue
282+
}
283+
284+
// Skip if the target is not a symlink/virtual-symlink
285+
linkTarget, found := getLinkTarget(targetPath, lg)
286+
if !found {
287+
continue
288+
}
289+
290+
// Create a virtual symlink at the hardlink's location
291+
resolvedFromHardlink := resolveRelativeLinkFrom(linkPath, linkTarget)
292+
293+
if addAliasIfNew(aliases, resolvedFromHardlink, linkPath) {
294+
linkNode.VirtualLinkTarget = linkTarget
295+
logger.V(log.TRC).Info("adding hardlink alias",
296+
"from", linkPath,
297+
"to", resolvedFromHardlink,
298+
"via_target", targetPath,
299+
"iteration", iteration)
300+
changed = true
301+
}
302+
}
303+
304+
// Handle symlinks pointing to other links (directly or via hardlink)
305+
if linkNode.IsSymlink() && linkNode.OriginalLinkname != "" {
306+
linkTarget, found := getEffectiveLinkTarget(targetPath, lg)
307+
if !found {
308+
continue
309+
}
310+
311+
resolvedFromSymlink := resolveRelativeLinkFrom(linkPath, linkTarget)
312+
313+
if addAliasIfNew(aliases, resolvedFromSymlink, linkPath) {
314+
logger.V(log.TRC).Info("adding symlink alias",
315+
"from", linkPath,
316+
"to", resolvedFromSymlink,
317+
"via_target", targetPath,
318+
"iteration", iteration)
319+
changed = true
320+
}
321+
}
322+
}
323+
}
324+
325+
if iteration >= maxIterations {
326+
logger.V(log.DBG).Info("directory alias map build process reached max iterations, may have incomplete aliases", "total_aliases", len(aliases))
327+
} else {
328+
logger.V(log.DBG).Info("directory alias map built", "iterations", iteration, "total_aliases", len(aliases))
329+
}
330+
331+
return aliases
332+
}

internal/engine/graph_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package engine
2+
3+
import (
4+
. "github.com/onsi/ginkgo/v2"
5+
. "github.com/onsi/gomega"
6+
)
7+
8+
var _ = Describe("LinkGraph ExpandFilePathAliases", func() {
9+
It("terminates with a finite set when directory symlink aliases form a cycle", func() {
10+
// Mutual directory symlinks: link "b" points at directory "a", link "a" points at "b".
11+
// Matches BuildDirectoryAliasMap: aliases[target] = symlink paths pointing to target.
12+
symlinkAliases := map[string][]string{
13+
"a": {"b"},
14+
"b": {"a"},
15+
}
16+
var lg LinkGraph
17+
18+
out := lg.ExpandFilePathAliases("a/x", symlinkAliases)
19+
20+
Expect(out).To(ConsistOf("a/x", "b/x"))
21+
})
22+
})

0 commit comments

Comments
 (0)