Skip to content

Commit 755fb96

Browse files
committed
engine: support more robust handling of links in tar stream
Signed-off-by: Caleb Xu <caxu@redhat.com>
1 parent d8d9f10 commit 755fb96

4 files changed

Lines changed: 905 additions & 181 deletions

File tree

internal/engine/engine.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ func generateBundleHash(ctx context.Context, bundlePath string) (string, error)
333333
slices.Sort(keys)
334334

335335
for _, k := range keys {
336-
hashBuffer.WriteString(fmt.Sprintf("%s %s\n", k, files[k]))
336+
fmt.Fprintf(&hashBuffer, "%s %s\n", k, files[k])
337337
}
338338

339339
artifactsWriter := artifacts.WriterFromContext(ctx)

internal/engine/graph.go

Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
package engine
2+
3+
import (
4+
"archive/tar"
5+
"path/filepath"
6+
"slices"
7+
8+
"github.com/go-logr/logr"
9+
10+
"github.com/redhat-openshift-ecosystem/openshift-preflight/internal/log"
11+
)
12+
13+
// linkType distinguishes the two kinds of tar link entries handled by the
// link graph. Its values are the corresponding archive/tar type flags.
type linkType byte

// Supported link kinds.
const (
	hardlink linkType = tar.TypeLink
	symlink  linkType = tar.TypeSymlink
)

// String returns "hardlink" for the hardlink type and "symlink" for every
// other value.
func (lt linkType) String() string {
	switch lt {
	case hardlink:
		return "hardlink"
	default:
		return "symlink"
	}
}
28+
29+
type linkNode struct {
30+
Name string
31+
Deps *linkNode
32+
OriginalLinkname string // For symlinks, the original target string before resolution
33+
Type linkType // Type of link (symlink or hardlink)
34+
VirtualLinkTarget string // For hardlinks pointing to symlinks, the symlink's target string
35+
}
36+
37+
func (n *linkNode) IsHardlink() bool {
38+
return n.Type == hardlink
39+
}
40+
41+
func (n *linkNode) IsSymlink() bool {
42+
return n.Type == symlink
43+
}
44+
45+
// ChainTypesToFollow returns the link types that should be followed in a chain
46+
// starting from this node. Hardlinks follow both hardlink and symlink chains,
47+
// while symlinks only follow symlink chains.
48+
func (n *linkNode) ChainTypesToFollow() []linkType {
49+
if n.IsHardlink() {
50+
return []linkType{hardlink, symlink}
51+
}
52+
return []linkType{symlink}
53+
}
54+
55+
// LinkGraph maps a link's path to the linkNode describing that link.
type LinkGraph map[string]*linkNode
56+
57+
type extractionContext struct {
58+
linkGraph LinkGraph
59+
neededFiles *[]string
60+
logger logr.Logger
61+
}
62+
63+
// ExpandFilePathAliases generates all possible paths to a file through directory symlinks.
64+
// For example, for a symlink /a/b/c -> /foo/bar (directory), and a file /foo/bar/baz, this
65+
// will return both /a/b/c/baz and /foo/bar/baz.
66+
func (lg LinkGraph) ExpandFilePathAliases(filePath string, symlinkAliases map[string][]string) []string {
67+
results := []string{filePath}
68+
visited := make(map[string]struct{})
69+
visited[filePath] = struct{}{}
70+
71+
// Check all parent directories to see if they have symlink aliases
72+
dir := filePath
73+
for {
74+
dir = filepath.Dir(dir)
75+
if dir == "." || dir == "/" {
76+
break
77+
}
78+
79+
// Check if this directory has any symlinks pointing to it
80+
if aliases, ok := symlinkAliases[dir]; ok {
81+
for _, symlinkPath := range aliases {
82+
// Replace the directory portion with the symlink path
83+
// For example: /usr/share/rpm/file.db with symlink /usr/lib/sysimage/rpm -> /usr/share/rpm
84+
// becomes /usr/lib/sysimage/rpm/file.db
85+
relativePath, err := filepath.Rel(dir, filePath)
86+
if err != nil {
87+
continue
88+
}
89+
aliasedPath := filepath.Join(symlinkPath, relativePath)
90+
if _, seen := visited[aliasedPath]; !seen {
91+
visited[aliasedPath] = struct{}{}
92+
results = append(results, aliasedPath)
93+
// Recursively find aliases of the aliased path
94+
nestedAliases := lg.ExpandFilePathAliases(aliasedPath, symlinkAliases)
95+
for _, nested := range nestedAliases {
96+
if _, seen := visited[nested]; !seen {
97+
visited[nested] = struct{}{}
98+
results = append(results, nested)
99+
}
100+
}
101+
}
102+
}
103+
}
104+
}
105+
106+
return results
107+
}
108+
109+
// walkGraphChain walks a graph chain starting from a node, applying a visitor function
110+
// to each node in the chain. Returns when the chain ends or visitor returns false.
111+
func walkGraphChain(start string, graph LinkGraph, visitor func(node string, deps *linkNode) bool) {
112+
current := start
113+
for {
114+
node, ok := graph[current]
115+
if !ok || node.Deps == nil {
116+
break
117+
}
118+
if !visitor(node.Deps.Name, node.Deps) {
119+
break
120+
}
121+
current = node.Deps.Name
122+
}
123+
}
124+
125+
// followLinkChain adds all links in the chain starting from the given link.
126+
// For example, if linkA -> linkB -> directory, this adds both linkA and linkB
127+
// to neededFiles. If filterType is provided (non-nil), only links of that type are followed.
128+
func (ec *extractionContext) followLinkChain(startLink string, filterType *linkType) {
129+
walkGraphChain(startLink, ec.linkGraph, func(target string, deps *linkNode) bool {
130+
// If the target is also a link in the graph
131+
if targetNode, isTargetLink := ec.linkGraph[target]; isTargetLink {
132+
// If filtering by type, check if target matches
133+
if filterType != nil && targetNode.Type != *filterType {
134+
return false
135+
}
136+
*ec.neededFiles = append(*ec.neededFiles, target)
137+
linkTypeStr := targetNode.Type.String()
138+
ec.logger.V(log.TRC).Info("adding transitive directory "+linkTypeStr, linkTypeStr, target, "via", startLink)
139+
return true
140+
}
141+
return false
142+
})
143+
}
144+
145+
// processLink processes a link (symlink or hardlink) by adding it to neededFiles,
146+
// following its chain, and recursively processing its target and parents.
147+
// The logContext parameter provides context for logging (e.g., "parent directory", "target").
148+
func (ec *extractionContext) processLink(path string, node *linkNode, logContext string, originalFile string, visited map[string]struct{}) {
149+
*ec.neededFiles = append(*ec.neededFiles, path)
150+
151+
ec.logger.V(log.TRC).Info("adding "+logContext+" "+node.Type.String(), node.Type.String(), path, "for_file", originalFile)
152+
153+
// Follow link chains based on node type
154+
for _, chainType := range node.ChainTypesToFollow() {
155+
ec.followLinkChain(path, &chainType)
156+
}
157+
158+
// Recursively process the target's parent directories AND the target itself
159+
// (the target might also be a link)
160+
if node.Deps != nil {
161+
depName := node.Deps.Name
162+
163+
// First check if the target itself is a link
164+
if _, seen := visited[depName]; !seen {
165+
visited[depName] = struct{}{}
166+
if depNode, isLink := ec.linkGraph[depName]; isLink {
167+
ec.processLink(depName, depNode, "target", originalFile, visited)
168+
}
169+
}
170+
171+
// Then check the target's parents
172+
ec.addParentLinks(depName, originalFile, visited)
173+
}
174+
}
175+
176+
func (ec *extractionContext) addParentLinks(path string, originalFile string, visited map[string]struct{}) {
177+
// Check all parent directories up to root
178+
dir := filepath.Dir(path)
179+
for dir != "." && dir != "/" {
180+
if _, seen := visited[dir]; seen {
181+
break
182+
}
183+
visited[dir] = struct{}{}
184+
185+
// Check if this directory is a link (symlink or hardlink)
186+
if node, isLink := ec.linkGraph[dir]; isLink {
187+
ec.processLink(dir, node, "parent directory", originalFile, visited)
188+
}
189+
190+
dir = filepath.Dir(dir)
191+
}
192+
}
193+
194+
// resolveRelativeLinkFrom resolves linkTarget, the target string of the link
// located at linkPath, into a cleaned path.
//
// A relative target is interpreted relative to the directory containing
// linkPath. An absolute target is resolved from the root rather than being
// nested under the link's directory: it is returned rooted when linkPath is
// itself absolute, and root-relative (no leading separator) otherwise, so the
// result stays in the same namespace as linkPath.
func resolveRelativeLinkFrom(linkPath, linkTarget string) string {
	if !filepath.IsAbs(linkTarget) {
		// filepath.Join already cleans its result.
		return filepath.Join(filepath.Dir(linkPath), linkTarget)
	}

	target := filepath.Clean(linkTarget)
	if filepath.IsAbs(linkPath) {
		return target
	}

	// linkPath is root-relative, so express the absolute target the same way.
	if rel, err := filepath.Rel(string(filepath.Separator), target); err == nil {
		return rel
	}
	return target
}
197+
198+
// addAliasIfNew appends alias to the aliases recorded for target unless it is
// already listed there. It reports whether the map was changed.
func addAliasIfNew(aliases map[string][]string, target, alias string) bool {
	existing := aliases[target]
	if slices.Contains(existing, alias) {
		return false
	}
	aliases[target] = append(existing, alias)
	return true
}
207+
208+
// getLinkTarget returns the link target (original linkname) for a path,
209+
// checking both real symlinks and virtual links
210+
func getLinkTarget(path string, linkGraph LinkGraph) (linkTarget string, found bool) {
211+
if linkNode, exists := linkGraph[path]; exists {
212+
if linkNode.IsSymlink() && linkNode.OriginalLinkname != "" {
213+
return linkNode.OriginalLinkname, true
214+
}
215+
if linkNode.VirtualLinkTarget != "" {
216+
return linkNode.VirtualLinkTarget, true
217+
}
218+
}
219+
return "", false
220+
}
221+
222+
// getEffectiveLinkTarget checks if a path is a symlink/virtual-symlink,
223+
// or if it's a hardlink pointing to a symlink/virtual-symlink.
224+
// Returns the ultimate symlink target string.
225+
func getEffectiveLinkTarget(path string, linkGraph LinkGraph) (linkTarget string, found bool) {
226+
// First check if path itself is a symlink/virtual-symlink
227+
if target, ok := getLinkTarget(path, linkGraph); ok {
228+
return target, true
229+
}
230+
231+
// If path is a hardlink, check if its target is a symlink/virtual-symlink
232+
if node, exists := linkGraph[path]; exists && node.IsHardlink() && node.Deps != nil {
233+
return getLinkTarget(node.Deps.Name, linkGraph)
234+
}
235+
236+
return "", false
237+
}
238+
239+
// BuildDirectoryAliasMap builds a backlink map for all links in the
240+
// LinkGraph. The resulting map associates each link in the graph
241+
// with its aliases.
242+
func (lg LinkGraph) BuildDirectoryAliasMap(logger logr.Logger) map[string][]string {
243+
// Start with basic symlink aliases
244+
aliases := make(map[string][]string)
245+
for linkPath, node := range lg {
246+
if node.IsSymlink() && node.Deps != nil {
247+
targetPath := node.Deps.Name
248+
aliases[targetPath] = append(aliases[targetPath], linkPath)
249+
}
250+
}
251+
252+
changed := true
253+
iteration := 0
254+
maxIterations := 100 // Safety limit to prevent infinite loops
255+
256+
for changed && iteration < maxIterations {
257+
changed = false
258+
iteration++
259+
260+
for linkPath, linkNode := range lg {
261+
if linkNode.Deps == nil {
262+
continue
263+
}
264+
targetPath := linkNode.Deps.Name
265+
266+
// Handle hardlinks pointing to symlinks (real or virtual)
267+
if linkNode.IsHardlink() {
268+
// Skip if we've already processed this hardlink as a virtual symlink
269+
if linkNode.VirtualLinkTarget != "" {
270+
continue
271+
}
272+
273+
// Skip if the target is not a symlink/virtual-symlink
274+
linkTarget, found := getLinkTarget(targetPath, lg)
275+
if !found {
276+
continue
277+
}
278+
279+
// Create a virtual symlink at the hardlink's location
280+
resolvedFromHardlink := resolveRelativeLinkFrom(linkPath, linkTarget)
281+
282+
if addAliasIfNew(aliases, resolvedFromHardlink, linkPath) {
283+
linkNode.VirtualLinkTarget = linkTarget
284+
logger.V(log.TRC).Info("adding hardlink alias",
285+
"from", linkPath,
286+
"to", resolvedFromHardlink,
287+
"via_target", targetPath,
288+
"iteration", iteration)
289+
changed = true
290+
}
291+
}
292+
293+
// Handle symlinks pointing to other links (directly or via hardlink)
294+
if linkNode.IsSymlink() && linkNode.OriginalLinkname != "" {
295+
linkTarget, found := getEffectiveLinkTarget(targetPath, lg)
296+
if !found {
297+
continue
298+
}
299+
300+
resolvedFromSymlink := resolveRelativeLinkFrom(linkPath, linkTarget)
301+
302+
if addAliasIfNew(aliases, resolvedFromSymlink, linkPath) {
303+
logger.V(log.TRC).Info("adding symlink alias",
304+
"from", linkPath,
305+
"to", resolvedFromSymlink,
306+
"via_target", targetPath,
307+
"iteration", iteration)
308+
changed = true
309+
}
310+
}
311+
}
312+
}
313+
314+
if iteration >= maxIterations {
315+
logger.V(log.DBG).Info("directory alias map build process reached max iterations, may have incomplete aliases", "total_aliases", len(aliases))
316+
} else {
317+
logger.V(log.DBG).Info("directory alias map built", "iterations", iteration, "total_aliases", len(aliases))
318+
}
319+
320+
return aliases
321+
}

0 commit comments

Comments
 (0)