Skip to content

Commit f2c5e94

Browse files
authored
[ANE-2716] tar extract with long names (#1635)
* deal with GNU long name entries in tarballs * add links to how L and K links work in the comment * add a regression test * add K links to the test * update the changelog * fix formatting * backport a change into the Changelog
1 parent 8491141 commit f2c5e94

4 files changed

Lines changed: 88 additions & 1 deletion

File tree

Changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
# FOSSA CLI Changelog
22

3+
## 3.15.6
4+
5+
- Archive uploads: Fix a bug where tar files with long filenames created by GNU tar would not extract correctly ([#1635](https://github.com/fossas/fossa-cli/pull/1635))
6+
37
## 3.15.5
8+
- Fix reporting of transitive dependencies in pnpm v9 lockfiles ([#1632](https://github.com/fossas/fossa-cli/pull/1632))
49
- Jar call-graph update - Attempt to fix a reachability issue ([#1634](https://github.com/fossas/fossa-cli/pull/1634))
510

611
## 3.15.4

src/Discovery/Archive.hs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,12 +190,38 @@ extractTarBz2 :: Has (Lift IO) sig m => Path Abs Dir -> Path Abs File -> m ()
190190
extractTarBz2 dir tarGzFile =
191191
sendIO $ Tar.unpack (fromAbsDir dir) . removeTarLinks . readTar . BZip.decompress =<< BL.readFile (fromAbsFile tarGzFile)
192192

193-
-- The tar unpacker dies when tar files reference files outside of the archive root
193+
-- The tar unpacker dies when tar files reference files outside of the archive root.
194+
-- We also need to remove GNU long name entries (type 'L' and 'K') that precede
195+
-- symbolic/hard links, otherwise removing the link leaves orphaned long name entries
196+
-- which causes TwoTypeLEntries errors during unpacking.
197+
--
198+
-- GNU tar uses special entry types for paths exceeding 100 characters:
199+
-- - Type 'L': Contains the full path for the following entry's filename
200+
-- - Type 'K': Contains the full path for the following entry's link target
201+
--
202+
-- Reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
203+
-- (search for "typeflag" - 'L' and 'K' are GNU extensions listed in the table)
204+
-- Reference: https://hackage.haskell.org/package/tar/docs/Codec-Archive-Tar-Entry.html#t:GenEntryContent
205+
-- (see OtherEntryType documentation which explains 'L' and 'K' type codes)
194206
removeTarLinks :: Tar.Entries e -> Tar.Entries e
removeTarLinks = go []
  where
    -- Walk the entry stream. @pending@ holds, newest first, the run of GNU
    -- long name ('L') / long link target ('K') entries seen since the last
    -- ordinary entry. Whether they are kept depends on the entry they
    -- describe, so they are buffered until that entry is reached.
    go pending (Tar.Next x xs) =
      case Tar.entryContent x of
        -- Links are dropped together with every L/K entry that preceded them.
        Tar.HardLink _ -> go [] xs
        Tar.SymbolicLink _ -> go [] xs
        -- A link whose path AND target are both long is preceded by a 'K'
        -- entry followed by an 'L' entry, so a single-entry lookahead is not
        -- enough: buffer the whole consecutive run.
        Tar.OtherEntryType typeCode _ _
          | typeCode == 'L' || typeCode == 'K' -> go (x : pending) xs
        -- Any other entry is kept, along with the L/K entries describing it.
        _ -> emit pending (Tar.Next x (go [] xs))
    -- Trailing L/K entries with nothing after them are passed through
    -- unchanged; the unpacker decides what to do with them.
    go pending Tar.Done = emit pending Tar.Done
    go pending (Tar.Fail e) = emit pending (Tar.Fail e)

    -- Re-emit the buffered L/K entries in their original order ahead of @rest@.
    emit pending rest = foldr Tar.Next rest (reverse pending)

test/Discovery/ArchiveSpec.hs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,40 @@ spec = do
105105
tempDirExists <- sendIO $ PIO.doesDirExist extractedDir
106106
tempDirExists `shouldBe` False
107107

108+
-- Regression test for ANE-2716: tar archives with GNU long name entries (type 'L' and 'K')
109+
-- for symbolic links would fail with TwoTypeLEntries error because removeTarLinks
110+
-- removed the symlinks but left orphaned L/K entries.
111+
--
112+
-- GNU tar uses type 'L' entries to store paths exceeding 100 characters, and type 'K'
113+
-- entries for long link targets. Each L/K entry is followed by the actual file entry.
114+
-- Reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
115+
-- (search for "typeflag" - 'L' and 'K' are GNU extensions listed in the table)
116+
--
117+
-- The test archive contains both:
118+
-- - Type 'L' entries: symlinks with long paths (>100 chars)
119+
-- - Type 'K' entries: symlinks with long targets (>100 chars)
120+
describe "extract tar.xz archive with GNU long name entries (L and K) for symlinks" $ do
121+
target <- runIO longFileNameSymlinkTarXzPath
122+
result <- runIO $
123+
runStack . runDiagnostics . runFinally . failOnMaybe "extractTarXz" . withArchive extractTarXz target $ \dir -> do
124+
-- The symlinks are removed by removeTarLinks, but the target file should exist
125+
-- (it's in a deeply nested path that triggers L entries for the directories too)
126+
let targetPath =
127+
dir
128+
</> $(mkRelDir "symlink-longname-test")
129+
</> $(mkRelDir "very/deeply/nested/directory/structure/that/exceeds/one/hundred/characters/in/total/path/length")
130+
</> $(mkRelFile "target.txt")
131+
content <- sendIO . TIO.readFile . toFilePath $ targetPath
132+
pure (dir, content)
133+
134+
it "should extract without TwoTypeLEntries error" $ do
135+
assertOnSuccess result $ \_ (_, extractedContent) -> extractedContent `shouldBe` "target content\n"
136+
137+
it "should have cleaned up the temporary directory" $ do
138+
assertOnSuccess result $ \_ (extractedDir, _) -> do
139+
tempDirExists <- sendIO $ PIO.doesDirExist extractedDir
140+
tempDirExists `shouldBe` False
141+
108142
describe "extract tar.bz2 archive to a temporary location" $ do
109143
target <- runIO simpleTarBz2Path
110144
result <- runIO $
@@ -167,6 +201,28 @@ simpleTarXzPath = PIO.resolveFile' "test/Discovery/testdata/simple.tar.xz"
167201
simpleTarBz2Path :: IO (Path Abs File)
168202
simpleTarBz2Path = PIO.resolveFile' "test/Discovery/testdata/simple.tar.bz2"
169203

204+
-- Archive with GNU long name entries (type 'L' and 'K') for symlinks.
205+
-- Used to test regression fix for ANE-2716 (TwoTypeLEntries error).
206+
--
207+
-- GNU tar uses:
208+
-- - Type 'L' entries to store file paths exceeding 100 characters
209+
-- - Type 'K' entries to store link targets exceeding 100 characters
210+
-- Reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
211+
-- (search for "typeflag" - 'L' and 'K' are GNU extensions)
212+
--
213+
-- Created with:
214+
-- mkdir -p symlink-longname-test/very/deeply/nested/directory/structure/that/exceeds/one/hundred/characters/in/total/path/length
215+
-- echo "target content" > "symlink-longname-test/very/deeply/nested/directory/structure/that/exceeds/one/hundred/characters/in/total/path/length/target.txt"
216+
-- # Symlink with long PATH (triggers L entry):
217+
-- ln -s ../target.txt "symlink-longname-test/very/deeply/nested/directory/structure/that/exceeds/one/hundred/characters/in/total/path/length/symlink-long-path.txt"
218+
-- # Symlinks with long TARGETS (trigger K entries):
219+
-- ln -s "very/deeply/nested/directory/structure/that/exceeds/one/hundred/characters/in/total/path/length/target.txt" symlink-longname-test/symlink-long-target1.txt
220+
-- ln -s "very/deeply/nested/directory/structure/that/exceeds/one/hundred/characters/in/total/path/length/target.txt" symlink-longname-test/symlink-long-target2.txt
221+
-- gtar --format=gnu -cvf symlink-longname.tar symlink-longname-test && xz symlink-longname.tar
222+
-- rm -rf symlink-longname-test
223+
longFileNameSymlinkTarXzPath :: IO (Path Abs File)
224+
longFileNameSymlinkTarXzPath = PIO.resolveFile' "test/Discovery/testdata/symlink-longname.tar.xz"
225+
170226
expectedSimpleContentA :: Text
171227
expectedSimpleContentA = "6b5effe3-215a-49ec-9286-f0702f7eb529"
172228

568 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)