Skip to content

Commit a51fcc5

Browse files
committed
reorg for readability in program code and in UI
+ Reorganize namespaces for readability in program code. + Change descriptions of file trees in UI for more consistency.
1 parent 100c1c3 commit a51fcc5

17 files changed

Lines changed: 421 additions & 262 deletions
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
using System;
2+
using System.Collections.Generic;
3+
4+
namespace Pine.Core.Files;
5+
6+
/// <summary>
/// Helpers for obtaining file trees from a named blob whose content may be a ZIP archive, a gzip-compressed
/// TAR archive (".tar.gz" / ".tgz"), or a single gzip-compressed file (".gz").
/// </summary>
public class CommonMappings
{
    /// <summary>
    /// Lazily yields every tree that can be extracted from the given blob.
    /// </summary>
    /// <remarks>
    /// <para>
    /// ZIP extraction is attempted for every blob, independent of <paramref name="blobName"/>.
    /// </para>
    /// <para>
    /// After that, the name selects at most one further attempt (compared with
    /// <see cref="StringComparison.OrdinalIgnoreCase"/>): names ending in ".tar.gz" or ".tgz" are decompressed
    /// with gzip and parsed as a TAR archive; any other name ending in ".gz" is decompressed with gzip and
    /// yielded as a single blob.
    /// </para>
    /// <para>
    /// Exceptions raised by an extraction attempt are swallowed; a failed attempt simply contributes no result.
    /// </para>
    /// </remarks>
    /// <param name="blobName">Name of the blob including its file extension.</param>
    /// <param name="blobContent">Raw bytes of the blob.</param>
    /// <returns>Zero or more trees, in the order ZIP result first, then the TAR or gzip result.</returns>
    public static IEnumerable<BlobTreeWithStringPath> ExtractTreesFromNamedBlob(
        string blobName,
        ReadOnlyMemory<byte> blobContent)
    {
        if (TryExtractFromZip(blobContent) is { } zipTree)
            yield return zipTree;

        var looksLikeTarGz =
            blobName.EndsWith(".tar.gz", StringComparison.OrdinalIgnoreCase) ||
            blobName.EndsWith(".tgz", StringComparison.OrdinalIgnoreCase);

        if (looksLikeTarGz)
        {
            if (TryExtractFromTarGz(blobContent) is { } tarTree)
                yield return tarTree;
        }
        else if (blobName.EndsWith(".gz", StringComparison.OrdinalIgnoreCase))
        {
            var decompressed = TryDecompressGzip(blobContent);

            if (decompressed.HasValue)
                yield return BlobTreeWithStringPath.Blob(decompressed.Value);
        }
    }

    /// <summary>
    /// Parses the content as a ZIP archive and builds a sorted tree from its entries; returns null on any failure.
    /// </summary>
    private static BlobTreeWithStringPath? TryExtractFromZip(ReadOnlyMemory<byte> blobContent)
    {
        try
        {
            return
                PineValueComposition.SortedTreeFromSetOfBlobsWithCommonFilePath(
                    ZipArchive.EntriesFromZipArchive(blobContent));
        }
        catch
        {
            // Best effort: content that is not a readable ZIP archive yields no tree.
            return null;
        }
    }

    /// <summary>
    /// Decompresses the content with gzip and parses the result as a TAR archive; returns null on any failure.
    /// </summary>
    private static BlobTreeWithStringPath? TryExtractFromTarGz(ReadOnlyMemory<byte> blobContent)
    {
        try
        {
            return TarArchive.TreeWithStringPathFromTarArchive(BytesConversions.DecompressGzip(blobContent));
        }
        catch
        {
            // Best effort: content that cannot be decompressed or parsed yields no tree.
            return null;
        }
    }

    /// <summary>
    /// Decompresses the content with gzip; returns null on any failure.
    /// </summary>
    private static ReadOnlyMemory<byte>? TryDecompressGzip(ReadOnlyMemory<byte> blobContent)
    {
        try
        {
            return BytesConversions.DecompressGzip(blobContent);
        }
        catch
        {
            // Best effort: content that is not valid gzip yields no blob.
            return null;
        }
    }
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using Pine.Core.Addressing;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.Collections.Immutable;
5+
using System.Linq;
6+
7+
namespace Pine.Core.Files;
8+
9+
/// <summary>
/// Helper methods operating on <see cref="BlobTreeWithStringPath"/>.
/// </summary>
public static class FileTreeExtensions
{
    /// <summary>
    /// Produces human-readable descriptions of a file tree or blob, for use in command-line interfaces or logs.
    /// </summary>
    /// <remarks>
    /// For a tree node, the first line reports the file count and aggregate size; when
    /// <paramref name="listFiles"/> is set, a second item lists each file's path, size and the first ten
    /// hexadecimal characters of its hash. For a blob node, the first line reports the byte count; when
    /// <paramref name="extractFileName"/> is given, each tree extracted from the blob under that name is described
    /// in turn (without further nested extraction).
    /// </remarks>
    /// <param name="composition">The tree or blob to describe.</param>
    /// <param name="listFiles">Whether to include the per-file listing for tree nodes.</param>
    /// <param name="extractFileName">
    /// File name used to decide how to extract trees from a blob node, or null to skip extraction.
    /// </param>
    /// <returns>A lazily evaluated sequence of description strings.</returns>
    public static IEnumerable<string> DescribeFileTreeForHumans(
        BlobTreeWithStringPath composition,
        bool listFiles,
        string? extractFileName)
    {
        switch (composition)
        {
            case BlobTreeWithStringPath.TreeNode:
                {
                    var files = composition.EnumerateBlobsTransitive().ToImmutableList();

                    var aggregateSize = files.Sum(file => (long)file.blobContent.Length);

                    yield return
                        "a directory containing " + files.Count + " files with an aggregate size of " +
                        CommandLineInterface.FormatIntegerForDisplay(aggregateSize) + " bytes.";

                    if (listFiles)
                    {
                        var fileLines =
                            files.Select(file =>
                            {
                                var hash = PineValueHashTree.ComputeHash(PineValue.Blob(file.blobContent));

                                // Only a short prefix of the hash is shown to keep lines compact.
                                var shortHash = Convert.ToHexStringLower(hash.Span)[..10];

                                return
                                    string.Join("/", file.path) + " : " +
                                    file.blobContent.Length + " bytes, " +
                                    shortHash;
                            });

                        yield return "file paths, sizes and hashes:\n" + string.Join("\n", fileLines);
                    }

                    yield break;
                }

            case BlobTreeWithStringPath.BlobNode blob:
                {
                    yield return "a blob containing " + blob.Bytes.Length + " bytes";

                    if (extractFileName is null)
                        yield break;

                    foreach (var extracted in CommonMappings.ExtractTreesFromNamedBlob(extractFileName, blob.Bytes))
                    {
                        var extractedId =
                            Convert.ToHexStringLower(PineValueHashTree.ComputeHashNotSorted(extracted).Span);

                        // Nested descriptions pass null for the file name, so extraction does not recurse.
                        var nestedLines =
                            DescribeFileTreeForHumans(
                                extracted,
                                listFiles: listFiles,
                                extractFileName: null);

                        var extractedDescription = string.Join("\n", nestedLines);

                        yield return "Extracted composition " + extractedId + ", which is " + extractedDescription;
                    }

                    yield break;
                }
        }
    }
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Collections.Immutable;
4+
using System.IO;
5+
using System.Linq;
6+
7+
namespace Pine.Core.Files;
8+
9+
/// <summary>
/// Helpers for reading TAR archives into <see cref="BlobTreeWithStringPath"/> trees.
/// </summary>
public static class TarArchive
{
    /// <summary>
    /// Parses a TAR archive held in memory and returns its file entries as a tree keyed by string paths.
    /// </summary>
    /// <remarks>The archive bytes are copied into a new array before parsing, so the input buffer is not
    /// modified.</remarks>
    /// <param name="tarArchive">The bytes of the TAR archive.</param>
    /// <returns>The tree built by <see cref="TreeWithStringPathFromTarArchiveEntries"/> from the archive's
    /// entries.</returns>
    public static BlobTreeWithStringPath TreeWithStringPathFromTarArchive(ReadOnlyMemory<byte> tarArchive)
    {
        var archiveStream = new MemoryStream(tarArchive.ToArray());

        using (var archive = SharpCompress.Archives.Tar.TarArchive.Open(archiveStream))
        {
            return TreeWithStringPathFromTarArchiveEntries(archive.Entries);
        }
    }

    /// <summary>
    /// Builds a sorted tree of blobs from the given TAR entries, using each entry's key as its path.
    /// </summary>
    /// <remarks>Entries flagged as directories are skipped. The content of every remaining entry is read fully
    /// into memory before the tree is built.</remarks>
    /// <param name="entries">The TAR archive entries to read.</param>
    /// <returns>The tree produced by <c>BlobTreeWithStringPath.SortedTree</c> for the file entries.</returns>
    public static BlobTreeWithStringPath TreeWithStringPathFromTarArchiveEntries(
        IEnumerable<SharpCompress.Archives.Tar.TarArchiveEntry> entries)
    {
        var fileEntries =
            entries
            .Where(candidate => !candidate.IsDirectory)
            .Select(fileEntry =>
            {
                var content = ReadEntryContent(fileEntry);

                return (name: fileEntry.Key, component: BlobTreeWithStringPath.Blob(content));
            })
            .ToImmutableList();

        return BlobTreeWithStringPath.SortedTree(fileEntries);
    }

    /// <summary>
    /// Reads the complete content of a single TAR entry into a new byte array.
    /// </summary>
    private static byte[] ReadEntryContent(SharpCompress.Archives.Tar.TarArchiveEntry entry)
    {
        using var buffer = new MemoryStream();
        using var entryStream = entry.OpenEntryStream();

        entryStream.CopyTo(buffer);

        return buffer.ToArray();
    }
}
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
6+
namespace Pine.Core.Files;
7+
8+
/// <summary>
/// Helpers for creating and reading ZIP archives held entirely in memory.
/// <see href="https://en.wikipedia.org/wiki/ZIP_(file_format)"></see>
/// </summary>
public static class ZipArchive
{
    /// <summary>
    /// Last-write timestamp (1980-01-01T00:00:00Z) applied to entries when the caller does not supply one.
    /// See the ZIP timestamp handling in the .NET sources:
    /// https://github.com/dotnet/corefx/blob/a10890f4ffe0fadf090c922578ba0e606ebdd16c/src/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs#L206-L234
    /// </summary>
    public static DateTimeOffset EntryLastWriteTimeDefault => new(1980, 1, 1, 0, 0, 0, TimeSpan.Zero);

    /// <summary>
    /// Creates a ZIP archive from named file contents and returns the archive bytes.
    /// </summary>
    /// <remarks>Every entry receives <see cref="EntryLastWriteTimeDefault"/> as its last write time, so the
    /// output does not depend on when the archive is created.</remarks>
    /// <param name="entries">The files to add, each given as an entry name and its content.</param>
    /// <param name="compressionLevel">The compression level applied to every entry. Defaults to <see
    /// cref="System.IO.Compression.CompressionLevel.Optimal"/>.</param>
    /// <returns>The bytes of the ZIP archive. The result is never empty: an archive without entries still
    /// contains the ZIP end-of-central-directory record.</returns>
    public static byte[] ZipArchiveFromFiles(
        IEnumerable<(string name, ReadOnlyMemory<byte> content)> entries,
        System.IO.Compression.CompressionLevel compressionLevel = System.IO.Compression.CompressionLevel.Optimal) =>
        ZipArchiveFromFiles(
            entries.Select(entry => (entry.name, entry.content, lastWriteTime: EntryLastWriteTimeDefault)),
            compressionLevel);

    /// <summary>
    /// Creates a ZIP archive from files keyed by path segments and returns the archive bytes.
    /// </summary>
    /// <remarks>Each entry name is built by joining the key's path segments with a forward slash ('/'). Entries
    /// receive <see cref="EntryLastWriteTimeDefault"/> as their last write time.</remarks>
    /// <param name="entries">Maps each file's path segments to its content.</param>
    /// <param name="compressionLevel">The compression level applied to every entry. Defaults to <see
    /// cref="System.IO.Compression.CompressionLevel.Optimal"/>.</param>
    /// <returns>The bytes of the ZIP archive. The result is never empty: an archive without entries still
    /// contains the ZIP end-of-central-directory record.</returns>
    public static byte[] ZipArchiveFromFiles(
        IReadOnlyDictionary<IReadOnlyList<string>, ReadOnlyMemory<byte>> entries,
        System.IO.Compression.CompressionLevel compressionLevel = System.IO.Compression.CompressionLevel.Optimal) =>
        ZipArchiveFromFiles(
            entries.Select(entry => (name: string.Join("/", entry.Key), content: entry.Value)),
            compressionLevel);

    /// <summary>
    /// Creates a ZIP archive from named file contents with explicit last-write times and returns the archive
    /// bytes.
    /// </summary>
    /// <remarks>The archive is assembled in memory. Entry names are passed to the archive as given; this method
    /// does not check them for uniqueness.</remarks>
    /// <param name="entries">The files to add, each given as an entry name, its content, and the last write
    /// time to record for the entry.</param>
    /// <param name="compressionLevel">The compression level applied to every entry. Defaults to <see
    /// cref="System.IO.Compression.CompressionLevel.Optimal"/>.</param>
    /// <returns>The bytes of the ZIP archive. The result is never empty: an archive without entries still
    /// contains the ZIP end-of-central-directory record.</returns>
    public static byte[] ZipArchiveFromFiles(
        IEnumerable<(string name, ReadOnlyMemory<byte> content, DateTimeOffset lastWriteTime)> entries,
        System.IO.Compression.CompressionLevel compressionLevel = System.IO.Compression.CompressionLevel.Optimal)
    {
        using var stream = new MemoryStream();

        // leaveOpen keeps the MemoryStream usable after the archive is disposed. Disposing the archive is what
        // writes the central directory, so the bytes must only be taken after this block completes.
        using (var fclZipArchive =
            new System.IO.Compression.ZipArchive(
                stream,
                System.IO.Compression.ZipArchiveMode.Create,
                leaveOpen: true))
        {
            foreach (var (entryName, entryContent, lastWriteTime) in entries)
            {
                var entry = fclZipArchive.CreateEntry(entryName, compressionLevel);

                entry.LastWriteTime = lastWriteTime;

                using var entryStream = entry.Open();

                entryStream.Write(entryContent.Span);
            }
        }

        // ToArray copies everything written so far, regardless of the current stream position.
        return stream.ToArray();
    }

    /// <summary>
    /// Enumerates the file entries of a ZIP archive, each represented by its (flat) path and content.
    /// </summary>
    /// <remarks>Entries whose full name ends with '/' or '\' are treated as directories and skipped. Reading is
    /// deferred until the returned sequence is enumerated.</remarks>
    /// <param name="zipArchive">The bytes of the ZIP archive.</param>
    /// <returns>The name and content of every entry that is not a directory entry.</returns>
    public static IEnumerable<(string name, ReadOnlyMemory<byte> content)> FileEntriesFromZipArchive(
        ReadOnlyMemory<byte> zipArchive) =>
        EntriesFromZipArchive(
            zipArchive: zipArchive,
            includeEntry: entry => !(entry.FullName.EndsWith('/') || entry.FullName.EndsWith('\\')));

    /// <summary>
    /// Enumerates all entries of a ZIP archive, including directory entries, with their names and contents.
    /// </summary>
    /// <remarks>Entries are returned in the order in which the underlying
    /// <see cref="System.IO.Compression.ZipArchive"/> lists them. Reading is deferred until the returned
    /// sequence is enumerated.</remarks>
    /// <param name="zipArchive">The bytes of the ZIP archive.</param>
    /// <returns>The name and content of every entry in the archive.</returns>
    public static IEnumerable<(string name, ReadOnlyMemory<byte> content)> EntriesFromZipArchive(ReadOnlyMemory<byte> zipArchive) =>
        EntriesFromZipArchive(zipArchive: zipArchive, includeEntry: _ => true);

    /// <summary>
    /// Enumerates the entries of a ZIP archive that satisfy a filter, with their names and contents.
    /// </summary>
    /// <remarks>
    /// <para>
    /// This method is an iterator: the archive is opened when enumeration starts, so errors caused by malformed
    /// archive data surface while the sequence is being enumerated, not when this method is called.
    /// </para>
    /// <para>
    /// The archive bytes are copied once, and the content of each included entry is read fully into memory.
    /// </para>
    /// </remarks>
    /// <param name="zipArchive">The bytes of the ZIP archive.</param>
    /// <param name="includeEntry">Predicate deciding whether an entry is included. Entries for which it returns
    /// <see langword="false"/> are skipped without being read.</param>
    /// <returns>The full name and content of every entry accepted by <paramref name="includeEntry"/>.</returns>
    /// <exception cref="InvalidDataException">Thrown during enumeration if the number of bytes read from an
    /// entry does not match the length recorded for that entry.</exception>
    public static IEnumerable<(string name, ReadOnlyMemory<byte> content)> EntriesFromZipArchive(
        ReadOnlyMemory<byte> zipArchive,
        Func<System.IO.Compression.ZipArchiveEntry, bool> includeEntry)
    {
        // The archive takes ownership of the stream and disposes it together with itself.
        using var fclZipArchive =
            new System.IO.Compression.ZipArchive(
                new MemoryStream(zipArchive.ToArray()),
                System.IO.Compression.ZipArchiveMode.Read);

        foreach (var entry in fclZipArchive.Entries)
        {
            if (!includeEntry(entry))
                continue;

            using var entryStream = entry.Open();
            using var memoryStream = new MemoryStream();

            entryStream.CopyTo(memoryStream);

            var entryContent = memoryStream.ToArray();

            // Sanity check against the uncompressed length recorded in the archive.
            if (entryContent.Length != entry.Length)
            {
                throw new InvalidDataException(
                    "Error trying to read entry '" + entry.FullName + "': got " +
                    entryContent.Length + " bytes from entry instead of " + entry.Length);
            }

            yield return (entry.FullName, entryContent);
        }
    }
}

implement/Pine.Core/Pine.Core.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
<PackageReference Include="Microsoft.AspNetCore.Http.Abstractions" Version="2.3.0" />
4545
<PackageReference Include="Microsoft.CodeAnalysis.CSharp.Scripting" Version="4.14.0" />
4646
<PackageReference Include="Microsoft.Extensions.FileProviders.Embedded" Version="9.0.10" />
47+
<PackageReference Include="SharpCompress" Version="0.41.0" />
4748
</ItemGroup>
4849

4950
</Project>

implement/Pine.IntegrationTests/LoadFromGitHubTests.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using AwesomeAssertions;
22
using Pine.Core;
3+
using Pine.Core.Files;
34
using System;
45
using System.Collections.Generic;
56
using System.Collections.Immutable;

0 commit comments

Comments
 (0)