Skip to content

Commit 40a0590

Browse files
committed
Add --depth flag for Kitfile generation
Add a `--depth` flag to the commands that generate Kitfiles. The provided depth controls how many levels of a directory structure kit will descend, processing files individually, before it starts grouping whole directories into layers. For example, with depth=1, files in immediate subdirectories of the context are processed individually and added to the Kitfile as appropriate. Signed-off-by: Angel Misevski <amisevsk@gmail.com>
1 parent ff9a282 commit 40a0590

10 files changed

Lines changed: 138 additions & 109 deletions

File tree

pkg/cmd/kitimport/cmd.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ type importOptions struct {
7979
kitfilePath string
8080
downloadTool string
8181
concurrency int
82+
depth int
8283
modelKitRef *registry.Reference
8384
}
8485

@@ -100,6 +101,7 @@ func ImportCommand() *cobra.Command {
100101
cmd.Flags().StringVarP(&opts.kitfilePath, "file", "f", "", "Path to Kitfile to use for packing (use '-' to read from standard input)")
101102
cmd.Flags().StringVar(&opts.downloadTool, "tool", "", "Tool to use for downloading files: options are 'git' and 'hf' (default: detect based on repository)")
102103
cmd.Flags().IntVar(&opts.concurrency, "concurrency", 5, "Maximum number of simultaneous downloads (for huggingface)")
104+
cmd.Flags().IntVar(&opts.depth, "depth", 0, "Maximum directory depth to process when generating a Kitfile. Setting to -1 processes all files individually")
103105
cmd.Flags().SortFlags = false
104106
return cmd
105107
}

pkg/cmd/kitimport/gitimport.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ func importUsingGit(ctx context.Context, opts *importOptions) error {
7373
if err != nil {
7474
return fmt.Errorf("error processing directory: %w", err)
7575
}
76-
kf, err := generateKitfile(dirContents, opts.repo, tmpDir)
76+
kf, err := generateKitfile(dirContents, opts.repo, tmpDir, opts.depth)
7777
if err != nil {
7878
return err
7979
}

pkg/cmd/kitimport/hfimport.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func importUsingHF(ctx context.Context, opts *importOptions) error {
7070
}
7171
kitfile = kf
7272
} else {
73-
kf, err := generateKitfile(dirListing, repo, tmpDir)
73+
kf, err := generateKitfile(dirListing, repo, tmpDir, opts.depth)
7474
if err != nil {
7575
return err
7676
}

pkg/cmd/kitimport/util.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ import (
4242

4343
var ErrNoEditorFound = errors.New("no editor found")
4444

45-
func generateKitfile(dirContents *kfgen.DirectoryListing, repo string, outDir string) (*artifact.KitFile, error) {
45+
func generateKitfile(dirContents *kfgen.DirectoryListing, repo string, outDir string, depth int) (*artifact.KitFile, error) {
4646
// Fill fields in package so that they're not empty in `kit list` later.
4747
sections := strings.Split(repo, "/")
4848
var modelPackage *artifact.Package
@@ -52,7 +52,7 @@ func generateKitfile(dirContents *kfgen.DirectoryListing, repo string, outDir st
5252
Authors: []string{sections[len(sections)-2]},
5353
}
5454
}
55-
kitfile, err := kfgen.GenerateKitfile(dirContents, modelPackage)
55+
kitfile, err := kfgen.GenerateKitfile(dirContents, modelPackage, depth)
5656
if err != nil {
5757
return nil, fmt.Errorf("failed to generate Kitfile: %w", err)
5858
}

pkg/cmd/kitinit/cmd.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ type initOptions struct {
8080
repoRef string
8181
token string
8282
outputPath string
83+
depth int
8384
// Computed fields (remote only)
8485
repo string
8586
repoType hf.RepositoryType
@@ -105,6 +106,7 @@ func InitCommand() *cobra.Command {
105106
cmd.Flags().StringVar(&opts.repoRef, "ref", "main", "Branch or tag for remote repository (requires --remote)")
106107
cmd.Flags().StringVar(&opts.token, "token", "", "Auth token for remote repository (requires --remote)")
107108
cmd.Flags().StringVarP(&opts.outputPath, "output", "o", "", "Output path for generated Kitfile ('-' writes to stdout; default: Kitfile in directory for local, stdout for remote)")
109+
cmd.Flags().IntVar(&opts.depth, "depth", 0, "Maximum directory depth to process when generating a Kitfile. Setting to -1 processes all files individually")
108110
cmd.Flags().SortFlags = false
109111
return cmd
110112
}
@@ -146,7 +148,7 @@ func runCommand(opts *initOptions) func(*cobra.Command, []string) error {
146148
func runInit(dirContents *kfgen.DirectoryListing, opts *initOptions) error {
147149
modelPackage := buildPackageFromRepo(opts.repo, opts.modelkitName, opts.modelkitDescription, opts.modelkitAuthor)
148150

149-
kitfile, err := kfgen.GenerateKitfile(dirContents, modelPackage)
151+
kitfile, err := kfgen.GenerateKitfile(dirContents, modelPackage, opts.depth)
150152
if err != nil {
151153
return output.Fatalf("Error generating Kitfile: %s", err)
152154
}

pkg/lib/kitfile/generate/generate.go

Lines changed: 108 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,10 @@ var promptFilePatterns = []string{
8484

8585
// Generate a basic Kitfile by looking at the contents of a directory. Parameter
8686
// packageOpt can be used to define metadata for the Kitfile (i.e. the package
87-
// section), which is left empty if the parameter is nil.
88-
func GenerateKitfile(dir *DirectoryListing, packageOpt *artifact.Package) (*artifact.KitFile, error) {
87+
// section), which is left empty if the parameter is nil. depth controls how many
88+
// levels of subdirectories are processed file-by-file; at depth=0 (the default),
89+
// subdirectories of the root dir are analyzed as whole units.
90+
func GenerateKitfile(dir *DirectoryListing, packageOpt *artifact.Package, depth int) (*artifact.KitFile, error) {
8991
output.Logf(output.LogLevelTrace, "Generating Kitfile in %s", dir.Path)
9092
kitfile := &artifact.KitFile{
9193
ManifestVersion: "1.0.0",
@@ -113,15 +115,63 @@ func GenerateKitfile(dir *DirectoryListing, packageOpt *artifact.Package) (*arti
113115
return kitfile, nil
114116
}
115117

116-
// We can make sure all files are included by including a layer with path '.'
117-
// However, we only want to do this if it is necessary
118-
includeCatchallSection := false
119-
// Dirs we don't know how to handle automatically.
120-
var unprocessedDirPaths []string
121-
// Metadata files; we want these to be either model parts (if there is a model)
122-
// or datasets
123-
var modelFiles, metadataFiles []FileListing
124-
var detectedLicenseType string
118+
modelFiles, metadataFiles, unknownFiles, detectedLicenseType := processSubdirFiles(kitfile, *dir, depth)
119+
120+
if len(unknownFiles) > 5 {
121+
output.Logf(output.LogLevelTrace, "Unrecognized files found in %s; adding catch-all code layer", dir.Path)
122+
kitfile.Code = append(kitfile.Code, artifact.Code{Path: "."})
123+
} else if len(unknownFiles) > 0 {
124+
output.Logf(output.LogLevelTrace, "Unrecognized files found in %s; adding as code layers")
125+
for _, f := range unknownFiles {
126+
kitfile.Code = append(kitfile.Code, artifact.Code{Path: f.Path})
127+
}
128+
}
129+
130+
if len(modelFiles) > 0 {
131+
if err := addModelToKitfile(kitfile, modelFiles); err != nil {
132+
return nil, fmt.Errorf("failed to add model to Kitfile: %w", err)
133+
}
134+
output.Logf(output.LogLevelTrace, "Adding metadata files as model parts")
135+
for _, metadataFile := range metadataFiles {
136+
kitfile.Model.Parts = append(kitfile.Model.Parts, artifact.ModelPart{Path: metadataFile.Path})
137+
}
138+
} else {
139+
output.Logf(output.LogLevelTrace, "No model detected; adding metadata files as dataset layers")
140+
for _, metadataFile := range metadataFiles {
141+
kitfile.DataSets = append(kitfile.DataSets, artifact.DataSet{Path: metadataFile.Path})
142+
}
143+
}
144+
145+
// If we detected a license, try to attach it to the Kitfile section that makes sense
146+
if kitfile.Model != nil && detectedLicenseType != "" {
147+
kitfile.Model.License = detectedLicenseType
148+
} else if len(kitfile.DataSets) == 1 {
149+
kitfile.DataSets[0].License = detectedLicenseType
150+
} else if len(kitfile.Code) == 1 {
151+
kitfile.Code[0].License = detectedLicenseType
152+
} else {
153+
output.Logf(output.LogLevelTrace, "Unsure what license applies to, adding to Kitfile package")
154+
kitfile.Package.License = detectedLicenseType
155+
}
156+
157+
applySkillMetadataToPackage(kitfile, *dir)
158+
159+
return kitfile, nil
160+
}
161+
162+
// processSubdirFiles classifies each file in dir individually and adds typed layers to
163+
// kitfile. depth controls how many additional subdirectory levels are processed
164+
// file-by-file before switching to addDirToKitfile (whole-directory analysis). Returns
165+
// model and metadata files for the caller to pair, any files whose type could not be
166+
// determined (so the caller can decide how to handle them), and any detected license
167+
// identifier. Unclassifiable directories are added directly to kitfile.Code.
168+
func processSubdirFiles(kitfile *artifact.KitFile, dir DirectoryListing, depth int) (modelFiles []FileListing, metadataFiles []FileListing, unknownFiles []FileListing, detectedLicenseType string) {
169+
if found, _ := dirContainsSkillMD(dir); found {
170+
output.Logf(output.LogLevelTrace, "Directory %s contains SKILL.md; treating as skill", dir.Path)
171+
prompt, _ := buildPromptFromSkill(dir)
172+
kitfile.Prompts = append(kitfile.Prompts, prompt)
173+
return nil, nil, nil, ""
174+
}
125175

126176
output.Logf(output.LogLevelTrace, "Reading directory contents")
127177
for _, file := range dir.Files {
@@ -136,14 +186,14 @@ func GenerateKitfile(dir *DirectoryListing, packageOpt *artifact.Package) (*arti
136186
if strings.HasPrefix(strings.ToLower(file.Name), "readme") {
137187
output.Logf(output.LogLevelTrace, "Found readme file '%s'", file.Name)
138188
kitfile.Docs = append(kitfile.Docs, artifact.Docs{
139-
Path: file.Name,
189+
Path: file.Path,
140190
Description: "Readme file",
141191
})
142192
continue
143193
} else if strings.HasPrefix(strings.ToLower(file.Name), "license") {
144194
output.Logf(output.LogLevelTrace, "Found license file '%s'", file.Name)
145195
kitfile.Docs = append(kitfile.Docs, artifact.Docs{
146-
Path: file.Name,
196+
Path: file.Path,
147197
Description: "License file",
148198
})
149199
licenseType, err := detectLicense(file.Path)
@@ -173,98 +223,64 @@ func GenerateKitfile(dir *DirectoryListing, packageOpt *artifact.Package) (*arti
173223
kitfile.DataSets = append(kitfile.DataSets, artifact.DataSet{Path: file.Path})
174224
case fileTypePrompt:
175225
kitfile.Prompts = append(kitfile.Prompts, artifact.Prompt{Path: file.Path})
226+
case fileTypeCode:
227+
kitfile.Code = append(kitfile.Code, artifact.Code{Path: file.Path})
176228
default:
177-
output.Logf(output.LogLevelTrace, "File %s is either code or unknown type. Will be added as a catch-all section", file.Path)
178-
// File is either code or unknown; we'll have to include it in a catch-all section
179-
includeCatchallSection = true
229+
output.Logf(output.LogLevelTrace, "File %s is unknown type; will be included in code section", file.Path)
230+
unknownFiles = append(unknownFiles, file)
180231
}
181232
}
182233

183-
for _, subDir := range dir.Subdirs {
184-
dirModelFiles, err := addDirToKitfile(kitfile, subDir)
185-
if err != nil {
186-
output.Logf(output.LogLevelTrace, "Failed to determine type for directory %s: %s", subDir.Path, err)
187-
unprocessedDirPaths = append(unprocessedDirPaths, subDir.Path)
188-
}
189-
modelFiles = append(modelFiles, dirModelFiles...)
190-
continue
191-
}
192-
193-
if len(modelFiles) > 0 {
194-
if err := addModelToKitfile(kitfile, modelFiles); err != nil {
195-
return nil, fmt.Errorf("failed to add model to Kitfile: %w", err)
196-
}
197-
output.Logf(output.LogLevelTrace, "Adding metadata files as model parts")
198-
for _, metadataFile := range metadataFiles {
199-
kitfile.Model.Parts = append(kitfile.Model.Parts, artifact.ModelPart{Path: metadataFile.Path})
234+
if depth == 0 {
235+
for _, subDir := range dir.Subdirs {
236+
dirModelFiles := addDirToKitfile(kitfile, subDir)
237+
modelFiles = append(modelFiles, dirModelFiles...)
200238
}
201239
} else {
202-
output.Logf(output.LogLevelTrace, "No model detected; adding metadata files as dataset layers")
203-
for _, metadataFile := range metadataFiles {
204-
kitfile.DataSets = append(kitfile.DataSets, artifact.DataSet{Path: metadataFile.Path})
205-
}
206-
}
207-
208-
// Decide how to handle remaining paths. Either package them in one large code layer with basePath
209-
// or as separate layers for each directory.
210-
output.Logf(output.LogLevelTrace, "Unable to process %d paths in %s", len(unprocessedDirPaths), dir.Path)
211-
if includeCatchallSection || len(unprocessedDirPaths) > 5 {
212-
output.Logf(output.LogLevelTrace, "Adding catch-all code layer to include files in %s", dir.Path)
213-
// Overwrite any code layers we added before; this is cleaner than e.g. having a layer for '.' and a layer for 'src'
214-
kitfile.Code = []artifact.Code{{Path: "."}}
215-
} else {
216-
for _, path := range unprocessedDirPaths {
217-
kitfile.Code = append(kitfile.Code, artifact.Code{Path: path})
240+
for _, subDir := range dir.Subdirs {
241+
// Ignore licenses in subdirectories -- it's unclear where to assign them
242+
subModelFiles, subMetaFiles, subUnknownFiles, _ := processSubdirFiles(kitfile, subDir, depth-1)
243+
modelFiles = append(modelFiles, subModelFiles...)
244+
metadataFiles = append(metadataFiles, subMetaFiles...)
245+
for _, f := range subUnknownFiles {
246+
output.Logf(output.LogLevelTrace, "File %s is unknown type; adding as code layer", f.Path)
247+
kitfile.Code = append(kitfile.Code, artifact.Code{Path: f.Path})
248+
}
218249
}
219250
}
220-
221-
// If we detected a license, try to attach it to the Kitfile section that makes sense
222-
if kitfile.Model != nil && detectedLicenseType != "" {
223-
kitfile.Model.License = detectedLicenseType
224-
} else if len(kitfile.DataSets) == 1 {
225-
kitfile.DataSets[0].License = detectedLicenseType
226-
} else if len(kitfile.Code) == 1 {
227-
kitfile.Code[0].License = detectedLicenseType
228-
} else {
229-
output.Logf(output.LogLevelTrace, "Unsure what license applies to, adding to Kitfile package")
230-
kitfile.Package.License = detectedLicenseType
231-
}
232-
233-
applySkillMetadataToPackage(kitfile, *dir)
234-
235-
return kitfile, nil
251+
return modelFiles, metadataFiles, unknownFiles, detectedLicenseType
236252
}
237253

238-
func addDirToKitfile(kitfile *artifact.KitFile, dir DirectoryListing) (modelFiles []FileListing, err error) {
254+
func addDirToKitfile(kitfile *artifact.KitFile, dir DirectoryListing) (modelFiles []FileListing) {
239255
if found, _ := dirContainsSkillMD(dir); found {
240256
output.Logf(output.LogLevelTrace, "Directory %s contains SKILL.md; treating as skill", dir.Path)
241257
prompt, _ := buildPromptFromSkill(dir)
242258
kitfile.Prompts = append(kitfile.Prompts, prompt)
243-
return nil, nil
259+
return nil
244260
}
245261

246262
switch dir.Name {
247263
case "docs":
248264
output.Logf(output.LogLevelTrace, "Directory %s interpreted as documentation", dir.Name)
249265
kitfile.Docs = append(kitfile.Docs, artifact.Docs{
250-
Path: dir.Path,
266+
Path: withTrailingSlash(dir.Path),
251267
})
252-
return nil, nil
268+
return nil
253269
case "src", "pkg", "lib", "build":
254270
output.Logf(output.LogLevelTrace, "Directory %s interpreted as code", dir.Name)
255271
kitfile.Code = append(kitfile.Code, artifact.Code{
256-
Path: dir.Path,
272+
Path: withTrailingSlash(dir.Path),
257273
})
258-
return nil, nil
274+
return nil
259275
}
260276

261277
// Sort entries in the directory to try and figure out what it contains. We'll reuse the
262278
// fact that the fileTypes are enumerated using iota (and so are ints) to index correctly.
263279
// Avoid using maps here since they iterate in a random order.
264280
directoryContents := [int(fileTypeUnknown) + 1][]string{}
265281
for _, subdir := range dir.Subdirs {
266-
// We can, in the future, recurse deeper into the directory tree here. For now, treat secondary dirs as unknowns
267-
directoryContents[int(fileTypeUnknown)] = append(directoryContents[int(fileTypeUnknown)], subdir.Path)
282+
// We can, in the future, recurse deeper into the directory tree here. For now, treat additional dirs as unknowns
283+
directoryContents[int(fileTypeUnknown)] = append(directoryContents[int(fileTypeUnknown)], filepath.Join(withTrailingSlash(dir.Path), subdir.Path))
268284
}
269285

270286
var metadataFiles []FileListing
@@ -292,7 +308,9 @@ func addDirToKitfile(kitfile *artifact.KitFile, dir DirectoryListing) (modelFile
292308
}
293309
}
294310
if directoryHasMixedContents {
295-
return modelFiles, fmt.Errorf("mixed content in directory; unable to determine type")
311+
output.Logf(output.LogLevelTrace, "Mixed contents in directory %s, adding as code layer", dir.Path)
312+
kitfile.Code = append(kitfile.Code, artifact.Code{Path: withTrailingSlash(dir.Path)})
313+
return modelFiles
296314
}
297315
switch overallFiletype {
298316
case fileTypeModel:
@@ -301,21 +319,22 @@ func addDirToKitfile(kitfile *artifact.KitFile, dir DirectoryListing) (modelFile
301319
modelFiles = append(modelFiles, metadataFiles...)
302320
case fileTypeDataset:
303321
output.Logf(output.LogLevelTrace, "Interpreting directory %s as a dataset directory", dir.Path)
304-
kitfile.DataSets = append(kitfile.DataSets, artifact.DataSet{Path: dir.Path})
322+
kitfile.DataSets = append(kitfile.DataSets, artifact.DataSet{Path: withTrailingSlash(dir.Path)})
305323
case fileTypeDocs:
306324
output.Logf(output.LogLevelTrace, "Interpreting directory %s as a docs directory", dir.Path)
307-
kitfile.Docs = append(kitfile.Docs, artifact.Docs{Path: dir.Path})
325+
kitfile.Docs = append(kitfile.Docs, artifact.Docs{Path: withTrailingSlash(dir.Path)})
308326
case fileTypePrompt:
309327
output.Logf(output.LogLevelTrace, "Interpreting directory %s as a prompts directory", dir.Path)
310-
kitfile.Prompts = append(kitfile.Prompts, artifact.Prompt{Path: dir.Path})
328+
kitfile.Prompts = append(kitfile.Prompts, artifact.Prompt{Path: withTrailingSlash(dir.Path)})
329+
case fileTypeCode:
330+
output.Logf(output.LogLevelTrace, "Interpreting directory %s as code directory", dir.Path)
331+
kitfile.Code = append(kitfile.Code, artifact.Code{Path: withTrailingSlash(dir.Path)})
311332
default:
312-
output.Logf(output.LogLevelTrace, "Could not determine type for directory %s", dir.Path)
313-
// If it's overall code, metadata, or unknown, just return it as unprocessed and let it be added as a Code section
314-
// later
315-
return modelFiles, fmt.Errorf("directory should be handled as Code")
333+
output.Logf(output.LogLevelTrace, "Could not determine type for directory %s. Adding as code layer", dir.Path)
334+
kitfile.Code = append(kitfile.Code, artifact.Code{Path: withTrailingSlash(dir.Path)})
316335
}
317336

318-
return modelFiles, nil
337+
return modelFiles
319338
}
320339

321340
func determineFileType(filename string) fileType {
@@ -344,7 +363,6 @@ func determineFileType(filename string) fileType {
344363
return fileTypeDataset
345364
}
346365
return fileTypeUnknown
347-
348366
}
349367

350368
func addModelToKitfile(kitfile *artifact.KitFile, files []FileListing) error {
@@ -440,3 +458,10 @@ func anyPattern(query string, patterns []string) bool {
440458
}
441459
return false
442460
}
461+
462+
// withTrailingSlash returns p with a single trailing "/" appended so that the
// path is visibly a directory. The path is returned unchanged when it is ".",
// when it already ends in "/", or when it is empty: appending "/" to an empty
// path would produce "/", silently turning a relative path into the
// filesystem root.
func withTrailingSlash(p string) string {
	switch {
	case p == "", p == ".":
		return p
	case strings.HasSuffix(p, "/"):
		return p
	}
	return p + "/"
}

pkg/lib/kitfile/generate/skill.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func dirContainsSkillMD(dir DirectoryListing) (bool, string) {
4949

5050
func buildPromptFromSkill(dir DirectoryListing) (artifact.Prompt, *skill.SkillFrontmatter) {
5151
prompt := artifact.Prompt{
52-
Path: dir.Path,
52+
Path: withTrailingSlash(dir.Path),
5353
}
5454

5555
found, skillPath := dirContainsSkillMD(dir)

0 commit comments

Comments
 (0)