Skip to content

Commit 5bd7348

Browse files
Image dataset (#61)
* feat(dataset): Improve CSV file management in dataset dialog Implements drag-and-drop reordering and deletion for CSV files in the dataset creation/editing dialog. Key changes: - Uses dnd-kit for drag-and-drop functionality. - Stores the ordered list of selected file names in the dataset's `data` field. - Continues to send actual `File` objects for new/updated files to the API via the `files` field. - Updates UI to display file names, sizes for new files, and allows reordering and deletion. - Adds unit tests for new interactions and logic using Vitest. - Ensures all build, lint, and type checks pass. This addresses the issue of maintaining file order and allowing more flexible file management for CSV datasets. * add image type to dataset * add image type * feat: Add image type to dataset dialog This commit introduces a new 'image' dataset type to the dataset creation and update dialog. Key changes: - Added "image" as an option in the dataset type selection. - Implemented image file selection, allowing you to choose common image formats (PNG, JPG, GIF). - Added functionality to generate and display small image thumbnails in the file list for selected images. - Updated data structures, submission logic, and validation to support the new image type. - Included unit tests to cover the new functionality, including image selection, thumbnail display (mocked), and submission. The file input for images now shows a preview thumbnail for each selected image file in the list, enhancing your experience when working with image datasets. * fix tests * fix vision cli integration test * update * [Frontend] Add image dataset preview functionality This commit introduces the ability to preview image datasets. When a dataset with type 'image' is previewed, the images are displayed as cards, with each card showing the image and its filename below it. Multiple images are displayed per row in a responsive grid layout. 
Key changes: - Modified `DatasetPreviewDialog` to handle `type: "image"`. - Used `Card` components to display each image and its filename. - Added unit tests for the new image preview functionality, covering successful rendering, empty lists, loading, and error states. Note: Full verification of tests, tsc, and pnpm build was hindered by persistent environmental issues. The changes are submitted based on the successful implementation of the code and test logic. * update * fix update image dataset * fix lint * style: Remove unnecessary comments from dataset tests I removed explanatory code comments from `ui/src/components/dialog/dataset/dataset.test.tsx` to improve readability and maintainability. The tests themselves remain unchanged in functionality. All tests (excluding the intentionally skipped image tests) continue to pass, and both `tsc` and `pnpm build` complete successfully. * fix lint * fix test --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
1 parent a631bc8 commit 5bd7348

24 files changed

Lines changed: 1509 additions & 560 deletions

api/dataset.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package api
33
import (
44
"errors"
55
"fmt"
6-
"io"
76
"net/http"
87

98
"github.com/Yiling-J/tablepilot/ent"
@@ -12,8 +11,6 @@ import (
1211
"github.com/gin-gonic/gin"
1312
)
1413

15-
// Dataset-related handlers
16-
1714
func (hs *HTTPServer) CreateDataset(ctx *gin.Context) {
1815
var apiReq services_dataset.DatasetAPIRequest
1916
if err := ctx.ShouldBind(&apiReq); err != nil {
@@ -28,19 +25,22 @@ func (hs *HTTPServer) CreateDataset(ctx *gin.Context) {
2825
Data: apiReq.Data,
2926
}
3027

31-
if apiReq.Type == "csv" {
28+
if apiReq.Type == "csv" || apiReq.Type == "image" {
3229
if len(apiReq.Files) == 0 {
3330
errorResponse(ctx, http.StatusBadRequest, errors.New("at least one file is required for CSV dataset type"))
3431
return
3532
}
36-
var readers []io.Reader
33+
var readers []services_dataset.CreateDatasetFile
3734
for _, fh := range apiReq.Files {
3835
f, err := fh.Open()
3936
if err != nil {
4037
errorResponse(ctx, http.StatusBadRequest, err)
4138
return
4239
}
43-
readers = append(readers, f)
40+
readers = append(readers, services_dataset.CreateDatasetFile{
41+
Name: fh.Filename,
42+
Reader: f,
43+
})
4444
}
4545
serviceReq.Files = readers
4646
}
@@ -107,19 +107,22 @@ func (hs *HTTPServer) UpdateDataset(ctx *gin.Context) {
107107
}
108108

109109
if apiReq.Files != nil {
110-
var readers []io.Reader
110+
var readers []services_dataset.CreateDatasetFile
111111
for _, fh := range apiReq.Files {
112112
f, err := fh.Open()
113113
if err != nil {
114114
errorResponse(ctx, http.StatusBadRequest, err)
115115
return
116116
}
117-
readers = append(readers, f)
117+
readers = append(readers, services_dataset.CreateDatasetFile{
118+
Name: fh.Filename,
119+
Reader: f,
120+
})
118121
}
119122
serviceReq.Files = readers
120123
serviceReq.Fields = append(serviceReq.Fields, "files")
121124
} else {
122-
serviceReq.Files = []io.Reader{}
125+
serviceReq.Files = []services_dataset.CreateDatasetFile{}
123126
}
124127

125128
err := hs.DatasetService.Update(ctx.Request.Context(), datasetID, serviceReq)

api/dataset_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func TestAPI_CreateDatasetWithFiles(t *testing.T) {
7272
require.Equal(t, expectedRequest.Description, req.Description)
7373
require.Equal(t, expectedRequest.Type, req.Type)
7474
require.Equal(t, 1, len(req.Files))
75-
data, err := io.ReadAll(req.Files[0])
75+
data, err := io.ReadAll(req.Files[0].Reader)
7676
require.NoError(t, err)
7777
require.Equal(t, "header1,header2,header3\nr1c1,r1c2,r1c3\n", string(data))
7878
return "new_dataset_id", nil

cmd/cli/handler.go

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -181,16 +181,17 @@ func (h *Handler) CreateDataset(cmd *cobra.Command, args []string) error {
181181
options = append(options, o...)
182182
}
183183
req.Data = options
184-
case "csv":
184+
case "csv", "image":
185185
filePaths, err := cmd.Flags().GetStringArray("path")
186186
if err != nil {
187187
return fmt.Errorf("error getting file flag for csv type: %w", err)
188188
}
189189
if len(filePaths) == 0 {
190-
return fmt.Errorf("at least one --path must be provided for type 'csv'")
190+
return fmt.Errorf("at least one --path must be provided")
191191
}
192-
var readers []io.Reader
192+
var readers []dataset.CreateDatasetFile
193193
files, err := parsePaths(filePaths)
194+
names := []string{}
194195
if err != nil {
195196
return err
196197
}
@@ -199,12 +200,17 @@ func (h *Handler) CreateDataset(cmd *cobra.Command, args []string) error {
199200
if err != nil {
200201
return fmt.Errorf("failed to open file %s: %w", f, err)
201202
}
202-
readers = append(readers, file)
203+
readers = append(readers, dataset.CreateDatasetFile{
204+
Name: filepath.Base(f),
205+
Reader: file,
206+
})
207+
names = append(names, filepath.Base(f))
203208
}
204209
req.Files = readers
210+
req.Data = names
205211
defer func() {
206212
for _, f := range readers {
207-
if c, ok := f.(io.Closer); ok {
213+
if c, ok := f.Reader.(io.Closer); ok {
208214
c.Close()
209215
}
210216
}
@@ -307,32 +313,29 @@ func (h *Handler) UpdateDataset(cmd *cobra.Command, args []string) error {
307313
options = append(options, o...)
308314
}
309315
req.Data = options
310-
case "csv":
316+
case "csv", "image":
311317
filePaths, err := cmd.Flags().GetStringArray("file")
312318
if err != nil {
313319
return fmt.Errorf("error getting file flag for csv type: %w", err)
314320
}
315321
if len(filePaths) == 0 {
316322
return fmt.Errorf("at least one --file path must be provided for type 'csv'")
317323
}
318-
var files []io.Reader
324+
var files []dataset.CreateDatasetFile
319325
for _, filePath := range filePaths {
320326
file, err := os.Open(filePath)
321327
if err != nil {
322-
// Close already opened files if any
323-
for _, f := range files {
324-
if c, ok := f.(io.Closer); ok {
325-
c.Close()
326-
}
327-
}
328328
return fmt.Errorf("failed to open file %s: %w", filePath, err)
329329
}
330-
files = append(files, file)
330+
files = append(files, dataset.CreateDatasetFile{
331+
Name: filepath.Base(filePath),
332+
Reader: file,
333+
})
331334
}
332335
req.Files = files
333336
defer func() {
334337
for _, f := range files {
335-
if c, ok := f.(io.Closer); ok {
338+
if c, ok := f.Reader.(io.Closer); ok {
336339
c.Close()
337340
}
338341
}

ent/dataset/dataset.go

Lines changed: 4 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ent/migrate/schema.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ent/schema/dataset.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ func (Dataset) Fields() []ent.Field {
3636
field.String("name").Unique().NotEmpty(),
3737
field.String("path").Optional(),
3838
field.String("description").Default(""),
39-
field.Enum("type").Values("list", "csv"),
39+
field.Enum("type").Values("list", "csv", "image"),
4040
field.JSON("indexer", CSVIndexer{}).Optional(),
4141
field.Strings("values").Optional(),
4242
}

services/dataset/dataset.go

Lines changed: 58 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@ import (
77
"io"
88
"os"
99
"path/filepath"
10+
"slices"
1011

1112
"github.com/Yiling-J/tablepilot/config"
1213
"github.com/Yiling-J/tablepilot/ent"
1314
db_dataset "github.com/Yiling-J/tablepilot/ent/dataset"
1415
"github.com/Yiling-J/tablepilot/services/source"
1516
"github.com/Yiling-J/tablepilot/services/source/csvindexer"
16-
"github.com/Yiling-J/tablepilot/utils"
1717
)
1818

1919
//go:generate moq -rm -out dataset_moq.go . DatasetService
@@ -39,56 +39,43 @@ func NewDatasetService(db *ent.Client, cfg *config.Config) *DatasetServiceImpl {
3939
}
4040

4141
func (s DatasetServiceImpl) buildCreateDatasetReq(ctx context.Context, req *CreateDatasetRequest, sr *ent.Dataset) error {
42-
switch req.Type {
43-
case db_dataset.TypeCsv:
44-
relativePath := filepath.Join("datasets/shared", sr.Nanoid)
45-
dirPath := filepath.Join(s.cfg.Common.DataDir, relativePath)
46-
err := os.MkdirAll(dirPath, os.ModePerm)
47-
if err != nil {
48-
return fmt.Errorf("failed to create directory: %w", err)
49-
}
42+
relativePath := filepath.Join("datasets/shared", sr.Nanoid)
43+
dirPath := filepath.Join(s.cfg.Common.DataDir, relativePath)
44+
err := os.MkdirAll(dirPath, os.ModePerm)
45+
if err != nil {
46+
return fmt.Errorf("failed to create directory: %w", err)
47+
}
5048

51-
if len(req.Files) == 0 {
52-
return errors.New("dataset.Create: files should not be empty")
53-
}
54-
filePath := filepath.Join(dirPath, "data.csv")
55-
outFile, err := os.Create(filePath)
49+
for _, file := range req.Files {
50+
outFile, err := os.Create(filepath.Join(dirPath, file.Name))
5651
if err != nil {
57-
return fmt.Errorf("failed to create file %s: %w", filePath, err)
52+
return fmt.Errorf("failed to create file %w", err)
5853
}
59-
for i, file := range req.Files {
60-
// skip csv headers
61-
if i > 0 {
62-
reader := utils.NewCsvReader(file)
63-
_, err = reader.Read()
64-
if err != nil {
65-
return fmt.Errorf("failed to read csv %w", err)
66-
}
67-
offset := reader.InputOffset()
68-
_, err = file.(io.ReadSeeker).Seek(offset, io.SeekStart)
69-
if err != nil {
70-
return fmt.Errorf("failed to seek csv file %w", err)
71-
}
72-
}
73-
_, err = io.Copy(outFile, file)
74-
if err != nil {
75-
return fmt.Errorf("failed to write to file %s: %w", filePath, err)
76-
}
54+
defer outFile.Close()
55+
_, err = io.Copy(outFile, file.Reader)
56+
if err != nil {
57+
return fmt.Errorf("failed to write to file %w", err)
7758
}
78-
outFile.Close()
79-
// build index
80-
indexer, err := csvindexer.NewCSVIndexer(os.DirFS(dirPath), []string{"data.csv"})
59+
}
60+
switch req.Type {
61+
case db_dataset.TypeCsv:
62+
indexer, err := csvindexer.NewCSVIndexer(os.DirFS(dirPath), req.Data)
8163
if err != nil {
8264
return fmt.Errorf("table.Create: build csv index: %w", err)
8365
}
84-
err = sr.Update().SetPath(relativePath).SetIndexer(indexer.CSVIndexer).Exec(ctx)
66+
err = sr.Update().SetPath(relativePath).SetIndexer(indexer.CSVIndexer).SetValues(req.Data).Exec(ctx)
8567
if err != nil {
86-
return fmt.Errorf("table.Create: update dataset metadata: %w", err) // Clarified error
68+
return fmt.Errorf("table.Create: update dataset metadata: %w", err)
69+
}
70+
case db_dataset.TypeImage:
71+
err = sr.Update().SetPath(relativePath).SetValues(req.Data).Exec(ctx)
72+
if err != nil {
73+
return fmt.Errorf("table.Create: update dataset metadata: %w", err)
8774
}
8875
case db_dataset.TypeList:
8976
err := sr.Update().SetValues(req.Data).Exec(ctx)
9077
if err != nil {
91-
return fmt.Errorf("table.Create: update dataset values: %w", err) // Clarified error
78+
return fmt.Errorf("table.Create: update dataset values: %w", err)
9279
}
9380
}
9481
return nil
@@ -119,6 +106,10 @@ func (s *DatasetServiceImpl) List(ctx context.Context) ([]*DatasetInfo, error) {
119106
}
120107
datasetInfos := []*DatasetInfo{}
121108
for _, ds := range datasets {
109+
// backward compatible
110+
if ds.Type == db_dataset.TypeCsv && len(ds.Values) == 0 {
111+
ds.Values = []string{"data.csv"}
112+
}
122113
datasetInfos = append(datasetInfos, &DatasetInfo{
123114
ID: ds.Nanoid,
124115
Name: ds.Name,
@@ -161,22 +152,38 @@ func (s *DatasetServiceImpl) Update(ctx context.Context, dataset string, req *Up
161152
case "description":
162153
updater.SetDescription(req.Description)
163154
case "data", "files":
164-
processDataRebuild = true
165-
updater.ClearIndexer().ClearPath().SetValues(nil)
155+
// new files or data slice change
156+
if len(req.Files) > 0 || !slices.Equal(req.Data, ds.Values) {
157+
processDataRebuild = true
158+
updater.ClearIndexer().ClearPath().SetValues(nil)
159+
}
166160
}
167161
}
168162

169163
updatedDsEntity, err := updater.Save(ctx)
170164
if err != nil {
171-
return ent.Rollback(tx, fmt.Errorf("dataset.Update: save changes: %w", err)) // Clarified error
165+
return ent.Rollback(tx, fmt.Errorf("dataset.Update: save changes: %w", err))
172166
}
173167

174168
if processDataRebuild {
175169
if originalPath != "" {
176170
oldDirPath := filepath.Join(s.cfg.Common.DataDir, originalPath)
177171
if _, statErr := os.Stat(oldDirPath); !os.IsNotExist(statErr) {
178-
if removeErr := os.RemoveAll(oldDirPath); removeErr != nil {
179-
return ent.Rollback(tx, fmt.Errorf("dataset.Update: failed to remove old directory %s: %w", oldDirPath, removeErr))
172+
keep := map[string]bool{}
173+
for _, file := range req.Data {
174+
keep[file] = true
175+
}
176+
entries, err := os.ReadDir(oldDirPath)
177+
if err != nil {
178+
return ent.Rollback(tx, fmt.Errorf("dataset.Update: read dir: %w", err))
179+
}
180+
for _, e := range entries {
181+
if _, ok := keep[e.Name()]; !ok {
182+
err = os.Remove(filepath.Join(oldDirPath, e.Name()))
183+
if err != nil {
184+
return ent.Rollback(tx, fmt.Errorf("dataset.Update: remove file: %w", err))
185+
}
186+
}
180187
}
181188
}
182189
}
@@ -217,7 +224,7 @@ func (s *DatasetServiceImpl) Delete(ctx context.Context, dataset string) error {
217224

218225
err = tx.Dataset.DeleteOne(ds).Exec(ctx)
219226
if err != nil {
220-
return ent.Rollback(tx, fmt.Errorf("dataset.Delete: execute delete: %w", err)) // Clarified error
227+
return ent.Rollback(tx, fmt.Errorf("dataset.Delete: execute delete: %w", err))
221228
}
222229

223230
return tx.Commit()
@@ -229,7 +236,7 @@ func (s *DatasetServiceImpl) Get(ctx context.Context, source string) (*DatasetIn
229236
db_dataset.Nanoid(source),
230237
)).Only(ctx)
231238
if err != nil {
232-
return nil, fmt.Errorf("dataset.Get: query dataset: %w", err) // Clarified error
239+
return nil, fmt.Errorf("dataset.Get: query dataset: %w", err)
233240
}
234241
return &DatasetInfo{
235242
Name: sr.Name,
@@ -276,6 +283,11 @@ func (s *DatasetServiceImpl) Preview(ctx context.Context, dataset string) (*Data
276283
Type: sr.Type,
277284
Rows: rows,
278285
}, nil
286+
case db_dataset.TypeImage:
287+
return &DatasetRows{
288+
Type: sr.Type,
289+
Data: sr.Values,
290+
}, nil
279291
case db_dataset.TypeList:
280292
return &DatasetRows{
281293
Type: sr.Type,

0 commit comments

Comments (0)