|
1 | | -//! This module provides structs for parsing and manipulating CSV data |
2 | | -//! [Released under GNU LGPLv3] |
3 | | -//! |
4 | | -const std = @import("std"); |
5 | | -const Allocator = std.mem.Allocator; |
6 | | -const ArrayList = std.ArrayList; |
7 | | - |
8 | | -pub const schema = @import("schema.zig"); |
9 | | - |
/// Parsing configuration consumed by `Table`.
pub const Settings = struct {
    /// Byte sequence that sits between two values of the same row (the separator).
    delimiter: []const u8,
    /// Byte sequence that marks the end of a row of delimiter-separated values.
    terminator: []const u8,

    /// Returns the settings most commonly used for CSV data:
    /// `{ .delimiter = ",", .terminator = "\n" }`.
    pub fn default() Settings {
        const comma_separated: Settings = .{
            .terminator = "\n",
            .delimiter = ",",
        };
        return comma_separated;
    }
};
26 | | - |
/// Every error that the functions of `Table` can return.
pub const TableError = error{
    /// No column exists at the requested index.
    ColumnNotFound,
    /// The supplied value contains the configured delimiter or terminator.
    IllegalCharacter,
    /// A row holds a different number of values than the rows parsed before it.
    InconsistentRowLength,
    /// A required memory allocation failed.
    OutOfMemory,
    /// No row exists at the requested index.
    RowNotFound,
    /// The searched value does not occur in the requested row or column.
    ValueNotFound,
};
42 | | - |
/// A structure for parsing and manipulating CSV data.
///
/// Value bytes are never copied: every stored value is a slice into the buffer
/// that was handed to `parse` or `replaceValue`. Those buffers must therefore
/// stay alive for as long as the table (or the affected values) are used.
pub const Table = struct {
    /// The settings that should be used when parsing the CSV data
    settings: Settings,
    /// Allocator that owns the row lists and the outer list of rows
    allocator: Allocator,
    /// Number of columns expected in each row; `null` until the first row is parsed
    /// or the first column is inserted
    expected_column_count: ?usize,
    /// List of rows; each row is a list of its column values in order
    data: ArrayList(ArrayList([]const u8)),

    /// Initialize an empty struct Table. Nothing is allocated until data is added.
    pub fn init(allocator: Allocator, settings: Settings) Table {
        return .{
            .settings = settings,
            .allocator = allocator,
            .expected_column_count = null,
            .data = .empty,
        };
    }

    /// Frees every row list and the outer list of rows.
    ///
    /// The value bytes themselves are borrowed and are not freed here.
    pub fn deinit(self: *Table) void {
        for (self.data.items) |*row| {
            row.deinit(self.allocator);
        }
        self.data.deinit(self.allocator);
    }

    /// Load and append CSV data to the struct Table
    ///
    /// Trailing terminators are ignored. Rows are appended one by one; when a row
    /// fails validation with `TableError.InconsistentRowLength` (or an allocation
    /// fails) that row is NOT stored, but rows appended before it remain.
    ///
    /// `csv_data` is borrowed, not copied: it must outlive the stored values.
    pub fn parse(self: *Table, csv_data: []const u8) TableError!void {
        const terminator = self.settings.terminator;
        // Strip trailing terminators as whole sequences. A character-set trim would
        // also eat payload bytes that merely share a byte with a multi-byte terminator.
        var trimmed = csv_data;
        while (terminator.len > 0 and std.mem.endsWith(u8, trimmed, terminator)) {
            trimmed = trimmed[0 .. trimmed.len - terminator.len];
        }
        var rows = std.mem.splitSequence(u8, trimmed, terminator);
        while (rows.next()) |row| {
            _ = try self.parseRow(row);
        }
    }

    /// Parse a single row of CSV data and append it to the struct Table
    ///
    /// The row is validated against `expected_column_count` before it is stored,
    /// so a rejected row never becomes part of the table. The first accepted row
    /// defines `expected_column_count`.
    ///
    /// Returns the number of values parsed in the row.
    fn parseRow(self: *Table, row: []const u8) TableError!usize {
        var values: ArrayList([]const u8) = .empty;
        // Covers every failure below; after the final append nothing can fail anymore.
        errdefer values.deinit(self.allocator);

        var columns = std.mem.splitSequence(u8, row, self.settings.delimiter);
        while (columns.next()) |value| {
            try values.append(self.allocator, value);
        }

        const value_count = values.items.len;
        if (self.expected_column_count) |expected| {
            if (value_count != expected) return TableError.InconsistentRowLength;
        }

        try self.data.append(self.allocator, values);
        self.expected_column_count = value_count;
        return value_count;
    }

    /// Returns the number of rows in the table
    pub fn getRowCount(self: Table) usize {
        return self.data.items.len;
    }

    /// Returns the number of columns in the table (0 while no column count is known)
    pub fn getColumnCount(self: Table) usize {
        return self.expected_column_count orelse 0;
    }

    /// Returns all columns indexes that match a given value in a specific row
    ///
    /// Arguments:
    /// - `allocator`: The allocator to use for the returned slice.
    /// - `row_index`: The index of the row to search in.
    /// - `searched_value`: The value to search for in the row.
    ///
    /// The caller owns the returned slice and must free it with `allocator`.
    ///
    /// Raises `TableError.RowNotFound` if `row_index` is out of range.
    /// Raises `TableError.ValueNotFound` if no matching values are found.
    ///
    /// This function may be used for retrieving columns by their header key:
    /// ```zig
    /// try table.parse(
    ///     \\id,name
    ///     \\1,John
    /// );
    /// const indexes = try table.findColumnIndexesByValue(allocator, 0, "id");
    /// defer allocator.free(indexes);
    /// assert(std.mem.eql(usize, indexes, &.{0}));
    /// ```
    pub fn findColumnIndexesByValue(self: Table, allocator: Allocator, row_index: usize, searched_value: []const u8) TableError![]usize {
        if (row_index >= self.data.items.len) return TableError.RowNotFound;

        var column_indexes: ArrayList(usize) = .empty;
        errdefer column_indexes.deinit(allocator);

        for (self.data.items[row_index].items, 0..) |column_value, column_index| {
            if (std.mem.eql(u8, column_value, searched_value)) {
                try column_indexes.append(allocator, column_index);
            }
        }
        if (column_indexes.items.len == 0) return TableError.ValueNotFound;
        return try column_indexes.toOwnedSlice(allocator);
    }

    /// Returns all row indexes that match a given value in a specific column
    ///
    /// Arguments:
    /// - `allocator`: The allocator to use for the returned slice.
    /// - `column_index`: The index of the column to search in.
    /// - `searched_value`: The value to search for in the column.
    ///
    /// The caller owns the returned slice and must free it with `allocator`.
    ///
    /// Raises `TableError.ColumnNotFound` if `column_index` is out of range.
    /// Raises `TableError.ValueNotFound` if no matching values are found.
    ///
    /// This function may be used for retrieving rows by the value of a key column:
    /// ```zig
    /// try table.parse(
    ///     \\id,name
    ///     \\1,John
    /// );
    /// const indexes = try table.findRowIndexesByValue(allocator, 0, "1");
    /// defer allocator.free(indexes);
    /// assert(std.mem.eql(usize, indexes, &.{1}));
    /// ```
    pub fn findRowIndexesByValue(self: Table, allocator: Allocator, column_index: usize, searched_value: []const u8) TableError![]usize {
        const column_count = self.expected_column_count orelse return TableError.ColumnNotFound;
        if (column_index >= column_count) return TableError.ColumnNotFound;

        var row_indexes: ArrayList(usize) = .empty;
        errdefer row_indexes.deinit(allocator);

        for (self.data.items, 0..) |row, row_index| {
            if (std.mem.eql(u8, row.items[column_index], searched_value)) {
                try row_indexes.append(allocator, row_index);
            }
        }
        if (row_indexes.items.len == 0) return TableError.ValueNotFound;
        return try row_indexes.toOwnedSlice(allocator);
    }

    /// Return the column at the provided index as a slice of values
    ///
    /// The caller owns the returned slice and must free it with `allocator`; the
    /// values inside it are borrowed from the table.
    ///
    /// Raises `TableError.ColumnNotFound` if `column_index` is out of range.
    pub fn getColumnByIndex(self: Table, allocator: Allocator, column_index: usize) TableError![]const []const u8 {
        const column_count = self.expected_column_count orelse return TableError.ColumnNotFound;
        // `>=`, not `>`: an index equal to the column count is already out of range.
        if (column_index >= column_count) return TableError.ColumnNotFound;

        // The result size is known up front, so allocate it exactly once.
        const column_values = try allocator.alloc([]const u8, self.data.items.len);
        for (self.data.items, column_values) |row, *slot| {
            slot.* = row.items[column_index];
        }
        return column_values;
    }

    /// Return the row at the provided index as a slice of values
    ///
    /// The returned slice is a view into the table; it is invalidated by any
    /// operation that modifies that row.
    pub fn getRowByIndex(self: Table, row_index: usize) TableError![]const []const u8 {
        if (row_index >= self.data.items.len) return TableError.RowNotFound;
        return self.data.items[row_index].items;
    }

    /// Insert an empty row at the provided index and shift all subsequent rows
    ///
    /// Arguments:
    /// - `row_index`: The index at which to insert the empty row. If `null`, the row will be appended to the end.
    ///
    /// The new row holds one empty value per known column (none while the column
    /// count is still unknown).
    ///
    /// Returns the index of the newly inserted row.
    pub fn insertEmptyRow(self: *Table, row_index: ?usize) TableError!usize {
        const target_index = row_index orelse self.data.items.len;
        if (target_index > self.data.items.len) return TableError.RowNotFound;

        var empty_row: ArrayList([]const u8) = .empty;
        errdefer empty_row.deinit(self.allocator);

        try empty_row.appendNTimes(self.allocator, "", self.expected_column_count orelse 0);
        try self.data.insert(self.allocator, target_index, empty_row);
        return target_index;
    }

    /// Insert an empty column at the provided index and shift all subsequent columns
    ///
    /// Arguments:
    /// - `column_index`: The index at which to insert the empty column. If `null`, the column will be appended to the end.
    ///
    /// Returns the index of the newly inserted column.
    pub fn insertEmptyColumn(self: *Table, column_index: ?usize) TableError!usize {
        const column_count = self.expected_column_count orelse 0;
        const target_index = column_index orelse column_count;
        if (target_index > column_count) return TableError.ColumnNotFound;

        // Reserve space in every row first: an allocation failure must not leave
        // some rows widened and others not.
        for (self.data.items) |*row| {
            try row.ensureUnusedCapacity(self.allocator, 1);
        }
        for (self.data.items) |*row| {
            row.insertAssumeCapacity(target_index, "");
        }
        self.expected_column_count = column_count + 1;
        return target_index;
    }

    /// Replace a value by a given new value, row index, and column index
    ///
    /// `new_value` is borrowed, not copied: it must outlive the stored value.
    ///
    /// Raises `TableError.IllegalCharacter` if `new_value` contains the configured
    /// delimiter or terminator.
    pub fn replaceValue(self: *Table, row_index: usize, column_index: usize, new_value: []const u8) TableError!void {
        if (row_index >= self.data.items.len) return TableError.RowNotFound;
        if (column_index >= self.expected_column_count orelse 0) return TableError.ColumnNotFound;
        if (std.mem.indexOf(u8, new_value, self.settings.delimiter) != null) return TableError.IllegalCharacter;
        if (std.mem.indexOf(u8, new_value, self.settings.terminator) != null) return TableError.IllegalCharacter;
        self.data.items[row_index].items[column_index] = new_value;
    }

    /// Remove a column by its index
    ///
    /// All prior column indexes will be invalidated.
    pub fn deleteColumnByIndex(self: *Table, column_index: usize) TableError!void {
        const column_count = self.expected_column_count orelse 0;
        if (column_index >= column_count) return TableError.ColumnNotFound;
        for (self.data.items) |*row| {
            _ = row.orderedRemove(column_index);
        }
        // Cannot underflow: the bounds check above implies column_count >= 1.
        self.expected_column_count = column_count - 1;
    }

    /// Remove a row by its index
    ///
    /// All prior row indexes will be invalidated.
    pub fn deleteRowByIndex(self: *Table, row_index: usize) TableError!void {
        if (row_index >= self.data.items.len) return TableError.RowNotFound;
        var removed_row = self.data.orderedRemove(row_index);
        removed_row.deinit(self.allocator);
    }

    /// Returns a slice of bytes containing the CSV data stored in the struct Table.
    ///
    /// Values are joined with the configured delimiter and rows with the configured
    /// terminator; no trailing terminator is written. The caller owns the returned
    /// slice and must free it with `allocator`.
    pub fn exportCSV(self: *Table, allocator: Allocator) TableError![]const u8 {
        var csv: ArrayList(u8) = .empty;
        errdefer csv.deinit(allocator);

        for (self.data.items, 0..) |row, row_index| {
            if (row_index > 0) {
                try csv.appendSlice(allocator, self.settings.terminator);
            }
            for (row.items, 0..) |column, column_index| {
                if (column_index > 0) {
                    try csv.appendSlice(allocator, self.settings.delimiter);
                }
                try csv.appendSlice(allocator, column);
            }
        }
        return try csv.toOwnedSlice(allocator);
    }
};
//! Thin root module that re-exports the core Table implementation and the schema
//! module. This avoids circular import issues by keeping the core implementation
//! in `table.zig` while allowing consumers to import this single entrypoint.

const table = @import("table.zig");
/// The schema module itself, kept public so that existing `schema.*` accesses
/// through this root module keep working alongside the flat re-exports below.
pub const schema = @import("schema.zig");

// Core CSV table API, implemented in `table.zig`.
pub const Table = table.Table;
pub const Settings = table.Settings;
pub const TableError = table.TableError;

// Declarations re-exported from `schema.zig`.
pub const StructureError = schema.StructureError;
pub const ParseResult = schema.ParseResult;
pub const StructuredTable = schema.StructuredTable;
0 commit comments